From 234f989ddf6e42b33090adc47345725a3d74d956 Mon Sep 17 00:00:00 2001 From: github-action-benchmark Date: Fri, 8 Nov 2024 06:24:55 +0000 Subject: [PATCH] add Lux Benchmarks (julia) benchmark result for ed0d75c41a3a4c443c753d52d4bb28766391005f --- benchmarks/data.js | 8546 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 8545 insertions(+), 1 deletion(-) diff --git a/benchmarks/data.js b/benchmarks/data.js index a203a1ab15..f84dff7601 100644 --- a/benchmarks/data.js +++ b/benchmarks/data.js @@ -1,5 +1,5 @@ window.BENCHMARK_DATA = { - "lastUpdate": 1730951129896, + "lastUpdate": 1731047094755, "repoUrl": "https://github.com/LuxDL/Lux.jl", "entries": { "Lux Benchmarks": [ @@ -184322,6 +184322,8550 @@ window.BENCHMARK_DATA = { "extra": "gctime=0\nmemory=36952\nallocs=1107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" } ] + }, + { + "commit": { + "author": { + "email": "41898282+github-actions[bot]@users.noreply.github.com", + "name": "github-actions[bot]", + "username": "github-actions[bot]" + }, + "committer": { + "email": "noreply@github.com", + "name": "GitHub", + "username": "web-flow" + }, + "distinct": true, + "id": "ed0d75c41a3a4c443c753d52d4bb28766391005f", + "message": "chore: bump compat for Optimisers to 0.4 for package DDIM, (keep existing compat) (#1059)\n\nCo-authored-by: CompatHelper Julia ", + "timestamp": "2024-11-07T22:02:46-05:00", + "tree_id": "62cbaaedda9fb2bedd8e3e2db68af5f6dbfe5f8b", + "url": "https://github.com/LuxDL/Lux.jl/commit/ed0d75c41a3a4c443c753d52d4bb28766391005f" + }, + "date": 1731047072038, + "tool": "julia", + "benches": [ + { + "name": "layernorm(2, act=gelu, affine=false)(4 x 32)/forward/CPU/2 thread(s)", + "value": 4375, + "unit": "ns", + "extra": "gctime=0\nmemory=1632\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(4 x 32)/forward/CPU/4 thread(s)", + "value": 4583, + "unit": "ns", + "extra": "gctime=0\nmemory=1632\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(4 x 32)/forward/CPU/8 thread(s)", + "value": 8042, + "unit": "ns", + "extra": "gctime=0\nmemory=1632\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(4 x 32)/forward/CPU/1 thread(s)", + "value": 4125, + "unit": "ns", + "extra": "gctime=0\nmemory=1632\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(4 x 32)/forward/GPU/CUDA", + "value": 60754, + "unit": "ns", + "extra": "gctime=0\nmemory=6432\nallocs=316\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 10083, + "unit": "ns", + "extra": "gctime=0\nmemory=15936\nallocs=102\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 10208, + "unit": "ns", + "extra": "gctime=0\nmemory=15936\nallocs=102\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 11292, + "unit": "ns", + "extra": "gctime=0\nmemory=15936\nallocs=102\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 10625, + "unit": "ns", + "extra": "gctime=0\nmemory=15936\nallocs=102\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(4 x 32)/zygote/GPU/CUDA", + "value": 429099, + "unit": "ns", + "extra": "gctime=0\nmemory=46336\nallocs=2066\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=relu)(32 x 128)/forward/CPU/2 thread(s)", + "value": 1083, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=relu)(32 x 128)/forward/CPU/4 thread(s)", + "value": 1083, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=relu)(32 x 128)/forward/CPU/8 thread(s)", + "value": 1333, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=relu)(32 x 128)/forward/CPU/1 thread(s)", + "value": 3583, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=relu)(32 x 128)/forward/GPU/CUDA", + "value": 18440, + "unit": "ns", + "extra": "gctime=0\nmemory=1504\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=relu)(32 x 128)/zygote/CPU/2 thread(s)", + "value": 4166, + "unit": "ns", + "extra": "gctime=0\nmemory=50496\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=relu)(32 x 128)/zygote/CPU/4 thread(s)", + "value": 4000, + "unit": "ns", + "extra": "gctime=0\nmemory=50496\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=relu)(32 x 128)/zygote/CPU/8 thread(s)", + "value": 4334, + "unit": "ns", + "extra": "gctime=0\nmemory=50496\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=relu)(32 x 128)/zygote/CPU/1 thread(s)", + "value": 3958, + "unit": "ns", + "extra": "gctime=0\nmemory=50496\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=relu)(32 x 128)/zygote/GPU/CUDA", + "value": 112468, + "unit": "ns", + "extra": "gctime=0\nmemory=9896\nallocs=441\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 57792, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 46125, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 46167, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 81125, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 38404, + "unit": "ns", + "extra": "gctime=0\nmemory=3696\nallocs=131\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 2028916, + "unit": "ns", + "extra": "gctime=0\nmemory=7356096\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 2086083, + "unit": "ns", + "extra": "gctime=0\nmemory=7356096\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 2090541.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7356096\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 1986270.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7356096\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 199754, + "unit": "ns", + "extra": "gctime=0\nmemory=15064\nallocs=616\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 144083, + "unit": "ns", + "extra": "gctime=0\nmemory=1050128\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 146458, + "unit": "ns", + "extra": "gctime=0\nmemory=1050128\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 147167, + "unit": "ns", + "extra": "gctime=0\nmemory=1050128\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 145750, + "unit": "ns", + "extra": "gctime=0\nmemory=1050128\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 166912, + "unit": "ns", + "extra": "gctime=0\nmemory=8480\nallocs=386\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 1116416, + "unit": "ns", + "extra": "gctime=0\nmemory=8401280\nallocs=142\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 1112187, + "unit": "ns", + "extra": "gctime=0\nmemory=8401280\nallocs=142\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 1123395.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8401280\nallocs=142\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 1104416, + "unit": "ns", + "extra": "gctime=0\nmemory=8401280\nallocs=142\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 523848, + "unit": "ns", + "extra": "gctime=0\nmemory=49504\nallocs=2117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(4 x 32)/forward/CPU/2 thread(s)", + "value": 3500, + "unit": "ns", + "extra": "gctime=0\nmemory=2400\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(4 x 32)/forward/CPU/4 thread(s)", + "value": 3541.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2400\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(4 x 32)/forward/CPU/8 thread(s)", + "value": 4437.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2400\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(4 x 32)/forward/CPU/1 thread(s)", + "value": 3125, + "unit": "ns", + "extra": "gctime=0\nmemory=2400\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(4 x 32)/forward/GPU/CUDA", + "value": 67435, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=376\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 9083, + "unit": "ns", + "extra": "gctime=0\nmemory=17872\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 9500, + "unit": "ns", + "extra": "gctime=0\nmemory=17872\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 10000, + "unit": "ns", + "extra": "gctime=0\nmemory=17872\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 9250, + "unit": "ns", + "extra": "gctime=0\nmemory=17872\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(4 x 32)/zygote/GPU/CUDA", + "value": 492306, + "unit": "ns", + "extra": "gctime=0\nmemory=47104\nallocs=2185\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 16646, + "unit": "ns", + "extra": "gctime=0\nmemory=134288\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 14937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=134288\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 18292, + "unit": "ns", + "extra": "gctime=0\nmemory=134288\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 15000, + "unit": "ns", + "extra": "gctime=0\nmemory=134288\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 54321, + "unit": "ns", + "extra": "gctime=0\nmemory=7456\nallocs=198\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 213937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=813456\nallocs=127\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 212729.5, + "unit": "ns", + "extra": "gctime=0\nmemory=813456\nallocs=127\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 214000, + "unit": "ns", + "extra": "gctime=0\nmemory=813456\nallocs=127\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 213500, + "unit": "ns", + "extra": "gctime=0\nmemory=813456\nallocs=127\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 273554, + "unit": "ns", + "extra": "gctime=0\nmemory=47248\nallocs=1061\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=relu)(2 x 128)/forward/CPU/2 thread(s)", + "value": 500, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=relu)(2 x 128)/forward/CPU/4 thread(s)", + "value": 500, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=relu)(2 x 128)/forward/CPU/8 thread(s)", + "value": 750, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=relu)(2 x 128)/forward/CPU/1 thread(s)", + "value": 666, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=relu)(2 x 128)/forward/GPU/CUDA", + "value": 17546, + "unit": "ns", + "extra": "gctime=0\nmemory=1504\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=relu)(2 x 128)/zygote/CPU/2 thread(s)", + "value": 1375, + "unit": "ns", + "extra": "gctime=0\nmemory=3712\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=relu)(2 x 128)/zygote/CPU/4 thread(s)", + "value": 1500, + "unit": "ns", + "extra": "gctime=0\nmemory=3712\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=relu)(2 x 128)/zygote/CPU/8 thread(s)", + "value": 1875, + "unit": "ns", + "extra": "gctime=0\nmemory=3712\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=relu)(2 x 128)/zygote/CPU/1 thread(s)", + "value": 1625, + "unit": "ns", + "extra": "gctime=0\nmemory=3712\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=relu)(2 x 128)/zygote/GPU/CUDA", + "value": 104313, + "unit": "ns", + "extra": "gctime=0\nmemory=8952\nallocs=406\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 6792, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 5875, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 5875, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 9916, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 24032, + "unit": "ns", + "extra": "gctime=0\nmemory=2816\nallocs=100\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 221667, + "unit": "ns", + "extra": "gctime=0\nmemory=932800\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 228854.5, + "unit": "ns", + "extra": "gctime=0\nmemory=932800\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 229292, + "unit": "ns", + "extra": "gctime=0\nmemory=932800\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 213000, + "unit": "ns", + "extra": "gctime=0\nmemory=932800\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 170365, + "unit": "ns", + "extra": "gctime=0\nmemory=17624\nallocs=574\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=relu)(32 x 128)/forward/CPU/2 thread(s)", + "value": 3834, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=relu)(32 x 128)/forward/CPU/4 thread(s)", + "value": 3916, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=relu)(32 x 128)/forward/CPU/8 thread(s)", + "value": 3875, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=relu)(32 x 128)/forward/CPU/1 thread(s)", + "value": 3833, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=relu)(32 x 128)/forward/GPU/CUDA", + "value": 23655, + "unit": "ns", + "extra": "gctime=0\nmemory=960\nallocs=41\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=relu)(32 x 128)/zygote/CPU/2 thread(s)", + "value": 16666, + "unit": "ns", + "extra": "gctime=0\nmemory=71920\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=relu)(32 x 128)/zygote/CPU/4 thread(s)", + "value": 16917, + "unit": "ns", + "extra": "gctime=0\nmemory=71920\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=relu)(32 x 128)/zygote/CPU/8 thread(s)", + "value": 17041, + "unit": "ns", + "extra": "gctime=0\nmemory=71920\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=relu)(32 x 128)/zygote/CPU/1 thread(s)", + "value": 16750, + "unit": "ns", + "extra": "gctime=0\nmemory=71920\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=relu)(32 x 128)/zygote/GPU/CUDA", + "value": 163843.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10456\nallocs=441\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=gelu)(512 x 128)/forward/CPU/2 thread(s)", + "value": 577166, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=gelu)(512 x 128)/forward/CPU/4 thread(s)", + "value": 578250, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=gelu)(512 x 128)/forward/CPU/8 thread(s)", + "value": 598583, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=gelu)(512 x 128)/forward/CPU/1 thread(s)", + "value": 577750, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=gelu)(512 x 128)/forward/GPU/CUDA", + "value": 113312, + "unit": "ns", + "extra": "gctime=0\nmemory=960\nallocs=41\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=gelu)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 1420667, + "unit": "ns", + "extra": "gctime=0\nmemory=2360672\nallocs=33\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=gelu)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 1422916, + "unit": "ns", + "extra": "gctime=0\nmemory=2360672\nallocs=33\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=gelu)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 1452041, + "unit": "ns", + "extra": "gctime=0\nmemory=2360672\nallocs=33\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=gelu)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 1416667, + "unit": "ns", + "extra": "gctime=0\nmemory=2360672\nallocs=33\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=gelu)(512 x 128)/zygote/GPU/CUDA", + "value": 214659, + "unit": "ns", + "extra": "gctime=0\nmemory=12520\nallocs=548\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/2 thread(s)", + "value": 1068354.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1691904\nallocs=141\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/4 thread(s)", + "value": 970500, + "unit": "ns", + "extra": "gctime=0\nmemory=1983808\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/8 thread(s)", + "value": 1344437.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2567616\nallocs=201\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/1 thread(s)", + "value": 1302166, + "unit": "ns", + "extra": "gctime=0\nmemory=1593952\nallocs=131\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/GPU/CUDA", + "value": 275365.5, + "unit": "ns", + "extra": "gctime=0\nmemory=20704\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/2 thread(s)", + "value": 5882458, + "unit": "ns", + "extra": "gctime=0\nmemory=5138672\nallocs=554\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/4 thread(s)", + "value": 4543542, + "unit": "ns", + "extra": "gctime=0\nmemory=5722480\nallocs=594\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/8 thread(s)", + "value": 4907291, + "unit": "ns", + "extra": "gctime=0\nmemory=6890096\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/1 thread(s)", + "value": 5670583, + "unit": "ns", + "extra": "gctime=0\nmemory=4942768\nallocs=534\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/GPU/CUDA", + "value": 1092293.5, + "unit": "ns", + "extra": "gctime=0\nmemory=140544\nallocs=2606\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=relu)(2 x 128)/forward/CPU/2 thread(s)", + "value": 500, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=relu)(2 x 128)/forward/CPU/4 thread(s)", + "value": 542, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=relu)(2 x 128)/forward/CPU/8 thread(s)", + "value": 542, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=relu)(2 x 128)/forward/CPU/1 thread(s)", + "value": 500, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=relu)(2 x 128)/forward/GPU/CUDA", + "value": 23682, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=relu)(2 x 128)/zygote/CPU/2 thread(s)", + "value": 2083, + "unit": "ns", + "extra": "gctime=0\nmemory=4880\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=relu)(2 x 128)/zygote/CPU/4 thread(s)", + "value": 2167, + "unit": "ns", + "extra": "gctime=0\nmemory=4880\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=relu)(2 x 128)/zygote/CPU/8 thread(s)", + "value": 2208, + "unit": "ns", + "extra": "gctime=0\nmemory=4880\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=relu)(2 x 128)/zygote/CPU/1 thread(s)", + "value": 2084, + "unit": "ns", + "extra": "gctime=0\nmemory=4880\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=relu)(2 x 128)/zygote/GPU/CUDA", + "value": 173257, + "unit": "ns", + "extra": "gctime=0\nmemory=10904\nallocs=456\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(32 x 32)/forward/CPU/2 thread(s)", + "value": 5625, + "unit": "ns", + "extra": "gctime=0\nmemory=5408\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(32 x 32)/forward/CPU/4 thread(s)", + "value": 5834, + "unit": "ns", + "extra": "gctime=0\nmemory=5408\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(32 x 32)/forward/CPU/8 thread(s)", + "value": 7166, + "unit": "ns", + "extra": "gctime=0\nmemory=5408\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(32 x 32)/forward/CPU/1 thread(s)", + "value": 5500, + "unit": "ns", + "extra": "gctime=0\nmemory=5408\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(32 x 32)/forward/GPU/CUDA", + "value": 65459, + "unit": "ns", + "extra": "gctime=0\nmemory=6432\nallocs=316\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 11625, + "unit": "ns", + "extra": "gctime=0\nmemory=51632\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 11645.5, + "unit": "ns", + "extra": "gctime=0\nmemory=51632\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 12187.5, + "unit": "ns", + "extra": "gctime=0\nmemory=51632\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 11167, + "unit": "ns", + "extra": "gctime=0\nmemory=51632\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(32 x 32)/zygote/GPU/CUDA", + "value": 447697, + "unit": "ns", + "extra": "gctime=0\nmemory=43616\nallocs=2020\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(32 x 32)/forward/CPU/2 thread(s)", + "value": 7625, + "unit": "ns", + "extra": "gctime=0\nmemory=7344\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(32 x 32)/forward/CPU/4 thread(s)", + "value": 6875, + "unit": "ns", + "extra": "gctime=0\nmemory=7344\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(32 x 32)/forward/CPU/8 thread(s)", + "value": 8500, + "unit": "ns", + "extra": "gctime=0\nmemory=7344\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(32 x 32)/forward/CPU/1 thread(s)", + "value": 6750, + "unit": "ns", + "extra": "gctime=0\nmemory=7344\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(32 x 32)/forward/GPU/CUDA", + "value": 52309, + "unit": "ns", + "extra": "gctime=0\nmemory=8080\nallocs=225\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 17292, + "unit": "ns", + "extra": "gctime=0\nmemory=60944\nallocs=124\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 18333, + "unit": "ns", + "extra": "gctime=0\nmemory=60944\nallocs=124\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 18792, + "unit": "ns", + "extra": "gctime=0\nmemory=60944\nallocs=124\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 17708, + "unit": "ns", + "extra": "gctime=0\nmemory=60944\nallocs=124\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(32 x 32)/zygote/GPU/CUDA", + "value": 302468, + "unit": "ns", + "extra": "gctime=0\nmemory=54264\nallocs=1229\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(32 x 32)/forward/CPU/2 thread(s)", + "value": 500, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(32 x 32)/forward/CPU/4 thread(s)", + "value": 583, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(32 x 32)/forward/CPU/8 thread(s)", + "value": 584, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(32 x 32)/forward/CPU/1 thread(s)", + "value": 500, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(32 x 32)/forward/GPU/CUDA", + "value": 33300, + "unit": "ns", + "extra": "gctime=0\nmemory=4016\nallocs=144\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 8541.5, + "unit": "ns", + "extra": "gctime=0\nmemory=40176\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 9083, + "unit": "ns", + "extra": "gctime=0\nmemory=40176\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 9625, + "unit": "ns", + "extra": "gctime=0\nmemory=40176\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 8750, + "unit": "ns", + "extra": "gctime=0\nmemory=40176\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(32 x 32)/zygote/GPU/CUDA", + "value": 161219, + "unit": "ns", + "extra": "gctime=0\nmemory=16944\nallocs=551\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=identity)(512 x 128)/forward/CPU/2 thread(s)", + "value": 64333, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=identity)(512 x 128)/forward/CPU/4 thread(s)", + "value": 64687.5, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=identity)(512 x 128)/forward/CPU/8 thread(s)", + "value": 64375, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=identity)(512 x 128)/forward/CPU/1 thread(s)", + "value": 63917, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=identity)(512 x 128)/forward/GPU/CUDA", + "value": 112623.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1216\nallocs=51\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=identity)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 288354.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1836144\nallocs=29\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=identity)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 281500, + "unit": "ns", + "extra": "gctime=0\nmemory=1836144\nallocs=29\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=identity)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 277333.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1836144\nallocs=29\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=identity)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 282958.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1836144\nallocs=29\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=identity)(512 x 128)/zygote/GPU/CUDA", + "value": 187466, + "unit": "ns", + "extra": "gctime=0\nmemory=9520\nallocs=393\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/2 thread(s)", + "value": 3321750, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/4 thread(s)", + "value": 3035042, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/8 thread(s)", + "value": 3019750, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/1 thread(s)", + "value": 3935396, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/GPU/CUDA", + "value": 580332, + "unit": "ns", + "extra": "gctime=0\nmemory=25664\nallocs=978\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/2 thread(s)", + "value": 7600458.5, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/4 thread(s)", + "value": 7434083, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/8 thread(s)", + "value": 7457646, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/1 thread(s)", + "value": 8171125, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/GPU/CUDA", + "value": 1357218, + "unit": "ns", + "extra": "gctime=0\nmemory=366272\nallocs=3997\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/2 thread(s)", + "value": 18821167, + "unit": "ns", + "extra": "gctime=0\nmemory=9373552\nallocs=470\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/4 thread(s)", + "value": 19108667, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/8 thread(s)", + "value": 19164083, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/1 thread(s)", + "value": 15675625, + "unit": "ns", + "extra": "gctime=0\nmemory=9373456\nallocs=464\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)", + "value": 23766000, + "unit": "ns", + "extra": "gctime=0\nmemory=71841200\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)", + "value": 33980750, + "unit": "ns", + "extra": "gctime=0\nmemory=85128016\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)", + "value": 36953187.5, + "unit": "ns", + "extra": "gctime=0\nmemory=111701648\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)", + "value": 34917125, + "unit": "ns", + "extra": "gctime=290125\nmemory=67411936\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/GPU/CUDA", + "value": 1857952, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)", + "value": 188752208, + "unit": "ns", + "extra": "gctime=0\nmemory=278243472\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)", + "value": 164370333, + "unit": "ns", + "extra": "gctime=3043208\nmemory=304817104\nallocs=181\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)", + "value": 153098083, + "unit": "ns", + "extra": "gctime=3806375\nmemory=357964368\nallocs=221\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)", + "value": 437817875, + "unit": "ns", + "extra": "gctime=162021793\nmemory=269384944\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/GPU/CUDA", + "value": 13921239, + "unit": "ns", + "extra": "gctime=0\nmemory=30104\nallocs=927\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)", + "value": 289969375, + "unit": "ns", + "extra": "gctime=19669000\nmemory=215259440\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)", + "value": 340002146, + "unit": "ns", + "extra": "gctime=96129396\nmemory=241833072\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)", + "value": 299659604.5, + "unit": "ns", + "extra": "gctime=68586438\nmemory=294980336\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)", + "value": 336790458, + "unit": "ns", + "extra": "gctime=2501209\nmemory=206400912\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 23709, + "unit": "ns", + "extra": "gctime=0\nmemory=132624\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 23958, + "unit": "ns", + "extra": "gctime=0\nmemory=132624\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 25375, + "unit": "ns", + "extra": "gctime=0\nmemory=132624\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 23666, + "unit": "ns", + "extra": "gctime=0\nmemory=132624\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 95315, + "unit": "ns", + "extra": "gctime=0\nmemory=8480\nallocs=386\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 103625, + "unit": "ns", + "extra": "gctime=0\nmemory=1061248\nallocs=142\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 103250, + "unit": "ns", + "extra": "gctime=0\nmemory=1061248\nallocs=142\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 106729, + "unit": "ns", + "extra": "gctime=0\nmemory=1061248\nallocs=142\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 103291, + "unit": "ns", + "extra": "gctime=0\nmemory=1061248\nallocs=142\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 500314.5, + "unit": "ns", + "extra": "gctime=0\nmemory=49504\nallocs=2117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(32 x 32)/forward/CPU/2 thread(s)", + "value": 7167, + "unit": "ns", + "extra": "gctime=0\nmemory=13728\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(32 x 32)/forward/CPU/4 thread(s)", + "value": 6667, + "unit": "ns", + "extra": "gctime=0\nmemory=13728\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(32 x 32)/forward/CPU/8 thread(s)", + "value": 7667, + "unit": "ns", + "extra": "gctime=0\nmemory=13728\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(32 x 32)/forward/CPU/1 thread(s)", + "value": 6500, + "unit": "ns", + "extra": "gctime=0\nmemory=13728\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(32 x 32)/forward/GPU/CUDA", + "value": 67818, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=376\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 15042, + "unit": "ns", + "extra": "gctime=0\nmemory=89840\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 16208, + "unit": "ns", + "extra": "gctime=0\nmemory=89840\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 16750, + "unit": "ns", + "extra": "gctime=0\nmemory=89840\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 15125, + "unit": "ns", + "extra": "gctime=0\nmemory=89840\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=true)(32 x 32)/zygote/GPU/CUDA", + "value": 474547, + "unit": "ns", + "extra": "gctime=0\nmemory=47104\nallocs=2185\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)", + "value": 2918125, + "unit": "ns", + "extra": "gctime=0\nmemory=8984112\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)", + "value": 2093292, + "unit": "ns", + "extra": "gctime=0\nmemory=10646736\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)", + "value": 2262166.5, + "unit": "ns", + "extra": "gctime=0\nmemory=13971856\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)", + "value": 4794666.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8429664\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/GPU/CUDA", + "value": 587541, + "unit": "ns", + "extra": "gctime=0\nmemory=6608\nallocs=198\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)", + "value": 23488583, + "unit": "ns", + "extra": "gctime=0\nmemory=26917776\nallocs=157\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)", + "value": 18018687.5, + "unit": "ns", + "extra": "gctime=0\nmemory=30243024\nallocs=177\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)", + "value": 18014917, + "unit": "ns", + "extra": "gctime=818250\nmemory=36893264\nallocs=217\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)", + "value": 35776875, + "unit": "ns", + "extra": "gctime=618104\nmemory=25808880\nallocs=147\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/GPU/CUDA", + "value": 3117851, + "unit": "ns", + "extra": "gctime=0\nmemory=28416\nallocs=834\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)", + "value": 33272812.5, + "unit": "ns", + "extra": "gctime=0\nmemory=26914048\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)", + "value": 27626875, + "unit": "ns", + "extra": "gctime=0\nmemory=30239296\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)", + "value": 27533833, + "unit": "ns", + "extra": "gctime=0\nmemory=36889536\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)", + "value": 41814417, + "unit": "ns", + "extra": "gctime=447208\nmemory=25805152\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 74625, + "unit": "ns", + "extra": "gctime=0\nmemory=394480\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 74292, + "unit": "ns", + "extra": "gctime=0\nmemory=394480\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 75666, + "unit": "ns", + "extra": "gctime=0\nmemory=394480\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 74417, + "unit": "ns", + "extra": "gctime=0\nmemory=394480\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 100380.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9440\nallocs=446\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 292896, + "unit": "ns", + "extra": "gctime=0\nmemory=2509296\nallocs=149\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 222583, + "unit": "ns", + "extra": "gctime=0\nmemory=2509296\nallocs=149\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 210416.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2509296\nallocs=149\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 205792, + "unit": "ns", + "extra": "gctime=0\nmemory=2509296\nallocs=149\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 540203.5, + "unit": "ns", + "extra": "gctime=0\nmemory=52816\nallocs=2361\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(32 x 32)/forward/CPU/2 thread(s)", + "value": 12583, + "unit": "ns", + "extra": "gctime=0\nmemory=13728\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(32 x 32)/forward/CPU/4 thread(s)", + "value": 12333, + "unit": "ns", + "extra": "gctime=0\nmemory=13728\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(32 x 32)/forward/CPU/8 thread(s)", + "value": 13291, + "unit": "ns", + "extra": "gctime=0\nmemory=13728\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(32 x 32)/forward/CPU/1 thread(s)", + "value": 12791, + "unit": "ns", + "extra": "gctime=0\nmemory=13728\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(32 x 32)/forward/GPU/CUDA", + "value": 70496, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=376\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 26917, + "unit": "ns", + "extra": "gctime=0\nmemory=89840\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 27125, + "unit": "ns", + "extra": "gctime=0\nmemory=89840\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 28125, + "unit": "ns", + "extra": "gctime=0\nmemory=89840\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 26625, + "unit": "ns", + "extra": "gctime=0\nmemory=89840\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(32 x 32)/zygote/GPU/CUDA", + "value": 470516, + "unit": "ns", + "extra": "gctime=0\nmemory=47360\nallocs=2205\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(32 x 32)/forward/CPU/2 thread(s)", + "value": 12917, + "unit": "ns", + "extra": "gctime=0\nmemory=7344\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(32 x 32)/forward/CPU/4 thread(s)", + "value": 13084, + "unit": "ns", + "extra": "gctime=0\nmemory=7344\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(32 x 32)/forward/CPU/8 thread(s)", + "value": 13666, + "unit": "ns", + "extra": "gctime=0\nmemory=7344\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(32 x 32)/forward/CPU/1 thread(s)", + "value": 12667, + "unit": "ns", + "extra": "gctime=0\nmemory=7344\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(32 x 32)/forward/GPU/CUDA", + "value": 51912, + "unit": "ns", + "extra": "gctime=0\nmemory=8080\nallocs=225\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 25667, + "unit": "ns", + "extra": "gctime=0\nmemory=52368\nallocs=104\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 25875, + "unit": "ns", + "extra": "gctime=0\nmemory=52368\nallocs=104\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 26625, + "unit": "ns", + "extra": "gctime=0\nmemory=52368\nallocs=104\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 26167, + "unit": "ns", + "extra": "gctime=0\nmemory=52368\nallocs=104\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(32 x 32)/zygote/GPU/CUDA", + "value": 301936, + "unit": "ns", + "extra": "gctime=0\nmemory=41896\nallocs=1250\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 180583, + "unit": "ns", + "extra": "gctime=0\nmemory=134288\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 181729, + "unit": "ns", + "extra": "gctime=0\nmemory=134288\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 183250, + "unit": "ns", + "extra": "gctime=0\nmemory=134288\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 179708.5, + "unit": "ns", + "extra": "gctime=0\nmemory=134288\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 55883, + "unit": "ns", + "extra": "gctime=0\nmemory=7456\nallocs=198\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 593354.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1064816\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 590687.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1064816\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 591291.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1064816\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 584292, + "unit": "ns", + "extra": "gctime=0\nmemory=1064816\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 283120, + "unit": "ns", + "extra": "gctime=0\nmemory=37864\nallocs=1159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(32 x 32)/forward/CPU/2 thread(s)", + "value": 7208, + "unit": "ns", + "extra": "gctime=0\nmemory=13728\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(32 x 32)/forward/CPU/4 thread(s)", + "value": 7042, + "unit": "ns", + "extra": "gctime=0\nmemory=13728\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(32 x 32)/forward/CPU/8 thread(s)", + "value": 7875, + "unit": "ns", + "extra": "gctime=0\nmemory=13728\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(32 x 32)/forward/CPU/1 thread(s)", + "value": 6875, + "unit": "ns", + "extra": "gctime=0\nmemory=13728\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(32 x 32)/forward/GPU/CUDA", + "value": 69418.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=376\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 14208, + "unit": "ns", + "extra": "gctime=0\nmemory=81136\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 15042, + "unit": "ns", + "extra": "gctime=0\nmemory=81136\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 15584, + "unit": "ns", + "extra": "gctime=0\nmemory=81136\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 14042, + "unit": "ns", + "extra": "gctime=0\nmemory=81136\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(32 x 32)/zygote/GPU/CUDA", + "value": 454550, + "unit": "ns", + "extra": "gctime=0\nmemory=44832\nallocs=2081\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=4)/forward/CPU/2 thread(s)", + "value": 1165604, + "unit": "ns", + "extra": "gctime=0\nmemory=4195488\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=4)/forward/CPU/4 thread(s)", + "value": 1224917, + "unit": "ns", + "extra": "gctime=0\nmemory=4196560\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=4)/forward/CPU/8 thread(s)", + "value": 1272500, + "unit": "ns", + "extra": "gctime=0\nmemory=4198704\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=4)/forward/CPU/1 thread(s)", + "value": 1318479, + "unit": "ns", + "extra": "gctime=0\nmemory=4194960\nallocs=8\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=4)/forward/GPU/CUDA", + "value": 300980.5, + "unit": "ns", + "extra": "gctime=0\nmemory=560\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=4)/zygote/CPU/2 thread(s)", + "value": 4116000, + "unit": "ns", + "extra": "gctime=0\nmemory=16781488\nallocs=62\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=4)/zygote/CPU/4 thread(s)", + "value": 4366375, + "unit": "ns", + "extra": "gctime=0\nmemory=16784704\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=4)/zygote/CPU/8 thread(s)", + "value": 4511145.5, + "unit": "ns", + "extra": "gctime=0\nmemory=16791136\nallocs=152\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=4)/zygote/CPU/1 thread(s)", + "value": 4453083, + "unit": "ns", + "extra": "gctime=0\nmemory=16779904\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=4)/zygote/GPU/CUDA", + "value": 1040994, + "unit": "ns", + "extra": "gctime=0\nmemory=7408\nallocs=322\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=gelu)(2 x 128)/forward/CPU/2 thread(s)", + "value": 1750, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=gelu)(2 x 128)/forward/CPU/4 thread(s)", + "value": 1875, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=gelu)(2 x 128)/forward/CPU/8 thread(s)", + "value": 1833, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=gelu)(2 x 128)/forward/CPU/1 thread(s)", + "value": 1833, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=gelu)(2 x 128)/forward/GPU/CUDA", + "value": 23357, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=gelu)(2 x 128)/zygote/CPU/2 thread(s)", + "value": 4833, + "unit": "ns", + "extra": "gctime=0\nmemory=5776\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=gelu)(2 x 128)/zygote/CPU/4 thread(s)", + "value": 5083, + "unit": "ns", + "extra": "gctime=0\nmemory=5776\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=gelu)(2 x 128)/zygote/CPU/8 thread(s)", + "value": 4959, + "unit": "ns", + "extra": "gctime=0\nmemory=5776\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=gelu)(2 x 128)/zygote/CPU/1 thread(s)", + "value": 4875, + "unit": "ns", + "extra": "gctime=0\nmemory=5776\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=gelu)(2 x 128)/zygote/GPU/CUDA", + "value": 187819.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12392\nallocs=533\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(4 x 32)/forward/CPU/2 thread(s)", + "value": 6833, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(4 x 32)/forward/CPU/4 thread(s)", + "value": 6520.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(4 x 32)/forward/CPU/8 thread(s)", + "value": 6958, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(4 x 32)/forward/CPU/1 thread(s)", + "value": 7292, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(4 x 32)/forward/GPU/CUDA", + "value": 53959.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7952\nallocs=217\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 11437.5, + "unit": "ns", + "extra": "gctime=0\nmemory=34432\nallocs=124\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 11708, + "unit": "ns", + "extra": "gctime=0\nmemory=34432\nallocs=124\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 12125, + "unit": "ns", + "extra": "gctime=0\nmemory=34432\nallocs=124\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 10875, + "unit": "ns", + "extra": "gctime=0\nmemory=34432\nallocs=124\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=true)(4 x 32)/zygote/GPU/CUDA", + "value": 325142, + "unit": "ns", + "extra": "gctime=0\nmemory=54008\nallocs=1213\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=relu)(2 x 128)/forward/CPU/2 thread(s)", + "value": 250, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=relu)(2 x 128)/forward/CPU/4 thread(s)", + "value": 375, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=relu)(2 x 128)/forward/CPU/8 thread(s)", + "value": 292, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=relu)(2 x 128)/forward/CPU/1 thread(s)", + "value": 333, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=relu)(2 x 128)/forward/GPU/CUDA", + "value": 22864, + "unit": "ns", + "extra": "gctime=0\nmemory=960\nallocs=41\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=relu)(2 x 128)/zygote/CPU/2 thread(s)", + "value": 2750, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=relu)(2 x 128)/zygote/CPU/4 thread(s)", + "value": 2959, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=relu)(2 x 128)/zygote/CPU/8 thread(s)", + "value": 3042, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=relu)(2 x 128)/zygote/CPU/1 thread(s)", + "value": 2750, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=relu)(2 x 128)/zygote/GPU/CUDA", + "value": 158390, + "unit": "ns", + "extra": "gctime=0\nmemory=9512\nallocs=406\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(32 x 32)/forward/CPU/2 thread(s)", + "value": 13292, + "unit": "ns", + "extra": "gctime=0\nmemory=7088\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(32 x 32)/forward/CPU/4 thread(s)", + "value": 12333, + "unit": "ns", + "extra": "gctime=0\nmemory=7088\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(32 x 32)/forward/CPU/8 thread(s)", + "value": 15167, + "unit": "ns", + "extra": "gctime=0\nmemory=7088\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(32 x 32)/forward/CPU/1 thread(s)", + "value": 13542, + "unit": "ns", + "extra": "gctime=0\nmemory=7088\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(32 x 32)/forward/GPU/CUDA", + "value": 55595.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=201\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 24354.5, + "unit": "ns", + "extra": "gctime=0\nmemory=48912\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 24917, + "unit": "ns", + "extra": "gctime=0\nmemory=48912\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 25625, + "unit": "ns", + "extra": "gctime=0\nmemory=48912\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 24500, + "unit": "ns", + "extra": "gctime=0\nmemory=48912\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(32 x 32)/zygote/GPU/CUDA", + "value": 291342, + "unit": "ns", + "extra": "gctime=0\nmemory=35880\nallocs=1127\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=relu)(32 x 128)/forward/CPU/2 thread(s)", + "value": 4125, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=relu)(32 x 128)/forward/CPU/4 thread(s)", + "value": 4166, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=relu)(32 x 128)/forward/CPU/8 thread(s)", + "value": 4208, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=relu)(32 x 128)/forward/CPU/1 thread(s)", + "value": 4167, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=relu)(32 x 128)/forward/GPU/CUDA", + "value": 24702, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=relu)(32 x 128)/zygote/CPU/2 thread(s)", + "value": 16208, + "unit": "ns", + "extra": "gctime=0\nmemory=71488\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=relu)(32 x 128)/zygote/CPU/4 thread(s)", + "value": 16292, + "unit": "ns", + "extra": "gctime=0\nmemory=71488\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=relu)(32 x 128)/zygote/CPU/8 thread(s)", + "value": 16375, + "unit": "ns", + "extra": "gctime=0\nmemory=71488\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=relu)(32 x 128)/zygote/CPU/1 thread(s)", + "value": 16084, + "unit": "ns", + "extra": "gctime=0\nmemory=71488\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=relu)(32 x 128)/zygote/GPU/CUDA", + "value": 196319.5, + "unit": "ns", + "extra": "gctime=0\nmemory=11848\nallocs=491\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(32 x 32)/forward/CPU/2 thread(s)", + "value": 5625, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(32 x 32)/forward/CPU/4 thread(s)", + "value": 5708, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(32 x 32)/forward/CPU/8 thread(s)", + "value": 5833, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(32 x 32)/forward/CPU/1 thread(s)", + "value": 5667, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(32 x 32)/forward/GPU/CUDA", + "value": 34095, + "unit": "ns", + "extra": "gctime=0\nmemory=4016\nallocs=144\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 20542, + "unit": "ns", + "extra": "gctime=0\nmemory=48880\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 21000, + "unit": "ns", + "extra": "gctime=0\nmemory=48880\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 21375, + "unit": "ns", + "extra": "gctime=0\nmemory=48880\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 20292, + "unit": "ns", + "extra": "gctime=0\nmemory=48880\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(32 x 32)/zygote/GPU/CUDA", + "value": 175766, + "unit": "ns", + "extra": "gctime=0\nmemory=15832\nallocs=664\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=512)/forward/CPU/2 thread(s)", + "value": 399250, + "unit": "ns", + "extra": "gctime=0\nmemory=525472\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=512)/forward/CPU/4 thread(s)", + "value": 379792, + "unit": "ns", + "extra": "gctime=0\nmemory=526544\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=512)/forward/CPU/8 thread(s)", + "value": 489500, + "unit": "ns", + "extra": "gctime=0\nmemory=528688\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=512)/forward/CPU/1 thread(s)", + "value": 532604.5, + "unit": "ns", + "extra": "gctime=0\nmemory=524944\nallocs=8\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=512)/forward/GPU/CUDA", + "value": 66554, + "unit": "ns", + "extra": "gctime=0\nmemory=560\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=512)/zygote/CPU/2 thread(s)", + "value": 963624.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2101424\nallocs=62\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=512)/zygote/CPU/4 thread(s)", + "value": 856312.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2104640\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=512)/zygote/CPU/8 thread(s)", + "value": 1230417, + "unit": "ns", + "extra": "gctime=0\nmemory=2111072\nallocs=152\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=512)/zygote/CPU/1 thread(s)", + "value": 1311562, + "unit": "ns", + "extra": "gctime=0\nmemory=2099840\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=512)/zygote/GPU/CUDA", + "value": 191675, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=321\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 82792, + "unit": "ns", + "extra": "gctime=0\nmemory=1051792\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 80875, + "unit": "ns", + "extra": "gctime=0\nmemory=1051792\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 84083.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1051792\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 82521, + "unit": "ns", + "extra": "gctime=0\nmemory=1051792\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 192735.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7472\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 1915250, + "unit": "ns", + "extra": "gctime=0\nmemory=6318480\nallocs=127\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 1909021, + "unit": "ns", + "extra": "gctime=0\nmemory=6318480\nallocs=127\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 1928396.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6318480\nallocs=127\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 1912916.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6318480\nallocs=127\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 406192, + "unit": "ns", + "extra": "gctime=0\nmemory=46768\nallocs=1061\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=identity)(2 x 128)/forward/CPU/2 thread(s)", + "value": 291, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=identity)(2 x 128)/forward/CPU/4 thread(s)", + "value": 292, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=identity)(2 x 128)/forward/CPU/8 thread(s)", + "value": 292, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=identity)(2 x 128)/forward/CPU/1 thread(s)", + "value": 292, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=identity)(2 x 128)/forward/GPU/CUDA", + "value": 22043, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=identity)(2 x 128)/zygote/CPU/2 thread(s)", + "value": 1792, + "unit": "ns", + "extra": "gctime=0\nmemory=3584\nallocs=9\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=identity)(2 x 128)/zygote/CPU/4 thread(s)", + "value": 1875, + "unit": "ns", + "extra": "gctime=0\nmemory=3584\nallocs=9\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=identity)(2 x 128)/zygote/CPU/8 thread(s)", + "value": 1917, + "unit": "ns", + "extra": "gctime=0\nmemory=3584\nallocs=9\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=identity)(2 x 128)/zygote/CPU/1 thread(s)", + "value": 1792, + "unit": "ns", + "extra": "gctime=0\nmemory=3584\nallocs=9\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=true, act=identity)(2 x 128)/zygote/GPU/CUDA", + "value": 170121.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9392\nallocs=378\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(4 x 32)/forward/CPU/2 thread(s)", + "value": 6083, + "unit": "ns", + "extra": "gctime=0\nmemory=3360\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(4 x 32)/forward/CPU/4 thread(s)", + "value": 6875, + "unit": "ns", + "extra": "gctime=0\nmemory=3360\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(4 x 32)/forward/CPU/8 thread(s)", + "value": 9000, + "unit": "ns", + "extra": "gctime=0\nmemory=3360\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(4 x 32)/forward/CPU/1 thread(s)", + "value": 8375, + "unit": "ns", + "extra": "gctime=0\nmemory=3360\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(4 x 32)/forward/GPU/CUDA", + "value": 60029.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7264\nallocs=193\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 8875, + "unit": "ns", + "extra": "gctime=0\nmemory=18896\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 9333, + "unit": "ns", + "extra": "gctime=0\nmemory=18896\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 9667, + "unit": "ns", + "extra": "gctime=0\nmemory=18896\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 9333, + "unit": "ns", + "extra": "gctime=0\nmemory=18896\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=false)(4 x 32)/zygote/GPU/CUDA", + "value": 304727, + "unit": "ns", + "extra": "gctime=0\nmemory=35624\nallocs=1111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)", + "value": 121958916.5, + "unit": "ns", + "extra": "gctime=0\nmemory=143677872\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)", + "value": 173853917, + "unit": "ns", + "extra": "gctime=0\nmemory=170249552\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)", + "value": 147607125, + "unit": "ns", + "extra": "gctime=0\nmemory=223392912\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)", + "value": 103815750, + "unit": "ns", + "extra": "gctime=296917\nmemory=134820320\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/GPU/CUDA", + "value": 5473827, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)", + "value": 616983729, + "unit": "ns", + "extra": "gctime=136665708.5\nmemory=556544784\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)", + "value": 554296083, + "unit": "ns", + "extra": "gctime=79052709\nmemory=609688144\nallocs=181\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)", + "value": 450713625, + "unit": "ns", + "extra": "gctime=51362917\nmemory=715974864\nallocs=221\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)", + "value": 754890083, + "unit": "ns", + "extra": "gctime=134631520.5\nmemory=538829680\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/GPU/CUDA", + "value": 38208156, + "unit": "ns", + "extra": "gctime=0\nmemory=28408\nallocs=850\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)", + "value": 651694958, + "unit": "ns", + "extra": "gctime=0\nmemory=430580528\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)", + "value": 668848604.5, + "unit": "ns", + "extra": "gctime=22616541.5\nmemory=483723888\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)", + "value": 588116250, + "unit": "ns", + "extra": "gctime=21189396\nmemory=590010608\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)", + "value": 750086792, + "unit": "ns", + "extra": "gctime=3418125\nmemory=412865424\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 59458, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 47459, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 47791, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 83958, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 37381, + "unit": "ns", + "extra": "gctime=0\nmemory=2736\nallocs=99\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 1929916, + "unit": "ns", + "extra": "gctime=0\nmemory=6307920\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 1974333, + "unit": "ns", + "extra": "gctime=0\nmemory=6307920\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 1985833.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6307920\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 1859375, + "unit": "ns", + "extra": "gctime=0\nmemory=6307920\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 173072.5, + "unit": "ns", + "extra": "gctime=0\nmemory=16096\nallocs=495\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 269020.5, + "unit": "ns", + "extra": "gctime=0\nmemory=394480\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 268292, + "unit": "ns", + "extra": "gctime=0\nmemory=394480\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 270292, + "unit": "ns", + "extra": "gctime=0\nmemory=394480\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 267000, + "unit": "ns", + "extra": "gctime=0\nmemory=394480\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 127268.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9440\nallocs=446\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 587354, + "unit": "ns", + "extra": "gctime=0\nmemory=2509296\nallocs=149\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 693917, + "unit": "ns", + "extra": "gctime=0\nmemory=2509296\nallocs=149\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 589417, + "unit": "ns", + "extra": "gctime=0\nmemory=2509296\nallocs=149\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 581937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2509296\nallocs=149\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 661051, + "unit": "ns", + "extra": "gctime=0\nmemory=53056\nallocs=2380\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 2096834, + "unit": "ns", + "extra": "gctime=0\nmemory=3146992\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 2094750, + "unit": "ns", + "extra": "gctime=0\nmemory=3146992\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 2206250, + "unit": "ns", + "extra": "gctime=0\nmemory=3146992\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 2187125, + "unit": "ns", + "extra": "gctime=0\nmemory=3146992\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 133017, + "unit": "ns", + "extra": "gctime=0\nmemory=9440\nallocs=446\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 5491854, + "unit": "ns", + "extra": "gctime=0\nmemory=19998832\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 5493249.5, + "unit": "ns", + "extra": "gctime=0\nmemory=19998832\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 5515395.5, + "unit": "ns", + "extra": "gctime=0\nmemory=19998832\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 5510750, + "unit": "ns", + "extra": "gctime=0\nmemory=19998832\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 703282.5, + "unit": "ns", + "extra": "gctime=0\nmemory=53056\nallocs=2380\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=gelu)(512 x 128)/forward/CPU/2 thread(s)", + "value": 645833, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=gelu)(512 x 128)/forward/CPU/4 thread(s)", + "value": 648583, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=gelu)(512 x 128)/forward/CPU/8 thread(s)", + "value": 647708, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=gelu)(512 x 128)/forward/CPU/1 thread(s)", + "value": 650958, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=gelu)(512 x 128)/forward/GPU/CUDA", + "value": 46809.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=gelu)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 1821000, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=gelu)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 1727666.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=gelu)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 1746292, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=gelu)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 2100333, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=gelu)(512 x 128)/zygote/GPU/CUDA", + "value": 221775, + "unit": "ns", + "extra": "gctime=0\nmemory=13656\nallocs=588\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 58333, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 47000, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 46333, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 83625, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 28634, + "unit": "ns", + "extra": "gctime=0\nmemory=2816\nallocs=100\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 2035667, + "unit": "ns", + "extra": "gctime=0\nmemory=7356560\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 2083749.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7356560\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 2091208, + "unit": "ns", + "extra": "gctime=0\nmemory=7356560\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 1962146, + "unit": "ns", + "extra": "gctime=0\nmemory=7356560\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 189516.5, + "unit": "ns", + "extra": "gctime=0\nmemory=17624\nallocs=574\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)", + "value": 13367687, + "unit": "ns", + "extra": "gctime=0\nmemory=8984112\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)", + "value": 12447041.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10646736\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)", + "value": 12572666, + "unit": "ns", + "extra": "gctime=0\nmemory=13971856\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)", + "value": 15028854.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8429664\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/GPU/CUDA", + "value": 514202, + "unit": "ns", + "extra": "gctime=0\nmemory=6128\nallocs=209\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)", + "value": 47344292, + "unit": "ns", + "extra": "gctime=356083\nmemory=42658560\nallocs=135\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)", + "value": 41848500.5, + "unit": "ns", + "extra": "gctime=424021.5\nmemory=45983808\nallocs=155\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)", + "value": 40966542, + "unit": "ns", + "extra": "gctime=474083\nmemory=52634048\nallocs=195\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)", + "value": 58373500, + "unit": "ns", + "extra": "gctime=455083\nmemory=41549664\nallocs=125\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/GPU/CUDA", + "value": 3196704, + "unit": "ns", + "extra": "gctime=0\nmemory=30824\nallocs=1000\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)", + "value": 73579562.5, + "unit": "ns", + "extra": "gctime=0\nmemory=26914064\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)", + "value": 91406041.5, + "unit": "ns", + "extra": "gctime=0\nmemory=30239312\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)", + "value": 90565250, + "unit": "ns", + "extra": "gctime=0\nmemory=36889552\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)", + "value": 76782875, + "unit": "ns", + "extra": "gctime=581916\nmemory=25805168\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 59000, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 46917, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 47333, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 82792, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 46869, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 1927021, + "unit": "ns", + "extra": "gctime=0\nmemory=6307456\nallocs=105\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 1714541, + "unit": "ns", + "extra": "gctime=0\nmemory=6307456\nallocs=105\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 1977833.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6307456\nallocs=105\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 1884375, + "unit": "ns", + "extra": "gctime=0\nmemory=6307456\nallocs=105\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 191330, + "unit": "ns", + "extra": "gctime=0\nmemory=17376\nallocs=557\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(4 x 32)/forward/CPU/2 thread(s)", + "value": 292, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(4 x 32)/forward/CPU/4 thread(s)", + "value": 375, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(4 x 32)/forward/CPU/8 thread(s)", + "value": 416, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(4 x 32)/forward/CPU/1 thread(s)", + "value": 292, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(4 x 32)/forward/GPU/CUDA", + "value": 32378, + "unit": "ns", + "extra": "gctime=0\nmemory=4016\nallocs=144\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 6209, + "unit": "ns", + "extra": "gctime=0\nmemory=16512\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 6541, + "unit": "ns", + "extra": "gctime=0\nmemory=16512\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 6959, + "unit": "ns", + "extra": "gctime=0\nmemory=16512\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 6333, + "unit": "ns", + "extra": "gctime=0\nmemory=16512\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=false)(4 x 32)/zygote/GPU/CUDA", + "value": 168990.5, + "unit": "ns", + "extra": "gctime=0\nmemory=16944\nallocs=551\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=identity)(2 x 128)/forward/CPU/2 thread(s)", + "value": 250, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=identity)(2 x 128)/forward/CPU/4 thread(s)", + "value": 292, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=identity)(2 x 128)/forward/CPU/8 thread(s)", + "value": 292, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=identity)(2 x 128)/forward/CPU/1 thread(s)", + "value": 250, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=identity)(2 x 128)/forward/GPU/CUDA", + "value": 31794, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=41\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=identity)(2 x 128)/zygote/CPU/2 thread(s)", + "value": 2625, + "unit": "ns", + "extra": "gctime=0\nmemory=4288\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=identity)(2 x 128)/zygote/CPU/4 thread(s)", + "value": 2958, + "unit": "ns", + "extra": "gctime=0\nmemory=4288\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=identity)(2 x 128)/zygote/CPU/8 thread(s)", + "value": 2916, + "unit": "ns", + "extra": "gctime=0\nmemory=4288\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=identity)(2 x 128)/zygote/CPU/1 thread(s)", + "value": 2625, + "unit": "ns", + "extra": "gctime=0\nmemory=4288\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=identity)(2 x 128)/zygote/GPU/CUDA", + "value": 156010.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8096\nallocs=328\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)", + "value": 287628499.5, + "unit": "ns", + "extra": "gctime=0\nmemory=143677872\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)", + "value": 340509375, + "unit": "ns", + "extra": "gctime=0\nmemory=170249552\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)", + "value": 315088770.5, + "unit": "ns", + "extra": "gctime=0\nmemory=223392912\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)", + "value": 267551959, + "unit": "ns", + "extra": "gctime=317000\nmemory=134820320\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/GPU/CUDA", + "value": 7063426, + "unit": "ns", + "extra": "gctime=0\nmemory=6240\nallocs=216\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)", + "value": 1001322625, + "unit": "ns", + "extra": "gctime=151352250\nmemory=682500624\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)", + "value": 944816500, + "unit": "ns", + "extra": "gctime=94023333\nmemory=735643984\nallocs=156\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)", + "value": 856957937.5, + "unit": "ns", + "extra": "gctime=85331521\nmemory=841930704\nallocs=196\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)", + "value": 1159027042, + "unit": "ns", + "extra": "gctime=168642584\nmemory=664785520\nallocs=126\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/GPU/CUDA", + "value": 34074066, + "unit": "ns", + "extra": "gctime=0\nmemory=29352\nallocs=937\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)", + "value": 1313285104.5, + "unit": "ns", + "extra": "gctime=0\nmemory=430580528\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)", + "value": 1697633000, + "unit": "ns", + "extra": "gctime=22290333\nmemory=483723888\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)", + "value": 1638900292, + "unit": "ns", + "extra": "gctime=21642625\nmemory=590010608\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)", + "value": 1318281104, + "unit": "ns", + "extra": "gctime=3364041.5\nmemory=412865424\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 1410625, + "unit": "ns", + "extra": "gctime=0\nmemory=1052048\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 1407792, + "unit": "ns", + "extra": "gctime=0\nmemory=1052048\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 1412125, + "unit": "ns", + "extra": "gctime=0\nmemory=1052048\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 1407708, + "unit": "ns", + "extra": "gctime=0\nmemory=1052048\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 127251.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8160\nallocs=223\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 5023916.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8408304\nallocs=123\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 5011417, + "unit": "ns", + "extra": "gctime=0\nmemory=8408304\nallocs=123\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 5023520.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8408304\nallocs=123\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 5000791, + "unit": "ns", + "extra": "gctime=0\nmemory=8408304\nallocs=123\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 530171, + "unit": "ns", + "extra": "gctime=0\nmemory=45864\nallocs=1354\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/2 thread(s)", + "value": 168946208, + "unit": "ns", + "extra": "gctime=1196916\nmemory=89286640\nallocs=987\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/4 thread(s)", + "value": 132469479, + "unit": "ns", + "extra": "gctime=10739437.5\nmemory=109329168\nallocs=1117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/8 thread(s)", + "value": 121413791.5, + "unit": "ns", + "extra": "gctime=19505708.5\nmemory=149414224\nallocs=1377\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/1 thread(s)", + "value": 161625979.5, + "unit": "ns", + "extra": "gctime=9246187\nmemory=82601568\nallocs=922\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/GPU/CUDA", + "value": 4880231, + "unit": "ns", + "extra": "gctime=0\nmemory=140208\nallocs=4465\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/2 thread(s)", + "value": 827387542, + "unit": "ns", + "extra": "gctime=341826375\nmemory=466623152\nallocs=3373\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/4 thread(s)", + "value": 641003042, + "unit": "ns", + "extra": "gctime=218489791\nmemory=506708208\nallocs=3633\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/8 thread(s)", + "value": 530713834, + "unit": "ns", + "extra": "gctime=137572208\nmemory=586878320\nallocs=4153\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/1 thread(s)", + "value": 675863750, + "unit": "ns", + "extra": "gctime=179561667\nmemory=453253008\nallocs=3243\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/GPU/CUDA", + "value": 16337541, + "unit": "ns", + "extra": "gctime=0\nmemory=815240\nallocs=13293\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=32)/forward/CPU/2 thread(s)", + "value": 9000792, + "unit": "ns", + "extra": "gctime=0\nmemory=33555616\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=32)/forward/CPU/4 thread(s)", + "value": 8791292, + "unit": "ns", + "extra": "gctime=0\nmemory=33556688\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=32)/forward/CPU/8 thread(s)", + "value": 7890062.5, + "unit": "ns", + "extra": "gctime=0\nmemory=33558832\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=32)/forward/CPU/1 thread(s)", + "value": 10164708.5, + "unit": "ns", + "extra": "gctime=0\nmemory=33555088\nallocs=8\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=32)/forward/GPU/CUDA", + "value": 1595476, + "unit": "ns", + "extra": "gctime=0\nmemory=576\nallocs=27\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=32)/zygote/CPU/2 thread(s)", + "value": 36017917, + "unit": "ns", + "extra": "gctime=990875\nmemory=134222000\nallocs=62\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=32)/zygote/CPU/4 thread(s)", + "value": 36663604, + "unit": "ns", + "extra": "gctime=979354.5\nmemory=134225216\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=32)/zygote/CPU/8 thread(s)", + "value": 33249104.5, + "unit": "ns", + "extra": "gctime=1041854\nmemory=134231648\nallocs=152\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=32)/zygote/CPU/1 thread(s)", + "value": 38766083.5, + "unit": "ns", + "extra": "gctime=1730812.5\nmemory=134220416\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=32)/zygote/GPU/CUDA", + "value": 6515962, + "unit": "ns", + "extra": "gctime=0\nmemory=7424\nallocs=323\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=tanh)(32 x 128)/forward/CPU/2 thread(s)", + "value": 47250, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=tanh)(32 x 128)/forward/CPU/4 thread(s)", + "value": 47500, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=tanh)(32 x 128)/forward/CPU/8 thread(s)", + "value": 47625, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=tanh)(32 x 128)/forward/CPU/1 thread(s)", + "value": 47416.5, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=tanh)(32 x 128)/forward/GPU/CUDA", + "value": 18983.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1504\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=tanh)(32 x 128)/zygote/CPU/2 thread(s)", + "value": 50250, + "unit": "ns", + "extra": "gctime=0\nmemory=50496\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=tanh)(32 x 128)/zygote/CPU/4 thread(s)", + "value": 50625, + "unit": "ns", + "extra": "gctime=0\nmemory=50496\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=tanh)(32 x 128)/zygote/CPU/8 thread(s)", + "value": 50625, + "unit": "ns", + "extra": "gctime=0\nmemory=50496\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=tanh)(32 x 128)/zygote/CPU/1 thread(s)", + "value": 50333, + "unit": "ns", + "extra": "gctime=0\nmemory=50496\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=tanh)(32 x 128)/zygote/GPU/CUDA", + "value": 163825, + "unit": "ns", + "extra": "gctime=0\nmemory=9896\nallocs=441\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(4 x 32)/forward/CPU/2 thread(s)", + "value": 8084, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(4 x 32)/forward/CPU/4 thread(s)", + "value": 7125, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(4 x 32)/forward/CPU/8 thread(s)", + "value": 9667, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(4 x 32)/forward/CPU/1 thread(s)", + "value": 8458, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(4 x 32)/forward/GPU/CUDA", + "value": 77103.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7952\nallocs=217\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 9375, + "unit": "ns", + "extra": "gctime=0\nmemory=22128\nallocs=104\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 10042, + "unit": "ns", + "extra": "gctime=0\nmemory=22128\nallocs=104\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 10583, + "unit": "ns", + "extra": "gctime=0\nmemory=22128\nallocs=104\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 10000, + "unit": "ns", + "extra": "gctime=0\nmemory=22128\nallocs=104\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=gelu, affine=true)(4 x 32)/zygote/GPU/CUDA", + "value": 461429.5, + "unit": "ns", + "extra": "gctime=0\nmemory=41640\nallocs=1234\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(32 x 32)/forward/CPU/2 thread(s)", + "value": 7833, + "unit": "ns", + "extra": "gctime=0\nmemory=7088\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(32 x 32)/forward/CPU/4 thread(s)", + "value": 6771, + "unit": "ns", + "extra": "gctime=0\nmemory=7088\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(32 x 32)/forward/CPU/8 thread(s)", + "value": 9625, + "unit": "ns", + "extra": "gctime=0\nmemory=7088\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(32 x 32)/forward/CPU/1 thread(s)", + "value": 8125, + "unit": "ns", + "extra": "gctime=0\nmemory=7088\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(32 x 32)/forward/GPU/CUDA", + "value": 90885, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=201\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 12979, + "unit": "ns", + "extra": "gctime=0\nmemory=44512\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 15208.5, + "unit": "ns", + "extra": "gctime=0\nmemory=44512\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 14041, + "unit": "ns", + "extra": "gctime=0\nmemory=44512\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 13791.5, + "unit": "ns", + "extra": "gctime=0\nmemory=44512\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(32 x 32)/zygote/GPU/CUDA", + "value": 419264.5, + "unit": "ns", + "extra": "gctime=0\nmemory=34936\nallocs=1073\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(4 x 32)/forward/CPU/2 thread(s)", + "value": 959, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(4 x 32)/forward/CPU/4 thread(s)", + "value": 1042, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(4 x 32)/forward/CPU/8 thread(s)", + "value": 1083, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(4 x 32)/forward/CPU/1 thread(s)", + "value": 1042, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(4 x 32)/forward/GPU/CUDA", + "value": 32012, + "unit": "ns", + "extra": "gctime=0\nmemory=4016\nallocs=144\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 7833, + "unit": "ns", + "extra": "gctime=0\nmemory=17664\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 8208, + "unit": "ns", + "extra": "gctime=0\nmemory=17664\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 8709, + "unit": "ns", + "extra": "gctime=0\nmemory=17664\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 7958.5, + "unit": "ns", + "extra": "gctime=0\nmemory=17664\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=false)(4 x 32)/zygote/GPU/CUDA", + "value": 189226.5, + "unit": "ns", + "extra": "gctime=0\nmemory=15832\nallocs=664\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=gelu)(32 x 128)/forward/CPU/2 thread(s)", + "value": 22875, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=gelu)(32 x 128)/forward/CPU/4 thread(s)", + "value": 23292, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=gelu)(32 x 128)/forward/CPU/8 thread(s)", + "value": 23500, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=gelu)(32 x 128)/forward/CPU/1 thread(s)", + "value": 23000, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=gelu)(32 x 128)/forward/GPU/CUDA", + "value": 18216, + "unit": "ns", + "extra": "gctime=0\nmemory=1504\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=gelu)(32 x 128)/zygote/CPU/2 thread(s)", + "value": 51917, + "unit": "ns", + "extra": "gctime=0\nmemory=68064\nallocs=34\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=gelu)(32 x 128)/zygote/CPU/4 thread(s)", + "value": 52792, + "unit": "ns", + "extra": "gctime=0\nmemory=68064\nallocs=34\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=gelu)(32 x 128)/zygote/CPU/8 thread(s)", + "value": 52875, + "unit": "ns", + "extra": "gctime=0\nmemory=68064\nallocs=34\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=gelu)(32 x 128)/zygote/CPU/1 thread(s)", + "value": 52416, + "unit": "ns", + "extra": "gctime=0\nmemory=68064\nallocs=34\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(32, act=gelu)(32 x 128)/zygote/GPU/CUDA", + "value": 223112, + "unit": "ns", + "extra": "gctime=0\nmemory=11384\nallocs=518\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 1402916.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1051792\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 1402812.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1051792\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 1406208, + "unit": "ns", + "extra": "gctime=0\nmemory=1051792\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 1403604.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1051792\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 195501, + "unit": "ns", + "extra": "gctime=0\nmemory=7472\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 5020125, + "unit": "ns", + "extra": "gctime=0\nmemory=8404848\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 5010437.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8404848\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 5024792, + "unit": "ns", + "extra": "gctime=0\nmemory=8404848\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 5012270.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8404848\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 554008, + "unit": "ns", + "extra": "gctime=0\nmemory=37896\nallocs=1161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)", + "value": 2999416, + "unit": "ns", + "extra": "gctime=0\nmemory=8984112\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)", + "value": 2081166.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10646736\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)", + "value": 2289708, + "unit": "ns", + "extra": "gctime=0\nmemory=13971856\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)", + "value": 4780250, + "unit": "ns", + "extra": "gctime=0\nmemory=8429664\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/GPU/CUDA", + "value": 584316.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6608\nallocs=198\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)", + "value": 24335979, + "unit": "ns", + "extra": "gctime=0\nmemory=34790400\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)", + "value": 18882500, + "unit": "ns", + "extra": "gctime=0\nmemory=38115648\nallocs=180\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)", + "value": 18838542, + "unit": "ns", + "extra": "gctime=901875\nmemory=44765888\nallocs=220\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)", + "value": 36465395.5, + "unit": "ns", + "extra": "gctime=751896\nmemory=33681504\nallocs=150\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/GPU/CUDA", + "value": 3208033, + "unit": "ns", + "extra": "gctime=0\nmemory=29864\nallocs=912\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)", + "value": 34044458, + "unit": "ns", + "extra": "gctime=0\nmemory=26914064\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)", + "value": 28308645.5, + "unit": "ns", + "extra": "gctime=0\nmemory=30239312\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)", + "value": 28011708, + "unit": "ns", + "extra": "gctime=0\nmemory=36889552\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)", + "value": 41401312.5, + "unit": "ns", + "extra": "gctime=0\nmemory=25805168\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=512)/forward/CPU/2 thread(s)", + "value": 144926583, + "unit": "ns", + "extra": "gctime=0\nmemory=536872096\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=512)/forward/CPU/4 thread(s)", + "value": 142567458, + "unit": "ns", + "extra": "gctime=0\nmemory=536873168\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=512)/forward/CPU/8 thread(s)", + "value": 123989708, + "unit": "ns", + "extra": "gctime=0\nmemory=536875312\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=512)/forward/CPU/1 thread(s)", + "value": 174263458.5, + "unit": "ns", + "extra": "gctime=345667\nmemory=536871568\nallocs=8\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=512)/forward/GPU/CUDA", + "value": 22544725, + "unit": "ns", + "extra": "gctime=0\nmemory=576\nallocs=27\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=512)/zygote/CPU/2 thread(s)", + "value": 1382510875, + "unit": "ns", + "extra": "gctime=395558396\nmemory=2147487920\nallocs=62\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=512)/zygote/CPU/4 thread(s)", + "value": 1114547875, + "unit": "ns", + "extra": "gctime=273966104\nmemory=2147491136\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=512)/zygote/CPU/8 thread(s)", + "value": 1238644000, + "unit": "ns", + "extra": "gctime=213265543\nmemory=2147497568\nallocs=152\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=512)/zygote/CPU/1 thread(s)", + "value": 669081959, + "unit": "ns", + "extra": "gctime=32384875\nmemory=2147486336\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=512)/zygote/GPU/CUDA", + "value": 118397054.5, + "unit": "ns", + "extra": "gctime=1600997.5\nmemory=7424\nallocs=323\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 73750, + "unit": "ns", + "extra": "gctime=0\nmemory=394480\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 74062.5, + "unit": "ns", + "extra": "gctime=0\nmemory=394480\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 76666, + "unit": "ns", + "extra": "gctime=0\nmemory=394480\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 72500, + "unit": "ns", + "extra": "gctime=0\nmemory=394480\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 212207.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9440\nallocs=446\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 282500, + "unit": "ns", + "extra": "gctime=0\nmemory=2246800\nallocs=143\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 285000, + "unit": "ns", + "extra": "gctime=0\nmemory=2246800\nallocs=143\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 279062.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2246800\nallocs=143\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 244104.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2246800\nallocs=143\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 1165691.5, + "unit": "ns", + "extra": "gctime=0\nmemory=50656\nallocs=2256\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=128)/forward/CPU/2 thread(s)", + "value": 35470229.5, + "unit": "ns", + "extra": "gctime=0\nmemory=134218912\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=128)/forward/CPU/4 thread(s)", + "value": 35610729, + "unit": "ns", + "extra": "gctime=0\nmemory=134219984\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=128)/forward/CPU/8 thread(s)", + "value": 32380708, + "unit": "ns", + "extra": "gctime=0\nmemory=134222128\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=128)/forward/CPU/1 thread(s)", + "value": 40906104, + "unit": "ns", + "extra": "gctime=350833\nmemory=134218384\nallocs=8\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=128)/forward/GPU/CUDA", + "value": 5839614, + "unit": "ns", + "extra": "gctime=0\nmemory=576\nallocs=27\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=128)/zygote/CPU/2 thread(s)", + "value": 147585625, + "unit": "ns", + "extra": "gctime=4910292\nmemory=536875184\nallocs=62\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=128)/zygote/CPU/4 thread(s)", + "value": 153161750, + "unit": "ns", + "extra": "gctime=4917354\nmemory=536878400\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=128)/zygote/CPU/8 thread(s)", + "value": 134802500, + "unit": "ns", + "extra": "gctime=4885625\nmemory=536884832\nallocs=152\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=128)/zygote/CPU/1 thread(s)", + "value": 286930833, + "unit": "ns", + "extra": "gctime=141309667\nmemory=536873600\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(512, Bsize=128)/zygote/GPU/CUDA", + "value": 34862634, + "unit": "ns", + "extra": "gctime=0\nmemory=7424\nallocs=323\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)", + "value": 122086979, + "unit": "ns", + "extra": "gctime=0\nmemory=143677872\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)", + "value": 174426667, + "unit": "ns", + "extra": "gctime=0\nmemory=170249552\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)", + "value": 148037375, + "unit": "ns", + "extra": "gctime=0\nmemory=223392912\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)", + "value": 103434500, + "unit": "ns", + "extra": "gctime=331396\nmemory=134820320\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/GPU/CUDA", + "value": 5431132, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)", + "value": 469930000, + "unit": "ns", + "extra": "gctime=3848959\nmemory=430584480\nallocs=158\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)", + "value": 467117250, + "unit": "ns", + "extra": "gctime=3746708\nmemory=483727840\nallocs=178\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)", + "value": 440742583, + "unit": "ns", + "extra": "gctime=52907583\nmemory=590014560\nallocs=218\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)", + "value": 740751583, + "unit": "ns", + "extra": "gctime=141072209\nmemory=412869376\nallocs=148\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/GPU/CUDA", + "value": 35158004, + "unit": "ns", + "extra": "gctime=0\nmemory=26960\nallocs=772\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)", + "value": 646279541, + "unit": "ns", + "extra": "gctime=0\nmemory=430580512\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)", + "value": 655203979, + "unit": "ns", + "extra": "gctime=23410021\nmemory=483723872\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)", + "value": 572927666, + "unit": "ns", + "extra": "gctime=21825791.5\nmemory=590010592\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)", + "value": 850524375, + "unit": "ns", + "extra": "gctime=140391167\nmemory=412865408\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/2 thread(s)", + "value": 1278041, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/4 thread(s)", + "value": 973833, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/8 thread(s)", + "value": 990208, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/1 thread(s)", + "value": 1941459, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/GPU/CUDA", + "value": 582790.5, + "unit": "ns", + "extra": "gctime=0\nmemory=26048\nallocs=978\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/2 thread(s)", + "value": 2969750, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/4 thread(s)", + "value": 2465416, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/8 thread(s)", + "value": 2613271, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/1 thread(s)", + "value": 3708542, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/GPU/CUDA", + "value": 1712702, + "unit": "ns", + "extra": "gctime=0\nmemory=359712\nallocs=3667\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/2 thread(s)", + "value": 6655667, + "unit": "ns", + "extra": "gctime=0\nmemory=9373552\nallocs=470\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/4 thread(s)", + "value": 6494792, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/8 thread(s)", + "value": 6503416.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/1 thread(s)", + "value": 4455333.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373456\nallocs=464\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 7292, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 6000, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 6041, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 9917, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 25550, + "unit": "ns", + "extra": "gctime=0\nmemory=2736\nallocs=99\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 212437.5, + "unit": "ns", + "extra": "gctime=0\nmemory=801664\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 220625, + "unit": "ns", + "extra": "gctime=0\nmemory=801664\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 225708.5, + "unit": "ns", + "extra": "gctime=0\nmemory=801664\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 207291.5, + "unit": "ns", + "extra": "gctime=0\nmemory=801664\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 255206, + "unit": "ns", + "extra": "gctime=0\nmemory=16096\nallocs=495\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/2 thread(s)", + "value": 315688312.5, + "unit": "ns", + "extra": "gctime=22032541.5\nmemory=165113248\nallocs=987\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/4 thread(s)", + "value": 223172541, + "unit": "ns", + "extra": "gctime=22824125\nmemory=185155776\nallocs=1117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/8 thread(s)", + "value": 190206958.5, + "unit": "ns", + "extra": "gctime=21682770.5\nmemory=225240832\nallocs=1377\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/1 thread(s)", + "value": 311635083, + "unit": "ns", + "extra": "gctime=20417459\nmemory=158428176\nallocs=922\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/GPU/CUDA", + "value": 7671660.5, + "unit": "ns", + "extra": "gctime=0\nmemory=139952\nallocs=4461\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/2 thread(s)", + "value": 1080721667, + "unit": "ns", + "extra": "gctime=168738645.5\nmemory=764421648\nallocs=3373\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/4 thread(s)", + "value": 916406083, + "unit": "ns", + "extra": "gctime=123410520.5\nmemory=804506704\nallocs=3633\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/8 thread(s)", + "value": 811802750, + "unit": "ns", + "extra": "gctime=77213584\nmemory=884676816\nallocs=4153\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/1 thread(s)", + "value": 1154276750, + "unit": "ns", + "extra": "gctime=177722813\nmemory=751051504\nallocs=3243\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/GPU/CUDA", + "value": 26458418, + "unit": "ns", + "extra": "gctime=0\nmemory=814792\nallocs=13283\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(4 x 32)/forward/CPU/2 thread(s)", + "value": 5625, + "unit": "ns", + "extra": "gctime=0\nmemory=3360\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(4 x 32)/forward/CPU/4 thread(s)", + "value": 6333, + "unit": "ns", + "extra": "gctime=0\nmemory=3360\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(4 x 32)/forward/CPU/8 thread(s)", + "value": 8125, + "unit": "ns", + "extra": "gctime=0\nmemory=3360\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(4 x 32)/forward/CPU/1 thread(s)", + "value": 5458, + "unit": "ns", + "extra": "gctime=0\nmemory=3360\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(4 x 32)/forward/GPU/CUDA", + "value": 163515, + "unit": "ns", + "extra": "gctime=0\nmemory=7264\nallocs=193\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 7042, + "unit": "ns", + "extra": "gctime=0\nmemory=18224\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 7833, + "unit": "ns", + "extra": "gctime=0\nmemory=18224\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 7625, + "unit": "ns", + "extra": "gctime=0\nmemory=18224\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 7625, + "unit": "ns", + "extra": "gctime=0\nmemory=18224\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=relu, affine=false)(4 x 32)/zygote/GPU/CUDA", + "value": 635993.5, + "unit": "ns", + "extra": "gctime=0\nmemory=34680\nallocs=1057\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(32 x 32)/forward/CPU/2 thread(s)", + "value": 417, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(32 x 32)/forward/CPU/4 thread(s)", + "value": 583, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(32 x 32)/forward/CPU/8 thread(s)", + "value": 584, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(32 x 32)/forward/CPU/1 thread(s)", + "value": 458, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(32 x 32)/forward/GPU/CUDA", + "value": 23629, + "unit": "ns", + "extra": "gctime=0\nmemory=3008\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 9000, + "unit": "ns", + "extra": "gctime=0\nmemory=40640\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 9583, + "unit": "ns", + "extra": "gctime=0\nmemory=40640\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 10000, + "unit": "ns", + "extra": "gctime=0\nmemory=40640\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 8562.5, + "unit": "ns", + "extra": "gctime=0\nmemory=40640\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(32 x 32)/zygote/GPU/CUDA", + "value": 232549, + "unit": "ns", + "extra": "gctime=0\nmemory=16000\nallocs=489\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=gelu)(512 x 128)/forward/CPU/2 thread(s)", + "value": 350979, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=gelu)(512 x 128)/forward/CPU/4 thread(s)", + "value": 351250, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=gelu)(512 x 128)/forward/CPU/8 thread(s)", + "value": 351479.5, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=gelu)(512 x 128)/forward/CPU/1 thread(s)", + "value": 351771, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=gelu)(512 x 128)/forward/GPU/CUDA", + "value": 21199, + "unit": "ns", + "extra": "gctime=0\nmemory=1504\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=gelu)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 821375, + "unit": "ns", + "extra": "gctime=0\nmemory=1052384\nallocs=38\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=gelu)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 774000, + "unit": "ns", + "extra": "gctime=0\nmemory=1052384\nallocs=38\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=gelu)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 814249.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1052384\nallocs=38\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=gelu)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 823854, + "unit": "ns", + "extra": "gctime=0\nmemory=1052384\nallocs=38\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=gelu)(512 x 128)/zygote/GPU/CUDA", + "value": 305212.5, + "unit": "ns", + "extra": "gctime=0\nmemory=11384\nallocs=518\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=32)/forward/CPU/2 thread(s)", + "value": 333979, + "unit": "ns", + "extra": "gctime=0\nmemory=33952\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=32)/forward/CPU/4 thread(s)", + "value": 338146, + "unit": "ns", + "extra": "gctime=0\nmemory=35024\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=32)/forward/CPU/8 thread(s)", + "value": 448750, + "unit": "ns", + "extra": "gctime=0\nmemory=37168\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=32)/forward/CPU/1 thread(s)", + "value": 336917, + "unit": "ns", + "extra": "gctime=0\nmemory=33424\nallocs=8\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=32)/forward/GPU/CUDA", + "value": 17454, + "unit": "ns", + "extra": "gctime=0\nmemory=560\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=32)/zygote/CPU/2 thread(s)", + "value": 691709, + "unit": "ns", + "extra": "gctime=0\nmemory=135344\nallocs=62\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=32)/zygote/CPU/4 thread(s)", + "value": 748375, + "unit": "ns", + "extra": "gctime=0\nmemory=138560\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=32)/zygote/CPU/8 thread(s)", + "value": 1025583, + "unit": "ns", + "extra": "gctime=0\nmemory=144992\nallocs=152\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=32)/zygote/CPU/1 thread(s)", + "value": 685896, + "unit": "ns", + "extra": "gctime=0\nmemory=133760\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=32)/zygote/GPU/CUDA", + "value": 284244, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=321\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=128)/forward/CPU/2 thread(s)", + "value": 353083.5, + "unit": "ns", + "extra": "gctime=0\nmemory=132256\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=128)/forward/CPU/4 thread(s)", + "value": 350875, + "unit": "ns", + "extra": "gctime=0\nmemory=133328\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=128)/forward/CPU/8 thread(s)", + "value": 433770.5, + "unit": "ns", + "extra": "gctime=0\nmemory=135472\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=128)/forward/CPU/1 thread(s)", + "value": 379520.5, + "unit": "ns", + "extra": "gctime=0\nmemory=131728\nallocs=8\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=128)/forward/GPU/CUDA", + "value": 22107, + "unit": "ns", + "extra": "gctime=0\nmemory=560\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=128)/zygote/CPU/2 thread(s)", + "value": 754208, + "unit": "ns", + "extra": "gctime=0\nmemory=528560\nallocs=62\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=128)/zygote/CPU/4 thread(s)", + "value": 751666, + "unit": "ns", + "extra": "gctime=0\nmemory=531776\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=128)/zygote/CPU/8 thread(s)", + "value": 1064541.5, + "unit": "ns", + "extra": "gctime=0\nmemory=538208\nallocs=152\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=128)/zygote/CPU/1 thread(s)", + "value": 821250, + "unit": "ns", + "extra": "gctime=0\nmemory=526976\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=128)/zygote/GPU/CUDA", + "value": 223088.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=321\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=tanh)(2 x 128)/forward/CPU/2 thread(s)", + "value": 3333, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=tanh)(2 x 128)/forward/CPU/4 thread(s)", + "value": 3500, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=tanh)(2 x 128)/forward/CPU/8 thread(s)", + "value": 3791, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=tanh)(2 x 128)/forward/CPU/1 thread(s)", + "value": 3583, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=tanh)(2 x 128)/forward/GPU/CUDA", + "value": 17921, + "unit": "ns", + "extra": "gctime=0\nmemory=1504\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=tanh)(2 x 128)/zygote/CPU/2 thread(s)", + "value": 4208, + "unit": "ns", + "extra": "gctime=0\nmemory=3712\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=tanh)(2 x 128)/zygote/CPU/4 thread(s)", + "value": 4541, + "unit": "ns", + "extra": "gctime=0\nmemory=3712\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=tanh)(2 x 128)/zygote/CPU/8 thread(s)", + "value": 4375, + "unit": "ns", + "extra": "gctime=0\nmemory=3712\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=tanh)(2 x 128)/zygote/CPU/1 thread(s)", + "value": 4458, + "unit": "ns", + "extra": "gctime=0\nmemory=3712\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=tanh)(2 x 128)/zygote/GPU/CUDA", + "value": 285329, + "unit": "ns", + "extra": "gctime=0\nmemory=8952\nallocs=406\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(4 x 32)/forward/CPU/2 thread(s)", + "value": 4458.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2400\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(4 x 32)/forward/CPU/4 thread(s)", + "value": 4000, + "unit": "ns", + "extra": "gctime=0\nmemory=2400\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(4 x 32)/forward/CPU/8 thread(s)", + "value": 6125, + "unit": "ns", + "extra": "gctime=0\nmemory=2400\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(4 x 32)/forward/CPU/1 thread(s)", + "value": 3333, + "unit": "ns", + "extra": "gctime=0\nmemory=2400\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(4 x 32)/forward/GPU/CUDA", + "value": 222015.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=376\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 8125, + "unit": "ns", + "extra": "gctime=0\nmemory=16720\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 8625, + "unit": "ns", + "extra": "gctime=0\nmemory=16720\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 9250, + "unit": "ns", + "extra": "gctime=0\nmemory=16720\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 8500, + "unit": "ns", + "extra": "gctime=0\nmemory=16720\nallocs=86\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=true)(4 x 32)/zygote/GPU/CUDA", + "value": 1221788, + "unit": "ns", + "extra": "gctime=0\nmemory=44832\nallocs=2081\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 202834, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 210459, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 209708, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 200375, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 34798, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 628000, + "unit": "ns", + "extra": "gctime=0\nmemory=1063696\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 624875.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1063696\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 633791, + "unit": "ns", + "extra": "gctime=0\nmemory=1063696\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 628750, + "unit": "ns", + "extra": "gctime=0\nmemory=1063696\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 343726, + "unit": "ns", + "extra": "gctime=0\nmemory=16008\nallocs=670\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=128)/forward/CPU/2 thread(s)", + "value": 959833, + "unit": "ns", + "extra": "gctime=0\nmemory=8389792\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=128)/forward/CPU/4 thread(s)", + "value": 938500, + "unit": "ns", + "extra": "gctime=0\nmemory=8390864\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=128)/forward/CPU/8 thread(s)", + "value": 948167, + "unit": "ns", + "extra": "gctime=0\nmemory=8393008\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=128)/forward/CPU/1 thread(s)", + "value": 1293584, + "unit": "ns", + "extra": "gctime=0\nmemory=8389264\nallocs=8\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=128)/forward/GPU/CUDA", + "value": 207285, + "unit": "ns", + "extra": "gctime=0\nmemory=560\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=128)/zygote/CPU/2 thread(s)", + "value": 4498417, + "unit": "ns", + "extra": "gctime=0\nmemory=33558704\nallocs=62\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=128)/zygote/CPU/4 thread(s)", + "value": 4485958.5, + "unit": "ns", + "extra": "gctime=0\nmemory=33561920\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=128)/zygote/CPU/8 thread(s)", + "value": 4301916.5, + "unit": "ns", + "extra": "gctime=0\nmemory=33568352\nallocs=152\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=128)/zygote/CPU/1 thread(s)", + "value": 6237771, + "unit": "ns", + "extra": "gctime=802395.5\nmemory=33557120\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=128)/zygote/GPU/CUDA", + "value": 977432, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=321\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(4 x 32)/forward/CPU/2 thread(s)", + "value": 3541.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1632\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(4 x 32)/forward/CPU/4 thread(s)", + "value": 4000, + "unit": "ns", + "extra": "gctime=0\nmemory=1632\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(4 x 32)/forward/CPU/8 thread(s)", + "value": 6520.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1632\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(4 x 32)/forward/CPU/1 thread(s)", + "value": 3270.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1632\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(4 x 32)/forward/GPU/CUDA", + "value": 218695, + "unit": "ns", + "extra": "gctime=0\nmemory=6432\nallocs=316\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 6916, + "unit": "ns", + "extra": "gctime=0\nmemory=12560\nallocs=85\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 7958, + "unit": "ns", + "extra": "gctime=0\nmemory=12560\nallocs=85\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 7770.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12560\nallocs=85\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 7084, + "unit": "ns", + "extra": "gctime=0\nmemory=12560\nallocs=85\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(4 x 32)/zygote/GPU/CUDA", + "value": 1002409, + "unit": "ns", + "extra": "gctime=0\nmemory=41376\nallocs=1916\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)", + "value": 1587875, + "unit": "ns", + "extra": "gctime=0\nmemory=4494384\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)", + "value": 1157417, + "unit": "ns", + "extra": "gctime=0\nmemory=5326672\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)", + "value": 1362500, + "unit": "ns", + "extra": "gctime=0\nmemory=6991120\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)", + "value": 2449833.5, + "unit": "ns", + "extra": "gctime=0\nmemory=4216672\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/GPU/CUDA", + "value": 212742.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6592\nallocs=197\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)", + "value": 12321000, + "unit": "ns", + "extra": "gctime=0\nmemory=13466816\nallocs=157\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)", + "value": 9541250, + "unit": "ns", + "extra": "gctime=0\nmemory=15131392\nallocs=177\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)", + "value": 9282937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=18460288\nallocs=217\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)", + "value": 17977708, + "unit": "ns", + "extra": "gctime=0\nmemory=12911392\nallocs=147\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/GPU/CUDA", + "value": 1958143, + "unit": "ns", + "extra": "gctime=0\nmemory=28416\nallocs=834\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)", + "value": 17266875, + "unit": "ns", + "extra": "gctime=0\nmemory=13463104\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)", + "value": 14360083, + "unit": "ns", + "extra": "gctime=0\nmemory=15127680\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)", + "value": 14299667, + "unit": "ns", + "extra": "gctime=0\nmemory=18456576\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)", + "value": 21040375.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12907680\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 92000, + "unit": "ns", + "extra": "gctime=0\nmemory=1052048\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 88708, + "unit": "ns", + "extra": "gctime=0\nmemory=1052048\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 93500, + "unit": "ns", + "extra": "gctime=0\nmemory=1052048\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 90750, + "unit": "ns", + "extra": "gctime=0\nmemory=1052048\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 126341, + "unit": "ns", + "extra": "gctime=0\nmemory=8160\nallocs=223\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 2026042, + "unit": "ns", + "extra": "gctime=0\nmemory=7359616\nallocs=120\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 1932750, + "unit": "ns", + "extra": "gctime=0\nmemory=7359616\nallocs=120\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 2038687.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7359616\nallocs=120\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 2023042, + "unit": "ns", + "extra": "gctime=0\nmemory=7359616\nallocs=120\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 1038826, + "unit": "ns", + "extra": "gctime=0\nmemory=44920\nallocs=1300\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=4)/forward/CPU/2 thread(s)", + "value": 342396, + "unit": "ns", + "extra": "gctime=0\nmemory=1248\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=4)/forward/CPU/4 thread(s)", + "value": 348521, + "unit": "ns", + "extra": "gctime=0\nmemory=2320\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=4)/forward/CPU/8 thread(s)", + "value": 398542, + "unit": "ns", + "extra": "gctime=0\nmemory=4464\nallocs=42\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=4)/forward/CPU/1 thread(s)", + "value": 315291.5, + "unit": "ns", + "extra": "gctime=0\nmemory=720\nallocs=7\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=4)/forward/GPU/CUDA", + "value": 15350, + "unit": "ns", + "extra": "gctime=0\nmemory=560\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=4)/zygote/CPU/2 thread(s)", + "value": 702708, + "unit": "ns", + "extra": "gctime=0\nmemory=4528\nallocs=58\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=4)/zygote/CPU/4 thread(s)", + "value": 735833, + "unit": "ns", + "extra": "gctime=0\nmemory=7744\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=4)/zygote/CPU/8 thread(s)", + "value": 1022333, + "unit": "ns", + "extra": "gctime=0\nmemory=14176\nallocs=148\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=4)/zygote/CPU/1 thread(s)", + "value": 642041, + "unit": "ns", + "extra": "gctime=0\nmemory=2944\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=4)/zygote/GPU/CUDA", + "value": 195993, + "unit": "ns", + "extra": "gctime=0\nmemory=6416\nallocs=285\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 7250, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 6000, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 5875, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 10000, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 34698, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 212229.5, + "unit": "ns", + "extra": "gctime=0\nmemory=801424\nallocs=105\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 221334, + "unit": "ns", + "extra": "gctime=0\nmemory=801424\nallocs=105\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 223542, + "unit": "ns", + "extra": "gctime=0\nmemory=801424\nallocs=105\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 214291, + "unit": "ns", + "extra": "gctime=0\nmemory=801424\nallocs=105\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=identity, affine=false)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 330238.5, + "unit": "ns", + "extra": "gctime=0\nmemory=17376\nallocs=557\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=identity)(32 x 128)/forward/CPU/2 thread(s)", + "value": 3708, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=identity)(32 x 128)/forward/CPU/4 thread(s)", + "value": 3708, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=identity)(32 x 128)/forward/CPU/8 thread(s)", + "value": 3625, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=identity)(32 x 128)/forward/CPU/1 thread(s)", + "value": 3667, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=identity)(32 x 128)/forward/GPU/CUDA", + "value": 23068, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=identity)(32 x 128)/zygote/CPU/2 thread(s)", + "value": 14459, + "unit": "ns", + "extra": "gctime=0\nmemory=54640\nallocs=9\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=identity)(32 x 128)/zygote/CPU/4 thread(s)", + "value": 14416, + "unit": "ns", + "extra": "gctime=0\nmemory=54640\nallocs=9\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=identity)(32 x 128)/zygote/CPU/8 thread(s)", + "value": 14375, + "unit": "ns", + "extra": "gctime=0\nmemory=54640\nallocs=9\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=identity)(32 x 128)/zygote/CPU/1 thread(s)", + "value": 14416, + "unit": "ns", + "extra": "gctime=0\nmemory=54640\nallocs=9\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=identity)(32 x 128)/zygote/GPU/CUDA", + "value": 475565.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10336\nallocs=413\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 95667, + "unit": "ns", + "extra": "gctime=0\nmemory=1052048\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 93084, + "unit": "ns", + "extra": "gctime=0\nmemory=1052048\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 98228.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1052048\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 142542, + "unit": "ns", + "extra": "gctime=0\nmemory=1052048\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 125649, + "unit": "ns", + "extra": "gctime=0\nmemory=8160\nallocs=223\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 1928708.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6310928\nallocs=117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 1918770.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6310928\nallocs=117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 1920229.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6310928\nallocs=117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 1922417, + "unit": "ns", + "extra": "gctime=0\nmemory=6310928\nallocs=117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 986877, + "unit": "ns", + "extra": "gctime=0\nmemory=43392\nallocs=1221\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/2 thread(s)", + "value": 870500, + "unit": "ns", + "extra": "gctime=0\nmemory=947952\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/4 thread(s)", + "value": 822542, + "unit": "ns", + "extra": "gctime=0\nmemory=1239856\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/8 thread(s)", + "value": 1224667, + "unit": "ns", + "extra": "gctime=0\nmemory=1823664\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/1 thread(s)", + "value": 959708.5, + "unit": "ns", + "extra": "gctime=0\nmemory=850000\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/GPU/CUDA", + "value": 280652, + "unit": "ns", + "extra": "gctime=0\nmemory=20704\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/2 thread(s)", + "value": 2768000, + "unit": "ns", + "extra": "gctime=0\nmemory=2951280\nallocs=548\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/4 thread(s)", + "value": 2463583, + "unit": "ns", + "extra": "gctime=0\nmemory=3535088\nallocs=588\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/8 thread(s)", + "value": 3332229, + "unit": "ns", + "extra": "gctime=0\nmemory=4702704\nallocs=668\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/1 thread(s)", + "value": 3408208, + "unit": "ns", + "extra": "gctime=0\nmemory=2755376\nallocs=528\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/GPU/CUDA", + "value": 1607675.5, + "unit": "ns", + "extra": "gctime=0\nmemory=140544\nallocs=2606\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 17166.5, + "unit": "ns", + "extra": "gctime=0\nmemory=134288\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 15250, + "unit": "ns", + "extra": "gctime=0\nmemory=134288\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 20000, + "unit": "ns", + "extra": "gctime=0\nmemory=134288\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 17125, + "unit": "ns", + "extra": "gctime=0\nmemory=134288\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 144011, + "unit": "ns", + "extra": "gctime=0\nmemory=7456\nallocs=198\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 259584, + "unit": "ns", + "extra": "gctime=0\nmemory=933632\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 223937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=933632\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 217584, + "unit": "ns", + "extra": "gctime=0\nmemory=933632\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 256291.5, + "unit": "ns", + "extra": "gctime=0\nmemory=933632\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 639995.5, + "unit": "ns", + "extra": "gctime=0\nmemory=36920\nallocs=1105\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 220833, + "unit": "ns", + "extra": "gctime=0\nmemory=132624\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 220916, + "unit": "ns", + "extra": "gctime=0\nmemory=132624\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 223833, + "unit": "ns", + "extra": "gctime=0\nmemory=132624\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 222000, + "unit": "ns", + "extra": "gctime=0\nmemory=132624\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 270421, + "unit": "ns", + "extra": "gctime=0\nmemory=8480\nallocs=386\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 506875, + "unit": "ns", + "extra": "gctime=0\nmemory=1321456\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 560584, + "unit": "ns", + "extra": "gctime=0\nmemory=1321456\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 503459, + "unit": "ns", + "extra": "gctime=0\nmemory=1321456\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 524208, + "unit": "ns", + "extra": "gctime=0\nmemory=1321456\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 1352767, + "unit": "ns", + "extra": "gctime=0\nmemory=49408\nallocs=2215\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=4)/forward/CPU/2 thread(s)", + "value": 328125, + "unit": "ns", + "extra": "gctime=0\nmemory=5472\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=4)/forward/CPU/4 thread(s)", + "value": 340334, + "unit": "ns", + "extra": "gctime=0\nmemory=6544\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=4)/forward/CPU/8 thread(s)", + "value": 381292, + "unit": "ns", + "extra": "gctime=0\nmemory=8688\nallocs=42\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=4)/forward/CPU/1 thread(s)", + "value": 328083.5, + "unit": "ns", + "extra": "gctime=0\nmemory=4944\nallocs=7\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=4)/forward/GPU/CUDA", + "value": 16321, + "unit": "ns", + "extra": "gctime=0\nmemory=560\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=4)/zygote/CPU/2 thread(s)", + "value": 715937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=21424\nallocs=58\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=4)/zygote/CPU/4 thread(s)", + "value": 730917, + "unit": "ns", + "extra": "gctime=0\nmemory=24640\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=4)/zygote/CPU/8 thread(s)", + "value": 1019083, + "unit": "ns", + "extra": "gctime=0\nmemory=31072\nallocs=148\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=4)/zygote/CPU/1 thread(s)", + "value": 654375, + "unit": "ns", + "extra": "gctime=0\nmemory=19840\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(16, Bsize=4)/zygote/GPU/CUDA", + "value": 194891, + "unit": "ns", + "extra": "gctime=0\nmemory=6416\nallocs=285\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 17625, + "unit": "ns", + "extra": "gctime=0\nmemory=134544\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 17833, + "unit": "ns", + "extra": "gctime=0\nmemory=134544\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 20625, + "unit": "ns", + "extra": "gctime=0\nmemory=134544\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 18500, + "unit": "ns", + "extra": "gctime=0\nmemory=134544\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 144691, + "unit": "ns", + "extra": "gctime=0\nmemory=8144\nallocs=222\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 211958, + "unit": "ns", + "extra": "gctime=0\nmemory=805680\nallocs=117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 211770.5, + "unit": "ns", + "extra": "gctime=0\nmemory=805680\nallocs=117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 213792, + "unit": "ns", + "extra": "gctime=0\nmemory=805680\nallocs=117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 224209, + "unit": "ns", + "extra": "gctime=0\nmemory=805680\nallocs=117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=identity, affine=true)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 927744, + "unit": "ns", + "extra": "gctime=0\nmemory=43360\nallocs=1219\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(32 x 32)/forward/CPU/2 thread(s)", + "value": 6083, + "unit": "ns", + "extra": "gctime=0\nmemory=5408\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(32 x 32)/forward/CPU/4 thread(s)", + "value": 5333, + "unit": "ns", + "extra": "gctime=0\nmemory=5408\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(32 x 32)/forward/CPU/8 thread(s)", + "value": 7333, + "unit": "ns", + "extra": "gctime=0\nmemory=5408\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(32 x 32)/forward/CPU/1 thread(s)", + "value": 6375, + "unit": "ns", + "extra": "gctime=0\nmemory=5408\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(32 x 32)/forward/GPU/CUDA", + "value": 195900.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6432\nallocs=316\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 10125, + "unit": "ns", + "extra": "gctime=0\nmemory=42768\nallocs=85\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 10959, + "unit": "ns", + "extra": "gctime=0\nmemory=42768\nallocs=85\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 11167, + "unit": "ns", + "extra": "gctime=0\nmemory=42768\nallocs=85\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 10250, + "unit": "ns", + "extra": "gctime=0\nmemory=42768\nallocs=85\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=identity, affine=false)(32 x 32)/zygote/GPU/CUDA", + "value": 1052511.5, + "unit": "ns", + "extra": "gctime=0\nmemory=41376\nallocs=1916\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(4 x 32)/forward/CPU/2 thread(s)", + "value": 3084, + "unit": "ns", + "extra": "gctime=0\nmemory=1632\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(4 x 32)/forward/CPU/4 thread(s)", + "value": 3500, + "unit": "ns", + "extra": "gctime=0\nmemory=1632\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(4 x 32)/forward/CPU/8 thread(s)", + "value": 6208, + "unit": "ns", + "extra": "gctime=0\nmemory=1632\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(4 x 32)/forward/CPU/1 thread(s)", + "value": 2833, + "unit": "ns", + "extra": "gctime=0\nmemory=1632\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(4 x 32)/forward/GPU/CUDA", + "value": 237907.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6432\nallocs=316\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 7125, + "unit": "ns", + "extra": "gctime=0\nmemory=13872\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 7833, + "unit": "ns", + "extra": "gctime=0\nmemory=13872\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 8000, + "unit": "ns", + "extra": "gctime=0\nmemory=13872\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 7375, + "unit": "ns", + "extra": "gctime=0\nmemory=13872\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=relu, affine=false)(4 x 32)/zygote/GPU/CUDA", + "value": 1089693, + "unit": "ns", + "extra": "gctime=0\nmemory=43616\nallocs=2020\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)", + "value": 23765688, + "unit": "ns", + "extra": "gctime=0\nmemory=71841200\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)", + "value": 34116708, + "unit": "ns", + "extra": "gctime=0\nmemory=85128016\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)", + "value": 37548791, + "unit": "ns", + "extra": "gctime=0\nmemory=111701648\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)", + "value": 34871603.5, + "unit": "ns", + "extra": "gctime=326687\nmemory=67411936\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/GPU/CUDA", + "value": 1848733, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)", + "value": 184574541, + "unit": "ns", + "extra": "gctime=3554875\nmemory=215263264\nallocs=158\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)", + "value": 158637292, + "unit": "ns", + "extra": "gctime=2537334\nmemory=241836896\nallocs=178\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)", + "value": 146492249.5, + "unit": "ns", + "extra": "gctime=2717604.5\nmemory=294984160\nallocs=218\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)", + "value": 412825666, + "unit": "ns", + "extra": "gctime=141827500\nmemory=206404736\nallocs=148\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/GPU/CUDA", + "value": 16516963.5, + "unit": "ns", + "extra": "gctime=0\nmemory=28656\nallocs=849\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)", + "value": 428413125, + "unit": "ns", + "extra": "gctime=165364917\nmemory=215259424\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)", + "value": 245145812.5, + "unit": "ns", + "extra": "gctime=2967770.5\nmemory=241833056\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)", + "value": 232582250, + "unit": "ns", + "extra": "gctime=2973500\nmemory=294980320\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)", + "value": 482166083, + "unit": "ns", + "extra": "gctime=160809791\nmemory=206400896\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 182208, + "unit": "ns", + "extra": "gctime=0\nmemory=134544\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 182167, + "unit": "ns", + "extra": "gctime=0\nmemory=134544\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 187187.5, + "unit": "ns", + "extra": "gctime=0\nmemory=134544\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 184125, + "unit": "ns", + "extra": "gctime=0\nmemory=134544\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 228276.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8144\nallocs=222\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 596834, + "unit": "ns", + "extra": "gctime=0\nmemory=1068048\nallocs=123\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 635292, + "unit": "ns", + "extra": "gctime=0\nmemory=1068048\nallocs=123\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 596083.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1068048\nallocs=123\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 631333.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1068048\nallocs=123\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 1083975, + "unit": "ns", + "extra": "gctime=0\nmemory=45832\nallocs=1352\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=512)/forward/CPU/2 thread(s)", + "value": 3833209, + "unit": "ns", + "extra": "gctime=0\nmemory=33555616\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=512)/forward/CPU/4 thread(s)", + "value": 3802458.5, + "unit": "ns", + "extra": "gctime=0\nmemory=33556688\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=512)/forward/CPU/8 thread(s)", + "value": 3469041.5, + "unit": "ns", + "extra": "gctime=0\nmemory=33558832\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=512)/forward/CPU/1 thread(s)", + "value": 5353229, + "unit": "ns", + "extra": "gctime=0\nmemory=33555088\nallocs=8\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=512)/forward/GPU/CUDA", + "value": 538332.5, + "unit": "ns", + "extra": "gctime=0\nmemory=576\nallocs=27\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=512)/zygote/CPU/2 thread(s)", + "value": 17399875, + "unit": "ns", + "extra": "gctime=993417\nmemory=134222000\nallocs=62\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=512)/zygote/CPU/4 thread(s)", + "value": 17212083, + "unit": "ns", + "extra": "gctime=1000250\nmemory=134225216\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=512)/zygote/CPU/8 thread(s)", + "value": 16570812.5, + "unit": "ns", + "extra": "gctime=989708\nmemory=134231648\nallocs=152\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=512)/zygote/CPU/1 thread(s)", + "value": 22200875, + "unit": "ns", + "extra": "gctime=2286667\nmemory=134220416\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=512)/zygote/GPU/CUDA", + "value": 2641650, + "unit": "ns", + "extra": "gctime=0\nmemory=7424\nallocs=323\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(32 x 32)/forward/CPU/2 thread(s)", + "value": 417, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(32 x 32)/forward/CPU/4 thread(s)", + "value": 583, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(32 x 32)/forward/CPU/8 thread(s)", + "value": 583, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(32 x 32)/forward/CPU/1 thread(s)", + "value": 500, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(32 x 32)/forward/GPU/CUDA", + "value": 32777, + "unit": "ns", + "extra": "gctime=0\nmemory=4080\nallocs=144\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 8792, + "unit": "ns", + "extra": "gctime=0\nmemory=44528\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 9437.5, + "unit": "ns", + "extra": "gctime=0\nmemory=44528\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 9875, + "unit": "ns", + "extra": "gctime=0\nmemory=44528\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 8750, + "unit": "ns", + "extra": "gctime=0\nmemory=44528\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(32 x 32)/zygote/GPU/CUDA", + "value": 268383.5, + "unit": "ns", + "extra": "gctime=0\nmemory=14888\nallocs=609\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/2 thread(s)", + "value": 505323209, + "unit": "ns", + "extra": "gctime=22747625\nmemory=316766112\nallocs=987\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/4 thread(s)", + "value": 430592979, + "unit": "ns", + "extra": "gctime=60994521\nmemory=336808640\nallocs=1117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/8 thread(s)", + "value": 372723313, + "unit": "ns", + "extra": "gctime=67794313\nmemory=376893696\nallocs=1377\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/1 thread(s)", + "value": 593451250, + "unit": "ns", + "extra": "gctime=0\nmemory=310081040\nallocs=922\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/GPU/CUDA", + "value": 12484712, + "unit": "ns", + "extra": "gctime=0\nmemory=139952\nallocs=4461\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/2 thread(s)", + "value": 2052799291.5, + "unit": "ns", + "extra": "gctime=342789208.5\nmemory=1360017936\nallocs=3373\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/4 thread(s)", + "value": 1634023584, + "unit": "ns", + "extra": "gctime=112967833\nmemory=1400102992\nallocs=3633\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/8 thread(s)", + "value": 1488799646, + "unit": "ns", + "extra": "gctime=82122104.5\nmemory=1480273104\nallocs=4153\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/1 thread(s)", + "value": 2213028333.5, + "unit": "ns", + "extra": "gctime=256107104\nmemory=1346647792\nallocs=3243\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/GPU/CUDA", + "value": 49227788.5, + "unit": "ns", + "extra": "gctime=0\nmemory=815736\nallocs=13318\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)", + "value": 1616500, + "unit": "ns", + "extra": "gctime=0\nmemory=4494384\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)", + "value": 1177979, + "unit": "ns", + "extra": "gctime=0\nmemory=5326672\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)", + "value": 1381750, + "unit": "ns", + "extra": "gctime=0\nmemory=6991120\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)", + "value": 2503313, + "unit": "ns", + "extra": "gctime=0\nmemory=4216672\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/GPU/CUDA", + "value": 216079, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=198\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)", + "value": 12726146.5, + "unit": "ns", + "extra": "gctime=0\nmemory=17403184\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)", + "value": 9927958, + "unit": "ns", + "extra": "gctime=0\nmemory=19067760\nallocs=180\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)", + "value": 9644812, + "unit": "ns", + "extra": "gctime=0\nmemory=22396656\nallocs=220\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)", + "value": 18434917, + "unit": "ns", + "extra": "gctime=0\nmemory=16847760\nallocs=150\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/GPU/CUDA", + "value": 2044196, + "unit": "ns", + "extra": "gctime=0\nmemory=30008\nallocs=913\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)", + "value": 17630500.5, + "unit": "ns", + "extra": "gctime=0\nmemory=13463120\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)", + "value": 14697728.5, + "unit": "ns", + "extra": "gctime=0\nmemory=15127696\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)", + "value": 14551916, + "unit": "ns", + "extra": "gctime=0\nmemory=18456592\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)", + "value": 21455334, + "unit": "ns", + "extra": "gctime=0\nmemory=12907696\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=gelu)(32 x 128)/forward/CPU/2 thread(s)", + "value": 26208, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=gelu)(32 x 128)/forward/CPU/4 thread(s)", + "value": 26209, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=gelu)(32 x 128)/forward/CPU/8 thread(s)", + "value": 26375, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=gelu)(32 x 128)/forward/CPU/1 thread(s)", + "value": 26167, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=gelu)(32 x 128)/forward/GPU/CUDA", + "value": 24313, + "unit": "ns", + "extra": "gctime=0\nmemory=960\nallocs=41\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=gelu)(32 x 128)/zygote/CPU/2 thread(s)", + "value": 67125, + "unit": "ns", + "extra": "gctime=0\nmemory=88640\nallocs=27\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=gelu)(32 x 128)/zygote/CPU/4 thread(s)", + "value": 66958, + "unit": "ns", + "extra": "gctime=0\nmemory=88640\nallocs=27\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=gelu)(32 x 128)/zygote/CPU/8 thread(s)", + "value": 68417, + "unit": "ns", + "extra": "gctime=0\nmemory=88640\nallocs=27\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=gelu)(32 x 128)/zygote/CPU/1 thread(s)", + "value": 66833, + "unit": "ns", + "extra": "gctime=0\nmemory=88640\nallocs=27\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=gelu)(32 x 128)/zygote/GPU/CUDA", + "value": 408721.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12040\nallocs=518\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 203333, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 209334, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 210375, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 198959, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 26930, + "unit": "ns", + "extra": "gctime=0\nmemory=2736\nallocs=99\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 606708, + "unit": "ns", + "extra": "gctime=0\nmemory=1063936\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 670042, + "unit": "ns", + "extra": "gctime=0\nmemory=1063936\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 666042, + "unit": "ns", + "extra": "gctime=0\nmemory=1063936\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 630875, + "unit": "ns", + "extra": "gctime=0\nmemory=1063936\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 354960.5, + "unit": "ns", + "extra": "gctime=0\nmemory=18808\nallocs=628\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 604375, + "unit": "ns", + "extra": "gctime=0\nmemory=3146992\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 540500, + "unit": "ns", + "extra": "gctime=0\nmemory=3146992\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 645667, + "unit": "ns", + "extra": "gctime=0\nmemory=3146992\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 638000, + "unit": "ns", + "extra": "gctime=0\nmemory=3146992\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 132024, + "unit": "ns", + "extra": "gctime=0\nmemory=9440\nallocs=446\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 2256750, + "unit": "ns", + "extra": "gctime=0\nmemory=17901328\nallocs=145\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 2054209, + "unit": "ns", + "extra": "gctime=0\nmemory=17901328\nallocs=145\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 2228916, + "unit": "ns", + "extra": "gctime=0\nmemory=17901328\nallocs=145\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 2248437.5, + "unit": "ns", + "extra": "gctime=0\nmemory=17901328\nallocs=145\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=identity, affine=true)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 1184553, + "unit": "ns", + "extra": "gctime=0\nmemory=50656\nallocs=2256\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 18000, + "unit": "ns", + "extra": "gctime=0\nmemory=134544\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 16917, + "unit": "ns", + "extra": "gctime=0\nmemory=134544\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 23250, + "unit": "ns", + "extra": "gctime=0\nmemory=134544\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 16917, + "unit": "ns", + "extra": "gctime=0\nmemory=134544\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 146620, + "unit": "ns", + "extra": "gctime=0\nmemory=8144\nallocs=222\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 230417, + "unit": "ns", + "extra": "gctime=0\nmemory=936864\nallocs=120\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 230104.5, + "unit": "ns", + "extra": "gctime=0\nmemory=936864\nallocs=120\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 260042, + "unit": "ns", + "extra": "gctime=0\nmemory=936864\nallocs=120\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 262271, + "unit": "ns", + "extra": "gctime=0\nmemory=936864\nallocs=120\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=true)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 1058952, + "unit": "ns", + "extra": "gctime=0\nmemory=44888\nallocs=1298\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(32 x 32)/forward/CPU/2 thread(s)", + "value": 459, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(32 x 32)/forward/CPU/4 thread(s)", + "value": 583, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(32 x 32)/forward/CPU/8 thread(s)", + "value": 584, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(32 x 32)/forward/CPU/1 thread(s)", + "value": 500, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(32 x 32)/forward/GPU/CUDA", + "value": 23956, + "unit": "ns", + "extra": "gctime=0\nmemory=3072\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 9250, + "unit": "ns", + "extra": "gctime=0\nmemory=44992\nallocs=89\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 9750, + "unit": "ns", + "extra": "gctime=0\nmemory=44992\nallocs=89\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 10292, + "unit": "ns", + "extra": "gctime=0\nmemory=44992\nallocs=89\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 9792, + "unit": "ns", + "extra": "gctime=0\nmemory=44992\nallocs=89\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(32 x 32)/zygote/GPU/CUDA", + "value": 261926, + "unit": "ns", + "extra": "gctime=0\nmemory=17496\nallocs=567\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(4 x 32)/forward/CPU/2 thread(s)", + "value": 5625, + "unit": "ns", + "extra": "gctime=0\nmemory=3360\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(4 x 32)/forward/CPU/4 thread(s)", + "value": 6333, + "unit": "ns", + "extra": "gctime=0\nmemory=3360\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(4 x 32)/forward/CPU/8 thread(s)", + "value": 8083, + "unit": "ns", + "extra": "gctime=0\nmemory=3360\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(4 x 32)/forward/CPU/1 thread(s)", + "value": 7792, + "unit": "ns", + "extra": "gctime=0\nmemory=3360\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(4 x 32)/forward/GPU/CUDA", + "value": 235109, + "unit": "ns", + "extra": "gctime=0\nmemory=7264\nallocs=193\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 7042, + "unit": "ns", + "extra": "gctime=0\nmemory=17552\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 7625, + "unit": "ns", + "extra": "gctime=0\nmemory=17552\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 7792, + "unit": "ns", + "extra": "gctime=0\nmemory=17552\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 7459, + "unit": "ns", + "extra": "gctime=0\nmemory=17552\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(4 x 32)/zygote/GPU/CUDA", + "value": 809933, + "unit": "ns", + "extra": "gctime=0\nmemory=33152\nallocs=978\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=gelu)(2 x 128)/forward/CPU/2 thread(s)", + "value": 1916, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=gelu)(2 x 128)/forward/CPU/4 thread(s)", + "value": 2208, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=gelu)(2 x 128)/forward/CPU/8 thread(s)", + "value": 2416.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=gelu)(2 x 128)/forward/CPU/1 thread(s)", + "value": 2125, + "unit": "ns", + "extra": "gctime=0\nmemory=1280\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=gelu)(2 x 128)/forward/GPU/CUDA", + "value": 17938, + "unit": "ns", + "extra": "gctime=0\nmemory=1504\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=gelu)(2 x 128)/zygote/CPU/2 thread(s)", + "value": 6542, + "unit": "ns", + "extra": "gctime=0\nmemory=5728\nallocs=34\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=gelu)(2 x 128)/zygote/CPU/4 thread(s)", + "value": 6625, + "unit": "ns", + "extra": "gctime=0\nmemory=5728\nallocs=34\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=gelu)(2 x 128)/zygote/CPU/8 thread(s)", + "value": 6708, + "unit": "ns", + "extra": "gctime=0\nmemory=5728\nallocs=34\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=gelu)(2 x 128)/zygote/CPU/1 thread(s)", + "value": 6709, + "unit": "ns", + "extra": "gctime=0\nmemory=5728\nallocs=34\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(2, act=gelu)(2 x 128)/zygote/GPU/CUDA", + "value": 333610.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10440\nallocs=483\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=tanh)(512 x 128)/forward/CPU/2 thread(s)", + "value": 746541, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=tanh)(512 x 128)/forward/CPU/4 thread(s)", + "value": 748729, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=tanh)(512 x 128)/forward/CPU/8 thread(s)", + "value": 749667, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=tanh)(512 x 128)/forward/CPU/1 thread(s)", + "value": 747333.5, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=tanh)(512 x 128)/forward/GPU/CUDA", + "value": 21921, + "unit": "ns", + "extra": "gctime=0\nmemory=1504\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=tanh)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 791604.5, + "unit": "ns", + "extra": "gctime=0\nmemory=789264\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=tanh)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 776416.5, + "unit": "ns", + "extra": "gctime=0\nmemory=789264\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=tanh)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 791708, + "unit": "ns", + "extra": "gctime=0\nmemory=789264\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=tanh)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 790958, + "unit": "ns", + "extra": "gctime=0\nmemory=789264\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=tanh)(512 x 128)/zygote/GPU/CUDA", + "value": 297538.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9896\nallocs=441\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 7250, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 5833, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 3875, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 10042, + "unit": "ns", + "extra": "gctime=0\nmemory=132096\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 34255, + "unit": "ns", + "extra": "gctime=0\nmemory=3696\nallocs=131\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 228167, + "unit": "ns", + "extra": "gctime=0\nmemory=932560\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 227333, + "unit": "ns", + "extra": "gctime=0\nmemory=932560\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 269083, + "unit": "ns", + "extra": "gctime=0\nmemory=932560\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 254291.5, + "unit": "ns", + "extra": "gctime=0\nmemory=932560\nallocs=107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 364412.5, + "unit": "ns", + "extra": "gctime=0\nmemory=15064\nallocs=616\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(32 x 32)/forward/CPU/2 thread(s)", + "value": 12334, + "unit": "ns", + "extra": "gctime=0\nmemory=5408\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(32 x 32)/forward/CPU/4 thread(s)", + "value": 10417, + "unit": "ns", + "extra": "gctime=0\nmemory=5408\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(32 x 32)/forward/CPU/8 thread(s)", + "value": 13041, + "unit": "ns", + "extra": "gctime=0\nmemory=5408\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(32 x 32)/forward/CPU/1 thread(s)", + "value": 10416, + "unit": "ns", + "extra": "gctime=0\nmemory=5408\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(32 x 32)/forward/GPU/CUDA", + "value": 249045.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6432\nallocs=316\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 24667, + "unit": "ns", + "extra": "gctime=0\nmemory=53696\nallocs=102\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 24583, + "unit": "ns", + "extra": "gctime=0\nmemory=53696\nallocs=102\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 26458.5, + "unit": "ns", + "extra": "gctime=0\nmemory=53696\nallocs=102\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 24750, + "unit": "ns", + "extra": "gctime=0\nmemory=53696\nallocs=102\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=false)(32 x 32)/zygote/GPU/CUDA", + "value": 1131953, + "unit": "ns", + "extra": "gctime=0\nmemory=46336\nallocs=2066\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)", + "value": 106536125, + "unit": "ns", + "extra": "gctime=0\nmemory=71841200\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)", + "value": 117252875.5, + "unit": "ns", + "extra": "gctime=0\nmemory=85128016\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)", + "value": 120784042, + "unit": "ns", + "extra": "gctime=0\nmemory=111701648\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)", + "value": 117622625, + "unit": "ns", + "extra": "gctime=330667\nmemory=67411936\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/GPU/CUDA", + "value": 2632972, + "unit": "ns", + "extra": "gctime=0\nmemory=6240\nallocs=216\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)", + "value": 394299334, + "unit": "ns", + "extra": "gctime=22349167\nmemory=341219216\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)", + "value": 367861709, + "unit": "ns", + "extra": "gctime=22047792\nmemory=367792848\nallocs=156\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)", + "value": 421890563, + "unit": "ns", + "extra": "gctime=90839020.5\nmemory=420940112\nallocs=196\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)", + "value": 488934791, + "unit": "ns", + "extra": "gctime=22626208\nmemory=332360688\nallocs=126\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/GPU/CUDA", + "value": 15275033, + "unit": "ns", + "extra": "gctime=0\nmemory=31048\nallocs=1014\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)", + "value": 759409875, + "unit": "ns", + "extra": "gctime=165555062.5\nmemory=215259440\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)", + "value": 757340667, + "unit": "ns", + "extra": "gctime=3572834\nmemory=241833072\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)", + "value": 744212312.5, + "unit": "ns", + "extra": "gctime=3003312.5\nmemory=294980336\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)", + "value": 765863541.5, + "unit": "ns", + "extra": "gctime=162010500.5\nmemory=206400912\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(32 x 32)/forward/CPU/2 thread(s)", + "value": 7666, + "unit": "ns", + "extra": "gctime=0\nmemory=7344\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(32 x 32)/forward/CPU/4 thread(s)", + "value": 6959, + "unit": "ns", + "extra": "gctime=0\nmemory=7344\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(32 x 32)/forward/CPU/8 thread(s)", + "value": 8708, + "unit": "ns", + "extra": "gctime=0\nmemory=7344\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(32 x 32)/forward/CPU/1 thread(s)", + "value": 7250, + "unit": "ns", + "extra": "gctime=0\nmemory=7344\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(32 x 32)/forward/GPU/CUDA", + "value": 239312.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8080\nallocs=225\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 13542, + "unit": "ns", + "extra": "gctime=0\nmemory=43568\nallocs=100\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 13875, + "unit": "ns", + "extra": "gctime=0\nmemory=43568\nallocs=100\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 14875, + "unit": "ns", + "extra": "gctime=0\nmemory=43568\nallocs=100\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 14000, + "unit": "ns", + "extra": "gctime=0\nmemory=43568\nallocs=100\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(32 x 32)/zygote/GPU/CUDA", + "value": 1090703, + "unit": "ns", + "extra": "gctime=0\nmemory=39424\nallocs=1117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(32 x 32)/forward/CPU/2 thread(s)", + "value": 7625, + "unit": "ns", + "extra": "gctime=0\nmemory=7088\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(32 x 32)/forward/CPU/4 thread(s)", + "value": 7937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7088\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(32 x 32)/forward/CPU/8 thread(s)", + "value": 9583, + "unit": "ns", + "extra": "gctime=0\nmemory=7088\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(32 x 32)/forward/CPU/1 thread(s)", + "value": 8459, + "unit": "ns", + "extra": "gctime=0\nmemory=7088\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(32 x 32)/forward/GPU/CUDA", + "value": 237956.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=201\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 12125, + "unit": "ns", + "extra": "gctime=0\nmemory=40112\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 12666, + "unit": "ns", + "extra": "gctime=0\nmemory=40112\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 13375, + "unit": "ns", + "extra": "gctime=0\nmemory=40112\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 12417, + "unit": "ns", + "extra": "gctime=0\nmemory=40112\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=false)(32 x 32)/zygote/GPU/CUDA", + "value": 799124.5, + "unit": "ns", + "extra": "gctime=0\nmemory=33408\nallocs=994\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=128)/forward/CPU/2 thread(s)", + "value": 342375, + "unit": "ns", + "extra": "gctime=0\nmemory=3424\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=128)/forward/CPU/4 thread(s)", + "value": 347229.5, + "unit": "ns", + "extra": "gctime=0\nmemory=4496\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=128)/forward/CPU/8 thread(s)", + "value": 395334, + "unit": "ns", + "extra": "gctime=0\nmemory=6640\nallocs=42\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=128)/forward/CPU/1 thread(s)", + "value": 320000, + "unit": "ns", + "extra": "gctime=0\nmemory=2896\nallocs=7\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=128)/forward/GPU/CUDA", + "value": 16875, + "unit": "ns", + "extra": "gctime=0\nmemory=560\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=128)/zygote/CPU/2 thread(s)", + "value": 702437.5, + "unit": "ns", + "extra": "gctime=0\nmemory=13232\nallocs=58\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=128)/zygote/CPU/4 thread(s)", + "value": 736167, + "unit": "ns", + "extra": "gctime=0\nmemory=16448\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=128)/zygote/CPU/8 thread(s)", + "value": 1020125, + "unit": "ns", + "extra": "gctime=0\nmemory=22880\nallocs=148\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=128)/zygote/CPU/1 thread(s)", + "value": 650833.5, + "unit": "ns", + "extra": "gctime=0\nmemory=11648\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=128)/zygote/GPU/CUDA", + "value": 201562.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6416\nallocs=285\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(4 x 32)/forward/CPU/2 thread(s)", + "value": 291, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(4 x 32)/forward/CPU/4 thread(s)", + "value": 375, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(4 x 32)/forward/CPU/8 thread(s)", + "value": 375, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(4 x 32)/forward/CPU/1 thread(s)", + "value": 292, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(4 x 32)/forward/GPU/CUDA", + "value": 23767, + "unit": "ns", + "extra": "gctime=0\nmemory=3008\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 6334, + "unit": "ns", + "extra": "gctime=0\nmemory=16752\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 6625, + "unit": "ns", + "extra": "gctime=0\nmemory=16752\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 6917, + "unit": "ns", + "extra": "gctime=0\nmemory=16752\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 6292, + "unit": "ns", + "extra": "gctime=0\nmemory=16752\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=identity, affine=true)(4 x 32)/zygote/GPU/CUDA", + "value": 243851, + "unit": "ns", + "extra": "gctime=0\nmemory=16000\nallocs=489\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(32 x 32)/forward/CPU/2 thread(s)", + "value": 5708, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(32 x 32)/forward/CPU/4 thread(s)", + "value": 5792, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(32 x 32)/forward/CPU/8 thread(s)", + "value": 5875, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(32 x 32)/forward/CPU/1 thread(s)", + "value": 5708, + "unit": "ns", + "extra": "gctime=0\nmemory=5440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(32 x 32)/forward/GPU/CUDA", + "value": 25012, + "unit": "ns", + "extra": "gctime=0\nmemory=3008\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(32 x 32)/zygote/CPU/2 thread(s)", + "value": 23750, + "unit": "ns", + "extra": "gctime=0\nmemory=49344\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(32 x 32)/zygote/CPU/4 thread(s)", + "value": 21416.5, + "unit": "ns", + "extra": "gctime=0\nmemory=49344\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(32 x 32)/zygote/CPU/8 thread(s)", + "value": 21875, + "unit": "ns", + "extra": "gctime=0\nmemory=49344\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(32 x 32)/zygote/CPU/1 thread(s)", + "value": 21395.5, + "unit": "ns", + "extra": "gctime=0\nmemory=49344\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(32 x 32)/zygote/GPU/CUDA", + "value": 265929, + "unit": "ns", + "extra": "gctime=0\nmemory=18440\nallocs=622\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 147104.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1050128\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 144042, + "unit": "ns", + "extra": "gctime=0\nmemory=1050128\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 150708, + "unit": "ns", + "extra": "gctime=0\nmemory=1050128\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 145416.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1050128\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 167550.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8480\nallocs=386\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 1335979.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10496496\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 1317041.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10496496\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 1303209, + "unit": "ns", + "extra": "gctime=0\nmemory=10496496\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 1321541, + "unit": "ns", + "extra": "gctime=0\nmemory=10496496\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 1351455.5, + "unit": "ns", + "extra": "gctime=0\nmemory=49168\nallocs=2196\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/2 thread(s)", + "value": 24500, + "unit": "ns", + "extra": "gctime=0\nmemory=132624\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/4 thread(s)", + "value": 23792, + "unit": "ns", + "extra": "gctime=0\nmemory=132624\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/8 thread(s)", + "value": 25791, + "unit": "ns", + "extra": "gctime=0\nmemory=132624\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/CPU/1 thread(s)", + "value": 24396, + "unit": "ns", + "extra": "gctime=0\nmemory=132624\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/forward/GPU/CUDA", + "value": 352723.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8480\nallocs=386\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/2 thread(s)", + "value": 181833.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1321456\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/4 thread(s)", + "value": 179333, + "unit": "ns", + "extra": "gctime=0\nmemory=1321456\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/8 thread(s)", + "value": 128333, + "unit": "ns", + "extra": "gctime=0\nmemory=1321456\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/CPU/1 thread(s)", + "value": 130104.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1321456\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=false)(16 x 16 x 4 x 32)/zygote/GPU/CUDA", + "value": 1448170.5, + "unit": "ns", + "extra": "gctime=0\nmemory=49168\nallocs=2196\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(4 x 32)/forward/CPU/2 thread(s)", + "value": 250, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(4 x 32)/forward/CPU/4 thread(s)", + "value": 375, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(4 x 32)/forward/CPU/8 thread(s)", + "value": 416, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(4 x 32)/forward/CPU/1 thread(s)", + "value": 292, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(4 x 32)/forward/GPU/CUDA", + "value": 22864, + "unit": "ns", + "extra": "gctime=0\nmemory=3072\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 6208, + "unit": "ns", + "extra": "gctime=0\nmemory=17328\nallocs=89\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 6604.5, + "unit": "ns", + "extra": "gctime=0\nmemory=17328\nallocs=89\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 6875, + "unit": "ns", + "extra": "gctime=0\nmemory=17328\nallocs=89\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 6417, + "unit": "ns", + "extra": "gctime=0\nmemory=17328\nallocs=89\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=true)(4 x 32)/zygote/GPU/CUDA", + "value": 254855.5, + "unit": "ns", + "extra": "gctime=0\nmemory=17496\nallocs=567\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(4 x 32)/forward/CPU/2 thread(s)", + "value": 4833, + "unit": "ns", + "extra": "gctime=0\nmemory=2400\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(4 x 32)/forward/CPU/4 thread(s)", + "value": 4979, + "unit": "ns", + "extra": "gctime=0\nmemory=2400\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(4 x 32)/forward/CPU/8 thread(s)", + "value": 7000, + "unit": "ns", + "extra": "gctime=0\nmemory=2400\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(4 x 32)/forward/CPU/1 thread(s)", + "value": 4583, + "unit": "ns", + "extra": "gctime=0\nmemory=2400\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(4 x 32)/forward/GPU/CUDA", + "value": 253659, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=376\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 9958, + "unit": "ns", + "extra": "gctime=0\nmemory=17872\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 10125, + "unit": "ns", + "extra": "gctime=0\nmemory=17872\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 10520.5, + "unit": "ns", + "extra": "gctime=0\nmemory=17872\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 10125, + "unit": "ns", + "extra": "gctime=0\nmemory=17872\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(2, act=gelu, affine=true)(4 x 32)/zygote/GPU/CUDA", + "value": 1348895, + "unit": "ns", + "extra": "gctime=0\nmemory=47360\nallocs=2205\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=gelu)(2 x 128)/forward/CPU/2 thread(s)", + "value": 1584, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=gelu)(2 x 128)/forward/CPU/4 thread(s)", + "value": 1625, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=gelu)(2 x 128)/forward/CPU/8 thread(s)", + "value": 1583, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=gelu)(2 x 128)/forward/CPU/1 thread(s)", + "value": 1583, + "unit": "ns", + "extra": "gctime=0\nmemory=1088\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=gelu)(2 x 128)/forward/GPU/CUDA", + "value": 22776, + "unit": "ns", + "extra": "gctime=0\nmemory=960\nallocs=41\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=gelu)(2 x 128)/zygote/CPU/2 thread(s)", + "value": 5708, + "unit": "ns", + "extra": "gctime=0\nmemory=6608\nallocs=27\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=gelu)(2 x 128)/zygote/CPU/4 thread(s)", + "value": 5958, + "unit": "ns", + "extra": "gctime=0\nmemory=6608\nallocs=27\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=gelu)(2 x 128)/zygote/CPU/8 thread(s)", + "value": 6042, + "unit": "ns", + "extra": "gctime=0\nmemory=6608\nallocs=27\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=gelu)(2 x 128)/zygote/CPU/1 thread(s)", + "value": 5625, + "unit": "ns", + "extra": "gctime=0\nmemory=6608\nallocs=27\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(2, bias=false, act=gelu)(2 x 128)/zygote/GPU/CUDA", + "value": 272052, + "unit": "ns", + "extra": "gctime=0\nmemory=11096\nallocs=483\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)", + "value": 6789167, + "unit": "ns", + "extra": "gctime=0\nmemory=4494384\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)", + "value": 6396375, + "unit": "ns", + "extra": "gctime=0\nmemory=5326672\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)", + "value": 6536083, + "unit": "ns", + "extra": "gctime=0\nmemory=6991120\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)", + "value": 7542208, + "unit": "ns", + "extra": "gctime=0\nmemory=4216672\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/GPU/CUDA", + "value": 212440, + "unit": "ns", + "extra": "gctime=0\nmemory=6112\nallocs=208\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)", + "value": 24091354.5, + "unit": "ns", + "extra": "gctime=0\nmemory=21335088\nallocs=135\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)", + "value": 21305541, + "unit": "ns", + "extra": "gctime=0\nmemory=22999664\nallocs=155\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)", + "value": 21007521, + "unit": "ns", + "extra": "gctime=0\nmemory=26328560\nallocs=195\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)", + "value": 29773354.5, + "unit": "ns", + "extra": "gctime=0\nmemory=20779664\nallocs=125\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/GPU/CUDA", + "value": 2122649, + "unit": "ns", + "extra": "gctime=0\nmemory=30824\nallocs=1000\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)", + "value": 37337124.5, + "unit": "ns", + "extra": "gctime=0\nmemory=13463120\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)", + "value": 45672500.5, + "unit": "ns", + "extra": "gctime=0\nmemory=15127696\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)", + "value": 45753542, + "unit": "ns", + "extra": "gctime=0\nmemory=18456592\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)", + "value": 38138500, + "unit": "ns", + "extra": "gctime=0\nmemory=12907696\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(4 x 32)/forward/CPU/2 thread(s)", + "value": 7375, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(4 x 32)/forward/CPU/4 thread(s)", + "value": 6729.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(4 x 32)/forward/CPU/8 thread(s)", + "value": 8520.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(4 x 32)/forward/CPU/1 thread(s)", + "value": 7250, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=36\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(4 x 32)/forward/GPU/CUDA", + "value": 238000, + "unit": "ns", + "extra": "gctime=0\nmemory=7952\nallocs=217\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 8042, + "unit": "ns", + "extra": "gctime=0\nmemory=20784\nallocs=100\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 8334, + "unit": "ns", + "extra": "gctime=0\nmemory=20784\nallocs=100\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 9250, + "unit": "ns", + "extra": "gctime=0\nmemory=20784\nallocs=100\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 8542, + "unit": "ns", + "extra": "gctime=0\nmemory=20784\nallocs=100\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(2, act=identity, affine=true)(4 x 32)/zygote/GPU/CUDA", + "value": 1071201, + "unit": "ns", + "extra": "gctime=0\nmemory=39168\nallocs=1101\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/2 thread(s)", + "value": 1501083, + "unit": "ns", + "extra": "gctime=0\nmemory=3180288\nallocs=141\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/4 thread(s)", + "value": 1262125, + "unit": "ns", + "extra": "gctime=0\nmemory=3472192\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/8 thread(s)", + "value": 1631083, + "unit": "ns", + "extra": "gctime=0\nmemory=4056000\nallocs=201\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/1 thread(s)", + "value": 2165291.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3082336\nallocs=131\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/GPU/CUDA", + "value": 282584, + "unit": "ns", + "extra": "gctime=0\nmemory=20704\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/2 thread(s)", + "value": 7848208, + "unit": "ns", + "extra": "gctime=0\nmemory=9515248\nallocs=554\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/4 thread(s)", + "value": 6228916.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10099056\nallocs=594\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/8 thread(s)", + "value": 7164000, + "unit": "ns", + "extra": "gctime=0\nmemory=11266672\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/1 thread(s)", + "value": 10495396, + "unit": "ns", + "extra": "gctime=0\nmemory=9319344\nallocs=534\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/GPU/CUDA", + "value": 1889865, + "unit": "ns", + "extra": "gctime=0\nmemory=141600\nallocs=2648\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=4)/forward/CPU/2 thread(s)", + "value": 336187.5, + "unit": "ns", + "extra": "gctime=0\nmemory=263328\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=4)/forward/CPU/4 thread(s)", + "value": 351479, + "unit": "ns", + "extra": "gctime=0\nmemory=264400\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=4)/forward/CPU/8 thread(s)", + "value": 397354.5, + "unit": "ns", + "extra": "gctime=0\nmemory=266544\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=4)/forward/CPU/1 thread(s)", + "value": 346166.5, + "unit": "ns", + "extra": "gctime=0\nmemory=262800\nallocs=8\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=4)/forward/GPU/CUDA", + "value": 42753.5, + "unit": "ns", + "extra": "gctime=0\nmemory=560\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=4)/zygote/CPU/2 thread(s)", + "value": 746334, + "unit": "ns", + "extra": "gctime=0\nmemory=1052848\nallocs=62\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=4)/zygote/CPU/4 thread(s)", + "value": 790062.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1056064\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=4)/zygote/CPU/8 thread(s)", + "value": 1079583, + "unit": "ns", + "extra": "gctime=0\nmemory=1062496\nallocs=152\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=4)/zygote/CPU/1 thread(s)", + "value": 740292, + "unit": "ns", + "extra": "gctime=0\nmemory=1051264\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=4)/zygote/GPU/CUDA", + "value": 308349.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=321\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=identity)(512 x 128)/forward/CPU/2 thread(s)", + "value": 397125, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=identity)(512 x 128)/forward/CPU/4 thread(s)", + "value": 288791, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=identity)(512 x 128)/forward/CPU/8 thread(s)", + "value": 288208, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=identity)(512 x 128)/forward/CPU/1 thread(s)", + "value": 749667, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=identity)(512 x 128)/forward/GPU/CUDA", + "value": 44616.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=identity)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 672395.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1837680\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=identity)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 529625, + "unit": "ns", + "extra": "gctime=0\nmemory=1837680\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=identity)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 529667, + "unit": "ns", + "extra": "gctime=0\nmemory=1837680\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=identity)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 987916, + "unit": "ns", + "extra": "gctime=0\nmemory=1837680\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=identity)(512 x 128)/zygote/GPU/CUDA", + "value": 193722.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10656\nallocs=433\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 595958, + "unit": "ns", + "extra": "gctime=0\nmemory=3146992\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 642042, + "unit": "ns", + "extra": "gctime=0\nmemory=3146992\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 649708, + "unit": "ns", + "extra": "gctime=0\nmemory=3146992\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 646709, + "unit": "ns", + "extra": "gctime=0\nmemory=3146992\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 132676, + "unit": "ns", + "extra": "gctime=0\nmemory=9440\nallocs=446\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 2466250.5, + "unit": "ns", + "extra": "gctime=0\nmemory=19998832\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 2439562, + "unit": "ns", + "extra": "gctime=0\nmemory=19998832\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 2451645.5, + "unit": "ns", + "extra": "gctime=0\nmemory=19998832\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 2457083, + "unit": "ns", + "extra": "gctime=0\nmemory=19998832\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=relu, affine=true)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 1304894, + "unit": "ns", + "extra": "gctime=0\nmemory=52816\nallocs=2361\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=32)/forward/CPU/2 thread(s)", + "value": 341042, + "unit": "ns", + "extra": "gctime=0\nmemory=1696\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=32)/forward/CPU/4 thread(s)", + "value": 352125, + "unit": "ns", + "extra": "gctime=0\nmemory=2768\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=32)/forward/CPU/8 thread(s)", + "value": 397209, + "unit": "ns", + "extra": "gctime=0\nmemory=4912\nallocs=42\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=32)/forward/CPU/1 thread(s)", + "value": 319750, + "unit": "ns", + "extra": "gctime=0\nmemory=1168\nallocs=7\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=32)/forward/GPU/CUDA", + "value": 16162, + "unit": "ns", + "extra": "gctime=0\nmemory=560\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=32)/zygote/CPU/2 thread(s)", + "value": 704500, + "unit": "ns", + "extra": "gctime=0\nmemory=6320\nallocs=58\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=32)/zygote/CPU/4 thread(s)", + "value": 730916, + "unit": "ns", + "extra": "gctime=0\nmemory=9536\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=32)/zygote/CPU/8 thread(s)", + "value": 1018500, + "unit": "ns", + "extra": "gctime=0\nmemory=15968\nallocs=148\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=32)/zygote/CPU/1 thread(s)", + "value": 639666.5, + "unit": "ns", + "extra": "gctime=0\nmemory=4736\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=32)/zygote/GPU/CUDA", + "value": 200487.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6416\nallocs=285\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 1458125, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 1500666, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 1500792, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 1439500, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 41288, + "unit": "ns", + "extra": "gctime=0\nmemory=2736\nallocs=99\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 5129292, + "unit": "ns", + "extra": "gctime=0\nmemory=8405200\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 5286854, + "unit": "ns", + "extra": "gctime=0\nmemory=8405200\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 5283687.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8405200\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 4974916.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8405200\nallocs=111\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=true)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 199307.5, + "unit": "ns", + "extra": "gctime=0\nmemory=18808\nallocs=628\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=identity)(32 x 128)/forward/CPU/2 thread(s)", + "value": 3667, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=identity)(32 x 128)/forward/CPU/4 thread(s)", + "value": 3708, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=identity)(32 x 128)/forward/CPU/8 thread(s)", + "value": 3667, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=identity)(32 x 128)/forward/CPU/1 thread(s)", + "value": 3667, + "unit": "ns", + "extra": "gctime=0\nmemory=16640\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=identity)(32 x 128)/forward/GPU/CUDA", + "value": 34015, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=41\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=identity)(32 x 128)/zygote/CPU/2 thread(s)", + "value": 15083, + "unit": "ns", + "extra": "gctime=0\nmemory=55216\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=identity)(32 x 128)/zygote/CPU/4 thread(s)", + "value": 15333, + "unit": "ns", + "extra": "gctime=0\nmemory=55216\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=identity)(32 x 128)/zygote/CPU/8 thread(s)", + "value": 15416, + "unit": "ns", + "extra": "gctime=0\nmemory=55216\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=identity)(32 x 128)/zygote/CPU/1 thread(s)", + "value": 15083, + "unit": "ns", + "extra": "gctime=0\nmemory=55216\nallocs=25\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=false, act=identity)(32 x 128)/zygote/GPU/CUDA", + "value": 381336.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9040\nallocs=363\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=relu)(512 x 128)/forward/CPU/2 thread(s)", + "value": 71417, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=relu)(512 x 128)/forward/CPU/4 thread(s)", + "value": 70459, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=relu)(512 x 128)/forward/CPU/8 thread(s)", + "value": 71125, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=relu)(512 x 128)/forward/CPU/1 thread(s)", + "value": 70958, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=relu)(512 x 128)/forward/GPU/CUDA", + "value": 114243, + "unit": "ns", + "extra": "gctime=0\nmemory=960\nallocs=41\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=relu)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 318042, + "unit": "ns", + "extra": "gctime=0\nmemory=2098400\nallocs=31\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=relu)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 319333, + "unit": "ns", + "extra": "gctime=0\nmemory=2098400\nallocs=31\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=relu)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 318708, + "unit": "ns", + "extra": "gctime=0\nmemory=2098400\nallocs=31\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=relu)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 318583, + "unit": "ns", + "extra": "gctime=0\nmemory=2098400\nallocs=31\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=false, act=relu)(512 x 128)/zygote/GPU/CUDA", + "value": 197759, + "unit": "ns", + "extra": "gctime=0\nmemory=10776\nallocs=461\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(4 x 32)/forward/CPU/2 thread(s)", + "value": 958, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(4 x 32)/forward/CPU/4 thread(s)", + "value": 1083, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(4 x 32)/forward/CPU/8 thread(s)", + "value": 1083, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(4 x 32)/forward/CPU/1 thread(s)", + "value": 959, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(4 x 32)/forward/GPU/CUDA", + "value": 24130, + "unit": "ns", + "extra": "gctime=0\nmemory=3008\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 7916, + "unit": "ns", + "extra": "gctime=0\nmemory=17904\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 8395.5, + "unit": "ns", + "extra": "gctime=0\nmemory=17904\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 8750, + "unit": "ns", + "extra": "gctime=0\nmemory=17904\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 8000, + "unit": "ns", + "extra": "gctime=0\nmemory=17904\nallocs=90\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=gelu, affine=true)(4 x 32)/zygote/GPU/CUDA", + "value": 263324.5, + "unit": "ns", + "extra": "gctime=0\nmemory=18440\nallocs=622\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=32)/forward/CPU/2 thread(s)", + "value": 462583, + "unit": "ns", + "extra": "gctime=0\nmemory=2098336\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=32)/forward/CPU/4 thread(s)", + "value": 475584, + "unit": "ns", + "extra": "gctime=0\nmemory=2099408\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=32)/forward/CPU/8 thread(s)", + "value": 552146, + "unit": "ns", + "extra": "gctime=0\nmemory=2101552\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=32)/forward/CPU/1 thread(s)", + "value": 551937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2097808\nallocs=8\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=32)/forward/GPU/CUDA", + "value": 130186.5, + "unit": "ns", + "extra": "gctime=0\nmemory=560\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=32)/zygote/CPU/2 thread(s)", + "value": 1394875, + "unit": "ns", + "extra": "gctime=0\nmemory=8392880\nallocs=62\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=32)/zygote/CPU/4 thread(s)", + "value": 1380000, + "unit": "ns", + "extra": "gctime=0\nmemory=8396096\nallocs=92\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=32)/zygote/CPU/8 thread(s)", + "value": 1617687, + "unit": "ns", + "extra": "gctime=0\nmemory=8402528\nallocs=152\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=32)/zygote/CPU/1 thread(s)", + "value": 1583875, + "unit": "ns", + "extra": "gctime=0\nmemory=8391296\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(128, Bsize=32)/zygote/GPU/CUDA", + "value": 276893, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=321\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(4 x 32)/forward/CPU/2 thread(s)", + "value": 292, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(4 x 32)/forward/CPU/4 thread(s)", + "value": 375, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(4 x 32)/forward/CPU/8 thread(s)", + "value": 375, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(4 x 32)/forward/CPU/1 thread(s)", + "value": 292, + "unit": "ns", + "extra": "gctime=0\nmemory=1440\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(4 x 32)/forward/GPU/CUDA", + "value": 32430, + "unit": "ns", + "extra": "gctime=0\nmemory=4080\nallocs=144\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(4 x 32)/zygote/CPU/2 thread(s)", + "value": 5958, + "unit": "ns", + "extra": "gctime=0\nmemory=17088\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(4 x 32)/zygote/CPU/4 thread(s)", + "value": 6666, + "unit": "ns", + "extra": "gctime=0\nmemory=17088\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(4 x 32)/zygote/CPU/8 thread(s)", + "value": 6750, + "unit": "ns", + "extra": "gctime=0\nmemory=17088\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(4 x 32)/zygote/CPU/1 thread(s)", + "value": 6042, + "unit": "ns", + "extra": "gctime=0\nmemory=17088\nallocs=87\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(2, act=relu, affine=false)(4 x 32)/zygote/GPU/CUDA", + "value": 267754.5, + "unit": "ns", + "extra": "gctime=0\nmemory=14888\nallocs=609\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 1725917, + "unit": "ns", + "extra": "gctime=0\nmemory=1050128\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 1722625, + "unit": "ns", + "extra": "gctime=0\nmemory=1050128\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 1730791.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1050128\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 1725417, + "unit": "ns", + "extra": "gctime=0\nmemory=1050128\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 168976.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8480\nallocs=386\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 4353875, + "unit": "ns", + "extra": "gctime=0\nmemory=10496496\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 4352396, + "unit": "ns", + "extra": "gctime=0\nmemory=10496496\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 4382625, + "unit": "ns", + "extra": "gctime=0\nmemory=10496496\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 4352021, + "unit": "ns", + "extra": "gctime=0\nmemory=10496496\nallocs=133\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "layernorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 1246090, + "unit": "ns", + "extra": "gctime=0\nmemory=49408\nallocs=2215\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=relu)(512 x 128)/forward/CPU/2 thread(s)", + "value": 6458, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=relu)(512 x 128)/forward/CPU/4 thread(s)", + "value": 6709, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=relu)(512 x 128)/forward/CPU/8 thread(s)", + "value": 7020.5, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=relu)(512 x 128)/forward/CPU/1 thread(s)", + "value": 6792, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=relu)(512 x 128)/forward/GPU/CUDA", + "value": 20586, + "unit": "ns", + "extra": "gctime=0\nmemory=1504\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=relu)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 32667, + "unit": "ns", + "extra": "gctime=0\nmemory=789264\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=relu)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 32542, + "unit": "ns", + "extra": "gctime=0\nmemory=789264\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=relu)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 51937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=789264\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=relu)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 34416.5, + "unit": "ns", + "extra": "gctime=0\nmemory=789264\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "bias_activation(512, act=relu)(512 x 128)/zygote/GPU/CUDA", + "value": 294547, + "unit": "ns", + "extra": "gctime=0\nmemory=9896\nallocs=441\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=512)/forward/CPU/2 thread(s)", + "value": 350167, + "unit": "ns", + "extra": "gctime=0\nmemory=9568\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=512)/forward/CPU/4 thread(s)", + "value": 349083, + "unit": "ns", + "extra": "gctime=0\nmemory=10640\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=512)/forward/CPU/8 thread(s)", + "value": 434792, + "unit": "ns", + "extra": "gctime=0\nmemory=12784\nallocs=42\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=512)/forward/CPU/1 thread(s)", + "value": 328937, + "unit": "ns", + "extra": "gctime=0\nmemory=9040\nallocs=7\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=512)/forward/GPU/CUDA", + "value": 18162, + "unit": "ns", + "extra": "gctime=0\nmemory=560\nallocs=26\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=512)/zygote/CPU/2 thread(s)", + "value": 723145.5, + "unit": "ns", + "extra": "gctime=0\nmemory=37808\nallocs=58\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=512)/zygote/CPU/4 thread(s)", + "value": 743896, + "unit": "ns", + "extra": "gctime=0\nmemory=41024\nallocs=88\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=512)/zygote/CPU/8 thread(s)", + "value": 1028021, + "unit": "ns", + "extra": "gctime=0\nmemory=47456\nallocs=148\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=512)/zygote/CPU/1 thread(s)", + "value": 667083, + "unit": "ns", + "extra": "gctime=0\nmemory=36224\nallocs=43\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchedmm(2, Bsize=512)/zygote/GPU/CUDA", + "value": 332754, + "unit": "ns", + "extra": "gctime=0\nmemory=7392\nallocs=321\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=relu)(512 x 128)/forward/CPU/2 thread(s)", + "value": 75334, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=relu)(512 x 128)/forward/CPU/4 thread(s)", + "value": 75375, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=relu)(512 x 128)/forward/CPU/8 thread(s)", + "value": 74833, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=relu)(512 x 128)/forward/CPU/1 thread(s)", + "value": 75125, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=relu)(512 x 128)/forward/GPU/CUDA", + "value": 47376, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=relu)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 340667, + "unit": "ns", + "extra": "gctime=0\nmemory=2100080\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=relu)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 325625, + "unit": "ns", + "extra": "gctime=0\nmemory=2100080\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=relu)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 324500, + "unit": "ns", + "extra": "gctime=0\nmemory=2100080\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=relu)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 325042, + "unit": "ns", + "extra": "gctime=0\nmemory=2100080\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(512, bias=true, act=relu)(512 x 128)/zygote/GPU/CUDA", + "value": 212363.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12168\nallocs=511\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 1484667, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 1526708, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 1526750, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 1463167, + "unit": "ns", + "extra": "gctime=0\nmemory=1049824\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 52318, + "unit": "ns", + "extra": "gctime=0\nmemory=3616\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 5112645.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8404736\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 5287000, + "unit": "ns", + "extra": "gctime=0\nmemory=8404736\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 5288062, + "unit": "ns", + "extra": "gctime=0\nmemory=8404736\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 4975542, + "unit": "ns", + "extra": "gctime=0\nmemory=8404736\nallocs=109\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "batchnorm(4, act=gelu, affine=false)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 205581, + "unit": "ns", + "extra": "gctime=0\nmemory=16008\nallocs=670\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=gelu)(32 x 128)/forward/CPU/2 thread(s)", + "value": 28166, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=gelu)(32 x 128)/forward/CPU/4 thread(s)", + "value": 28208, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=gelu)(32 x 128)/forward/CPU/8 thread(s)", + "value": 28209, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=gelu)(32 x 128)/forward/CPU/1 thread(s)", + "value": 28250, + "unit": "ns", + "extra": "gctime=0\nmemory=16832\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=gelu)(32 x 128)/forward/GPU/CUDA", + "value": 24403, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=gelu)(32 x 128)/zygote/CPU/2 thread(s)", + "value": 66500, + "unit": "ns", + "extra": "gctime=0\nmemory=87936\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=gelu)(32 x 128)/zygote/CPU/4 thread(s)", + "value": 66542, + "unit": "ns", + "extra": "gctime=0\nmemory=87936\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=gelu)(32 x 128)/zygote/CPU/8 thread(s)", + "value": 66625, + "unit": "ns", + "extra": "gctime=0\nmemory=87936\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=gelu)(32 x 128)/zygote/CPU/1 thread(s)", + "value": 66584, + "unit": "ns", + "extra": "gctime=0\nmemory=87936\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "dense(32, bias=true, act=gelu)(32 x 128)/zygote/GPU/CUDA", + "value": 542042, + "unit": "ns", + "extra": "gctime=0\nmemory=13336\nallocs=568\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/2 thread(s)", + "value": 1376375, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/4 thread(s)", + "value": 1069750, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/8 thread(s)", + "value": 1150250, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/1 thread(s)", + "value": 2253791.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/GPU/CUDA", + "value": 588018, + "unit": "ns", + "extra": "gctime=0\nmemory=25664\nallocs=978\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/2 thread(s)", + "value": 3106500, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/4 thread(s)", + "value": 2733166.5, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/8 thread(s)", + "value": 2745500, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/1 thread(s)", + "value": 3801667, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/GPU/CUDA", + "value": 2007531, + "unit": "ns", + "extra": "gctime=0\nmemory=363360\nallocs=3667\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/2 thread(s)", + "value": 8875333, + "unit": "ns", + "extra": "gctime=0\nmemory=9373552\nallocs=470\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/4 thread(s)", + "value": 8801916.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/8 thread(s)", + "value": 8770875, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/1 thread(s)", + "value": 6358146, + "unit": "ns", + "extra": "gctime=0\nmemory=9373456\nallocs=464\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/2 thread(s)", + "value": 83750, + "unit": "ns", + "extra": "gctime=0\nmemory=1051792\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/4 thread(s)", + "value": 80520.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1051792\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/8 thread(s)", + "value": 85625, + "unit": "ns", + "extra": "gctime=0\nmemory=1051792\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/CPU/1 thread(s)", + "value": 82542, + "unit": "ns", + "extra": "gctime=0\nmemory=1051792\nallocs=39\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/forward/GPU/CUDA", + "value": 193821, + "unit": "ns", + "extra": "gctime=0\nmemory=7472\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/2 thread(s)", + "value": 2013771, + "unit": "ns", + "extra": "gctime=0\nmemory=7356160\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/4 thread(s)", + "value": 2020312.5, + "unit": "ns", + "extra": "gctime=0\nmemory=7356160\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/8 thread(s)", + "value": 2023875, + "unit": "ns", + "extra": "gctime=0\nmemory=7356160\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/CPU/1 thread(s)", + "value": 2016896, + "unit": "ns", + "extra": "gctime=0\nmemory=7356160\nallocs=108\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "groupnorm(4, act=relu, affine=false)(16 x 16 x 32 x 32)/zygote/GPU/CUDA", + "value": 796366, + "unit": "ns", + "extra": "gctime=0\nmemory=36952\nallocs=1107\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + } + ] } ] }