From 50e2153712d8bd8c9a3bc585ceea500691043203 Mon Sep 17 00:00:00 2001 From: github-action-benchmark Date: Tue, 29 Oct 2024 00:50:16 +0000 Subject: [PATCH] add Lux Benchmarks (julia) benchmark result for 4379ec39bba86c20d51684c25280b4e2c3419f2f --- benchmarks/data.js | 2402 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 2401 insertions(+), 1 deletion(-) diff --git a/benchmarks/data.js b/benchmarks/data.js index 3f0b3c6027..d540215a09 100644 --- a/benchmarks/data.js +++ b/benchmarks/data.js @@ -1,5 +1,5 @@ window.BENCHMARK_DATA = { - "lastUpdate": 1730151666100, + "lastUpdate": 1730163015759, "repoUrl": "https://github.com/LuxDL/Lux.jl", "entries": { "Lux Benchmarks": [ @@ -134402,6 +134402,2406 @@ window.BENCHMARK_DATA = { "extra": "gctime=135478375\nmemory=206400912\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" } ] + }, + { + "commit": { + "author": { + "email": "avikpal@mit.edu", + "name": "Avik Pal", + "username": "avik-pal" + }, + "committer": { + "email": "noreply@github.com", + "name": "GitHub", + "username": "web-flow" + }, + "distinct": true, + "id": "4379ec39bba86c20d51684c25280b4e2c3419f2f", + "message": "refactor: use Lux primitives for AD (#995)\n\n* refactor: use Lux primitives for AD\r\n\r\n* fix: workaround SciML/Optimization.jl#848", + "timestamp": "2024-10-28T19:36:16-04:00", + "tree_id": "207a3b0199d4cac9337b58e462f280c373a89921", + "url": "https://github.com/LuxDL/Lux.jl/commit/4379ec39bba86c20d51684c25280b4e2c3419f2f" + }, + "date": 1730162997923, + "tool": "julia", + "benches": [ + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/CPU/2 thread(s)", + "value": 411750, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/CPU/4 thread(s)", + "value": 241583, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/CPU/8 thread(s)", + "value": 322167, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/CPU/1 thread(s)", + "value": 740459, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/GPU/CUDA", + "value": 44353, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 655917, + "unit": "ns", + "extra": "gctime=0\nmemory=1837696\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 464833, + "unit": "ns", + "extra": "gctime=0\nmemory=1837696\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 468833, + "unit": "ns", + "extra": "gctime=0\nmemory=1837696\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 953354.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1837696\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/GPU/CUDA", + "value": 191177, + "unit": "ns", + "extra": "gctime=0\nmemory=11536\nallocs=441\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/2 thread(s)", + "value": 762708, + "unit": "ns", + "extra": "gctime=0\nmemory=1575600\nallocs=17\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/4 thread(s)", + "value": 569834, + "unit": "ns", + "extra": "gctime=0\nmemory=1575632\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/8 thread(s)", + "value": 633417, + "unit": "ns", + "extra": "gctime=0\nmemory=1575616\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/1 thread(s)", + "value": 954041, + "unit": "ns", + "extra": "gctime=0\nmemory=1575584\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)", + "value": 1594000, + "unit": "ns", + "extra": "gctime=0\nmemory=4494384\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)", + "value": 1160208, + "unit": "ns", + "extra": "gctime=0\nmemory=5326672\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)", + "value": 1349792, + "unit": "ns", + "extra": "gctime=0\nmemory=6991120\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)", + "value": 2338000, + "unit": "ns", + "extra": "gctime=0\nmemory=4216672\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/GPU/CUDA", + "value": 213116.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6592\nallocs=197\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)", + "value": 12315208, + "unit": "ns", + "extra": "gctime=0\nmemory=13466816\nallocs=157\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)", + "value": 9597708, + "unit": "ns", + "extra": "gctime=0\nmemory=15131392\nallocs=177\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)", + "value": 9293542, + "unit": "ns", + "extra": "gctime=0\nmemory=18460288\nallocs=217\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)", + "value": 17930125, + "unit": "ns", + "extra": "gctime=0\nmemory=12911392\nallocs=147\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/GPU/CUDA", + "value": 1907223.5, + "unit": "ns", + "extra": "gctime=0\nmemory=28416\nallocs=834\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)", + "value": 17341834, + "unit": "ns", + "extra": "gctime=0\nmemory=13463104\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)", + "value": 14420792, + "unit": "ns", + "extra": "gctime=0\nmemory=15127680\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)", + "value": 14304834, + "unit": "ns", + "extra": "gctime=0\nmemory=18456576\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)", + "value": 21034709, + "unit": "ns", + "extra": "gctime=0\nmemory=12907680\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)", + "value": 120916625, + "unit": "ns", + "extra": "gctime=0\nmemory=143677872\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)", + "value": 182214542, + "unit": "ns", + "extra": "gctime=0\nmemory=170249552\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)", + "value": 148302000, + "unit": "ns", + "extra": "gctime=0\nmemory=223392912\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)", + "value": 108170625, + "unit": "ns", + "extra": "gctime=249459\nmemory=134820320\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/GPU/CUDA", + "value": 5472288, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)", + "value": 591646750.5, + "unit": "ns", + "extra": "gctime=112735354\nmemory=556544784\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)", + "value": 563684334, + "unit": "ns", + "extra": "gctime=68670000\nmemory=609688144\nallocs=181\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)", + "value": 441224584, + "unit": "ns", + "extra": "gctime=43071458.5\nmemory=715974864\nallocs=221\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)", + "value": 624792917, + "unit": "ns", + "extra": "gctime=3022375\nmemory=538829680\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/GPU/CUDA", + "value": 34975276, + "unit": "ns", + "extra": "gctime=0\nmemory=28408\nallocs=850\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)", + "value": 711934541.5, + "unit": "ns", + "extra": "gctime=67819291.5\nmemory=430580528\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)", + "value": 694318791, + "unit": "ns", + "extra": "gctime=17727917\nmemory=483723888\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)", + "value": 615736125, + "unit": "ns", + "extra": "gctime=26807249\nmemory=590010608\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)", + "value": 746029625, + "unit": "ns", + "extra": "gctime=3322416.5\nmemory=412865424\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/2 thread(s)", + "value": 864625, + "unit": "ns", + "extra": "gctime=0\nmemory=947952\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/4 thread(s)", + "value": 801041.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1239856\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/8 thread(s)", + "value": 1219979.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1823664\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/1 thread(s)", + "value": 954250, + "unit": "ns", + "extra": "gctime=0\nmemory=850000\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/GPU/CUDA", + "value": 271063.5, + "unit": "ns", + "extra": "gctime=0\nmemory=20704\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/2 thread(s)", + "value": 2719646, + "unit": "ns", + "extra": "gctime=0\nmemory=2951280\nallocs=548\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/4 thread(s)", + "value": 2462708.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3535088\nallocs=588\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/8 thread(s)", + "value": 3306125, + "unit": "ns", + "extra": "gctime=0\nmemory=4702704\nallocs=668\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/1 thread(s)", + "value": 3385271, + "unit": "ns", + "extra": "gctime=0\nmemory=2755376\nallocs=528\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/GPU/CUDA", + "value": 1061902, + "unit": "ns", + "extra": "gctime=0\nmemory=140432\nallocs=2599\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)", + "value": 6794666, + "unit": "ns", + "extra": "gctime=0\nmemory=4494384\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)", + "value": 6363417, + "unit": "ns", + "extra": "gctime=0\nmemory=5326672\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)", + "value": 6537167, + "unit": "ns", + "extra": "gctime=0\nmemory=6991120\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)", + "value": 7529437.5, + "unit": "ns", + "extra": "gctime=0\nmemory=4216672\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/GPU/CUDA", + "value": 211586, + "unit": "ns", + "extra": "gctime=0\nmemory=6112\nallocs=208\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)", + "value": 23988417, + "unit": "ns", + "extra": "gctime=0\nmemory=21335088\nallocs=135\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)", + "value": 21318917, + "unit": "ns", + "extra": "gctime=0\nmemory=22999664\nallocs=155\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)", + "value": 21539417, + "unit": "ns", + "extra": "gctime=351500\nmemory=26328560\nallocs=195\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)", + "value": 29676416.5, + "unit": "ns", + "extra": "gctime=0\nmemory=20779664\nallocs=125\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/GPU/CUDA", + "value": 1981095, + "unit": "ns", + "extra": "gctime=0\nmemory=30824\nallocs=1000\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)", + "value": 37358667, + "unit": "ns", + "extra": "gctime=0\nmemory=13463120\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)", + "value": 45576125, + "unit": "ns", + "extra": "gctime=0\nmemory=15127696\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)", + "value": 34606625, + "unit": "ns", + "extra": "gctime=0\nmemory=18456592\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)", + "value": 49443917, + "unit": "ns", + "extra": "gctime=0\nmemory=12907696\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)", + "value": 13334188, + "unit": "ns", + "extra": "gctime=0\nmemory=8984112\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)", + "value": 12465000, + "unit": "ns", + "extra": "gctime=0\nmemory=10646736\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)", + "value": 12598625, + "unit": "ns", + "extra": "gctime=0\nmemory=13971856\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)", + "value": 15188833, + "unit": "ns", + "extra": "gctime=0\nmemory=8429664\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/GPU/CUDA", + "value": 512594, + "unit": "ns", + "extra": "gctime=0\nmemory=6128\nallocs=209\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)", + "value": 47191979, + "unit": "ns", + "extra": "gctime=275292\nmemory=42658560\nallocs=135\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)", + "value": 41899021, + "unit": "ns", + "extra": "gctime=304187.5\nmemory=45983808\nallocs=155\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)", + "value": 40856541, + "unit": "ns", + "extra": "gctime=366458\nmemory=52634048\nallocs=195\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)", + "value": 58133083, + "unit": "ns", + "extra": "gctime=0\nmemory=41549664\nallocs=125\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/GPU/CUDA", + "value": 3233025.5, + "unit": "ns", + "extra": "gctime=0\nmemory=30824\nallocs=1000\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)", + "value": 75271104, + "unit": "ns", + "extra": "gctime=297458.5\nmemory=26914064\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)", + "value": 91865084, + "unit": "ns", + "extra": "gctime=608458\nmemory=30239312\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)", + "value": 68909500, + "unit": "ns", + "extra": "gctime=510542\nmemory=36889552\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)", + "value": 98569708, + "unit": "ns", + "extra": "gctime=631125\nmemory=25805168\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)", + "value": 285118604, + "unit": "ns", + "extra": "gctime=0\nmemory=143677872\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)", + "value": 347578167, + "unit": "ns", + "extra": "gctime=0\nmemory=170249552\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)", + "value": 315462416, + "unit": "ns", + "extra": "gctime=0\nmemory=223392912\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)", + "value": 275551125, + "unit": "ns", + "extra": "gctime=256041\nmemory=134820320\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/GPU/CUDA", + "value": 7112475.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6240\nallocs=216\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)", + "value": 973431500, + "unit": "ns", + "extra": "gctime=125188417\nmemory=682500624\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)", + "value": 941010291, + "unit": "ns", + "extra": "gctime=80983042\nmemory=735643984\nallocs=156\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)", + "value": 826121208, + "unit": "ns", + "extra": "gctime=56987625\nmemory=841930704\nallocs=196\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)", + "value": 1128469333.5, + "unit": "ns", + "extra": "gctime=135485583.5\nmemory=664785520\nallocs=126\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/GPU/CUDA", + "value": 33864812.5, + "unit": "ns", + "extra": "gctime=0\nmemory=29352\nallocs=937\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)", + "value": 1435266167, + "unit": "ns", + "extra": "gctime=129712458\nmemory=430580528\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)", + "value": 1709541500, + "unit": "ns", + "extra": "gctime=18023625\nmemory=483723888\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)", + "value": 1266814792, + "unit": "ns", + "extra": "gctime=0\nmemory=590010608\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)", + "value": 1672710667, + "unit": "ns", + "extra": "gctime=3302250\nmemory=412865424\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/2 thread(s)", + "value": 1549875, + "unit": "ns", + "extra": "gctime=0\nmemory=3180288\nallocs=141\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/4 thread(s)", + "value": 1256625.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3472192\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/8 thread(s)", + "value": 1620708, + "unit": "ns", + "extra": "gctime=0\nmemory=4056000\nallocs=201\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/1 thread(s)", + "value": 2159125, + "unit": "ns", + "extra": "gctime=0\nmemory=3082336\nallocs=131\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/GPU/CUDA", + "value": 276844, + "unit": "ns", + "extra": "gctime=0\nmemory=20704\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/2 thread(s)", + "value": 7894542, + "unit": "ns", + "extra": "gctime=0\nmemory=9515248\nallocs=554\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/4 thread(s)", + "value": 6659583.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10099056\nallocs=594\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/8 thread(s)", + "value": 7112458, + "unit": "ns", + "extra": "gctime=0\nmemory=11266672\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/1 thread(s)", + "value": 10466625, + "unit": "ns", + "extra": "gctime=0\nmemory=9319344\nallocs=534\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/GPU/CUDA", + "value": 1131295, + "unit": "ns", + "extra": "gctime=0\nmemory=141600\nallocs=2648\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/2 thread(s)", + "value": 178664375, + "unit": "ns", + "extra": "gctime=15379333\nmemory=89286640\nallocs=987\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/4 thread(s)", + "value": 183079667, + "unit": "ns", + "extra": "gctime=15733354\nmemory=109329168\nallocs=1117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/8 thread(s)", + "value": 110239854.5, + "unit": "ns", + "extra": "gctime=9860958\nmemory=149414224\nallocs=1377\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/1 thread(s)", + "value": 165902562.5, + "unit": "ns", + "extra": "gctime=14803479\nmemory=82601568\nallocs=922\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/GPU/CUDA", + "value": 4850474.5, + "unit": "ns", + "extra": "gctime=0\nmemory=139824\nallocs=4453\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/2 thread(s)", + "value": 637338916, + "unit": "ns", + "extra": "gctime=157315917\nmemory=466623152\nallocs=3373\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/4 thread(s)", + "value": 688777459, + "unit": "ns", + "extra": "gctime=205619917\nmemory=506708208\nallocs=3633\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/8 thread(s)", + "value": 453913416, + "unit": "ns", + "extra": "gctime=64430751\nmemory=586878320\nallocs=4153\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/1 thread(s)", + "value": 656584541, + "unit": "ns", + "extra": "gctime=157712167\nmemory=453253008\nallocs=3243\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/GPU/CUDA", + "value": 16410189, + "unit": "ns", + "extra": "gctime=0\nmemory=814664\nallocs=13275\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/2 thread(s)", + "value": 1076312.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1691904\nallocs=141\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/4 thread(s)", + "value": 957271, + "unit": "ns", + "extra": "gctime=0\nmemory=1983808\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/8 thread(s)", + "value": 1343625, + "unit": "ns", + "extra": "gctime=0\nmemory=2567616\nallocs=201\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/1 thread(s)", + "value": 1344604, + "unit": "ns", + "extra": "gctime=0\nmemory=1593952\nallocs=131\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/GPU/CUDA", + "value": 279240, + "unit": "ns", + "extra": "gctime=0\nmemory=20704\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/2 thread(s)", + "value": 6007729, + "unit": "ns", + "extra": "gctime=0\nmemory=5138672\nallocs=554\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/4 thread(s)", + "value": 4675125, + "unit": "ns", + "extra": "gctime=0\nmemory=5722480\nallocs=594\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/8 thread(s)", + "value": 4946791, + "unit": "ns", + "extra": "gctime=0\nmemory=6890096\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/1 thread(s)", + "value": 5677084, + "unit": "ns", + "extra": "gctime=0\nmemory=4942768\nallocs=534\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/GPU/CUDA", + "value": 1154307, + "unit": "ns", + "extra": "gctime=0\nmemory=140656\nallocs=2613\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)", + "value": 23587458, + "unit": "ns", + "extra": "gctime=0\nmemory=71841200\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)", + "value": 44837937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=85128016\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)", + "value": 37828166, + "unit": "ns", + "extra": "gctime=0\nmemory=111701648\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)", + "value": 34890312, + "unit": "ns", + "extra": "gctime=295145.5\nmemory=67411936\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/GPU/CUDA", + "value": 1835859, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)", + "value": 184849458, + "unit": "ns", + "extra": "gctime=3083625\nmemory=215263264\nallocs=158\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)", + "value": 173020292, + "unit": "ns", + "extra": "gctime=2724084\nmemory=241836896\nallocs=178\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)", + "value": 145743417, + "unit": "ns", + "extra": "gctime=1296458.5\nmemory=294984160\nallocs=218\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)", + "value": 391585708, + "unit": "ns", + "extra": "gctime=121101250\nmemory=206404736\nallocs=148\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/GPU/CUDA", + "value": 16488184.5, + "unit": "ns", + "extra": "gctime=0\nmemory=28656\nallocs=849\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)", + "value": 284020041, + "unit": "ns", + "extra": "gctime=18384333\nmemory=215259424\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)", + "value": 257804083.5, + "unit": "ns", + "extra": "gctime=5806021\nmemory=241833056\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)", + "value": 289035959, + "unit": "ns", + "extra": "gctime=62594583\nmemory=294980320\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)", + "value": 440921375, + "unit": "ns", + "extra": "gctime=120600584\nmemory=206400896\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/2 thread(s)", + "value": 623252354.5, + "unit": "ns", + "extra": "gctime=152889833.5\nmemory=316766112\nallocs=987\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/4 thread(s)", + "value": 578021459, + "unit": "ns", + "extra": "gctime=177327416\nmemory=336808640\nallocs=1117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/8 thread(s)", + "value": 377542125, + "unit": "ns", + "extra": "gctime=76454790\nmemory=376893696\nallocs=1377\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/1 thread(s)", + "value": 659480437.5, + "unit": "ns", + "extra": "gctime=76158125.5\nmemory=310081040\nallocs=922\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/GPU/CUDA", + "value": 12471152, + "unit": "ns", + "extra": "gctime=0\nmemory=139952\nallocs=4461\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/2 thread(s)", + "value": 1819111104.5, + "unit": "ns", + "extra": "gctime=89032958\nmemory=1360017936\nallocs=3373\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/4 thread(s)", + "value": 1660737208, + "unit": "ns", + "extra": "gctime=111424334\nmemory=1400102992\nallocs=3633\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/8 thread(s)", + "value": 1556316104, + "unit": "ns", + "extra": "gctime=135130438\nmemory=1480273104\nallocs=4153\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/1 thread(s)", + "value": 2162942771, + "unit": "ns", + "extra": "gctime=151660896\nmemory=1346647792\nallocs=3243\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/GPU/CUDA", + "value": 49790206, + "unit": "ns", + "extra": "gctime=0\nmemory=815992\nallocs=13320\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)", + "value": 3047666, + "unit": "ns", + "extra": "gctime=0\nmemory=8984112\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)", + "value": 2113395.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10646736\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)", + "value": 2276187.5, + "unit": "ns", + "extra": "gctime=0\nmemory=13971856\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)", + "value": 4615062.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8429664\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/GPU/CUDA", + "value": 580103, + "unit": "ns", + "extra": "gctime=0\nmemory=6608\nallocs=198\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)", + "value": 25551000, + "unit": "ns", + "extra": "gctime=793000\nmemory=34790400\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)", + "value": 20372104.5, + "unit": "ns", + "extra": "gctime=929104\nmemory=38115648\nallocs=180\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)", + "value": 18984250, + "unit": "ns", + "extra": "gctime=1055375\nmemory=44765888\nallocs=220\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)", + "value": 36475396, + "unit": "ns", + "extra": "gctime=773271\nmemory=33681504\nallocs=150\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/GPU/CUDA", + "value": 3197534, + "unit": "ns", + "extra": "gctime=0\nmemory=29864\nallocs=912\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)", + "value": 35362875, + "unit": "ns", + "extra": "gctime=598958.5\nmemory=26914064\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)", + "value": 28780458, + "unit": "ns", + "extra": "gctime=0\nmemory=30239312\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)", + "value": 29749292, + "unit": "ns", + "extra": "gctime=998334\nmemory=36889552\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)", + "value": 42359958, + "unit": "ns", + "extra": "gctime=704916\nmemory=25805168\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)", + "value": 1644375, + "unit": "ns", + "extra": "gctime=0\nmemory=4494384\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)", + "value": 1184292, + "unit": "ns", + "extra": "gctime=0\nmemory=5326672\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)", + "value": 1380959, + "unit": "ns", + "extra": "gctime=0\nmemory=6991120\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)", + "value": 2490125, + "unit": "ns", + "extra": "gctime=0\nmemory=4216672\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/GPU/CUDA", + "value": 217958.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6592\nallocs=197\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)", + "value": 12687000, + "unit": "ns", + "extra": "gctime=0\nmemory=17403184\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)", + "value": 10006167, + "unit": "ns", + "extra": "gctime=0\nmemory=19067760\nallocs=180\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)", + "value": 9643084, + "unit": "ns", + "extra": "gctime=0\nmemory=22396656\nallocs=220\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)", + "value": 18392479, + "unit": "ns", + "extra": "gctime=0\nmemory=16847760\nallocs=150\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/GPU/CUDA", + "value": 1945166.5, + "unit": "ns", + "extra": "gctime=0\nmemory=29864\nallocs=912\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)", + "value": 17715708, + "unit": "ns", + "extra": "gctime=0\nmemory=13463120\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)", + "value": 14807375, + "unit": "ns", + "extra": "gctime=0\nmemory=15127696\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)", + "value": 14584104, + "unit": "ns", + "extra": "gctime=0\nmemory=18456592\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)", + "value": 21450895.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12907696\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)", + "value": 23273292, + "unit": "ns", + "extra": "gctime=0\nmemory=71841200\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)", + "value": 43934833, + "unit": "ns", + "extra": "gctime=0\nmemory=85128016\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)", + "value": 37907334, + "unit": "ns", + "extra": "gctime=0\nmemory=111701648\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)", + "value": 34857583, + "unit": "ns", + "extra": "gctime=291042\nmemory=67411936\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/GPU/CUDA", + "value": 1854216, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)", + "value": 189714750, + "unit": "ns", + "extra": "gctime=3874000\nmemory=278243472\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)", + "value": 252758813, + "unit": "ns", + "extra": "gctime=74996333\nmemory=304817104\nallocs=181\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)", + "value": 196003395.5, + "unit": "ns", + "extra": "gctime=45987083\nmemory=357964368\nallocs=221\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)", + "value": 431014896, + "unit": "ns", + "extra": "gctime=155004209\nmemory=269384944\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/GPU/CUDA", + "value": 13876633.5, + "unit": "ns", + "extra": "gctime=0\nmemory=30104\nallocs=927\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)", + "value": 288996271, + "unit": "ns", + "extra": "gctime=17969770.5\nmemory=215259440\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)", + "value": 265619583, + "unit": "ns", + "extra": "gctime=5550958\nmemory=241833072\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)", + "value": 294970666.5, + "unit": "ns", + "extra": "gctime=62491249.5\nmemory=294980336\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)", + "value": 447715041, + "unit": "ns", + "extra": "gctime=120062000\nmemory=206400912\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/2 thread(s)", + "value": 3400249.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/4 thread(s)", + "value": 2883458, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/8 thread(s)", + "value": 3083459, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/1 thread(s)", + "value": 4098667, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/GPU/CUDA", + "value": 585962, + "unit": "ns", + "extra": "gctime=0\nmemory=25664\nallocs=978\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/2 thread(s)", + "value": 7635041, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/4 thread(s)", + "value": 7317312.5, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/8 thread(s)", + "value": 7452208.5, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/1 thread(s)", + "value": 8215479, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/GPU/CUDA", + "value": 1410316, + "unit": "ns", + "extra": "gctime=0\nmemory=366272\nallocs=3997\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/2 thread(s)", + "value": 18791292, + "unit": "ns", + "extra": "gctime=0\nmemory=9373552\nallocs=470\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/4 thread(s)", + "value": 19172041.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373712\nallocs=480\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/8 thread(s)", + "value": 19131167, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/1 thread(s)", + "value": 10737041.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373456\nallocs=464\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/CPU/2 thread(s)", + "value": 68687.5, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/CPU/4 thread(s)", + "value": 67375, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/CPU/8 thread(s)", + "value": 70250, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/CPU/1 thread(s)", + "value": 68250, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/GPU/CUDA", + "value": 49195, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 323229, + "unit": "ns", + "extra": "gctime=0\nmemory=2101616\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 332709, + "unit": "ns", + "extra": "gctime=0\nmemory=2101616\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 316083, + "unit": "ns", + "extra": "gctime=0\nmemory=2101616\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 318042, + "unit": "ns", + "extra": "gctime=0\nmemory=2101616\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/GPU/CUDA", + "value": 218315, + "unit": "ns", + "extra": "gctime=0\nmemory=13048\nallocs=519\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/2 thread(s)", + "value": 444708, + "unit": "ns", + "extra": "gctime=0\nmemory=1837984\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/4 thread(s)", + "value": 400125, + "unit": "ns", + "extra": "gctime=0\nmemory=1837984\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/8 thread(s)", + "value": 414833, + "unit": "ns", + "extra": "gctime=0\nmemory=1837984\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/1 thread(s)", + "value": 356271, + "unit": "ns", + "extra": "gctime=0\nmemory=1837984\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)", + "value": 3032708, + "unit": "ns", + "extra": "gctime=0\nmemory=8984112\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)", + "value": 2089166.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10646736\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)", + "value": 2260084, + "unit": "ns", + "extra": "gctime=0\nmemory=13971856\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)", + "value": 4570896, + "unit": "ns", + "extra": "gctime=0\nmemory=8429664\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/GPU/CUDA", + "value": 585123, + "unit": "ns", + "extra": "gctime=0\nmemory=6608\nallocs=198\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)", + "value": 23581083, + "unit": "ns", + "extra": "gctime=0\nmemory=26917776\nallocs=157\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)", + "value": 18324312.5, + "unit": "ns", + "extra": "gctime=0\nmemory=30243024\nallocs=177\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)", + "value": 16907208, + "unit": "ns", + "extra": "gctime=0\nmemory=36893264\nallocs=217\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)", + "value": 36054896, + "unit": "ns", + "extra": "gctime=682646\nmemory=25808880\nallocs=147\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/GPU/CUDA", + "value": 3100409.5, + "unit": "ns", + "extra": "gctime=0\nmemory=28416\nallocs=834\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)", + "value": 33410625, + "unit": "ns", + "extra": "gctime=0\nmemory=26914048\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)", + "value": 27996292, + "unit": "ns", + "extra": "gctime=0\nmemory=30239296\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)", + "value": 27450166, + "unit": "ns", + "extra": "gctime=0\nmemory=36889536\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)", + "value": 41964333, + "unit": "ns", + "extra": "gctime=634313\nmemory=25805152\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)", + "value": 118848041.5, + "unit": "ns", + "extra": "gctime=0\nmemory=143677872\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)", + "value": 181989437.5, + "unit": "ns", + "extra": "gctime=0\nmemory=170249552\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)", + "value": 147982042, + "unit": "ns", + "extra": "gctime=0\nmemory=223392912\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)", + "value": 103352208, + "unit": "ns", + "extra": "gctime=306750\nmemory=134820320\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/GPU/CUDA", + "value": 5461107, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)", + "value": 467760646, + "unit": "ns", + "extra": "gctime=2102187.5\nmemory=430584480\nallocs=158\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)", + "value": 486582500, + "unit": "ns", + "extra": "gctime=4369687\nmemory=483727840\nallocs=178\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)", + "value": 432022166.5, + "unit": "ns", + "extra": "gctime=49484125.5\nmemory=590014560\nallocs=218\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)", + "value": 731293667, + "unit": "ns", + "extra": "gctime=130750166\nmemory=412869376\nallocs=148\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/GPU/CUDA", + "value": 32285579, + "unit": "ns", + "extra": "gctime=0\nmemory=26960\nallocs=772\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)", + "value": 635989438, + "unit": "ns", + "extra": "gctime=0\nmemory=430580512\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)", + "value": 672695645.5, + "unit": "ns", + "extra": "gctime=22732041.5\nmemory=483723872\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)", + "value": 574471979.5, + "unit": "ns", + "extra": "gctime=21745104\nmemory=590010592\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)", + "value": 732788375, + "unit": "ns", + "extra": "gctime=3489000\nmemory=412865408\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/2 thread(s)", + "value": 1223604, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/4 thread(s)", + "value": 730458.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/8 thread(s)", + "value": 937229, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/1 thread(s)", + "value": 2093708, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/GPU/CUDA", + "value": 576262, + "unit": "ns", + "extra": "gctime=0\nmemory=26048\nallocs=978\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/2 thread(s)", + "value": 2962583, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/4 thread(s)", + "value": 2501167, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/8 thread(s)", + "value": 2629000, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/1 thread(s)", + "value": 3697500, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/GPU/CUDA", + "value": 1333074.5, + "unit": "ns", + "extra": "gctime=0\nmemory=359712\nallocs=3667\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/2 thread(s)", + "value": 6827375, + "unit": "ns", + "extra": "gctime=0\nmemory=9373552\nallocs=470\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/4 thread(s)", + "value": 6481666.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373712\nallocs=480\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/8 thread(s)", + "value": 6494979.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/1 thread(s)", + "value": 4456292, + "unit": "ns", + "extra": "gctime=0\nmemory=9373456\nallocs=464\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/CPU/2 thread(s)", + "value": 103500, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/CPU/4 thread(s)", + "value": 103895.5, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/CPU/8 thread(s)", + "value": 104750, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/CPU/1 thread(s)", + "value": 103438, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/GPU/CUDA", + "value": 28118, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/2 thread(s)", + "value": 236334, + "unit": "ns", + "extra": "gctime=0\nmemory=394272\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/4 thread(s)", + "value": 237208, + "unit": "ns", + "extra": "gctime=0\nmemory=394272\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/8 thread(s)", + "value": 236958, + "unit": "ns", + "extra": "gctime=0\nmemory=394272\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/1 thread(s)", + "value": 249583, + "unit": "ns", + "extra": "gctime=0\nmemory=394272\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/GPU/CUDA", + "value": 218894, + "unit": "ns", + "extra": "gctime=0\nmemory=14216\nallocs=576\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/2 thread(s)", + "value": 742125, + "unit": "ns", + "extra": "gctime=0\nmemory=328800\nallocs=21\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/4 thread(s)", + "value": 754375, + "unit": "ns", + "extra": "gctime=0\nmemory=328800\nallocs=21\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/8 thread(s)", + "value": 742375, + "unit": "ns", + "extra": "gctime=0\nmemory=328800\nallocs=21\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/1 thread(s)", + "value": 733396, + "unit": "ns", + "extra": "gctime=0\nmemory=328784\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/CPU/2 thread(s)", + "value": 13584, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/CPU/4 thread(s)", + "value": 13458, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/CPU/8 thread(s)", + "value": 14417, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/CPU/1 thread(s)", + "value": 13500, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/GPU/CUDA", + "value": 28346, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/CPU/2 thread(s)", + "value": 25937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=330416\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/CPU/4 thread(s)", + "value": 25812.5, + "unit": "ns", + "extra": "gctime=0\nmemory=330416\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/CPU/8 thread(s)", + "value": 26167, + "unit": "ns", + "extra": "gctime=0\nmemory=330416\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/CPU/1 thread(s)", + "value": 25541.5, + "unit": "ns", + "extra": "gctime=0\nmemory=330416\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/GPU/CUDA", + "value": 208547.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12728\nallocs=499\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/2 thread(s)", + "value": 45562, + "unit": "ns", + "extra": "gctime=0\nmemory=263392\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/4 thread(s)", + "value": 46000, + "unit": "ns", + "extra": "gctime=0\nmemory=263392\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/8 thread(s)", + "value": 46500, + "unit": "ns", + "extra": "gctime=0\nmemory=263392\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/1 thread(s)", + "value": 27041.5, + "unit": "ns", + "extra": "gctime=0\nmemory=263392\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/2 thread(s)", + "value": 306135083.5, + "unit": "ns", + "extra": "gctime=18171250\nmemory=165113248\nallocs=987\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/4 thread(s)", + "value": 279280917, + "unit": "ns", + "extra": "gctime=19626458\nmemory=185155776\nallocs=1117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/8 thread(s)", + "value": 187541479, + "unit": "ns", + "extra": "gctime=20015979.5\nmemory=225240832\nallocs=1377\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/1 thread(s)", + "value": 311366041, + "unit": "ns", + "extra": "gctime=18243750\nmemory=158428176\nallocs=922\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/GPU/CUDA", + "value": 7673100.5, + "unit": "ns", + "extra": "gctime=0\nmemory=139952\nallocs=4461\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/2 thread(s)", + "value": 1102399979.5, + "unit": "ns", + "extra": "gctime=177800812.5\nmemory=764421648\nallocs=3373\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/4 thread(s)", + "value": 1066256459, + "unit": "ns", + "extra": "gctime=222160250\nmemory=804506704\nallocs=3633\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/8 thread(s)", + "value": 890231458, + "unit": "ns", + "extra": "gctime=163523249\nmemory=884676816\nallocs=4153\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/1 thread(s)", + "value": 1297546000, + "unit": "ns", + "extra": "gctime=320045292\nmemory=751051504\nallocs=3243\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/GPU/CUDA", + "value": 27302775.5, + "unit": "ns", + "extra": "gctime=0\nmemory=814792\nallocs=13283\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/CPU/2 thread(s)", + "value": 416791.5, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/CPU/4 thread(s)", + "value": 413292, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/CPU/8 thread(s)", + "value": 416667, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/CPU/1 thread(s)", + "value": 414084, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/GPU/CUDA", + "value": 48087, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 1365729, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 1233625, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 1273021, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 1719854.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/GPU/CUDA", + "value": 225988.5, + "unit": "ns", + "extra": "gctime=0\nmemory=14536\nallocs=596\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/2 thread(s)", + "value": 3499416, + "unit": "ns", + "extra": "gctime=0\nmemory=2100000\nallocs=21\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/4 thread(s)", + "value": 3462146, + "unit": "ns", + "extra": "gctime=0\nmemory=2100032\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/8 thread(s)", + "value": 3426187.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2100016\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/1 thread(s)", + "value": 3641166.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2099984\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/2 thread(s)", + "value": 1471125, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/4 thread(s)", + "value": 940083, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/8 thread(s)", + "value": 1055666, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/1 thread(s)", + "value": 2211917, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/GPU/CUDA", + "value": 580807.5, + "unit": "ns", + "extra": "gctime=0\nmemory=25664\nallocs=978\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/2 thread(s)", + "value": 3085291, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/4 thread(s)", + "value": 2648667, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/8 thread(s)", + "value": 2683771, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/1 thread(s)", + "value": 3833875, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/GPU/CUDA", + "value": 1348747, + "unit": "ns", + "extra": "gctime=0\nmemory=363360\nallocs=3667\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/2 thread(s)", + "value": 8817354.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373552\nallocs=470\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/4 thread(s)", + "value": 8751792, + "unit": "ns", + "extra": "gctime=0\nmemory=9373712\nallocs=480\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/8 thread(s)", + "value": 9138166.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/1 thread(s)", + "value": 6346145.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373456\nallocs=464\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/CPU/2 thread(s)", + "value": 2167, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/CPU/4 thread(s)", + "value": 2584, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/CPU/8 thread(s)", + "value": 3083, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/CPU/1 thread(s)", + "value": 2500, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/GPU/CUDA", + "value": 25068, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/CPU/2 thread(s)", + "value": 7333, + "unit": "ns", + "extra": "gctime=0\nmemory=36928\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/CPU/4 thread(s)", + "value": 7292, + "unit": "ns", + "extra": "gctime=0\nmemory=36928\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/CPU/8 thread(s)", + "value": 7375, + "unit": "ns", + "extra": "gctime=0\nmemory=36928\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/CPU/1 thread(s)", + "value": 6916.5, + "unit": "ns", + "extra": "gctime=0\nmemory=36928\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/GPU/CUDA", + "value": 189664, + "unit": "ns", + "extra": "gctime=0\nmemory=12728\nallocs=499\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/2 thread(s)", + "value": 8541, + "unit": "ns", + "extra": "gctime=0\nmemory=27040\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/4 thread(s)", + "value": 8708, + "unit": "ns", + "extra": "gctime=0\nmemory=27040\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/8 thread(s)", + "value": 8542, + "unit": "ns", + "extra": "gctime=0\nmemory=27040\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/1 thread(s)", + "value": 5708, + "unit": "ns", + "extra": "gctime=0\nmemory=27040\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/CPU/2 thread(s)", + "value": 13166, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/CPU/4 thread(s)", + "value": 13792, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/CPU/8 thread(s)", + "value": 14750, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/CPU/1 thread(s)", + "value": 13562.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/GPU/CUDA", + "value": 25030, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/2 thread(s)", + "value": 29375, + "unit": "ns", + "extra": "gctime=0\nmemory=43648\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/4 thread(s)", + "value": 29000, + "unit": "ns", + "extra": "gctime=0\nmemory=43648\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/8 thread(s)", + "value": 29167, + "unit": "ns", + "extra": "gctime=0\nmemory=43648\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/1 thread(s)", + "value": 29000, + "unit": "ns", + "extra": "gctime=0\nmemory=43648\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/GPU/CUDA", + "value": 199600, + "unit": "ns", + "extra": "gctime=0\nmemory=14216\nallocs=576\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/2 thread(s)", + "value": 93042, + "unit": "ns", + "extra": "gctime=0\nmemory=35296\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/4 thread(s)", + "value": 94458, + "unit": "ns", + "extra": "gctime=0\nmemory=35296\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/8 thread(s)", + "value": 93125, + "unit": "ns", + "extra": "gctime=0\nmemory=35296\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/1 thread(s)", + "value": 91166, + "unit": "ns", + "extra": "gctime=0\nmemory=35296\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/CPU/2 thread(s)", + "value": 28291, + "unit": "ns", + "extra": "gctime=0\nmemory=65584\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/CPU/4 thread(s)", + "value": 27666.5, + "unit": "ns", + "extra": "gctime=0\nmemory=65584\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/CPU/8 thread(s)", + "value": 28417, + "unit": "ns", + "extra": "gctime=0\nmemory=65584\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/CPU/1 thread(s)", + "value": 48583, + "unit": "ns", + "extra": "gctime=0\nmemory=65584\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/GPU/CUDA", + "value": 26505, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/CPU/2 thread(s)", + "value": 43792, + "unit": "ns", + "extra": "gctime=0\nmemory=263104\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/CPU/4 thread(s)", + "value": 48666, + "unit": "ns", + "extra": "gctime=0\nmemory=263104\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/CPU/8 thread(s)", + "value": 44125, + "unit": "ns", + "extra": "gctime=0\nmemory=263104\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/CPU/1 thread(s)", + "value": 63417, + "unit": "ns", + "extra": "gctime=0\nmemory=263104\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/GPU/CUDA", + "value": 171477, + "unit": "ns", + "extra": "gctime=0\nmemory=11216\nallocs=421\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/2 thread(s)", + "value": 68333, + "unit": "ns", + "extra": "gctime=0\nmemory=197616\nallocs=17\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/4 thread(s)", + "value": 69000, + "unit": "ns", + "extra": "gctime=0\nmemory=197616\nallocs=17\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/8 thread(s)", + "value": 67958, + "unit": "ns", + "extra": "gctime=0\nmemory=197616\nallocs=17\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/1 thread(s)", + "value": 68500, + "unit": "ns", + "extra": "gctime=0\nmemory=197600\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/CPU/2 thread(s)", + "value": 1792, + "unit": "ns", + "extra": "gctime=0\nmemory=8448\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/CPU/4 thread(s)", + "value": 1875, + "unit": "ns", + "extra": "gctime=0\nmemory=8448\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/CPU/8 thread(s)", + "value": 2167, + "unit": "ns", + "extra": "gctime=0\nmemory=8448\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/CPU/1 thread(s)", + "value": 1875, + "unit": "ns", + "extra": "gctime=0\nmemory=8448\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/GPU/CUDA", + "value": 23390, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/CPU/2 thread(s)", + "value": 5250, + "unit": "ns", + "extra": "gctime=0\nmemory=26752\nallocs=10\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/CPU/4 thread(s)", + "value": 5250, + "unit": "ns", + "extra": "gctime=0\nmemory=26752\nallocs=10\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/CPU/8 thread(s)", + "value": 5291, + "unit": "ns", + "extra": "gctime=0\nmemory=26752\nallocs=10\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/CPU/1 thread(s)", + "value": 5166, + "unit": "ns", + "extra": "gctime=0\nmemory=26752\nallocs=10\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/GPU/CUDA", + "value": 175020, + "unit": "ns", + "extra": "gctime=0\nmemory=11216\nallocs=421\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/2 thread(s)", + "value": 7958, + "unit": "ns", + "extra": "gctime=0\nmemory=18384\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/4 thread(s)", + "value": 8250, + "unit": "ns", + "extra": "gctime=0\nmemory=18384\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/8 thread(s)", + "value": 8250, + "unit": "ns", + "extra": "gctime=0\nmemory=18384\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/1 thread(s)", + "value": 5584, + "unit": "ns", + "extra": "gctime=0\nmemory=18384\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)", + "value": 106844041, + "unit": "ns", + "extra": "gctime=0\nmemory=71841200\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)", + "value": 126822625.5, + "unit": "ns", + "extra": "gctime=0\nmemory=85128016\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)", + "value": 121529708.5, + "unit": "ns", + "extra": "gctime=0\nmemory=111701648\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)", + "value": 118057875, + "unit": "ns", + "extra": "gctime=314791\nmemory=67411936\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/GPU/CUDA", + "value": 2630102, + "unit": "ns", + "extra": "gctime=0\nmemory=6240\nallocs=216\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)", + "value": 389182625, + "unit": "ns", + "extra": "gctime=18578770.5\nmemory=341219216\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)", + "value": 453684062.5, + "unit": "ns", + "extra": "gctime=95048854.5\nmemory=367792848\nallocs=156\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)", + "value": 353315042, + "unit": "ns", + "extra": "gctime=20527542\nmemory=420940112\nallocs=196\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)", + "value": 481224125, + "unit": "ns", + "extra": "gctime=20614166\nmemory=332360688\nallocs=126\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/GPU/CUDA", + "value": 15198787, + "unit": "ns", + "extra": "gctime=0\nmemory=31048\nallocs=1014\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)", + "value": 613025667, + "unit": "ns", + "extra": "gctime=19416167\nmemory=215259440\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)", + "value": 864422958, + "unit": "ns", + "extra": "gctime=99702458\nmemory=241833072\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)", + "value": 626775958, + "unit": "ns", + "extra": "gctime=68837000\nmemory=294980336\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)", + "value": 915632125, + "unit": "ns", + "extra": "gctime=132332792\nmemory=206400912\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + } + ] } ] }