diff --git a/benchmarks/data.js b/benchmarks/data.js index d540215a09..1ca218fc19 100644 --- a/benchmarks/data.js +++ b/benchmarks/data.js @@ -1,5 +1,5 @@ window.BENCHMARK_DATA = { - "lastUpdate": 1730163015759, + "lastUpdate": 1730265968818, "repoUrl": "https://github.com/LuxDL/Lux.jl", "entries": { "Lux Benchmarks": [ @@ -136802,6 +136802,2406 @@ window.BENCHMARK_DATA = { "extra": "gctime=132332792\nmemory=206400912\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" } ] + }, + { + "commit": { + "author": { + "email": "gf7600gs@gmail.com", + "name": "Nero Blackstone​", + "username": "NeroBlackstone" + }, + "committer": { + "email": "noreply@github.com", + "name": "GitHub", + "username": "web-flow" + }, + "distinct": true, + "id": "cb92a563392f7c74ee2072fa765c31bb9b017fc7", + "message": "docs: fix images.jl link (#997)", + "timestamp": "2024-10-29T21:08:07-04:00", + "tree_id": "2bbb92f0323617c1dd74e0a48f0214e3b50ec159", + "url": "https://github.com/LuxDL/Lux.jl/commit/cb92a563392f7c74ee2072fa765c31bb9b017fc7" + }, + "date": 1730265950527, + "tool": "julia", + "benches": [ + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/CPU/2 thread(s)", + "value": 412541, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/CPU/4 thread(s)", + "value": 242250, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/CPU/8 thread(s)", + "value": 322416.5, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/CPU/1 thread(s)", + "value": 740041, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/GPU/CUDA", + "value": 43783, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 641833, + "unit": "ns", + "extra": "gctime=0\nmemory=1837696\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 443458, + "unit": "ns", + "extra": "gctime=0\nmemory=1837696\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 478625, + "unit": "ns", + "extra": "gctime=0\nmemory=1837696\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 958167, + "unit": "ns", + "extra": "gctime=0\nmemory=1837696\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/GPU/CUDA", + "value": 190648, + "unit": "ns", + "extra": "gctime=0\nmemory=11536\nallocs=441\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/2 thread(s)", + "value": 744500, + "unit": "ns", + "extra": "gctime=0\nmemory=1575600\nallocs=17\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/4 thread(s)", + "value": 516875, + "unit": "ns", + "extra": "gctime=0\nmemory=1575632\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/8 thread(s)", + "value": 622959, + "unit": "ns", + "extra": "gctime=0\nmemory=1575616\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/1 thread(s)", + "value": 971917, + "unit": "ns", + "extra": "gctime=0\nmemory=1575584\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)", + "value": 1626709, + "unit": "ns", + "extra": "gctime=0\nmemory=4494384\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)", + "value": 1164083, + "unit": "ns", + "extra": "gctime=0\nmemory=5326672\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)", + "value": 1354209, + "unit": "ns", + "extra": "gctime=0\nmemory=6991120\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)", + "value": 2382500, + "unit": "ns", + "extra": "gctime=0\nmemory=4216672\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/GPU/CUDA", + "value": 212090, + "unit": "ns", + "extra": "gctime=0\nmemory=6592\nallocs=197\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)", + "value": 12241875, + "unit": "ns", + "extra": "gctime=0\nmemory=13466816\nallocs=157\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)", + "value": 9584437.5, + "unit": "ns", + "extra": "gctime=0\nmemory=15131392\nallocs=177\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)", + "value": 9294250, + "unit": "ns", + "extra": "gctime=0\nmemory=18460288\nallocs=217\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)", + "value": 18002396, + "unit": "ns", + "extra": "gctime=0\nmemory=12911392\nallocs=147\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/GPU/CUDA", + "value": 1909620, + "unit": "ns", + "extra": "gctime=0\nmemory=28400\nallocs=833\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)", + "value": 17364333, + "unit": "ns", + "extra": "gctime=0\nmemory=13463104\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)", + "value": 14442750, + "unit": "ns", + "extra": "gctime=0\nmemory=15127680\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)", + "value": 14311833, + "unit": "ns", + "extra": "gctime=0\nmemory=18456576\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)", + "value": 21053667, + "unit": "ns", + "extra": "gctime=0\nmemory=12907680\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)", + "value": 119842562.5, + "unit": "ns", + "extra": "gctime=0\nmemory=143677872\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)", + "value": 182159229.5, + "unit": "ns", + "extra": "gctime=0\nmemory=170249552\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)", + "value": 147780729, + "unit": "ns", + "extra": "gctime=0\nmemory=223392912\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)", + "value": 104816708, + "unit": "ns", + "extra": "gctime=253542\nmemory=134820320\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/GPU/CUDA", + "value": 5472644, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)", + "value": 592166875.5, + "unit": "ns", + "extra": "gctime=112986958.5\nmemory=556544784\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)", + "value": 563821542, + "unit": "ns", + "extra": "gctime=68624833\nmemory=609688144\nallocs=181\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)", + "value": 442430104, + "unit": "ns", + "extra": "gctime=42887979\nmemory=715974864\nallocs=221\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)", + "value": 625737792, + "unit": "ns", + "extra": "gctime=3007041\nmemory=538829680\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/GPU/CUDA", + "value": 34972882, + "unit": "ns", + "extra": "gctime=0\nmemory=28408\nallocs=850\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)", + "value": 713130770.5, + "unit": "ns", + "extra": "gctime=67404436.5\nmemory=430580528\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)", + "value": 691544250, + "unit": "ns", + "extra": "gctime=17311625\nmemory=483723888\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)", + "value": 603916250, + "unit": "ns", + "extra": "gctime=11923875\nmemory=590010608\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)", + "value": 742687041, + "unit": "ns", + "extra": "gctime=3293500\nmemory=412865424\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/2 thread(s)", + "value": 870167, + "unit": "ns", + "extra": "gctime=0\nmemory=947952\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/4 thread(s)", + "value": 801000.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1239856\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/8 thread(s)", + "value": 1220750, + "unit": "ns", + "extra": "gctime=0\nmemory=1823664\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/1 thread(s)", + "value": 949688, + "unit": "ns", + "extra": "gctime=0\nmemory=850000\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/GPU/CUDA", + "value": 265193.5, + "unit": "ns", + "extra": "gctime=0\nmemory=20704\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/2 thread(s)", + "value": 2750250, + "unit": "ns", + "extra": "gctime=0\nmemory=2951280\nallocs=548\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/4 thread(s)", + "value": 2457125, + "unit": "ns", + "extra": "gctime=0\nmemory=3535088\nallocs=588\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/8 thread(s)", + "value": 3329208, + "unit": "ns", + "extra": "gctime=0\nmemory=4702704\nallocs=668\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/1 thread(s)", + "value": 3289292, + "unit": "ns", + "extra": "gctime=0\nmemory=2755376\nallocs=528\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/GPU/CUDA", + "value": 1041014, + "unit": "ns", + "extra": "gctime=0\nmemory=140432\nallocs=2599\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)", + "value": 6810916, + "unit": "ns", + "extra": "gctime=0\nmemory=4494384\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)", + "value": 6350125, + "unit": "ns", + "extra": "gctime=0\nmemory=5326672\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)", + "value": 6502792, + "unit": "ns", + "extra": "gctime=0\nmemory=6991120\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)", + "value": 7507833.5, + "unit": "ns", + "extra": "gctime=0\nmemory=4216672\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/GPU/CUDA", + "value": 210394, + "unit": "ns", + "extra": "gctime=0\nmemory=6112\nallocs=208\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)", + "value": 24022917, + "unit": "ns", + "extra": "gctime=0\nmemory=21335088\nallocs=135\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)", + "value": 21319833, + "unit": "ns", + "extra": "gctime=0\nmemory=22999664\nallocs=155\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)", + "value": 21303250, + "unit": "ns", + "extra": "gctime=312334\nmemory=26328560\nallocs=195\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)", + "value": 29727625, + "unit": "ns", + "extra": "gctime=0\nmemory=20779664\nallocs=125\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/GPU/CUDA", + "value": 1967511.5, + "unit": "ns", + "extra": "gctime=0\nmemory=30824\nallocs=1000\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)", + "value": 37215750, + "unit": "ns", + "extra": "gctime=0\nmemory=13463120\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)", + "value": 45637834, + "unit": "ns", + "extra": "gctime=0\nmemory=15127696\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)", + "value": 45715979, + "unit": "ns", + "extra": "gctime=0\nmemory=18456592\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)", + "value": 49236354, + "unit": "ns", + "extra": "gctime=0\nmemory=12907696\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)", + "value": 13381562.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8984112\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)", + "value": 12433834, + "unit": "ns", + "extra": "gctime=0\nmemory=10646736\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)", + "value": 12537792, + "unit": "ns", + "extra": "gctime=0\nmemory=13971856\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)", + "value": 15142500, + "unit": "ns", + "extra": "gctime=0\nmemory=8429664\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/GPU/CUDA", + "value": 513457.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6128\nallocs=209\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)", + "value": 47126417, + "unit": "ns", + "extra": "gctime=261542\nmemory=42658560\nallocs=135\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)", + "value": 41878854, + "unit": "ns", + "extra": "gctime=272854.5\nmemory=45983808\nallocs=155\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)", + "value": 40616375, + "unit": "ns", + "extra": "gctime=298208\nmemory=52634048\nallocs=195\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)", + "value": 58178583, + "unit": "ns", + "extra": "gctime=0\nmemory=41549664\nallocs=125\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/GPU/CUDA", + "value": 3235162, + "unit": "ns", + "extra": "gctime=0\nmemory=30824\nallocs=1000\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)", + "value": 74629396, + "unit": "ns", + "extra": "gctime=279312.5\nmemory=26914064\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)", + "value": 91798958, + "unit": "ns", + "extra": "gctime=425250\nmemory=30239312\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)", + "value": 91292209, + "unit": "ns", + "extra": "gctime=442500\nmemory=36889552\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)", + "value": 98432125, + "unit": "ns", + "extra": "gctime=594145.5\nmemory=25805168\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)", + "value": 287875563, + "unit": "ns", + "extra": "gctime=0\nmemory=143677872\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)", + "value": 347603000, + "unit": "ns", + "extra": "gctime=0\nmemory=170249552\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)", + "value": 314078500, + "unit": "ns", + "extra": "gctime=0\nmemory=223392912\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)", + "value": 269776750, + "unit": "ns", + "extra": "gctime=234166\nmemory=134820320\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/GPU/CUDA", + "value": 7105513, + "unit": "ns", + "extra": "gctime=0\nmemory=6240\nallocs=216\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)", + "value": 971474250, + "unit": "ns", + "extra": "gctime=125538375\nmemory=682500624\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)", + "value": 943762542, + "unit": "ns", + "extra": "gctime=80547959\nmemory=735643984\nallocs=156\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)", + "value": 823005791, + "unit": "ns", + "extra": "gctime=55950667\nmemory=841930704\nallocs=196\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)", + "value": 1118023937.5, + "unit": "ns", + "extra": "gctime=135634146\nmemory=664785520\nallocs=126\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/GPU/CUDA", + "value": 33868329.5, + "unit": "ns", + "extra": "gctime=0\nmemory=29352\nallocs=937\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)", + "value": 1427625104.5, + "unit": "ns", + "extra": "gctime=129979792\nmemory=430580528\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)", + "value": 1702832375, + "unit": "ns", + "extra": "gctime=17415291\nmemory=483723888\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)", + "value": 1637231875, + "unit": "ns", + "extra": "gctime=43279583\nmemory=590010608\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)", + "value": 1670864792, + "unit": "ns", + "extra": "gctime=3304500\nmemory=412865424\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/2 thread(s)", + "value": 1542584, + "unit": "ns", + "extra": "gctime=0\nmemory=3180288\nallocs=141\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/4 thread(s)", + "value": 1241333, + "unit": "ns", + "extra": "gctime=0\nmemory=3472192\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/8 thread(s)", + "value": 1613625, + "unit": "ns", + "extra": "gctime=0\nmemory=4056000\nallocs=201\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/1 thread(s)", + "value": 2155812.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3082336\nallocs=131\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/GPU/CUDA", + "value": 272755, + "unit": "ns", + "extra": "gctime=0\nmemory=20704\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/2 thread(s)", + "value": 7887667, + "unit": "ns", + "extra": "gctime=0\nmemory=9515248\nallocs=554\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/4 thread(s)", + "value": 6453729, + "unit": "ns", + "extra": "gctime=0\nmemory=10099056\nallocs=594\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/8 thread(s)", + "value": 7173500, + "unit": "ns", + "extra": "gctime=0\nmemory=11266672\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/1 thread(s)", + "value": 10450125, + "unit": "ns", + "extra": "gctime=0\nmemory=9319344\nallocs=534\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/GPU/CUDA", + "value": 1114904, + "unit": "ns", + "extra": "gctime=0\nmemory=141600\nallocs=2648\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/2 thread(s)", + "value": 177697708, + "unit": "ns", + "extra": "gctime=14889292\nmemory=89286640\nallocs=987\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/4 thread(s)", + "value": 183127729.5, + "unit": "ns", + "extra": "gctime=15533312.5\nmemory=109329168\nallocs=1117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/8 thread(s)", + "value": 108153771, + "unit": "ns", + "extra": "gctime=9850145.5\nmemory=149414224\nallocs=1377\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/1 thread(s)", + "value": 165745583, + "unit": "ns", + "extra": "gctime=14745250\nmemory=82601568\nallocs=922\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/GPU/CUDA", + "value": 4846033.5, + "unit": "ns", + "extra": "gctime=0\nmemory=139824\nallocs=4453\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/2 thread(s)", + "value": 638012042, + "unit": "ns", + "extra": "gctime=158679916\nmemory=466623152\nallocs=3373\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/4 thread(s)", + "value": 679029417, + "unit": "ns", + "extra": "gctime=197488500\nmemory=506708208\nallocs=3633\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/8 thread(s)", + "value": 519025667, + "unit": "ns", + "extra": "gctime=119514125\nmemory=586878320\nallocs=4153\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/1 thread(s)", + "value": 643337167, + "unit": "ns", + "extra": "gctime=139581000\nmemory=453253008\nallocs=3243\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/GPU/CUDA", + "value": 16351975, + "unit": "ns", + "extra": "gctime=0\nmemory=814664\nallocs=13275\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/2 thread(s)", + "value": 1081646, + "unit": "ns", + "extra": "gctime=0\nmemory=1691904\nallocs=141\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/4 thread(s)", + "value": 954500, + "unit": "ns", + "extra": "gctime=0\nmemory=1983808\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/8 thread(s)", + "value": 1344104, + "unit": "ns", + "extra": "gctime=0\nmemory=2567616\nallocs=201\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/1 thread(s)", + "value": 1347771, + "unit": "ns", + "extra": "gctime=0\nmemory=1593952\nallocs=131\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/GPU/CUDA", + "value": 275416, + "unit": "ns", + "extra": "gctime=0\nmemory=20704\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/2 thread(s)", + "value": 5770208, + "unit": "ns", + "extra": "gctime=0\nmemory=5138672\nallocs=554\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/4 thread(s)", + "value": 4655458.5, + "unit": "ns", + "extra": "gctime=0\nmemory=5722480\nallocs=594\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/8 thread(s)", + "value": 4980792, + "unit": "ns", + "extra": "gctime=0\nmemory=6890096\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/1 thread(s)", + "value": 5735812, + "unit": "ns", + "extra": "gctime=0\nmemory=4942768\nallocs=534\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/GPU/CUDA", + "value": 1152086, + "unit": "ns", + "extra": "gctime=0\nmemory=140656\nallocs=2613\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)", + "value": 23668250, + "unit": "ns", + "extra": "gctime=0\nmemory=71841200\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)", + "value": 43049541.5, + "unit": "ns", + "extra": "gctime=0\nmemory=85128016\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)", + "value": 37347521, + "unit": "ns", + "extra": "gctime=0\nmemory=111701648\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)", + "value": 34921604, + "unit": "ns", + "extra": "gctime=288729.5\nmemory=67411936\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/GPU/CUDA", + "value": 1832427, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)", + "value": 183460916.5, + "unit": "ns", + "extra": "gctime=2958271\nmemory=215263264\nallocs=158\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)", + "value": 172265542, + "unit": "ns", + "extra": "gctime=3227333\nmemory=241836896\nallocs=178\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)", + "value": 144281125, + "unit": "ns", + "extra": "gctime=2581125\nmemory=294984160\nallocs=218\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)", + "value": 390189959, + "unit": "ns", + "extra": "gctime=120455666\nmemory=206404736\nallocs=148\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/GPU/CUDA", + "value": 16494935.5, + "unit": "ns", + "extra": "gctime=0\nmemory=28656\nallocs=849\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)", + "value": 284961708, + "unit": "ns", + "extra": "gctime=17767208\nmemory=215259424\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)", + "value": 258063041, + "unit": "ns", + "extra": "gctime=6078583.5\nmemory=241833056\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)", + "value": 285191292, + "unit": "ns", + "extra": "gctime=60976000\nmemory=294980320\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)", + "value": 439855709, + "unit": "ns", + "extra": "gctime=119638959\nmemory=206400896\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/2 thread(s)", + "value": 619813333.5, + "unit": "ns", + "extra": "gctime=151880709\nmemory=316766112\nallocs=987\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/4 thread(s)", + "value": 578294708, + "unit": "ns", + "extra": "gctime=176012209\nmemory=336808640\nallocs=1117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/8 thread(s)", + "value": 376301604.5, + "unit": "ns", + "extra": "gctime=75774604.5\nmemory=376893696\nallocs=1377\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/1 thread(s)", + "value": 655157813, + "unit": "ns", + "extra": "gctime=75655750\nmemory=310081040\nallocs=922\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/GPU/CUDA", + "value": 12474713, + "unit": "ns", + "extra": "gctime=0\nmemory=139952\nallocs=4461\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/2 thread(s)", + "value": 1799700979, + "unit": "ns", + "extra": "gctime=86261750\nmemory=1360017936\nallocs=3373\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/4 thread(s)", + "value": 1657435625, + "unit": "ns", + "extra": "gctime=112991041\nmemory=1400102992\nallocs=3633\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/8 thread(s)", + "value": 1521911875, + "unit": "ns", + "extra": "gctime=105803478.5\nmemory=1480273104\nallocs=4153\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/1 thread(s)", + "value": 2098240625, + "unit": "ns", + "extra": "gctime=149541229\nmemory=1346647792\nallocs=3243\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/GPU/CUDA", + "value": 49823657, + "unit": "ns", + "extra": "gctime=0\nmemory=815992\nallocs=13320\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)", + "value": 3073437.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8984112\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)", + "value": 2095291, + "unit": "ns", + "extra": "gctime=0\nmemory=10646736\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)", + "value": 2281125, + "unit": "ns", + "extra": "gctime=0\nmemory=13971856\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)", + "value": 4821625, + "unit": "ns", + "extra": "gctime=0\nmemory=8429664\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/GPU/CUDA", + "value": 585401, + "unit": "ns", + "extra": "gctime=0\nmemory=6608\nallocs=198\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)", + "value": 25431958, + "unit": "ns", + "extra": "gctime=784625\nmemory=34790400\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)", + "value": 20342750, + "unit": "ns", + "extra": "gctime=925645.5\nmemory=38115648\nallocs=180\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)", + "value": 18922583, + "unit": "ns", + "extra": "gctime=1103917\nmemory=44765888\nallocs=220\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)", + "value": 36574542, + "unit": "ns", + "extra": "gctime=785563\nmemory=33681504\nallocs=150\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/GPU/CUDA", + "value": 3196535, + "unit": "ns", + "extra": "gctime=0\nmemory=29864\nallocs=912\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)", + "value": 35041958.5, + "unit": "ns", + "extra": "gctime=567687.5\nmemory=26914064\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)", + "value": 28788125, + "unit": "ns", + "extra": "gctime=0\nmemory=30239312\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)", + "value": 29576167, + "unit": "ns", + "extra": "gctime=910250\nmemory=36889552\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)", + "value": 42034167, + "unit": "ns", + "extra": "gctime=705625\nmemory=25805168\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)", + "value": 1646125, + "unit": "ns", + "extra": "gctime=0\nmemory=4494384\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)", + "value": 1175250, + "unit": "ns", + "extra": "gctime=0\nmemory=5326672\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)", + "value": 1363396, + "unit": "ns", + "extra": "gctime=0\nmemory=6991120\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)", + "value": 2504083, + "unit": "ns", + "extra": "gctime=0\nmemory=4216672\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/GPU/CUDA", + "value": 216709, + "unit": "ns", + "extra": "gctime=0\nmemory=6592\nallocs=197\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)", + "value": 12715000.5, + "unit": "ns", + "extra": "gctime=0\nmemory=17403184\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)", + "value": 9998250, + "unit": "ns", + "extra": "gctime=0\nmemory=19067760\nallocs=180\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)", + "value": 9683354, + "unit": "ns", + "extra": "gctime=0\nmemory=22396656\nallocs=220\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)", + "value": 18453354, + "unit": "ns", + "extra": "gctime=0\nmemory=16847760\nallocs=150\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/GPU/CUDA", + "value": 1955756, + "unit": "ns", + "extra": "gctime=0\nmemory=29864\nallocs=912\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)", + "value": 17696667, + "unit": "ns", + "extra": "gctime=0\nmemory=13463120\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)", + "value": 14806937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=15127696\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)", + "value": 14557708, + "unit": "ns", + "extra": "gctime=0\nmemory=18456592\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)", + "value": 21432729.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12907696\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)", + "value": 23752041, + "unit": "ns", + "extra": "gctime=0\nmemory=71841200\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)", + "value": 43099166, + "unit": "ns", + "extra": "gctime=0\nmemory=85128016\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)", + "value": 37397812.5, + "unit": "ns", + "extra": "gctime=0\nmemory=111701648\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)", + "value": 34904958.5, + "unit": "ns", + "extra": "gctime=290875\nmemory=67411936\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/GPU/CUDA", + "value": 1842817, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)", + "value": 190852833, + "unit": "ns", + "extra": "gctime=3786333\nmemory=278243472\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)", + "value": 251191084, + "unit": "ns", + "extra": "gctime=74288459\nmemory=304817104\nallocs=181\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)", + "value": 193659750, + "unit": "ns", + "extra": "gctime=44105584\nmemory=357964368\nallocs=221\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)", + "value": 429893688, + "unit": "ns", + "extra": "gctime=154216583\nmemory=269384944\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/GPU/CUDA", + "value": 13924800, + "unit": "ns", + "extra": "gctime=0\nmemory=30104\nallocs=927\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)", + "value": 289369042, + "unit": "ns", + "extra": "gctime=17772167\nmemory=215259440\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)", + "value": 265637979, + "unit": "ns", + "extra": "gctime=5309792\nmemory=241833072\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)", + "value": 292122354, + "unit": "ns", + "extra": "gctime=61844500.5\nmemory=294980336\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)", + "value": 445323208, + "unit": "ns", + "extra": "gctime=119548667\nmemory=206400912\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/2 thread(s)", + "value": 3394917, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/4 thread(s)", + "value": 2913791, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/8 thread(s)", + "value": 3035709, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/1 thread(s)", + "value": 4098958, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/GPU/CUDA", + "value": 578446, + "unit": "ns", + "extra": "gctime=0\nmemory=25664\nallocs=978\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/2 thread(s)", + "value": 7619333, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/4 thread(s)", + "value": 7367750, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/8 thread(s)", + "value": 7464166.5, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/1 thread(s)", + "value": 8211250, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/GPU/CUDA", + "value": 1384858.5, + "unit": "ns", + "extra": "gctime=0\nmemory=366272\nallocs=3997\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/2 thread(s)", + "value": 13690021, + "unit": "ns", + "extra": "gctime=0\nmemory=9373552\nallocs=470\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/4 thread(s)", + "value": 19212042, + "unit": "ns", + "extra": "gctime=0\nmemory=9373712\nallocs=480\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/8 thread(s)", + "value": 19131458, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/1 thread(s)", + "value": 15652916, + "unit": "ns", + "extra": "gctime=0\nmemory=9373456\nallocs=464\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/CPU/2 thread(s)", + "value": 69062.5, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/CPU/4 thread(s)", + "value": 67604, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/CPU/8 thread(s)", + "value": 70458, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/CPU/1 thread(s)", + "value": 69562, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/GPU/CUDA", + "value": 48441, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 324458, + "unit": "ns", + "extra": "gctime=0\nmemory=2101616\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 326292, + "unit": "ns", + "extra": "gctime=0\nmemory=2101616\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 236625, + "unit": "ns", + "extra": "gctime=0\nmemory=2101616\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 377708, + "unit": "ns", + "extra": "gctime=0\nmemory=2101616\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/GPU/CUDA", + "value": 214194.5, + "unit": "ns", + "extra": "gctime=0\nmemory=13048\nallocs=519\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/2 thread(s)", + "value": 424083.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1837984\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/4 thread(s)", + "value": 458041.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1837984\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/8 thread(s)", + "value": 356041, + "unit": "ns", + "extra": "gctime=0\nmemory=1837984\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/1 thread(s)", + "value": 375854.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1837984\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)", + "value": 3032834, + "unit": "ns", + "extra": "gctime=0\nmemory=8984112\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)", + "value": 2078062.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10646736\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)", + "value": 2268541, + "unit": "ns", + "extra": "gctime=0\nmemory=13971856\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)", + "value": 4511375, + "unit": "ns", + "extra": "gctime=0\nmemory=8429664\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/GPU/CUDA", + "value": 583753.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6608\nallocs=198\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)", + "value": 23595458, + "unit": "ns", + "extra": "gctime=0\nmemory=26917776\nallocs=157\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)", + "value": 18331416, + "unit": "ns", + "extra": "gctime=0\nmemory=30243024\nallocs=177\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)", + "value": 16965625, + "unit": "ns", + "extra": "gctime=0\nmemory=36893264\nallocs=217\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)", + "value": 35767042, + "unit": "ns", + "extra": "gctime=656958\nmemory=25808880\nallocs=147\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/GPU/CUDA", + "value": 3121440, + "unit": "ns", + "extra": "gctime=0\nmemory=28416\nallocs=834\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)", + "value": 33311458, + "unit": "ns", + "extra": "gctime=0\nmemory=26914048\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)", + "value": 28023083, + "unit": "ns", + "extra": "gctime=0\nmemory=30239296\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)", + "value": 27412334, + "unit": "ns", + "extra": "gctime=0\nmemory=36889536\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)", + "value": 41849604, + "unit": "ns", + "extra": "gctime=526771\nmemory=25805152\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)", + "value": 121058542, + "unit": "ns", + "extra": "gctime=0\nmemory=143677872\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)", + "value": 181255520.5, + "unit": "ns", + "extra": "gctime=0\nmemory=170249552\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)", + "value": 147913792, + "unit": "ns", + "extra": "gctime=0\nmemory=223392912\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)", + "value": 108516083, + "unit": "ns", + "extra": "gctime=298333\nmemory=134820320\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/GPU/CUDA", + "value": 5463863.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)", + "value": 469339812.5, + "unit": "ns", + "extra": "gctime=2117895.5\nmemory=430584480\nallocs=158\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)", + "value": 485979041, + "unit": "ns", + "extra": "gctime=6031042\nmemory=483727840\nallocs=178\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)", + "value": 435101416.5, + "unit": "ns", + "extra": "gctime=49287271\nmemory=590014560\nallocs=218\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)", + "value": 729625458, + "unit": "ns", + "extra": "gctime=129854125\nmemory=412869376\nallocs=148\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/GPU/CUDA", + "value": 32277729, + "unit": "ns", + "extra": "gctime=0\nmemory=26960\nallocs=772\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)", + "value": 644292937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=430580512\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)", + "value": 675559041.5, + "unit": "ns", + "extra": "gctime=21575729\nmemory=483723872\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)", + "value": 576973396, + "unit": "ns", + "extra": "gctime=23054770.5\nmemory=590010592\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)", + "value": 726825250, + "unit": "ns", + "extra": "gctime=3459625\nmemory=412865408\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/2 thread(s)", + "value": 1313500, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/4 thread(s)", + "value": 757354.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/8 thread(s)", + "value": 902583, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/1 thread(s)", + "value": 1989917, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/GPU/CUDA", + "value": 572494, + "unit": "ns", + "extra": "gctime=0\nmemory=26048\nallocs=978\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/2 thread(s)", + "value": 2956604.5, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/4 thread(s)", + "value": 2531750, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/8 thread(s)", + "value": 2470208, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/1 thread(s)", + "value": 3689875, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/GPU/CUDA", + "value": 1358421.5, + "unit": "ns", + "extra": "gctime=0\nmemory=359712\nallocs=3667\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/2 thread(s)", + "value": 6625021, + "unit": "ns", + "extra": "gctime=0\nmemory=9373552\nallocs=470\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/4 thread(s)", + "value": 6484250, + "unit": "ns", + "extra": "gctime=0\nmemory=9373712\nallocs=480\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/8 thread(s)", + "value": 6454333, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/1 thread(s)", + "value": 4442292, + "unit": "ns", + "extra": "gctime=0\nmemory=9373456\nallocs=464\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/CPU/2 thread(s)", + "value": 102875, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/CPU/4 thread(s)", + "value": 104104, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/CPU/8 thread(s)", + "value": 103209, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/CPU/1 thread(s)", + "value": 104917, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/GPU/CUDA", + "value": 28741, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/2 thread(s)", + "value": 237083, + "unit": "ns", + "extra": "gctime=0\nmemory=394272\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/4 thread(s)", + "value": 237542, + "unit": "ns", + "extra": "gctime=0\nmemory=394272\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/8 thread(s)", + "value": 236500, + "unit": "ns", + "extra": "gctime=0\nmemory=394272\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/1 thread(s)", + "value": 250125, + "unit": "ns", + "extra": "gctime=0\nmemory=394272\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/GPU/CUDA", + "value": 220363.5, + "unit": "ns", + "extra": "gctime=0\nmemory=14216\nallocs=576\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/2 thread(s)", + "value": 330167, + "unit": "ns", + "extra": "gctime=0\nmemory=328800\nallocs=21\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/4 thread(s)", + "value": 744959, + "unit": "ns", + "extra": "gctime=0\nmemory=328800\nallocs=21\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/8 thread(s)", + "value": 741959, + "unit": "ns", + "extra": "gctime=0\nmemory=328800\nallocs=21\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/1 thread(s)", + "value": 722000, + "unit": "ns", + "extra": "gctime=0\nmemory=328784\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/CPU/2 thread(s)", + "value": 13250, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/CPU/4 thread(s)", + "value": 13416.5, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/CPU/8 thread(s)", + "value": 13875, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/CPU/1 thread(s)", + "value": 13750, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/GPU/CUDA", + "value": 28904, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/CPU/2 thread(s)", + "value": 25625, + "unit": "ns", + "extra": "gctime=0\nmemory=330416\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/CPU/4 thread(s)", + "value": 25833, + "unit": "ns", + "extra": "gctime=0\nmemory=330416\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/CPU/8 thread(s)", + "value": 25750, + "unit": "ns", + "extra": "gctime=0\nmemory=330416\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/CPU/1 thread(s)", + "value": 25833, + "unit": "ns", + "extra": "gctime=0\nmemory=330416\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/GPU/CUDA", + "value": 212085, + "unit": "ns", + "extra": "gctime=0\nmemory=12728\nallocs=499\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/2 thread(s)", + "value": 45645.5, + "unit": "ns", + "extra": "gctime=0\nmemory=263392\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/4 thread(s)", + "value": 46208, + "unit": "ns", + "extra": "gctime=0\nmemory=263392\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/8 thread(s)", + "value": 46208, + "unit": "ns", + "extra": "gctime=0\nmemory=263392\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/1 thread(s)", + "value": 26875, + "unit": "ns", + "extra": "gctime=0\nmemory=263392\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/2 thread(s)", + "value": 305618500, + "unit": "ns", + "extra": "gctime=18214208\nmemory=165113248\nallocs=987\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/4 thread(s)", + "value": 277901833, + "unit": "ns", + "extra": "gctime=19291667\nmemory=185155776\nallocs=1117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/8 thread(s)", + "value": 188750583, + "unit": "ns", + "extra": "gctime=21919834\nmemory=225240832\nallocs=1377\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/1 thread(s)", + "value": 309520834, + "unit": "ns", + "extra": "gctime=18277291\nmemory=158428176\nallocs=922\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/GPU/CUDA", + "value": 7623749.5, + "unit": "ns", + "extra": "gctime=0\nmemory=139952\nallocs=4461\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/2 thread(s)", + "value": 1091097062.5, + "unit": "ns", + "extra": "gctime=176391000\nmemory=764421648\nallocs=3373\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/4 thread(s)", + "value": 1062816167, + "unit": "ns", + "extra": "gctime=219469584\nmemory=804506704\nallocs=3633\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/8 thread(s)", + "value": 898810583, + "unit": "ns", + "extra": "gctime=167591167\nmemory=884676816\nallocs=4153\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/1 thread(s)", + "value": 1292104708, + "unit": "ns", + "extra": "gctime=319482000\nmemory=751051504\nallocs=3243\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/GPU/CUDA", + "value": 27087435, + "unit": "ns", + "extra": "gctime=0\nmemory=814792\nallocs=13283\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/CPU/2 thread(s)", + "value": 417084, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/CPU/4 thread(s)", + "value": 419167, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/CPU/8 thread(s)", + "value": 415667, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/CPU/1 thread(s)", + "value": 431375, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/GPU/CUDA", + "value": 48303, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 1452104.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 1275562, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 1264708.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 1725333, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/GPU/CUDA", + "value": 227769, + "unit": "ns", + "extra": "gctime=0\nmemory=14536\nallocs=596\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/2 thread(s)", + "value": 1850667, + "unit": "ns", + "extra": "gctime=0\nmemory=2100000\nallocs=21\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/4 thread(s)", + "value": 3421562.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2100032\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/8 thread(s)", + "value": 3399499.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2100016\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/1 thread(s)", + "value": 3662312.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2099984\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/2 thread(s)", + "value": 1486791, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/4 thread(s)", + "value": 911292, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/8 thread(s)", + "value": 1056417, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/1 thread(s)", + "value": 2195084, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/GPU/CUDA", + "value": 580986.5, + "unit": "ns", + "extra": "gctime=0\nmemory=25664\nallocs=978\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/2 thread(s)", + "value": 3080583, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/4 thread(s)", + "value": 2660084, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/8 thread(s)", + "value": 2573166, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/1 thread(s)", + "value": 3820687, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/GPU/CUDA", + "value": 1362672, + "unit": "ns", + "extra": "gctime=0\nmemory=363360\nallocs=3667\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/2 thread(s)", + "value": 8819292, + "unit": "ns", + "extra": "gctime=0\nmemory=9373552\nallocs=470\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/4 thread(s)", + "value": 8745333, + "unit": "ns", + "extra": "gctime=0\nmemory=9373712\nallocs=480\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/8 thread(s)", + "value": 8773625, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/1 thread(s)", + "value": 6434292, + "unit": "ns", + "extra": "gctime=0\nmemory=9373456\nallocs=464\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/CPU/2 thread(s)", + "value": 2542, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/CPU/4 thread(s)", + "value": 2792, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/CPU/8 thread(s)", + "value": 2937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/CPU/1 thread(s)", + "value": 2375, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/GPU/CUDA", + "value": 25652, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/CPU/2 thread(s)", + "value": 7041, + "unit": "ns", + "extra": "gctime=0\nmemory=36928\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/CPU/4 thread(s)", + "value": 7208, + "unit": "ns", + "extra": "gctime=0\nmemory=36928\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/CPU/8 thread(s)", + "value": 7209, + "unit": "ns", + "extra": "gctime=0\nmemory=36928\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/CPU/1 thread(s)", + "value": 7125, + "unit": "ns", + "extra": "gctime=0\nmemory=36928\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/GPU/CUDA", + "value": 192398.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12728\nallocs=499\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/2 thread(s)", + "value": 8583, + "unit": "ns", + "extra": "gctime=0\nmemory=27040\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/4 thread(s)", + "value": 8667, + "unit": "ns", + "extra": "gctime=0\nmemory=27040\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/8 thread(s)", + "value": 8542, + "unit": "ns", + "extra": "gctime=0\nmemory=27040\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/1 thread(s)", + "value": 6000, + "unit": "ns", + "extra": "gctime=0\nmemory=27040\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/CPU/2 thread(s)", + "value": 13375, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/CPU/4 thread(s)", + "value": 13417, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/CPU/8 thread(s)", + "value": 14084, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/CPU/1 thread(s)", + "value": 13583, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/GPU/CUDA", + "value": 25588, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/2 thread(s)", + "value": 29125, + "unit": "ns", + "extra": "gctime=0\nmemory=43648\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/4 thread(s)", + "value": 29209, + "unit": "ns", + "extra": "gctime=0\nmemory=43648\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/8 thread(s)", + "value": 29292, + "unit": "ns", + "extra": "gctime=0\nmemory=43648\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/1 thread(s)", + "value": 29187.5, + "unit": "ns", + "extra": "gctime=0\nmemory=43648\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/GPU/CUDA", + "value": 200255.5, + "unit": "ns", + "extra": "gctime=0\nmemory=14216\nallocs=576\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/2 thread(s)", + "value": 42959, + "unit": "ns", + "extra": "gctime=0\nmemory=35296\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/4 thread(s)", + "value": 93000, + "unit": "ns", + "extra": "gctime=0\nmemory=35296\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/8 thread(s)", + "value": 92959, + "unit": "ns", + "extra": "gctime=0\nmemory=35296\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/1 thread(s)", + "value": 91250, + "unit": "ns", + "extra": "gctime=0\nmemory=35296\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/CPU/2 thread(s)", + "value": 28333, + "unit": "ns", + "extra": "gctime=0\nmemory=65584\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/CPU/4 thread(s)", + "value": 27979.5, + "unit": "ns", + "extra": "gctime=0\nmemory=65584\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/CPU/8 thread(s)", + "value": 28291, + "unit": "ns", + "extra": "gctime=0\nmemory=65584\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/CPU/1 thread(s)", + "value": 45916.5, + "unit": "ns", + "extra": "gctime=0\nmemory=65584\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/GPU/CUDA", + "value": 27099, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/CPU/2 thread(s)", + "value": 44375, + "unit": "ns", + "extra": "gctime=0\nmemory=263104\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/CPU/4 thread(s)", + "value": 45208, + "unit": "ns", + "extra": "gctime=0\nmemory=263104\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/CPU/8 thread(s)", + "value": 43708, + "unit": "ns", + "extra": "gctime=0\nmemory=263104\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/CPU/1 thread(s)", + "value": 63187, + "unit": "ns", + "extra": "gctime=0\nmemory=263104\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/GPU/CUDA", + "value": 172626.5, + "unit": "ns", + "extra": "gctime=0\nmemory=11216\nallocs=421\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/2 thread(s)", + "value": 68833, + "unit": "ns", + "extra": "gctime=0\nmemory=197616\nallocs=17\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/4 thread(s)", + "value": 69041, + "unit": "ns", + "extra": "gctime=0\nmemory=197616\nallocs=17\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/8 thread(s)", + "value": 68416, + "unit": "ns", + "extra": "gctime=0\nmemory=197616\nallocs=17\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/1 thread(s)", + "value": 68542, + "unit": "ns", + "extra": "gctime=0\nmemory=197600\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/CPU/2 thread(s)", + "value": 1916, + "unit": "ns", + "extra": "gctime=0\nmemory=8448\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/CPU/4 thread(s)", + "value": 1875, + "unit": "ns", + "extra": "gctime=0\nmemory=8448\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/CPU/8 thread(s)", + "value": 2333, + "unit": "ns", + "extra": "gctime=0\nmemory=8448\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/CPU/1 thread(s)", + "value": 1625, + "unit": "ns", + "extra": "gctime=0\nmemory=8448\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/GPU/CUDA", + "value": 24015, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/CPU/2 thread(s)", + "value": 5042, + "unit": "ns", + "extra": "gctime=0\nmemory=26752\nallocs=10\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/CPU/4 thread(s)", + "value": 5291, + "unit": "ns", + "extra": "gctime=0\nmemory=26752\nallocs=10\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/CPU/8 thread(s)", + "value": 5416, + "unit": "ns", + "extra": "gctime=0\nmemory=26752\nallocs=10\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/CPU/1 thread(s)", + "value": 5542, + "unit": "ns", + "extra": "gctime=0\nmemory=26752\nallocs=10\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/GPU/CUDA", + "value": 176394.5, + "unit": "ns", + "extra": "gctime=0\nmemory=11216\nallocs=421\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/2 thread(s)", + "value": 8208, + "unit": "ns", + "extra": "gctime=0\nmemory=18384\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/4 thread(s)", + "value": 8187.5, + "unit": "ns", + "extra": "gctime=0\nmemory=18384\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/8 thread(s)", + "value": 8125, + "unit": "ns", + "extra": "gctime=0\nmemory=18384\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/1 thread(s)", + "value": 5625, + "unit": "ns", + "extra": "gctime=0\nmemory=18384\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)", + "value": 106620958, + "unit": "ns", + "extra": "gctime=0\nmemory=71841200\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)", + "value": 125627166, + "unit": "ns", + "extra": "gctime=0\nmemory=85128016\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)", + "value": 120144521, + "unit": "ns", + "extra": "gctime=0\nmemory=111701648\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)", + "value": 117625187.5, + "unit": "ns", + "extra": "gctime=284333.5\nmemory=67411936\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/GPU/CUDA", + "value": 2655445, + "unit": "ns", + "extra": "gctime=0\nmemory=6240\nallocs=216\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)", + "value": 389249875, + "unit": "ns", + "extra": "gctime=18253499.5\nmemory=341219216\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)", + "value": 378229083, + "unit": "ns", + "extra": "gctime=19332083\nmemory=367792848\nallocs=156\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)", + "value": 354732875, + "unit": "ns", + "extra": "gctime=21801792\nmemory=420940112\nallocs=196\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)", + "value": 489409292, + "unit": "ns", + "extra": "gctime=22882416\nmemory=332360688\nallocs=126\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/GPU/CUDA", + "value": 15161397.5, + "unit": "ns", + "extra": "gctime=0\nmemory=31048\nallocs=1014\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)", + "value": 618241875, + "unit": "ns", + "extra": "gctime=18876333\nmemory=215259440\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)", + "value": 859950833, + "unit": "ns", + "extra": "gctime=97922417\nmemory=241833072\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)", + "value": 803956646, + "unit": "ns", + "extra": "gctime=69543875\nmemory=294980336\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)", + "value": 914357292, + "unit": "ns", + "extra": "gctime=131571042\nmemory=206400912\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + } + ] } ] }