From 0922dd65adf4070da98d81fd98e6186b06688482 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 13 Nov 2023 09:12:23 +0100 Subject: [PATCH] 50gb joins (#61) * update on 50GB joins * data.table timing updates as well * add missing datafusion version --------- Co-authored-by: Ubuntu --- _benchplot/benchplot-dict.R | 2 +- _control/timeout.csv | 2 +- _launcher/launcher.R | 5 ++++- _utils/sleep_and_run.sh | 9 +++++++++ arrow/VERSION | 1 + dask/VERSION | 2 +- datafusion/VERSION | 1 + logs.csv | 18 ++++++++++++++++++ time.csv | 16 ++++++++++++++++ 9 files changed, 52 insertions(+), 4 deletions(-) create mode 100755 _utils/sleep_and_run.sh create mode 100644 arrow/VERSION create mode 100644 datafusion/VERSION diff --git a/_benchplot/benchplot-dict.R b/_benchplot/benchplot-dict.R index f1351858..6ac2df8a 100644 --- a/_benchplot/benchplot-dict.R +++ b/_benchplot/benchplot-dict.R @@ -463,7 +463,7 @@ join.data.exceptions = {list( "collapse" = {list( )}, "data.table" = {list( - "out of memory" = c("J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1") # fread + "timeout" = c("J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1") # fread )}, "dplyr" = {list( "out of memory" = c("J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1") # fread diff --git a/_control/timeout.csv b/_control/timeout.csv index b66414d7..06783265 100644 --- a/_control/timeout.csv +++ b/_control/timeout.csv @@ -4,7 +4,7 @@ groupby,1e8,120 groupby,1e9,180 join,1e7,120 join,1e8,240 -join,1e9,360 +join,1e9,240 groupby2014,1e7,60 groupby2014,1e8,120 groupby2014,1e9,180 diff --git a/_launcher/launcher.R b/_launcher/launcher.R index 0a7bc36c..167d9dee 100644 --- a/_launcher/launcher.R +++ b/_launcher/launcher.R @@ -201,7 +201,10 @@ launch = function(dt, mockup, out_dir="out") { } cmd = sprintf("%s > %s 2> %s", solution.cmd(s, t, d), out_file, err_file) # ./_launcher/solution.R ... > out 2> err shcmd = sprintf("/bin/bash -c \"%s%s\"", venv, cmd) # this is needed to source python venv -# cat(mockup) + if (mockup) { + cat(cmd) + cat(shcmd) + } if (!mockup) { warn = NULL p = proc.time()[[3L]] diff --git a/_utils/sleep_and_run.sh b/_utils/sleep_and_run.sh new file mode 100755 index 00000000..9d6b2249 --- /dev/null +++ b/_utils/sleep_and_run.sh @@ -0,0 +1,9 @@ +while [ -f run.lock ] +do + sleep 1800 +done + + +rm run.lock + +./run.sh diff --git a/arrow/VERSION b/arrow/VERSION new file mode 100644 index 00000000..40745c30 --- /dev/null +++ b/arrow/VERSION @@ -0,0 +1 @@ +13.0.0.1 diff --git a/dask/VERSION b/dask/VERSION index 3bae6081..ea516b56 100644 --- a/dask/VERSION +++ b/dask/VERSION @@ -1 +1 @@ -2023.10.0 \ No newline at end of file +2023.10.1 \ No newline at end of file diff --git a/datafusion/VERSION b/datafusion/VERSION new file mode 100644 index 00000000..221a8da0 --- /dev/null +++ b/datafusion/VERSION @@ -0,0 +1 @@ +31.0.0 diff --git a/logs.csv b/logs.csv index 1edc4100..1775ca19 100644 --- a/logs.csv +++ b/logs.csv @@ -881,3 +881,21 @@ ip-172-31-31-147,1698326993,collapse,2.0.3,,join,J1_1e8_NA_0_1,1698348637.08848, ip-172-31-31-147,1698326993,collapse,2.0.3,,join,J1_1e8_NA_0_1,1698349833.44723,finish,0,0 ip-172-31-31-147,1698326993,collapse,2.0.3,,join,J1_1e9_NA_0_0,1698349848.46256,start,, ip-172-31-31-147,1698326993,collapse,2.0.3,,join,J1_1e9_NA_0_0,1698370827.50361,finish,0,0 +ip-172-31-31-147,1699266597,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,join,J1_1e9_NA_0_0,1699266602.09238,start,, +ip-172-31-31-147,1699266597,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,join,J1_1e9_NA_0_0,1699270995.54609,finish,2,2 +ip-172-31-31-147,1699289348,juliads,0.7.18, ,join,J1_1e9_NA_0_0,1699289354.28059,start,, +ip-172-31-31-147,1699289348,juliads,0.7.18, ,join,J1_1e9_NA_0_0,1699289905.26854,finish,0,0 +ip-172-31-31-147,1699289348,juliadf,1.6.1,04c738083f29f86e62c8afc341f0967d8717bdb8,join,J1_1e9_NA_0_0,1699289920.28361,start,, +ip-172-31-31-147,1699289348,juliadf,1.6.1,04c738083f29f86e62c8afc341f0967d8717bdb8,join,J1_1e9_NA_0_0,1699290895.10819,finish,1,137 +ip-172-31-31-147,1699289348,dplyr,1.1.3,,join,J1_1e9_NA_0_0,1699290910.12338,start,, +ip-172-31-31-147,1699289348,dplyr,1.1.3,,join,J1_1e9_NA_0_0,1699301730.12933,finish,1,137 +ip-172-31-31-147,1699289348,pandas,2.1.1,e86ed377639948c64c429059127bcf5b359ab6be,join,J1_1e9_NA_0_0,1699301745.14452,start,, +ip-172-31-31-147,1699289348,pandas,2.1.1,e86ed377639948c64c429059127bcf5b359ab6be,join,J1_1e9_NA_0_0,1699301939.6746,finish,31,1 +ip-172-31-31-147,1699289348,dask,2023.10.1,4884d56ce751f0691cdee3b6db4a8bcf05c474df,join,J1_1e9_NA_0_0,1699301954.68981,start,, +ip-172-31-31-147,1699289348,dask,2023.10.1,4884d56ce751f0691cdee3b6db4a8bcf05c474df,join,J1_1e9_NA_0_0,1699308552.08238,finish,1,137 +ip-172-31-31-147,1699289348,polars,0.19.11,,join,J1_1e9_NA_0_0,1699308567.0974,start,, +ip-172-31-31-147,1699289348,polars,0.19.11,,join,J1_1e9_NA_0_0,1699309310.23003,finish,1,137 +ip-172-31-31-147,1699289348,arrow,13.0.0.1,,join,J1_1e9_NA_0_0,1699309325.24506,start,, +ip-172-31-31-147,1699289348,arrow,13.0.0.1,,join,J1_1e9_NA_0_0,1699309934.66574,finish,1,137 +ip-172-31-31-147,1699437325,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,join,J1_1e9_NA_0_0,1699437325.61783,start,, +ip-172-31-31-147,1699437325,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,join,J1_1e9_NA_0_0,1699451725.72193,finish,, diff --git a/time.csv b/time.csv index a38c95c0..ab100aa0 100644 --- a/time.csv +++ b/time.csv @@ -6251,3 +6251,19 @@ ip-172-31-31-147,1698326993,1698367195,join,J1_1e9_NA_0_0,1000000000,medium inne ip-172-31-31-147,1698326993,1698368258,join,J1_1e9_NA_0_0,1000000000,medium inner on factor,899989698,11,collapse,2.0.3,,inner_join,2,56.555,NA,TRUE,44998219782;44956313963,3.081,NA,FALSE ip-172-31-31-147,1698326993,1698369538,join,J1_1e9_NA_0_0,1000000000,big inner on int,900000000,13,collapse,2.0.3,,inner_join,1,265.079,NA,TRUE,44999013966;45000583182,4.19,NA,FALSE ip-172-31-31-147,1698326993,1698370816,join,J1_1e9_NA_0_0,1000000000,big inner on int,900000000,13,collapse,2.0.3,,inner_join,2,269.143,NA,TRUE,44999013966;45000583182,3.782,NA,FALSE +ip-172-31-31-147,1699272070,1699281947,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,[.data.table,1,19.921,NA,TRUE,44998904641;45286789554,2.119,NA,FALSE +ip-172-31-31-147,1699272070,1699282850,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,[.data.table,2,17.261,NA,TRUE,44998904641;45286789554,3.307,NA,FALSE +ip-172-31-31-147,1699289348,1699289625.051615,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,juliads,0.7.18,,join,1,17.938,148.08,TRUE,44998904641.156;45286789553.965,0.209,,FALSE +ip-172-31-31-147,1699289348,1699289648.326663,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,juliads,0.7.18,,join,2,17.713,148.065,TRUE,44998904641.156;45286789553.965,0.129,,FALSE +ip-172-31-31-147,1699289348,1699289674.052402,join,J1_1e9_NA_0_0,1000000000,medium inner on int,899989698,11,juliads,0.7.18,,join,1,20.45,155.681,TRUE,44998219781.541;44956313962.718,0.095,,FALSE +ip-172-31-31-147,1699289348,1699289697.394478,join,J1_1e9_NA_0_0,1000000000,medium inner on int,899989698,11,juliads,0.7.18,,join,2,18.889,155.674,TRUE,44998219781.541;44956313962.718,0.141,,FALSE +ip-172-31-31-147,1699289348,1699289720.019483,join,J1_1e9_NA_0_0,1000000000,medium outer on int,1000000000,11,juliads,0.7.18,,join,1,20.927,161.92,TRUE,49998622440.431;44956313962.718,0.145,,FALSE +ip-172-31-31-147,1699289348,1699289746.001191,join,J1_1e9_NA_0_0,1000000000,medium outer on int,1000000000,11,juliads,0.7.18,,join,2,21.367,161.924,TRUE,49998622440.431;44956313962.718,0.164,,FALSE +ip-172-31-31-147,1699289348,1699289768.963107,join,J1_1e9_NA_0_0,1000000000,medium inner on factor,899989698,11,juliads,0.7.18,,join,1,19.749,156.522,TRUE,44998219781.541;44956313962.718,0.091,,FALSE +ip-172-31-31-147,1699289348,1699289790.044045,join,J1_1e9_NA_0_0,1000000000,medium inner on factor,899989698,11,juliads,0.7.18,,join,2,16.958,156.524,TRUE,44998219781.541;44956313962.718,0.089,,FALSE +ip-172-31-31-147,1699289348,1699289842.847400,join,J1_1e9_NA_0_0,1000000000,big inner on int,900000000,13,juliads,0.7.18,,join,1,50.699,170.778,TRUE,44999013966.477;45000583181.581,0.083,,FALSE +ip-172-31-31-147,1699289348,1699289893.449326,join,J1_1e9_NA_0_0,1000000000,big inner on int,900000000,13,juliads,0.7.18,,join,2,47.157,170.78,TRUE,44999013966.477;45000583181.581,0.131,,FALSE +ip-172-31-31-147,1699289348,1699301553,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,dplyr,1.1.3,,inner_join,1,198.746,NA,TRUE,44998904641;45286789554,4.282,NA,FALSE +ip-172-31-31-147,1699289348,1699309384,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,arrow,13.0.0.1,,inner_join,1,9.688,NA,TRUE,44998904641;45286789554,1.74,NA,FALSE +ip-172-31-31-147,1699289348,1699309393,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,arrow,13.0.0.1,,inner_join,2,6.281,NA,TRUE,44998904641;45286789554,1.965,NA,FALSE +ip-172-31-31-147,1699437325,1699447786,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,[.data.table,1,18.94,NA,TRUE,44998904641;45286789554,3.059,NA,FALSE