Skip to content

Commit

Permalink
50gb joins (#61)
Browse files Browse the repository at this point in the history
* update on 50GB joins

* data.table timing updates as well

* add missing datafusion version

---------

Co-authored-by: Ubuntu <[email protected]>
  • Loading branch information
Tmonster and Ubuntu authored Nov 13, 2023
1 parent d0a11e0 commit 0922dd6
Show file tree
Hide file tree
Showing 9 changed files with 52 additions and 4 deletions.
2 changes: 1 addition & 1 deletion _benchplot/benchplot-dict.R
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ join.data.exceptions = {list(
"collapse" = {list(
)},
"data.table" = {list(
"out of memory" = c("J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1") # fread
"timeout" = c("J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1") # fread
)},
"dplyr" = {list(
"out of memory" = c("J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1") # fread
Expand Down
2 changes: 1 addition & 1 deletion _control/timeout.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ groupby,1e8,120
groupby,1e9,180
join,1e7,120
join,1e8,240
join,1e9,360
join,1e9,240
groupby2014,1e7,60
groupby2014,1e8,120
groupby2014,1e9,180
5 changes: 4 additions & 1 deletion _launcher/launcher.R
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,10 @@ launch = function(dt, mockup, out_dir="out") {
}
cmd = sprintf("%s > %s 2> %s", solution.cmd(s, t, d), out_file, err_file) # ./_launcher/solution.R ... > out 2> err
shcmd = sprintf("/bin/bash -c \"%s%s\"", venv, cmd) # this is needed to source python venv
# cat(mockup)
if (mockup) {
cat(cmd)
cat(shcmd)
}
if (!mockup) {
warn = NULL
p = proc.time()[[3L]]
Expand Down
9 changes: 9 additions & 0 deletions _utils/sleep_and_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Wait until no run.lock file is present in the current directory, then
# start ./run.sh.
#
# The lock file is polled every 1800 seconds (30 minutes); the script blocks
# for as long as run.lock exists.
while [ -f run.lock ]
do
  sleep 1800
done

# By the time the loop exits run.lock is normally already gone, so a plain
# `rm run.lock` would always fail with "No such file or directory". Use -f so
# this is a silent no-op in that case while still removing the lock if it
# reappeared between the last check and now.
# NOTE(review): presumably run.sh manages run.lock itself - confirm that
# clearing it here is still wanted.
rm -f run.lock

./run.sh
1 change: 1 addition & 0 deletions arrow/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
13.0.0.1
2 changes: 1 addition & 1 deletion dask/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2023.10.0
2023.10.1
1 change: 1 addition & 0 deletions datafusion/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
31.0.0
18 changes: 18 additions & 0 deletions logs.csv
Original file line number Diff line number Diff line change
Expand Up @@ -881,3 +881,21 @@ ip-172-31-31-147,1698326993,collapse,2.0.3,,join,J1_1e8_NA_0_1,1698348637.08848,
ip-172-31-31-147,1698326993,collapse,2.0.3,,join,J1_1e8_NA_0_1,1698349833.44723,finish,0,0
ip-172-31-31-147,1698326993,collapse,2.0.3,,join,J1_1e9_NA_0_0,1698349848.46256,start,,
ip-172-31-31-147,1698326993,collapse,2.0.3,,join,J1_1e9_NA_0_0,1698370827.50361,finish,0,0
ip-172-31-31-147,1699266597,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,join,J1_1e9_NA_0_0,1699266602.09238,start,,
ip-172-31-31-147,1699266597,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,join,J1_1e9_NA_0_0,1699270995.54609,finish,2,2
ip-172-31-31-147,1699289348,juliads,0.7.18, ,join,J1_1e9_NA_0_0,1699289354.28059,start,,
ip-172-31-31-147,1699289348,juliads,0.7.18, ,join,J1_1e9_NA_0_0,1699289905.26854,finish,0,0
ip-172-31-31-147,1699289348,juliadf,1.6.1,04c738083f29f86e62c8afc341f0967d8717bdb8,join,J1_1e9_NA_0_0,1699289920.28361,start,,
ip-172-31-31-147,1699289348,juliadf,1.6.1,04c738083f29f86e62c8afc341f0967d8717bdb8,join,J1_1e9_NA_0_0,1699290895.10819,finish,1,137
ip-172-31-31-147,1699289348,dplyr,1.1.3,,join,J1_1e9_NA_0_0,1699290910.12338,start,,
ip-172-31-31-147,1699289348,dplyr,1.1.3,,join,J1_1e9_NA_0_0,1699301730.12933,finish,1,137
ip-172-31-31-147,1699289348,pandas,2.1.1,e86ed377639948c64c429059127bcf5b359ab6be,join,J1_1e9_NA_0_0,1699301745.14452,start,,
ip-172-31-31-147,1699289348,pandas,2.1.1,e86ed377639948c64c429059127bcf5b359ab6be,join,J1_1e9_NA_0_0,1699301939.6746,finish,31,1
ip-172-31-31-147,1699289348,dask,2023.10.1,4884d56ce751f0691cdee3b6db4a8bcf05c474df,join,J1_1e9_NA_0_0,1699301954.68981,start,,
ip-172-31-31-147,1699289348,dask,2023.10.1,4884d56ce751f0691cdee3b6db4a8bcf05c474df,join,J1_1e9_NA_0_0,1699308552.08238,finish,1,137
ip-172-31-31-147,1699289348,polars,0.19.11,,join,J1_1e9_NA_0_0,1699308567.0974,start,,
ip-172-31-31-147,1699289348,polars,0.19.11,,join,J1_1e9_NA_0_0,1699309310.23003,finish,1,137
ip-172-31-31-147,1699289348,arrow,13.0.0.1,,join,J1_1e9_NA_0_0,1699309325.24506,start,,
ip-172-31-31-147,1699289348,arrow,13.0.0.1,,join,J1_1e9_NA_0_0,1699309934.66574,finish,1,137
ip-172-31-31-147,1699437325,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,join,J1_1e9_NA_0_0,1699437325.61783,start,,
ip-172-31-31-147,1699437325,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,join,J1_1e9_NA_0_0,1699451725.72193,finish,,
16 changes: 16 additions & 0 deletions time.csv
Original file line number Diff line number Diff line change
Expand Up @@ -6251,3 +6251,19 @@ ip-172-31-31-147,1698326993,1698367195,join,J1_1e9_NA_0_0,1000000000,medium inne
ip-172-31-31-147,1698326993,1698368258,join,J1_1e9_NA_0_0,1000000000,medium inner on factor,899989698,11,collapse,2.0.3,,inner_join,2,56.555,NA,TRUE,44998219782;44956313963,3.081,NA,FALSE
ip-172-31-31-147,1698326993,1698369538,join,J1_1e9_NA_0_0,1000000000,big inner on int,900000000,13,collapse,2.0.3,,inner_join,1,265.079,NA,TRUE,44999013966;45000583182,4.19,NA,FALSE
ip-172-31-31-147,1698326993,1698370816,join,J1_1e9_NA_0_0,1000000000,big inner on int,900000000,13,collapse,2.0.3,,inner_join,2,269.143,NA,TRUE,44999013966;45000583182,3.782,NA,FALSE
ip-172-31-31-147,1699272070,1699281947,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,[.data.table,1,19.921,NA,TRUE,44998904641;45286789554,2.119,NA,FALSE
ip-172-31-31-147,1699272070,1699282850,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,[.data.table,2,17.261,NA,TRUE,44998904641;45286789554,3.307,NA,FALSE
ip-172-31-31-147,1699289348,1699289625.051615,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,juliads,0.7.18,,join,1,17.938,148.08,TRUE,44998904641.156;45286789553.965,0.209,,FALSE
ip-172-31-31-147,1699289348,1699289648.326663,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,juliads,0.7.18,,join,2,17.713,148.065,TRUE,44998904641.156;45286789553.965,0.129,,FALSE
ip-172-31-31-147,1699289348,1699289674.052402,join,J1_1e9_NA_0_0,1000000000,medium inner on int,899989698,11,juliads,0.7.18,,join,1,20.45,155.681,TRUE,44998219781.541;44956313962.718,0.095,,FALSE
ip-172-31-31-147,1699289348,1699289697.394478,join,J1_1e9_NA_0_0,1000000000,medium inner on int,899989698,11,juliads,0.7.18,,join,2,18.889,155.674,TRUE,44998219781.541;44956313962.718,0.141,,FALSE
ip-172-31-31-147,1699289348,1699289720.019483,join,J1_1e9_NA_0_0,1000000000,medium outer on int,1000000000,11,juliads,0.7.18,,join,1,20.927,161.92,TRUE,49998622440.431;44956313962.718,0.145,,FALSE
ip-172-31-31-147,1699289348,1699289746.001191,join,J1_1e9_NA_0_0,1000000000,medium outer on int,1000000000,11,juliads,0.7.18,,join,2,21.367,161.924,TRUE,49998622440.431;44956313962.718,0.164,,FALSE
ip-172-31-31-147,1699289348,1699289768.963107,join,J1_1e9_NA_0_0,1000000000,medium inner on factor,899989698,11,juliads,0.7.18,,join,1,19.749,156.522,TRUE,44998219781.541;44956313962.718,0.091,,FALSE
ip-172-31-31-147,1699289348,1699289790.044045,join,J1_1e9_NA_0_0,1000000000,medium inner on factor,899989698,11,juliads,0.7.18,,join,2,16.958,156.524,TRUE,44998219781.541;44956313962.718,0.089,,FALSE
ip-172-31-31-147,1699289348,1699289842.847400,join,J1_1e9_NA_0_0,1000000000,big inner on int,900000000,13,juliads,0.7.18,,join,1,50.699,170.778,TRUE,44999013966.477;45000583181.581,0.083,,FALSE
ip-172-31-31-147,1699289348,1699289893.449326,join,J1_1e9_NA_0_0,1000000000,big inner on int,900000000,13,juliads,0.7.18,,join,2,47.157,170.78,TRUE,44999013966.477;45000583181.581,0.131,,FALSE
ip-172-31-31-147,1699289348,1699301553,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,dplyr,1.1.3,,inner_join,1,198.746,NA,TRUE,44998904641;45286789554,4.282,NA,FALSE
ip-172-31-31-147,1699289348,1699309384,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,arrow,13.0.0.1,,inner_join,1,9.688,NA,TRUE,44998904641;45286789554,1.74,NA,FALSE
ip-172-31-31-147,1699289348,1699309393,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,arrow,13.0.0.1,,inner_join,2,6.281,NA,TRUE,44998904641;45286789554,1.965,NA,FALSE
ip-172-31-31-147,1699437325,1699447786,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,[.data.table,1,18.94,NA,TRUE,44998904641;45286789554,3.059,NA,FALSE

0 comments on commit 0922dd6

Please sign in to comment.