Skip to content

Commit

Permalink
Use Tim's improved code
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromekelleher committed Sep 5, 2023
1 parent ba77271 commit 39baa0c
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 33 deletions.
60 changes: 30 additions & 30 deletions plot_data/time-scaling.csv
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
,num_samples,num_sites,prog,threads,user_time,sys_time,wall_time
0,10,116230,bcftools,1,0.09,0.0,0.11072063446044922
1,10,116230,sgkit,1,1.55,0.15,1.8883519172668457
2,10,116230,bcftools,2,0.09,0.0,0.10980343818664551
3,10,116230,sgkit,2,1.49,0.16,1.5718891620635986
4,10,116230,bcftools,8,0.08,0.01,0.11037111282348633
5,10,116230,sgkit,8,1.63,0.19,1.4816482067108154
6,100,204714,bcftools,1,0.44,0.01,0.48564791679382324
7,100,204714,sgkit,1,6.52,0.34,6.573964357376099
8,100,204714,bcftools,2,0.42,0.0,0.4452967643737793
9,100,204714,sgkit,2,5.62,0.2,3.559974193572998
10,100,204714,bcftools,8,0.41,0.01,0.4495086669921875
11,100,204714,sgkit,8,5.86,0.32,2.1813440322875977
17,1000,403989,sgkit,8,70.5,2.82,11.096916198730469
16,1000,403989,bcftools,8,4.76,0.03,4.820271730422974
15,1000,403989,sgkit,2,73.38,3.17,37.98290753364563
14,1000,403989,bcftools,2,4.74,0.02,4.794331312179565
13,1000,403989,sgkit,1,75.28,4.23,75.7719886302948
12,1000,403989,bcftools,1,5.71,0.04,5.776134490966797
18,10000,863998,bcftools,1,86.59,0.24,86.89493989944458
19,10000,863998,sgkit,1,1599.32,55.18,1578.9084451198578
20,10000,863998,bcftools,2,86.37,0.21,86.6481101512909
21,10000,863998,sgkit,2,1301.26,41.34,664.7058627605438
22,10000,863998,bcftools,8,110.02,0.27,110.37974095344543
23,10000,863998,sgkit,8,1375.65,33.42,186.56210684776306
24,100000,2365367,bcftools,1,2239.48,1.99,2242.193691253662
25,100000,2365367,sgkit,1,42960.0,1454.64,42453.70330142975
26,100000,2365367,bcftools,2,2281.0,2.95,2284.997227668762
27,100000,2365367,sgkit,2,35033.74,1029.06,17835.4977581501
28,100000,2365367,bcftools,8,2367.76,4.39,2373.241059064865
29,100000,2365367,sgkit,8,37586.89,1239.03,5196.217045783997
0,10,116230,bcftools,1,0.15,0.0,0.17081141471862793
1,10,116230,sgkit,1,6.21,0.25,6.432373762130737
2,10,116230,bcftools,2,0.12,0.0,0.162766695022583
3,10,116230,sgkit,2,5.95,0.27,6.077243089675903
4,10,116230,bcftools,8,0.11,0.01,0.15416932106018066
5,10,116230,sgkit,8,6.14,0.32,6.261801481246948
6,100,204714,bcftools,1,0.44,0.01,0.4834921360015869
7,100,204714,sgkit,1,6.84,0.38,7.092373371124268
8,100,204714,bcftools,2,0.44,0.01,0.4980020523071289
9,100,204714,sgkit,2,6.68,0.35,6.690417528152466
10,100,204714,bcftools,8,0.45,0.0,0.48285841941833496
11,100,204714,sgkit,8,6.62,0.36,6.48983359336853
17,1000,403989,sgkit,8,11.38,0.87,7.161712408065796
16,1000,403989,bcftools,8,4.74,0.05,4.83846116065979
15,1000,403989,sgkit,2,11.26,0.87,8.980347871780396
14,1000,403989,bcftools,2,4.62,0.02,4.68326473236084
13,1000,403989,sgkit,1,12.4,0.85,12.790526390075684
12,1000,403989,bcftools,1,4.71,0.01,4.757795095443726
18,10000,863998,bcftools,1,85.8,0.46,86.35128688812256
19,10000,863998,sgkit,1,117.31,12.18,148.714905500412
20,10000,863998,bcftools,2,85.61,0.47,86.1492350101471
21,10000,863998,sgkit,2,89.96,10.8,54.31121015548706
22,10000,863998,bcftools,8,85.81,0.44,86.3404746055603
23,10000,863998,sgkit,8,100.98,12.18,21.27691149711609
24,100000,2365367,bcftools,1,2227.38,3.87,2232.036372423172
25,100000,2365367,sgkit,1,2604.82,357.28,3303.462653875351
26,100000,2365367,bcftools,2,2227.53,2.69,2230.9869668483734
27,100000,2365367,sgkit,2,2273.44,278.23,1295.5113973617554
28,100000,2365367,bcftools,8,2220.04,2.51,2223.291947364807
29,100000,2365367,sgkit,8,2422.59,265.88,374.46946597099304
11 changes: 8 additions & 3 deletions src/collect_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,21 @@ def get_prob_dist(ds, num_bins=10):
count = (a + b).astype(int)
return pd.DataFrame({"start": bins[:-1], "stop": bins[1:], "prob_dist": count[1:]})

def _sgkit_afdist_work(ds_path):
    """Load the dataset at ``ds_path`` and return its binned distribution.

    The dataset is opened with ``sg.load_dataset``; variant statistics are
    then calculated with ``merge=False`` and materialised via ``.compute()``
    before being handed to ``get_prob_dist``, whose return value is passed
    straight back to the caller.
    """
    variant_stats = sg.variant_stats(
        sg.load_dataset(ds_path), merge=False
    ).compute()
    return get_prob_dist(variant_stats)

def sgkit_afdist_worker(ds_path, num_threads, debug, conn):
before = time.time()
with dask.distributed.Client(
processes=False, threads_per_worker=num_threads
) as client:
print(client)
ds = sg.load_dataset(ds_path)
ds = sg.variant_stats(ds)
df = get_prob_dist(ds)
df = _sgkit_afdist_work(ds_path)
wall_time = time.time() - before
cpu_times = psutil.Process().cpu_times()
print(cpu_times)
Expand Down

0 comments on commit 39baa0c

Please sign in to comment.