Skip to content

Commit

Permalink
BUG fix independent filtering (#243)
Browse files Browse the repository at this point in the history
* test: add test case with 50x100 dataset

* fix: remove extra dimension in lowess results and put arguments in the correct order

* test: update wide dataset test case

* fix: handle the median == 0 case in lowess
  • Loading branch information
BorisMuzellec authored Feb 28, 2024
1 parent a07a34f commit 62638e8
Show file tree
Hide file tree
Showing 8 changed files with 268 additions and 6 deletions.
7 changes: 3 additions & 4 deletions pydeseq2/ds.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,14 +528,13 @@ def _independent_filtering(self) -> None:
if not U2.empty:
result.loc[use, i] = false_discovery_control(U2, method="bh")
num_rej = (result < self.alpha).sum(0).values
lowess_res = lowess(num_rej, theta, frac=1 / 5)
lowess_res = lowess(theta, num_rej, frac=1 / 5)

if num_rej.max() <= 10:
j = 0
else:
residual = num_rej[num_rej > 0] - lowess_res[num_rej > 0, 1]
thresh = lowess_res[:, 1].max() - np.sqrt(np.mean(residual**2))

residual = num_rej[num_rej > 0] - lowess_res[num_rej > 0]
thresh = lowess_res.max() - np.sqrt(np.mean(residual**2))
if np.any(num_rej > thresh):
j = np.where(num_rej > thresh)[0][0]
else:
Expand Down
5 changes: 4 additions & 1 deletion pydeseq2/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1590,7 +1590,10 @@ def lowess(

residuals = targets - yest
s = np.median(np.abs(residuals))
delta = np.clip(residuals / (6.0 * s), -1, 1)
if s == 0:
delta = (np.abs(residuals) > 0).astype(float)
else:
delta = np.clip(residuals / (6.0 * s), -1, 1)
delta = (1 - delta**2) ** 2

return yest
51 changes: 51 additions & 0 deletions tests/data/wide/r_test_dispersions.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"","x"
"gene1",0.748140934583972
"gene2",0.234433387696388
"gene3",0.860305419951616
"gene4",0.138932924677425
"gene5",0.148959676083603
"gene6",0.525979541628815
"gene7",0.275469070328401
"gene8",0.220039173826647
"gene9",0.250579424017925
"gene10",0.276438297834654
"gene11",0.19489057474872
"gene12",0.373556459579741
"gene13",0.592560793421874
"gene14",4.92891121267816
"gene15",0.160432599582938
"gene16",0.293313990006312
"gene17",0.352188111339015
"gene18",0.197268864528868
"gene19",0.181638573534229
"gene20",0.23154307691956
"gene21",0.209060712745133
"gene22",0.268218357613483
"gene23",0.309767019996308
"gene24",11.7345118588127
"gene25",0.26030298788413
"gene26",0.401775836581255
"gene27",0.603369619046339
"gene28",1.61705691361112
"gene29",0.491678110822878
"gene30",0.298095159353663
"gene31",0.167439314008087
"gene32",0.256668645361674
"gene33",0.258210546033555
"gene34",0.427539360167968
"gene35",1.21873166887581
"gene36",0.752815913952672
"gene37",0.590163103462473
"gene38",0.566458068657173
"gene39",0.151773154611373
"gene40",0.148916590525509
"gene41",0.441176385953598
"gene42",0.484198416945829
"gene43",0.216779029191911
"gene44",0.317247504718874
"gene45",0.603528873089713
"gene46",0.741589104539421
"gene47",0.27915739449809
"gene48",0.209686577859526
"gene49",0.525605773428821
"gene50",0.265395637378768
51 changes: 51 additions & 0 deletions tests/data/wide/r_test_res.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"","baseMean","log2FoldChange","lfcSE","stat","pvalue","padj"
"gene1",5.56443651611395,0.0294694778689073,0.507603054200333,0.058056147663124,0.953703904536815,0.971977293840831
"gene2",15.6153493322782,0.0331823129287897,0.288353607784351,0.115075074606334,0.908385616219801,0.946235016895626
"gene3",4.76939725520432,-0.808480964343957,0.549494887269052,-1.47131662746135,0.14120550866643,0.27154905512775
"gene4",107.601806794673,0.0885626393325583,0.204014314733171,0.434100124044672,0.664215735425561,0.830123601687476
"gene5",34.3944109994626,-0.230246958379629,0.224449414782295,-1.02583006778145,0.304971690602697,0.444596609665518
"gene6",35.6998062134351,2.05313557049122,0.409238612022908,5.01696445587664,5.24942748169204e-07,5.24942748169204e-06
"gene7",39.5876926638795,0.748827220517804,0.29034836816026,2.57906467758924,0.00990682360311656,0.0377119726870398
"gene8",39.9369146594089,0.674685838207666,0.263863228227323,2.55695286812155,0.0105593523523711,0.0377119726870398
"gene9",46.1088950092268,0.521221109396736,0.275819047677013,1.88972122769089,0.0587952535207517,0.154724351370399
"gene10",21.2750614075596,1.71881791365222,0.308210940352263,5.57675828018218,2.45041987517039e-08,4.08403312528398e-07
"gene11",300.196634621935,-0.322957436418014,0.236184775066428,-1.3673931197605,0.171502117745151,0.30625378168777
"gene12",22.1669799793965,-0.600973618577076,0.341950908945528,-1.75748507418885,0.0788351637446882,0.19708790936172
"gene13",18.7106512715424,1.33492511472204,0.427920184377527,3.11956566541466,0.00181117885937363,0.010062104774298
"gene14",0.679279107589095,-0.441479443815586,1.33967509886243,-0.329542173464643,0.741745922589863,0.830123601687476
"gene15",52.9341555700959,-0.141081031179019,0.223647469819094,-0.630818811825317,0.528159001743524,0.694946054925689
"gene16",13.0644167554459,-0.41510911603736,0.321208438439663,-1.29233564987844,0.196240931163462,0.338346433040452
"gene17",7.96299754721104,0.0608548298947286,0.369401860457523,0.16473882892565,0.869149553438392,0.924627184508928
"gene18",100.763604726858,-0.61024491547402,0.242317018582851,-2.51837414905041,0.0117898008023857,0.0392993360079523
"gene19",54.4149922635949,0.701217795517923,0.236513086101633,2.96481605764766,0.00302863959321155,0.0137665436055071
"gene20",105.447062779537,-0.306059227252092,0.262823540400517,-1.16450462080257,0.244219571842174,0.381593081003397
"gene21",57.0282899895038,-0.415734938516491,0.251459438233842,-1.65328826564022,0.0982722100062559,0.233981452395847
"gene22",87.1065998786281,1.48053141560129,0.28235284575055,5.24355053573392,1.57515640157307e-07,1.96894550196634e-06
"gene23",21.4404974695457,-0.476244141836472,0.315341093882065,-1.51025080801738,0.130979438641805,0.26195887728361
"gene24",1.00134698336677,-0.610792421446532,1.89421925295355,-0.322450751408084,0.747111241518728,0.830123601687476
"gene25",24.004469052763,-0.244001520388959,0.293988507285909,-0.829969588408649,0.406555978251274,0.564661080904547
"gene26",19.6989207449776,0.728188091511614,0.355933223200233,2.04585591916483,0.0407705484059284,0.119913377664495
"gene27",10.2257937384849,0.856952568510307,0.445623278039735,1.92304264777186,0.0544746940682856,0.151318594634127
"gene28",2.86668295538467,-0.2687123708177,0.739514452917272,-0.363363244298567,0.716333554868076,0.830123601687476
"gene29",6.50340033846079,-0.521641026318916,0.42413129203094,-1.22990459822253,0.218732832130881,0.364554720218135
"gene30",19.7199945870775,0.0726982058791112,0.31250459770684,0.232630836194318,0.816048080317031,0.887008782953294
"gene31",95.822626386748,0.261209143247732,0.222785728089372,1.17246802785745,0.241009201191656,0.381593081003397
"gene32",10.3320075277629,-0.479963190441564,0.314401492551454,-1.52659323130603,0.126862191199356,0.26195887728361
"gene33",56.8365687074748,0.447692168824197,0.278768381116038,1.60596466152969,0.108281674682339,0.246094715187134
"gene34",6.96257191493728,-1.55307598955063,0.421501172259871,-3.68463029704957,0.000229034838823165,0.00163596313445118
"gene35",3.39951861314589,-0.222361723767466,0.651263277613789,-0.341431386369877,0.7327788502323,0.830123601687476
"gene36",7.21496778019806,-1.40714534760812,0.514001617625887,-2.73762824737315,0.00618839859675959,0.025784994153165
"gene37",13.0959227343239,-0.438935578830093,0.43344367941264,-1.01267038759198,0.311217626765862,0.444596609665518
"gene38",11.5315449367447,0.0149830412163643,0.426521226638346,0.0351284772728759,0.971977293840831,0.971977293840831
"gene39",63.7469366951742,-0.309912048978196,0.216003397040393,-1.43475544007413,0.151356799805261,0.280290370009743
"gene40",41.3895203624729,0.342502233510796,0.219412558944949,1.56099648606136,0.118524579970798,0.2576621303713
"gene41",7.12138579746208,-1.49673137015901,0.418098678017581,-3.5798519556574,0.000343788923236412,0.00214868077022758
"gene42",23.4882459512015,0.431122349617538,0.383771674370528,1.12338241305765,0.261275115836985,0.395871387631796
"gene43",48.8738198412927,-1.5675101875832,0.261466004631887,-5.99508218970977,2.03382552447065e-09,5.08456381117663e-08
"gene44",40.3388348631134,-0.213306616942372,0.308724256864447,-0.690929242518282,0.489610008204324,0.66163514622206
"gene45",9.4311661862429,-1.02842479174934,0.462720616130752,-2.22256099230888,0.0262454169883623,0.082016928088632
"gene46",3.55716622464779,-0.189506142078803,0.536487833473088,-0.35323474318511,0.723912463273255,0.830123601687476
"gene47",55.6878809849337,2.70821823283829,0.301609739715236,8.97921345443037,2.72709247918183e-19,1.36354623959092e-17
"gene48",41.0574430309621,0.147421736842092,0.255791763249164,0.576334964697399,0.564388793827548,0.723575376701985
"gene49",6.16880050095403,-1.36830197810574,0.454982512846416,-3.00737267800801,0.00263516486173961,0.0131758243086981
"gene50",28.4888150033072,-1.46348009850214,0.293834722004443,-4.98062342162547,6.33797656274349e-07,5.28164713561958e-06
31 changes: 31 additions & 0 deletions tests/data/wide/r_test_size_factors.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"","x"
"sample1",0.862940375136949
"sample2",1.01893883237414
"sample3",1.15044795647715
"sample4",1.28622887425061
"sample5",0.783315632771148
"sample6",1.05202950644149
"sample7",1.26953999364157
"sample8",1.03700363305076
"sample9",1.0722375490845
"sample10",1.06737438774636
"sample11",1.0447430054961
"sample12",1.30411299116775
"sample13",1.02654088756295
"sample14",0.919076897143146
"sample15",0.949895150907176
"sample16",1.03053389737881
"sample17",0.895536794347967
"sample18",0.915952221873316
"sample19",0.962496480053063
"sample20",1.1596033819172
"sample21",0.880796822178318
"sample22",1.22648158969201
"sample23",1.24541560411767
"sample24",1.23244102238326
"sample25",1.00348847241032
"sample26",0.809709275581433
"sample27",1.08342868220202
"sample28",1.21366974823372
"sample29",0.946588711724149
"sample30",0.89963255442506
51 changes: 51 additions & 0 deletions tests/data/wide/test_counts.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"","sample1","sample2","sample3","sample4","sample5","sample6","sample7","sample8","sample9","sample10","sample11","sample12","sample13","sample14","sample15","sample16","sample17","sample18","sample19","sample20","sample21","sample22","sample23","sample24","sample25","sample26","sample27","sample28","sample29","sample30"
"gene1",6,11,11,12,1,4,14,0,5,1,13,5,2,2,1,1,1,15,7,3,2,6,4,5,4,1,5,3,3,26
"gene2",12,13,10,8,5,9,26,17,24,22,9,8,37,13,33,13,15,12,33,15,8,23,23,23,30,9,11,10,10,7
"gene3",3,7,2,0,2,3,4,1,8,11,5,0,21,12,12,5,4,5,4,1,0,2,1,5,4,11,0,3,4,2
"gene4",167,68,134,36,71,123,163,88,162,42,102,38,135,89,214,66,143,56,177,127,144,101,228,73,149,64,173,40,90,45
"gene5",19,83,46,56,22,69,25,40,28,81,12,55,19,21,17,57,28,21,34,38,16,37,20,43,14,45,14,65,29,31
"gene6",5,33,5,6,11,21,1,11,2,9,8,39,9,1,5,33,14,31,20,45,6,150,16,74,36,130,6,389,3,42
"gene7",37,43,23,28,8,12,58,12,23,42,55,11,71,28,23,86,40,26,39,78,52,13,102,59,38,32,60,15,89,30
"gene8",22,11,74,25,40,13,48,22,33,43,30,35,43,11,32,6,76,15,69,28,119,18,87,34,42,43,27,51,92,29
"gene9",27,39,39,42,15,51,59,109,43,40,26,31,39,31,10,12,31,47,55,37,54,46,40,52,42,150,72,91,40,44
"gene10",10,15,3,15,11,8,6,8,8,20,24,3,10,5,5,15,11,57,56,39,27,62,35,32,21,28,19,77,18,18
"gene11",91,455,134,544,89,611,75,394,191,937,203,699,92,525,181,331,87,521,49,271,52,917,90,121,179,604,146,413,153,300
"gene12",15,27,36,3,14,2,54,32,41,36,30,39,46,15,35,42,8,23,7,22,37,33,9,18,8,10,6,24,10,16
"gene13",4,9,3,5,10,14,3,11,8,58,16,15,12,3,1,12,70,2,8,42,6,29,12,30,59,18,15,71,20,20
"gene14",0,0,0,0,0,2,0,0,1,0,5,1,4,0,0,0,0,0,0,0,0,1,0,0,4,2,0,1,0,0
"gene15",63,54,127,70,50,42,146,35,20,49,64,42,40,38,62,42,56,29,46,55,62,34,52,47,25,37,39,68,117,42
"gene16",16,13,15,16,9,10,2,10,18,22,13,18,21,27,19,15,14,20,16,15,4,5,6,7,11,19,13,18,5,2
"gene17",10,5,13,1,7,8,8,4,11,2,13,3,17,10,12,6,1,5,4,4,14,6,36,11,3,4,14,4,10,3
"gene18",35,218,57,110,71,304,78,188,104,270,44,144,47,79,89,87,47,156,5,143,42,180,50,267,47,107,50,108,25,50
"gene19",88,43,28,78,39,34,43,26,59,24,61,35,9,51,28,65,96,49,81,86,47,33,87,92,72,34,89,60,57,84
"gene20",12,278,76,102,16,355,40,188,46,184,75,180,19,269,26,138,35,289,62,106,39,199,37,62,28,54,24,180,23,159
"gene21",29,81,104,111,56,36,40,97,42,128,102,105,54,53,9,81,58,45,47,54,61,66,42,82,49,19,50,47,12,47
"gene22",63,55,106,61,47,11,21,32,13,19,62,22,47,17,115,15,167,40,173,63,203,93,158,129,230,86,154,64,318,49
"gene23",31,15,26,56,28,27,16,20,52,10,13,18,25,25,23,10,12,64,14,19,22,28,26,22,11,7,4,12,7,20
"gene24",7,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,4,0,15,0,0,0,0,0,2,0,0
"gene25",35,6,27,12,56,8,89,19,28,8,33,32,24,15,33,20,14,14,16,11,69,7,28,21,11,6,26,23,39,6
"gene26",7,29,17,10,10,4,22,42,17,11,11,40,3,7,11,43,14,26,45,19,11,29,14,4,69,33,14,23,13,12
"gene27",5,8,3,2,21,0,3,2,15,0,17,4,3,16,14,17,19,21,2,5,7,19,21,7,22,10,8,6,23,5
"gene28",7,1,2,0,1,1,6,3,6,0,2,5,3,11,0,6,1,10,0,3,9,0,0,2,6,0,0,1,0,0
"gene29",8,2,10,22,1,1,11,10,6,2,11,10,16,13,2,6,1,9,2,5,2,2,9,10,15,1,5,6,6,5
"gene30",34,24,38,9,28,5,21,21,21,17,44,8,3,8,25,13,13,15,34,24,37,6,27,28,11,5,18,22,16,27
"gene31",45,64,95,87,139,54,116,39,84,129,110,128,62,42,184,59,86,68,113,72,204,93,146,66,54,119,113,120,161,81
"gene32",6,3,11,21,8,10,13,9,5,9,8,16,27,13,32,5,6,8,5,12,6,5,13,6,13,11,13,9,13,6
"gene33",8,42,82,69,30,76,22,81,68,51,3,58,25,95,18,56,25,81,80,89,37,177,23,78,33,108,31,139,33,68
"gene34",6,11,5,12,16,5,8,4,9,1,48,6,10,7,14,1,1,3,3,1,4,7,10,3,4,1,6,1,7,1
"gene35",0,0,2,4,4,5,6,5,3,15,0,1,10,0,1,1,3,0,3,1,0,1,1,6,6,4,1,10,0,13
"gene36",22,8,5,11,7,30,0,14,12,4,11,4,0,21,0,19,2,5,0,1,2,5,3,13,2,5,2,6,1,5
"gene37",18,11,2,61,4,1,9,1,9,17,9,52,20,33,4,8,0,22,6,15,18,33,4,9,8,5,8,10,15,10
"gene38",22,19,7,0,28,9,11,6,7,12,2,7,13,16,10,13,18,2,22,12,7,2,23,10,3,24,6,2,10,15
"gene39",98,119,53,82,37,64,78,93,47,69,54,84,122,65,37,88,48,124,49,68,53,64,58,47,39,43,77,26,40,47
"gene40",40,27,25,49,34,42,23,15,48,31,55,46,55,50,30,39,16,48,68,81,23,58,44,65,65,35,60,29,41,46
"gene41",2,6,25,7,7,8,8,2,5,17,17,10,5,17,26,1,7,2,6,5,8,1,3,0,6,4,5,3,5,1
"gene42",3,11,4,39,8,28,24,28,33,44,7,31,46,9,7,74,29,77,17,17,6,8,7,16,57,19,19,12,15,30
"gene43",28,129,70,132,68,155,58,70,35,46,28,47,66,81,106,46,22,34,9,36,17,59,11,18,13,20,27,20,12,58
"gene44",36,45,53,63,23,94,33,43,86,30,16,75,24,39,32,6,43,27,33,76,12,84,82,112,51,23,15,15,10,22
"gene45",3,1,2,42,16,5,9,33,1,19,11,4,3,30,3,9,0,5,4,18,1,8,3,9,2,28,2,6,2,11
"gene46",3,2,4,2,1,5,1,6,6,5,1,19,5,2,0,0,4,4,0,4,7,1,0,4,3,9,3,5,1,4
"gene47",26,15,18,10,33,10,8,8,6,8,10,8,28,8,19,46,294,93,131,25,181,59,57,21,118,58,128,56,124,17
"gene48",37,71,29,25,64,31,64,26,34,18,46,33,48,29,45,31,40,28,30,30,88,16,54,78,29,26,81,10,72,38
"gene49",6,17,9,16,8,3,18,1,29,4,9,3,2,6,8,4,4,0,2,1,6,2,5,0,5,0,12,2,8,4
"gene50",86,64,89,81,11,36,22,31,47,30,51,32,9,32,37,14,21,21,9,10,16,13,17,11,36,15,13,10,17,6
31 changes: 31 additions & 0 deletions tests/data/wide/test_metadata.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"","condition","group"
"sample1","A","X"
"sample2","A","Y"
"sample3","A","X"
"sample4","A","Y"
"sample5","A","X"
"sample6","A","Y"
"sample7","A","X"
"sample8","A","Y"
"sample9","A","X"
"sample10","A","Y"
"sample11","A","X"
"sample12","A","Y"
"sample13","A","X"
"sample14","A","Y"
"sample15","A","X"
"sample16","B","Y"
"sample17","B","X"
"sample18","B","Y"
"sample19","B","X"
"sample20","B","Y"
"sample21","B","X"
"sample22","B","Y"
"sample23","B","X"
"sample24","B","Y"
"sample25","B","X"
"sample26","B","Y"
"sample27","B","X"
"sample28","B","Y"
"sample29","B","X"
"sample30","B","Y"
47 changes: 46 additions & 1 deletion tests/test_pydeseq2.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@ def test_lfc_shrinkage(counts_df, metadata, tol=0.02):
).squeeze()

r_dispersions = pd.read_csv(
os.path.join(test_path, "data/single_factor/r_test_dispersions.csv"), index_col=0
os.path.join(test_path, "data/single_factor/r_test_dispersions.csv"),
index_col=0,
).squeeze()

dds = DeseqDataSet(counts=counts_df, metadata=metadata, design_factors="condition")
Expand Down Expand Up @@ -426,6 +427,50 @@ def test_continuous_lfc_shrinkage(tol=0.02):
).max() < tol


def test_wide_deseq(
tol=0.02,
):
"""Test that the outputs of the DESeq2 function match those of the original R
package, up to a tolerance in relative error, on a dataset with more genes than
samples.
"""

test_path = str(Path(os.path.realpath(tests.__file__)).parent.resolve())

counts_df = pd.read_csv(
os.path.join(test_path, "data/wide/test_counts.csv"), index_col=0
).T

metadata = pd.read_csv(
os.path.join(test_path, "data/wide/test_metadata.csv"), index_col=0
)

# Load R results
r_res = pd.read_csv(os.path.join(test_path, "data/wide/r_test_res.csv"), index_col=0)

dds = DeseqDataSet(
counts=counts_df,
metadata=metadata,
design_factors=["group", "condition"],
)
dds.deseq2()

res = DeseqStats(dds)
res.summary()
res_df = res.results_df

# check that the same p-values are NaN
assert (res_df.pvalue.isna() == r_res.pvalue.isna()).all()
assert (res_df.padj.isna() == r_res.padj.isna()).all()

# Check that the same LFC, p-values and adjusted p-values are found (up to tol)
assert (
abs(r_res.log2FoldChange - res_df.log2FoldChange) / abs(r_res.log2FoldChange)
).max() < tol
assert (abs(r_res.pvalue - res_df.pvalue) / r_res.pvalue).max() < tol
assert (abs(r_res.padj - res_df.padj) / r_res.padj).max() < tol


def test_contrast(counts_df, metadata):
"""
Check that the contrasts ['condition', 'B', 'A'] and ['condition', 'A', 'B'] give
Expand Down

0 comments on commit 62638e8

Please sign in to comment.