diff --git a/pydeseq2/ds.py b/pydeseq2/ds.py index c3f10a19..cc7a1321 100644 --- a/pydeseq2/ds.py +++ b/pydeseq2/ds.py @@ -528,14 +528,13 @@ def _independent_filtering(self) -> None: if not U2.empty: result.loc[use, i] = false_discovery_control(U2, method="bh") num_rej = (result < self.alpha).sum(0).values - lowess_res = lowess(num_rej, theta, frac=1 / 5) + lowess_res = lowess(theta, num_rej, frac=1 / 5) if num_rej.max() <= 10: j = 0 else: - residual = num_rej[num_rej > 0] - lowess_res[num_rej > 0, 1] - thresh = lowess_res[:, 1].max() - np.sqrt(np.mean(residual**2)) - + residual = num_rej[num_rej > 0] - lowess_res[num_rej > 0] + thresh = lowess_res.max() - np.sqrt(np.mean(residual**2)) if np.any(num_rej > thresh): j = np.where(num_rej > thresh)[0][0] else: diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py index 7e69edc9..ce54200a 100644 --- a/pydeseq2/utils.py +++ b/pydeseq2/utils.py @@ -1590,7 +1590,10 @@ def lowess( residuals = targets - yest s = np.median(np.abs(residuals)) - delta = np.clip(residuals / (6.0 * s), -1, 1) + if s == 0: + delta = (np.abs(residuals) > 0).astype(float) + else: + delta = np.clip(residuals / (6.0 * s), -1, 1) delta = (1 - delta**2) ** 2 return yest diff --git a/tests/data/wide/r_test_dispersions.csv b/tests/data/wide/r_test_dispersions.csv new file mode 100644 index 00000000..91739e86 --- /dev/null +++ b/tests/data/wide/r_test_dispersions.csv @@ -0,0 +1,51 @@ +"","x" +"gene1",0.748140934583972 +"gene2",0.234433387696388 +"gene3",0.860305419951616 +"gene4",0.138932924677425 +"gene5",0.148959676083603 +"gene6",0.525979541628815 +"gene7",0.275469070328401 +"gene8",0.220039173826647 +"gene9",0.250579424017925 +"gene10",0.276438297834654 +"gene11",0.19489057474872 +"gene12",0.373556459579741 +"gene13",0.592560793421874 +"gene14",4.92891121267816 +"gene15",0.160432599582938 +"gene16",0.293313990006312 +"gene17",0.352188111339015 +"gene18",0.197268864528868 +"gene19",0.181638573534229 +"gene20",0.23154307691956 +"gene21",0.209060712745133 +"gene22",0.268218357613483 +"gene23",0.309767019996308 +"gene24",11.7345118588127 +"gene25",0.26030298788413 +"gene26",0.401775836581255 +"gene27",0.603369619046339 +"gene28",1.61705691361112 +"gene29",0.491678110822878 +"gene30",0.298095159353663 +"gene31",0.167439314008087 +"gene32",0.256668645361674 +"gene33",0.258210546033555 +"gene34",0.427539360167968 +"gene35",1.21873166887581 +"gene36",0.752815913952672 +"gene37",0.590163103462473 +"gene38",0.566458068657173 +"gene39",0.151773154611373 +"gene40",0.148916590525509 +"gene41",0.441176385953598 +"gene42",0.484198416945829 +"gene43",0.216779029191911 +"gene44",0.317247504718874 +"gene45",0.603528873089713 +"gene46",0.741589104539421 +"gene47",0.27915739449809 +"gene48",0.209686577859526 +"gene49",0.525605773428821 +"gene50",0.265395637378768 diff --git a/tests/data/wide/r_test_res.csv b/tests/data/wide/r_test_res.csv new file mode 100644 index 00000000..65760c85 --- /dev/null +++ b/tests/data/wide/r_test_res.csv @@ -0,0 +1,51 @@ +"","baseMean","log2FoldChange","lfcSE","stat","pvalue","padj" +"gene1",5.56443651611395,0.0294694778689073,0.507603054200333,0.058056147663124,0.953703904536815,0.971977293840831 +"gene2",15.6153493322782,0.0331823129287897,0.288353607784351,0.115075074606334,0.908385616219801,0.946235016895626 +"gene3",4.76939725520432,-0.808480964343957,0.549494887269052,-1.47131662746135,0.14120550866643,0.27154905512775 +"gene4",107.601806794673,0.0885626393325583,0.204014314733171,0.434100124044672,0.664215735425561,0.830123601687476 +"gene5",34.3944109994626,-0.230246958379629,0.224449414782295,-1.02583006778145,0.304971690602697,0.444596609665518 +"gene6",35.6998062134351,2.05313557049122,0.409238612022908,5.01696445587664,5.24942748169204e-07,5.24942748169204e-06 +"gene7",39.5876926638795,0.748827220517804,0.29034836816026,2.57906467758924,0.00990682360311656,0.0377119726870398 +"gene8",39.9369146594089,0.674685838207666,0.263863228227323,2.55695286812155,0.0105593523523711,0.0377119726870398 +"gene9",46.1088950092268,0.521221109396736,0.275819047677013,1.88972122769089,0.0587952535207517,0.154724351370399 +"gene10",21.2750614075596,1.71881791365222,0.308210940352263,5.57675828018218,2.45041987517039e-08,4.08403312528398e-07 +"gene11",300.196634621935,-0.322957436418014,0.236184775066428,-1.3673931197605,0.171502117745151,0.30625378168777 +"gene12",22.1669799793965,-0.600973618577076,0.341950908945528,-1.75748507418885,0.0788351637446882,0.19708790936172 +"gene13",18.7106512715424,1.33492511472204,0.427920184377527,3.11956566541466,0.00181117885937363,0.010062104774298 +"gene14",0.679279107589095,-0.441479443815586,1.33967509886243,-0.329542173464643,0.741745922589863,0.830123601687476 +"gene15",52.9341555700959,-0.141081031179019,0.223647469819094,-0.630818811825317,0.528159001743524,0.694946054925689 +"gene16",13.0644167554459,-0.41510911603736,0.321208438439663,-1.29233564987844,0.196240931163462,0.338346433040452 +"gene17",7.96299754721104,0.0608548298947286,0.369401860457523,0.16473882892565,0.869149553438392,0.924627184508928 +"gene18",100.763604726858,-0.61024491547402,0.242317018582851,-2.51837414905041,0.0117898008023857,0.0392993360079523 +"gene19",54.4149922635949,0.701217795517923,0.236513086101633,2.96481605764766,0.00302863959321155,0.0137665436055071 +"gene20",105.447062779537,-0.306059227252092,0.262823540400517,-1.16450462080257,0.244219571842174,0.381593081003397 +"gene21",57.0282899895038,-0.415734938516491,0.251459438233842,-1.65328826564022,0.0982722100062559,0.233981452395847 +"gene22",87.1065998786281,1.48053141560129,0.28235284575055,5.24355053573392,1.57515640157307e-07,1.96894550196634e-06 +"gene23",21.4404974695457,-0.476244141836472,0.315341093882065,-1.51025080801738,0.130979438641805,0.26195887728361 +"gene24",1.00134698336677,-0.610792421446532,1.89421925295355,-0.322450751408084,0.747111241518728,0.830123601687476 +"gene25",24.004469052763,-0.244001520388959,0.293988507285909,-0.829969588408649,0.406555978251274,0.564661080904547 +"gene26",19.6989207449776,0.728188091511614,0.355933223200233,2.04585591916483,0.0407705484059284,0.119913377664495 +"gene27",10.2257937384849,0.856952568510307,0.445623278039735,1.92304264777186,0.0544746940682856,0.151318594634127 +"gene28",2.86668295538467,-0.2687123708177,0.739514452917272,-0.363363244298567,0.716333554868076,0.830123601687476 +"gene29",6.50340033846079,-0.521641026318916,0.42413129203094,-1.22990459822253,0.218732832130881,0.364554720218135 +"gene30",19.7199945870775,0.0726982058791112,0.31250459770684,0.232630836194318,0.816048080317031,0.887008782953294 +"gene31",95.822626386748,0.261209143247732,0.222785728089372,1.17246802785745,0.241009201191656,0.381593081003397 +"gene32",10.3320075277629,-0.479963190441564,0.314401492551454,-1.52659323130603,0.126862191199356,0.26195887728361 +"gene33",56.8365687074748,0.447692168824197,0.278768381116038,1.60596466152969,0.108281674682339,0.246094715187134 +"gene34",6.96257191493728,-1.55307598955063,0.421501172259871,-3.68463029704957,0.000229034838823165,0.00163596313445118 +"gene35",3.39951861314589,-0.222361723767466,0.651263277613789,-0.341431386369877,0.7327788502323,0.830123601687476 +"gene36",7.21496778019806,-1.40714534760812,0.514001617625887,-2.73762824737315,0.00618839859675959,0.025784994153165 +"gene37",13.0959227343239,-0.438935578830093,0.43344367941264,-1.01267038759198,0.311217626765862,0.444596609665518 +"gene38",11.5315449367447,0.0149830412163643,0.426521226638346,0.0351284772728759,0.971977293840831,0.971977293840831 +"gene39",63.7469366951742,-0.309912048978196,0.216003397040393,-1.43475544007413,0.151356799805261,0.280290370009743 +"gene40",41.3895203624729,0.342502233510796,0.219412558944949,1.56099648606136,0.118524579970798,0.2576621303713 +"gene41",7.12138579746208,-1.49673137015901,0.418098678017581,-3.5798519556574,0.000343788923236412,0.00214868077022758 +"gene42",23.4882459512015,0.431122349617538,0.383771674370528,1.12338241305765,0.261275115836985,0.395871387631796 +"gene43",48.8738198412927,-1.5675101875832,0.261466004631887,-5.99508218970977,2.03382552447065e-09,5.08456381117663e-08 +"gene44",40.3388348631134,-0.213306616942372,0.308724256864447,-0.690929242518282,0.489610008204324,0.66163514622206 +"gene45",9.4311661862429,-1.02842479174934,0.462720616130752,-2.22256099230888,0.0262454169883623,0.082016928088632 +"gene46",3.55716622464779,-0.189506142078803,0.536487833473088,-0.35323474318511,0.723912463273255,0.830123601687476 +"gene47",55.6878809849337,2.70821823283829,0.301609739715236,8.97921345443037,2.72709247918183e-19,1.36354623959092e-17 +"gene48",41.0574430309621,0.147421736842092,0.255791763249164,0.576334964697399,0.564388793827548,0.723575376701985 +"gene49",6.16880050095403,-1.36830197810574,0.454982512846416,-3.00737267800801,0.00263516486173961,0.0131758243086981 +"gene50",28.4888150033072,-1.46348009850214,0.293834722004443,-4.98062342162547,6.33797656274349e-07,5.28164713561958e-06 diff --git a/tests/data/wide/r_test_size_factors.csv b/tests/data/wide/r_test_size_factors.csv new file mode 100644 index 00000000..26084b2b --- /dev/null +++ b/tests/data/wide/r_test_size_factors.csv @@ -0,0 +1,31 @@ +"","x" +"sample1",0.862940375136949 +"sample2",1.01893883237414 +"sample3",1.15044795647715 +"sample4",1.28622887425061 +"sample5",0.783315632771148 +"sample6",1.05202950644149 +"sample7",1.26953999364157 +"sample8",1.03700363305076 +"sample9",1.0722375490845 +"sample10",1.06737438774636 +"sample11",1.0447430054961 +"sample12",1.30411299116775 +"sample13",1.02654088756295 +"sample14",0.919076897143146 +"sample15",0.949895150907176 +"sample16",1.03053389737881 +"sample17",0.895536794347967 +"sample18",0.915952221873316 +"sample19",0.962496480053063 +"sample20",1.1596033819172 +"sample21",0.880796822178318 +"sample22",1.22648158969201 +"sample23",1.24541560411767 +"sample24",1.23244102238326 +"sample25",1.00348847241032 +"sample26",0.809709275581433 +"sample27",1.08342868220202 +"sample28",1.21366974823372 +"sample29",0.946588711724149 +"sample30",0.89963255442506 diff --git a/tests/data/wide/test_counts.csv b/tests/data/wide/test_counts.csv new file mode 100644 index 00000000..b842102a --- /dev/null +++ b/tests/data/wide/test_counts.csv @@ -0,0 +1,51 @@ +"","sample1","sample2","sample3","sample4","sample5","sample6","sample7","sample8","sample9","sample10","sample11","sample12","sample13","sample14","sample15","sample16","sample17","sample18","sample19","sample20","sample21","sample22","sample23","sample24","sample25","sample26","sample27","sample28","sample29","sample30" +"gene1",6,11,11,12,1,4,14,0,5,1,13,5,2,2,1,1,1,15,7,3,2,6,4,5,4,1,5,3,3,26 +"gene2",12,13,10,8,5,9,26,17,24,22,9,8,37,13,33,13,15,12,33,15,8,23,23,23,30,9,11,10,10,7 +"gene3",3,7,2,0,2,3,4,1,8,11,5,0,21,12,12,5,4,5,4,1,0,2,1,5,4,11,0,3,4,2 +"gene4",167,68,134,36,71,123,163,88,162,42,102,38,135,89,214,66,143,56,177,127,144,101,228,73,149,64,173,40,90,45 +"gene5",19,83,46,56,22,69,25,40,28,81,12,55,19,21,17,57,28,21,34,38,16,37,20,43,14,45,14,65,29,31 +"gene6",5,33,5,6,11,21,1,11,2,9,8,39,9,1,5,33,14,31,20,45,6,150,16,74,36,130,6,389,3,42 +"gene7",37,43,23,28,8,12,58,12,23,42,55,11,71,28,23,86,40,26,39,78,52,13,102,59,38,32,60,15,89,30 +"gene8",22,11,74,25,40,13,48,22,33,43,30,35,43,11,32,6,76,15,69,28,119,18,87,34,42,43,27,51,92,29 +"gene9",27,39,39,42,15,51,59,109,43,40,26,31,39,31,10,12,31,47,55,37,54,46,40,52,42,150,72,91,40,44 +"gene10",10,15,3,15,11,8,6,8,8,20,24,3,10,5,5,15,11,57,56,39,27,62,35,32,21,28,19,77,18,18 +"gene11",91,455,134,544,89,611,75,394,191,937,203,699,92,525,181,331,87,521,49,271,52,917,90,121,179,604,146,413,153,300 +"gene12",15,27,36,3,14,2,54,32,41,36,30,39,46,15,35,42,8,23,7,22,37,33,9,18,8,10,6,24,10,16 +"gene13",4,9,3,5,10,14,3,11,8,58,16,15,12,3,1,12,70,2,8,42,6,29,12,30,59,18,15,71,20,20 +"gene14",0,0,0,0,0,2,0,0,1,0,5,1,4,0,0,0,0,0,0,0,0,1,0,0,4,2,0,1,0,0 +"gene15",63,54,127,70,50,42,146,35,20,49,64,42,40,38,62,42,56,29,46,55,62,34,52,47,25,37,39,68,117,42 +"gene16",16,13,15,16,9,10,2,10,18,22,13,18,21,27,19,15,14,20,16,15,4,5,6,7,11,19,13,18,5,2 +"gene17",10,5,13,1,7,8,8,4,11,2,13,3,17,10,12,6,1,5,4,4,14,6,36,11,3,4,14,4,10,3 +"gene18",35,218,57,110,71,304,78,188,104,270,44,144,47,79,89,87,47,156,5,143,42,180,50,267,47,107,50,108,25,50 +"gene19",88,43,28,78,39,34,43,26,59,24,61,35,9,51,28,65,96,49,81,86,47,33,87,92,72,34,89,60,57,84 +"gene20",12,278,76,102,16,355,40,188,46,184,75,180,19,269,26,138,35,289,62,106,39,199,37,62,28,54,24,180,23,159 +"gene21",29,81,104,111,56,36,40,97,42,128,102,105,54,53,9,81,58,45,47,54,61,66,42,82,49,19,50,47,12,47 +"gene22",63,55,106,61,47,11,21,32,13,19,62,22,47,17,115,15,167,40,173,63,203,93,158,129,230,86,154,64,318,49 +"gene23",31,15,26,56,28,27,16,20,52,10,13,18,25,25,23,10,12,64,14,19,22,28,26,22,11,7,4,12,7,20 +"gene24",7,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,4,0,15,0,0,0,0,0,2,0,0 +"gene25",35,6,27,12,56,8,89,19,28,8,33,32,24,15,33,20,14,14,16,11,69,7,28,21,11,6,26,23,39,6 +"gene26",7,29,17,10,10,4,22,42,17,11,11,40,3,7,11,43,14,26,45,19,11,29,14,4,69,33,14,23,13,12 +"gene27",5,8,3,2,21,0,3,2,15,0,17,4,3,16,14,17,19,21,2,5,7,19,21,7,22,10,8,6,23,5 +"gene28",7,1,2,0,1,1,6,3,6,0,2,5,3,11,0,6,1,10,0,3,9,0,0,2,6,0,0,1,0,0 +"gene29",8,2,10,22,1,1,11,10,6,2,11,10,16,13,2,6,1,9,2,5,2,2,9,10,15,1,5,6,6,5 +"gene30",34,24,38,9,28,5,21,21,21,17,44,8,3,8,25,13,13,15,34,24,37,6,27,28,11,5,18,22,16,27 +"gene31",45,64,95,87,139,54,116,39,84,129,110,128,62,42,184,59,86,68,113,72,204,93,146,66,54,119,113,120,161,81 +"gene32",6,3,11,21,8,10,13,9,5,9,8,16,27,13,32,5,6,8,5,12,6,5,13,6,13,11,13,9,13,6 +"gene33",8,42,82,69,30,76,22,81,68,51,3,58,25,95,18,56,25,81,80,89,37,177,23,78,33,108,31,139,33,68 +"gene34",6,11,5,12,16,5,8,4,9,1,48,6,10,7,14,1,1,3,3,1,4,7,10,3,4,1,6,1,7,1 +"gene35",0,0,2,4,4,5,6,5,3,15,0,1,10,0,1,1,3,0,3,1,0,1,1,6,6,4,1,10,0,13 +"gene36",22,8,5,11,7,30,0,14,12,4,11,4,0,21,0,19,2,5,0,1,2,5,3,13,2,5,2,6,1,5 +"gene37",18,11,2,61,4,1,9,1,9,17,9,52,20,33,4,8,0,22,6,15,18,33,4,9,8,5,8,10,15,10 +"gene38",22,19,7,0,28,9,11,6,7,12,2,7,13,16,10,13,18,2,22,12,7,2,23,10,3,24,6,2,10,15 +"gene39",98,119,53,82,37,64,78,93,47,69,54,84,122,65,37,88,48,124,49,68,53,64,58,47,39,43,77,26,40,47 +"gene40",40,27,25,49,34,42,23,15,48,31,55,46,55,50,30,39,16,48,68,81,23,58,44,65,65,35,60,29,41,46 +"gene41",2,6,25,7,7,8,8,2,5,17,17,10,5,17,26,1,7,2,6,5,8,1,3,0,6,4,5,3,5,1 +"gene42",3,11,4,39,8,28,24,28,33,44,7,31,46,9,7,74,29,77,17,17,6,8,7,16,57,19,19,12,15,30 +"gene43",28,129,70,132,68,155,58,70,35,46,28,47,66,81,106,46,22,34,9,36,17,59,11,18,13,20,27,20,12,58 +"gene44",36,45,53,63,23,94,33,43,86,30,16,75,24,39,32,6,43,27,33,76,12,84,82,112,51,23,15,15,10,22 +"gene45",3,1,2,42,16,5,9,33,1,19,11,4,3,30,3,9,0,5,4,18,1,8,3,9,2,28,2,6,2,11 +"gene46",3,2,4,2,1,5,1,6,6,5,1,19,5,2,0,0,4,4,0,4,7,1,0,4,3,9,3,5,1,4 +"gene47",26,15,18,10,33,10,8,8,6,8,10,8,28,8,19,46,294,93,131,25,181,59,57,21,118,58,128,56,124,17 +"gene48",37,71,29,25,64,31,64,26,34,18,46,33,48,29,45,31,40,28,30,30,88,16,54,78,29,26,81,10,72,38 +"gene49",6,17,9,16,8,3,18,1,29,4,9,3,2,6,8,4,4,0,2,1,6,2,5,0,5,0,12,2,8,4 +"gene50",86,64,89,81,11,36,22,31,47,30,51,32,9,32,37,14,21,21,9,10,16,13,17,11,36,15,13,10,17,6 diff --git a/tests/data/wide/test_metadata.csv b/tests/data/wide/test_metadata.csv new file mode 100644 index 00000000..a302d4e9 --- /dev/null +++ b/tests/data/wide/test_metadata.csv @@ -0,0 +1,31 @@ +"","condition","group" +"sample1","A","X" +"sample2","A","Y" +"sample3","A","X" +"sample4","A","Y" +"sample5","A","X" +"sample6","A","Y" +"sample7","A","X" +"sample8","A","Y" +"sample9","A","X" +"sample10","A","Y" +"sample11","A","X" +"sample12","A","Y" +"sample13","A","X" +"sample14","A","Y" +"sample15","A","X" +"sample16","B","Y" +"sample17","B","X" +"sample18","B","Y" +"sample19","B","X" +"sample20","B","Y" +"sample21","B","X" +"sample22","B","Y" +"sample23","B","X" +"sample24","B","Y" +"sample25","B","X" +"sample26","B","Y" +"sample27","B","X" +"sample28","B","Y" +"sample29","B","X" +"sample30","B","Y" diff --git a/tests/test_pydeseq2.py b/tests/test_pydeseq2.py index b58db574..900ede6f 100644 --- a/tests/test_pydeseq2.py +++ b/tests/test_pydeseq2.py @@ -173,7 +173,8 @@ def test_lfc_shrinkage(counts_df, metadata, tol=0.02): ).squeeze() r_dispersions = pd.read_csv( - os.path.join(test_path, "data/single_factor/r_test_dispersions.csv"), index_col=0 + os.path.join(test_path, "data/single_factor/r_test_dispersions.csv"), + index_col=0, ).squeeze() dds = DeseqDataSet(counts=counts_df, metadata=metadata, design_factors="condition") @@ -426,6 +427,50 @@ def test_continuous_lfc_shrinkage(tol=0.02): ).max() < tol +def test_wide_deseq( + tol=0.02, +): + """Test that the outputs of the DESeq2 function match those of the original R + package, up to a tolerance in relative error, on a dataset with more genes than + samples. + """ + + test_path = str(Path(os.path.realpath(tests.__file__)).parent.resolve()) + + counts_df = pd.read_csv( + os.path.join(test_path, "data/wide/test_counts.csv"), index_col=0 + ).T + + metadata = pd.read_csv( + os.path.join(test_path, "data/wide/test_metadata.csv"), index_col=0 + ) + + # Load R results + r_res = pd.read_csv(os.path.join(test_path, "data/wide/r_test_res.csv"), index_col=0) + + dds = DeseqDataSet( + counts=counts_df, + metadata=metadata, + design_factors=["group", "condition"], + ) + dds.deseq2() + + res = DeseqStats(dds) + res.summary() + res_df = res.results_df + + # check that the same p-values are NaN + assert (res_df.pvalue.isna() == r_res.pvalue.isna()).all() + assert (res_df.padj.isna() == r_res.padj.isna()).all() + + # Check that the same LFC, p-values and adjusted p-values are found (up to tol) + assert ( + abs(r_res.log2FoldChange - res_df.log2FoldChange) / abs(r_res.log2FoldChange) + ).max() < tol + assert (abs(r_res.pvalue - res_df.pvalue) / r_res.pvalue).max() < tol + assert (abs(r_res.padj - res_df.padj) / r_res.padj).max() < tol + + def test_contrast(counts_df, metadata): """ Check that the contrasts ['condition', 'B', 'A'] and ['condition', 'A', 'B'] give