From c5f69b3b456d588407ae98dca7fe3a6e08c62b87 Mon Sep 17 00:00:00 2001 From: Changrong You Date: Wed, 8 Jan 2025 22:47:15 +0800 Subject: [PATCH 1/6] remove_unused_fragments bug fix --- alphabase/peptide/fragment.py | 4 +- .../mini_sample_remove_unused_fragments.hdf | Bin 0 -> 190304 bytes tests/test_remove_unused_fragments.py | 92 ++++++++++++++++++ 3 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 test_data/unit_tests/input_hdf_formats/mini_sample_remove_unused_fragments.hdf create mode 100644 tests/test_remove_unused_fragments.py diff --git a/alphabase/peptide/fragment.py b/alphabase/peptide/fragment.py index 2eb4a6d9..1909f5cb 100644 --- a/alphabase/peptide/fragment.py +++ b/alphabase/peptide/fragment.py @@ -928,13 +928,13 @@ def remove_unused_fragments( returns the reindexed precursor DataFrame and the sliced fragment DataFrames """ - precursor_df = precursor_df.sort_values([frag_start_col], ascending=True) + frag_idx = precursor_df[[frag_start_col, frag_stop_col]].values new_frag_idx, fragment_pointer = compress_fragment_indices(frag_idx) precursor_df[[frag_start_col, frag_stop_col]] = new_frag_idx - precursor_df = precursor_df.sort_index() + precursor_df = precursor_df.reset_index(drop=True) output_tuple = [] diff --git a/test_data/unit_tests/input_hdf_formats/mini_sample_remove_unused_fragments.hdf b/test_data/unit_tests/input_hdf_formats/mini_sample_remove_unused_fragments.hdf new file mode 100644 index 0000000000000000000000000000000000000000..978a524d91d3cf73b1e089ed3a3f7a09a004d4a3 GIT binary patch literal 190304 zcmeI531AdO*2n)nJy$N0kPvQCGYJqt1pWK|w_1aMzQsdb%qL%g7$=eiC0R)!kLEURC|7X3{h7 z&8x@Tr>2Dk*9eAsSgkN3!hb1}%lhH^!r;#m_oCdTUr73Sf_|Q_i9_=L2UOGZBX#{( z%XRJIbb`wY{o<@wgU_U0Sw#k&|5OCpI~`f-fP3hp7OQltzRm=Ug1)8xOR*l?i-Ldi zEJ5b0^4Iknp|9Tri>9zh*L%J7`=w12r1h%<`;(onUcU+Y_$TP^YXZm6(!sd}eSy9u zdHn_#4k^p+KcuX1NNNAF5&mrdT2y75hzi#^=XC0vs=fz{b;o3_Liy`Mzc0F~AJ>no}N>S6Y@kY-mAVSz&=%P+gMz?-=U)mBw6~3Bh0eyOUzO7x#1~0z`la5CI}U1c(3;AOb{y2oM1xKm`7H1OnLs)2C?P1Qh9A z*6e_CKs&Y)uL&mT^0!{lE?oNkxnY{V8Tvh)Di#4@2XyIQAkEi@H>FH`qoifz7v+`o zD+CG(^NUCDEY%*ETUwS^!YzUY!^`iBhibRzKcuv*WSD9()anl|F39Cp!$Qp(4rq2j zL%DZH`h2ZD(Q2`NKlwE+N3&9K+f=NkPsnBI_d{)3{+}(CVAkw-IuJ?sY>~K~Pd#es zK!pE_%!#Wd5ge$-RPjrg_??<>kPZHdAyEAHlJ3u7@Rv&{EgIIh@1R1xF0b&gT2IEH zk=#hk44Gi zJKF}5(DGHgsCrcEuAKv=33|a3Ljy_N9Qo?s4b@IB){Qt<4bc6S`W%R2vF^$zS`^Tk z2oM1xKm>>Y5vVW(4E({-1;}2D4?zZl0lt;$^COw5E6fbjK!^YlAOb{y2oM1xKm>>Y z5g-CYfCvO6@Sm8cRCA876#c8_DXlKA9>6^Ht9e2*>~Qncr|tah`7LJU)o+A(s#?0H zSXCOxtP-S&{6iY9*A)e#-{l>Y z5g-CYfCvx)B0vO)01+Spzf0ghF{J#Df9I3XLmSBr|DDehE=}?88B&@Dpx8sND~g37 zg?^tRh|rk`5CI}U1c*SzBTy$JA^Sw+gKq~9nK(VZhEnb2TGcu=UD4;5bHHOCwB-gC z1#S}q|LwEH49x2DkjVo5nqRa!-|t4<%SHX?udnX_^K0NREjt7W{);R3wL$s5Hd??il<#NroBeAd zCaL|>w$u^Z#@vwoYm{l4;@bYxmA^;{6fo>XE~|}Gt1D9FWpSmQh0UeB!&es59T6Y` zM1Tko0U}TV2n5=2#iWldbJ*Zwbq_{;TR&W#FY9tw=d1hrsmIm%;$EDEezPY25Av1( z`QoYPYCR$_>eczd0rZ3f$PW#W9~L0LQh@yM0Quti>gs+*29U2DAU`TVesqBRDgp9i z0_2PR`>yV%*x&H#{OSSp)CiDYGeEw04!v5x_RP!oRn+TsFDzhp>Y5%`l5FsUD~lxfV!xS@i{FfrL1 zj+!jQCx@fD&m4{#Y7(O+Rn?@LnuM!KgqlRENo6&OQj=&ksiG#5ni$kXRudyn93hIl zJ{D%5ITjW*)2b#xY7(p_A!-t;CSg30)sNJFVDrv5b9w*xp?*>opXP6fc_(&}UoZX@ z^Uii&>Y5%^sK{?FsF zi^|PAk}&Tm;AWocEEEQRP7T%I(-<)<+$YEW*3iSa{f0|)MuaNSS-1D7VvP}{ouXMd z*VY+TSSYWoguuw6ys^qcSbTK$J-;Zgq+cOWP?%plf@k(E$?KO}T9#K*mfOEz zc=>(tQ0*4|hm@9;3>#cHq)gK=xVRvDN1Ol0^gh?o?sJL`!QL)lfAFMD<2op%>;r$Ep%K8@%Q98Cs;m<6^ zLq-fzGJZ8_+4Xv<{RbD874fnzZGKzjH`kh=FcAf6Uzomx39F;7HIV5`n5wc$iih%E zYMtXviTZl?Alei8$^DDV`VUbugb7TPCl4zdQBtl25c-A5Dt*wflDx7JgES@?5GFAp zcXdC61xA>HI`uCr;k4VN6a24UkHX{@s9f|TP=5t?9VdhdPUyROzJ)1H%;&HCvivIk zQ1Qvfz@9g&aa~VSAG*#EFx8#QW7e26H^M8)T~%ahTj_Hw_&{q ziI=_1Ema1t&+e})YX&l-S~r(lvQ(2-W7U#%j2pr%{@YlkjlzFC#VuZHdf1_+8R|7c zS*D|upSd+lx!@JQY%*)665Vz3`hn{r{PGd}bE)>xRPlrUHDDdzjF=xGQ2cUN2CS3H z1J;R;wOZghxjb;4`t^p1Yw)Q``0<-ScH0hWS?VXq^6H^z@6VMGhG?EWD6h0Eci2$> z2Wqt-P=Gysxc&ehwU7H%`1>2kJQS~q{6qTvu`9F0$J_D_URg|cM1Tko0U|&Ih(HA( zP%9)VB+?QVUO6O(~!6O#Dq{d&Q}whCKC1>i+P zAOb{y2oM1xKm>>Y5g-CYfCvx)B2ZBYNZc;rw>RiUhwIx|w9SDcS(0w(P|!RC%`fZW zIQ|E=mSGpQ$+*JS5~2U-<$<5ILd~I1y-v_F{>e0_Q|DBP$8Nbp$WIg(*FI|&9^liL zuU#&3d*~OZuZlJ&%Ej5GU%Ycbrt~A&+ZhxiJ4`bIZdNb{{hKJgGI=NYpl-jd**nuVcJ5Eitj&dGo zv%2!SuVSD5Wa6iPpO8@$H9K=p=ZEC}i#AmoUb?ap{x<8H&awA2*wAO?+%q%2 zdA{H0i95eaKm45~v+;hp=Iu}AJj(j507z= znmF1$cFd$nPmD2+d;G~UijTV?DawuT{&_%kH{yr>a9$B7p2kE3hyW2F0z{xf5g^Z+ z3N@WH7$QIfhyW2F0z`la5CI}U1c(3;AOhDU@CV+PaeRzsP*9yLL`*Rr?Cd}PwepHO zn&N8(;|=5yGLLuSTK0_+5Yb!$%4KQe5J6D`%IW$Ay2z1Pztg7^lM)7QKByb!28?K2Ae zQI^#A$?HWr7%mq4&HScHeJ27$fCvx)B0vO)01+Spe^3Hu^}(GR6qt#BqOZkdzImb= zD3(XJMJ4}Lbv52>ENb}7BDAe~RZUI8)g(eqBGsg_nnbBdG*6U;JmTy+<%GK5qQPfw z3|TEPs)sCcheg#CHeW3Z#7u9@0uH z%qm|L3FTM+%tat0PoSUuK7YrxjXdQY`)3*@86rRghyW2F0z}}ZA`tuV_`NOWA3bx| z+i(0Z>!lfQ%;DW7VOMG_d){;kNI){`DS@lr%oXV4Tz43M9 z<%e4w?6WGvywVVRf6;)M%PSXE?vS+7Ic9^s!4smEtMOQu>-Bc4xoh1T9fCvx)BJigo5Xfysn8~kaW~PApf2!p|gCzn) zfCvx)B0vO)01+SpM1Tkoft!@TO&C!BkuI8{LG_p~cf1fDA?{vU;s44bWbr*$T0Mjt ze|g7l(se=ICIUo&2oM1xKm-C4h@I6pbasP?W+URpZ@llpxv@!=8$3U3@|m_>j@`Gq z!OXsfZ({H5(d^XWm`iP=hmLD8=H5-yHe?5{9ND()-kE(Hy*jy7vwE@LA4~{d-1e*9 z-pQ=mrIq{ZLSN@_wFuT@6GF5n4fcZ&+gstcjf09d*}D)djmZ}-1*v5(Us3efhkf6 z5g-CYfCvx)BJlr9fILF}fB94EM1Tko0U|&IhyW2F0z`la5CI}U1pXHU{=k9Y?|48n zD5y>rA`a#a2?SsCsHXT@f#3!6{Gz;)emwAFL1BLJ2%g!uB(Gm?X<1%LS#JM=;pO+m zL$zDCrAOh5PNOgA1aylH{GIF{) zI%Pm?OdtZucS%pr;{Ru*XJvMDW_L_a@1hM%Y)l~ZXZFm@>XzLp!=#T}1ddNjPtD;v z($hPoaYb3_Ii1q{8pK8fA`pGoY=<*FoB!7(Ju^MCTaKff){xkUKs?7dx^{Eq zNXt&mOz)N<+86Dmr*=)xTi0&>k&BH6 z#CUo-Qn`xM?98m36lYFmhNvgn?~tAC%*^iU$k00|=1t5`x72RVZds|_ax$IiSvv4WwzI58xR`=2)#X>9lG*m z3$Y=9=x;|ydZsfoGbc4Wy{B5Ynm&JmPkNdovzt2qPQF}t>tX|d5M3@UvqMg1rqkIo zO#@a#&lg-$)hTs4y7E=&$jop!b*vB@0EqGPrIwNIbfjl>Ro9#|D_vi=;`v*&*R7|+ z*)=mKJyCN)A8EEZiqIhLMHy+gO*z+d zkt|7%imae{2%2Bk{c}w`S1mWl27k>EDE@m%YZ?ZBxrEZ9VSW1!D%5rG3Jbx9t3VInj%^&;7n!6TP7}HFaPC8ooo%Z=3Y{a_x1wjjrF9>!HhS8U6cm zE?utEtG_Q7=x1WX0}OxivMUS3x+|YxyCV9%kqZ^Sqs|H)A_tApn?-E(!_@tW>& ze%*zdZvNw*fx7NVT=()5Ml(jUNLBYNuKT3Hh^e~n*KnV%JBig{GBfaz8&r@!bCl0e zNxgm;Yzi~@9#kL9Z^Y*c%R24ZO-n%s5g-CYfCvx)BJc+%K>llg@Cl;s5dk7V1c(3; zAOb{y2oM1xKm>>Y5x4;aZbJW`s=G8p!!`YX-aeoe{=5D^f&5km2VkA9wzSZ1m+1F_^#^^VXsW)Ez=7M??qu)HJ z-rT9*oK|o8={J+rn<@HD62I|Ty7QK9gtNxms&5THw6x+*42_Hk5CI}U1c*RIAwbR= z6=f!A97KQ!5CI}U1c(3;AOb{y2oM1xKm-C4xCxy#HhrrZ8m{TA(MHySh}>vrjdZbz z-&H6tet)IagPb+40z|h&fCvx)B0vNxAOXuMXFA{Cc_fdBfv|qSaCKMd`8*8A?E~G_ z40m9qVwL*UW+rLL1ZGqa_70lE+7Y;;Uhjtxt3-0iiK{AqnEe96AHXZvgvWs~h~qI! z{>me5Kna%c3$tPli;(XE*0EqW5YNo`jL8x%C`Pnbj0~nbaC|GKGph;s0BFX{Y=Oa5 z=?t$qiplUY+0x`aU^EcK2E)xNxgh(1%Ze4biqU|VE^(Fjs}15cwx6f{`Soh+LL33? zm(IS(D&Y{5WjN5udjySGxN-6}Jo*B&u!fE(Lr2`pEDG?5V)P*VQg=KTvg{EPRt)Tc zfzi8Ij6CuMJjcxD#Ta=6HNsgCLuJ0FFjM2}od@o|wO7{$SCVU07 z_PApVmC%@Xv$N7g8f8?1dEc_xSj=Uirdjp_&m)sXFj!w2J+WT;VRYWlzQr^aYr!z& zIe}+b4KDwbib`Te%dpD8Odwr_sC=b-A>kn<-1fQ)rbei=1NB|OQadDvnb8wCIqfzU ziS8W3gA&%+tny>PI945FSY;R3dNkyH`c6f+aWO}?L`5elKQNwwH&jx0qCm09pFh0a z%GuTB#h0YzofHf2P7fvId-pQF0+D2k4jTV9Fe!Mj59^i4U{2_rX5)U|fv1kxhN4&UK68Jpn-47T z(;LC$KKpPB+af&*cFD`9i&lf4L!&cf(DwfY`K%hw)9QT$0(Ri6A9PV*Hv z7kk&>h+SQA_+I%t3S6Jcx0dcX9Jdz7{L4*x1#uzr3}BhJGLZ0ATJ!? z{E>}AsZ!b43SaMH@y|2ZXByFF658d}zzP<@)z`)kFNNU?7S6ey-j74iw7zUXVqQLe zh~CDkN@Lp=K506LJ(1X1IGwYdcc_q5W_a9~inEo#n_ZpeIc_oNIq#-0VfNFK#t z=#1@rb)iGDB1<`VO9_>D-@eBS@iuut?$VgCy+gMk*ZAD>4_-vK*q~thvvqqKy24{C zOl`}cv`b!U!Mh&;31Id#WiCVNJ%z=|UF|DeFrJwK+oCb~j&*Tet*8TRZU0igg{*r$ zg*h!|D+WG>)Ry+Me0B0yw$p1o0eDuiQ2DF#*k_NDcdvaEsZ;p`{KcNfQB^<6j1tWD zh~Q;B>eIy|L3j}_rtUYsgo|wkj|;AM(1d9naA}j$kU8sy@mQlD#HDb~r~EIKC~o4M2F8!k7V);`a$atg2KMh5LO(&q8?X00 zU5rK}Ywxg^#WgqFf*N&K^LOW3d_0qw?7ED_wq1tC*vL#TwMku(yp8>hm)O+TldOy(|TTu#~Usn7SA7DvRy6OYQ*lvUM_Gn#r-% z^wYFA_99wQV};g@YrxaI!fz33zsuYds23miRo(TFoJqzwJks%t$FfpFB> zAMr)}C@iwYC}87!K&40dpOso?l&xqw>jc`5i$5(Rymv}HN4Y2l{x2UC+Jtgk=KGG_^nyoTMo>bH%5 z-Il`N*55yeiMF{i-r0}%pXJA&@qYPub-uD@uT@vp8at027B3n05nV7|ZB!$&dz~5>WW>lh?k=0-Z1unBv-fi+^4``yc;*hE$niqacN-rE>hb^h;G-f9z;$U3oPPZR!r z*NeASorNj}7VHXTCS|Asg}>Xdwn{Rq>Y5g-CL0ReJDy$L3Xx<~|w01+SpM1Tko0U|&IhyW2F0z}|HBybbD zp=Q+5%njFcLv34ME4cLr-roqP3Gq&d5ki*ms=8Tnn;XsZryqa3)T~~;4A{buO6o0HK_L+A2ssQ);Om*;>w-Oqu zT7$UOnV845KB#GRbFK1b9K!KBw;<7Hyu%m9{Z(8s#*yIeX_~@@dQVMSPYbl3#_*nY zQ*|!nI=wjR4fes`(MYwU_n--PRn1UXQCbye^YExH-O>t>JRV z;~ux!3*6$h;)trdgzJvL2fV+Fy;e?%@wOWRKl_Z$xbNf&;8VScrCJkXe=!!~F9`7& zs`x5#S5a40Sp6-0Uh~rWjAQ0R^z{zIKvne}Ty;NO;Ht}1)%j6)gjs!c;X^m>cDfdm zfN8E^UsYg`uPWnhE8IJjdkUh@$(r@Kw-jR|t`V8ck5(BGW9AW`>`z#SIAd`pP-~~J`%UB-m1Z{nx35fhv`kXFZ%%vqB5hN0^JOl`@ zU;__2659~VeAWaU<@v|T^Y2#kPpSE{cz$qseq%L%oZ7&fzc#Q=Zy?FvfP+Q)g8V(L z%w0fk$W}aCO2`GI;x2L;84(}?M1TlXL;~ajQW0mB#zX{&01+SpM1Tko0U|&IhyW2F z0z}|S0ym)x$YW!F#|31`Yg#z)e>V_^pwlOJaB;z);?mOGkvx_>@SFQY?Ox=M^xu!T zO8z(ZdbuV)B3F|aLCf2V2LH`-*Dg+{#0@!@uR3M~Ut8hwCp37Xlw4K>2|5!2B0vO) z01>FT1Y*{qjTgAYN4SP>&e9N;NX*YdCl-!jOpe8X`=ErO;gXQWkL^8iWYQTt8-Xc9 zfap`)q2+tX2ti+^ZA=Uv=Z-Ip#sL%i1z+NO##Hy>$Npa_4u1JjB(NO^SdanMmlPA< z3J4D{i@bdsnlUq`GPySDVfJ81KVl2giq|1w6YM>}_tv=!XAj_3#=_RU`7FR@J|4H} zFyek;5k}mGyYm%`494?oS}(S2((=d;KvNclhnOq_8>cWM_VDd=`gP?y_$ZHr^DTW| zYYyj1;K1Yp)JY^6&0>Lq*+CEVVx7V7X_c#xvhH36+XTa3&s z;T~p`5^iTk(+-4W>~F&6h73J@Pu1Fw1V`u3SU8kb#h1M0+kZg}n`E59pAc8EO0I8B zE_{8eaz9&J(>|mI0gjN+{=oVUzs&c5?+bLA)kQ5PS3zYLJZ0#Ae8wxMC#-Hzb`iy? zSix3@jR@tt1M!E~d05Lgd14KWOVfciwy;odNyDb0K=Uow+wWmkmBG-jE!ZknlMg=q zE#6feWL}QB)*1}vn7L0-4DLhDU_AE(I^Z4WI9Tq$x9{Oe*gi0U`QRH?nL++G;;C&a z<4<|VM|Nb!sK;?JcxDzp#F>cLcgC(=+kF1A*7j9Ea;Ut2AD+>}^4OxKc#H3; zbiZwdVPh_mCmNREWb!NKRTw-Sn9aTp?c_{db_y8&cZ?4IJ9F}dHq~|y=8p=JBImJi zfvW}kHF(+c%jh}3R2 zU%Fr7*r~hc+RB2vUZ{Dw<7w;I^^GsZ-_AFl>Jj|yW_&vbhytd>eaR3PZ~75Wc6j5A zz1a9Qj?QLE341UYj}Q34vlY(1c$BZ)r{jkhyz*X~DH=s3T~3YvV)x1OvADGsHvb*& zbA8#jQZHZJEV-J`{j;r^barxlPWZ!an6^%x@)(qeo$PE?GjsBH2<5I_*uw%eUEIb+;{OC-=jVpuC*mweZ~skJJMLcE=5qs<_y4C0oJ@yH9faSTNpWG19C= zW{tSOH=x?$dy;R*_0vfUn)O%W&N0+#hn7D0;A-GwGnEcUyk2~@5AJo=#j_{DTvw6M zgT*sgtL80f2@l+3Ipbz2w!cJg$E*eIXQP&~ZorO+&!4Tm;jQ5>uVT|s#+Xs==ZfE$ zw_WNsoK=!N_yqR0^1^wu&&4~|p9wphIm6Ii%yZq1I)r;lLa z<|PZ=KaVV$+$1R@8DWdi@k6*8{Wb2a_4r$GK(Y@@w8io_pHtz@ENS@E@bU1Sn7-)8 zo=@+y$HzJ4gX`wTv-)Vnx&)(e@saZ%V&*IiU}5ITsW^roNAzt|a|aqW#N@hYg}>V$ zN;&!5vFZt!3w(eDIZ!?iyO=obFE~H_g3|oBwC*sw(}YxvsR8@O5S+l~OW3;P&5(v4 z)k~G;XY?J&UoJ1%W29k&yitt=iG#X9o0hqpdDQQ1CGYFxrXT#+(Zcxe zB6n1>nop#>)VSVL>4g*AXoHD-_qOf#EqF05te#C8*SxL+332Er~;rWB&6sW~_lD;n=K9-Q<&l($UdTq`!Cw=*jQIMcf~%f*A8 z(T9af!;|B(rEeOiXUd4jGjX}76Q5wM+P+zrwX+*Na}V2V&zCpNz#-Nsw7Yr!R?K+U zST>~1Ut8zBA|J;yN78<&XAY`85T&)6{w4mi4z{87E7d>D`Fd=l4c3Mq+wx={+{@-{ zVjhh9qYvy@Q{Q8;8mgg*`5DBWHUZUePwjfoR~hozRI4lL^@6)H(J>eA*59x^GZPzZ zDH4j+bu910gAdks;*{fp;l9|V^;i`0VxL@wMf)EZA^QxbmO;3E5(-j=Pbw96@7vaUqjkAb(Wty zxBiP0yRiGMvs<@qf98u-V>WL*i1$|D&>&$MLfOl zB#wXd$(L`R+q!q>VejsvEZc;05y$p_Qg4#|ySVe_<;xH6Kljjn=B;-sXdaH@vya}# zxpTHbM$anj@y4C7PI=VR7^qjXb$`SGC04@Bgwf`U+rI_89wysHS+EH2u0Q^_ z_4bbDS2wQ0zTI!99OWJR7$0wHkP>xqh4=GMmhb-L`wLjG5Ubx?zjE!CxVx|`y_J$> zJh$S&nx!mATDq8(No!8Tw}~mkk~f!ZJ$H7~^eso{9UlAEruR0UK5%ZUtrefS%?mak zV^5iP;@taQnBw+Wzr;IRaO~}qCv8=XNk~dk7?)CHBq?AfC4`yfBt=PLA^cCC$qQCR zF({GDrrhC*cSW;mEW!mA%M8rUBH&;O%VMD}o06l1E1@i!RbnlfmEEcsm{DoX+OjBS za7FQ)DlCMnuc2hHSVK~hL#e@@<*ULv~|-Kty?r@iA@q)=*8H|{jTXC0z`la5CI}U z1c(3;AOb{y2oM1xKm`8S1a3kK=u8BN01+SpM4;jl;LjioSi)opsoYJ&#$+60GIt308k?BO z1A`ekt{~}@$7S|`dzAPPz_rxzYk38$ zn2BrIfoYx~AAbh%`OM)SV1UP>0P__iiWDP%0$I&upA6I5$AQbccxam0YHbWeM_gImAfXnAiMXnDq5^A}E4fQf!RDLobYMiT*8|bZ= zjoa{g!~TkuYiy|w)XHmq&b!#m-^IRomzj7oPa&2M^g>&0pa->q-iG4bA^1)SbG&G} zA6SPWN|+0H-DOgse1y@8)d)PyO;xIfbZpiJ*-snfUjHE9RE*f>A7lvcE%#*U!H!Bj zOZgyw@DDP>Kgh#4!Gh+z?!kuo1}E-SA`!`Z`2w%OeOWx5&~m@89>)&9 zh*3&7YVrwmanBOe@K4~6Zgm3x#_2fx4MrU^JZsy0ywa&-=C8e6%&Su5&O8#%`^WT? zHYSg~NNTsAkLd}uR$hjiJEE{ryxSk}az1TYeA?DxpAy79nT$T;bTnZRyz~}-8@1Iz zDHnK$@9Szw?rK269W0bL_B^Ls0|%QHD{7-3s*Qfnj4A1OgIRdS;k;@dV3K{b0yK+j z0;Y=DviiK?Gs3(M)8F5Z$GlN^nvIgYc6XTU<=$xSAO?KJWJ3|$%p@rn=B{$Vkcg!& ziyJJ_3uPwnZi>@|xi7hZhZGZ*xw*%M65<04%ze(@9UB}$bDs2kuE6j--e+d+izK_b z0TV}+BMgRYY+*w`qmJ%dWWDe@S}ReQ!>8|6 z?w0a-8RMguZv%I@G4t*W!aMnS^1bh&lRJv{^k*zm%v|Hk9!}2owU4u(;0?ihUX|<= z&I60*DJGx!Hp{W#7OG=YM^*zdOlFvu!mFP`b3V`#b)cyknC%&12*E-n8lN%as66MR z49T~0jSW)=!6Wl)20RH(|f9CMB66-d6w@(&wU6pH~eH z$AfkC0+C-BpF<08%n2lU@baCXnFb(?g-iH2f0^fJFAOIhZ^!cSW7+^u7iWeCFxKtB zyIffv63+3#h14_ffkxx9Vn#z|#4IKQb)VtwJk1U4F&M*w`LLStPIMf8{~6T>t%KJXP{d36NZTW-n5{pi*_sTYGIw`lttWn>}*!cy<3}xJBitF6&dWq9F z4#lkHdt7&=r*fB!ebT0&mr|u_IBEC}BUv-7jOQL8IJdZHjdGg7z`Y!p$%_>AjdQKX z;zmlL3!lm8a|+M6o8c8tXE)>%wzm(kC-58#bDaj*E(NZ5ywI;b=DI=@C{}o!o4Ky} z=iX8(Ia-1H74apY2H^UHcM9IuP8oWPH5sNZ{;=qx;f)3GtdKs67`@%J5$}2SIg~D3 zFp0@7qheLmNls#tkJk$27qzwyOt*Ne@>@QeajdQnouwp)i~Fo8jXnJ3rUY@fgUS+r zk4*IN>2~v1w$jG~C62$)e5mG=V_e)1!Cixm^%)>34kcWX-SJ+tyQ$0Us^x<6DX(v2 zU>_)PUfH!t;Vxm`R8Oc2o~jH=C8dqKJ+H>Y5g-CYfCvx)B0vQGLjsY|ehOGTU;FlcvL4wh zRQ+{4KO}@98deO-D=o_%HnbqGtgt}MuP#acnrdyWrd7=@w*jcXD0J;7yMP}GB}9M- t5CI}U1pZ(I===Xa*o07bhyW2F0z`la5CI}U1c(3;AOb{y2>cfa{2!)MB%lBQ literal 0 HcmV?d00001 diff --git a/tests/test_remove_unused_fragments.py b/tests/test_remove_unused_fragments.py new file mode 100644 index 00000000..d8e092c7 --- /dev/null +++ b/tests/test_remove_unused_fragments.py @@ -0,0 +1,92 @@ +import os +from alphabase.peptide.fragment import remove_unused_fragments +import alphabase.io.hdf +import pandas as pd + +def example_remove_unused_fragments(): + """ + Example function demonstrating the usage of remove_unused_fragments + with sample data from an HDF file located in the current script directory. + Handles three cases: + 1. No `nAA` column. + 2. Unordered `nAA` column. + 3. Ordered `nAA` column. + """ + # Dynamically determine the path to the HDF file + current_dir = os.path.dirname(os.path.abspath(__file__)) + hdf_file_name = os.path.join( + current_dir, + "..", + "test_data", + "unit_tests", + "input_hdf_formats", + "mini_sample_remove_unused_fragments.hdf" + ) + + # Load HDF file + hdf_file = alphabase.io.hdf.HDF_File( + hdf_file_name, + read_only=True + ) + + # Extract DataFrames + precursor_df = hdf_file.dfs.psm_df.values + fragment_intensity_df = hdf_file.dfs.fragment_intensity_df.values + + # Ensure the DataFrames have the necessary columns for processing + assert 'frag_start_idx' in precursor_df.columns, "Missing 'frag_start_idx' in precursor_df" + assert 'frag_stop_idx' in precursor_df.columns, "Missing 'frag_stop_idx' in precursor_df" + assert 'nAA' in precursor_df.columns, "Missing 'nAA' in precursor_df" + + # Case 1: No `nAA` column + case1_precursor_df = precursor_df.copy().drop(columns=['nAA']) + case1_precursor_df, _ = remove_unused_fragments( + precursor_df=case1_precursor_df, + fragment_df_list=(fragment_intensity_df,), + ) + # Validate the conditions + assert case1_precursor_df['frag_start_idx'].is_monotonic_increasing, \ + "frag_start_idx must be monotonic increasing" + assert (case1_precursor_df['frag_start_idx'].iloc[1:].values == + case1_precursor_df['frag_stop_idx'].iloc[:-1].values).all(), \ + "frag_start_idx[i] must equal frag_stop_idx[i-1]" + + # Case 2: Unordered `nAA` column + case2_precursor_df = precursor_df.copy().sample(frac=1, random_state=42).reset_index(drop=True) + case2_precursor_df_nAA = case2_precursor_df['nAA'].copy() if 'nAA' in case2_precursor_df.columns else None + case2_precursor_df, _ = remove_unused_fragments( + precursor_df=case2_precursor_df, + fragment_df_list=(fragment_intensity_df,), + ) + # Validate the conditions + assert case2_precursor_df['frag_start_idx'].is_monotonic_increasing, \ + "frag_start_idx must be monotonic increasing" + assert (case2_precursor_df['frag_start_idx'].iloc[1:].values == + case2_precursor_df['frag_stop_idx'].iloc[:-1].values).all(), \ + "frag_start_idx[i] must equal frag_stop_idx[i-1]" + if case2_precursor_df_nAA is not None: + assert (case2_precursor_df['nAA'] == case2_precursor_df_nAA).all(), \ + "nAA values must remain unchanged" + + # Case 3: Ordered `nAA` column + case3_precursor_df = precursor_df.sort_values('nAA').reset_index(drop=True) if 'nAA' in precursor_df.columns else precursor_df.copy() + case3_precursor_df_nAA = case3_precursor_df['nAA'].copy() if 'nAA' in case3_precursor_df.columns else None + case3_precursor_df, _ = remove_unused_fragments( + precursor_df=case3_precursor_df, + fragment_df_list=(fragment_intensity_df,), + ) + # Validate the conditions + assert case3_precursor_df['frag_start_idx'].is_monotonic_increasing, \ + "frag_start_idx must be monotonic increasing" + assert (case3_precursor_df['frag_start_idx'].iloc[1:].values == + case3_precursor_df['frag_stop_idx'].iloc[:-1].values).all(), \ + "frag_start_idx[i] must equal frag_stop_idx[i-1]" + if case3_precursor_df_nAA is not None: + assert (case3_precursor_df['nAA'] == case3_precursor_df_nAA).all(), \ + "nAA values must remain unchanged" + assert case3_precursor_df['nAA'].is_monotonic_increasing, \ + "nAA column must be monotonic increasing" + +# Call the example function +if __name__ == "__main__": + example_remove_unused_fragments() From 174642a65e7d88352d2aa38d7e856900e511212f Mon Sep 17 00:00:00 2001 From: Changrong You Date: Thu, 9 Jan 2025 14:05:10 +0800 Subject: [PATCH 2/6] Fix pre-commit issues --- alphabase/peptide/fragment.py | 1 - tests/test_remove_unused_fragments.py | 112 ++++++++++++++++---------- 2 files changed, 69 insertions(+), 44 deletions(-) diff --git a/alphabase/peptide/fragment.py b/alphabase/peptide/fragment.py index 1909f5cb..9f7e280b 100644 --- a/alphabase/peptide/fragment.py +++ b/alphabase/peptide/fragment.py @@ -928,7 +928,6 @@ def remove_unused_fragments( returns the reindexed precursor DataFrame and the sliced fragment DataFrames """ - frag_idx = precursor_df[[frag_start_col, frag_stop_col]].values new_frag_idx, fragment_pointer = compress_fragment_indices(frag_idx) diff --git a/tests/test_remove_unused_fragments.py b/tests/test_remove_unused_fragments.py index d8e092c7..00fb1876 100644 --- a/tests/test_remove_unused_fragments.py +++ b/tests/test_remove_unused_fragments.py @@ -1,7 +1,8 @@ import os -from alphabase.peptide.fragment import remove_unused_fragments + import alphabase.io.hdf -import pandas as pd +from alphabase.peptide.fragment import remove_unused_fragments + def example_remove_unused_fragments(): """ @@ -15,78 +16,103 @@ def example_remove_unused_fragments(): # Dynamically determine the path to the HDF file current_dir = os.path.dirname(os.path.abspath(__file__)) hdf_file_name = os.path.join( - current_dir, + current_dir, "..", - "test_data", - "unit_tests", - "input_hdf_formats", - "mini_sample_remove_unused_fragments.hdf" + "test_data", + "unit_tests", + "input_hdf_formats", + "mini_sample_remove_unused_fragments.hdf", ) # Load HDF file - hdf_file = alphabase.io.hdf.HDF_File( - hdf_file_name, - read_only=True - ) - + hdf_file = alphabase.io.hdf.HDF_File(hdf_file_name, read_only=True) + # Extract DataFrames precursor_df = hdf_file.dfs.psm_df.values fragment_intensity_df = hdf_file.dfs.fragment_intensity_df.values # Ensure the DataFrames have the necessary columns for processing - assert 'frag_start_idx' in precursor_df.columns, "Missing 'frag_start_idx' in precursor_df" - assert 'frag_stop_idx' in precursor_df.columns, "Missing 'frag_stop_idx' in precursor_df" - assert 'nAA' in precursor_df.columns, "Missing 'nAA' in precursor_df" + assert ( + "frag_start_idx" in precursor_df.columns + ), "Missing 'frag_start_idx' in precursor_df" + assert ( + "frag_stop_idx" in precursor_df.columns + ), "Missing 'frag_stop_idx' in precursor_df" + assert "nAA" in precursor_df.columns, "Missing 'nAA' in precursor_df" # Case 1: No `nAA` column - case1_precursor_df = precursor_df.copy().drop(columns=['nAA']) + case1_precursor_df = precursor_df.copy().drop(columns=["nAA"]) case1_precursor_df, _ = remove_unused_fragments( precursor_df=case1_precursor_df, fragment_df_list=(fragment_intensity_df,), ) # Validate the conditions - assert case1_precursor_df['frag_start_idx'].is_monotonic_increasing, \ - "frag_start_idx must be monotonic increasing" - assert (case1_precursor_df['frag_start_idx'].iloc[1:].values == - case1_precursor_df['frag_stop_idx'].iloc[:-1].values).all(), \ - "frag_start_idx[i] must equal frag_stop_idx[i-1]" + assert case1_precursor_df[ + "frag_start_idx" + ].is_monotonic_increasing, "frag_start_idx must be monotonic increasing" + assert ( + case1_precursor_df["frag_start_idx"].iloc[1:].values + == case1_precursor_df["frag_stop_idx"].iloc[:-1].values + ).all(), "frag_start_idx[i] must equal frag_stop_idx[i-1]" # Case 2: Unordered `nAA` column - case2_precursor_df = precursor_df.copy().sample(frac=1, random_state=42).reset_index(drop=True) - case2_precursor_df_nAA = case2_precursor_df['nAA'].copy() if 'nAA' in case2_precursor_df.columns else None + case2_precursor_df = ( + precursor_df.copy().sample(frac=1, random_state=42).reset_index(drop=True) + ) + case2_precursor_df_nAA = ( + case2_precursor_df["nAA"].copy() + if "nAA" in case2_precursor_df.columns + else None + ) case2_precursor_df, _ = remove_unused_fragments( precursor_df=case2_precursor_df, fragment_df_list=(fragment_intensity_df,), ) # Validate the conditions - assert case2_precursor_df['frag_start_idx'].is_monotonic_increasing, \ - "frag_start_idx must be monotonic increasing" - assert (case2_precursor_df['frag_start_idx'].iloc[1:].values == - case2_precursor_df['frag_stop_idx'].iloc[:-1].values).all(), \ - "frag_start_idx[i] must equal frag_stop_idx[i-1]" + assert case2_precursor_df[ + "frag_start_idx" + ].is_monotonic_increasing, "frag_start_idx must be monotonic increasing" + assert ( + case2_precursor_df["frag_start_idx"].iloc[1:].values + == case2_precursor_df["frag_stop_idx"].iloc[:-1].values + ).all(), "frag_start_idx[i] must equal frag_stop_idx[i-1]" if case2_precursor_df_nAA is not None: - assert (case2_precursor_df['nAA'] == case2_precursor_df_nAA).all(), \ - "nAA values must remain unchanged" + assert ( + case2_precursor_df["nAA"] == case2_precursor_df_nAA + ).all(), "nAA values must remain unchanged" # Case 3: Ordered `nAA` column - case3_precursor_df = precursor_df.sort_values('nAA').reset_index(drop=True) if 'nAA' in precursor_df.columns else precursor_df.copy() - case3_precursor_df_nAA = case3_precursor_df['nAA'].copy() if 'nAA' in case3_precursor_df.columns else None + case3_precursor_df = ( + precursor_df.sort_values("nAA").reset_index(drop=True) + if "nAA" in precursor_df.columns + else precursor_df.copy() + ) + case3_precursor_df_nAA = ( + case3_precursor_df["nAA"].copy() + if "nAA" in case3_precursor_df.columns + else None + ) case3_precursor_df, _ = remove_unused_fragments( precursor_df=case3_precursor_df, fragment_df_list=(fragment_intensity_df,), ) # Validate the conditions - assert case3_precursor_df['frag_start_idx'].is_monotonic_increasing, \ - "frag_start_idx must be monotonic increasing" - assert (case3_precursor_df['frag_start_idx'].iloc[1:].values == - case3_precursor_df['frag_stop_idx'].iloc[:-1].values).all(), \ - "frag_start_idx[i] must equal frag_stop_idx[i-1]" + assert case3_precursor_df[ + "frag_start_idx" + ].is_monotonic_increasing, "frag_start_idx must be monotonic increasing" + assert ( + case3_precursor_df["frag_start_idx"].iloc[1:].values + == case3_precursor_df["frag_stop_idx"].iloc[:-1].values + ).all(), "frag_start_idx[i] must equal frag_stop_idx[i-1]" if case3_precursor_df_nAA is not None: - assert (case3_precursor_df['nAA'] == case3_precursor_df_nAA).all(), \ - "nAA values must remain unchanged" - assert case3_precursor_df['nAA'].is_monotonic_increasing, \ - "nAA column must be monotonic increasing" - + assert ( + case3_precursor_df["nAA"] == case3_precursor_df_nAA + ).all(), "nAA values must remain unchanged" + assert case3_precursor_df[ + "nAA" + ].is_monotonic_increasing, "nAA column must be monotonic increasing" + + # Call the example function if __name__ == "__main__": - example_remove_unused_fragments() + example_remove_unused_fragments() From 8b3c4adfee5e081d196a5d5e80acd30e0690c0d5 Mon Sep 17 00:00:00 2001 From: Changrong You Date: Thu, 9 Jan 2025 14:41:14 +0800 Subject: [PATCH 3/6] Fix pre-commit issues --- tests/test_remove_unused_fragments.py | 61 +++++++++++++-------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/tests/test_remove_unused_fragments.py b/tests/test_remove_unused_fragments.py index 00fb1876..a4ed19a7 100644 --- a/tests/test_remove_unused_fragments.py +++ b/tests/test_remove_unused_fragments.py @@ -1,19 +1,16 @@ import os +import pytest + import alphabase.io.hdf from alphabase.peptide.fragment import remove_unused_fragments -def example_remove_unused_fragments(): +@pytest.fixture +def hdf_data(): """ - Example function demonstrating the usage of remove_unused_fragments - with sample data from an HDF file located in the current script directory. - Handles three cases: - 1. No `nAA` column. - 2. Unordered `nAA` column. - 3. Ordered `nAA` column. + Fixture to load HDF data and provide precursor_df and fragment_intensity_df. """ - # Dynamically determine the path to the HDF file current_dir = os.path.dirname(os.path.abspath(__file__)) hdf_file_name = os.path.join( current_dir, @@ -23,30 +20,23 @@ def example_remove_unused_fragments(): "input_hdf_formats", "mini_sample_remove_unused_fragments.hdf", ) - - # Load HDF file hdf_file = alphabase.io.hdf.HDF_File(hdf_file_name, read_only=True) - - # Extract DataFrames precursor_df = hdf_file.dfs.psm_df.values fragment_intensity_df = hdf_file.dfs.fragment_intensity_df.values + return precursor_df, fragment_intensity_df - # Ensure the DataFrames have the necessary columns for processing - assert ( - "frag_start_idx" in precursor_df.columns - ), "Missing 'frag_start_idx' in precursor_df" - assert ( - "frag_stop_idx" in precursor_df.columns - ), "Missing 'frag_stop_idx' in precursor_df" - assert "nAA" in precursor_df.columns, "Missing 'nAA' in precursor_df" - # Case 1: No `nAA` column +def test_case_no_nAA_column(hdf_data): + """ + Test case 1: Precursor dataframe without the 'nAA' column. + """ + precursor_df, fragment_intensity_df = hdf_data case1_precursor_df = precursor_df.copy().drop(columns=["nAA"]) case1_precursor_df, _ = remove_unused_fragments( precursor_df=case1_precursor_df, fragment_df_list=(fragment_intensity_df,), ) - # Validate the conditions + assert case1_precursor_df[ "frag_start_idx" ].is_monotonic_increasing, "frag_start_idx must be monotonic increasing" @@ -55,7 +45,12 @@ def example_remove_unused_fragments(): == case1_precursor_df["frag_stop_idx"].iloc[:-1].values ).all(), "frag_start_idx[i] must equal frag_stop_idx[i-1]" - # Case 2: Unordered `nAA` column + +def test_case_unordered_nAA_column(hdf_data): + """ + Test case 2: Precursor dataframe with unordered 'nAA' column. + """ + precursor_df, fragment_intensity_df = hdf_data case2_precursor_df = ( precursor_df.copy().sample(frac=1, random_state=42).reset_index(drop=True) ) @@ -64,11 +59,12 @@ def example_remove_unused_fragments(): if "nAA" in case2_precursor_df.columns else None ) + case2_precursor_df, _ = remove_unused_fragments( precursor_df=case2_precursor_df, fragment_df_list=(fragment_intensity_df,), ) - # Validate the conditions + assert case2_precursor_df[ "frag_start_idx" ].is_monotonic_increasing, "frag_start_idx must be monotonic increasing" @@ -76,12 +72,18 @@ def example_remove_unused_fragments(): case2_precursor_df["frag_start_idx"].iloc[1:].values == case2_precursor_df["frag_stop_idx"].iloc[:-1].values ).all(), "frag_start_idx[i] must equal frag_stop_idx[i-1]" + if case2_precursor_df_nAA is not None: assert ( case2_precursor_df["nAA"] == case2_precursor_df_nAA ).all(), "nAA values must remain unchanged" - # Case 3: Ordered `nAA` column + +def test_case_ordered_nAA_column(hdf_data): + """ + Test case 3: Precursor dataframe with ordered 'nAA' column. + """ + precursor_df, fragment_intensity_df = hdf_data case3_precursor_df = ( precursor_df.sort_values("nAA").reset_index(drop=True) if "nAA" in precursor_df.columns @@ -92,11 +94,12 @@ def example_remove_unused_fragments(): if "nAA" in case3_precursor_df.columns else None ) + case3_precursor_df, _ = remove_unused_fragments( precursor_df=case3_precursor_df, fragment_df_list=(fragment_intensity_df,), ) - # Validate the conditions + assert case3_precursor_df[ "frag_start_idx" ].is_monotonic_increasing, "frag_start_idx must be monotonic increasing" @@ -104,6 +107,7 @@ def example_remove_unused_fragments(): case3_precursor_df["frag_start_idx"].iloc[1:].values == case3_precursor_df["frag_stop_idx"].iloc[:-1].values ).all(), "frag_start_idx[i] must equal frag_stop_idx[i-1]" + if case3_precursor_df_nAA is not None: assert ( case3_precursor_df["nAA"] == case3_precursor_df_nAA @@ -111,8 +115,3 @@ def example_remove_unused_fragments(): assert case3_precursor_df[ "nAA" ].is_monotonic_increasing, "nAA column must be monotonic increasing" - - -# Call the example function -if __name__ == "__main__": - example_remove_unused_fragments() From 1f333d26f918a5768c83df8320208f6671d438a6 Mon Sep 17 00:00:00 2001 From: Changrong You Date: Thu, 9 Jan 2025 14:43:43 +0800 Subject: [PATCH 4/6] Fix pre-commit issues --- alphabase/peptide/fragment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alphabase/peptide/fragment.py b/alphabase/peptide/fragment.py index 9f7e280b..43c9b614 100644 --- a/alphabase/peptide/fragment.py +++ b/alphabase/peptide/fragment.py @@ -933,7 +933,7 @@ def remove_unused_fragments( new_frag_idx, fragment_pointer = compress_fragment_indices(frag_idx) precursor_df[[frag_start_col, frag_stop_col]] = new_frag_idx - precursor_df = precursor_df.reset_index(drop=True) + precursor_df = precursor_df.sort_index() output_tuple = [] From c2610b507d27d1224fa7d0e3c8f727608542b13b Mon Sep 17 00:00:00 2001 From: Changrong You Date: Thu, 9 Jan 2025 16:14:25 +0800 Subject: [PATCH 5/6] delete mini_sample_remove_unused_fragments.hdf --- .../mini_sample_remove_unused_fragments.hdf | Bin 190304 -> 0 bytes tests/test_remove_unused_fragments.py | 57 +++++++++++++----- 2 files changed, 42 insertions(+), 15 deletions(-) delete mode 100644 test_data/unit_tests/input_hdf_formats/mini_sample_remove_unused_fragments.hdf diff --git a/test_data/unit_tests/input_hdf_formats/mini_sample_remove_unused_fragments.hdf b/test_data/unit_tests/input_hdf_formats/mini_sample_remove_unused_fragments.hdf deleted file mode 100644 index 978a524d91d3cf73b1e089ed3a3f7a09a004d4a3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 190304 zcmeI531AdO*2n)nJy$N0kPvQCGYJqt1pWK|w_1aMzQsdb%qL%g7$=eiC0R)!kLEURC|7X3{h7 z&8x@Tr>2Dk*9eAsSgkN3!hb1}%lhH^!r;#m_oCdTUr73Sf_|Q_i9_=L2UOGZBX#{( z%XRJIbb`wY{o<@wgU_U0Sw#k&|5OCpI~`f-fP3hp7OQltzRm=Ug1)8xOR*l?i-Ldi zEJ5b0^4Iknp|9Tri>9zh*L%J7`=w12r1h%<`;(onUcU+Y_$TP^YXZm6(!sd}eSy9u zdHn_#4k^p+KcuX1NNNAF5&mrdT2y75hzi#^=XC0vs=fz{b;o3_Liy`Mzc0F~AJ>no}N>S6Y@kY-mAVSz&=%P+gMz?-=U)mBw6~3Bh0eyOUzO7x#1~0z`la5CI}U1c(3;AOb{y2oM1xKm`7H1OnLs)2C?P1Qh9A z*6e_CKs&Y)uL&mT^0!{lE?oNkxnY{V8Tvh)Di#4@2XyIQAkEi@H>FH`qoifz7v+`o zD+CG(^NUCDEY%*ETUwS^!YzUY!^`iBhibRzKcuv*WSD9()anl|F39Cp!$Qp(4rq2j zL%DZH`h2ZD(Q2`NKlwE+N3&9K+f=NkPsnBI_d{)3{+}(CVAkw-IuJ?sY>~K~Pd#es zK!pE_%!#Wd5ge$-RPjrg_??<>kPZHdAyEAHlJ3u7@Rv&{EgIIh@1R1xF0b&gT2IEH zk=#hk44Gi zJKF}5(DGHgsCrcEuAKv=33|a3Ljy_N9Qo?s4b@IB){Qt<4bc6S`W%R2vF^$zS`^Tk z2oM1xKm>>Y5vVW(4E({-1;}2D4?zZl0lt;$^COw5E6fbjK!^YlAOb{y2oM1xKm>>Y z5g-CYfCvO6@Sm8cRCA876#c8_DXlKA9>6^Ht9e2*>~Qncr|tah`7LJU)o+A(s#?0H zSXCOxtP-S&{6iY9*A)e#-{l>Y z5g-CYfCvx)B0vO)01+Spzf0ghF{J#Df9I3XLmSBr|DDehE=}?88B&@Dpx8sND~g37 zg?^tRh|rk`5CI}U1c*SzBTy$JA^Sw+gKq~9nK(VZhEnb2TGcu=UD4;5bHHOCwB-gC z1#S}q|LwEH49x2DkjVo5nqRa!-|t4<%SHX?udnX_^K0NREjt7W{);R3wL$s5Hd??il<#NroBeAd zCaL|>w$u^Z#@vwoYm{l4;@bYxmA^;{6fo>XE~|}Gt1D9FWpSmQh0UeB!&es59T6Y` zM1Tko0U}TV2n5=2#iWldbJ*Zwbq_{;TR&W#FY9tw=d1hrsmIm%;$EDEezPY25Av1( z`QoYPYCR$_>eczd0rZ3f$PW#W9~L0LQh@yM0Quti>gs+*29U2DAU`TVesqBRDgp9i z0_2PR`>yV%*x&H#{OSSp)CiDYGeEw04!v5x_RP!oRn+TsFDzhp>Y5%`l5FsUD~lxfV!xS@i{FfrL1 zj+!jQCx@fD&m4{#Y7(O+Rn?@LnuM!KgqlRENo6&OQj=&ksiG#5ni$kXRudyn93hIl zJ{D%5ITjW*)2b#xY7(p_A!-t;CSg30)sNJFVDrv5b9w*xp?*>opXP6fc_(&}UoZX@ z^Uii&>Y5%^sK{?FsF zi^|PAk}&Tm;AWocEEEQRP7T%I(-<)<+$YEW*3iSa{f0|)MuaNSS-1D7VvP}{ouXMd z*VY+TSSYWoguuw6ys^qcSbTK$J-;Zgq+cOWP?%plf@k(E$?KO}T9#K*mfOEz zc=>(tQ0*4|hm@9;3>#cHq)gK=xVRvDN1Ol0^gh?o?sJL`!QL)lfAFMD<2op%>;r$Ep%K8@%Q98Cs;m<6^ zLq-fzGJZ8_+4Xv<{RbD874fnzZGKzjH`kh=FcAf6Uzomx39F;7HIV5`n5wc$iih%E zYMtXviTZl?Alei8$^DDV`VUbugb7TPCl4zdQBtl25c-A5Dt*wflDx7JgES@?5GFAp zcXdC61xA>HI`uCr;k4VN6a24UkHX{@s9f|TP=5t?9VdhdPUyROzJ)1H%;&HCvivIk zQ1Qvfz@9g&aa~VSAG*#EFx8#QW7e26H^M8)T~%ahTj_Hw_&{q ziI=_1Ema1t&+e})YX&l-S~r(lvQ(2-W7U#%j2pr%{@YlkjlzFC#VuZHdf1_+8R|7c zS*D|upSd+lx!@JQY%*)665Vz3`hn{r{PGd}bE)>xRPlrUHDDdzjF=xGQ2cUN2CS3H z1J;R;wOZghxjb;4`t^p1Yw)Q``0<-ScH0hWS?VXq^6H^z@6VMGhG?EWD6h0Eci2$> z2Wqt-P=Gysxc&ehwU7H%`1>2kJQS~q{6qTvu`9F0$J_D_URg|cM1Tko0U|&Ih(HA( zP%9)VB+?QVUO6O(~!6O#Dq{d&Q}whCKC1>i+P zAOb{y2oM1xKm>>Y5g-CYfCvx)B2ZBYNZc;rw>RiUhwIx|w9SDcS(0w(P|!RC%`fZW zIQ|E=mSGpQ$+*JS5~2U-<$<5ILd~I1y-v_F{>e0_Q|DBP$8Nbp$WIg(*FI|&9^liL zuU#&3d*~OZuZlJ&%Ej5GU%Ycbrt~A&+ZhxiJ4`bIZdNb{{hKJgGI=NYpl-jd**nuVcJ5Eitj&dGo zv%2!SuVSD5Wa6iPpO8@$H9K=p=ZEC}i#AmoUb?ap{x<8H&awA2*wAO?+%q%2 zdA{H0i95eaKm45~v+;hp=Iu}AJj(j507z= znmF1$cFd$nPmD2+d;G~UijTV?DawuT{&_%kH{yr>a9$B7p2kE3hyW2F0z{xf5g^Z+ z3N@WH7$QIfhyW2F0z`la5CI}U1c(3;AOhDU@CV+PaeRzsP*9yLL`*Rr?Cd}PwepHO zn&N8(;|=5yGLLuSTK0_+5Yb!$%4KQe5J6D`%IW$Ay2z1Pztg7^lM)7QKByb!28?K2Ae zQI^#A$?HWr7%mq4&HScHeJ27$fCvx)B0vO)01+Spe^3Hu^}(GR6qt#BqOZkdzImb= zD3(XJMJ4}Lbv52>ENb}7BDAe~RZUI8)g(eqBGsg_nnbBdG*6U;JmTy+<%GK5qQPfw z3|TEPs)sCcheg#CHeW3Z#7u9@0uH z%qm|L3FTM+%tat0PoSUuK7YrxjXdQY`)3*@86rRghyW2F0z}}ZA`tuV_`NOWA3bx| z+i(0Z>!lfQ%;DW7VOMG_d){;kNI){`DS@lr%oXV4Tz43M9 z<%e4w?6WGvywVVRf6;)M%PSXE?vS+7Ic9^s!4smEtMOQu>-Bc4xoh1T9fCvx)BJigo5Xfysn8~kaW~PApf2!p|gCzn) zfCvx)B0vO)01+SpM1Tkoft!@TO&C!BkuI8{LG_p~cf1fDA?{vU;s44bWbr*$T0Mjt ze|g7l(se=ICIUo&2oM1xKm-C4h@I6pbasP?W+URpZ@llpxv@!=8$3U3@|m_>j@`Gq z!OXsfZ({H5(d^XWm`iP=hmLD8=H5-yHe?5{9ND()-kE(Hy*jy7vwE@LA4~{d-1e*9 z-pQ=mrIq{ZLSN@_wFuT@6GF5n4fcZ&+gstcjf09d*}D)djmZ}-1*v5(Us3efhkf6 z5g-CYfCvx)BJlr9fILF}fB94EM1Tko0U|&IhyW2F0z`la5CI}U1pXHU{=k9Y?|48n zD5y>rA`a#a2?SsCsHXT@f#3!6{Gz;)emwAFL1BLJ2%g!uB(Gm?X<1%LS#JM=;pO+m zL$zDCrAOh5PNOgA1aylH{GIF{) zI%Pm?OdtZucS%pr;{Ru*XJvMDW_L_a@1hM%Y)l~ZXZFm@>XzLp!=#T}1ddNjPtD;v z($hPoaYb3_Ii1q{8pK8fA`pGoY=<*FoB!7(Ju^MCTaKff){xkUKs?7dx^{Eq zNXt&mOz)N<+86Dmr*=)xTi0&>k&BH6 z#CUo-Qn`xM?98m36lYFmhNvgn?~tAC%*^iU$k00|=1t5`x72RVZds|_ax$IiSvv4WwzI58xR`=2)#X>9lG*m z3$Y=9=x;|ydZsfoGbc4Wy{B5Ynm&JmPkNdovzt2qPQF}t>tX|d5M3@UvqMg1rqkIo zO#@a#&lg-$)hTs4y7E=&$jop!b*vB@0EqGPrIwNIbfjl>Ro9#|D_vi=;`v*&*R7|+ z*)=mKJyCN)A8EEZiqIhLMHy+gO*z+d zkt|7%imae{2%2Bk{c}w`S1mWl27k>EDE@m%YZ?ZBxrEZ9VSW1!D%5rG3Jbx9t3VInj%^&;7n!6TP7}HFaPC8ooo%Z=3Y{a_x1wjjrF9>!HhS8U6cm zE?utEtG_Q7=x1WX0}OxivMUS3x+|YxyCV9%kqZ^Sqs|H)A_tApn?-E(!_@tW>& ze%*zdZvNw*fx7NVT=()5Ml(jUNLBYNuKT3Hh^e~n*KnV%JBig{GBfaz8&r@!bCl0e zNxgm;Yzi~@9#kL9Z^Y*c%R24ZO-n%s5g-CYfCvx)BJc+%K>llg@Cl;s5dk7V1c(3; zAOb{y2oM1xKm>>Y5x4;aZbJW`s=G8p!!`YX-aeoe{=5D^f&5km2VkA9wzSZ1m+1F_^#^^VXsW)Ez=7M??qu)HJ z-rT9*oK|o8={J+rn<@HD62I|Ty7QK9gtNxms&5THw6x+*42_Hk5CI}U1c*RIAwbR= z6=f!A97KQ!5CI}U1c(3;AOb{y2oM1xKm-C4xCxy#HhrrZ8m{TA(MHySh}>vrjdZbz z-&H6tet)IagPb+40z|h&fCvx)B0vNxAOXuMXFA{Cc_fdBfv|qSaCKMd`8*8A?E~G_ z40m9qVwL*UW+rLL1ZGqa_70lE+7Y;;Uhjtxt3-0iiK{AqnEe96AHXZvgvWs~h~qI! z{>me5Kna%c3$tPli;(XE*0EqW5YNo`jL8x%C`Pnbj0~nbaC|GKGph;s0BFX{Y=Oa5 z=?t$qiplUY+0x`aU^EcK2E)xNxgh(1%Ze4biqU|VE^(Fjs}15cwx6f{`Soh+LL33? zm(IS(D&Y{5WjN5udjySGxN-6}Jo*B&u!fE(Lr2`pEDG?5V)P*VQg=KTvg{EPRt)Tc zfzi8Ij6CuMJjcxD#Ta=6HNsgCLuJ0FFjM2}od@o|wO7{$SCVU07 z_PApVmC%@Xv$N7g8f8?1dEc_xSj=Uirdjp_&m)sXFj!w2J+WT;VRYWlzQr^aYr!z& zIe}+b4KDwbib`Te%dpD8Odwr_sC=b-A>kn<-1fQ)rbei=1NB|OQadDvnb8wCIqfzU ziS8W3gA&%+tny>PI945FSY;R3dNkyH`c6f+aWO}?L`5elKQNwwH&jx0qCm09pFh0a z%GuTB#h0YzofHf2P7fvId-pQF0+D2k4jTV9Fe!Mj59^i4U{2_rX5)U|fv1kxhN4&UK68Jpn-47T z(;LC$KKpPB+af&*cFD`9i&lf4L!&cf(DwfY`K%hw)9QT$0(Ri6A9PV*Hv z7kk&>h+SQA_+I%t3S6Jcx0dcX9Jdz7{L4*x1#uzr3}BhJGLZ0ATJ!? z{E>}AsZ!b43SaMH@y|2ZXByFF658d}zzP<@)z`)kFNNU?7S6ey-j74iw7zUXVqQLe zh~CDkN@Lp=K506LJ(1X1IGwYdcc_q5W_a9~inEo#n_ZpeIc_oNIq#-0VfNFK#t z=#1@rb)iGDB1<`VO9_>D-@eBS@iuut?$VgCy+gMk*ZAD>4_-vK*q~thvvqqKy24{C zOl`}cv`b!U!Mh&;31Id#WiCVNJ%z=|UF|DeFrJwK+oCb~j&*Tet*8TRZU0igg{*r$ zg*h!|D+WG>)Ry+Me0B0yw$p1o0eDuiQ2DF#*k_NDcdvaEsZ;p`{KcNfQB^<6j1tWD zh~Q;B>eIy|L3j}_rtUYsgo|wkj|;AM(1d9naA}j$kU8sy@mQlD#HDb~r~EIKC~o4M2F8!k7V);`a$atg2KMh5LO(&q8?X00 zU5rK}Ywxg^#WgqFf*N&K^LOW3d_0qw?7ED_wq1tC*vL#TwMku(yp8>hm)O+TldOy(|TTu#~Usn7SA7DvRy6OYQ*lvUM_Gn#r-% z^wYFA_99wQV};g@YrxaI!fz33zsuYds23miRo(TFoJqzwJks%t$FfpFB> zAMr)}C@iwYC}87!K&40dpOso?l&xqw>jc`5i$5(Rymv}HN4Y2l{x2UC+Jtgk=KGG_^nyoTMo>bH%5 z-Il`N*55yeiMF{i-r0}%pXJA&@qYPub-uD@uT@vp8at027B3n05nV7|ZB!$&dz~5>WW>lh?k=0-Z1unBv-fi+^4``yc;*hE$niqacN-rE>hb^h;G-f9z;$U3oPPZR!r z*NeASorNj}7VHXTCS|Asg}>Xdwn{Rq>Y5g-CL0ReJDy$L3Xx<~|w01+SpM1Tko0U|&IhyW2F0z}|HBybbD zp=Q+5%njFcLv34ME4cLr-roqP3Gq&d5ki*ms=8Tnn;XsZryqa3)T~~;4A{buO6o0HK_L+A2ssQ);Om*;>w-Oqu zT7$UOnV845KB#GRbFK1b9K!KBw;<7Hyu%m9{Z(8s#*yIeX_~@@dQVMSPYbl3#_*nY zQ*|!nI=wjR4fes`(MYwU_n--PRn1UXQCbye^YExH-O>t>JRV z;~ux!3*6$h;)trdgzJvL2fV+Fy;e?%@wOWRKl_Z$xbNf&;8VScrCJkXe=!!~F9`7& zs`x5#S5a40Sp6-0Uh~rWjAQ0R^z{zIKvne}Ty;NO;Ht}1)%j6)gjs!c;X^m>cDfdm zfN8E^UsYg`uPWnhE8IJjdkUh@$(r@Kw-jR|t`V8ck5(BGW9AW`>`z#SIAd`pP-~~J`%UB-m1Z{nx35fhv`kXFZ%%vqB5hN0^JOl`@ zU;__2659~VeAWaU<@v|T^Y2#kPpSE{cz$qseq%L%oZ7&fzc#Q=Zy?FvfP+Q)g8V(L z%w0fk$W}aCO2`GI;x2L;84(}?M1TlXL;~ajQW0mB#zX{&01+SpM1Tko0U|&IhyW2F z0z}|S0ym)x$YW!F#|31`Yg#z)e>V_^pwlOJaB;z);?mOGkvx_>@SFQY?Ox=M^xu!T zO8z(ZdbuV)B3F|aLCf2V2LH`-*Dg+{#0@!@uR3M~Ut8hwCp37Xlw4K>2|5!2B0vO) z01>FT1Y*{qjTgAYN4SP>&e9N;NX*YdCl-!jOpe8X`=ErO;gXQWkL^8iWYQTt8-Xc9 zfap`)q2+tX2ti+^ZA=Uv=Z-Ip#sL%i1z+NO##Hy>$Npa_4u1JjB(NO^SdanMmlPA< z3J4D{i@bdsnlUq`GPySDVfJ81KVl2giq|1w6YM>}_tv=!XAj_3#=_RU`7FR@J|4H} zFyek;5k}mGyYm%`494?oS}(S2((=d;KvNclhnOq_8>cWM_VDd=`gP?y_$ZHr^DTW| zYYyj1;K1Yp)JY^6&0>Lq*+CEVVx7V7X_c#xvhH36+XTa3&s z;T~p`5^iTk(+-4W>~F&6h73J@Pu1Fw1V`u3SU8kb#h1M0+kZg}n`E59pAc8EO0I8B zE_{8eaz9&J(>|mI0gjN+{=oVUzs&c5?+bLA)kQ5PS3zYLJZ0#Ae8wxMC#-Hzb`iy? zSix3@jR@tt1M!E~d05Lgd14KWOVfciwy;odNyDb0K=Uow+wWmkmBG-jE!ZknlMg=q zE#6feWL}QB)*1}vn7L0-4DLhDU_AE(I^Z4WI9Tq$x9{Oe*gi0U`QRH?nL++G;;C&a z<4<|VM|Nb!sK;?JcxDzp#F>cLcgC(=+kF1A*7j9Ea;Ut2AD+>}^4OxKc#H3; zbiZwdVPh_mCmNREWb!NKRTw-Sn9aTp?c_{db_y8&cZ?4IJ9F}dHq~|y=8p=JBImJi zfvW}kHF(+c%jh}3R2 zU%Fr7*r~hc+RB2vUZ{Dw<7w;I^^GsZ-_AFl>Jj|yW_&vbhytd>eaR3PZ~75Wc6j5A zz1a9Qj?QLE341UYj}Q34vlY(1c$BZ)r{jkhyz*X~DH=s3T~3YvV)x1OvADGsHvb*& zbA8#jQZHZJEV-J`{j;r^barxlPWZ!an6^%x@)(qeo$PE?GjsBH2<5I_*uw%eUEIb+;{OC-=jVpuC*mweZ~skJJMLcE=5qs<_y4C0oJ@yH9faSTNpWG19C= zW{tSOH=x?$dy;R*_0vfUn)O%W&N0+#hn7D0;A-GwGnEcUyk2~@5AJo=#j_{DTvw6M zgT*sgtL80f2@l+3Ipbz2w!cJg$E*eIXQP&~ZorO+&!4Tm;jQ5>uVT|s#+Xs==ZfE$ zw_WNsoK=!N_yqR0^1^wu&&4~|p9wphIm6Ii%yZq1I)r;lLa z<|PZ=KaVV$+$1R@8DWdi@k6*8{Wb2a_4r$GK(Y@@w8io_pHtz@ENS@E@bU1Sn7-)8 zo=@+y$HzJ4gX`wTv-)Vnx&)(e@saZ%V&*IiU}5ITsW^roNAzt|a|aqW#N@hYg}>V$ zN;&!5vFZt!3w(eDIZ!?iyO=obFE~H_g3|oBwC*sw(}YxvsR8@O5S+l~OW3;P&5(v4 z)k~G;XY?J&UoJ1%W29k&yitt=iG#X9o0hqpdDQQ1CGYFxrXT#+(Zcxe zB6n1>nop#>)VSVL>4g*AXoHD-_qOf#EqF05te#C8*SxL+332Er~;rWB&6sW~_lD;n=K9-Q<&l($UdTq`!Cw=*jQIMcf~%f*A8 z(T9af!;|B(rEeOiXUd4jGjX}76Q5wM+P+zrwX+*Na}V2V&zCpNz#-Nsw7Yr!R?K+U zST>~1Ut8zBA|J;yN78<&XAY`85T&)6{w4mi4z{87E7d>D`Fd=l4c3Mq+wx={+{@-{ zVjhh9qYvy@Q{Q8;8mgg*`5DBWHUZUePwjfoR~hozRI4lL^@6)H(J>eA*59x^GZPzZ zDH4j+bu910gAdks;*{fp;l9|V^;i`0VxL@wMf)EZA^QxbmO;3E5(-j=Pbw96@7vaUqjkAb(Wty zxBiP0yRiGMvs<@qf98u-V>WL*i1$|D&>&$MLfOl zB#wXd$(L`R+q!q>VejsvEZc;05y$p_Qg4#|ySVe_<;xH6Kljjn=B;-sXdaH@vya}# zxpTHbM$anj@y4C7PI=VR7^qjXb$`SGC04@Bgwf`U+rI_89wysHS+EH2u0Q^_ z_4bbDS2wQ0zTI!99OWJR7$0wHkP>xqh4=GMmhb-L`wLjG5Ubx?zjE!CxVx|`y_J$> zJh$S&nx!mATDq8(No!8Tw}~mkk~f!ZJ$H7~^eso{9UlAEruR0UK5%ZUtrefS%?mak zV^5iP;@taQnBw+Wzr;IRaO~}qCv8=XNk~dk7?)CHBq?AfC4`yfBt=PLA^cCC$qQCR zF({GDrrhC*cSW;mEW!mA%M8rUBH&;O%VMD}o06l1E1@i!RbnlfmEEcsm{DoX+OjBS za7FQ)DlCMnuc2hHSVK~hL#e@@<*ULv~|-Kty?r@iA@q)=*8H|{jTXC0z`la5CI}U z1c(3;AOb{y2oM1xKm`8S1a3kK=u8BN01+SpM4;jl;LjioSi)opsoYJ&#$+60GIt308k?BO z1A`ekt{~}@$7S|`dzAPPz_rxzYk38$ zn2BrIfoYx~AAbh%`OM)SV1UP>0P__iiWDP%0$I&upA6I5$AQbccxam0YHbWeM_gImAfXnAiMXnDq5^A}E4fQf!RDLobYMiT*8|bZ= zjoa{g!~TkuYiy|w)XHmq&b!#m-^IRomzj7oPa&2M^g>&0pa->q-iG4bA^1)SbG&G} zA6SPWN|+0H-DOgse1y@8)d)PyO;xIfbZpiJ*-snfUjHE9RE*f>A7lvcE%#*U!H!Bj zOZgyw@DDP>Kgh#4!Gh+z?!kuo1}E-SA`!`Z`2w%OeOWx5&~m@89>)&9 zh*3&7YVrwmanBOe@K4~6Zgm3x#_2fx4MrU^JZsy0ywa&-=C8e6%&Su5&O8#%`^WT? zHYSg~NNTsAkLd}uR$hjiJEE{ryxSk}az1TYeA?DxpAy79nT$T;bTnZRyz~}-8@1Iz zDHnK$@9Szw?rK269W0bL_B^Ls0|%QHD{7-3s*Qfnj4A1OgIRdS;k;@dV3K{b0yK+j z0;Y=DviiK?Gs3(M)8F5Z$GlN^nvIgYc6XTU<=$xSAO?KJWJ3|$%p@rn=B{$Vkcg!& ziyJJ_3uPwnZi>@|xi7hZhZGZ*xw*%M65<04%ze(@9UB}$bDs2kuE6j--e+d+izK_b z0TV}+BMgRYY+*w`qmJ%dWWDe@S}ReQ!>8|6 z?w0a-8RMguZv%I@G4t*W!aMnS^1bh&lRJv{^k*zm%v|Hk9!}2owU4u(;0?ihUX|<= z&I60*DJGx!Hp{W#7OG=YM^*zdOlFvu!mFP`b3V`#b)cyknC%&12*E-n8lN%as66MR z49T~0jSW)=!6Wl)20RH(|f9CMB66-d6w@(&wU6pH~eH z$AfkC0+C-BpF<08%n2lU@baCXnFb(?g-iH2f0^fJFAOIhZ^!cSW7+^u7iWeCFxKtB zyIffv63+3#h14_ffkxx9Vn#z|#4IKQb)VtwJk1U4F&M*w`LLStPIMf8{~6T>t%KJXP{d36NZTW-n5{pi*_sTYGIw`lttWn>}*!cy<3}xJBitF6&dWq9F z4#lkHdt7&=r*fB!ebT0&mr|u_IBEC}BUv-7jOQL8IJdZHjdGg7z`Y!p$%_>AjdQKX z;zmlL3!lm8a|+M6o8c8tXE)>%wzm(kC-58#bDaj*E(NZ5ywI;b=DI=@C{}o!o4Ky} z=iX8(Ia-1H74apY2H^UHcM9IuP8oWPH5sNZ{;=qx;f)3GtdKs67`@%J5$}2SIg~D3 zFp0@7qheLmNls#tkJk$27qzwyOt*Ne@>@QeajdQnouwp)i~Fo8jXnJ3rUY@fgUS+r zk4*IN>2~v1w$jG~C62$)e5mG=V_e)1!Cixm^%)>34kcWX-SJ+tyQ$0Us^x<6DX(v2 zU>_)PUfH!t;Vxm`R8Oc2o~jH=C8dqKJ+H>Y5g-CYfCvx)B0vQGLjsY|ehOGTU;FlcvL4wh zRQ+{4KO}@98deO-D=o_%HnbqGtgt}MuP#acnrdyWrd7=@w*jcXD0J;7yMP}GB}9M- t5CI}U1pZ(I===Xa*o07bhyW2F0z`la5CI}U1c(3;AOb{y2>cfa{2!)MB%lBQ diff --git a/tests/test_remove_unused_fragments.py b/tests/test_remove_unused_fragments.py index a4ed19a7..0f264542 100644 --- a/tests/test_remove_unused_fragments.py +++ b/tests/test_remove_unused_fragments.py @@ -1,28 +1,55 @@ -import os - +import numpy as np +import pandas as pd import pytest -import alphabase.io.hdf from alphabase.peptide.fragment import remove_unused_fragments @pytest.fixture def hdf_data(): """ - Fixture to load HDF data and provide precursor_df and fragment_intensity_df. + Fixture to automatically generate precursor_df and fragment_intensity_df. """ - current_dir = os.path.dirname(os.path.abspath(__file__)) - hdf_file_name = os.path.join( - current_dir, - "..", - "test_data", - "unit_tests", - "input_hdf_formats", - "mini_sample_remove_unused_fragments.hdf", + # Data for precursor_df + sequences = [ + "PSKGPLQSVQVFGR", + "FLISLLEEYFK", + "MTEDALRLNLLK", + "FMSAYEQR", + "PGPKGEAGPTGPQGEPGVR", + "YEITEQR", + "DAEAAEATAEGALKAEK", + "FGDSRGGGGNFGPGPGSNFR", + "LDEKENLSAK", + "ATVASSTQKFQDLGVK", + "GFALVGVGSEASSKK", + "LQLEIDQKK", + "MAGLELLSDQGYR", + "RGGPGGPPGPLMEQMGGR", + ] + + frag_start_idx = [151, 81, 110, 24, 296, 17, 229, 334, 69, 183, 180, 26, 123, 284] + frag_stop_idx = [164, 91, 121, 31, 314, 23, 245, 353, 78, 198, 194, 34, 135, 301] + charge = [2] * len(sequences) + nAA = [len(seq) for seq in sequences] + + precursor_df = pd.DataFrame( + { + "sequence": sequences, + "frag_start_idx": frag_start_idx, + "frag_stop_idx": frag_stop_idx, + "charge": charge, + "nAA": nAA, + } + ) + + # Data for fragment_intensity_df + num_rows = len(sequences) + fragment_intensity_data = np.random.uniform(0.0, 600.0, size=(num_rows, 4)) + fragment_intensity_df = pd.DataFrame( + fragment_intensity_data, columns=["b_z1", "b_z2", "y_z1", "y_z2"] ) - hdf_file = alphabase.io.hdf.HDF_File(hdf_file_name, read_only=True) - precursor_df = hdf_file.dfs.psm_df.values - fragment_intensity_df = hdf_file.dfs.fragment_intensity_df.values + return precursor_df, fragment_intensity_df From 18369e79e630a8bfe9185c9d4c60c290a9456c08 Mon Sep 17 00:00:00 2001 From: Changrong You Date: Thu, 9 Jan 2025 16:36:18 +0800 Subject: [PATCH 6/6] pytest code bug fix --- tests/test_remove_unused_fragments.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_remove_unused_fragments.py b/tests/test_remove_unused_fragments.py index 0f264542..3f6105ac 100644 --- a/tests/test_remove_unused_fragments.py +++ b/tests/test_remove_unused_fragments.py @@ -44,8 +44,9 @@ def hdf_data(): ) # Data for fragment_intensity_df - num_rows = len(sequences) - fragment_intensity_data = np.random.uniform(0.0, 600.0, size=(num_rows, 4)) + fragment_intensity_data = np.random.uniform( + 0.0, 600.0, size=(max(precursor_df["frag_stop_idx"]), 4) + ) fragment_intensity_df = pd.DataFrame( fragment_intensity_data, columns=["b_z1", "b_z2", "y_z1", "y_z2"] )