From e8938b81a022ad9d55188017bfb3c125592d3ec0 Mon Sep 17 00:00:00 2001 From: Claudio <122022571+claudio-tw@users.noreply.github.com> Date: Thu, 24 Aug 2023 09:48:12 +0100 Subject: [PATCH] Profiling - Select with GPU and LAR comparison (#42) * GPU support - Installation extras of CuPy * cupy dependencies * bump hisel version * Cuda kernels * Examples and profilers * Error 137 in tests * Error 137 in tests * Files for README * Profiling - LAR comparison * Profiling LAR * New performance bar plot * update readme * use cprofile in lar comparison --- README.md | 7 ++ gramtimes.png | Bin 21738 -> 24190 bytes hisel/lar/lar.py | 5 ++ hisel/select.py | 1 + profiling/gram_comparison.py | 5 +- profiling/lar_comparison.py | 74 +++++++++++++++++ profiling/select_comparison.py | 64 +++++++++------ profiling/select_profile.py | 142 +++++++++++++++++++++++++++++++-- tests/lar_performance.py | 63 --------------- tests/select_test.py | 21 +++-- 10 files changed, 282 insertions(+), 100 deletions(-) create mode 100644 profiling/lar_comparison.py delete mode 100644 tests/lar_performance.py diff --git a/README.md b/README.md index 872a854..2252ce6 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,13 @@ via and via [pyHSICLasso](https://pypi.org/project/pyHSICLasso/). +The performance has been measured +on the computation +of Gram matrices required +by HSIC Lasso +for the selection +from a dataset of 300 features +with as many samples as reported on the x-axis. ![gramtimes](gramtimes.png) diff --git a/gramtimes.png b/gramtimes.png index 29afd1dd34fbf277c42b3595bf84986d23b529a4..d19d4afc4afb50cf783339fe01fd7895fc66712e 100644 GIT binary patch literal 24190 zcmdVC2{@MT-Zpxxq^KkbNs1s{+z-(K%~eEa*3eSCYbW3A&^|L6aBp8LM9-*EoU^Smz4b7zjNrr$_U zQPk=a$K{kLYKbUC(G;#&j(6xRZ(PEQl+DqLHs{R^Z0vNc^r_RjHWnu4HYP@TTdwO{ zSsR&~i3;o$5aHWmXk%kxy-!fk^dBD(Ft@rYxLcg-D86K+#c?%jiel6y|D%bMjWwbu z&AtzT8ECe<;XAb`JKSBmPbzd->51b3Y@YtEYquFDZhWm8ziVaM-Q8*4-|5va z#mA(1jhgx@i3V1BvA<;7yIX8K(@}R8_T*MBXDO~@?j9wjU&bsorwTKMrkq-n=bQ$+ zq^*;q*D=vBF){fcW@pc&xVgEbLOu!|k&~12D*Au^G8R}Jd$u#$Pv)>2JxgSjA^v;r zrPB6QY(mt|Gf#wU>hIIE&u3a`cu*JzL82VD^vB$OsbQ#vgw0Px(Vj~ z`9=jD=)JUO&z`#pSzE+S>RqnYygC$JbudP0>+apV>(b{Y2ks^(3-a;vKT~>{qF)xQ zs#NX6E~?d#x_Y~~_y*4X*X3qLf9e_;t-N&U()9GS&PYe$@v~hIXJk zmX?n0l$q?HyMwIrZ0}B*Ept;t72*SlqmtJ+RGG=qbK*RioppA0tL)m;%0qLuAXiXYS;YROK$T)c)|7s4>YNuzqBXkW6UWC;?Kf%0=j;=E{!ugJT zbg8uz<7R0WH@Cug0oB)=WgJ)d$}i>O;yMy?_$=@Jp<{@xRhs#ytnKWMIXLW-_uK5- zS?qTtMu|Q1Irjy&Tt-2y4<2vdiaAcV-!rUF(G52>Z+d@wzujQu<>LE>Ki>A9N~}AV z>U?bD;h&#_USF%(ooz`&LxZ@>dvzgxxJFg`P@>9GDrdTbp()dr;T|KGySFz36?y3H z^0;%)mh3qF*mX6BI7NwwhzMQ#zDvx!Wm8!&|58fKtck6*wzj7#n(q2wlc$S|OMZjP zVPARrnpYS0kj3DTFzdP*o{(#HCPIcEPlOrqG%_~E%;bOI=H&6L>jTeC8d9TF;$x6J z-*Ru=%CPMVZV}mWHk^;@s*qg_6wu7{@b&c#gp%4ct5|z zoN^fJj$KIj5y6?d@cFu>rGIVWrLrPlju?60jkpy3@<+m%MrRiy` zmgW5U^Ojat53%xP%~^I6!9f?M5S`EQJLDRJ_NWG%Hl}-elz6f1d2aRdv!1T*)->a~ zV61H4mxwg&CsNk(kHw6*H*DCTlxbtMa?P6BId74^AvVIyM&I;o%|O`i6Y_#q%n+mMvfIG zmEm(_ORC*ai?lJn2U7DSo_AL<*RA!`Xs4g-?-xI9uI%V@a#m51J>aCA6gR2pzw_As zoY~)dUTS4$J_-vv<+*PAbL;M}qf@Q7y5DEFx3_l;4GC`amCw9B@c1g%{AihAM|by5 zypfQQ(A8|$8dPwPv9D&Pf1xTxw_sC9P<2IV^;|`vH*3tLcV-TAV<0k#3B=Be$0yzS;3qYI-^bfKU}1jlR70wM;rH*#8k(Bhw{4UA z_U+qv@Z3b5w)eYt?+_?nK|w(`Zrmt&Z`nE8KgG(jX%nA;fq_F`(#6+NZw;%sFE!#j zyyc^-(&p!^JVv{G`S0tgm6hij z-kC@Aw`5Kh1*bg|or5t3fzr%8GhTgGzxX~Uxi@xM=MRH)yF^9Lla$@DV~2=khaCRG zkAxr!H?NG0%xF&)PxGjThQ@P^46BabUS8x;bbtNpBfo1C_q=%VLd59XwwKzuIa(3> zBqdAk(lL%tG9Zxc`x4(>nH}#xc==>w(%zBwv3F7I5Zlg*nIu^H5Fmlkb5iF z_1?YiAF{@ENi5X25BAoGYj2bKltbiu5wcc2m0OwYQ(K*JZA%Uslt0>vg|?dPZ`c&V z%E7TGOx$!GC1TTa&eOxA^63ldo0AFkKYn`OMZadZ)z0RSnv=&IxKDv368rYf3(vogQN_lpP!oO$x} zX<=dE(VY3oOQZ!WJl%KU`?a)Xw6wjyD(F8Sxwj?=-Jk?m;%At7j=ye!dv9W6Vd3`1 z3MDNOQPJ)E{HF(-GI;vxl37okI+g7*-u7{+;!9))Io|j*mS3{8*{ck91sD!h*xY1ppK>+k3Tr)NbwN)6S8u_<41Jtu9HE znL@qE?RlX;Fm3&-yecdf1v-;4nGYXVdZEDH`~79L^}CfWsIb|&U*91JC{n^58ij8y zIMQs1JsaWNK0B5fT!35j@%6RcKk=qI{J=Vk-f!Dqzkcn%hI1dewZf8;<1%yoZ@SI; ze%Hj)y6{~}=c^M(2gC~8oHWTE;Se`jOQqWMDrNr=+vxjeZYVd?Qc+QH$-zyWoD&OH zUw&67UV8VtNPmEpVFO>~rMmInYFd{+3v)x<4;O`a9xfUiq_)Hl4_h=d}|OF~^4wAGRGhpjkIq|N64j!rsY*4@se={N?Lya&vAi z=F~SdoM=opod^!~_upC*uNtz?_IHW|VyyP=CIG zU;TK7)lZvS)D?ob&>OlPG*8zKEfVDA=;%29;^*}*<=#1@cyAKYak;3bR*D`uUUUlu z#Nne($%Cy*QF5M}jBlv}B0UF;s4xxLE+(dI_vu88QmN}o=J6NEC{60Pp`l^AeO=SF%6S2Cy-$#sn7m~#QDug8(K8&W}&NJpO~2Lvm1P3s&V-6lI3eQ zt+;yi>Z3=G^3XKwv_}5Sj5YjtXH+deWE-^m0m({Ry3>8VH&T|72RLS+?uQ!LX=@9$= z=*b>z$=*-tEH2DfG(KlV*MXXuzgKZ^ ztzW;4scPbCFknzYef`C$AB~M12fxmbrqX9>bICZe*$p-w($U%St-9Lwh=IWl^>hFjb6o*E~nVPpo=Lsl0|3cx=$&;TMOwG-_^tk5HQ89W9x8LLXZJMPLe-sg5 z*_KC>J##9U&l8|h`rRZ0t3VrSa!(*Vvp%D6qsT3j7?Hja&-%U&xbLW=<4go%r#G;9kWhf?X|a><(%nD z&W%EQw6L=J7BA5^wnbWc6Ll{33@7S#6i~zT&k*h5)?14|-`_`{>jd9|Q36m&BQxrnayJ2tt@!Pj`p`oF}V`BwCGjI5d(W8gb zLPOd4@F!+w;iisOR#rLbdt&u}?OmvfK1Q1)D{=q+{TVX`>6vpj78ZHh(qUv-7Ig_`OR8$5+!mXok9vfeOeZCGUyveY|S;> zwDrNr-*yC=dq#%Ti_>At?dD^ZPg zd;QwKu?G_5ozF+Yvp`BuGKIWBT)zust9=?s0Nn!%UHZjbjGcc7HY zVTKwzpiN>R+kR|^+_E)m)=1T!%W<^NuA7;+uTXbBGw}4ulLw)po>_K78$N$okKdKY zT!U)hdenQv&-xvEmYMu`yK*RZQKseQ&;g%~d+8Cyzz-KwudD>lU-*{RwJlcWJ-!`n zb@}=8)zW^&Ym){Mo53b_JnA^jEOFeYJ1&jo{fGBpQ3lg0ACkGlECZjPwU1`+TF5=)aKw+=Kz}DaJhR)K` z(qX?UCLEeA#oaoTRP1Nbt6$qNt$f1V%*4hP$Fay=TK|Cl%>wJ`=B0FNa0S@Ooc*CLSxkM`RMsHa^EGM%lSJEFq2>_MM) zqu?IdIYZzhHPw^vO{WR6{cLGA|eih6;94Ud}`}ZRY>CAxqH{zr1RIW z<NLe!o1k_VGE*4pYu{N=8FhtwAG(DkdcIHC1DiB^_hIM!90m>NV?%Ydd z!>0Em)*=2}$)hX2zWH63?6+?F$+o5p>*0xsFP1lE{a00tj*fmao{Q35PLW_m{plR5 zkgX8s?>k=qX08D7t##wT%)|$$xA^Y0Od7e)2Z|Ry&qg{ZZr;3k7hnVmHyu-zZ|&CQK=qlB3cMu?A%jr)rSO6XO02ho!`hK2{IyDp;i$z)^x4X(zw zMt2dwk!YP#wPDpcSB;D?$WscBglI6-6alP{W#@p08@Tvd?`ZS;`<(kb-nBX(F)>Ln z(?DdScew%ye*Dx`YWWagHNUD#xN)dE;?@HD)L`?G*fUS*6TeCCA4`Bp-T~-8x9nwM+mbx(L}73=B7G!W4ID zJzq%BHURuNh<>DS`ZVT78e`)=R`sih4UD1gd3gslU1orZs$x#DmPg2Pq0m;?wwN_% z>LQHTBrUcGXuNkpzu`f31~58`?mY~67^(Kgz_2Qcw#bM5Zdccp@5$O+fOSP(r2*}o zop-|z+{`(bU2)mKz+KAvR~x__DJ*NaWPC7iY~8YjTD?(>qMFlIppOuu=3KUe?WXFzHOS#z^J@SEp;36)T1jBpb@qj}6L`c!Z!)D{)C?Jc(IDA%SW?8&y zvOMBaSpV~Sq_Yk8I^hUqt?=7*+3BwON`?Gt=gfi3#5egwC;J$22l+PMC*kX=zDXv{9tk<2_Js z1qB7s=UR$!(OdZVRw302P<7hc+izo8+kvnFZt2J%7q~}t4e-HTR1&HPI2>_+!fJA$l$5cDt?5O_wq&-YgsPSs6TpOr0Sl>HMq z$N5YpUfBAVg0#c<@uBhf*RO@i%_B(k@?2@Un!p`FE4+t{8H%~T3hy26=}!m zb4URm0RaIrf&zSqV1YFvM8^{TO!ih+a1pp^D7F!FY5=Ig-exx*?uUVa+d%ZP(9zMo ze)~2M3-lRNgLYg5BbN-{!omU}wLm@j1Hqj{X-?lCgV=~MZ_Od26-JU7hp!4K6fw98 z@|!4WqX?X?*(ae-X8RgUOG<&Y3%`6hjRrh6|0FavyTqBFd)?;E0+@z2?BBniu#6za zPatk zW`S>dc|P_&pT>Kk#xYa5@TPsO&yLVx=44`EU@-de_T>|4dy!)OxB@aS{8F{|IZtWZCw?AUC(oUGFt~O%6BEm7Ezy6GL8=un;5|2#7WtQV|E7^l z{*V
ZMZYjR0Tvwi0Lk_F^K_CD@Afq{YD&WoFem%E07|IKS?XsFa*%fzHe^QovP z3LKApy8S?bLT&T}D5$Xs+hzETD>q6@OD(Qn|5~To^y_jjkiy_Jz%ia;6oO2k& zCcSC4+M%MBmd)T{|5!xootDF3vUJIG<`@-*-kYmd0#Wt>{wrt%>GdVda z{ZmfPT>7py}i!Q(l#?U|}Jo{?5$TuGLA^+)+Vc6)#67?W3@Hq$4 zk|EI`>gwKW-&deFOJLRH%*F04yJLx^do%0orzs1&Q8Cs%RFXbeqG;!ZCw{F%9!b)6q2d>N7m zByc23)%0)yqwck9UXhVoNNd2F9DP$I*q(QLDZrgD2^V0xo+5KhEZ{5M_xN~ttYS5I zEN<%6(_@%zNzg6=LD-<2El^Ngx06DR1A(y;O>6>+M|$-4N;oL<08u5B8#8pGe)s9bp2D#9ZjUJL{t1 z_c$i^RqS`bG)KyLuG@vF1eBGi#|ld4b0F{V=;&v0?L_r7At)iN>UGJv3tr1tZ`1)J zJUl$?l9jc8pG}Vo8Q=C*plDkTw;cjs!T9mxM#^0x6 z@R@AVH-zx7W5b*We(Zf~+PGo+sbJTbnC)b6f>KFQuvoMK$ z`!=3_EJg+e++-yrq-=fJ0rY>eGF0T1(uaWHE9mK$JeU^?0O%kH8-VQN=g;)$a)bl0 z05p1Q-ntoj67DeHlX>TOt>yHv?uW6j`|(RCN<}~x7T{DICzzS4!oIfIiT>0_ZnDWZ zN-k&Iyh>YJ8(9$CZEj^ ze>OtK7r4Av#&zsed`$<2N=e{}oNhHlUJLqDGgDe@=;O!v-@b`xmkEoA_y+H1T4S_wrl?>zGfl_$lz`^*#BTbseO9Kt9^ zLqT0Uh`?3Ew7W^xd6Vq?IP>ISvp1Hy>*BpPZ$!!DK)ttUUjim0Md#xkjlOVPIhj{* zrO(hK$)tx)=LQV47TQtV#n;^U#~6kP{5rwT;0awpT3s%1Urh#E>%@B=9^)+D_kQV1 zS+u#d7$f)|JXq`M>dN*!yrUeEI`-?!-IFnQ?%kt{ijEcrS3llY$A~;R;=O@CMX!+l zdy*yxMgEVNN&VWNWg&I^Jp1;kPqjyE?^`NfR{89RC5BTHL4}31&d$z;Egz(ybNTnB zod8R57@E1314o2RNBgf|?r4*Gm63-CjHaO$|BNw$+Dq6^KQW#@kAxzFCWLaAoyQbX z6Ew3n0J(fZ1lfMP21VSkF`XYMJ!s%DA6iRu@3xyaZ)O`CSbc-SNi=>6*{}`JdKp;B zB0|$qY@!CM{>;x?f-nJPOgN;Tmh3FEn|m(2az#0d0uKNd<3u=X33FB0v&cHjjUA5_ zj_j&EX0Xc+_u*h?J2N-e37xW}q=cf#LxCW|gV=Y4z)P;rX?DEok~?{BsCDiM)naO( z8ec<|*2r;Wk8qk~M<57Ubshiy@;n`uz2tRzce7=%HZ=8PC^ZD&0%S(3CTqQSnoA!X zFGF|>nf?$XH3gG+QEzpu?bsSj2!Q{`z%>ei3Cq69{1njyWLLNUuyU@m(-i}Q-vZLN z*w4zA?%Tt5!0Cseu|}=xIv|d97$96BpqY`N0DTW(X6)O=0 zUz;|HKY#He3NoOBDRAgHjF;J1%k9NKFT|ar0j4ZO-?|6doK!)u<0Y>XQDeSF$Sx{P zXlGqtjY@PUHg+ch*8`VLy7iLb+}IXSH>|2j8iy*v4}dpVjqGerlg4llVs@iDBiDUE zl1R*L(@?2sx;WG6d0Y=T*`2Q)wb~dG8v1h14j|s)*(jlNFk_v2cGxW>>7GZAZLv`F zxnuV?l}=_V#hlC>>IDEJtSE}mZ3w1qsN1{VCY!>sfx-J|5^LOb5U(h!g$TXW)2Cpo zsB=q%ALJldu3zP$OW{U%pOuBFm!si_)c*aK!C2J4HDZ>(zsVC$joueQyyut00YV6s zUlkF4xRfQ-E0sjAxVT-}`xd4<8KJs-Y;HbK?0`o{9klC;7ph56V+QW{vVm`pNo@n+(Y*TJ26`1pJRBV<$IRQm0mxi$(hZC(Il-<=gJS3;#`tVB>_ z#x^^IWkf;*+rs*CYlt3dNYykWt-3&^+k%oEAmm~q-6Rn#>^ z0swU%%U|9TUaD zKBM}Stw|agchDrBfk0oeV#Ps6M`>tbOBoscHg!Yp2OCjFLQ z?Dvn&k`@&B6^DZ7Cj!nz%dakfx}OzkM?$qWNz)rDaP|ZSp)HbB&=9`gya`zNcN=DlBej||jWH6smj}`m{TI}xZv<{Q) zcckpURBVRk+&jEP2l!D*%$@yz;>Cd%NdeXR?z6Hjz&UIYq5)xR8@V-9m8ufsU5Gad z?8cLQwkM~y`%`FROJu6u0mi&^-L5mNsrRoZHOK*C(oe(~oHf>6QoY$0liwnrk9SH-%+y&gsZ~jH_e}*-$SWu_|s$IYx zbbwLWb7LnHQx(TuUHRLn3-^#(x*eZ9CG7?`Aqg<^(PfLHtK~x)vHbm=_j|l5BPhuN zu)_==zQX21Ly?C5{=FoabKbgis3eEc`7QhFg;#OzTMyDDAMNQLdiGYJdVck{2T>tM zFv(B{H8eJD6gTm}^|yWUWUfy$W;JX1;0bR=Zr;Q?KptF?E@npZJj=rmaKLj#Qk>8^ znCgG0afn=5frjd{`@$<7%ql5qX@&S(?Y45DmzQj>A)JjlsQ17y{*bX>|sU&OpB5F8DSGs| ze}8|9!IzDoa+iX?n;t3Vf(`Ov_rB8#3Mm%thaEx7L!CQ}r^H}-^U0GZ!>|x-;o(_| z;B-N`lv$X*3_n7CiAUBhl&{m_F1q1}>8XJ2*f4;50zP{)5O;GlFdU|b8AFQpd zmtMH|-s+~3V78W4u~UeLRrkK@H*f#S+V{qGtE!UC=+5CczmiADLQtxVj zx8(_s@C9BRcU!(7YNg7bNHGH)ETaf=N1_%34UlF>5h(r|(6(7xt(6JPhTJl?=@JD* z69}L^00srk&71(!_kg2XLbkoCxrPjC)`13{=f%4d01MOE_Enn8am~-XzE~T;mp2T4 zvKTOi=!-o%()(w-$CiCnGGFXA>-a?cm0D;PDX({jC z6~liq5gZX1=!%D40CouDxZ$_wD;Lm@Lk0y=2(sCw%O8b?K7-H?t>-BCrdO)T_h}h6 zbk)~ZRWV`we2?~!RCYeTCNnEK)xGYE2NMrT0UdCe1Z1F>tYv2pO>Ab>cBFxGOHN*1 zuj#$zbXUOHwHr35v`v_Sa6o4&fN5zhE9*TVHw`$`rdsPWtan0_aRV2_L^KI}-w0?# z?!R#j`Bx*HUS!Ffl&j3NHU3mwjMl+PR_t1Bf+xn~4#@AcNug-fx0cI(8RuiF%KG(_ z2*dK=IJsrKN>XP+;7mkW}@# zw`4IQ`agIOx!;Y9?1X_KKH4fsUQ24g3?|Fobq^isEapKtf=oDm;>5?L(% zKy;|%$4QG?Mf8Mh2Nw7jj-fYJ!BTJ}*z_D37l3tP8H?t)YHZ8^q-SCpk@H|*-vo#Z zZCg(&{K>D>^z_>7>lPL+Ap8l{UIgbCMV$$k_5{p-1|;oJWqhn(^SK?|Q6JdE${iQyr!vkU)9SNt z+A*m_uY$MC<>gCWh<3|SJgBo~vbi?B>(K1u>Lz=;C$40p&WlilRH0me9Ka({E-i(5 z8N#BXN95&;%FN`&_ZrvA0Rby0D75|lt=?0%6P-x)b(VqqsDY`eFVdEX^FaM1Cm@*V zUbzxDVEBbNo*^AZ!`wwe`K;{x^DJhC=qiHKp$NHxiot`Db)0OtTA`VXe_FFy+JjMM zn!1*@5s*M2aKK@z*{&TW7XB$6AT>}-dGJ!Cz?yB-Q%RGPljD}U8Cf9 zT43}2pz*XJ^TD2Hbx%*y_f^L|I{6Qvo!b0j9+eXt0P?n(N%2kH%9%r36j~R|E&5B?r+R` zm-1#F(!aZ}+V8tY6zV_0#DjCUVPHu;aN~ZQBomYLjbREzE&jzaC^oA#*D?8j%&yX1 z?lRaI@n*?t|8;@86gx6nHO|P5zqrv^-~$3EI+_Q)iebWggQpg;OmFtz%I)wOo{Ln> zKx0eqcz7JYeM5p7;sWnRq6qw-etCSTpd1#Gwt<1w92^|zMR~2QTxe9^PqQ*%V$CZp zCD0GVNj?%_^+n)GCKie1t3^qr22!$wpZtlW)2t1;a62Y}wM<0jX5OK25K22ZUQdk0?HJ`R zzB7BewQL8vx8C!54bcF9|J&g6iD?S};n+W=08Ng6N&(9g0tt9wW{`iU0q$O=dt=sEDXzKMLMCvf~SB# zYZ<1F!(d6!QewlBV3YgdeLUUrEMS3KVq&5hxK(HUG_0(D`+wCd560Ilp&(mK{Mfl| z+m-$5V{f63ZRO=XAptQd=i4ltt0gI`HXDR*=RSV=v|Csh2mDX@`S~{0@KO7~4v8^I zux_U2TH2t>P{M88pO*IWYy*Qo(<2z|T|j)2!3G|B(re(xyaQAKRphU+0~j7MUOwn} z9bMhq7{WPHO^{Q&6rH7Qy{RG#)CCa08H7iIR&dXn6g*=>oj~rTZ2B;b1RvC()p1u@sASz|MLsGSUFVsD8Lv%1o!Oh z14LXvjq6EUNl%Ye`o{6+&!1Jw|Hv+oC@a>iDZ+FDY~&92J@F$>4Yk%8*Foxm#iW<$ z2yAhBWChI>R%MLBUEQ0l^;(Y>)yHzQ>jWYEGe^*W)DzR=%6RZO+FdGjdjG zlx`x-JIVWN*RI_!Tlvo{5O{TZ3x02@5M;O5)$Ah6i2DSHElDB=Ksvo1^6kOWK2Zth9s~r7jl=AQ6KoZ5G6nnyuN&^vwAL!}~N?4)mA-{6MkyDj% ziagmP;3p5BJGT}M;@@H*3O}(_nFzjxz8}wu{)D$!2N>?C@@k|8uj1?X)FY4MBZo~G zB(;^=YY&Yt-z{DdO2)a}|3ljROA@_Z;ucDl-09voGAa}Q8-UyUn^R`bSdNiaC{0T* zic#ROm6Zs!1+o&n?{WaS7I4vK3)*8p8Mi|X{rHp6M~~Qud6r7C?35>nL)4)B;FT-e zpy6MLqs5bD`WbWZm;q>6A`?@2@L>?mm2!Fd^y#1nxdlbVU6uZ!L^}KA!4#*RD+722 zH5U7;U}pCquH9?5aY1tSy!aTVp8~jYWEcO4V$aAOSRIy7WUo~)|D_H8>0|F&92g*P z8mEr{#@y_^;}NH$QEVny>8N@&U7+hE%DPHULAkdE14NATkpu&HXes9uC}55Rp*mV4 z&chR(M~jPN%Jy))7m8gUeG9j+o<5xWH-K_O)1aepNjkvlD^{=O|FavR;bG-JBvhTE zd!%_0cE&qUorteC(-HxHHzb5LSU}S&FE1}Y5j(LwcCpfCh9O9n0Jt?2&CSksz@Q_j zxt=@@XO>;%lKb~}!u($-Of&*eTYd_`N`F+RVK-TRzzLG-Zf_QeO?;oXSIz z4})R4j1+KF-$K4Zi{9qS3$#8=RO^1E8Q()YZUbR$*qU4y1zcnPx2?Xt%{(+o}glAENCK% z^$lckc6%LSy}j^(124l_HJN5U|?bOYViD`w`p#0zwYl7m<`V?X!s! zGA56>m{5I) zw*li4gsq#;SpNE4$P&&_82Racq??ABI^wFq2LufL{4Z)a)BhbQF(`?XN8?Vv-T?bW zUPE>_>6{v9q`P{x+fi^QMpx7zLKdL8KL`)MkI9OlnM`p&_afkbFyeZoyaJkIzB+R9 zWejkE_p;*c|9TfAw`1j;{=cL=?*Q!X!etT*9!gxSy2=!0er4Y zQ33DKEM30-xg&Xqz6cNErNfV6=g7y!`PM}m>Xq`#JIH1N(P;&C@ba#tK-|z#;Mfmg zj)DO>$v9di9vj(?Y=VMZEEMpcP}Gt-;H6zpQ%Z0nM=bMJ$nX4Pte+WH#Xx%yjk zxd3CUee15JQ44miJ5GTqNsr#E1`ZvJ*!XhtP@Is?gvvp5m5aAPfj~BtOyt6=4UbBGQ65z_ACCmiH;1w3aB+ zo2>l)p$|)0Sy{>aOhJ_;q&}DP^w!ogCjahqxZI$x9l~zgLnlE#M%8KD&R2jr%yM?z zi0HY*<_8au!_=TJoT^Csw<*TkqEvZ5rMrx6XEEcHlcQB`>n?`{E$Y_df}7fZ6pUsC z3dR&!vP+k2ayXC&P-;*35&Fb`6bv=^<>h_YZo_2SlzkIBuGuLU;(v$F#Z8So7sYTS z9P!;lcGS&xUFq;n57n7_-@eRBufGaKI0^+?UbCjcSuj<^4E>MNOfL$T4fwRr;?7uO z5Q4wUYDpOiG)J+AlB7rw8NzQ1Hz}{qe@2d>9CTz2h<9olJ#s`Od%r1WVZq{up;3?BsaBHK3FVYZ*fX4CPUk;hnxxLxae z9*3gjUxpPFE(m7kx5jmApe-K-BW=_Aj$D@VdfX~E56=jcv89yr!pv1a&MzbkwT)d1~|_4#zGY)V*uC;#zq-*486oJsm>qFEBOmdRg< zrALg6$BLH!>!%p}8-w3}cNT&{rI0oJJ*2&08wyxeSg*WW#$hKy8fJg2sHhMsBp~xO zK&CZZ{zMvpZu2$mG^jf!R@PXK>;3hLt}Ns;mRI#kf&i4kC2y$^{~P}H*o|f9j^UlG zD<_EZUr@JB4*?W$Y{EVMq1}*_^N8yL$p>~t2YMMHVZq9+OjCc_DGdq-V_{7;E>hr? z71pJM^0Ix@`y(xWa1lOg@Um{$=$>xgdjHJReLp47{=JSy?Iz88RYHA)fx~0)6QH-T zoi9eC@jq9~6?=dQqoPz2{K}>K5P8_B?iL+=Imr~%cT#L*&)#)^r%TpwbBjoVpak4( z2kn$u2k!u~=FZcnoR}-t;$#TS#|5B-fjbJPM>-X;*&D^b%H;x9Rnbxpj}V*z_S#>Z zasXnaTtK=JG!5eoiCmx#VrxcWBiW-cy9%EH2;l4A&d%&_19I0~SW{RY9?t*QUdV~&JH~tpM#m=BRV-XJUwz5TaGV-kX@dXZy1%7Yvc)>0i#0PB#7ZYEk8ElCDd9rHV+(-phwyQSata9 zd{Aej&>!njlM@mXS6>jpt+I%MSOe8|$D?bXCJsK?wvDXgQB^Wm@Sc2~8%9d;F2VoJ z7%!Z>^oG<9p${td*~8LRx2uvPAO30g|DSOz;P(D|Z-etw97U78fqry_b6$#Hdu%@J zD*s!F>9|A)D?IWtO-|5{&_f=_tJwbbg3^Z`b(@eUaDqQ8!?h85P+RmoG|B$vEY+zW zpfg=+fC3dQ&-WMjczUk%D0jfsPv|^AQi4|Y zMl4qmrd8s=hU(vr0-5oZdE-WUpta_<{(cQt7E;|5_tfg{fNBFF)g3bTTKQ?jH~(0o!~`E_6bGZ9q=>b zK$l{`Ow%4EkCq#>_E!86>ZjzA55JcAGTs{-L4+{mo20||u1uTWJJ|7qvo1Dfijh#< z;WgnOdTVi>6%D%pulGdsKNHacD*J73Z|`TPNM`6oUAz|xK9vJ*Tl~#0@^r| zMEUV@mpfvcFZLka#cnvPnHvNj_Dn*S;7K|?YDm#tN=$bsfNjt(Zb0#8LTLozEv}3q zkVW`he?d9Mw~?x2T+cA`5nBN&CL`1;w75_I@u7r*a!})hr30;}VeW`BNj$nsFA$QT z(9`o7LrED~EbKcclaa5Tg#~O|j`zo4E92g~_rlaZGKW2Q@$HD^83 z^XJc@E%auXdfPrC`W*BCLUjTx{w})b=JpKprqowVme`&&Q3ShE$Aw@uLnMyZz?m2^ zd{;pZ-=_Xz@*-WBEof?LI#!c(@g6(HCakv%_PUku%uLVCT?WpXhGs7WJ?%({JbAna zUo(ngLN0`Xf$T{Gm-?}~nwdzo|4fL;dh$&$aidb>oF^@I$O^E#T-|>_Uhuw?+Ku`U zx4ZKf$xF9w->IPE&V%-d5$@gLUK2;gkhmV<_x_#f!dLgj0|IqgT3Ryc*23O%8G{Dp z0wXL~`Qf%I!U8+f&Fpy!8e>iaIdqc=s;_oK&+{vKb zwD8hkvRS(GYIrJ^9#9Ju-_38T-chz)jTfo znhS04#(0gfo|%4GUhNgnzf9B0Mmg_ju`X7?ht<)+4wUx z@1{0dZB87};At43DhCQ^ZUky9Kz3<=RLOM-Sy%#u;14G1Gb;8^7#pqH^J&R3E{`5R z{*0r7w&4^IQl*jEcmH66Kj6exxWj`$fA;bd8!L`UQCrEt5CG$@QPca^E1DE zdap>8;N6KA$v9Po2+?ZkCZW^=vIF3>-~Ic+7(GhCE0G&WHE)%HCG{aTu&X6EkInMq zpbqH+2Vx+4;UqL(>>D9l!a$_hUkD2atVuoxOVUqF)Mb`G@D!K1kkg*z^0&i=5&ra>`|Hl zH*r*M8xFYv)*#PWVjfOYaxz-0 zx+f`I+d+f)Uh0X#sE~Kezn|(b zdZ9{eWUz4-1G!`Dj<_5K!x%=><|+B7u&Co?7i<^ENb*JLz~d#Ygn}Ht%*51`a6FnE zpqUA#{_#Noq+b<$G9Ts!vl?FPHLRqFx`Nifm7Ca72w@NMgwoMAXuvjf96d9F9ity% z2O@?=EVEV^tQw^75M=?{d@BE4rx`g=c_~Qm|%!Og`EL$UO9r+n%KX z-QB-pTTx`e8or$R=|?P|5T24^Q@ocHC~Wn*f8RYQhy}9-*_jOIU^Loy!w*m_E8qws z<|$k4VHOME!f0O*kEK#G!}*52gg|G;P5&r z0X(BmkAyE*CSH;t=1;g@ut(z~blEDrLsgo_;!;{1D<-SMT0%g^L7gZDxpKNBpMvQ(;&>n&1B!zXZ^bKK}vR zhJRHa)i`l~3dST42bJNc8rOjyqr!^`H+yW-A{#ekNEV5nCxm+xz;w z*+mT~3RgiUP}2F}L(V5gRd{dV|H>H_&oy$&!1)RZYHyZfj68yvTFb&h_H`AYCBT}S zdA-8S+kum@0y*2Uyjs7&M8{!`=x z1oHDRYp=zQ3H-rUe*V>9ICp7gioyOg+GO6~C%6;VHxPf=;lcvlbQL)*=io}+j|W?+>zkxh62 zFEkX)mHsdj;#YfrfVW>yIC=4tb#T|o9XrOed-q8RSe2r%xgE^WVUz>-k#(*-&+XgD z!O^$y>awF(?sL9oD^il*IJt z)|p-jxI}PH;d1nVOmS?1yRDum4`AwLaKuW_#rwYYDRkBV{NhOk1s`%X*m5ff6y-aq8g1&H;BoK!P-1y^v)136m}MJFw3_1tJFr!|5~1o~BmP47dH?JDppT|t4! z_&V0#tiauWipd{r8aerGu(ltE7n}faB%Av(#o>cif-(?ey5 zz)}!)q?nQ0AV(Uaf*gZ_!N<)FNV5(xp_Xa$k{n!s>bwmdj-12REYkAHS>eP9e^j4R z_<)zWU}uObdVfq7&m@Yu6+Hb7IIWI)fbLR;4{XE9X>xMleqz9S;GVa`zFj-F$-&>b zEOT!bR62qPkk3JQRwK3jbw{zm5ZL@El;`Kbo!<~TFd+GP{25Zoc;b!+)c3a&5^>BH zaEqsWv_H=7D1opX{Vu%mo%t!8v-Ii54|Twy7=)P>8UT0Gx2P$TN1iw|5m<{H=mn4U z$nfw39D{M(z~BXW!k$q3J!4~IF@=+(C9wlqtYT8ijeYWG0OK8OuD(dHr?7-X2+&O# zIgV?AjhxDf#P?Q-XGv4%hmr0+9&jK3Jty8&6gFP%oDZ%0{n@-bk=zmLbUNW zL@KuSlQTL2oBVNSET@lk9p28ZWK&G+Kpyn0$^Q%Uca(${ETz~fquV3m5$E!Eg5{qt zaR*UWU9r_q7r}%RHtyp1D+oa0V}#DeCZyX)HJq<|8#dKWTqWg_3nxEGIB1Np8=uR% zZb}Flb7z>-6~e?mCbyW_IPJ{`9`p=w*|F4< zZLFA}$Tn7DP=o6b%Bz)07_?VdG9z(ol1{UPG9jC80j;P!6i%{%4A;#HGumNnVFpdT6~q5ChU zU1Oni06S2U_ACr7E-Q!L2&`F#CiYtwCjS#y0N4YxOD5#0)DBnrk2|2#}RZ z&&B>n>H*44*G*0^vq9Lq+^d!K{(T9KIE=f=$;!Hux`Lm`v_#Qaau5YduIpCo4#%z% zCe1MH#%G|&_~GF}`XdR;u)+R*KJ?7$cztN&QC97v6T2{L;mZn}}oCKPep5NQ(T zJQ9|efbH$pGi57*8&%c;!g5H);bGXXX>FQ|jR~0hBe4YzXlyAAhnBcyDi4zm@w||Y zQV`WJXRgPUfD!K0s0+3A+8t|IQR}~V!M-!~7sgU{{FY4t21c7Skj2cvwU74M?%pip z=mD(Dg>7u5wppG%H6p?`G;-qxINI{IpC2RO6&)H%!lid>;QN+DOUHqeS@6_}qi5uj4(r_czX0s7aW4P> literal 21738 zcmd742{@MT+AjV`N|Y&6glHgzG>VWZMVTu}gbJyUF*6wxnk1E(P{tyad9GB3gjD94 zqB3P_pLgrG*SFU9?Y+ML{`R+zh3vwsvUvp`bcZ-bK6#Fd0r+of-#&iCuH<6K}_rL6M z-6o?V=*zN3NJ!{LDnrr8jUMto;;dGP-W z7cOw`ILrR-{%+mY-tgVp+S>FC42lK@t&C#*U0ptcO8x~m?)L0&w)JR;mUU%eXV>c~ zXmF7iH!28>ja_4y@0ea!fH##?R`%vLEB8kDgaya`=*S-}Qrf$hI{V?F-}SYs@kWI% zS|aSv><8I)TDM&_s}Aja9lj|Af3=J-IO8@y(R^pSB^&-{G!Q&E^6^PlR#u1p?Q0p` zbBSJF)Mn$N)hhGtmn;hf0n7qr6%{>) zBW9~!zj@&kU&=i_J%RGS=X9rMW*D|@+txWUvYB(UzE*rIMeph9ne8%hR{P1B zLni0WZN9tRQYchFUi`<;pOs~PjDEqv!CQ9h;5gr!!}|UEcY~UU#08($m}n;cJtDp= zWgkBtv;W$9_}r(bhqj+<(Mr)1l|22L-7-2RCMG8XqqHnsZ9V3CZEkOGZ_v~8pBQ9h z^81S$xAA&a zp&dI8Tb-bD`2C|sBHpCjzpSN2zcJ&)@j3jiNH2(IRsPzV>lYLhWYLA3ca~x_<4NR?jT9MRTSx7wu9e_Hfa|Z2R`@i+B9m+BKY6%z5e3 zCBMneN0~)z!JJ|@U6 z(~&#(?lI;Xh!qqTs+>C2v&|~$hqp<+gT%($Bu26Zwuoj>$x#wd!2c#T( z@*wN!Q_dK-A3*~jpCnIDG#gh|h4A_n78a%$7Z<&(t&O-XabO+;ZaUJEjqkw+r99@I zk)QpVnp}!|78W_!*sy$fczETbqnVsbnfck1HKK0x$^PoS8yUHgXXiPQhld|+@C%bD znpV_H(NgdY2#~ikw!3&y`QFa+zHxDJccg6NTb`JQwd7hwOk-bF_PxD_XYc9nXEZi8 zHnFe}k^4Po$I8zyUbHY{WpehcD9?_w>-c3|)}_~CVd|yos65Nh4}N=ZSFo(>WFS7x zh`}tH?qaNed|EwSGB+=ei&;RfamJ8))v6%e%$41CePn~Nva_#SzG8(+-i0rT9_v@g z)bjQpdvYeIqvK?@>(m9?fjYtOU-Q}-jvhT)|KJd>rTftgLss|MNe(KpHw;_n>(u*E zRW-FMDTdDMC6e0ZgefhNI$LluRXl`~m_3vK~KX zv$V9VEVACXaii(C&(9Zm=@+>@*VDyUZ4XuPi;DPVm}y*WB*M;6|LCYhw)?E(_Zo@f zzzbhqETg5RUBYkLB6vw%LxYo2{=G2I__pTlTaMkkcQ=gp{I=zs%Pnx37&6b0mzOst z!C<&KlacD`kMX?S$Ke~L63l5S_CCu*X|N`JuZBh_x$A*1`BQdN%e?9=wi|eU8U6fR zmttALyR)%1Y`ywzdaCR5b2|yc-$#wzlN@|I4!nE!uB@bFuOV*u-Tv-3{ zM>CCtwrY>LM?xOFt5i6eBv)`zZ<*CezJ!bao0}w6ja{ zz!b1iiCM?)3wX|H4~u-bd4*Btg-3?K9_KY_>FFv{-@fEFMMgvj^K7?Rb9aZ; zZA!n_G%W2fv^P#G)~^4Z&b_!e!LyAi-k64nl*ygvTT2=n^{k?(y61N0BJ5WbziMVo zsSHbK%+E|9Fu!G+NY_qSawOfrFG|KK{&?O6=i&6cD}^ot2VAGV`DJHI`mf+UaxSLb zV|E3_aKd){TmEQSVad7zMJB^mr)-peT{=DH z4)3<%q+LrjwKg1klAG&ikS_k?_nlqmTYZrkg*I+nChIaTvc0{cVlmc@!FhzfftKvG zSloWK^1oN}^YbGOsd#wE8RS@azj^aUaX9!-BC+)rHvRAT(j8A!XL#Z7=gytWY2D5fq8D6*q#$5jIKg=}Bt+76@?2xu zv6UM(Y}kMBpz>&2q47YWO9STTO2JSKvew<-iOExklXPgj-hJ}qJkfs7^Y=U#RhMVo zZrbcV_3cFZ8kJtga;8^zy}fB#pPmm!+*!ANy`{TCbNX>E>Pi7KJ(XU(j`6;{C%@<1 z_oYo~$7r#6Q|AF?X2j+=>dog>LTaB}<(K8bDpe|Unb>mSvsA%E({aQRm%gn?Q)Sp5 z*JEOg&vj&HXDeK|AdaU_=WzCA*{`9o@BM>A#wqXmU%h=>iF?jb+)}i&qI2 znr->?srcQEl&hV!`s-Zr?Mg;Q{QJWAL$Dtr433s^xs@8{(G|^p%a1S^9sVISG5k|v zdbIdAKkv?SCA-?)E+dr*iixd24tD=`5w}#8pcP{Slz*xPQ{%>A9fZ zng}ywH;d2Dvh8(C88}4OV4Z$?dY;xxQT`Z55+DFjrSt=hCZ_GX2XCr@VA z6#ZV91u`igs86u{_2YDo<@+Vr3kYHB5X&wj{U>0E#wI2zZ*A1QcKy2dZL#Ad7T=LH zTY~iGt8jTqp;lq7*{M^$h)tb%KLF9j`1trOSg`irKCenQ$XzrhmEty5x>Uc=Wyiy# znV~=q9e`XqA}%_%-6LPWuGq9`6Ioc>&b;x`Nv%NI2c-05&kvWO^mp*^_LPKpjn&JEW`EKl+ z&Y>ZL0dq2BuFMee)+050v~g$im@a~AQ< z!aH{!SSJ^I%oV?Oj~m- z3GPI6h|JeVu`cO8Dc|C>5m+eZnA`BuF(+<_sd{#F*tMP!E{%!gzvq3 z_kjG_cJAC6k7X((DysIV7ITC#c|w3J^4Qf?+a9#cj_gTKG@I#}k8pEyLm9x?)YN2( z!7@0?x=c{n#pMOJq^WNfpvi*=58@HtyIOOtB(}dprBqf^qkiYkoj|p4(ID2f`vURb zibq{epFaKPH~e*(pQ3jB?8tKNEqhy9T9AS^q9R&fG&A(bbo7aCqqA%wv&yl@r-E!8 z95|LNS+c&U_T5dR9y>8#7KC6ySc^A?YZ zjRge;rr6U9+8}E7%PEDKEDeiU*4@*i*Ls?TmGwFk+sc*NNowJy{o6*UhOeWD@9N_( zDSWZ0`>Iu|Y%X4O`DS*G+i}d3`7?^QlP!WmLN4WdM%me;V-Nl>YE?T&qrC`57cX8^ z%U>Sfk9ora7Wuk0F!1$a|0p&V#WlNkuLAN)I>DeFbQsZqC&Yg$R4D#pm`U}w<@Z)TX%W!JA!QB`%e`Qld|!xJZL zCr6H<0%M`F&VSmu+3*>Q=fW>x)NtClXAj#CH7-MKvWQAM_GF2-x3@vWD4>bis~hWE zp4qPL>h3l%HT6N|U{VptI^A)Z8M$Bb)JvAcq$HF6cTt(<$;M@IN&$6%x`_`S#G6)L z(V3n6`YBFNR6TPGMkzxtYy|a4yvO`Z1FckHK|z+&sF7t6%OV8@1si*NEOGy^FlKSX zXR*1VX=$6U^2u;MefpGSbCiKLKl|P`7jEh`ICW~>%+zQYU~(5iQDA6j)&1Re*P^0M zjBNe*@gs?MbH9E{UhLVgtfJD1cZ^Lt&wm#F!3MHI_n}ELHo_fw zPdSvGml2X;u@^}JGp>?n)2X=GsDQiO?dJ-O7`YW8`z|x_Nq4jrx;98JqNU9?t7ba) z@iCoh7=Pvb>{NaF@m)Q=z2Qw4!k3};NA0v141?&AGTbLo$RU%?(FQO(bd(J+P%9oAxy3eca!uB<)f4w61oa4URE`_YHl+ojmMX!gg*EPb zu`D7F4iH$M?_`aV%nYTv^YoY??q)HCVzHW8e$EToDh@@IMPmx<&T}nZ&z?ys@7s5~ zFC%=JRda?PaF|B)o}kk;w*{r7xZ6E{%Tgy>vP_;ie;2}BicQEneOvu5T6UBBfrwlC z5=#9Txoj5Zez8eBvF+YBDC>GxC)GDK)iS6_H%^gm^YN$4`)gyW5m>)}vMx43aoper z8syaJ)16c8o~nk1t3i%X;7O5xVD?SI~k|@GEPiD`jr52!RY~kfeV_EyUm3qB$&y?`1d$d z5)u;Y)~zG4i%-V!)(p6poC{xgkS7Si7N3xC{pQVt57qVctlz$U3%qj0Co|L9azrh% zv!}-wl`N`AvOH~VZRweqssaQ&0)c-^nwz;yOiT!3pw?{Oyd3av9~Nk$Lq%offzH^y zrCc`N*e6|JoW{n+ypZh)8V}`@A!`ssj!F}!fB>_AKukniwQ$%SBx_UPR_<+QmLEye zr=gzNb}yM3A6&~<>2k6C!sln+`$Bk1-@jj3(!ExkZsfp6X%Ogo;Z!C*x8=nJP& z0ys+A+Ke7Cp}^HYew>yX85zlW_i>|Ig!qXqXB$PR5-wvm@47mz?dMzBM~h~~sgtj7 zZ5;dYqpbM%0wZ>KX3-1-?ULo!CY#^BT>;`l2_TS_hljz;tecB_htQ3+Cu-M=h1e+? z=jjylJnE>?OVTkJAE*ZfvJ|O4xToh7B{H65YHcm?7NxcS_3NzIJy*W!-a;e;6KHYh zVq?4tW4dd)^QB9Ia&ks9L8Il&@)h73z7N!|MWuZb`-FSvxjRkLgq;Lym7dyR9=iz1 zy`|7~SBBwppIW)uExwFM&5lT#dl8TSrV^Nft^4{${wno~1zzgZP{PXA4WWfL! zC&oAd)C6E9C15)c+y??dSWM>38*UQ65L7C9UfpEn;8_1QA4?XcYpjEBD&7jtrW%OW z_{F!GiDC8M;?q~O$6ujM|{^OE69(Ad@y1FIOtIP8!XC;TxHa^jC1{ueZ5{cqD zAB+<=zUree3>_-oxE;G?WVoIeq1ZGd>1JVely&9G%VdjTW>hBA5k16DycpM+FfR7s zJMqHaJ3n6rq0E0^z=ANdCBMJyaK;CpxlD)xw)!GWIJ>y0^|Ltu+L9P~^5hy)GXk2M zbX5dV(etYsRQJ377)tY*;P`MmCPrd@G(|fhN3kCjzxVXCGpOnLq+vm!8&7nYW$26_ zL`O&8*=5a-Q6tbDbRe}iZtZr|_6^2`JRwVvi}oKnB=%HO`@x}m{^R5JmTCtNHvTjU zUx7d;GT!?3?I9P98;Di_wg|=kSy@}gN8639!zyiTZM}nn4rB}0F>!C9u3x_{1wbBm z31#wn>F<+oBDcLnDTYc4)f6KsEI~erHr680q7JG;zLhj7TbyQ)t9JKjs?Pf5tF|sC zh0~4o>RA_i)u_EF&slkSA04j7_S8a*!#6y6>2mezRjMQW{_6H;wq}OUY~r-W(zFx& zb4~!Z7+_kHmkC}A52pi`fG30^mMvedj4fG@8jVaXsd6)n3a|X0AN90oE7%QSl2kM} zIOyl0mXE+e3YE{F4SjumJFs~4i#=qE<|jWXe)?GKDGx561*?eg9}Yu}G-K0-kujbN ztH8^VOfUwTR_*5!rq%RjW@gjBesuy#NMUPLh4KZXT;kwOjEahi(-I-?>%vG^8a+=6 z-l+Sa1J6ic23T?2OSVrZQhZamVFRrvK@Q(grCi3C*ObQ3&^l>Gn;zF?`J zqBoe4H&HTN$muqAcI7D02*{$LgL)( zjJ)7olnB)*rLJOwK=4@XJ~LhlOcTr(fH2mL3b_L4P11fqYdQZOZmXuWddq_`M?fL1 zmUH6;Vc@R8(995Z#`#$q2~14n*kYV~=|x=Y3hAQ0zpO^84OhT^!vtkn5}~pRb&F?}zh_ zrYA+kF?tVG9Z|stn7=zJ!cLSdEiJ8VcT}gWY-~T?@V02z>*(lYdCa@a{j8PembSOB zjDVUpu68SOcBF3D+}fJ?`0?XlJs;Qg&SC>zEX`W03%$qXLX6HF-Oa$jf!vLDN6k=Z zu@0{MtJ3<{FNGITsVM+(qSV1IPj-&sJ3)7>sPAi1aog9y4vYbcze<8a@Q|tQgAGXq z$;_;*;XM9)F1?$@jP16pLov0;v+73b+??BH{VZz7*+ws{BoMZe=I>ThVPPeJ4kXdE zwY6>8xpNT`z4$w~d^7uN&Jcp)B?A;W!mbhH|HJh#d7dzXs+U;`cUB1+-vScS`DzTmb+UAk8@L{V0SS@>2b3;%bHkxB-(@RpP!1u8A$0OTv+ECi_i2yA z5IeO7MborNjCuV?OBAG$%2|$EOIQ-}cDdmq8LsUcx zT#Cq~0L=nRH)eL-jCm>`J5>UBj;w;RyK?e;T`Y~wxR=Xh!W(DUW% z*NXX$U!y%^- zy5q4phxz%bcA)ycO&NCr1KaeL%WO?N^2ovHX~+Hhpv2f;_=pM%_a_ax+|WtgobyHa z6T=WuhZgb>6!o)CD#SVLmXaDs7Y8pOuyA#~5<()LP0FUDWZP_%l&#{76R1f55bb=& z^HiAU4d6b0tcf)fK_MaTeKRD7Q4rarj~EpI_jF@~kxCWt&^a4W2^Ct01qSw9Hc|B_ zq%6e8gVB3d0ZNo(y@UX?uuBV=|N7BO0NujePtEDs*|m@}%l%i-QroTDR)fAJ(vofO zYmRfRIm-}_F7-Kn9T|x|@+j$M!29=K8Ye2 z!Na9*=ZY08vd?|o>NfSo38k}gZM5vfQ~A))P$|a|?jJvXbPNwOIypJHLE zdnK#(cXpq=rz-pQEy2>d9v#huD4l-k-=Z2+T|+gFp`*4=(i<*a?>3JJywbnbAy1d(h_DQ&_z{ZsJIVAu3duB*hw#p!_qPM(ZyY|1p2 z$EHsjzaj)$A=o6q2>R%XT!RyY2GQPKffYT3=)f&yvj*CDXGez@>KN$87JTPX#*`w5 zkxH5ru_$@~N*d-+IU@S30e5&75EbNa2?EWP&l%t&GK`Bup^PtP@LPocmvqNuqqY~B zU_=bZ%o0UhFWrCx@rP6|82T)*2S7-0^ zb3e>Z1_tiucRr=+|K{ymk{u4*+)&!u$^(9oRBxi-nFlOt0o=(N8z1_>hy_7>BSdVb z!X&t?>*MMAYGatWxVYxPPfU+Isg2HeC%>V38fY(;*AoqI^VyFD1lNEcY%V6HK6ELE zA90dG2+DUXhIedyys^d+_hE$lc)%%Pv>fyS7}*stj;I2;YA0*(K|!&axQ8+nV_Ki0 zz4p!B9gN@&K^$am_k_{p<*QfT$;rZU3)5zAqGe~Fe~R91w}G%qSP$x|0pr+eClRrn zXTRo2o@?1cRuZVIYOJ@D*4CMMUs+hM7lPiXeQ^^3t5 za56Ah-$3Sxz1aI&KgWXApILw@c0h*jrrXQjC+f=UyZ;usw(*a3F@|5SR@pN=J9_{t z`vJ(GU=UX;-`?G!g>O+6$YcGZR$jr#O zO=sQOBfaW5=MMV|NdfI+LcS&NYFxi@!{-Xe zCP=5TIqHCDhm&|5Q9}n{BD$e;61o&V414p>iwNfqz?OU4s;%I8lNrR@1zhG6oLE8)@z4J+S;#$lfze@qvjG67ZBozN6RvsM>DjsM}L0AHY>z7K@l$3ZGufdkHzO&WA=c3C@_a{wYGOJtxX?uEX55gtN&5bFg z$K1r41fg{qT$9>)L@XO8CsjQ?z3ZP|0pIM~=ih`cQU0NKrYw|YQFnEiDj3*~Z{Ngo zq3y2X-F2bDx_I87=wSe27N4GqVJIo;)vH$!Y<$AQ!<%NHdSTld_B{PZI>SK*YezZh z;Tyvc4i*+KC|{jWj%Sw*2)l&iOpg&)=}DTq|n>ZL4kxvR6(Jgr03|kQ%6@fcLpI1d{@y+2e5hi z`uY}!aQ$_fSRuYFV4zD-Ari*2^uvdV=QX4dg#E_G#)hJ{gMWs^h`^x}gX}>>W5Dyw zG)F93mu%%72ZYxWL@Z<)UpQ6l9XN5NaAVvT2vHh}c;mi;H{{-W3b}ka3)pA4Jcydh z#KdIr{=tf;7rrncfuwpOtH4z%CBMCudejtQ1DFZ-B?fRV&UQy$ZJzlh2Pj z9r0kzx^>FNNx0JWgSUcK-hyZpXvFc)%F_ScZ@D_|EF$=N4WG>phTcfL%o(Gn8kdv@^4d|8blBpePsMdjM^wK8OKt(O1)=;g0aL@IYwW7vQLA0Bau z8yBU0P^I^V|58d?nrI5B=D5TS>xOQIhx;LIl{GbSg3rX(SPGKhm$Z`~hVXTY&I%cD znf{;-H111z2L{sj^!3GK7Z4{%qj_hUpMHU}WYMoaUc#WG2!e83Qs_FBCAb*TYG!%(=9J}WhD`Y-2!Q7%mJ=b2uN60(yAh~_{FJ6H9gqK0_Y|XoORV0nT z7C>1vBrdz9mUQH=7-KX@Ulg$O4=9MvM017P(&7;r?b3m-Be&Z6Y{iY#KPvk#IG-^r|L2bB2f8hA#0iA*%Ze5n z6}jz(MKTbK;Tl+9tt)XWtc9DS%HAa&xCz?_4FzCbPI7dTI+tOg3kQ;3vz0flt1MBj z72yX)R4ebo391C#IZ5M$iv+I$Mj+UT5fqP*%PMKUkHD>EfFv}8jwWs^3PIjLx{J)J z9_N7lsbs=o$b0M|UKmsdCa9|^ui}Ln#={Tw>i@c$Qu10EBG-_(j?^KDISb!(bO|-` zC7%mqW5YkZ4c}IoS-fD7A}b{|oy3T{5Nb$f3y9*{%~cJvag-J3*W;nRl z58aE1NVZcTPc@_0m1PHpW3U|+P&uIxpZDWm`9PsXV2@r!Vd}iY#A6I(LPjJPAsNgYAU(rx? zq+*GNE#s6SXGJUl4Kh*X8U_YDgnYPliyi(~tGB#+oX6C;2s}w-miCK+nVJrwas{a4 zlNT;zI6P7CQ!M?nZcqDb^p6l`0#$7}!t1r$xBcNYw0z+@l~H6xRzgFhh6%RqLMi0A zlcuKeqb`4@K`l9}dw4h)68b`S3^6p}yp`ihFM366ndg3V0pGkZ0ezy$lprs&& zzP}QV*?%6&jwC9atMKfalz7p&Y`TUA8aR#h+>c6S00nj2BxV4?z4MR?2*d zH!ZQ4495Q%29@NP>H~5X%P|edSqk}k+Oj5K@MCI&)=u4vg~oANu>RJvx0IWZB!W(Rq*qjjB|gKlhN63 zX8t{UFE3fK1?)N&WAaNmY(PbQ{X65WFJJDr@_;tx@vE1Mkhf4Ch~S74o+xYF+}s6v z?wFqOzV{nPNIr(;3`F|#BeSn=3X}3*Q?r&WJsAji;PdmJ+hzez!3GHRCnYC$W5P=y zDAt2jMYS62bn&7h%p6d$)%BJlXuXQi5EhvQ?ItOitu>1hjn>oicZB%R0r1>Du+|W- z0Q7$rBC!?VUZ9!A0ww6&`&({h<={wamL?PUHWB*=B1Jr=!mpJVUN`(wWUoyp?ey)c zQ@cACJjif_{E_St^@KoSLWleZkbe<`GZL}5W8tx5$H0r;Ffu=NY9Rd#S)#|WMDcET z?<8OZfAAst)~&@#N>Q4$qKOb98;ZYt`O-N!NG!c0ZH2tpN8*mV1sAh3sAT`_NAU)w zQ^(o?_*?s9()(I{0%iVW=>^ZD$3cS;Os#_F9Y6B<{p!`LJ4QyXLV5%{o}lpWkblbr zd!x}7j)|MA$SAoHlIO{+PU&gvG`S5`QPlf9EdXd$w|GWyQ%m8#LMSQg) z3-waN;I3q%#NdO({zL3`=r@43j-3#kx71DtHQRwuF?@be#U<+6H7`ikP~OXd{^92t zc%RIS2eA3ty2EW|Fc}1OxcH^KoSce(RVP?W$DA}fd-e(n?xM!3TKIDC6#Fv~XqhSK z$x0x_ySloFjSxcxZHWfWCZ+J{fnyRETe*5QAyWw3-f)PnIqg`vD`6p7y&~vk;GV9= z#0P*E>w;Ei zUL;Yd5mzLT(aV||S_%y_CNM_Lq=8iq97q~qcZ4>Sb@qek!5Fy{!#Z|I3h+VXiPW1!lH|1F~UGx1%4=1C}R_|7W4XlS*P)HvXPXU9`DJB47! zDobO>o;q`eI54Ki%9+0->2xB@5ytqTUTXa447sS$6=VUDX^_EbDQ+H~c;r8l3}J=z zgMFfq6UvJ6_*U!!pakMBrzq0>gtFLne#QZ&3?|S5@Bg05vwlJu8N9v%vL!0tU7)bM z2;YY=;3D={v}JcB26xCoLXwiKBrSq@prNqK-F&1lf8iUEw$S>qh}4u|n(ywiW&$Mz zedz}~(ia)thIpi7Sb~V1>KasNkm&or5?m)UT9A;}_2;>6ztfZj(j`p_=Ry6r4*L8V z;{%A=Ofui%=5P#Ni_up1Rk!=9AQfH(AI!B|liqv1 z`t6hO29ec7ox~&)*$;InG5I9^yChQr<)j0mJ8~NO*@~XJ10=;iPJI*oeMKnG24Rt!`;ujy(OWBL98D$Emi;a#23*+dq&?CG5& zPz)#$d*2P~*Q-dkCe&NN&=}I#*l0S-uJVsDS@elXHKIC-V0=*LIC26?qva6$sl5<1 z3Cfy*AV7QzIs(`kqU~xEXVb8!x)I15hQXY|35q5<9z;GBjC|j_C`M?8L`Xy%5mf(D zL=K9&efu`IoSPKT3 zqRTQ8uouy!t8l7K=`V>LhR2eUl8)R_Fkcu1Pzq3e*Z7J!7AeN<)zV^tclF9wvvcRJ zqUZw2Q{ck@6gP0_=$YD&f50vzBHYctw-EpBkGVR9JnG*JAAIxZJdG_n!yt-}h!#7a zf&ujehKeJq~?OT>HF!(6O)!jv(%XZ`c$n}U+Ucrr} zq-}g}vXnx02>^n%yb?TH+Mh{zS^7EODU*E@So8SDiEje< zkioGPERm4dysA|lZf`#3gZCyljkbZv{bR&LK7jz6dh5}j&&0n8hyp?a-&BTuK~x0D zF*Fp~Hc9UaGD%Vq&>FEP>ghFZYmNjF1xX>D)B`=K2Lqx2oy}m0YJoS1Xi10(a07HB z*8H7_5i=v6zD@7`VQ*)i6Ts(F?867M-lGqTlymUDTC^9*qNia6rH|(0M%6ei3$eAn zB8`*4cmKBIjrWknzU;L0$ey=oyu!!O02h(*+GR#WLrqOBEKV;RND63KY3pq0X;Cm_Wj zR}{t)($WDFVhQ?7{sqq>^0k4?-BpqICjA%}X9#2jB*f_?siCKs4&)ropH4M6wMe-U zCO1R>myM>Zyc`(J=yf>SMYWGmY> zAT0RWGPy1Zv6_xcHmP! z^V;Z2*r)uk&LKncCY|6Rt8(ix-zaQQv+1!;_)!)UcLI`a37my!vH9ygk9Wz9oH93G zi<${$;>NS1AcK%O)lx;tWj3MD-78`Oi$)OsV^mQFU;iqlE=Cd`?CH}7GT*;{PeZ|1 z<&rKYKitVEaWjrZ`urw|ZJNow!u$ows!CIY@gBP|2-?N zAH!1nap`yeS&$HmDOS{bALOvGwSoGCA|fTt{HYQwdn{Ai8>P4I+?jrpHdZC@F&h}g z+gl+nK?;!4FLaXq4%}!}VFD6`@1RhFTQPy9#ZU3O+VF?N&|$#VT)T4zeO18?;{732 z*^L6BNgg(eKQc_Z{-E6^kL`&RL z7+i+L=WJwTL<~>F`3zTggrg-vLa#B0z|8>wLnpyd!7=Uw%PRzRT66RM2JsNY3$@72 zsPhTuNsbv%%fEVbA8g8?8~i*x61b}W>0vJNsQaXE;u(CDL|!CG%le@_D>grw6rNSt7i_wx0|Gi1PKFIOKU6XFC{D% ztcbZYqzeJK5S|p@q@?x4mz$H5)0}%R^9#Z&dJWLaB9i+B&66N-h{*x13S=4L(&@cY zx5MUPjw6jH#DxT=hxD-kv8hag7h-2;phLEZ|6tTsy%iE>jQiv&Fw{6wK|9@G$D8}R zSAwa~5fEGqSkVD=2g3-OT;X+9u#)X{U(JX-#0&x&rJ?^HVRaX}1=2tCBkmHp0)`o4 zWFVyr_DjRu52@S5zZYcSKw?67DDd3~+McZzAQyDPc>`qMmZe~` z39$+GEU{meQ{>gNFVAdgu>b=Aq0gN^FA29LwpOuEp}1h-p4cmDr$Atu0*!aUyZPq_ zWBNV_vxLPnW+Z9kHx~t_q3}I6;gubV2#RoNn6Wl5h--I@7*oO?XZ@Wd^ zUg#=n?qDj{L5fh()4K(Uf(@R3(kGnh>SP=Y=5`s}0>m)+9kv4XuWR>RWkEjWB0c#? zsa-feVCB2kf5LfRFW%b;$t7mN1IGNN&wUnBCwX` z_JvPn-igZ=LiH@N^EuY z$(jP`9<{uAsIohu7lUG1YieqWed!DT6+4z|=Go?$V3Xuqpp1fW@uHWP{`8Djz{7*H z2rysdfCflaLKEDBGaX1Jpo4{Wcs1uTYkux@c_`9GNb12%ZX58a8tp%Ph(AabS@S{RR zBvX`II64EdM>yoXw$$zTmsz^tMC0&si+HfhP+x(JqZd@$c>%pftQ?|xEWhV|hBcZ) zbx};x34%=a#c62$n0cDcjfjWR|#iZW0( zbJMm^pyR=L>cn9Gl{7Ty46WsYN=3NEL@aP#X2(uq z6T#C`fu+kRBO^m#6W-o1bQ2xtcs3Jo)G4sN#5ve(7VvBXvHM51pvGWKCIu;U5XS$h z?irXR>ZI18+1>K@?$EH_zCIx&Iq*dWU!h-tCLvAdq^T8Z5`;q1rb?L6Ce59z)#ndr zYQp$Hw|s>_`{ifrx*hRUctR`L`|t5iESbs$<%NfmBL!z*P{WS6td>-Ut3xwoYQz@T zD#q8FAQGbQ-fNtC3Yf~hYE>y(S082KoQ?6uDf|VV3l4)AG{VIRmlwYxq9_`M-rU=j z`SKSi8Pb=JUP%soE)M%#KiiZJF^LW%L|pPPiu!?9pB!xuL&7op{LEHA??P0gIe_Js z?b|C5vaT2|t!BHk^)v%fMTnUcE&^C96;VZ!zfjjE>(PH>^*p&cr66oAg^gTZQ^SZO z3CeK)*D&j7H|ao(K-yA-%?oX^VZVO%C*W4-nVG9W)Lh1D@5a`qkRmTa?jWWj@MSkz z)wW&$i|Hpf3>6eM4AI=|@VDJUZS3ebQiS8ap`n4aG7`H!K5gI)rFJRm098x|Trb~0 zJ1__$u_QKYF62LxvwmDrp-fznxTwTHey9$```Q>5h#?TKDUN5qqk)T95xej~zwk#kJVdws+v4$^A@HbN`YNEY#5 zXv-EBFel(HZ_daTX0HGE`^WZBIwCCNWSTfQmx%d?7<6Grtf;8iFYG=A>J3M!ETdxa z4#ej&$koBv{9O5M$T=@wzkInD3tAhB9SMI{MhB|W(?Z0;uyAnH4Ej};z_crA-L?ne zo`TIdysy8fr-KyTFs=ikB_>EyIJV$#?1Emp!-K@c{TmGu>N^10#B#oT%CqKDYqfre zy_lGo7z_IuW*;Z42!ds}ilHj4t7Cz5P?8+uaRo>*fm9gA#d3=lEviGbCI=>A_UOR- zGVmQs(@hSwKU@PY_Zo~$;9Awp+`$kMg$Y(pypJNCj|ZcqN!13g$Wnp^;Br)!{*h8| zLB-Y?=W2#&UlOQZM9kh0mWg|B-G&XM>ysGFK$;?H;&7MgH??BbKYB?5hl=478Mz9< zod&`7?WR}HEVtiPjG2Irg#dM{wPowbt*wHKiK%DKxGpFxOceqIIksVD=fC-4j>E?N zE=Hc2uqs}FIt}mN+YU8uOs$2&0oSbf#HII85t*=214BZ>H-P9}zFLX}CntaxR)RTE zRIhHqnU|Y;zbD7V%>q^*AFuyojF^P0j5JMBTP*6 zuZ;1SNkt#K4|WC@PJjRA$VdzDIL%~a^!YSDCseTk1P-oAKh4MaU8|*p59wX zaaeHuF$& z2n@_u>>D<0FopbBDgG0mR;@vN8`9w<@u=Be#*5HCivjgsYHTfC1fQeKt^srjPZknPL9oiI2lF8 zT5+yD$xG!B#^_cP%Ib~SOuo6fxnNrfE+fYh7<_^kA{1`s*ew9IIJ1CI_&DckV#l|^ z;VJu;18Qn)umG|m()>B?>fvGa%?3I9A^)no6VQy4ugboBIRPn-IOnqwcFZRwL`QB- zWxsj(k_94YHfVx}Xwl##XE4DP(>$ZJ9>@QX2m>+(r-xMl8;C)A$!XGU!yqFbsUZ6L z;z_`#m*Ef{z#GenUoh|~BPaM`FOY$6lFjiIi7~r`qCluv34LxDFJC`Dj@_Q9NNNrF zy|DDIJ=MKoo9{9I0@?f#o|&BRwRrL3K&-yPXm1f8;0~j9TT;WuHwX9s=eF|y=sXW6 zTp}D2q(}Zb(|@RL2EuV#8Bw&iT>5beRHqW$EFvNUku|H~EQC;S8Q>g-??7^T1`rTj z4Zjz7;g$7;VF;(o)a6*}0`i^}8^xh?0FY*QSdzb?!Vw1@IiG^O89@muxJ}=iRhE(9 zJ!4^8XnAfY3`uhtENu1zbu{qk)TZq~D=oS|ym3$u&X6FdWBlo~L%T2qQzdM-<&~9m z09CG~tm}l_ah^f^(X*hy(IQ`o;FJwlFe!9^`c)?1zJ05L?j8bHNP{r8Cvl;pd5xl` zM%z`<7R!ROtWc{GsDdJ-I)ryuP7_45tK>=1fk)sK3CTce-Itl=_QHowbQ<#OD^*&A zDi)%p!**&r(jtj%5bODSq4HZZ0{s^E8GH14-G+|)TrEyDWw*9%CD#Zf9K=k`gwAyZh z&X(AWs-p{cbD>;ph3IyROSn*qS#SGHAP7HD&{uxmN3+c?8JA&kV zrngX@+0g6N@bEOWd2(0=ygIP_+AJA$#U46&_K|z9c;!Nggwd&7A7-AdY=iy6+@;hegqyYSvH`b|PB_>vWpUz4 zUd#N3_@n3~VO%5RVgOK92NU$cdplQvryyQY(%TI?#z~Zc#9u(|kqhX@bGm#y17VgN zuYl8eXeeaCt2sH_fgSU;jM0OomtnXHS)md(ir^E;0LO&fz<9@DR0bMTSE3<-5d&V> zcu;wLImocn`+#ANWEcj)l2L-jjR*;}f=FPym_{iObLrLqIVX{{ zMFRg`#umjSmb=ogbrM@MG@kPDu5FLK{?u)?sfz&}OYQL2!J0bxjU9}M?2q>CO$ zT_FafM7RW{^dN$yydhEvoG?KE0$m`{V67#GKA?vU#~mF$f^~g4FK-vA>CtC|y)kh9 z6N)qI1qXYF{B(lmMrL!M{6f( zT1RE2;K#7X)oZ{(m;QT2ZUhW= 1 gram_dim: int = num_batches * batch_size**2 lx = 1. ly = np.sqrt(dy) diff --git a/profiling/gram_comparison.py b/profiling/gram_comparison.py index ebf6046..7ef48d4 100644 --- a/profiling/gram_comparison.py +++ b/profiling/gram_comparison.py @@ -193,7 +193,7 @@ def test_num_samples(): num_features = 300 batch_size = 800 num_samples = 1600 * np.arange(2, 8, dtype=int) - num_runs = 8 + num_runs = 5 data = [] Result = make_dataclass("Result", [ @@ -211,14 +211,17 @@ def test_num_samples(): 'experiment.run_hisel()', globals=locals(), number=num_runs) + hisel_cpu_time /= num_runs hisel_gpu_time = timeit.timeit( 'experiment.run_cudahisel()', globals=locals(), number=num_runs) + hisel_gpu_time /= num_runs pyhsiclasso_time = timeit.timeit( 'experiment.run_pyhsiclasso()', globals=locals(), number=num_runs) + pyhsiclasso_time /= num_runs del experiment result = Result( n, diff --git a/profiling/lar_comparison.py b/profiling/lar_comparison.py new file mode 100644 index 0000000..a022bb2 --- /dev/null +++ b/profiling/lar_comparison.py @@ -0,0 +1,74 @@ +import pstats +from pstats import SortKey +import cProfile +import numpy as np +import cupy as cp +from hisel import lar +from pyHSICLasso import nlars + + +class Experiment: + def __init__(self, n, d, a): + x = np.random.uniform(size=(n, d)) + beta = np.random.permutation(np.vstack( + [ + np.random.uniform(low=0., high=1., size=(a, 1)), + np.zeros((d-a, 1), dtype=np.float32) + ] + )) + y = x @ beta + np.random.uniform(low=-1e-2, high=1e-2, size=(n, 1)) + + self.a = a + self.x = x + self.x_ = cp.array(x) + self.y = y + self.y_ = cp.array(y) + self.beta = beta + + def run_hisel(self): + feats, _ = lar.solve(self.x, self.y, self.a) + + def run_hisel_from_cupy_arrays(self): + x = cp.asnumpy(self.x_) + y = cp.asnumpy(self.y_) + feats, _ = lar.solve(x, y, self.a) + + def run_pyhsiclasso(self): + _ = nlars.nlars(self.x, self.x.T @ self.y, self.a, 3) + + +def main(): + n = 100000 + d = 500 + a = 100 + experiment = Experiment(n, d, a) + cProfile.runctx( + 'experiment.run_hisel()', + locals=locals(), + globals=globals(), + filename='hisel_lar', + ) + hisel_lar = pstats.Stats('hisel_lar') + hisel_lar.sort_stats(SortKey.CUMULATIVE).print_stats(20) + + cProfile.runctx( + 'experiment.run_hisel_from_cupy_arrays()', + locals=locals(), + globals=globals(), + filename='hisel_lar_from_cupy_arrays', + ) + hisel_lar_from_cupy_arrays = pstats.Stats('hisel_lar_from_cupy_arrays') + hisel_lar_from_cupy_arrays.sort_stats(SortKey.CUMULATIVE).print_stats(20) + + cProfile.runctx( + 'experiment.run_pyhsiclasso()', + locals=locals(), + globals=globals(), + filename='pyhsiclasso_lar', + ) + pyhsiclasso_lar = pstats.Stats('pyhsiclasso_lar') + pyhsiclasso_lar.sort_stats(SortKey.CUMULATIVE).print_stats(20) + + +if __name__ == '__main__': + main() diff --git a/profiling/select_comparison.py b/profiling/select_comparison.py index 9f84953..1884ea6 100644 --- a/profiling/select_comparison.py +++ b/profiling/select_comparison.py @@ -21,7 +21,10 @@ def pyhsiclasso(x, y, xfeattype, yfeattype, else: lasso.regression(n_features, B=batch_size, discrete_x=discrete_x, M=number_of_epochs) - return lasso.A + + selection = np.array(lasso.A, copy=True) + del lasso + return selection class Experiment: @@ -34,10 +37,9 @@ def __init__( apply_transform: bool = False, batch_size: int = 500, number_of_epochs: int = 3, - device: Device = Device.CPU, ): print('\n\n\n##############################################################') - print('Test selection of features in a linear transformation setting') + print('# Test selection of features in a linear transformation setting') print('##############################################################') print(f'Feature type of x: {xfeattype}') print(f'Feature type of y: {yfeattype}') @@ -45,11 +47,11 @@ def __init__( print(f'Noisy target: {add_noise}') print(f'Number of epochs: {number_of_epochs}') print(f'Batch size: {batch_size}') - print(f'device: {device}') - d: int = np.random.randint(low=50, high=100) - n: int = np.random.randint(low=15000, high=20000) - n_features: int = d // 6 + d: int = np.random.randint(low=300, high=400) + n: int = batch_size * \ + (np.random.randint(low=10000, high=12000) // batch_size) + n_features: int = d // np.random.randint(low=15, high=20) features = list(np.random.choice(d, replace=False, size=n_features)) x: np.ndarray y: np.ndarray @@ -73,7 +75,7 @@ def __init__( scaler = .01 if yfeattype == FeatureType.DISCR else .1 u += scaler * np.std(u) * np.random.uniform(size=u.shape) if yfeattype == FeatureType.CONT: - y = u + y = np.sum(u, axis=1, keepdims=True) elif yfeattype == FeatureType.DISCR: y = np.zeros(shape=(n, 1), dtype=int) for i in range(1, n_features): @@ -81,7 +83,6 @@ def __init__( else: raise ValueError(yfeattype) - self.device = device self.number_of_epochs = number_of_epochs self.batch_size = batch_size self.d = d @@ -110,7 +111,13 @@ def run_pyhsiclasso(self): return pyhsiclasso_selection - def run_hisel(self): + def run_hisel_on_cpu(self): + self._run_hisel(Device.CPU) + + def run_hisel_on_gpu(self): + self._run_hisel(Device.GPU) + + def _run_hisel(self, device: Device = Device.CPU): selector = Selector( self.x, self.y, xfeattype=self.xfeattype, @@ -121,7 +128,7 @@ def run_hisel(self): batch_size=len(self.x), minibatch_size=self.batch_size, number_of_epochs=self.number_of_epochs, - device=self.device, + device=device, return_index=True, ) print( @@ -134,16 +141,18 @@ def run_hisel(self): print( f'WARNING: hisel did not perform an exact selection:\n{msg}') + del selector return selection def test_regression_with_noise(): xfeattype = FeatureType.CONT yfeattype = FeatureType.CONT - batch_size = 1000 + batch_size = 750 number_of_epochs = 1 return Experiment(xfeattype, yfeattype, add_noise=True, + apply_transform=False, batch_size=batch_size, number_of_epochs=number_of_epochs) @@ -151,34 +160,43 @@ def test_regression_with_noise(): def test_regression_with_noise_with_transform(): xfeattype = FeatureType.CONT yfeattype = FeatureType.CONT - batch_size = 1000 + batch_size = 750 number_of_epochs = 1 return Experiment(xfeattype, yfeattype, add_noise=True, + apply_transform=True, batch_size=batch_size, number_of_epochs=number_of_epochs) -regression_experiment = test_regression_with_noise() - - def main(): + regression_experiment = test_regression_with_noise() + pyhsiclasso_time = timeit.timeit( 'regression_experiment.run_pyhsiclasso()', - number=3, - globals=globals(), + number=1, + globals=locals(), ) print('\n#################################################################') print(f'# pyhsiclasso_time: {round(pyhsiclasso_time, 6)}') print('#################################################################\n\n\n') - hisel_time = timeit.timeit( - 'regression_experiment.run_hisel()', - number=3, - globals=globals(), + hisel_cpu_time = timeit.timeit( + 'regression_experiment.run_hisel_on_cpu()', + number=1, + globals=locals(), + ) + print('\n#################################################################') + print(f'# hisel_cpu_time: {round(hisel_cpu_time, 6)}') + print('#################################################################\n\n') + + hisel_gpu_time = timeit.timeit( + 'regression_experiment.run_hisel_on_gpu()', + number=1, + globals=locals(), ) print('\n#################################################################') - print(f'# hisel_time: {round(hisel_time, 6)}') + print(f'# hisel_gpu_time: {round(hisel_gpu_time, 6)}') print('#################################################################\n\n') diff --git a/profiling/select_profile.py b/profiling/select_profile.py index 168081c..0804d58 100644 --- a/profiling/select_profile.py +++ b/profiling/select_profile.py @@ -2,16 +2,148 @@ from pstats import SortKey import cProfile from tests.select_test import SelectorTest +import numpy as np +from scipy.stats import special_ortho_group + +from hisel.select import HSICSelector as Selector, FeatureType +from hisel.kernels import Device + + +class SelectProfiler: + def __init__( + self, + xfeattype: FeatureType, + yfeattype: FeatureType, + add_noise: bool = False, + apply_transform: bool = False, + apply_non_linear_transform: bool = False, + ): + print('\n\n\n##############################################################################') + print('Test selection of features in a (non-)linear transformation setting') + print( + '##############################################################################') + print(f'Feature type of x: {xfeattype}') + print(f'Feature type of y: {yfeattype}') + print(f'Apply linear transform: {apply_transform}') + print(f'Apply non-linear transform: {apply_non_linear_transform}') + print(f'Noisy target: {add_noise}') + d: int = 200 + minibatch_size: int = 500 + n: int = 4000 + batch_size: int = n + n_features: int = 30 + features = list(np.random.choice(d, replace=False, size=n_features)) + x: np.ndarray + y: np.ndarray + if xfeattype == FeatureType.DISCR: + ms = np.random.randint(low=2, high=2*n_features, size=(d,)) + xs = [np.random.randint(m, size=(n, 1)) for m in ms] + x = np.concatenate(xs, axis=1) + split = None + elif xfeattype == FeatureType.BOTH: + split: int = np.random.randint(low=3, high=d-1) + xcat = np.random.randint(10, size=(n, split)) + xcont = np.random.uniform(size=(n, d-split)) + x = np.concatenate((xcat, xcont), axis=1) + else: + x = np.random.uniform(size=(n, d)) + split = None + z: np.array = x[:, features] + if (apply_transform or yfeattype == FeatureType.DISCR): + tt = np.expand_dims( + special_ortho_group.rvs(n_features), + axis=0 + ) + zz = np.expand_dims(z, axis=2) + u = (tt @ zz)[:, :, 0] + else: + u = z + if add_noise: + scaler = .01 if yfeattype == FeatureType.DISCR else .1 + u += scaler * np.std(u) * np.random.uniform(size=u.shape) + if yfeattype == FeatureType.CONT: + if apply_non_linear_transform: + u = np.sum(u, axis=1, keepdims=True) + u /= np.max(np.abs(u), axis=None) + y = np.sin(4 * np.pi * u) + else: + y = u + elif yfeattype == FeatureType.DISCR: + y = np.zeros(shape=(n, 1), dtype=int) + for i in range(1, n_features): + y += np.asarray(u[:, [i-1]] > u[:, [i]], dtype=int) + else: + raise ValueError(yfeattype) + print(f'Expected features:\n{sorted(features)}\n') + + self.x = x + self.y = y + self.xfeattype = xfeattype + self.yfeattype = yfeattype + self.split = split + self.features = features + self.n_features = n_features + self.batch_size = batch_size + self.minibatch_size = minibatch_size + + def run_on_cpu(self): + self._run(Device.CPU) + + def run_on_gpu(self): + self._run(Device.GPU) + + def _run(self, device: Device = Device.CPU): + + selector = Selector( + self.x, self.y, + xfeattype=self.xfeattype, + yfeattype=self.yfeattype, + catcont_split=self.split, + ) + num_to_select = self.n_features + selection = selector.select( + num_to_select, + batch_size=self.batch_size, + minibatch_size=self.minibatch_size, + number_of_epochs=3, + device=device, + return_index=True) + print(f'Expected features:\n{sorted(self.features)}') + print( + f'hisel selected features:\n{sorted(selection)}') + del selector def main(): - cProfile.run('SelectorTest().test_regression_no_noise_with_transform()', - 'select_profile') - p = pstats.Stats('select_profile') + profiler = SelectProfiler( + xfeattype=FeatureType.CONT, + yfeattype=FeatureType.CONT, + add_noise=True, + apply_transform=False, + ) + cProfile.runctx('profiler.run_on_cpu()', globals=globals(), + locals=locals(), filename='cpu_select_profile') + p = pstats.Stats('cpu_select_profile') + p.sort_stats(SortKey.CUMULATIVE).print_stats( + 30) p.sort_stats(SortKey.CUMULATIVE).print_stats( - 'hisel/select.py:', 50) + 'hisel/select.py:', 20) p.sort_stats(SortKey.CUMULATIVE).print_stats( - 'hisel/kernels.py:', 50) + 'hisel/kernels.py:', 20) + p.sort_stats(SortKey.CUMULATIVE).print_stats( + 'lar.py:', 20) + + cProfile.runctx('profiler.run_on_gpu()', globals=globals(), + locals=locals(), filename='gpu_select_profile') + p_gpu = pstats.Stats('gpu_select_profile') + p_gpu.sort_stats(SortKey.CUMULATIVE).print_stats( + 30) + p_gpu.sort_stats(SortKey.CUMULATIVE).print_stats( + 'hisel/select.py:', 20) + p_gpu.sort_stats(SortKey.CUMULATIVE).print_stats( + 'hisel/cudakernels.py:', 20) + p_gpu.sort_stats(SortKey.CUMULATIVE).print_stats( + 'lar.py:', 20) if __name__ == '__main__': diff --git a/tests/lar_performance.py b/tests/lar_performance.py deleted file mode 100644 index 79cc379..0000000 --- a/tests/lar_performance.py +++ /dev/null @@ -1,63 +0,0 @@ -import datetime -import numpy as np -from hisel import lar -use_pyhsiclasso = True -try: - from pyHSICLasso import nlars -except (ModuleNotFoundError, ImportError): - use_pyhsiclasso = False - - -def speed_test( - n: int, - d: int, - a: int, - number_of_experiments: int = 1, -): - x = np.random.uniform(size=(n, d)) - beta = np.random.permutation(np.vstack( - [ - np.random.uniform(low=0., high=1., size=(a, 1)), - np.zeros((d-a, 1), dtype=np.float32) - ] - )) - y = x @ beta + np.random.uniform(low=-1e-2, high=1e-2, size=(n, 1)) - hisel_runtimes = [] - for _ in range(number_of_experiments): - t0 = datetime.datetime.now() - feats, _ = lar.solve(x, y, a) - t1 = datetime.datetime.now() - hisel_runtime = t1 - t0 - hisel_runtimes.append( - hisel_runtime.seconds + 1e-6 * hisel_runtime.microseconds - ) - hisel_mean = np.mean(hisel_runtimes) - hisel_std = np.std(hisel_runtimes) - print( - f'hisel run time: {hisel_mean:.4f} +/- {hisel_std:.4f} seconds\n') - - if use_pyhsiclasso: - pyhsic_runtimes = [] - for _ in range(number_of_experiments): - t0 = datetime.datetime.now() - feats = nlars.nlars(x, x.T @ y, a, 3) - t1 = datetime.datetime.now() - pyhsic_runtime = t1 - t0 - pyhsic_runtimes.append( - pyhsic_runtime.seconds + 1e-6 * pyhsic_runtime.microseconds - ) - pyhsic_mean = np.mean(pyhsic_runtimes) - pyhsic_std = np.std(pyhsic_runtimes) - print( - f'pyhsic run time: {pyhsic_mean:.4f} +/- {pyhsic_std:.4f} seconds\n') - - -if __name__ == '__main__': - n = 100000 - d = 500 - a = 100 - number_of_experiments = 5 - # With these parameters, on my laptop, I get: - # hisel run time: 14.1813 +/- 0.0975 seconds - # pyhsic run time: 15.9536 +/- 0.2564 seconds - speed_test(n, d, a, number_of_experiments) diff --git a/tests/select_test.py b/tests/select_test.py index 626ed8c..7c06c45 100644 --- a/tests/select_test.py +++ b/tests/select_test.py @@ -19,15 +19,19 @@ SKLEARN_RECON = True -def pyhsiclasso(x, y, xfeattype, yfeattype, n_features: int, batch_size=500): +def pyhsiclasso(x, y, xfeattype, yfeattype, n_features: int, minibatch_size=500): lasso = pyHSICLasso.HSICLasso() lasso.X_in = x.T lasso.Y_in = y.T discrete_x = False # xfeattype == FeatureType.DISCR if yfeattype == FeatureType.DISCR: - lasso.classification(n_features, B=batch_size, discrete_x=discrete_x) + lasso.classification(n_features, + B=minibatch_size, + discrete_x=discrete_x) else: - lasso.regression(n_features, B=batch_size, discrete_x=discrete_x) + lasso.regression(n_features, + B=minibatch_size, + discrete_x=discrete_x) return lasso.A @@ -90,13 +94,11 @@ def test_regression_with_noise_with_transform(self): self._test_selection(xfeattype, yfeattype, add_noise=True, apply_transform=True) - # @unittest.skipIf(QUICK_TEST, 'Skipping for faster test') def test_mixed_feature_regression_no_noise(self): xfeattype = FeatureType.BOTH yfeattype = FeatureType.CONT self._test_selection(xfeattype, yfeattype, add_noise=False) - # @unittest.skipIf(QUICK_TEST, 'Skipping for faster test') def test_mixed_feature_regression_with_transform(self): xfeattype = FeatureType.BOTH yfeattype = FeatureType.CONT @@ -183,7 +185,10 @@ def _test_selection( print(f'Noisy target: {add_noise}') print(f'device: {device}') d: int = np.random.randint(low=15, high=25) - n: int = np.random.randint(low=5000, high=10000) + minibatch_size: int = np.random.randint(low=500, high=1000) + n: int = minibatch_size * \ + (np.random.randint(low=5000, high=10000) // minibatch_size) + batch_size: int = n n_features: int = d // 3 features = list(np.random.choice(d, replace=False, size=n_features)) x: np.ndarray @@ -231,7 +236,7 @@ def _test_selection( if USE_PYHSICLASSO and (xfeattype == FeatureType.CONT): print('Using pyHSICLasso for reconciliation purposes') pyhsiclasso_selection = pyhsiclasso( - x, y, xfeattype, yfeattype, n_features, 500) + x, y, xfeattype, yfeattype, n_features, minibatch_size) print( f'pyHSICLasso selected features:\n{sorted(pyhsiclasso_selection)}') self.assertEqual( @@ -258,7 +263,7 @@ def _test_selection( ) num_to_select = n_features selected_features = selector.select( - num_to_select, batch_size=len(x), minibatch_size=800, number_of_epochs=3, device=device) + num_to_select, batch_size=batch_size, minibatch_size=minibatch_size, number_of_epochs=3, device=device) selection = [int(feat.split('f')[-1]) for feat in selected_features] print(f'Expected features:\n{sorted(features)}')