From 5988b6a00d3266465273a7c6e3f04e0affb69a88 Mon Sep 17 00:00:00 2001 From: Ibrahim Abou Elseoud Date: Mon, 10 Apr 2017 19:25:57 +0200 Subject: [PATCH 1/7] Implemented incremental SVD ;however, can read only 1 tuple from file and update each time --- recsys/__init__.pyc | Bin 0 -> 556 bytes recsys/algorithm/__init__.pyc | Bin 0 -> 251 bytes recsys/algorithm/baseclass.py | 6 ++ recsys/algorithm/baseclass.pyc | Bin 0 -> 11549 bytes recsys/algorithm/factorize.py | 126 ++++++++++++++++++++++++++++++- recsys/algorithm/factorize.pyc | Bin 0 -> 25820 bytes recsys/algorithm/matrix.py | 28 +++++-- recsys/algorithm/matrix.pyc | Bin 0 -> 7182 bytes recsys/datamodel/__init__.pyc | Bin 0 -> 226 bytes recsys/datamodel/data.pyc | Bin 0 -> 8343 bytes recsys/evaluation/__init__.pyc | Bin 0 -> 269 bytes recsys/evaluation/baseclass.pyc | Bin 0 -> 4781 bytes recsys/evaluation/prediction.pyc | Bin 0 -> 3802 bytes 13 files changed, 154 insertions(+), 6 deletions(-) create mode 100644 recsys/__init__.pyc create mode 100644 recsys/algorithm/__init__.pyc create mode 100644 recsys/algorithm/baseclass.pyc create mode 100644 recsys/algorithm/factorize.pyc create mode 100644 recsys/algorithm/matrix.pyc create mode 100644 recsys/datamodel/__init__.pyc create mode 100644 recsys/datamodel/data.pyc create mode 100644 recsys/evaluation/__init__.pyc create mode 100644 recsys/evaluation/baseclass.pyc create mode 100644 recsys/evaluation/prediction.pyc diff --git a/recsys/__init__.pyc b/recsys/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6bc57bd3ac78b000f3d61b363cfbd46fbc14d5c GIT binary patch literal 556 zcmY*VO;6k~5S?W|SeCZxPsnjEY@o-gswkD{0R$^?LAhAQ9&KFNF|sE}`*ZmX`~jX5 z1R{xN-kb58%*^!V>ne25_lDk{8T>g;zJ#$v4eW%P3E2Z`4#-ZanUbARGaDjvvU6${ zWEa#NlEt?qdqj3cJKO<2rn~hQl#&HZVw3kdp8-DZrH$Rm82MTKsj>@?o~tolAs$2E z&T^_C5f1qns@rUIs{XPMI#0n*H$SBKf7|Q)9X^_pI^PBD{00@IjB~-FN-_&12__t5uVPczplZOkOKXJ&^za literal 0 HcmV?d00001 diff --git a/recsys/algorithm/baseclass.py 
b/recsys/algorithm/baseclass.py index 89656a5..681b805 100644 --- a/recsys/algorithm/baseclass.py +++ b/recsys/algorithm/baseclass.py @@ -36,6 +36,10 @@ def __init__(self): self._matrix_similarity = None #self-similarity matrix (only for the input Matrix rows) self._matrix_and_data_aligned = False #both Matrix and Data contain the same info? + #new for update + self._updateData=Data() + self._singleUpdateMatrix=SparseMatrix() + def __len__(self): return len(self.get_data()) @@ -97,6 +101,8 @@ def load_data(self, filename, force=True, sep='\t', format={'value':0, 'row':1, self._matrix_similarity = None self._data.load(filename, force, sep, format, pickle) + + def save_data(self, filename, pickle=False): """ diff --git a/recsys/algorithm/baseclass.pyc b/recsys/algorithm/baseclass.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16b11748948af3db710aff504e77c4d4593df659 GIT binary patch literal 11549 zcmcgy+ix6MTK}rsZoAvAvR!s} zRl80(v1e^*wHYz9PrEO`3#&!&2O#l)goJp5ClD72Bwi6uJn+IJ67c(dr@Fe=$qY!4 z&P*SlTb;}I{l05e`QN8&Q?qj?3D(ZI4T*BHamDo3QPQbS9-+m9?sEPe`XG-6>g{66fG#fBCws)#Y(~ zUK~ePos`bBbhX!s9eKi?s?s?j-5FV%u`AW2b5gpeWbKq~Pf6#rbZ2F47VWx(XJi9U zP0ME@FGRkxc9s`eZJgl!Iw)$*rrS+|y-v8aB#pAIZq0SwC36rbJrkKF_oi<` zx83oLac>k;22c8X*;bO``3I)$r|zw=)Ailuq|LwA*Smh!Y$x4owOaIF__xwg#c*-40tY<6P5jIRP8pVQ7+3C?Th8J&sA7v!^&5CQmqPUoDH z;G)hsuZMX<;Ef9qXfySnyHVWR%XBNwUU2;&09?~hA9fqr zUau3HD+_LzwVO4acOyP<6KtCfpy~!;8a)e*yOAbcEFBr1DMz$E)FvkRE8%wwKXVsF zrZRIV(sCp#QGQqlyKH4#vWoUh49%Pznh90|c8ax?Mhy#klU$X!YVkTAfvb|xvRBW> z(6N%lVRo8xY-h_v-KgV3E)Vq4yM6~$yD&5X1k?3eeka{#p_fVJ`aY1lRgW-Y$$|4{QduglHAGbMSTP!F6Q}*3g5n%HfPUP*1qRxR+T~TnwD}pvi_5KsnKeW-DAc=9pKe1f zi@TAeL+H3-O^AXA-h$uN>F|qOs!={>ouEc1GMSrfxYqu-%g_AIUbsNaAvSOltBIDzAOGbn~6l5)+%j!r0XKh>Q#v9sa!Qmy`PU&Pekl}P3|c9Cp(+Zngo<{o z+NQHWB~9>}-$PN{=ms1a-{J>kQhz6h=1Ukm zMp@XshSdOSsE{a$o9+X2tG~1v#$oDrmR`ncaikTsCDuaFBZjZl(k;DL;gcp!M@Irs z&&75R@w33J%sPe^qN?Ui-69pih@Gm?A&L>Ht__G}o0_!$yM?;BhT^a!>Dxoz8!WC> 
zCS|p+%FyjGRW&ffWjREBTwd5HFK6rbuueh4Z^N}UU{%gbcO&YA1BR}Kp{shuz$M+F z69gua{Z~03VOZE{QcjxmkQ-M9)@r3ea;y93sm7q!Q$}O%>>WLxo&Y z%?jeDH~IlinFxk(*U`lpWd(w08+3zS20MRfbAAC8qU3oClyqd4f}L&K$f$NO}twUrAIk z!5Pn*ZjKt{BEoJjJGf+;n4!`60Bg_>yL*w@qPp=j7orch6Yx{LWoiw&jGlsqlBMh5 zxFX1e<(`*X3&jXsRtqT3ctnR)!KAoRMk-W29>ZH?@d1k5Xjn^O4>RPWa;0t*x9oXF zqhuKMAf1S`Q20Ys%^4KpeCW(L@8NgRnFjJI&iMiM3Z=7)iI#0(Ro%=F8aA&0ZtYA; z`l0rh$xl^4=ZEI0D*)!MK8`Oz^^B|P=}`5cPS?Q#gfL~kmWqT<$cmvV0OR5$gO1;e zgG&vHxkpqeQ#>{3%V?@_Ga`aP(v-%x!UJX05Zx)|(P+r6siXWjL($ofae_Q`GY@eu zgW%InL_8EmoEN{vX}^ZzE9vgMitc=(Pi{yTEq~!j|2bAHY~Yf$s1ZG68gaT)LxH}v z#~7uL>>}Kwm+VHt@#o_uH{R>6bIyjFN6K#43A@NFnZaZ=$e4>X^`NszG|(I9CJ@r& z3vkh^(Xd1JW&HQ9<8taKWl68a!&{qbnnPif2V^1H0^)G|qy7kn&N zJoVA51mF7w47`ff+oNGMg?DhPKV)l&+dMUYg;eKR*}^UlDZ9&<*Y;y7J8Ors2uU^f z#yWgqzn$5)2DkV+Nuzm9Pp>4|y)KgZ6t^(23QsAejx1sC-A18;asS>*>(PyGe{#pR zZalHA(U6G}(M!W1YG;3hs&#P(((oGS(ms zCYw|NTtx-Gwsq-?Xbmoeb&6O~Eq>;#s&c9^n{!ZScmsi9=5aA~I)Wtg#?TmC94Qre zo4;iLd^noRliRI!0)Hy}6I6$<&LJL9um1a}FwA&&D9k`kzi+_$F~+9IQCap^NRiu5 z@5&J-AO-PI?W1)e@k$APup>v~5}zIFNBGn4Mx#^c_bEWUxFa8(d7 zYiQ&5dccVC(7xNE0>NM_>3L;7MQ5!+!r4Rx`>?0FIer?_<*_kRFM)kE%IPRjbZC!S zzWbx${3OoSC(fUuo;#Wqgf|tKzlw9Abk;cqms1TPl_v#-H!?SX>@N`4>Z_hPFdPjW zo&F)>+>%^?g`3+1tHuO?z~JqtZtsjfE`WJKS!RE;B)d)dk;wi(Q!oG* zU4ak%EtwsR$=K0^98JppwoXtK+e7PqTUY1wos7p#NJX!O1K5 zix8^w{nxGNvDtLR%)XWfqeISe$M3EO{e^BuyHTg*8rKF79gss z_*TQ?x=O6qNGH8uYlqq3zr=QiNgpXqmo7T*mCidgXSP&z=A8@98EBikPDBIQrrn_f zR+wHw>t*``WdR^_$<-4fWdFNTUpjyT3ig{=JRML>cNxsn@PI4$kt;1Kx^OaZd^QC$3Km?kZHbrF;s6-38h4?9+O(*IK3 z>A|v#ho60Kz-UGlB2M1g2@e`nF6yIsk5~&ik>{{P@*cC3=hO(y-m?`2sw%x62cK|| zUeSGoXCFVfxAJh++vq>g_p7Y4r{}XyV4tH_fm^(d#%S|$_It#CLd{%9A=B2`v#uVq zinDO{==!}=a*<_x7q6@;rPEN&Z8cdI?(P_2#F0R#5inLnx zgG)5&476H7(r&dpX4<@SEaq4!b2UHs4x49LxGcWLLOt~NSbLuZHyWf6z3Xh=VsVGX zJr=*t;sFZ->;TNisSPC>ED3`mPas`htruUr#zGOZefxvi{E4R_VV1k90yH literal 0 HcmV?d00001 diff --git a/recsys/algorithm/factorize.py b/recsys/algorithm/factorize.py index 00a9e61..6d41edb 100644 --- a/recsys/algorithm/factorize.py 
+++ b/recsys/algorithm/factorize.py @@ -9,6 +9,7 @@ import os import sys import zipfile + try: import divisi2 except: @@ -30,6 +31,10 @@ from recsys.algorithm.matrix import SimilarityMatrix from recsys.algorithm import VERBOSE +from numpy.linalg import inv #for update +import numpy as np +from recsys.datamodel.data import Data + TMPDIR = '/tmp' class SVD(Algorithm): @@ -66,6 +71,9 @@ def __init__(self, filename=None): self._file_row_ids = None self._file_col_ids = None + #Update feature + + def __repr__(self): try: s = '\n'.join(('M\':' + str(self._reconstruct_matrix()), \ @@ -241,7 +249,7 @@ def compute(self, k=100, min_values=None, pre_normalize=None, mean_center=False, :param savefile: path to save the SVD factorization (U, Sigma and V matrices) :type savefile: string """ - super(SVD, self).compute(min_values) + super(SVD, self).compute(min_values) #creates matrix and does squish to not have empty values if VERBOSE: sys.stdout.write('Computing svd k=%s, min_values=%s, pre_normalize=%s, mean_center=%s, post_normalize=%s\n' @@ -352,6 +360,122 @@ def recommend(self, i, n=10, only_unknowns=False, is_row=True): item = self._get_col_reconstructed(i, zeros) return item.top_items(n) + def load_updateDataTuple(self, filename, force=True, sep='\t', format={'value':0, 'row':1, 'col':2}, pickle=False,is_row=True): + """ + Loads a dataset file that contains a tuple + + See params definition in *datamodel.Data.load()* + """ + # nDimension + if force: + self._updateData = Data() + + self._updateData.load(filename, force, sep, format, pickle) + print "reading the new tuple" + if(is_row): + nDimensionLabels=self._V.all_labels() + # print nDimensionLabels + self._singleUpdateMatrix.create(self._updateData.get(),col_labels=nDimensionLabels[0]) + + else: + nDimensionLabels = self._U.all_labels() + # print nDimensionLabels + self._singleUpdateMatrix.create(self._updateData.get(), row_labels=nDimensionLabels[0]) + + # #update the data matrix + print "updating the sparse matrix" + # 
print "matrix before update:",self._matrix.get().shape + self._matrix.update(self._singleUpdateMatrix) # updating the data matrix for the zeroes , also for saving the data matrix if needed + # print "matrix after update:",self._matrix.get().shape + + def update_sparse_matrix_data(self,squishFactor=10): + #update the data matrix + # print "matrix before update:",self._matrix.get().shape + print "commiting the sparse data matrix by removing empty rows and columns divisi created" + self._matrix.squish(squishFactor) # updating the data matrix for the zeroes ,#NOTE: Intensive so do at end + # print "matrix after update:",self._matrix.get().shape + + def update(self,is_row=True): #update(tuple:denseVector tuple,isRow=True,, + print "type of S",type(self._S) + print "type of U",type(self._U) + print "type of V",type(self._V) + print "type of data",type(self._data) + print "type of matrix",type(self._matrix) + print "type of matrix reconstructed",type(self._matrix_reconstructed) + print "type of matrix similarity",type(self._matrix_similarity) + + print "dimensions of S",self._S.shape + print "dimensions of U",self._U.shape + print "dimensions of V",self._V.shape + + + invS=np.zeros((self._S.shape[0], self._S.shape[0])) + for i in range(self._S.shape[0]): + invS[i, i] = self._S[i]**-1 # creating diagonal matrix and inverting using special property of diagonal matrix + + #if new is row -> V*S^-1 + if is_row: + prodM=self._V.dot(invS) + print "dimension of VxS^-1=", prodM.shape + else: #if new is col -> U*S^-1 + prodM = self._U.dot(invS) + print "dimension of UxS^-1=", prodM.shape + + updateTupleMatrix=self._singleUpdateMatrix.get() + if not is_row: + updateTupleMatrix=updateTupleMatrix.transpose() #transpose + print "dimensions of user",updateTupleMatrix.shape + res=updateTupleMatrix.dot(prodM) + print "type of res=", type(res) + print "dimension of resultant is", res.shape + + if is_row: + #use new value can now be concatinated with U + print "U before adding", 
self._U.shape + self._U=self._U.concatenate(res) + print "U after adding", self._U.shape + + else: + print "V before adding", self._V.shape + self._V = self._V.concatenate(res) + print "V after adding", self._V.shape + + print "before updating, M=",self._matrix_reconstructed.shape + # Sim. matrix = U \Sigma^2 U^T + self._reconstruct_similarity(post_normalize=False, force=True) + # M' = U S V^t + self._reconstruct_matrix(shifts=self._shifts, force=True) + + print "done updating, M=",self._matrix_reconstructed.shape + + + + + # myFile=open("prodMVSq.dat",'w') + # myFile.truncate() + # + # for i in range(20): + # myFile.write(str(res[0, i])+" ") + # + # myFile.write("\n") + + # # invS = inv(diag_S) + # # print "dimensions of S^-1", invS.shape + # + # + # print "writing s to file" + # myFile=open("invS.dat",'w') + # myFile.truncate() + # # for item in self.invS.tolist(): + # # myFile.write(str(item)) + # # myFile.write("\n") + # myFile.write("dimensions= "+str(invS.shape)) + # myFile.write("\n") + # for i in range(invS.shape[0]): + # myFile.write(str(invS[i,i])) + # myFile.write("\n") + + def centroid(self, ids, is_row=True): points = [] for id in ids: diff --git a/recsys/algorithm/factorize.pyc b/recsys/algorithm/factorize.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8ddc6846c4d74c0e30b323e23e17fcfaff1d0441 GIT binary patch literal 25820 zcmeHwU2t62ec!pe_*^VLh%XW%C0<&Rv7pGMNXN1bk&;M)q8JMhb3uYOL~6C%y#Ow- zU%2Nk35bO2hH^4_NivhP(@Zi=W|B!-x07VX51l?WPMjvwx|6h%wwX*EXPUgUe&}@4 zw?4SP-~XJudv^&+wiCk5Od;O0_k5r8Kj-|v&sF+whBNEUzy3l=*}pV?-@v1P(^0Cd z)G9(pl~bzXs8zg}_mo;q#qVjgnpV4GakNLR_NZI6!^&|SmQJfmuc}I_Q*+c8nAW2z zeX828R{K?TK&=j_>Y!R3RMm`H%_ynWt13gPnpLY=6Yo=%VO1Sbt0N}fuPUReI;K{~ zOng994yo$6S{*m>K~Z^r-5zTAeoWti%tiRg9Y>99ESh zsyd@qXH0xVRi07RqiXf2iI1wvF;#t5tv+kwW2*9;svcLX$4&f@s<^6pLam-Kalo%n zt$sp)G_ESotLh7C^#$ZjsMQx$c~bdD)f)PpQXeYyNU1Nbeo~dEmH$aeJ={v2Rplef zpOw^^R%(t5w48f}>vkVOCZBh!^>VY~&(FKX%6dHvqK#_Cb=`TrTdOy8py%B;i=_w| 
z9~7gYUUNggq<3|@u+Fp_DK?{xdWfdp(WPSOUiK^1qIuF>hnE7nZK3=)ILude%1S5Ky*jF(Oi@PQ|faqFSudW3Z-=qv7Wx=m$f5j&dx)K+Q zQ5fu)L=NYjufK}m1jNb}@6tQVS4>gPI;h<< z5srJI7!@^N&P3IQ0AqP&p@g{nvHm4IdKhr^aUiiv5aRe9<8Jy8$FhgZ{~;24dEEHY zrviY#M{@g>Kfr(GUiBf)?I0^M+9%PBD)&oMNF0)MG&`U^#BR+>`k*v2Ebke4AF<`j zL(;;iDre)m!|FqzgE47mLXoO_mFSpitMv)ya84IjYQE|=Q9ThzW=suZ<$TT7L9 zO~J^;er+@(U%P;`n_CWQ>rJejyHcz)eRsiUc3lTN7;`QTN72P5$ChwUE}qQ9Z4@iI z?l!{uy`YSSaDM95s$VP1al%2Wm67R}gA!ZOZd0QZH`?%}WKBn5vlRK|cEhu_<@rW2 zELL6a)mpLY&%4))(S{q~KK~`X5Mm=;?Sm68S5Z@m;%Bcy-VK%meDTj@Rla=EzmU3x^ zOG~*Cj)R9>HyeKFO)$zsUMK)2QK29xW~pBDBi1Y2G(XGcXGL&RL2m?Wk&dQVz^rb8 zYuH*DZFw>?1*luoRZa&A|Akf~d zgGR6-Hg?2~l3ZoG@UoS{V0|N!Diwb%!bVl<29Cy+Fsz4Cpn#)YwA(E78)2a!YwEcu z2|%ESgIUNqbhXItcnXx0c4nMsoLOhsnQ-L9W|2y$$s+y^roKDFL%WGX3)0~zoPtM= zvzL-J0OZ?wDJ*ItpKAcnJ;H*d)y`|`Ar3s2QkHPx&S|xW)83;tdszZ&hc)Kf;_!nE z^{SbNKnZy3Q#0qcZ>n3nlVl2z2ztX^ve$Qc!(tz?mVaVFsdP7Glw%A zvLHd;dnvVDaFBbe_8MpU5+24L6bi+Y}*8_)U|A;ldy0Hs9(~;m@Vpdvw%RX{U@mmU76Ztef?kR-kgkc1Oq( z@daH>$f;B*L4#m*Y#b8QByb>6xBeXjLY7Ij-zFn1WIh5LtU=j9!pO$wVybV5zAKci zkHRBsRpjl zZj8h`8 zxPC3_`}^3JAg|%GWDvk6kw+3TJPkZZZWkaVQ6}L9SxpI{eM8WVP}PMy=hSVa_6S)` zOMOsW5U?I0rYPj7HKBndWNdW%g1QAFyE#B~>P z<(l9;6a<0gxNNemFPPWX|DRHS*z)cm#>x6fHuY^XJ0FRQW=v70jjC`wJbo4~?I z!qSW@aEOmm4mb(&0!W!?Ee}M{DS$NPM3ZWBDo!TEcPt}-IPRx*u82Gg8GG~xZ0+^bElXjhRrpJ(_CO4&!9=EI9JbJ5 z6@I<5&|%WPGU;7PB(fv1YU$U2uUe>nxp>dY&)tOKi6qDfvEMrZ(&$1&=8vF4PwK2S z&W|BUR@}C@4cao7=sP!;6tPJNgW4KI@VoV9)EZIeyz}mr{CeJfFuOH7@1A}6DwIGg{1AzxBP?6jQAc5>?{!V9HEa&L zm8Xf|u|8A}=zGvHP)MZ;;YFR%#N^P|)z*S_!OHgy#WjR9AR!He->}5iQdap{7K+?K zK|xWC{FNlL`;yfbu{ST^L%f&JnISVAR52`(OnIm9-bv$y;9Ii3gfX}(C*+MvH=!xe zTpFq!m9aoVXzxQ6 zL2>nc=u^FyS@SFd%GIJ=z?!jg=vV9aeD68t9Y>HORS-c@rt7_ds0?eZfEAP~W+f$2 zwhROvhOd7V7aB19rqKz_(`Yxj08K&WR`l_7Q(8;&($SF~f z`?Jm`+A!n>6c~ceTNq|F11xKWf5^)llYW@m5 z{|e*}+#L2Gz+<$Z04gaskWlz85eRl5#arR=kVh2=c4n_y>s7+?Ax$R>n(Eu{#$fEX zj2oB}sLXGJ?}Kh+kje=NvuG;(Ln6HJ7ZZJnI!1OoDucySQmsEx4a^9X8$$}pH?AwB zVyqcjS#+q4=}}wQ8ULL;5V$r}N8j5f3+g#!^*GD|^Owv57m2Lnk_EXrnbj{@^O6Pm 
zIL?Z|lYy^F3m|?R8a@mH?Lw^~x{BLoa3T>GaNIFa!;C@_#{_V)Gb%ES3-Tb1gJa~& zP%;D3`L~?_8Yu9tS@jB_4gT>J&T<4l&|Qt<2TcvCSN6c6qWTe02VveR;m5%kF6XEq z)vu)!yw2sMxd2kOm55;tIOWbGvN8YuqIE0HNySwtPqb8n0{8_yV z0d_@IicFADx7yT^y8%Sr?n3%a_OQgv1QXxEzQTST04a$-hw>%xlwKzpl6v}5H>fi^#E zN_4QoNXh_-D)sKIi{8?;rK@)mExLhrwGSS>ZkcN?ve_fW$kni{xNFVY78b6{URS01 z-=#vO7@o+86+O{@dqC`foOqs`b>6OHE}zfOVH9k_teXpa_N>mk*VfSKDT)j=w_KMx zG%mvS$U1;UyX6aS!lLNUWrPQmLn9n1&yBqB=m@uq)1!YJ^S>VYr`re7FpzHUMd*zln%gj0VhS z5mA28h}VKq!9aDSBTeAjnIw?rxZ5=`TVa94LTqe8>=ApQi<$AroQXgnx;78~Gb+cn z45xUSZzTrQN_jF+0#USd@lonL z&m&NWrorH6ox|XsrqiS8aZ#mBfbpM7_w{C-(G(u!%ackwW2qCV>C_DB%(aII~QWok&|2CyFIwhaHi!Un`jeTo)MkML1s*(bFNt(Y^6%f5HJ5rWf8@P>u0 z{kg+nUKoR*<}S8*N_6GkE|u%iUN^-@@uns#8M;($xIwu!ddqHZfvOo_yV7D4eA?wYeA^tD>t zF~8u0xi6&!Q_)4#yTaf;0>L(|37J^jF)XG+_qdV#vQYyb4B8>p1i@v;uke(Sa;o7$ zC}5_5ZHS%6oq$BT#L2OA>(KoXBKsdYm?&_{)*;Nn_<*x^Rm#E6fHS3_X1~f41-Ta| z%F1>)ct0gYqmH69UPv+P>H)*Q=@YUc&U0UUL_a5Wf4j3T1i^jliZfBGE)F(L10q{5 z3k}de0s7L~@LtUNY1#5P=5(%-T~85;FD9)f`Ey7i2l=->=%OAD*4&f9Nit|fIq!56 zJ6W$)b_?K2YxVmzEty6K4ACK$0;q!C0Dp-iZv?v}MzUCjh{j|1+DGKP)S5?XVwcg-oT)ZL~IamE&nHz697)i^;hVYl9N=~9LfLv9s|hp6qDUV<& zM8|vaZQ{gZ7_1s4PNdm)8oVigPdHQHN>2bk8k-1A4VWEm$YSFVqJW(sMiPl&;YCe2 zfFk5xF5}Vv1_3$PDJlye334kKb(e6iEye|C6dVb;kX~`2y!sX}Sh(GR@t}(%J{GMa zUtro44>_P?W3z-O6;$2lq!dh+f{mY83B8G-Q!2q=j2_YEAk~JZ7wR(Mu(d49s^7;% zTD#hKoWMFpXJrW3)FZSJ$g=O7v!UHGR18ph&>4m2lI6wzlwZJGo;y4@ce2&2rmczI z4@O4!-I{;jjhYSkR0xRz-&iT$^($J|Lej`hn>0-kuh~EsaczwQic@0rb4xi&NFLRB zLgx!o)aN*47ji73fR<*7CQY-(O^`Uh;@@QJzB#Yn9X2P&MT`JEJ+iY+yW(?b&cX>6 zq&+}(iS~_2$$05VjYd$~f_3RDEJFHRTZr|6Z_8{ChEt%VO&}xybAzDY;t`?rqWb$F zz8!OVaX3a(pG=RYo=^9tN)*=ON47lWb`XEFpJbaFI_*HnwRQnbO906MwuW_twu=P- zxBRPttHpN4B1Vmg)i%!5IBU%+^kL>wfom{b*@%9vmC)PGKyM`cGKtMhqPgyHFn}dI z%%&)qNyf${&h;k=-5!)s>0aZpNhC^MOki0DWC=?k4|C#iD6hVS10SadUxSo%D@9BI zNa<;%h$|*iaK>#rBJ3cguayE;OIq%4rHH#E$_%Kh7TMtKEojRn3R{~*GbSoNCRg7A zJ+SBlxCw3$@Z)IJWyE-=z}s}^b3BC|dFX%P&Ivlbm}FqY8R+dNfIW#E({yb)zx^FJ 
zw7_ch8SxZBuTb0z2;db1?fzBZP#IV4IcWu>Ocoev7hoUoOfl99h<}dKyAF9pW2ERM zBMJk-RVHzw{6$j&N^rd}4G1h_upL54I4ny^4DUM*+((jdVs{t}3~y2cQP?cuP?9WD z`xIR*B*lW0weXs(1?$CBM;Z8UNR_yERKX>R!*mYCV zt|>mXjU;ABkEh+-r`(E=3b~7lk?kJ!@~Ymc!kF_GRVHq7OTT03WYKR*)U}O0r}nkEQavY#NBV zb&jVgV2`H8oh(v8H%0Y{Fx7|nlrx@YIGP$2C8&5l5dRaZg8!*lz>3i2{w*S@g*$JE zr#(2zykIz-X9&s$Edgm6A7WsdL?TcpE&GD=o$XJNmiYUm0HGJ!yDun!NZeOqO>S6LFHn9P>}GJ znq}w5%<^BOMIKFrKPT%39SC*+Tr@#U&HaY7a8&42+pj}sBcGU~il85tk| z%2~Nlf^F>X!CAjmYi17v2e9ByfESQ?j2iw1Ha`OoQc)?HIpf#z_k*pV0li%@Uk}&MFm}ea@#48l_%}yIRcWIRBr6&X5wnUF zF@$p`3$>!6$o7aUys@$GvK_+*aM>)n^aLl#y0!qP0Uxexph)1esZ7Q})^qK8v0|u< z`@(T8mZaVTMWJ9ouN;+Wzt!Jlds;V zn8-nF6m`{N+=TOp6@`e4pN!`+`Wp;r+O$G@yr5#$jKM2;RPSGZ=i1VZ zWdVTq&spFI|OPhmrz;$xKA(P(FB2yJMlo^2o0IS zJ8sP5HauiVyr<=sxfn2fNP2j|Kirr;1lDs2gzosl1U`Wf+=Fsj@Gz+i#&1S$@i;YtG&uhd zZ>-yqEO)rt*(tex#fQ%wthzawF{z!1c0yL+tc6HfRF>9wW~MFxHFpTuY;3161~j7Y z__@qI@;n0Ygga(;^gKs>A^<@4_LPBzJ}5gis_6h#5OSLB18I z>-fT*59A^f+)}|viUjirAz&gJ@EO#aG3+Le9X(4QF{drVK$JlMvn0CTn1MdP;B%5Edt}oV?GsN$x{<o1&Z$s~ zv88^KYfK?rIrQLxBfs-W(4&kz8HlX|j#xLy+O}4H8e12z)JE(SIn{FG*z+RrAQ#@@ zkXZm&tTwg=M2MX_rIm#{>#_}uJF{#6ZUYEHVEB!+0gW4&Yb@Y^3L9#BUd+Q_(}(0P zHWo7cdAVDKod5wSi)}~IXFg67MxF6n5LdGLYfR6E|B4`AX5~L7qk%NkLV+m=B{c{r z*prfkD0W)-zp2uo8~_0t+ZkmrXV_-44t507DFfMb>NMqIoV7H6HJuOv{tj_EndTn^ zueW)5nx11)adN)Js$7(EF03J;pW9$Z##ufcKl!EJn@r|@9Yy$x&5@_P@q z_Hx0i&AB~N3SdZg6P~K5u(`MQ@gj_egXwwhiI!{(`TtdKnob(E z)uv96rV_-+uFUd#TL8P;#8kOn{Ck`xNve^F;&rAEka(LKRv}KN@Yl7Oy6x9-(RsI9 zD6&Mh<@fUU*&V2b`>lOkQ1pKQzVrT&!3qN^P>m!-BxrP8B*)$d3?485MN^^ycp^cF z!RhU>{FfN~I|L%A;3O3+t0tF}c;GCy#fpw@~qUsPSthl#@ShgG+cc$U>)N0S#GF zvVd3ti32>!prTj-fdUc-EbyX8`w-u_0(HbYKE#E0yqIGB@pri-g$3|AE__vMh?0nG zfnyl9;DN^kicXP4qzioXO3HR-iAg?oVf;v`MJiu{&A-eqVa*ydYW&r>Mc=n>lD2t~ z!FY>y5yp@JHwt_RCep;^dIdr}+>s)P5$>Bo#puM`MS8i+y16g<8_jaj6;K-;kJXGd zns+NfDG&P!T=PpC`C=sBn>+KRd-V!*BU`&?@F{||O0aWgt6YBhvs+xNe51Vf|Amt- z;p95D>*KpB3aL02OqJD7BDA9Yrp?RYdecCWaDSB9buQZT#^xQ3ny%FA;UnDyUL3xaZ3M 
zvc{T^4DpV*j9)#BfNU{l0jn9^jhU^#fiv2gMk5eNkM?X>p)oXuMrd^ObRMOlF|}K%7;J^fE>e%0f;d* z(GVq{QKOpyZ#{D|nkv1=H(I=(j0?AA26+Y_+ggCeyy%iTtK=X|n2#60N7b^J+C6Ct z<$}4B2{pYK`K6)Gy(eHcANBM80^>9q%m0kG_Ez-%oKZYaUF{)Wi>);vF#a_cfiTB@ zeEQ%g&imnl|5q?m@4q1U;e(&R_}v8fx$CJk;hvV=Pj!pg{rn0Y^btoT3SC2wNb5KQ zj_42qNrycgHdx?}UaOtbH#*b%m`>aQ?Eq8@JWKosza=CB(NKfrsPklK%{Iiu}B0I z$Bcue=G#36JbB%hFNhS{NV@FtV+aR>B#4t^B=PULLFpEth6aJ8v-ppGJq(NsTnykanu2Iv5?+;kQxrR%O}+p<(uu_>sn5Tcgxkb?>fW_WyQC@m-D*VT4IcKe8;6R8O@|TQD9cdikUC=YaSiC-hM<4@Kg`dD-=*xjOjVir2GtNWZ%`nj z-Z&b8d459eP4bMv%MT9d59OUIHhiKFV1r8T9fxiSNH;1uNv8t?K7&X)PyC?2lujJU z{Z(%#7p+#mb--Ye$A-T9x0i3oWnKP$WE}f7V%EE|-87f3Kjr2$ZtT)|`dt3p+Yok(s3OpJPxm*cR6-#>orxDB_M+w5)u`7{Q7#C^J_tFZff ztxankrj|NQ92%B#^lvm~4^i_7%S{Hu6TDAO?_>0AT`D$W)csU!-C8x$jJr)Sn%xbA ztH0X}ygy>?KW6X;2qakto8EtnBVlvXRN1suHo*-*#%)$NVUd%x5X~H{+U+k%WM08A z)Qcc7sj1#cTo}&u7;}wLzES;#G-pQCZ(~%(K?!Fr`dPi^*HM38_1m8@L)34=fq$FP zzhv;=7cS^zbv55T^0bfAM4y9|Bzp8oPPxpOnA1?U8zW zzsZ0bK(0iYGEg|-^V%Mpk)9;g6H}n5K;*&+wE}X7#Qy&e;Laug{{TvO9!0%~MmIP^ zlYISR82#W0mEH1MoG@Nqp&QEH#FA{MY%5 SSQcr0sp;>k@2cVHcmFTQxdQh9 literal 0 HcmV?d00001 diff --git a/recsys/algorithm/matrix.py b/recsys/algorithm/matrix.py index 9a8fa53..85da209 100644 --- a/recsys/algorithm/matrix.py +++ b/recsys/algorithm/matrix.py @@ -73,12 +73,30 @@ def get_col_len(self): class SparseMatrix(Matrix): def __init__(self): super(SparseMatrix, self).__init__() + self._values=None + self._rows=None + self._cols=None + +#`nrows` and `ncols` specify the shape the resulting +#matrix should have, in case it is larger than the largest index. 
+ def create(self, data,row_labels=None, col_labels=None): + self._values = map(itemgetter(0), data) + self._rows = map(itemgetter(1), data) + self._cols = map(itemgetter(2), data) + self._matrix = divisiSparseMatrix.from_named_lists(self._values, self._rows, self._cols,row_labels, col_labels) + + def update(self, matrix): + + self._values.extend(matrix._values) + self._rows.extend(matrix._rows) + self._cols.extend(matrix._cols) + + self._matrix = divisiSparseMatrix.from_named_lists(self._values, self._rows, self._cols) + + def squish(self,squishFactor): + self._matrix=self._matrix.squish(squishFactor) + - def create(self, data): - values = map(itemgetter(0), data) - rows = map(itemgetter(1), data) - cols = map(itemgetter(2), data) - self._matrix = divisiSparseMatrix.from_named_lists(values, rows, cols) def empty(self): return not self._matrix or not self._matrix.values() diff --git a/recsys/algorithm/matrix.pyc b/recsys/algorithm/matrix.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c5cba451931863960257260cb5c4e1e2c7a9d74 GIT binary patch literal 7182 zcmc&(Taz2b6+RmxrrRd2~Pl7*{FGvt58Pj&oOxtazm4V#lt6j;@ zNzs?1K9=f<8OZ%W(j9pyaw_t{(VnEcV)m4JZb`+IeoGxJsplo#lj-<87|s5Lzj1(( z8={-eLq<*;NUAbY>b_DSlFD~9 zq->nCFX?$5JSOQ-2NxvW*TF>zr{l-KO+vxh$m1O}yN@mvc_0!gn^To?GUf!=LpP~P zBK*i8cBCF|MMWonmw_s|s_F-K*nSRvf%{lFZ_|c(1OGP2%jqVVulNUezQV z%un2`DqjWE_QW1EmpK2V0@SnOz<=x}X7#ad6lG=RqKF!_WKDuci{ez2<3LUWD4dcu zqR?8Q(QpS>rH9nq*4VGjypAHOpk<#@v`sZ}5d^Rg3jyzcX_Ubk6Z<^+BreO!jWXC} zT)HfFX7syzkSI>v=rFcsbSICk^)`BmZ&mJ{VxF7Al+L7Y)pb>?QdMr%Y#O_G3jh%3 zq&8sqag5f*r%u>Z{C6>c6&{NW?bT*PJ>tIp2~BkBpB{Hf`?4bs0;%7W2kKL=LVx2?Jd(Kf~0om|79HI6n)@v6o_{Jr{H zr{<&WJ2OoRf&W=tT3%Zp=2dxRM1^gF-AF_0*E!}?Z&&n_~Mx8dvpb z7TZx-jgl%~6s5iXqW4F}1n-nIJv@x^_@2qFE7O$} zf?wcf9Rg~tZ3@9JLHJe(mPYXa)j?PH9#ahx6Z@TQ;{rJWTkH0rHbhrn0Te27AZ<8W z6D6{vD80@bA9B=C81>j_{)*Cn6Sq1LPk{#?YiMG*DYsfb(4(8U9O2uy zal0O4HUZTC5}U)TElS}}G5Wtiq{-EkYA>twHD*j`Hd#y(QKYFiii#>->Vz&^o;ELOuS?0(FSZW4~zJ;&|_J7&&ReAUsWwv8c;ku`*6P~Qy( 
zUH*oBe;w?sej%Pq!j6A@?GVlW8lAzmdVTYbtvdc;3*X_+z1hP1{X1r^?ccH8Cc;T; zeq8kqSG|P}n>=JTxFB{ZsH{%m%BjY<2y6NQ*7O6ciJgy{%VbPN==Q`euo+Rly>fbd zz^6W1Se?XDr5QoK#)VI!!0K7=(0FjA)I7llA)7~8p;?($7B^1 zE(`K`NJ_|pC1eT?UqR<+}F3TV4bH++dnTdJ6qeeP(7abHqLKwGK2fg))OHk&&O!()~^js zysB8NfW275`O_-_PjLX#2B5D>rIpEc;|g;Xf^sXwM4dK#9+dC}c8oiEqNAxQ{5fwa zxTdKU$U53s@Wc1y9W=X#4w}a5vE>DyFekI{#{t?K5Z=;S$C zD<)YW^kiPed;?vom5 z;-SzG#Lmv*xmj)av_dGC6+)>Mg1*LXW025)D}JEQ&jdqM1y2Q9DxGm++xOLO&{~R&Lil#4rhOFhmt&pnLdLgmN&9A=U zSPQI?we+q~&&9|CT6bv5sP6IqOtgwsFWr*x%FS3^G2*eB3+gNwnFf%ct?oy%`r Ge)iwQG9Op~ literal 0 HcmV?d00001 diff --git a/recsys/datamodel/__init__.pyc b/recsys/datamodel/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d3822860cf95760a3f628de56e2c4b7be24950d GIT binary patch literal 226 zcmYLDK?(vf4D4D#5j^>cUbG(&5g#B6B8Znz+h8m0R@&}ipX(d^fa$VeNG6kENW#DC zRp>r9&8Fbgk5qX>U?>}AX6(t?oj7L~F(QQUXMg<5?Uw3)%J_a6kJ&;I;NMZPQfo!sQESeiy`ym71UHXrGmP80AU=HIvQtW z)RcMvkxZ-Lm=N2>7O-&9If5<*R;cBaG9G z4a?n~d?QVs$-%QYQO+r6^1nYK*$Ae} z9`!L%HDH!l+4H{2O^8`KEuIe_zREd0jmmCGW%F_{VwpG_n3|K+cz|>}Vz^B?3$qD& zaaVM6%`i8boe5#pxpq);*dAqOwPL*L%#{kmr7DY1X(aS=1N}COXec(d zl7)FMOU#!D#2PfW{pd_vRP+*-w7OCenF zQi~Gv5T&A9X_EU0gb44KGerDkeNp24qQv+sI!fAU))6S68{yLWlGc~sBRE=Q@LI(9 z_{u`DMkKoS5|+J5ThVr`PSPBN8NzX{8$cAjIM8>(mC>2JD3HnUePuy{pdmOB9Ht|q z!%jEfL3}jN7Om$EJA93pNcJ0@9?bMk2!H~vVX&EQ$Elw$U=+?D;+RF@rgwD&05dWp zo3Ewuf#%f)?ZJ9?R4govOq7`1Pg?L6XuAWqVOMvFB8C?4ce}KB>Y zoUQ;e#Ezv3Nuqk>-hh$U4+2k)|3$o*vnbTqtTTJhc{$ZC!F42iH$K@?9)09caIu zyz1;UoSj(?Vwjpx^Yb9PmCUP3eoSrERW{)`ds7HWGCpzC?ICrI)OAolR#3l)r5sEK za(3;fuah1ejnHol=qEuxDfGK_wOJEqfsIcN>XYPe^SBD8xaI7jLw2Xsd_FC3;J1T! 
zT$mY`A38fT3VoQ2k|UFIiefmhXY+qsoI!#SvVAC-ohH$&Qj zkVwYa{;0&2;5Z@p7oc1mvrb%&>3yE>^eNVH;Nf z7Fu~@BzpH1D{5`UP!QQfw?~QL@GzI|N>6{zgTdN%uIPh`VtA*gP7_s}H+6x;upL;1 zguq_jeo?MG$Li)ZS{b~Ds1`cKM8vsBA~sVV609L)`G(vg-lc5>4~+XHUd#m)lJeAz z*RFFe!gfU51$}ped!fdI8)m7@nS%Ck-wWug+llsd>c)Aqh{1domUMF~>UO!ie1peH za9oc*{3)_8WO^xj`RcT-?nXcU`7dnN2~!06jfJ{|+WKmP_h+}*2~vInC`=xtJqt_j zX2>SEybD@zP5|QJRuFhr@O|VRAYUkAO@Z45&(O5Gwnhvf_&z5lxrs}^Y)+DhuZKJK zg(zpkH{MpbBhnN&^I8&w_oUxtPDZAaJ4u58!NMc*MyF&u>15;U5kv17rT;*aFoXnE&U7 z7-W_XdG1wvRs0s#cCWGcI*aF75cv)hiZ1WgM-6@5=dj4{@z|Wocya$DFJ#ZW0TX3_ z{!&Q$oWK*wnFw4s?LfzEngW;tpcRM}SHye&6uNPs*S`w(tSH?0fI@&qVPiyUAO-HM z07jh`NCN;`?rwVuy~64#joS4*f)i3QWd|+nzDvc0gsnOI*Q<2*rP_cYQcA2to!+N)Y^?A{O%~V45yn zxW+vgflIvkUH5BL8)%KH4Sct_K$@I;kp=TQ?j;m*N%Y7{-$x%X23&iT>LpPNzv#G( zLoQP>;t@Afgexj(D^5)~yn~9>xU??ut&r3~m&s^*)Mh0ta$kSIVuqVmHS02GN~e%> zu}+dXn9r$|&LWpzxV6F%H72ocjfoV=M8dPdo53{j@4(RdX^{6p`R{d$$v+<8z|9GS z{68w3dmU`7%8y|7JQzlTUZ-o*3iNo7+S%Q}nt#L7cXk$3z|0XBJQ|YcNi6p~{Djzx z`AP-!ydZ6Pp3AyRRN;y~Gil(`5JU}M@naK8K19;g76fp|H?v;oQr~ReP+UHpuFC|2 zOOA%hD7B;c9aQo)v>rq7P%T%T+IX#6E7!(q<*6yuW~a}b`a!KW^^|3Yq}Ey`K8ZoG zExgwPT;Z?9f<4576CCQ!QU*|-c3c`huD^&F^26+sAFroCW*Z#=Z^FhS*{~ezWqXiU S$duvH5qV6NW`J5|O8){sGDSuJ literal 0 HcmV?d00001 diff --git a/recsys/evaluation/__init__.pyc b/recsys/evaluation/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c3acb92feecc6b7ce41ead6f886938ec678ce78 GIT binary patch literal 269 zcmYL@U1~xx5QXP@s}`jTa0mCL%?3o;B7G_<{iFCY#3Yz$^CL+V?s8p2H_%BDIxyeN znK=)q{kfU=aQ;+qsg~zGcb+jcz?Rv79)NCP-@=ynD|qdO{G}12Tvwk_lqrTXVR1-C zlPXn?Stc32DePC>pt`4wm-YKL+$tNR+|0xO)QGP8vWj) z$MmU5=s!MQbcHime=OU>kU~DUt>YO_6M{&=Sce3p7uYg)Nb^SlAZ9etQdx-lA={B>mlq zjFRyrGDaTfT2>ov@$0Xw97RUjk&;J|8OJhJc9bWi$>k)E5;@fQEK8)-Gdq$IBxS5j zA{%vig4@|#<}`eH>SDb_>G4xV>*_qE6~+cj6r1U~<@?h}M=4L&m$&6)OmpYQ($1z6 zWjZp>GaHSwaW>=~Y86e~9%#E$t&(wURU%<~)s^?U!@m5keXS#}cCYPrWcx}-UhaMd zX|E%%JLzhvqwlVC_jVuiDf)IkRmC>230B5%A)Y!tcVX3s)I9Tq?h8wxAcuP5g$KZwT($Q#~_8!jTQKZv{I)9>KYkGI0c%&w| 
zhN3r}lmD|%wTjK$^ok!y z-dUeP3ZUX)kZzCwgd#MdG<(!kr`O8PbY}WYNU#F42AN#8oppE1Z+W;oHzwk9uj$B$ zL%ORRMc@!Hyr}g|%D-XIr;L2SA8hR5Fmw9X%kxcGP<%Udo60O`ky9<0?>s|(L zQ?rNWDjL!`7H7OOldmrF2ABrm)Bv2t+`&xaXam>h^6)iz3?#=OU0xgf@Xx?{g+`Qq z>92s*Ohh_Px-9?;s>B2Qa(I3~f(uValO8(6kbHx9 z6-9neAy*p00rZIS6b{=JOK?s@`2@!@%tBmvfc}q^0ii3?8AbUhdP+Ldnt_$4jWP9fFv*eE83^moo8vx4dvCV4LYK^?ZvMl=L>p4 zn4$ajq$%|0fJ`Mbh@Mcg#&<7+swU8PxXQSaC~n_hXrdBYw-a~yB4d>|*w2~@fo#~} z47Dp=@8)YaveSbBmj=09R<;4OWt3KzO5tHQI>a^ZuA=HfcdyC~4mhC<_y&S;ETTJ_ zPF0q?nla&x8*OntFRigAHh|BfBzZtT1?^q%cB;IoB!Q3UgUXd3G9h^EF6X`#caf7{ zbJwfZLb(H*$1|-}W-BuI3%C6}!{a#4@JLOAbJSE9P4FJN^sFfb=^{rbMm_O`{uuc*RjZVk{^SX=Z}CkHn&O);`5V@_Z<0FYP&m4IL`{jv zyLmdD5#6%ItI|TwC~I_u9-^5FyX6R|YELHRaxyNQ3~q({u2MbVO0~#7Bo&=1o!a>& zgJk?*IbSPZxd`D}f_R}>S|iLSV+pQB3RoHuB{MXI6z z%jil9L*8>3rg<`(po`&QnB+0Z2>d~wsbG_Y04WZWc7W?QU=Du=(z|Fbpm`t7MQSR9 zvh-h;lL0`8owBbQ&4#zN*=#ho%I9>mIpF^@!{^SYiWrKVLn8!N#MvyJ&N<+i;|_Pr-Y;;ku=md5WvUwJ(5d818LKwmB3IT9a6}tq7qUSDiRXTS+mss@A zC?foKAOT1n=sMs~+l9o1)Pt^P+a4qpNUPAT+I9t!8l-jT*0Eg$Rfj_dq8eNPoC7@R z&OlTj_0EiYXCay$^)|-64T$ET)0{_D;=l1}I#@FmQtzZNwl#yTzLJH`v%>bwVUKw5 zZF@WT`kM!v`?j-zojXruGLRHDsK{ z*^zI0%D-RagDmp*i-GC!%r#$Pn}+t4Hr!HTU#>hsU$C`qtzdpUY>ngs8A1etNc-8 z|FrsrisF!5wca0!8ZLeXizYR|2}xbjmipPq-?4&L6}RBjC0QDbHoLHbwi=gA@1Wap z6OJonep#n{#x`w)trdD|lwzj^5rk1f{ zI94c1#u1{G(SA=p#Q=#94^^QuE=`kX(j?ppfC4ui zWkCLBCG;TWf%vjwi{k1%EP5FQIP=bR_kHJzx8&5E51a-3TXEV=QnEY{nM_p>u>C=h z=FuRbCK(h2Q66HQPER|}6o;*#^(VGm!eOD$&tTD{bgfo%<~>$P>~>xc?|#K~88#x% zjfGz4WFU*T4|z6hiID6orko&G3fjPl*WNtc=-PRaZlnm)?Jukl-EK48ZkKdJER-lI zf3B9e@GR>ri5Mj4f8t(k#JcYZZqKo92F&b;b&YA(Jp|+uv2JQPX5Ge!btT>`>uS@i zdzpgFyx+#mbHU6r_NFu}u}9>yro3MA5!J*j2`nH8WQ;w*c?0Ddf~UMgHKr8@2%TfG zNa9@-6O0Lhhgc$UmBcj~#c~<0 Date: Tue, 25 Apr 2017 18:00:29 +0200 Subject: [PATCH 2/7] Added ability to read batch users or items and fold them in. 
Also added ability to foldin or truncate new items not in the original svd model when originally trying to foldin new users and vice versa --- recsys/algorithm/baseclass.py | 5 + recsys/algorithm/baseclass.pyc | Bin 11549 -> 11617 bytes recsys/algorithm/factorize.py | 251 +++++++++++++++++++++++++++------ recsys/algorithm/factorize.pyc | Bin 25820 -> 30045 bytes recsys/algorithm/matrix.py | 53 ++++++- recsys/algorithm/matrix.pyc | Bin 7182 -> 8411 bytes recsys/datamodel/data.py | 2 +- recsys/datamodel/data.pyc | Bin 8343 -> 8353 bytes 8 files changed, 260 insertions(+), 51 deletions(-) diff --git a/recsys/algorithm/baseclass.py b/recsys/algorithm/baseclass.py index 681b805..8310a7b 100644 --- a/recsys/algorithm/baseclass.py +++ b/recsys/algorithm/baseclass.py @@ -39,6 +39,11 @@ def __init__(self): #new for update self._updateData=Data() self._singleUpdateMatrix=SparseMatrix() + #new for batch + self._batchDict={} + #new for foldin additional + self._singleAdditionalFoldin=SparseMatrix() + def __len__(self): return len(self.get_data()) diff --git a/recsys/algorithm/baseclass.pyc b/recsys/algorithm/baseclass.pyc index 16b11748948af3db710aff504e77c4d4593df659..50b4b32a88900c7b87a8c0da46433b026d69f6b3 100644 GIT binary patch delta 1145 zcmZvcUue@;6vuxjO_Qcc|D+~O+NP~d`|#`1&2Gb#A}A@fZmj<4tziGiex%8d_Gj~_ zBzFt6tBRs-P|)q{p#^=JYz!+C$-oC;um?d93cl#uMxV!?HW0)I@Asw;QY7%nJ?DJS z{hfQy`Tg$9?aq1Y{%dOd=joe=-WE5_bHshT&oPW|48v+Pq|3NkTljdc5cf4W0WZ?6oL#W2VV?4AM%$G`U{hJAP?jar1y6{{yGEb#y7#Wmtk5WL zHLTTk;fuZwO7W)ebyEnLZiQ@Grukv-1z*{g7K{_z=Ko3QDdE&(|7oR<@v*>jig2*0 zuUTi-BmJwDoUKxAkBBRCwz}OECzZI z*wb%yuG6m^>H4;I;1og=e6Q;k{mw^Xi{a1qB_KYoE+pSrvNOee-Y#Sx@kiYiRpu|< zSLiVh_Wa-Z&KaTa0D6F9JkopEk2^Fm6{_Wx-uv`Nb>QHri(c@T@yj&9f5*eB)4TN< zp6PFMiET4>LE5FVIM2KNNq<^-1Y{z`L9gXU z7p!Hc;K*lpHwN6J^9Y?&aBA;B5cd97!KZstI|fX_Pt16$E=&&v3KMyLW%f_$%4T3M zD(E3`%2ugZ5wmdOVbZ6tQgjNkY<>@4bZJ9D+FC$<&N>=Je;M&Q6OZULO3J{^H;V zl}llWqCgj*>(QO=gNdovBQ26xH3J_4hX6f=wb6n(1{?=Y0AB!;z!aeGC6AT*WEgl8 
f0RC=hft|zwYT@<7kmnNO!~7u8qh64m#A$j9+~?>) delta 1159 zcmZvbOGs2v7{}-2I65z9nz~Monl@UDk2IkKdtlip*hBNAW|m#2@tU`39^HFYN~|b? zc10gi3)?8I40;R%5d^kq8Ci=65(-+is70;X)cK#O5Q$-a-0yYHcmCfwp9b#^7UX^R zy4;&SKPvFqSqBB}?Vb^v?Qn$IY__h5i>B>2X?y-vzem_M<`F+9bLM#%z0E(s0#xa+ zSRLJR+}W^J#JM71ho?8p-HcqUnw@}&3^NSnQ#Pr z^K2;l+;^P0(E;BC)QWa>dg!ye%-Dpcj>N23B0ICXhLy_ZUC40@SPp@>p~WnXhv;LV z`S0eX!atF{x@MlS5H%_56gW7tc>~Z0Y@riMnW&gjuCUAWOIh%j!yN*kBHu}`i^Hye z!stSnQ92n6vg_IN!49@#Wjbnh3s_Ddol$j5OPUeLEal~`@I&Jimhu#cFH=wIXJW?f zY_jA4tN!1LVUe|b4O$9KvpG6edM&3I7Ufa;T6%*$po?Xvl#Uezh`0Sy)X?JTWKv6w zJSL-jQXKzF`DONmlIwqjmPvz}AW#PEqN$2)ZfGAt&pbfSD(6SioMx(u*c+O!iZ=J4dI-Rsi^xIBu@4JV)+BfvKCw+)OIey>@$hJ9%*pl8S*6K=vc2UjY-(lUD*;fg+#|*bK sometimes new users rate new items not in the original SVD matrix so would you like new items to be truncated or folded in ? default is foldin + is_row: boolean -> are you trying to foldin a row or a column ? yes->foldin row , no->foldin column + See params definition in *datamodel.Data.load()* + + """ + # call update here until it finishes + # nDimension + if force: + self._updateData = Data() + + self._updateData.load(filename, force, sep, format, pickle) #load array of tuples + print "Reading the new batch" + + self._construct_batch_dictionary(self._updateData.get(),is_row) + + print "Folding in batch entries" + nDimensionLabels=None + if (is_row): + nDimensionLabels = self._V.all_labels()[0] # get labels from V matrix to complete the sparse matrix + # print nDimensionLabels + else: + nDimensionLabels = self._U.all_labels()[0] # get labels from U matrix to complete the sparse matrix + # print nDimensionLabels + length_of_dict=len(self._batchDict) + i=0 + isbatch=True + for key_idx in self._batchDict: #data in batchDict in form {key:[(tuple)]} + print "user:",key_idx + i += 1 + if (is_row): + self._singleUpdateMatrix.create(self._batchDict[key_idx], col_labels=nDimensionLabels,foldin=True,truncate=truncate) + + else: + self._singleUpdateMatrix.create(self._batchDict[key_idx], 
row_labels=nDimensionLabels,foldin=True,truncate=truncate) + + # if(i==length_of_dict): + # isbatch=False + + + # If it's trying to foldin a new user who has rated a new item which was not used before, then foldin the item first then foldin that user + if not truncate: + additionalElements = self._singleUpdateMatrix.get_additional_elements() + print "dimension", len(nDimensionLabels) + print "additional elements:", additionalElements + print "length", len(additionalElements) + if len(additionalElements) != 0: + for item in additionalElements: + if (is_row): # if I am folding in a row then , the additionals added that shouldn't be are the columns to be folded in to the rows + self._singleAdditionalFoldin.create([(0, nDimensionLabels[0], item)], + row_labels=self._U.all_labels()[0]) + else: + self._singleAdditionalFoldin.create([(0, item, nDimensionLabels[0])], + col_labels=self._V.all_labels()[0]) + self._update(update_matrix=self._singleAdditionalFoldin, is_row=not is_row) + + + # #update the data matrix + print "updating the sparse matrix" + # print "matrix before update:",self._matrix.get().shape + if self._matrix.get(): #if matrix not there due to load ignore it + self._matrix.update( + self._singleUpdateMatrix,is_batch=isbatch) # updating the data matrix for the zeroes , also for saving the data matrix if needed + # print "matrix after update:",self._matrix.get().shape + self._update(is_row=is_row,is_batch=isbatch) #Do foldin on the singleUpdateMatrix tuple + + self.update_sparse_matrix_data(is_batch=True) + + + def update_sparse_matrix_data(self,squishFactor=10,is_batch=False): #update the data matrix # print "matrix before update:",self._matrix.get().shape - print "commiting the sparse data matrix by removing empty rows and columns divisi created" - self._matrix.squish(squishFactor) # updating the data matrix for the zeroes ,#NOTE: Intensive so do at end - # print "matrix after update:",self._matrix.get().shape + if is_batch: + if self._matrix.get(): + if 
VERBOSE: + print "updating sparse index" + self._matrix.index_sparseMatrix() + if VERBOSE: + print "before updating, M=", self._matrix_reconstructed.shape + # Sim. matrix = U \Sigma^2 U^T + self._reconstruct_similarity(post_normalize=False, force=True) + # M' = U S V^t + self._reconstruct_matrix(shifts=self._shifts, force=True) + if VERBOSE: + print "done updating, M=", self._matrix_reconstructed.shape + + if self._matrix.get(): #if loaded model there is no matrix + if VERBOSE: + print "commiting the sparse data matrix by removing empty rows and columns divisi created" + self._matrix.squish(squishFactor) # updating the data matrix for the zeroes ,#NOTE: Intensive so do at end + # print "matrix after update:",self._matrix.get().shape - def update(self,is_row=True): #update(tuple:denseVector tuple,isRow=True,, - print "type of S",type(self._S) - print "type of U",type(self._U) - print "type of V",type(self._V) - print "type of data",type(self._data) - print "type of matrix",type(self._matrix) - print "type of matrix reconstructed",type(self._matrix_reconstructed) - print "type of matrix similarity",type(self._matrix_similarity) - print "dimensions of S",self._S.shape - print "dimensions of U",self._U.shape - print "dimensions of V",self._V.shape + def _update(self,update_matrix=None,is_row=True,is_batch=False): #update(tuple:denseVector tuple,isRow=True,, + if VERBOSE: + print "type of S",type(self._S) + print "type of U",type(self._U) + print "type of V",type(self._V) + print "type of data",type(self._data) + print "type of matrix",type(self._matrix) + print "type of matrix reconstructed",type(self._matrix_reconstructed) + print "type of matrix similarity",type(self._matrix_similarity) + print "dimensions of S",self._S.shape + print "dimensions of U",self._U.shape + print "dimensions of V",self._V.shape invS=np.zeros((self._S.shape[0], self._S.shape[0])) for i in range(self._S.shape[0]): + # invS[i, i] = self._S[i] # creating diagonal matrix invS[i, i] = 
self._S[i]**-1 # creating diagonal matrix and inverting using special property of diagonal matrix + # invS=inv(invS) inverting with numpy #if new is row -> V*S^-1 if is_row: prodM=self._V.dot(invS) - print "dimension of VxS^-1=", prodM.shape + if VERBOSE: + print "dimension of VxS^-1=", prodM.shape else: #if new is col -> U*S^-1 prodM = self._U.dot(invS) - print "dimension of UxS^-1=", prodM.shape + if VERBOSE: + print "dimension of UxS^-1=", prodM.shape + + if update_matrix: + updateTupleMatrix=update_matrix.get() + else: + updateTupleMatrix = self._singleUpdateMatrix.get() - updateTupleMatrix=self._singleUpdateMatrix.get() if not is_row: updateTupleMatrix=updateTupleMatrix.transpose() #transpose - print "dimensions of user",updateTupleMatrix.shape + if VERBOSE: + print "dimensions of user",updateTupleMatrix.shape res=updateTupleMatrix.dot(prodM) - print "type of res=", type(res) - print "dimension of resultant is", res.shape + if VERBOSE: + print "type of res=", type(res) + print "dimension of resultant is", res.shape if is_row: #use new value can now be concatinated with U - print "U before adding", self._U.shape + if VERBOSE: + print "U before adding", self._U.shape self._U=self._U.concatenate(res) - print "U after adding", self._U.shape + if VERBOSE: + print "U after adding", self._U.shape else: - print "V before adding", self._V.shape + if VERBOSE: + print "V before adding", self._V.shape self._V = self._V.concatenate(res) - print "V after adding", self._V.shape - - print "before updating, M=",self._matrix_reconstructed.shape - # Sim. 
matrix = U \Sigma^2 U^T - self._reconstruct_similarity(post_normalize=False, force=True) - # M' = U S V^t - self._reconstruct_matrix(shifts=self._shifts, force=True) + if VERBOSE: + print "V after adding", self._V.shape - print "done updating, M=",self._matrix_reconstructed.shape + #TODO: contemplating removing this segment and just reconstruct in the updating spare matrix function + if not is_batch: #will reconstruct all at end with batch using another function + if VERBOSE: + print "before updating, M=",self._matrix_reconstructed.shape + # Sim. matrix = U \Sigma^2 U^T + self._reconstruct_similarity(post_normalize=False, force=True) + # M' = U S V^t + self._reconstruct_matrix(shifts=self._shifts, force=True) + if VERBOSE: + print "done updating, M=",self._matrix_reconstructed.shape @@ -475,6 +631,13 @@ def update(self,is_row=True): #update(tuple:denseVector tuple,isRow=True,, # myFile.write(str(invS[i,i])) # myFile.write("\n") + def printMovies(self): + myFile=open("movieIDs.dat",'w') + myFile.truncate() + + movies=self._matrix_reconstructed.get_col_labels() + for movie in movies : + myFile.write(str(movie)+",") def centroid(self, ids, is_row=True): points = [] diff --git a/recsys/algorithm/factorize.pyc b/recsys/algorithm/factorize.pyc index 8ddc6846c4d74c0e30b323e23e17fcfaff1d0441..79f7199fc03ca842f12de55634fa87558ea13eb6 100644 GIT binary patch delta 5927 zcmZ`-eQX@X6`$F?JNwROpFd(JmpJi;Byff#*a?u}0M3UKCn7f0CXmRG9_!n+ea`vL z-d)EwwR;6S2toMVhL-ONs02kq(5qUC3Z*Trgh1ktN);`E(5PReX_FTEK`Yw+-rGBW z5SsYj&AfT@X6DU%@Au|TydmE^Ez8u8DkCp^_tO{4o>ptWD21?u=#jz%RfO5(y`pa3 z)g-KPVTQzvB6=diECZkZtP19%T`p!qq9@Ayu$Ymer;^(dZddv8-j=}D#h7E(vvdtl zZeWR8?}osdgEY37$LfR`CH3YKUULzzR?eKbm{CPfy)c)G8SJrvwJc*Ti-p-J%;n5k z!D35#wUT7eZ9vL*F~_aheqG`b#(FT~*la!jz(O zcUFkWkg%T@2AACl+VH7z>P>Co4)`DJD!vV-#LjK|6>}3q_8!%L60tNZbwG)bAwR z%fBfIRu@fdS12?O-5OTF7e!Nu3$?6mT1aVDihYSHy8SaC5lz#))h+S7<_;LHmda&a zBb{|LL)*8fyL0bO%^e-iSX#o+Ov5!C%hmdFHb$Lvwjcaa$FjBSbS>>#L)uzxz;HA- zr}bKzm39X(5mixn}R`mP7e8 
zwkO;O2qq;nMz57|;CDjwq>l2{mHW_rlVI_X^2S%jWpnrRmu zBcoXv3nQ-6#@aGgw%;9aDSl%lwD`l;BvgLw6HM$S9)>j7b z?M@6^`W6~r)VeCU0S$dIv5E9sZgOt5WN9@XOZs-(HoKW!{#nW}FX=C@_md(zr{7Oh zcwMcREBY}n-?}93)?#2z`_AG~^xM6a2h(zy_n+3q`J`{4L8L?HDu|1+kc=sfN=TN= zkgSsxGKSB3H70}d5^!TmOoqX$m6aI3LfI%+tB?h^K}o5YAH;_?v;!aK4iKb@DR{9W z9HcUZ+Hkb-elaN#Jo%)U3W$kD_B{AsKuoG49>;djM7n@4H;RCZn2QMe7I>t@iOEZ6 zWv^P$n>P~^58`vcR4JSTaAew=a~6agng2yDF16Z*4ci!E$8FO(;J%6GSt)@P8bc)@@p3uhN%LtEIM}|9UH182q&tMnwp~UjNwC}$c~dGu(>ji zi}n*Bzybb%OAm=@MM(G&?zBb_3IKtQuuFu*Ag}@s2M@wPwvM^Lm-$@R2OA&_+!Bde zrR)u)nW_-u1x~9GxaBi)3J@R20}LpAB8dZuHWl;yC#)BVypkKp!BG*X^p3<`#;Ao| z6;Z4Cb7irI^JNL1syQ!1brl_CDv#PC0vuv_22Z5s&dEwTJoZI=;22|Usfhy-JXI?M zPnAAARk4x2Ujz+o8$z@J*lDt=1W!-^YtOgVw9L=qQ;bea)1b?rxd7GXo4y23)dJ{2 zIsquW10SBo5Yp8OQh*sF%VNByc(?XeK0Ug%MXTk_YB5=X6Jj5J*h4AnDh?3dJueQB z78Fv|WPFCL8bmuKKPQMdn2ihyo(_N(!4eX$);Ee+wzRRFeON<44cnH3RBuoy1D|Yb zXj$%vr6K8RM%IJ`@XOV*))*`FR<<;L76`jWGj5t7uo$ijRxZI$TW>Cxv5f4xYcwY} zWVt{dhqN+2I+_hs;V!`>268U#jq*p%PWPt?llI-vp)u~{$DsoIAH^0Ya-&)%eR%#{ zA6-l9nuM#A?U4tdu^qVDXJEsWetZyEuw4S$+*q4d+^vS~8ZZ_uLe;83EcYNL&Fp?)W+x{Js$B6kzH2V@TGS9}KS^?OO?J|dLv0PIb9 z&Wf-Kuh*TRT&9zc>GuH0$;E-5J18BQJm<&^4;Bo$Z*%9ws;UP-M>i9Af?5h9Ngq22qZehe+Q3dFBrlDR(4 z&iYQ`Q#wyOd@}t>wDre{+zyh!hl#iD;GsMvd4iH+`9V0jo$#etMSPEv{9c+-$yJ05 zDsd%YE`3th~Rsz+Dx?u{?FADO>(i)Bp1omfgslzb;?q|s!*XbD1Zk$ z&~V-WK|x_3=RASnmk4`dZxkpL>{WA*6O7)XXH55H6)8ae?LFv{d}9>82niB2T@&P9JLMP z@%Y@AAoS~Ihv&1!?hn(Z9w3n5FVE-6rAOt6krbSp(sP3ON!`C=Py=#g7UMD*T$7X0ED)p(hZ{v zjHNC-g1WdCR7HchCL!|JE_p!(pt3ZdI(>W!l&<4f3Yc!e81~R{Xr-9)JqItrAj*TN zsH5T*yr973t42NsY{8HAu%BUNU?rb=D#Rumrngt;WFE*$dm0&**^77??D>4rDu@;( z$qA*fk4n&D8Br#(F`f$)_X`WWW>JqnD^YXc6^QVdf{cGxuSx=D8lZLaqz-K?$>ye) zJudVw5@5?VE*ZE6bNDImM-)!hu8?F*F>5h2XKYgF_we@Q+2|$_dk~&c%(?}_Bl#I6 zyOYNbDCQ!6Z0?#!5qk3#AxeEk_2&tzc*XLbo>a^Rf3$eY;+|^hM;RA|#1i2y@^ipO ztbk`rocG)?Z-iDkqu{3*K3h@G$QdeZ3i66y%PLO!6qx!~LAmAfQ!DGit4D16ty;8bo*&&QdQMPe(`N@VuyFQ+;QV;lA&60q 
zPa)ReY@GDioe?vfbf*+CzJ^?_x}kr^8tfpwoBLdbEwTi5_}d>X#8YMuf9l6lD-E}&n$hHd;{&_x??ZXruYz!R`o1lFyTg|A$Bn^bO73V|&nfzaz1 z(qHqw)c&UYq1Us%gsj})Ku?9Myo zhlNi&Yl9Wc`7NuM<3hkzW-4+x?abUBHnL3AC%;mVIx@GhX@r8Kq10?%97+%~|mII{f#p ze$M;%{zsJ?RPUkd&o2Et>F3b+6IuyeMlmWr#xEF0La0xjn^bSbfe(F2j-)@IFL{e% zsfEW5o{}M24NDyDX|4H~nnyv{vrPSO^rMK^a${7i7>_B_s`r%9 zvGM;}_pdaMjlpL52X+5Rd*)u+*H#X=6{oDkJtRpD?f6b(nivHC{LSMGn0)>d$2Liz=NNGz@fdyMqEM@6T+ud%rv@=yO zTME*KU*}6{EB&%he3bW;LpdrA1I+9126$v85l@87{%fXWkEwRm|0+GFv6kL4~7?U z`ZKd7j(ir3XwVvvxDQ4cv<4;bmwZmfUM%t~UTWmShzhNFV4!tGszWl$RLO@WKTYye zz$gHtP$ESzqCjgtMAuZ7IJ)|$hrQ(Z{Ik5Q*{RBF@v=kC?rA$1yX9mTHZk^-v$kly zk3DeqR4iib3FrI_##oheVWz3DXPl?!gjM#UsDEl@*b4wzxHMc#aakuo)Gb&ztieD( zqyZ8NBs{PaV8|;)6%v|c7$m%e8ywQu4h0H!Vv1C?RA&93lV`G%p+aL5t1+DyGnLVIrlRw5MdckFHRDTmlxi@TyG7h3gEY%*4{&>A`mz;8+R@kB z8@J=Bt}?sZEEA)PmqkeVs7$MWFm82Ma`EJn0xD75s4E%XMdf(V4iuHiG?x|s48=LUZ#t}}XAfiklii>zA5F7w$gXiSNQC&nEo`5tL@=hQ)AJQ!g8tHU# zVvuVxd>N6uqCX?zoRQX^m;q^SWJAG(FA1Hp-XRWK7J{7(>EZFfq{MXfZPQ^$arc_W zsl;0rPUDOsrT}_B+y4Ms@BjwpGk3fLfO>Qt@_-S*#ntb~;+8ftVN zVgW?)ezE}RP|{G&XCZ~C&UGH)1>Cv6{7uGhGbgp8-=iI+Y=^}!D=)GS#j#bj*@p@E zh{(s{_f?JB5n_*uy471X2if~f9ACYZMc#?7tr;U#Cy_xSTZn8YvWv*OMBXDp+lb0U zju;zA%?+bmiOZs$Yd(zyihMrb^1yStre_r`38(>8kAzC+76-h6)y~2-S2#N@e9imV z^UnLt^B6lTuC|nDXDI49@v!AB*5GVg7gDn&@-@Nd#s0QsXm_`*h+PnWwawDb5qwF^ z*|38(i=!LrO1~uFB#~1@=m4YnL}Y+!v46l)PorGpY;V7$V60DTyS>ul02jwQF0pUL zZexTW;X*fG&HDi@LPbe^vqC8A9d+E`B0*$3`E7r&XtI={_+>jk~luijM30+^+>rwdhs7 zDKFq+F?F*kfjVFoi_;Jlqf6^Xf%uTBZsjp#Wseub+Rexoj?u5}-J~nks?i0h>siqq z;jJtCHiwKZp(;`kfRzlyI4=4Q;fjWs7*C(R)6gKyv4#raU06O1E&ht zO$aB6Pbx5j1dpdf(7kG{ZnRdU^}|(bB+|N}Q;>R*ek(d|I1h6s`3}dwNtp%}jAg<- z26Hh8(o(4YC#0s7j$!J2Vpl{8D> z$fzaCmKOPw9W0B_R$)&=fv5x+1q(}+kvEenC98A~3xzE(AOO*q>F(;s&Gtb8I$NIn zEg#83CbRdW;uVLNu8E>q5??b3>keund2`kL=5H3zXUN z{T|6T;g7wWpp(EP=q1=ku%CePls!G+R3p!(e%J*9I*NTvyluJFLNN^%kHu9S$!vQv zPD~86K8+8O75T*nJHwuc(cEn|B$jl~kglG+KC~^(*mH5ZZKr7ukZ}@SPV2PqvG~>2 V!}`UJ_DMD<7F#OfXL~0*`41yl<5vIx delta 522 
zcmX9)Jxc>Y5S_hb?{cSNk~1I1=tj|a2r3CEf;K8DS_obm5fMW!K_!srDG0GltIhaF z{HTJZh5iOXEG)!MENz^*u=^hG-p;(8y|?9?1tav^XZ`Je-WeZJD+B;51b-2KqV}o; zkG6oe276)fG-w0tkrN7p4qiAA2Dl|hr?!e9VsOqX!7t)fb`Y;)>0S?@p6%aY)CZ%7MX6^$GQ_XRf`;ALtFJE>88PfcZJ$aZI$B$30F-EwXp2)S2YoBYaZRVg|p}L6GGDKM5amd7=IR*r`^h diff --git a/recsys/datamodel/data.py b/recsys/datamodel/data.py index 21de618..f2f3d94 100644 --- a/recsys/datamodel/data.py +++ b/recsys/datamodel/data.py @@ -121,7 +121,7 @@ def load(self, path, force=True, sep='\t', format=None, pickle=False): self._load_pickle(path) else: i = 0 - for line in codecs.open(path, 'r', 'utf8'): + for line in codecs.open(path, 'r', 'ISO-8859-1'): #was utf8 changed it to 'ISO-8859-1' data = line.strip('\r\n').split(sep) value = None if not data: diff --git a/recsys/datamodel/data.pyc b/recsys/datamodel/data.pyc index 57a97e63b45c10a3996f3fd0a234824a80e45c99..176bf003ceacb312e03caca2d384a9ec88644cf4 100644 GIT binary patch delta 269 zcmbR4xX_WE`7snZtTIw2Zb{4(Jtl|I^3vvPx#vsB3 zM6d!04JHta4Mf;XR+LX+G}=5@GLXr=gas&6T9Rf_%m`#~f=Mnv4PKB!uois~%K$_e z0*MkHAQ4}jSe6=JkeQsFlM1zFvZ0I(n+-^n!{%a{7Dhd1Alpv^WO$GRkN{hgQkq); sb`02A=q{SvDc39jcDWHqmlud|oy@C{F}X$FlpE{}u$LSrACbQd0QkEy3jhEB delta 211 zcmZ4JINgz*`7Dhg9sB4VG1HxfrJJV zh{XmXY$tDEO=b!*-n>{ckcl-&3n(`Eue1cW0Z7siL>Nt0kP&5r7% Date: Thu, 8 Jun 2017 14:34:36 +0200 Subject: [PATCH 3/7] Cleaned up code for commiting to github, on a much much earlier date, I added the ability to recommend items unrated by the user for folded-in users even if we loaded the SVD model instead of built it from scratch which wasn't previously allowed under original implementation --- AUTHORS | 1 + CHANGELOG | 5 + recsys/algorithm/factorize.py | 406 +++++++++++++++++++++------------ recsys/algorithm/factorize.pyc | Bin 30045 -> 32828 bytes recsys/algorithm/matrix.py | 10 +- recsys/datamodel/data.py | 178 ++++++++++++++- recsys/datamodel/data.pyc | Bin 8353 -> 12984 bytes 7 files changed, 437 insertions(+), 163 deletions(-) diff --git a/AUTHORS b/AUTHORS 
index 8003cab..eb97b49 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1 +1,2 @@ Oscar Celma (ocelma __at__ gmail __dot__ com), http://ocelma.net +Ibrahim Abou Elseoud (Ibrahim__dot__Elseoud__at__ gmail __dot__ com), for updating SVD model part diff --git a/CHANGELOG b/CHANGELOG index f15b79f..ad6bba0 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -28,3 +28,8 @@ See: https://github.com/ocelma/python-recsys/commits/master 2011-10-08 * Added the whole project at github + +2017-06-08 + * Added updating the SVD model incrementally by folding-in + * Added a capability to split the dataset for train,test,foldin to facilitate testing the Fold-in implementation + * Added update to github diff --git a/recsys/algorithm/factorize.py b/recsys/algorithm/factorize.py index c5ddb53..75e893f 100644 --- a/recsys/algorithm/factorize.py +++ b/recsys/algorithm/factorize.py @@ -26,20 +26,24 @@ from divisi2 import DenseVector from divisi2 import DenseMatrix from divisi2.ordered_set import OrderedSet - + from recsys.algorithm.baseclass import Algorithm from recsys.algorithm.matrix import SimilarityMatrix from recsys.algorithm import VERBOSE from numpy.linalg import inv #for update import numpy as np +from divisi2.sparse import SparseMatrix as divisiSparseMatrix +from divisi2.sparse import SparseVector as divisiSparseVector +from divisi2.dense import DenseVector as divisiDenseVector + from recsys.datamodel.data import Data TMPDIR = '/tmp' class SVD(Algorithm): """ - Inherits from base class Algorithm. + Inherits from base class Algorithm. 
It computes SVD (Singular Value Decomposition) on a matrix *M* It also provides recommendations and predictions using the reconstructed matrix *M'* @@ -72,6 +76,8 @@ def __init__(self, filename=None): self._file_col_ids = None #Update feature + self._foldinZeroes={} + self.inv_S=None #since it doesn't get updated so redundent to calculate each time def __repr__(self): try: @@ -124,7 +130,7 @@ def load_model(self, filename): idx = [ int(idx.strip()) for idx in zip.read('.row_ids').split('\n') if idx] except: idx = [ idx.strip() for idx in zip.read('.row_ids').split('\n') if idx] - #self._U = DenseMatrix(vectors) + #self._U = DenseMatrix(vectors) self._U = DenseMatrix(vectors, OrderedSet(idx), None) try: self._V = loads(zip.read('.V')) @@ -140,7 +146,7 @@ def load_model(self, filename): idx = [ int(idx.strip()) for idx in zip.read('.col_ids').split('\n') if idx] except: idx = [ idx.strip() for idx in zip.read('.col_ids').split('\n') if idx] - #self._V = DenseMatrix(vectors) + #self._V = DenseMatrix(vectors) self._V = DenseMatrix(vectors, OrderedSet(idx), None) self._S = loads(zip.read('.S')) @@ -148,7 +154,7 @@ def load_model(self, filename): # Shifts for Mean Centerer Matrix self._shifts = None if '.shifts.row' in zip.namelist(): - self._shifts = [loads(zip.read('.shifts.row')), + self._shifts = [loads(zip.read('.shifts.row')), loads(zip.read('.shifts.col')), loads(zip.read('.shifts.total')) ] @@ -198,7 +204,7 @@ def save_model(self, filename, options={}): # Store Options in the ZIP file fp.write(filename=filename + '.config', arcname='README') os.remove(filename + '.config') - + # Store matrices in the ZIP file for extension in ['.U', '.S', '.V']: fp.write(filename=filename + extension, arcname=extension) @@ -231,6 +237,7 @@ def _reconstruct_matrix(self, shifts=None, force=True): self._matrix_reconstructed = divisi2.reconstruct(self._U, self._S, self._V) return self._matrix_reconstructed + def compute(self, k=100, min_values=None, pre_normalize=None, 
mean_center=False, post_normalize=True, savefile=None): """ Computes SVD on matrix *M*, :math:`M = U \Sigma V^T` @@ -251,7 +258,7 @@ def compute(self, k=100, min_values=None, pre_normalize=None, mean_center=False, super(SVD, self).compute(min_values) #creates matrix and does squish to not have empty values if VERBOSE: - sys.stdout.write('Computing svd k=%s, min_values=%s, pre_normalize=%s, mean_center=%s, post_normalize=%s\n' + sys.stdout.write('Computing svd k=%s, min_values=%s, pre_normalize=%s, mean_center=%s, post_normalize=%s\n' % (k, min_values, pre_normalize, mean_center, post_normalize)) if not min_values: sys.stdout.write('[WARNING] min_values is set to None, meaning that some funky recommendations might appear!\n') @@ -264,17 +271,18 @@ def compute(self, k=100, min_values=None, pre_normalize=None, mean_center=False, if mean_center: if VERBOSE: sys.stdout.write("[WARNING] mean_center is True. svd.similar(...) might return nan's. If so, then do svd.compute(..., mean_center=False)\n") - matrix, row_shift, col_shift, total_shift = matrix.mean_center() + matrix, row_shift, col_shift, total_shift = matrix.mean_center() self._shifts = (row_shift, col_shift, total_shift) + # Pre-normalize input matrix? if pre_normalize: """ - Divisi2 divides each entry by the geometric mean of its row norm and its column norm. + Divisi2 divides each entry by the geometric mean of its row norm and its column norm. The rows and columns don't actually become unit vectors, but they all become closer to unit vectors. """ if pre_normalize == 'tfidf': - matrix = matrix.normalize_tfidf() #TODO By default, treats the matrix as terms-by-documents; + matrix = matrix.normalize_tfidf() #TODO By default, treats the matrix as terms-by-documents; # pass cols_are_terms=True if the matrix is instead documents-by-terms. 
elif pre_normalize == 'rows': matrix = matrix.normalize_rows() @@ -296,7 +304,7 @@ def compute(self, k=100, min_values=None, pre_normalize=None, mean_center=False, options = {'k': k, 'min_values': min_values, 'pre_normalize': pre_normalize, 'mean_center': mean_center, 'post_normalize': post_normalize} self.save_model(savefile, options) - def _get_row_reconstructed(self, i, zeros=None): + def _get_row_reconstructed(self, i, zeros=None): #if foldin that means it is known what the user rated and zeros contains the rated items if zeros: return self._matrix_reconstructed.row_named(i)[zeros] return self._matrix_reconstructed.row_named(i) @@ -306,6 +314,40 @@ def _get_col_reconstructed(self, j, zeros=None): return self._matrix_reconstructed.col_named(j)[zeros] return self._matrix_reconstructed.col_named(j) + def _get_row_unrated(self,i,rated): # use for foldin since that means users new rated items are known so no need to squish or need normal matrix + sparse_matrix = self._matrix_reconstructed.row_named(i).to_sparse() + # values: np array with the predicted ratings or ratings + # named_rows: normal array with movie names + values, named_cols = sparse_matrix.named_lists() #values contains a np array with predicted ratings , while named_cols contains list of labels of columns + removal_indicies = [] #array of indicies for removal + + for item in rated: + index_remove = named_cols.index(item) + del named_cols[index_remove] #since its a normal list can remove like this + removal_indicies.append(index_remove) + + values = np.delete(values, removal_indicies) #since it's a numpy array so must remove like this + + return divisiSparseVector.from_named_lists(values, named_cols).to_dense() + + + + def _get_col_unrated(self, j,rated): # use for foldin since that means users new rated items are known so no need to squish or need normal matrix + sparse_matrix=self._matrix_reconstructed.col_named(j).to_sparse() + # values: np array with the predicted ratings or ratings + # named_rows: 
normal array with movie names + values, named_rows= sparse_matrix.named_lists() + removal_indicies=[] + + for item in rated: + index_remove = named_rows.index(item) + del named_rows[index_remove] + removal_indicies.append(index_remove) + + values=np.delete(values, removal_indicies) + + return divisiSparseVector.from_named_lists(values,named_rows).to_dense() + def predict(self, i, j, MIN_VALUE=None, MAX_VALUE=None): """ Predicts the value of :math:`M_{i,j}`, using reconstructed matrix :math:`M^\prime = U \Sigma_k V^T` @@ -347,25 +389,79 @@ def recommend(self, i, n=10, only_unknowns=False, is_row=True): self.compute() #will use default values! item = None zeros = [] - if only_unknowns and not self._matrix.get(): + seeDict=False + if only_unknowns and not self._matrix.get() and len(self._foldinZeroes)==0: raise ValueError("Matrix is empty! If you loaded an SVD model you can't use only_unknowns=True, unless svd.create_matrix() is called") + if not self._matrix.get(): + seeDict=True if is_row: if only_unknowns: - zeros = self._matrix.get().row_named(i).zero_entries() - item = self._get_row_reconstructed(i, zeros) + if seeDict: + zeros=self._foldinZeroes[i] #zeros in this instance contains the rated items + if len(zeros)==0: + raise ValueError("Matrix is empty! If you loaded an SVD model you can't use only_unknowns=True, unless svd.create_matrix() is called or youve just folded them in") + else: + item = self._get_row_unrated(i, zeros) #removing the rated items from utility row for recommendations + else: + zeros = self._matrix.get().row_named(i).zero_entries() + item = self._get_row_reconstructed(i, zeros) + else: + item = self._get_row_reconstructed(i, zeros) else: if only_unknowns: - zeros = self._matrix.get().col_named(i).zero_entries() - item = self._get_col_reconstructed(i, zeros) + if seeDict: + zeros=self._foldinZeroes[i] #zeros in this instance contains the rated items + if len(zeros)==0: + raise ValueError("Matrix is empty! 
If you loaded an SVD model you can't use only_unknowns=True, unless svd.create_matrix() is called or you just folded them in") + else: + item = self._get_col_unrated(i, zeros) #removing the rated items from utility columns for recommendations + else: + zeros = self._matrix.get().col_named(i).zero_entries() + item = self._get_col_reconstructed(i, zeros) + else: + item = self._get_row_reconstructed(i, zeros) + return item.top_items(n) - def load_updateDataTuple_foldin(self, filename, force=True, sep='\t', format={'value':0, 'row':1, 'col':2}, pickle=False,is_row=True,truncate=False): + def _calc_mean_center(self, matrix, is_row=True): #created this to use the loaded shifts and calculate the row or column shift + row_shift, col_shift, total_shift = self._shifts + + total_mean = total_shift # use the global shift one + if is_row: + row_means = matrix.row_op(np.mean) - total_mean # calculate row shift + col_means = col_shift # use already given col shifts + else: + row_means = row_shift # use already given row shifts + col_means = matrix.col_op(np.mean) - total_mean # calculate col shifts + + row_lengths = matrix.row_op(len) + col_lengths = matrix.col_op(len) + + shifted = matrix.copy() + for row, col in shifted.keys(): + shifted[row, col] -= ( + (row_means[row] * row_lengths[row] + + col_means[col] * col_lengths[col] + ) / (row_lengths[row] + col_lengths[col]) + ) + total_mean + + return (shifted, row_means, col_means, total_mean) + # return shifted + + def load_updateDataTuple_foldin(self, filename,force=True, sep='\t', format={'value':0, 'row':1, 'col':2}, pickle=False,is_row=True,truncate=True,post_normalize=False): """ - Loads a dataset file that contains a SINGLE tuple (a dataset for a single user OR item , has to be either same row or same column depending on is_row aka tuple) + Folds-in a SINGLE user OR item. 
First loads a dataset file that contains a SINGLE tuple (a dataset for a single user OR item , has to be either same row or same column depending on is_row aka tuple) + + For params: filename,force,sep,format,pickle then see params definition in *datamodel.Data.load()* + + :param is_row: are you trying to foldin a row or a column ? yes->foldin row , no->foldin column + :type is_row: boolean + :param truncate: sometimes new users rate new items not in the original SVD matrix so would you like new items to be truncated or folded in ? default is foldin + :type truncate: boolean + :param post_normalize: Normalize every row of :math:`U \Sigma` to be a unit vector. Thus, row similarity (using cosine distance) returns :math:`[-1.0 .. 1.0]` + :type post_normalize: Boolean - See params definition in *datamodel.Data.load()* """ - # nDimension if force: self._updateData = Data() @@ -379,11 +475,14 @@ def load_updateDataTuple_foldin(self, filename, force=True, sep='\t', format={'v print type(nDimensionLabels[0]) print len(nDimensionLabels) self._singleUpdateMatrix.create(self._updateData.get(), col_labels=nDimensionLabels, foldin=True,truncate=truncate) + self._foldinZeroes[self._singleUpdateMatrix.get_rows()[0]] = self._singleUpdateMatrix.get_cols() + else: nDimensionLabels = self._U.all_labels() #get labels from U matrix to complete the sparse matrix print nDimensionLabels self._singleUpdateMatrix.create(self._updateData.get(), row_labels=nDimensionLabels, foldin=True,truncate=truncate) + self._foldinZeroes[self._singleUpdateMatrix.get_cols()[0]] = self._singleUpdateMatrix.get_rows() if not truncate: additionalElements=self._singleUpdateMatrix.get_additional_elements() @@ -402,23 +501,49 @@ def load_updateDataTuple_foldin(self, filename, force=True, sep='\t', format={'v # #update the data matrix if VERBOSE: print "updating the sparse matrix" - # print "matrix before update:",self._matrix.get().shape if self._matrix.get(): #if matrix not there due to load ignore it 
self._matrix.update(self._singleUpdateMatrix) # updating the data matrix for the zeroes , also for saving the data matrix if needed - # print "matrix after update:",self._matrix.get().shape - self._update(is_row=is_row) + + # Mean centering + if self._shifts: #if not None then it means mean_center was equal true + row_shift, col_shift, total_shift=self._shifts + + + meanedMatrix, rowShift, colShift, totalShift=self._calc_mean_center(self._singleUpdateMatrix.get(),is_row=is_row) + + self._singleUpdateMatrix.set(meanedMatrix) + + if is_row: + values, named_rows = row_shift.to_sparse().named_lists() #values numpy array, named_rows normal array + valuesFold, named_rowsFold = rowShift.to_sparse().named_lists() + + else: + values, named_rows = col_shift.to_sparse().named_lists() # values numpy array, named_rows normal array + valuesFold, named_rowsFold = colShift.to_sparse().named_lists() + + + values=np.concatenate((values, valuesFold)) + named_rows.extend(named_rowsFold) + + if is_row: + row_shift=divisiSparseVector.from_named_lists(values, named_rows).to_dense() + else: + col_shift=divisiSparseVector.from_named_lists(values, named_rows).to_dense() + + self._shifts=(row_shift, col_shift, total_shift) + + + self._update(is_row=is_row,post_normalize=post_normalize) def _construct_batch_dictionary(self,data,is_row=True): - ''' - + """ + :param data: Data() :param is_row: Boolean :return: constructs a dictionary with the row or col as the keys (depending on which is being added) with values as the tuples in self._batchDict - ''' - # self._values = map(itemgetter(0), data) - # self._rows = map(itemgetter(1), data) - # self._cols = map(itemgetter(2), data) + """ + key_idx=1 #key index default is the row if not is_row: key_idx=2 @@ -436,23 +561,33 @@ def _construct_batch_dictionary(self,data,is_row=True): print "Batch loaded successfully" - def load_updateDataBatch_foldin(self, filename, force=True, sep='\t', format={'value': 0, 'row': 1, 'col': 2}, - pickle=False, 
is_row=True,truncate=False): + def load_updateDataBatch_foldin(self, filename=None, data=None, force=True, sep='\t', format={'value': 0, 'row': 1, 'col': 2}, + pickle=False, is_row=True,truncate=True,post_normalize=False): """ - Dont forget future work in presentation , remove old and insert new - Loads a dataset file that contains Multiple tuples + Folds in the batch users or items, first Loads a dataset file that contains Multiple tuples (users or items) or uses the preloaded data from the datamodel/data.py object then folds them in with their ratings + + :param data: Contains the dataset that was loaded using the Data() class + :type data: Data() - truncate:boolean-> sometimes new users rate new items not in the original SVD matrix so would you like new items to be truncated or folded in ? default is foldin - is_row: boolean -> are you trying to foldin a row or a column ? yes->foldin row , no->foldin column - See params definition in *datamodel.Data.load()* - + For params: filename,force,sep,format,pickle then see params definition in *datamodel.Data.load()* + + :param is_row: are you trying to foldin a row or a column ? yes->foldin row , no->foldin column + :type is_row: boolean + :param truncate: sometimes new users rate new items not in the original SVD matrix so would you like new items to be truncated or folded in ? default is foldin + :type truncate: boolean + :param post_normalize: Normalize every row of :math:`U \Sigma` to be a unit vector. Thus, row similarity (using cosine distance) returns :math:`[-1.0 .. 
1.0]` + :type post_normalize: Boolean """ - # call update here until it finishes - # nDimension + if force: self._updateData = Data() - - self._updateData.load(filename, force, sep, format, pickle) #load array of tuples + if filename: #not null + self._updateData.load(filename, force, sep, format, pickle) #load array of tuples + else: + if data: + self._updateData =data + else: + raise ValueError('No data or filename set!') print "Reading the new batch" self._construct_batch_dictionary(self._updateData.get(),is_row) @@ -461,32 +596,32 @@ def load_updateDataBatch_foldin(self, filename, force=True, sep='\t', format={'v nDimensionLabels=None if (is_row): nDimensionLabels = self._V.all_labels()[0] # get labels from V matrix to complete the sparse matrix - # print nDimensionLabels else: nDimensionLabels = self._U.all_labels()[0] # get labels from U matrix to complete the sparse matrix - # print nDimensionLabels length_of_dict=len(self._batchDict) i=0 + meanDenseVector=[] isbatch=True for key_idx in self._batchDict: #data in batchDict in form {key:[(tuple)]} - print "user:",key_idx i += 1 + if VERBOSE: + if i % 100 == 0: + sys.stdout.write('.') + if i % 1000 == 0: + sys.stdout.write('|') + if i % 10000 == 0: + sys.stdout.write(' (%d K user)\n' % int(i / 1000)) + if (is_row): self._singleUpdateMatrix.create(self._batchDict[key_idx], col_labels=nDimensionLabels,foldin=True,truncate=truncate) else: self._singleUpdateMatrix.create(self._batchDict[key_idx], row_labels=nDimensionLabels,foldin=True,truncate=truncate) - # if(i==length_of_dict): - # isbatch=False - - # If it's trying to foldin a new user who has rated a new item which was not used before, then foldin the item first then foldin that user if not truncate: additionalElements = self._singleUpdateMatrix.get_additional_elements() - print "dimension", len(nDimensionLabels) - print "additional elements:", additionalElements - print "length", len(additionalElements) + if len(additionalElements) != 0: for item in 
additionalElements: if (is_row): # if I am folding in a row then , the additionals added that shouldn't be are the columns to be folded in to the rows @@ -495,24 +630,54 @@ def load_updateDataBatch_foldin(self, filename, force=True, sep='\t', format={'v else: self._singleAdditionalFoldin.create([(0, item, nDimensionLabels[0])], col_labels=self._V.all_labels()[0]) + self._update(update_matrix=self._singleAdditionalFoldin, is_row=not is_row) + if self._shifts: # if not None then it means mean_center was equal true + row_shift, col_shift, total_shift = self._shifts + + + meanedMatrix, rowShift, colShift, totalShift = self._calc_mean_center(self._singleUpdateMatrix.get(),is_row=is_row) + + self._singleUpdateMatrix.set(meanedMatrix) + # row shift cause it's row for the time being + if is_row: + meanDenseVector.append(rowShift) + + else: + meanDenseVector.append(colShift) + - # #update the data matrix - print "updating the sparse matrix" - # print "matrix before update:",self._matrix.get().shape if self._matrix.get(): #if matrix not there due to load ignore it self._matrix.update( self._singleUpdateMatrix,is_batch=isbatch) # updating the data matrix for the zeroes , also for saving the data matrix if needed - # print "matrix after update:",self._matrix.get().shape + self._update(is_row=is_row,is_batch=isbatch) #Do foldin on the singleUpdateMatrix tuple + if VERBOSE: + sys.stdout.write('\n') + # UPDATING MEAN CENTER PART + if self._shifts: + sys.stdout.write("updating shifts") + if is_row: + values, named_rows = row_shift.to_sparse().named_lists() # values numpy array, named_rows normal array + else: + values, named_rows = col_shift.to_sparse().named_lists() # values numpy array, named_rows normal array + for vector in meanDenseVector: + valuesFold, named_rowsFold = vector.to_sparse().named_lists() # rowShift contains new calculated row shift + values = np.concatenate((values, valuesFold)) + named_rows.extend(named_rowsFold) + if is_row: + row_shift = 
divisiSparseVector.from_named_lists(values, named_rows).to_dense() + else: + col_shift = divisiSparseVector.from_named_lists(values, named_rows).to_dense() - self.update_sparse_matrix_data(is_batch=True) + self._shifts = (row_shift, col_shift, total_shift) + self.update_sparse_matrix_data(is_batch=True,squish=False,post_normalize=post_normalize) - def update_sparse_matrix_data(self,squishFactor=10,is_batch=False): + + def update_sparse_matrix_data(self,squishFactor=10,is_batch=False,squish=True,post_normalize=False): #update the data matrix - # print "matrix before update:",self._matrix.get().shape if is_batch: if self._matrix.get(): if VERBOSE: @@ -521,48 +686,34 @@ def update_sparse_matrix_data(self,squishFactor=10,is_batch=False): if VERBOSE: print "before updating, M=", self._matrix_reconstructed.shape # Sim. matrix = U \Sigma^2 U^T - self._reconstruct_similarity(post_normalize=False, force=True) + self._reconstruct_similarity(post_normalize=post_normalize, force=True) # M' = U S V^t self._reconstruct_matrix(shifts=self._shifts, force=True) if VERBOSE: print "done updating, M=", self._matrix_reconstructed.shape + if squish: + if self._matrix.get(): #if loaded model there is no matrix + if VERBOSE: + print "commiting the sparse data matrix by removing empty rows and columns divisi created" + self._matrix.squish(squishFactor) # updating the data matrix for the zeroes ,#NOTE: Intensive so do at end - if self._matrix.get(): #if loaded model there is no matrix - if VERBOSE: - print "commiting the sparse data matrix by removing empty rows and columns divisi created" - self._matrix.squish(squishFactor) # updating the data matrix for the zeroes ,#NOTE: Intensive so do at end - # print "matrix after update:",self._matrix.get().shape - - - def _update(self,update_matrix=None,is_row=True,is_batch=False): #update(tuple:denseVector tuple,isRow=True,, - if VERBOSE: - print "type of S",type(self._S) - print "type of U",type(self._U) - print "type of V",type(self._V) - print 
"type of data",type(self._data) - print "type of matrix",type(self._matrix) - print "type of matrix reconstructed",type(self._matrix_reconstructed) - print "type of matrix similarity",type(self._matrix_similarity) - - print "dimensions of S",self._S.shape - print "dimensions of U",self._U.shape - print "dimensions of V",self._V.shape - - invS=np.zeros((self._S.shape[0], self._S.shape[0])) - for i in range(self._S.shape[0]): - # invS[i, i] = self._S[i] # creating diagonal matrix - invS[i, i] = self._S[i]**-1 # creating diagonal matrix and inverting using special property of diagonal matrix - # invS=inv(invS) inverting with numpy + + def _update(self,update_matrix=None,is_row=True,is_batch=False,post_normalize=False): + #The function which does the actual folding-in process + if self.inv_S is None: + self.inv_S=np.zeros((self._S.shape[0], self._S.shape[0])) + for i in range(self._S.shape[0]): + self.inv_S[i, i] = self._S[i]**-1 # creating diagonal matrix and inverting using special property of diagonal matrix #if new is row -> V*S^-1 if is_row: - prodM=self._V.dot(invS) - if VERBOSE: - print "dimension of VxS^-1=", prodM.shape + prodM=self._V.dot(self.inv_S) + # if VERBOSE: + # print "dimension of VxS^-1=", prodM.shape else: #if new is col -> U*S^-1 - prodM = self._U.dot(invS) - if VERBOSE: - print "dimension of UxS^-1=", prodM.shape + prodM = self._U.dot(self.inv_S) + # if VERBOSE: + # print "dimension of UxS^-1=", prodM.shape if update_matrix: updateTupleMatrix=update_matrix.get() @@ -571,74 +722,30 @@ def _update(self,update_matrix=None,is_row=True,is_batch=False): #update(tuple:d if not is_row: updateTupleMatrix=updateTupleMatrix.transpose() #transpose - if VERBOSE: - print "dimensions of user",updateTupleMatrix.shape + res=updateTupleMatrix.dot(prodM) - if VERBOSE: - print "type of res=", type(res) - print "dimension of resultant is", res.shape if is_row: - #use new value can now be concatinated with U - if VERBOSE: - print "U before adding", self._U.shape + 
#new value can now be concatinated with U + self._U=self._U.concatenate(res) - if VERBOSE: - print "U after adding", self._U.shape else: - if VERBOSE: - print "V before adding", self._V.shape + #new value can now be concatinated with V + self._V = self._V.concatenate(res) - if VERBOSE: - print "V after adding", self._V.shape - #TODO: contemplating removing this segment and just reconstruct in the updating spare matrix function if not is_batch: #will reconstruct all at end with batch using another function if VERBOSE: print "before updating, M=",self._matrix_reconstructed.shape # Sim. matrix = U \Sigma^2 U^T - self._reconstruct_similarity(post_normalize=False, force=True) + self._reconstruct_similarity(post_normalize=post_normalize, force=True) # M' = U S V^t self._reconstruct_matrix(shifts=self._shifts, force=True) if VERBOSE: print "done updating, M=",self._matrix_reconstructed.shape - - - # myFile=open("prodMVSq.dat",'w') - # myFile.truncate() - # - # for i in range(20): - # myFile.write(str(res[0, i])+" ") - # - # myFile.write("\n") - - # # invS = inv(diag_S) - # # print "dimensions of S^-1", invS.shape - # - # - # print "writing s to file" - # myFile=open("invS.dat",'w') - # myFile.truncate() - # # for item in self.invS.tolist(): - # # myFile.write(str(item)) - # # myFile.write("\n") - # myFile.write("dimensions= "+str(invS.shape)) - # myFile.write("\n") - # for i in range(invS.shape[0]): - # myFile.write(str(invS[i,i])) - # myFile.write("\n") - - def printMovies(self): - myFile=open("movieIDs.dat",'w') - myFile.truncate() - - movies=self._matrix_reconstructed.get_col_labels() - for movie in movies : - myFile.write(str(movie)+",") - def centroid(self, ids, is_row=True): points = [] for id in ids: @@ -684,7 +791,7 @@ def kmeans(self, ids, k=5, components=3, are_rows=True): i = 0 clusters = dict() for cluster in labels: - if not clusters.has_key(cluster): + if not clusters.has_key(cluster): clusters[cluster] = dict() clusters[cluster]['centroid'] = 
centroids[cluster] clusters[cluster]['points'] = [] @@ -754,7 +861,7 @@ def similar_neighbours(self, i, j, Sk=10): _Sk += 1 current += 1 _Sk -= 1 - if _Sk == 0: + if _Sk == 0: break # We have enough elements to use return similars[:Sk] @@ -816,7 +923,7 @@ def predict(self, i, j, Sk=10, weighted=True, MIN_VALUE=None, MAX_VALUE=None): # SVDNeighbourhoodKoren class __SVDNeighbourhoodKoren(SVDNeighbourhood): """ - Inherits from SVDNeighbourhood class. + Inherits from SVDNeighbourhood class. Neighbourhood model, using Singular Value Decomposition. Based on 'Factorization Meets the Neighborhood: a Multifaceted @@ -901,7 +1008,7 @@ def predict(self, i, j, Sk=None, MIN_VALUE=None, MAX_VALUE=None): Predicts the value of *M(i,j)* It is based on 'Factorization Meets the Neighborhood: a Multifaceted - Collaborative Filtering Model' (Yehuda Koren). + Collaborative Filtering Model' (Yehuda Koren). Equation 3 (section 2.2): :math:`\hat{r}_{ui} = b_{ui} + \\frac{\sum_{j \in S^k(i;u)} s_{ij} (r_{uj} - b_{uj})}{\sum_{j \in S^k(i;u)} s_{ij}}`, where @@ -925,8 +1032,8 @@ def predict(self, i, j, Sk=None, MIN_VALUE=None, MAX_VALUE=None): # bui = µ + bu + bi # The parameters bu and bi indicate the observed deviations of user # u and item i, respectively, from the average - # - # S^k(i; u): + # + # S^k(i; u): # Using the similarity measure, we identify the k items rated # by u, which are most similar to i. # @@ -946,7 +1053,7 @@ def predict(self, i, j, Sk=None, MIN_VALUE=None, MAX_VALUE=None): bui = bu + bi #if self._Mu: #TODO uncomment? 
# bui += self._Mu - + sim_ratings = [] sum_similarity = 0.0 for similar, sij in similars[1:]: @@ -965,10 +1072,9 @@ def predict(self, i, j, Sk=None, MIN_VALUE=None, MAX_VALUE=None): Sumj_Sk = sum(sim_ratings)/sum_similarity rui = bui + Sumj_Sk predicted_value = rui - + if MIN_VALUE: predicted_value = max(predicted_value, MIN_VALUE) if MAX_VALUE: predicted_value = min(predicted_value, MAX_VALUE) return float(predicted_value) - diff --git a/recsys/algorithm/factorize.pyc b/recsys/algorithm/factorize.pyc index 79f7199fc03ca842f12de55634fa87558ea13eb6..cc7a947dcb9f76cedf3a58fec1a2f623f818422f 100644 GIT binary patch delta 9576 zcmb_idvu(|b)Wh6A+5C1>a~(qZ`+btKe3UG9mmGPR7-W4Mm4=o&8%0WS9$$vH62$o&1!Um*KbnO8`aDvHM$A?s2bg@lFiC#R}+}qqRuIG zR;k-Zx2R;Ra<&LIwv64Xl5NV_D%kdA?A0o1sSzvQvGj)iie4WEOCz&mIoG*)ET2m) z#5EB473^)!cs`pm_xj@PRTw$I6Z|@89HmCV1E8gnYs}YuyNhd-Q>l_cbf=9bjoeQebu;4@LoK~YX(yo$rtx8st%h8BRhNV$gS|M2@Yu2k|ZF#kbI_FcP z4YG2bOm8eor(UIeD%n8EUYl%EklD467-V&=#|KzM1L8VYp+=iz3M2+rqNUAvhF@@z zHgztfMzOlEbwg=sh}y44TezHu;eH*0#3f<%PT4P}l3gm8fmy8| zGG~JMohhXXN+lo}uK~&6w&E5pVK`2O8Xg{w*Wo)K;JwVxIypOPS}K0KBVPkVVj?@8 zOl3x$T-I@gB$YXt7%?wb+~3oJ@oPD$89-T$fmSPIh5TVFsOx+o9a=hCxldR1Vk}RM zaLt*`Y-IIZCAnsV&UjJO6vEw*afuA9nU#qc}F-BnaDMHQxlxpZ(iDQ zB52c&eqgHGo~^zHtvFe>JI#x2v2NK^q)EB1noh#<^f=n@F8ylz-ikVJRgMY4cJut& z!69C%9R(1fj%R0P=fM;;#U3WOh2Ylmcw$COKTnOCbn>>`Ds&GQYPPJKy!}#^gm;md zO*ZVi0pe6Q@08ownj|FjIq^lHfL={v9Ynb-P8dpj{T9|9Q9PA8~H z$rS;L9;fXEY?m@Gt#2zHfMUCpyu#W|dYs4STz((K$9)t4_6N?=S{1Z9t5w>f$6fTP zf={J=Y~N|AMZa1MD4cmQs1~-Xf~5xU+MpIIR3V_!epRU8%*9G}3zceLA*j-56)TC> zbGy`0Xigf`APuUsvL2+B4hbeeOuC9&jfis|j#q;(n@6=z0mymt*@P=zJkLQnd4dL5 zHv6QrU=NU(YMv1rDXOG1?c|+&15nA-$&{P&0)gl9^VF7!Tz1BjV`()SH}*<88P|zZ zSl*=%5lutQLHU#7d^~o_szouM9GjkpAEGUh!5zs2ZpoE>#Nx)L6RAuxHJ$k z2<_qOWs%ZwN+O;8Xpt`c{rapG6ctRHn>X}Vi*KYB*!P)7HhfXvZ?!XnJBgNR*~562U5vyufN6&?@8EW_gZ# zFAV|95${*I7R_}n4dctbSy~Dv94o>`xi$<9>5I4U%d#-6Sr(rAK*+0T@g;i{i>Q+rzA5fVFIpC#?(N)#cbwLOt@!-;qO^;cD6z)x#Z*4hcxsRJ~Z<+7z$>w@a 
zXXj5kJ?VKj-^2XEN%rJVI5Rz|jC-Cmi4x6~D2C&-P|tPI-%F$szj4FZG=ljQanl6* z3Dy$K5immAbjS87lh_*Fa+=M@2*gluHDxzAn~+=~PSq> z(2o_c9KMwUqFseoGWq(z4sPckVOlC|AN{ObDt8|8m5V+vtECBuAp# z&d$=6!LufNWL4j0(X6U-Bw1P~XO&*l+EW}LB% zghlSej=P)`cs&M5j!iq;Gn4rfu0%~TFztAK(UB9W2}U!YM=alcgSKUXysQi@OP# z?rq&rdpH&xJBdt$PSz*R1zCqUG^#7q0uY#5>JOLLoALZ~q~xcmZXs+^qC*c8pnsCA z;30^?kJzNP$Tct7MfR0lz>nKyl@_fN<(22;Ay{ttTzE?!g8m&koCE{JV^2A6xrvpg z5CFQYh0k+v@UF*c0?^9bt32r-oE;J@P?1The6^bJsY%2H&zG0a$g#6p>rB6}yF3eP zh0LDq>x+15!Y*)dJWpkV@L6T)xlie%b6pmK5!5EP*}{wV&|&mQ*jSQK?uZ-`-6p|% zX+w!9s&YjisiTTsBTO@0gzpEYxkN2;MiwW#g2y}IE+T5NPA%4}MbF3M_F7aSLfL4Z z{a-B@bh?!?O~(kCy*8>vFi`g)d{iwqtIReU{gtOc7_D-VD{IM|e49$Qt3snHMCnnm zIrbm`jxPye0xo7_L ztoOFeH=>ttQ<=$W=htSFNN>G5i9Nf#b z+>A-@sI9)h;TH)K=94?N6(tG0%#(&OOh@S?y~D~`aE=jrMCss|UZ;v1(dkN8a!va@ zaUAe6{u_`b<@WWS8F8LUGSW zXV#(?;Y^USJ|{F4^uMsuq`wHqUZpZHBTNk~qXi356I(5Y29FLm5SG_y4GVd28Vjez z(Nx0=W-Pb}7!ln5E>Pj6mYdS-2URmBGRvO*NXXr?=wo02&^tAw5gI!5mta9A-oOt@_rGmZd5F4CqEq%s?X z=5k`3X%%sa7GLgpIXTGTVUPsXQ7_@@zyMDR*L+?AM4cBPz$Ey&R^~*>4~V$1Uln78 z`f`B4W|v*UssOP(4Qt8FSShUFJeaj|qh44c4h2MH=xCq>DI9naP54&Ss&V!5IN1^Z$`~%H z#NG400uI&5i%S=~_{C@yQ;Thk5C|`&u}g;*4l`g4-Zh{W5re?1x4<${hg$4Z`EHf& zQ3VL2qa2~oM%Y@T3ZSQYzT!Jd5%v1>YgKxkBgAbIrg}w{8YE0{>_bD)W5`89UMWnq zl%rH1MMjm5s{DG^@|n;mpN8d+#9NnM+EZ(Fu>3Zh?Ln*=<6DaLGJ4J)#D_KJ;@-VQ zNfuK{kV#EzYwBHJsaZ(XT?N0zYFtmeFWGZr&%Ep8HdNWa2aNqCf)@#1BKQiy1pslI zDK~L^EI)oiGSezI0=z^iM7|!Ee&4H&x^wfzlzXBSNwFCsDZvuKR|(|7_Lpo)bd+Ku z!>s)W1X4?s*WF*R%h-9Px}^Pa4*U^;1lpI``eTBN>Jxw+Y%+4%e?l-oFh%fnf=i>Q z{R(m4Ab5ZPiiDDh`P+SKiuN;T$LaG|#ZHM9450Qm3BE<}rv#K$DRi>+Z2*aD-lHLr zok*a1mA8k8r~DW~2U#gARn+*XNVy_q`%Sd%!`~siLIk$|j03L{yhiX{f`5E-y`zR5y=X@2Y7wunm=9UpH9V0}K zy4km(0<``HdH8ng7OPt~S-q=*Z(X3Sf)C+Fs|~Xotq^8Q5XKBtW_7*S!?SptvA6s= zh_?IJ0E~}Mse&2VfAbo|PPkRXV`ge}Ca82EmZIT38Y;{;_pjLr2kpIZfJYp~qwNho zfOB7CXj)FFF$k|NaaPa0p-k<-Ks7Tx$%0U2zI|Y>nweCozF^)O=ox11N-7WTi7}*l z2RJOs$h5>$ukWL^l0--)650>tzhR8OiP?WcN)jQHLJ7OtehcllDfk|9@%pwRq(RA7 zYLNz$3Q9zz!8!xaiqNjqRHe!-mfrP5%4-g3L+p}{mq-JJlP&C*H{NPm_^iRZdRbfZ 
zW)t8y_@CIk;!=JI`S!;EL7eb^sDiB)cNoe z6f=t4kfciFOP*-UN`Pzy!5Uf>V6p+NDwfiU(t{4cygVZ77)=#gwa%Pj5W{_pNbzVm zh09W@5AerrMC-VN6x!)2`^3#XH}4ZCSW+@jxGV}JvzZc65uTG&@g|_{bp+xI&#=Wp zl=qw2KciJDoo6P!U#ui?PiFI;vlK5%Pbf8vd=Ax&S^n}RUX68Iub%cH4PJXmO?cVa zdul7@F=S33`s|*ISmI~Y+O24)+KAq)JFPxHa&3PUFk(gE93s9*An0$chz3~KXt0{C zm=2ovhCV+a4*e9_5GOyt7Cy>uAb6g@N3b6tP7f%aR*slTy7mS0)~`Hc-Q!!z+}Q39 zZs+&~^Y|?vZ$E^lymxTmWOhbw!+wwB@0fRP*>kL!P5Ft7-f&9lI#-M&10=kROB$nB zY2xI>R4S9o+icpq%&A*9M(GK>T4gVhKP1?0{_xhTtKVVs-w56{Ke=^-9x`o5I)gtV z;yttP$m!ru(46v_i$@OX_9yJkMY;19j&C98Cb*kGyp7zNxSVH*gQY54iknc(QpNKz zww@;V4FU;zud^jF_8-}z<3V5q5CM?|?{$Uh?F+f_k5NpU@o~2S1Ve}f9idQYD12Qc z7zwrC6b^&~p$(z>NOQx^aC`sJa7V9>o{hFf(@P&4`JS~|M6KbnY@S1|E-^B-^2?gt zM>JGOjZE=(;vmVs;amE`ZH;;zt6MTPc>=XP+mQZA zf(q;)?`=0VlfX|(uwE0nJKE;w&LIv~fox^aeu_@NLT={oeCA7c@9q6RuJZwb;-rCI8nP*)xOdUp^n3$R0laNggL&?-V%kD*VE?7^nzFuC$fRky*)tlXlGG>7Eo`@UE3PJ@&2QyDsSlW*`D>V_*0KI$ NHGiP^_mSp1{vS;i@lyZ* delta 7122 zcmai2dvILWSwH8lcBR#8rIq!Tt(7gy>$e>zA%4WNYdf|RTTQQGJCWmv)!r+66=`?9 z_sW)2sq54>P?EMOCw-;FrA<0h$V>>`(hf77G-(SnP=@jcWdfuvWD+253ZN8datRolzL!fKsjy79T05$Ja&z8I+VLcu$}YRq;f1Zoa`$7 zQh!OCS3@hitFg*^xY9#fDOafx@CLM$(`!1b?kM&uw?;V;b=guQwaTdmo_}tg;L)y8 zmm_MVUhq+MS*wu-Y1c}-u}tsO35S?+>ML^_)a8&GX%aq-Dicx;Mw`^_PP202__T1h z#oL3G0J&wX)GQO)gnRt|p1f!-6oa{7Da%7O+|M%ENeu;Zpd(1rs+=w=YXlm_w!2ji zH`K129_1_-)C!^NQ1);#QTli|ul2NvRQs!-wBe)$MGhr-ctb-&$wqwrFm*efcMEol z`Tgo=Ot$9Ho>t&)AwCYEtd?+x6}6&O^;SeTm15C5wN99$wYl(Gj2DpH-EKb7 z^q!S%K-irG9Rytfehol2HRVobJm2m&zi3(=NpP^wtZrVXH=28!pA0Vt;#ld;=BLB$ z>tzhU=gFrEY0kAbn5WzCS$iwd6gcC%Q=YvNEqhf3m!8aduDz4EcGKL^vF0XnxLLu^ZMH=6rb?LWMX{T2YxU^+LIpN5!C#NJJC55bYj_~=y1 zFJxwX8pUbXx8(@oXE;6c#;Vf~%x^C^S8m6qZP*mU9sx);u$3U#NRTFgxhY%NP)0EV zFLllx&rG`ZK8|fNU+CYW_fSV`$AP$pe{UCnYHov_)$4X0(QTnvD2{JLx9ASt8Zu{A zPp+r-{3t*ncYc)C;VjbA0S?kc9&rmf&*(KdlO1T*>&?Z1k^cFa>D=V?Gkt^?HDUYa zmjjn#ThQ@%R0>Nk)1|rfIV-~1_PF`U#%*;8Ho262-fY?QtiH{Bchky78Z6bw{dwOp zPpt1OBAU&G)oG>XB5F3I3J29JRtc$zD#pfo^dYRH)g`Sys8v>}*(z1|kdXRINGC8~ 
z$R?^e6YNL~UNyweM${!)DZ5?fzEGaq6|6H6y>YU6d2>r_HmoM<)U3tzhm)bwH@1Al zvb=)%i*3{Cp=3Qb80?xYu?%cQ*=Go*2s{9vbX51Kn}sR4o;}T>a|AOS=UMr={HS==fTVW^W1&N|H)L|5ZiPN&y0tG6Gx^*PZ~m6`}ssW`08xf(k6Yz;MX@KDes zKIm3s#9ZCpJAl|*MelXOY8D5gLpnPsyz0xmzz3wC+um6eZr4*|lJ|-Z&TpbYPBhBo zbr3#VSsa<`;A|99H~_>)%R*I}kn3UC)GK=CDGu&G^-(ofOIs*BDy$n74t_VNz>#~H ztySY8bs7m2GQo5zvIhIpI=fBKGL5sqtH!*$W1xtSr~|3IfYoT6>gW5)=mQquo1`Oy#-L4DE}PE zr5vyH%B~r$ubB7kURg7gWKOc<=Hl*^D;VK@2JF=2qH!_T+pTT7KNQh5z_wYXi+jJTx9#GhG{Ae@ z3kM2?Tp`IY6)ejF$L4|(vLALYjAon}yRvMR`RTsyt)JntjP0t90Wn(D0ITc4a?Lss z5(|%EjJ_roo~4CvDdSaT60G%|kBY;e+Dv zl_oYs-)gexTMdD4HHwn{NOl*a*dU?+(9>*V#kIfydZ{Vvu*ktndfvC@r-4^6bBRYT zRy}8tRg;2mumN`w2?FOk59e#L;7O=TgA;BppKW1}>>k~?+Q*4FfhILuhaEzW!1cgX zc$Ej}&Cjb7Bt{9^MCFT9Hp407rLP=nw9G%;z4NF?i=Fn|!VZbG_HUwVKSuC4!G{Sx z0w89Za-58x$z@ZM33t++LT>PO*dHay69nSUMRACXH#(N`(`SZ~Qo4wPe~dK0Meq!P zc(5cOsp<@IQkm>z>vssmzdz2F`1~i>`Xs@p2qbTP8ZDm;0>2;iOzueY<|O@QvHP?8 z6v1Z*ew!c&25dcTUOdv?a{x{8)r=>i+n?vyRf3Aw_MZ7FJ@yE2s(}HbM~AIWD;#9A zPzyo>{kREXqZZ!{p|#bZ!tX*D>DBF4FY1r}s<4EY7OTrjhro~jJPYykKQ(8EPV_+< zCK&`+93~GEPKo4%2Aqxq=FOqL4d~(zg#<$LA1mKdg)d3ksl;szA|y+pq0QiX_tc%l zD)wfAP3DJor+MbR_t(|48x#U2a&O{3mIzXFd1q1xyetu=dsk`_6}g;?6pL~lU{&Rv zoz8e?7OFO>AOhrFskEOf*w3;qEPesB8j0LnfrdgnrOIlcvWSjbZB{3~t)j*non{rm z;5V<9A7i4AZGM^c)j^bZ)P+Yk*X-wk5V&x)2^0=&xGXVhrML`iLT?8g}QG~Dy|3pUPAt+UZxID%>#Fxr^cE)8F zb5&|)JBzO+V~}D0sXi8+0U8xvCt_R+>Pj^6hsnFnz71c;*&h}|!O93QvTKx;R9T+s z_~0x`7m?{~UT4<`KgfI|X(LF>l#DJ~6|QJN6brDjxoVY-$y|iK3I=O?zpD|b5!}i& z!CAei8O=CAT@=XaO4S3C`Lk@XTcB2wX zy|-k-#r;9W&sH`ekq&}Z94*cZ5<(hDS5XN5?}G~|?>+!10%-topZ8>hXT> zf)W7{W|0OEtSmLNn%Cj|r}GWW+Zj%Bi6OD1soc4Y zdw9RM3GQPb;(8QgLvlpkdu~NLD3K&p(z|dNqkM%3ZY}a0BTrevQxq!~l&K4M;l(6q zatTiotE`5%jqyI-T=IoX);}tvk5JRR5GiP^7TRqvnfsm!DkTrt;0aQsaqVxB(@SP}WXHZ`Y|{9J=;0|BmUpwKDO+R|J717nY;LFd%aLVzn|WhoT`NnA z;69ij->(yFH2o(x*S*B%%LLyv51m}A518LLxjga>BEDswJ9#1UZ8Se@nRO5D)14;! 
z;D!k)F8T0juO;A%q`YnghA+=v{ltkgjI$-L9DLxgFB1GZ!Dk7cA^0Kz3z(q({2E)L zh;N(5Q`y_713IsVl1xRSc<}0qMx%%8cgG^Jn(n>z;rei_E#A9jYkkiO9X}lFh^L}8 zr9V9NXVyy5;j83Fapg=#haX<3rtq@*xnIpkIQW`XYEQN3RmUrY`CNXH>mDGJ^90{l z=IB^Y_4nBRfin5A4_R+orT-dh4i^RY1_%D0fUJV2AUPtsKwv72IQAn1? z>8aIq33BKo_`WPdjShy)9oavxdkal@O}7&!mV3lH8ZwXN9^Ck^K#OIcOHEGWA?(MT zOvNm6Yx-5{+-dkwWB#IbJY=58zrOkkkio;e{U4kog8qmTMXd?q-sUSrPXn5W)T4Br zQ9N{c4}zgbDphShn@^iZ3xylliP8QGNUmqsvV8N2^U@CyR5wQbkX+{$BO9*^)c*cgq*BK6VwXsk6U@kXrG=9Ie$wwSNZoz#C~ zT0ZcdTb~EQ=WbDMc^4$bq^5F?JIQ2|vg4%RLBN<|OJKML6Q44#7vp+|`C0LnI^@l^ Mch$S row , no-> column + :type is_row: Boolean + :param force: clear the values in data + :type force: Boolean + + + The following parameters are used for when generating a report of the dataset distribution: + :param data_report_path: path to create report in + :type data_report_path: String + :param id: id number to be given to the report + :type id: String + :param ignore_rating_count: shuffle dataset? + :type ignore_rating_count: Boolean + + :returns: a tuple for train, test, foldin + """ + if force: + self._construct_dictionary(is_row=is_row,force=True) + elif len(self._tupleDict)==0: + self._construct_dictionary(is_row=is_row) + self._remove_ratings_count_from_dictionary(ignore_rating_count) + dictKeys=self._tupleDict.keys() #users + numberOfKeys= len(dictKeys) #number of users + + train_list =[] + test_list=[] + foldin_list=[] + + if shuffle_data: + shuffle(dictKeys) + train_list_keys=dictKeys[:int(round(numberOfKeys*base/100.0))] + if base==100: + foldin_list_keys=[] + else: + foldin_list_keys=dictKeys[-int(round(numberOfKeys*(100-base)/100.0)):] + + for key in train_list_keys: + tupleList=self._tupleDict[key] + lengthTupleList=len(tupleList) + if shuffle_data: + shuffle(tupleList) + + train_list.extend(tupleList[:int(round(lengthTupleList*percentage_base_user/100.0))]) + if int(round(lengthTupleList*(100-percentage_base_user)/100.0)) !=0: #if test=0 then can't take that percentage so skip taking it's tuple 
for test + test_list.extend(tupleList[-int(round(lengthTupleList*(100-percentage_base_user)/100.0)):]) + + for key in foldin_list_keys: + tupleList=self._tupleDict[key] + lengthTupleList=len(tupleList) + if shuffle_data: + shuffle(tupleList) + + foldin_list.extend(tupleList[:int(round(lengthTupleList*percentage_base_user/100.0))]) + if int(round(lengthTupleList*(100-percentage_base_user)/100.0)) !=0: #if test=0 then can't take that percentage so skip taking it's tuple for test + test_list.extend(tupleList[-int(round(lengthTupleList*(100-percentage_base_user)/100.0)):]) + + + + length = len(self._data) + if VERBOSE: + print "total number of tuples:",length + print "percentage of data for training:",round((len(train_list)*1.0/length)*100),"%","with",len(train_list),"tuples" + print "percentage of data for testing:",round((len(test_list)*1.0/length)*100),"%","with",len(test_list),"tuples" + print "percentage of data for foldin:",round((len(foldin_list)*1.0/length)*100),"%","with",len(foldin_list),"tuples" + print "_____________" + print "percentage of users for foldin:",round((len(foldin_list_keys)*1.0/numberOfKeys*1.0)*100),"%","with",len(foldin_list_keys),"users" + print "percentage of users for training:",round((len(train_list_keys)*1.0/numberOfKeys*1.0)*100),"%","with",len(train_list_keys),"users" + + if data_report_path: + myFile = open(data_report_path+"/data_distribution_report.txt", 'a+') + + myFile.write("DataID:"+ str(id)) + myFile.write("total number of tuples:"+ str(length)) + myFile.write("\n") + myFile.write( "percentage of data for training:"+ str(round((len(train_list) * 1.0 / length) * 100))+ "%"+ "with"+str(len(train_list))+"tuples") + myFile.write("\n") + myFile.write( "percentage of data for testing:"+ str(round((len(test_list) * 1.0 / length) * 100))+ "%"+ "with"+ str(len(test_list))+ "tuples") + myFile.write("\n") + myFile.write("percentage of data for foldin:"+ str(round((len(foldin_list) * 1.0 / length) * 100))+ "%"+ "with"+ 
str(len(foldin_list))+ "tuples") + myFile.write("\n") + myFile.write("_____________") + myFile.write("\n") + myFile.write("percentage of users for foldin:"+ str(round((len(foldin_list_keys) * 1.0 / numberOfKeys * 1.0) * 100))+ "%"+ "with"+ str(len(foldin_list_keys))+ "users") + myFile.write("\n") + myFile.write("percentage of users for training:"+ str(round((len(train_list_keys) * 1.0 / numberOfKeys * 1.0) * 100))+ "%"+"with"+ str(len(train_list_keys))+ "users") + myFile.write("\n") + myFile.write("________________________________________________________________") + myFile.write("\n") + + myFile.close() + + + train = Data() + train.set(train_list) + test = Data() + test.set(test_list) + foldin=Data() + foldin.set(foldin_list) + + return train, test, foldin + + def _remove_ratings_count_from_dictionary(self,count_threshold_to_remove): + ''' + :param count_threshold_to_remove: The threshold number of ratings to be removed from the data. + :type count_threshold_to_remove: int + :return: void, it changes the data itself in the class. 
+ ''' + if count_threshold_to_remove==0: + return + removed=0 + dictKeys=self._tupleDict.keys() + for key in dictKeys: + if len(self._tupleDict[key])<=count_threshold_to_remove: + del self._tupleDict[key] + removed+=1 + + print "users removed less than or equal threshold count=",removed,"users" + return + + def _construct_dictionary(self, is_row=True,force=True): + ''' + + :param data: Data() + :param is_row: Boolean + :return: constructs a dictionary with the row or col as the keys (depending on which is being added) with values as the tuples + in self._batchDict + ''' + # self._values = map(itemgetter(0), data) + # self._rows = map(itemgetter(1), data) + # self._cols = map(itemgetter(2), data) + key_idx = 1 # key index default is the row + if not is_row: + key_idx = 2 + if force: #construct new dictionary + self._tupleDict={} + # collecting the significant col or row tuples at one place to fold them in at once + + for item in self._data: # data is a list of tuples so item is 1 tuple + try: + self._tupleDict[item[key_idx]].append(item) + except KeyError: + self._tupleDict[item[key_idx]] = [] + self._tupleDict[item[key_idx]].append(item) + + # batch loaded , now need to fold them in one by one + if VERBOSE: + print "Dictionary created successfully" + def load(self, path, force=True, sep='\t', format=None, pickle=False): """ Loads data from a file @@ -104,10 +272,10 @@ def load(self, path, force=True, sep='\t', format=None, pickle=False): :type force: Boolean :param sep: Separator among the fields of the file content :type sep: string - :param format: Format of the file content. + :param format: Format of the file content. Default format is 'value': 0 (first field), then 'row': 1, and 'col': 2. - E.g: format={'row':0, 'col':1, 'value':2}. The row is in position 0, - then there is the column value, and finally the rating. + E.g: format={'row':0, 'col':1, 'value':2}. The row is in position 0, + then there is the column value, and finally the rating. 
So, it resembles to a matrix in plain format :type format: dict() :param pickle: is input file in pickle format? @@ -120,7 +288,7 @@ def load(self, path, force=True, sep='\t', format=None, pickle=False): if pickle: self._load_pickle(path) else: - i = 0 + i = 0 for line in codecs.open(path, 'r', 'ISO-8859-1'): #was utf8 changed it to 'ISO-8859-1' data = line.strip('\r\n').split(sep) value = None @@ -140,7 +308,7 @@ def load(self, path, force=True, sep='\t', format=None, pickle=False): value = data[format['value']] except KeyError, ValueError: value = 1 - try: + try: row_id = data[format['row']] except KeyError: row_id = data[1] diff --git a/recsys/datamodel/data.pyc b/recsys/datamodel/data.pyc index 176bf003ceacb312e03caca2d384a9ec88644cf4..cae0024d2cec36ea7ac9f0508473dc791dccbe96 100644 GIT binary patch delta 5076 zcmb7IO>7)l5w16Xo?nmu#uKwi+}`|6*eEE1-30G{)~pj{ah6WNyU~W(_H^5BXWV0V zze!?gOs`0^;slC1H!cebt4LfT`2aVLhzqP($c-b5#EI43KuCO5J!6j@hlTOH=lAMW z)vH%kud3es?Dbze+2j|c{J%zq-o2OoQ$9yT=h1?{-23c$c$er}k?cGWmh2I_o1$x@WRD8+m;lGC(UqD4!dxxMXeo3}H_$qbLEW~r zrSm+}?X{*e<2H0OnD`*eSd=cS=zijtY9M+oc~gx?ze=7TVjY-FAUw^1=uz5^R#OFa zFxp7H{j4lAXW3`23>!*tTb|`_7|T}RozTXD<9J5HcP#DL#)hL$8NjoG3y$HpJtJ6ay1K4? 
z%k}Eo33Pq2)wEsDSakhB8+sYE8oaC}AjI0pU@sG`Ux`zLm@heC#6r|rh;x7hj7ut~ z%4%yq`*uR*qKEktsvLcipMVzrp8u(OdF#W%IbQfp>6yYfuxnX#FwI{}pM;k&493t= zA)|)CWl9|fw-UF6`x4;r6ru(j?f@E-isqij#+@|zlU@BmbDb2~3D)8rN+_>JIw5OG zR;it&8wv7fcVXY5P^qq_Q`E^aqGEF-X(bIVw3!noIbPVzvjcAyq%TTeqE3?h`=m3} z5gcI#ngP;T>g1`TXv|Jg1?2b|n}ZCo(;~`mnMJr+ChrJSZ_{8INC*1p3X{OP{fIh) zG;{qSZ4Pyh)fu9R&0&#nSR@<~d5$QSM<60kfmU)n@9qGk#I{r+i-o4P7kT@{uI*WM zIOgZnDemR;BywSrxvhUr_|CJ;<^L*kNSm?HkM{^&+PhVs(Eq#457d{)9ExGOLO)RJ z57d0uYx1JeQPKtS?2A6&SG4DmA2_h{dD`02||}_koPQa)0Ves^AHsx_vpiZc?D}0As@8E+q8pE zqwOaZq?nZNQ-=J{(z?h4B#SEY#GM3fj!|cfR*+Q2NaH39fN@Aw_LnX^BXA(AUEm4> zdN{aRgM7QT6}-m+Obf}XFhZ3vdv*|bVJ~0UzsI=P$9R{Gy1Lqjv7qhGc!zJPkM9#U zkjOttE$7xI8ex3kep<)%BL9yX^k)s#F`tqzt*`hI&~fh&f4B@H9q9;LImD z{PfM@{64KqK&khC_F>!(96vBZma#|1<&l7csVDY`(RZCFI0US(@kvn>1lo5O+FYL0 zeP^xZ>(lzSmg>iPE*Jpf4Dc?_Ob1*A<@IamS-`Mg%Kvv<8vz$xVOEWn2MO8@E%ixU zB#DZz`fbOQ;*Sljk1?|EthR1Cb>Gr%=q&{xZhI(>{MPDT>Ko2RV6wy}Ys7q(JFZzw zR+Y)coNUrsgXI}?%XhV778uUcMza+-W)dS)${(&{;);_?$OS8KLe1=UOkQVTot14K zW#s3$JB1FO9v7Wm_14!cxDHF;+Ip1#EqN_JK9si<9|^${G^p1u;TQ z=Ec=lH{Nucj#s8BSpa8IY9e*>gNh}YllbMb-v(#U#dkOM zdfR9^L4e}l@(f&kXT6Pxrk8yuSjOYPz*U&{^vfob$+OJlYh%8^9oty=$1Xg9X?%0G z$?Lk+y8U6DVU)Q?M|q?8itV?>{5IR-dObcF$S!^ zB&#IAI};T}Qr=0Z>reC^lyGjM+bg0i1*O(msT&7(vULP~PY{Q4P6X_UBm z@_#}~DMaY_MFs>$ereaNj}nCMpv~j%`#`4I9Mj5B>&$^LeBDnO?JeYgIDB zbZBrGtexDu7$=}<<9w@y_nWt~>^r*cd(*H>H@*di)&CB>Ofh++w@g{LaQ)>_OsHY-RHqbP}i#lfLEq zEnmXwj-T~~K=vt|s_xpiCF~;Bt|DSsC-|lP7%lh;I*tr{voorg98yVj5Y9EMjATY_ z{c&(TnfP-e`sl!+t%Z?iRP=+PlOQ<6OYo13>f~Wg0CMc6>p3Q8J(DFixgIblqwCdm^~Kg7s+ZK!1%@CR zkogXGETkl?onJi6I^V_ta~?}q)Yij^)kNXDjP^b5&PB$-N_{u2oTTl!jh5}0eD&ij z$3btU<$C5fG3Y+;d9ltN%OSdJwROL&yjFw&cJ%Lq*~EJ)8aVXN(+;3>HRc!7@yKqk zuEnXA&#+hgQ4=?oK7jwlzp@O>;a8_$96sf8+A~F!oL`zlI@DRZ-W5TqAHkX(B8Ry~MN75@P}TET;$7w64JkskKPoA=)Q=J!5!n>{@r^8JW} zx8D4`vJv_a4gkQU_-dOQU$0Iv9jq`k6;fw)opGS^Mi<{9^QRJm(FM2;aG$_!fII;UVBR3YpEDGU`UR9rHP=N2dCPQ7UR3U? 
z7=r-+t~@6yf9zW$NuKae#zMkNH35=>syrJi@EyNSdiY2G+(8*eB1xOGD@;Bj^3Dbt zq>pb0W;*uYw$%tZ!q?SLQT6XdR6!HSt+_totT8vm-vw8Nlx)a~BEuNErAU8kPn%On zh-dY5Pp5EGDFDgLYL!(i1J6RjJ$;jmxB4REN*Z-a=2FEg+_CEwi{e`}DZr40sC+&8 zo?PO&SZ`}@VAV%Hx6HvagbeUUN4ErKQy0iUtCD&9a7f2#Q0|tmT(kgZ5)?SinrhVz*?R_Ms08 zS}NP7Yea-a|MjhEx Date: Thu, 8 Jun 2017 16:29:06 +0200 Subject: [PATCH 4/7] updating comments and documentation --- recsys/algorithm/matrix.py | 4 ++-- recsys/datamodel/data.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/recsys/algorithm/matrix.py b/recsys/algorithm/matrix.py index 36e0313..253a07f 100644 --- a/recsys/algorithm/matrix.py +++ b/recsys/algorithm/matrix.py @@ -85,7 +85,7 @@ def get_rows(self): #can use to get rated items and remove from recommendation def get_cols(self): #can use to get rated items and remove from recommendation return self._cols - def get_additional_elements(self): # can use to get rated items and remove from recommendation + def get_additional_elements(self): # can use to get additional items to either fold or truncate return self._additional_elements #row_labels specifies the row labels the complete matrix should have incase the inputted file doesn't include all indicies and it was saved in previous matrix (for update) @@ -131,7 +131,7 @@ def update(self, matrix,is_batch=False): #isbatch is for creating the final spar def squish(self,squishFactor): #remove additional empty fields created by divisiSparseMatrix self._matrix=self._matrix.squish(squishFactor) - def index_sparseMatrix(self): + def index_sparseMatrix(self): #create the divisi2 sparse matrix from already existing values self._matrix = divisiSparseMatrix.from_named_lists(self._values, self._rows, self._cols) def empty(self): diff --git a/recsys/datamodel/data.py b/recsys/datamodel/data.py index de4a5e8..6919229 100644 --- a/recsys/datamodel/data.py +++ b/recsys/datamodel/data.py @@ -125,8 +125,8 @@ def split_train_test_foldin(self,base=60,percentage_base_user=80, 
shuffle_data=T :type data_report_path: String :param id: id number to be given to the report :type id: String - :param ignore_rating_count: shuffle dataset? - :type ignore_rating_count: Boolean + :param ignore_rating_count: The threshold number of ratings to be removed from the data. + :type ignore_rating_count: int :returns: a tuple for train, test, foldin """ From af7c0a210dcc9414dddae2bd53dc45e63b03ca24 Mon Sep 17 00:00:00 2001 From: Ibrahim Abou Elseoud Date: Mon, 14 Aug 2017 15:42:15 +0200 Subject: [PATCH 5/7] Updated the README.rst to include the incremental svd update --- README.rst | 160 ++++++++++++++++++++++++++++--- recsys/__init__.pyc | Bin 556 -> 0 bytes recsys/algorithm/__init__.pyc | Bin 251 -> 0 bytes recsys/algorithm/baseclass.pyc | Bin 11617 -> 0 bytes recsys/algorithm/factorize.pyc | Bin 32828 -> 0 bytes recsys/algorithm/matrix.pyc | Bin 8411 -> 0 bytes recsys/datamodel/__init__.pyc | Bin 226 -> 0 bytes recsys/datamodel/data.pyc | Bin 12984 -> 0 bytes recsys/evaluation/__init__.pyc | Bin 269 -> 0 bytes recsys/evaluation/baseclass.pyc | Bin 4781 -> 0 bytes recsys/evaluation/prediction.pyc | Bin 3802 -> 0 bytes 11 files changed, 147 insertions(+), 13 deletions(-) delete mode 100644 recsys/__init__.pyc delete mode 100644 recsys/algorithm/__init__.pyc delete mode 100644 recsys/algorithm/baseclass.pyc delete mode 100644 recsys/algorithm/factorize.pyc delete mode 100644 recsys/algorithm/matrix.pyc delete mode 100644 recsys/datamodel/__init__.pyc delete mode 100644 recsys/datamodel/data.pyc delete mode 100644 recsys/evaluation/__init__.pyc delete mode 100644 recsys/evaluation/baseclass.pyc delete mode 100644 recsys/evaluation/prediction.pyc diff --git a/README.rst b/README.rst index 27e9d66..1444df5 100644 --- a/README.rst +++ b/README.rst @@ -4,6 +4,19 @@ python-recsys A python library for implementing a recommender system. 
+- Now supports incrementally adding new users or items instead of building the model from scratch for these new users or items via the folding-in technique which was mentioned in Sarwar et al.'s `paper`_ (Titled: Incremental Singular Value Decomposition Algorithms for Highly Scalable Recommender Systems), this latest commit is simply an implementation to it for python-recsys. + +.. _`paper`: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.3.7894&rep=rep1&type=pdf + +- A `Demonstration video is available`_ for this latest commit in form of a demo site built using the MEAN stack which uses the updated python-recsys as backend for the recommender which folds-in the website's user in to the SVD model and gets recommendations instantaneously instead of building the model from scratch. + +.. _`Demonstration video is available`: https://youtu.be/tIvQxBfa2d4 + +-There is also an accompanying `bachelor thesis paper`_ (For those interested) which outlines the background, architecture and discusses the "Folding-in" approach. + +.. _`bachelor thesis paper`: https://drive.google.com/file/d/0BylQe2cRVWE_RmZoUTJYSGZNaXM/view + + Installation ============ @@ -57,8 +70,8 @@ Example from recsys.algorithm.factorize import SVD svd = SVD() - svd.load_data(filename='./data/movielens/ratings.dat', - sep='::', + svd.load_data(filename='./data/movielens/ratings.dat', + sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int}) 2. Compute Singular Value Decomposition (SVD), M=U Sigma V^t: @@ -66,11 +79,11 @@ Example :: k = 100 - svd.compute(k=k, - min_values=10, - pre_normalize=None, - mean_center=True, - post_normalize=True, + svd.compute(k=k, + min_values=10, + pre_normalize=None, + mean_center=True, + post_normalize=True, savefile='/tmp/movielens') 3. 
Get similarity between two movies: @@ -111,10 +124,10 @@ Example USERID = 1 svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING) - # Predicted value 5.0 + # Predicted value 5.0 svd.get_matrix().value(ITEMID, USERID) - # Real value 5.0 + # Real value 5.0 6. Recommend (non-rated) movies to a user: @@ -152,7 +165,130 @@ Example (4801, 5.4947999354188548), (1131, 5.4941438045650068), (2339, 5.4916048051511659)] - + + +Example for incremental update +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +1. Load Movielens dataset and prepare for training and testing: + +:: + + import recsys.algorithm + recsys.algorithm.VERBOSE = True + + from recsys.algorithm.factorize import SVD + from recsys.datamodel.data import Data + + filename = “(your movielens file path here)” + + #In movielens dataset, the user is at 0 so I made them the row (could keep it as above {'col':0, 'row':1, 'value':2, 'ids': int} but I changed order to emphasis a parameter in an upcoming function) + format = {'col':1, 'row':0, 'value':2, 'ids': int} + + data = Data() + data.load(filename, sep='::', format=format) + #splits the dataset according to row or column (based on is_row=true or false) which causes there to be no overlap (of users for example) between train and foldin dataset + train, test, foldin = data.split_train_test_foldin(base=60,percentage_base_user=80,shuffle_data=True,is_row=True) #since users are in the row so is_row=true + + # Returns: a tuple for train, test, foldin + # Prints: (If VERBOSE=True) + total number of tuples: 1000209 + percentage of data for training: 48.0 % with 479594 tuples + percentage of data for testing: 20.0 % with 200016 tuples # 100-percentage_base_user per user (percentage of tuples which means the ratings since a user has many tuples(ratings)) + percentage of data for foldin: 32.0 % with 320599 tuples + _____________ + percentage of users for foldin: 40.0 % with 2416 users # 100-base= foldin (percentage of users) + percentage of users for training: 60.0 % with 3624 users #base for 
training (percentage of users) + +2. Compute Singular Value Decomposition (SVD), M=U Sigma V^t: + +:: + + svd = SVD() + svd.set_data(train) + svd.compute(k=100, + min_values=1, + pre_normalize=None, + mean_center=False, + post_normalize=True) + + # Prints: + Creating matrix (479594 tuples) + Matrix density is: 3.7007% + Updating matrix: squish to at least 1 values + Computing svd k=14, min_values=1, pre_normalize=None, mean_center=False, post_normalize=False + +3. "Foldin" those new users or items (update the model instead of recomputing it from scratch) + +:: + + svd.load_updateDataBatch_foldin(data=foldin,is_row=True) + + # Prints: (If VERBOSE=True) + before updating, M= (3624, 3576) + done updating, M= (6040, 3576) # Folds in all the new users (not previously in model) + +4. Recommend (non-rated) movies to a NEW user +:: + + user_id=foldin[0][1] #returns userID which is in foldin dataset BUT not in train dataset + svd.recommend(user_id,is_row=True,only_unknowns=True) #The userID is in row and gets only the unrated (unknowns) + + # Returns: + [(1307, 3.6290483094468913), + (1394, 3.5741565545425957), + (1259, 3.5303836262378048), + (1968, 3.4565426585553927), + (2791, 3.3470277643217203), + (1079, 3.268283171487782), + (1198, 3.2381080336246675), + (593, 3.204915630088236), + (1270, 3.1859618303393233), + (2918, 3.1548530640630252)] + +5.
Recommend (non-rated) movies to a NEW user and validate not in base model (prior to folding-in) +:: + + # BEFORE running points 3 and 4 (prior to calling svd.load_updateDataBatch_foldin) + + user_id=foldin[0][1] #returns userID which is in foldin dataset BUT not in train dataset + + # Try block to validate that the userID is new and not in the base model + try: + print "Getting recommendation for user_id which was not in original model training set" + print "recommendations:",svd.recommend(user_id) + except Exception: + print "New user not in base model so in except block and will foldin the foldin dataset (update the model NOT calculate from scratch)" + svd.load_updateDataBatch_foldin(data=foldin,format=format,is_row=True,truncate=True,post_normalize=True) + print "recommendations:",svd.recommend(user_id,is_row=True,only_unknowns=True) #The userID is in row and get us only the unrated (unknowns) + + + # Prints: + Getting recommendation for user_id which was not in original model training set + recommendations: New user not in base model so in except block and will foldin the foldin dataset (update the model NOT calculate from scratch) + before updating, M= (3624, 3576) + done updating, M= (6040, 3576) + recommendations: [(1307, 3.6290483094468913), (1394, 3.5741565545425957), (1259, 3.5303836262378048), (1968, 3.4565426585553927), (2791, 3.3470277643217203), (1079, 3.268283171487782), (1198, 3.2381080336246675), (593, 3.204915630088236), (1270, 3.1859618303393233), (2918, 3.1548530640630252)] + + +6. 
Load previous SVD model and foldin NEW users from file then instantly get recommendations +:: + + format = {'col':1, 'row':0, 'value':2, 'ids': int} + + svd = SVD() + #load base svd model + svd.load_model('SVDModel') + + # load new users by their movie rating data file and use it to fold-in the users into the model (loads data and folds in) + svd.load_updateDataBatch_foldin(filename = 'newUsers.dat', sep='::', format=format, is_row=True) + + # gets recommendations + print "recommendations:", svd.recommend(new_userID,is_row=True,only_unknowns=True) + + +- All the normal functionalities of python-recsys are compatible with the incremental update commit. The incremental update can even work if you load the model then foldin a new user or users or even items. + +- Please note that preexisting users can't be folded in; only new users which aren't already in the svd model can be. Documentation ~~~~~~~~~~~~~ @@ -168,10 +304,8 @@ To create the HTML documentation files from doc/source do: cd doc make html -HTML files are created here: +HTML files are created here: :: doc/build/html/index.html - - diff --git a/recsys/__init__.pyc b/recsys/__init__.pyc deleted file mode 100644 index a6bc57bd3ac78b000f3d61b363cfbd46fbc14d5c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 556 zcmY*VO;6k~5S?W|SeCZxPsnjEY@o-gswkD{0R$^?LAhAQ9&KFNF|sE}`*ZmX`~jX5 z1R{xN-kb58%*^!V>ne25_lDk{8T>g;zJ#$v4eW%P3E2Z`4#-ZanUbARGaDjvvU6${ zWEa#NlEt?qdqj3cJKO<2rn~hQl#&HZVw3kdp8-DZrH$Rm82MTKsj>@?o~tolAs$2E z&T^_C5f1qns@rUIs{XPMI#0n*H$SBKf7|Q)9X^_pI^PBD{00@IjB~-FN-_&12__t5uVPczplZOkOKXJ&^za diff --git a/recsys/algorithm/baseclass.pyc b/recsys/algorithm/baseclass.pyc deleted file mode 100644 index 50b4b32a88900c7b87a8c0da46433b026d69f6b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11617 zcmcgy&2JoOT7RnBZo4~noW%KZ=Bu)^8FjLr_H0(lf@QMf#K~+l6VFhdO|oV}Q|>C; zWmi|V>n$htjxAPX#LV9Iuy9($YES$DNE|@ikdOeuh2_S9BS>60f`H%ed8?~?oy>p)
zac27U`&IA9^ZcIAs_MT@HvZ)=|Km|dihou7eHTCT3lxFKDoRI!io}krqJsac5?7>K zmDQ>^Y>!AhBHdA09kuOIiEGjwlhrZXu1P#D-MXySZF@}OhICKJ>Irf7PxY6dkktu! zm|PIY(N)JKo|LZkI*B8XxKmx?lhU1%)hWADL*i4?JuR!JZTp18XQVqVtJ7#tNO)G( z@zkVzCh|<=`>W@8k(K62-Y-VcXtmsK8tlg5{JiVO8)+8ho85-%y7Oi~NqZ(T^X@I* zgl;GHjd2&tDT62d-F!36@cf6S<7e*eFz)*9LfYZqch|ap-s+^?n~g^FU--A&bg*qc zhGM%L`iZ$d=*aveh?3lPjADkLW&XZxk}GDnYa1hIBxzQ(xKVs5wK66_O)KLPjA^AV!MIi$5}qJGBdpVq;Dp=&zrd**!AS|G=^1NHDFPrzJS6oo6HfgU@T{v;-Hl^Q_Lq zzT1mKb9L4Y^G>Uw^A?kRH^sKu0IF^fX3>+-xa(Qk#nO@CnF>VfLmgt0zY2c0@iV`H zB3GF?5NSJ-S13QMgI%^VDtS$N#)f8&56uK`?aandCwLAlDgc+8#HBaRtq8U@@8}WD zDjsc1k9-6j%V`qkXE?`pwoTNHVjse~uaDmKW03OV&;(FX*K7N6w2_2Ceu9I$J?s%K z5%kk+n<&|c!|!RY6^*>X{xv`EY%WEeJijoQwipCa9;JyN-%Vo(>OhK27_T=mQM51* zP@CIKyWw24mie1eckZM8&ZeJrKg!aru#=m)TYhIVjMI!AbG>~)BY8UuJ7(X^6@N-y zG)G>k@@ZiriPdgLNtCzSmr*yTP>3^u|LRV?iocb5<#~2;7T5{?Z8>D*GpFPMS)Nx> z#9^Y#1CB~Ly`Fa#BgbXWi4cY6eN>==j$G&b;d5QtJvo5R zst|qU&7rDhaQpM$#{2{nAt4lDhNQg`xLNwtw6xK<)7qGKuNa^hG$4t*^DGDnrI^XH zCXc2>$|*nQct~YYkcGXh-ToH0WrsNDoiom5OR>H{E5+W#APLatzIj!fD$)*#pkr1j z&0CnPN`Y!@J`3~REP)Lyl+(5Q*UCXl63UfQ1Gvd2(dc9$WULU>;|39>mjJfM{Z+I~ z4TX$MIg>A-3^Dghx4(go@`8igQ;$=~-yZ0lf+FQ)ds7nNxbJ)pdk*eAq#747V8v*p zL!9=13JO9Q=*R8bV4$t2ZC_DLn?J;OdAXG^x5kV{iE0|!)@`bFa@W&r2pw0g8B*}T zotlLTBn^()t=kxuZwRrQoNI9@< z5lEhU^ys`P6hjh8x#nSyPbhIe)t&EQXT$BKUjN^~4lEcDdjPw|1Rj9N{iG9;RHUFK zU(5X%i2R?K(Q->-SyYl%$Ba+amn+!!OQGmu&`RMmRZ)N?RJ3E&HXRNsX@b}M6h(O> z*kiy-@dGlc&s0G3ISlF~g3!+Hq)G0>h$b8ELgwfAJL~S6Fq?IwJnY`YY5+A<2#kHp z{Se*ib8Uo4nECPi^H?p9w5+zoS_pd8@YmY9w8+p(|nNs$Ma0YuD)rfk|ZlHO@yU7sf5h zNsC@~^XkA_EjLMS^)@}#80vgdU#o|y(|I{-%xO@$apJ>q=Ys%5C$v^S?7{KX9zaB^ zy{NMthsVN24c2&A)iY~2hc*7S8lAvOr;48bPkBUd#R?Z(KPr5f{fY+51@ZqK6H4N* z_zVfGEc~>#xF692&P3rEV2DChH2>SG#SC%poGC5;+;jJUfTU$zE=kWe6SIcrzw z`g&xlH;dx4gLc(-~(rG(RgoFWJ~oVu0Uo(z~JLjiNd|x ze@**pvNcu=ZjBc~fb%fPd2Hl{S!Oq{YanJAqpB`Nx`?Pp|0=2Jra>D$cP8`D)Ki8T9OJfR0vVRsJ 
zEBgyUOucnNwg3yjW`N7$5|jEM&I8b*Jjp3IX93V1>Nbrglsu$ICeX2?h7O5G@F+w&Zcl3~F1sd`YS>tF#wm@?-}MM5WJ*-#aLaY>p($L}V=$qXi|LB?F9x(A(QqJiE(H-(TMU4V;Ti-sM#FXO*= z3zt(zDNlQC9@Y$5^hFMm{mJpLpVC&`<)^Ij)G|(%mwc|G=l{rO&DEBEHjF^w-Tf_= z9#HxJhtFGIi4%-|DNcTg%R{-8wJ?7ghBk)FpQa^f$Z$=FU)vgHJoV9w1mF8Q242MK zt>a-eg?DhPKVoZ$+dMUYrBoML+0rf#DZ95Yuj40FcGeE%5t3@|;u?HmzmwZH54ZVB zN~3wrW-q7ty)Iq|P~5`6DmaiY&hR<{!`$Oy>U0EY?v0=^xHwWOB2T_)|6D(w%ahyf zP6~f2{8Lniug)PJP_O&F-yB8OGkTP8)89^aKiOh5|aW3`Xg zg~V$q_`!}Gj!JTFs2|}^zaNPR1VKzhAV%7GM;<~B5X+#O&tU-|GP*%0v4q zj0yyU?X>67OlYXnB;jnLf_>Oi-5fs)>GIeZsh7gO8s&5pC_1!9E#Lj);ruv{UJ!@= zM&W4I5Z=^a{%X#}$~osWTuwEFRGt(R-pDKh*8)mJ?;U^p5$I{ib$xfQtx3pcX? zR*eV%fx)H6uan=*8N$=SD``h&E`oVMRi=NlB0DYlk;vXZQ7`}(U4ak%9hn`B$jITC z9FEK0mQGL9`os@%{43>O6r@N2I7q}lTi<@C8YsC}& z+hJo~@IQIMoeq!;Tr1316ei0N=qZ}j(`=r8h0viM=>Ie~OJ2cWgiu}Rzn$fC>M<*3 z_O#qTKIAOKes?YKZ~g+uFg#0#+Kz`N`oD#r`4tp6^y8M;w|^iNE--_Wl18W%6jM@( zOoLPpoSmVBvQormfK+2560;hU+o+@Rusk=;ewlGQ!!^~h|GuNyqB)TGdPWQFIO)_kukQklASgqN>X0;#^mW^%Ci%7i@1c z8~j(;&RjwvmC4E_=Npv^PQ#h5)DdG{bk0KC+;t)v$PVoe9k9~$5?asOA1Dg|X}wbB zxbUS4O-@y^_l8UhEQY2mFm%-9^!p**P*5yuvbRGSwQ0De#~p!QU|SUibO|&O4G`=R z@+mM-*v_4i7)%-9{a-xJVZ2#h9^EBRF(JSUuYZeFpV; zh|f3rvkn@vjl@n{(}a_>k9H=V66ilzOr&pc3w-6FYV%pe`2_tFayTi;EB#UEUyRo9 z$Y2zo2Z0Fr6mtfLCuOfE2b0jxn{qfM2Pb9glpJ8p5%c$sorF&GEF9nnQ!-;)Gx)Y> z0Tk}H>avgww`NE`kZsKBTPo{@d>i}j{MdPjhgS596muWqRK`s2zpbBX;l10gfiS?} zx7^J<@6FH6g-Ppav>o-}$@;A{+n8f(u6Uq5c%V@(cDq>Q2St*-3CnJ-7Re|ExW2oA zPjwRe8WfTClc*Cufe8A8HTuDNFt4$C6SJ6ZM?B6PSFtNX^fJfAcd`6eJh(j^r5DqO zqVpm|H1s48qZ|9S1%X(ANUYq^*FBGB*`wmATSs#2=1mCNz+u|9B%O7);VR7*ZWEGn z4RsK352Shc^%VYCFQyUwJ{C~j1`QqARGQ5tTQ!HTebTNPI*(s2c@I&j$y;uoW}6D| zNLtROi%Lh#^DqU-;)iv~55g>M({YKQ+^8U8e=Ury?&N!O)tz3KHT>**14c8d5OMPM zcDUc9a@YQ>i$Y63OG6I6bFEVD_HvD6nx_#zA~|$6c;ewB6h^nBI{>~qvAaEmvqlkt?iY$a^JY5Yg-^e*?Cw9cM&^_W$hhPy}C?~RI! zEaRJaWmT)3fogue@(OzE7~}XEDC!yKq_f<71&gTBX}9?9%_t j0N^j11B%53i@ux0p1$DX%hyxRsS1Ahud2VZwpICWgUwc! 
diff --git a/recsys/algorithm/factorize.pyc b/recsys/algorithm/factorize.pyc deleted file mode 100644 index cc7a947dcb9f76cedf3a58fec1a2f623f818422f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 32828 zcmeI53v683dEd{>@FBiLiXuf((ys3AYOSGGqV{30U3#Aqch`bf-1X(I6w{JtM>9il z$a#=+hmyD}I!?59(xgt(I!@}w2?8W2(4=jECPjiYZJf5Q)7Eu@HbsH9Nq{zOVzfXS zBnS!=sQdeW=iYfx)UNGBasrgnoVoYhbIv`_@B4p`b0+i0BZHq?8~JF##eW&TpW_q# zPRhBWb4!FNS4_J~$}Mr>uhVWRom^+!QpRnKCAU3psmEQcO}SJmrPLW$>2+0$P1REF zI~3RBDt)fn@0R*qb&p%x{oCm5Wa z4XX9xW~DSY7vw8f>y2`Ar8*b{!CbUetFK4pXfAj$Uug2+&3v<5uLX@#A=--CiKD@G zA^FYbO1(i*uSA7>BRE^CRP(`8^@6^iyHd?JXAAY}GlPTWuk*K%Wum!eKS8m4tsIpf z&-N&ym!MM57o(eW)SS!%QkTT_~TxM)`)1RCQiy z6iba#aj_J?Y=F4ulV0`7#)w)hSId<=UA@&=fI8yB3*nhp7GLm5)#`HXn!g!VzV&<~ zDs|>*$9C3FZJf_H^N}u2H>>N`af=t`3*6hEDtm@c^u3HhiN(&0B4lYlpqctMOTI@_ zor9Jvz>~(KL69$w~w|#ay=!*ST6p=#~PqBO4+pL6PiyyE; zM(ldfuJ^j)kiBKp6^HEt!a^?ok64-eTybww+NgUQ;xld~k6HHpcD>J)Gp;!9iu>J@ z#RT9JpPV!5DFqF^)PB|&qgWGQP=v$0ud zmWu6ykH;m?F=_MFKufBYua@S5SM$x4pjoF9OXYQoKS}*-&3w70BJ)(Q(kSJNTS=>m zzE!WE3>M2*tND{wmkTq&a=p>r67||#b8EfS@ir<(sn*6qn*P;C%i^s1`g;hP{7<=? 
z5Yx0X5E)rCh3JoK%mQ z_CDQ+7Lj6@@cx1c&(hASVhh=f!i9yb$b?bCXmh>P2q$$b>XggTOU+!)XyZb?R%$9= z?sfmQ=)W!)QO!jw<>hA7RNkB)@0>=pHCExMN*(Wb9JL5jQxrl}I0t(uRhlD8r7_Gk z>eq8+h;uupP_J~v)GH9%rBb6_imU-pu$*t5)j29vmaQepgve5|vmm;_)#;UbwRF0C zrIBALS5LpXRanV4s;@TctI*%*^qG8NrBtam6mfce3vN?8R&antssxBCgiG=8$2(ahLRfs_K*Ll3PDy2edmCe55ax7x26#dAR> zrqN=yk6J85s@!0t>2dz}T%^%?K42(po$VP-C?*J3YfgBaxFY1k;OH*z_fwc#Iz%vV|FDUBwps;@Fr9xbcj)+_glJ+olO*Nq| zJcghQI2IvPOGWj&t5nh$@b-Ya@v6JY)4lE{vx!#sxyd!>8fQ|?QAKgj;|c#oMW-en zP#KW0J#KZ(-5gY=6iImYeu~@pV=Di=-Vj&J5~#sLN;!Y|ez#4nhM<2bwQwlz5}tiL z?KW~Lo?fgyqn6&ejDtxTKb2|~pxi&D+%^#VR6pnyk0}}ce#;Z-!P|RoSSk3go}0M6 z=Z3%8p1Ya;7JKegy1VBXFsR8cJ@@(aE@`F=bsV0}@;$_B8u?B-%b3Gvb&t*pmzXNz9Oa>cvPh z%&Z3k%G7mF(ny>ioITvVkK* zHOb=h>%5^DisFZAM|e=dVFjvgc!a<>mau1~95q9+sHWgR7aMFNpRAQ7mKHv&q!|JY zkWfK=-N3Ksk!8S_S}mp+KBlCqsGlm9`kIN1k#WYdh1Yz$i#P5KpFB!^KE@}~+;RQl zlbO_g>4DUN)FhuwYJ$(;KqhsB?}Mp6{!OQj!bkUV|2&@$rjHW7!_)LZfYZ4ByFWdh zDyXwGel70%CFUjUwQ-*S0x(G!$=L7#=%Da1T_~n3#tU0b8>4;B$c=H;`OA;HOT_jV zTg_O0I4%sV$CxRJQf}FJpct77-FVtvgvqY%5xGhgr^Q+UGomxbw_tl>hCdg_c(jgm zYME~WFGAeh=T@P@ zM49LeLcEnufh2?lh}qv79+RhUL7|%yZgnz=R*&B}qmC*f^$$2`{QTv|9H^nQ0I>!3(ojXM?wnuN|KY9((lB zlfm(7xt6;oB}7Ej`-nh=kdyi?Un##?B2FT#(xI};72p-629e?;N?VVbU5ORHy`x#J zYOB(%XORdz<0FMabPcV9gjgvZ9*zbhQF0WKwdL|vqsrGk$MuY~EJj*3yb&|on6ui~ zqH*Ld4hl#0@>eID-IcAjn7zLMhlHnjnP)SqRK8IVn+hN0x|_!v!;jhe0(A&n))<|s z2}E>J$$GBcEHXgGXs@F@Duo~5q1{SV8inuSHawoh6)N?p6h5msv0l&4HqjS~emCo4 z^1Ja;IBs+uMRE9E<$Rw4$=0TPV9b=cQL5IjmBK*J9w5kyE0~~Y%?;^}b;xeG6xeeIQSF+8h08iwU{6HdVgXlB8D_k^1oa?^VB8j+u-9?^>g zZlEtUmEI5MJdqkioiNGWL>x|J2GR%NtcQsY;Hw8Shf@182ar}L;hXvv|2&dH`Jh46 zM|evOjXp#mnrK@7;RrgMc8%x7MsL}JT-qKROI12kRUwLc_6iLb<SZcPQW5eQ4{eqa&619(JUL>ks&t_Q<8ifPg2`$5JM36hxB* zs`YLC`CnmwpgHCs-Q)G8bgDQwz_#%*5fEme#9PC7Jd+B9nc3@>d!0c(;-b!kKKsVU zl5Xsefg9iiP5dQrAG(nNDJKwSDXQ`7g5-@K>v)%`V{EpQ8HndBTYpD3zzE8XA&2r! 
z5jz>KyJp%l7J_1GZ=%_tHZti^4Z;$5e z5%O{Js0k(mS7odtz8ei6m7rYI8m6nb22`ilWOH&zR+Eb@WrI2p`wvoiO_`QYi`_281}=z|L%f9;LI7CVk1#@bv^+pJzG zHG=vwAOq`ysCU#(X1aS)AsMzKy>Q+}5_199vmGVXPy)Y+!Z`}ESOj!ef`x4b9(c^4 z2iP)IDGy2Iji)1mHj@dm-j-CrLRBjtm-Ln|(CEV9nR%H`>*- z!P{BOJ2Xc8Tc~C(NK`ys2s<$uyo)Aj$Gh8w?ijP^q6nw0oCJujJ^e5p(4J~`+cok& zK^qVI6kSl5s6mJ*!u`eP!-aDTFJ0~^F(^ktR05;dV{olO_H!iP3?i&6!SZHpje*-? z;j2phA4s8+Z#*z)?g1U;w=1L<2#L=MSndro@B;xpts9 zuK|;s;+0=A^0oRX=X-T;SDf{4ca-(C>fO#sYYhUMP1)Oo^hV~u4lol6oS8siy0-A+ z+-6NTcT5kYDGRSESW#dVRVSL3E?G+9OL`_OWJsSHdTXZLQMvd|5`t+g{k=&QLNs?D zfC9_iAMuHv((Cpe0N@X&ra(^zGNXOtrb^on@Shy&>m5#wrup#PKIu$qEd4UyK%_aGaTBm6d;Sp=|!>Zj<`kh4q5PaIKj+7TRi>*4&g{|y z1Jp~4%I3je<6_5Xdpk3>NB|vz%Po*@q8nvJq368um}bfR(5$!NK3#Ip&0v5$8sy`5?;dY^X!_>)4)0*>w&2_G9x;5*$2(SRx z(L?LlMqHtGmiz0fcg3vh0|+HQ<2Am0VlCS8K;FFBBd;8$`C+o z=>`KQZN|NsF{)E*&1}}JQ7>t(2$VZ$3e+Vt0!`^p?MshmOmXm+Yu>y`JHl-RpHT3f z3Z7H&Nd@1fK+HX4db{v#f{wv{OL3o4@M#6#L(nm(;$-1p)r0R-@cjh$c+j-W<3U@0 zhd~n^*0gLb_7XRKvE!kl3sdtXDje<+rHuo~M8fh#cs(mYSc}@*O?pk9L_jv_TJd-z zSYqT%2tP5bc)yCH-=l6V6bp)y@-#BFt}PVFXNgpod#ikMb^DEzvBy^z4NJtpB<;s9 ztv3+sIt0Po+U}xYdCsJOaxFOD79e?5aH&wNH@A1fdy;QPk{cU=mS#{cwrU^CmaB+E z2zNURwBiEl-%fAx0?Dl)3bcZ6FYx@ih1`YbUw-|CSl;v>0Rj#8F!r@Bq7HT`D%mi&!bFEv^#-r+6pyX`H~lKN7*-_oIm5@Ip> zjF!Ez1zIl|3>K5ytER0Mgwf3uc6kR(fU?{{9W>5+#?L+Y9f`2Ugv*rmR^7R z#owhBC;I6|2GF)n0xbl^##7}%;tFbkq5jy1{pY)nZ3)xhbY-!*Q$~9dVHljUbEOLt`L zhpoAvw`wh00y<##YJDW7=9=S^s`Vq58pk7u)sAg?hFlOU?U*bXN-s zCS{p*t#^xyJ`JL1t?V!nXd_$O&@(xi+VYlSNEbe?X3uAckzD*|rD$gKDlZ2o$|odt z=8dq)UG_x1R@s6Ztkvq*Ymq(lmMs(_iEcxNwH2ghXq$&9ouoG=r=q|ySxl(%C0sHP z$a1T1o%i(l=}V^b0)j#6T0Iie#+caB2MLL6r!Z>x?&P)Bsk&2-b@~R=&%Ql7iY3!7 zinOj?nZq5qB3*=3J?M(8uad5S!lGb&Hlc(vV3|~Jm?*fiWeiKhx|1I6c*Px*LFGtt z33ZIn2FXWRBj|@u^Chp|nt66do03dB%Pd1!T+|>q?_H}Y)rRB&X+XB>n}NJ(agatO zXU1H!NGiU7o}{VSXk8`A(R z)T>gdF5~%2`j~wY}&fW@w5M<9Mca3fB}@QShvS2Ni56kZm)R zG!~k~Be6g-bWV&Efg#+|qaRRU$QFi>h(+r;v$~ouZilb}+i^7$RJtar+1UL2^7vI^ zs;PLQQfXc`dG7DJYQy+?YlZ))Sn8bKJ1Ukc7oon?BWN;4F$4i#l^2Rn%*lojfFsxu48IfaA 
z0i_GvnfqzHRrEE1?98)inUwgNn2O`$`>9D7-_!>(Pi_1Ven^x=W-|1-<(?RQFZlUz zgkG>AmvW@O?xi0|$QCI0K=RMHx*nXFsaDv z#-wTVqsohP6WN{K;O`{0s}gwDU2U#Jh7$EOxfjGTcRN}`gz)_={PByCsT$goAXvXlh0Zr8M86gmGpW;=(ECXFMUWD4}08KZLTNi z2V^+Sv=q8C_wJWIH&>Nd>?5>2|scV_&B2%yoNZ7?&ByztLR z`mwf`QkzRqzEQV5=C=2_ZI6vfGv#iY`*ljw?03@;C#<=2`RcU7O9=n_-8SzZyDfgw zZR3Ugs0i5ZS?H`CMOl{SA-8(i-NZUQ8GBvwiktWYLX$B~;|QRtgqh}jZuNdkhnGGC z`l!u>VOKloc7MctHHv$4Z(^jMG#QD;FbLKS7_z0cWB{;bFJ>ihM*owb1<<@pIPPoO z`iKQghCbj3RP74d#={uAzjRRqR_Oc_T}xz(npjc_2fqy=|tA%NI)}4>=>UENe%XJl6$xRd;h% zo1N!sR+uq6bE4J6NRDZ8mDW2XJJd?oed$KrjOZKX&R;2EE0c;zunZI~s@805)(R*| zEtwH~pG=gmZH}MdE?+EKo${3cJuQa%W;AD|d6r<OZ4$`-N#i9)@^_)N|hqt+UGQ;WT+>#&kYaF?J{@ii3#l*HosqHSjKL-x8=#zXHg zPb!b(1l_FZGD6TXk-@rF41ExJ&o>c%rD-=jmo(fvi~H9+-|CkaeYc7=#!WB)X`SSo zWahWG?@Ti|;ZuH6%3+i-yab+KM|BpsJqd_Ydnzj#hD@UYqW>KUe|l z<-!_ndtXqxzpucIYto9;<`b8Tmwj(aqug%r3-LgE?70I?2=odpl#0HOtqbYCMeQ-P z8+zB@XvhqB*sZB(LkR;mmR`tJ=b1N}*TriX6m@UFgyxlIS;x$KP1GRgSIX}TR>)@Q z`uhM9M}QQ202ljHL%@k4u;XB(51dGk zkqQUE3~qp#$`BmjK)#>kd9rIQ>AsGa`aFt_sD@U(N|A zvq4OJ0!D&)CgY~3Wgay33ry(?nLQ*=*IB_NhDNB;$ARxme7ID+{NpX3-xK zv`A0uB33GhQq2Wgx7iNy+Y8cn5|ZbvybDz%Svo`LUmj-?d6gPnV@gQ8WKsnp zzEC>h-XPmWVXv>03o8(@+?lge2X6|!<#I9IIOXb<_LBv1O0==Z;9dX$D z0AM$^473fg)@o_+7LdetK-zXeZPeht&mAD66ulEENZ2Z16SqE70GqXjBzsy(-rteL zFny9_u$5%3Bgv39W9TJ&+$31a{3SQmV$a6LkH?S{jOm*J_?rfMf{=kmv;bISX5CDx zHe*S~g-1Km(xThbk|p|#wVULiHTq*rv>Mb%9n@Ba`7;K^$2=6Lj&%H!mSJMtfK+icf5 z>Q)1Hldd?DAbqZZ{|~sEr04w`znZzIIvne1KIm2-vb2W`?H{vQG;U~r*GJ4sdWCQz zLHkn)+J9Ibrpb)d{T^-9K~}rvGAVZBEZUxjs257rqSx}}n&k@!?g`Xaw)i8 zZ>$MV<8~DRUcEmbj)!K)4}coBITS5rsnHaM?kevakR#4-R+?o=5HV-z%AdgOxhvkI z^3*ftcGm=bp}z*OdrO40*mi3=LQ`y0W3f68+qqh5bTluW&HP{Ia$k*e1fme>3qy%S{ z#Pe*hRfRa6$R=8zendE>dO2FO@IAwbSwF1H^;&7gL?3b3QSn~WnKLv1;3);hyqD2;XhIE zoPsq4|5!oWb>ow|{Z$2DRPfyjAVS`2qp7-hQ7Co~ie*JRMk4%Co@Rw9I^-I|ND^+s zUsLc;75p;=>czG+6aI5;LdKRMwXie4m1D zK_~ndiu7)A}+i=SaH_wr2Kk6fZF&OAvEDnzRBpJSvxnVFHq)8l2Cqikb3nwm%->yTr{ zddK=D@9a+>BKLka5^KK*@(j`szK%e}Sh`7@tz9yNK4H2DigYubz1T;ZTB zc#cI-qE*(>qA!3=6_ 
zO-cN|2Z__h-#qn2$7IK=1>SyG%nng(hqX(p0rSK2zHa^%u_nblN=+7A$)j~Sqnt)l zH0eE=_${v5Vh)^}!8RJ^kBtleol+{1wfUYbmhj*6y5G@?2)J-aNbOQ-lHQ&UmI~<4b1k zGR?7EOgseVFqR~S^%s3j;JvlYN$>KnKsKX%3{|X@1GbQOh1a;EV4z(C$*W1<8IQRl zjA@)R=}gr!9eiu?AR!?lu=|K(k=TVEuT-$HSJ)##v>vFK&!)8UI*Qr&tY|@d z!FevNLj?a!3oJXqsD}?JFgE&0UCHR`&yM)dT$z2C14gd;jj*P4!$HmSS!2LrqGnIC z&Cu*aDS3{lW#x^{yR4cHkH!{C@GFJ+<2)bJbmTa;;(59(VWg(0iZyT-v}AKO}T z6*-eLl@LX=G!R9YAWXg4c;U6QTDW1w&On$E5>=P?fYjDav+JfdMi8KEu`w6@Izich zp;vqXJ2KPS;wKX)W-WI=hSzl^m7qsCT)1#t!A~ksl}*>2RRavE*c9LZG>_~A2-bDL zRs@T|oLIC8V=;yu&)<{bF@UMvV2ygY_y(2sUT_S8O!kxMaai-Hwgpl{HOWYxTB+*W zkc?1!kPw4GwNbM~kBQ{bR#u@5IE)TLJ^jyyLew@=IHnhAYvVQ1Ixn${8=J-lN@ltX ztyz>Ti6h;OkNCkpXU2QH-##mo8l+4sof9{(^4M@ORso}y+CcBwdX8;tm**0jrB*W1 z1?lkD%!)P0;T|JnqdMy;j>mNj`55afss!c)2!XATWKX9q5pP>eYloAkw3MgRsN*S` z8FWTQ!=yI4@mum0n7{lLX^*59h-G~jeIn%(5sCEw6j~0xrv2SCcF(7n|H<8;drEY; z)d}=6ZU(eT9U(Q8m7!eIsAiN7$(ajmf7BXXLi~J?13~CapIw?5^OQ?)$7MYH_!<7s$V0ENQio=m+-rs~ z#NK2kM{hEk^hc^+a^MN)SDDAmoC-+Eu1uWJWut5&{BoIUBa-`cPRmXpmaSfT8yHlUWuSJ?m;tEJ`a zm9^ujknnHQo+EzCF^$|v?1^Vu5eLF{Lk;4^b&hWHQEL~l(@V|ozWRcO_8~3(*51T; z{7weipeSXEbp-ni%RfRoBWmnuD>rRpmf4wfs&2h%oH+a%X~O@egc$;K+a^Jq)YZ@~ zQTA(0>FFQ=2mPXh)(%f^@6(w~2Z;9@-b9f60lvG6tGvq5pKy`S=5NB^S7GX0GZnJ;C0QBw zgsX)Ai#MbFjUB*-N7E0&P4464`AU#}01(eM8BVdCT*c^G$&eg3+CK zz=JNvUV1;ED^1(*Clr_-<14!QO$Gl^fdmAv>aeM+hPJ!ZuI}bxq%z{4o{Vqp=bbLU{vs|&K@oog0A-l>&#S+<-PNH!?1|1bBv|Zv}24tE&rzWp0L+g zI@kO)7*F=P+Rt7C4m4i23gRYU%p%^Dk@yE!**cx|#!FlUnW@LDs98X}YPO zjrXE8=El^-Cs38k8t9vD9qP}GQn85_x8Fx08lQa5YK}Za0C)sBvIvD14Fv))Qo@6* zY>azSmH;L=;<$&gUo2G6h|T zx#-*4DJ3k;f#wKgnZa@j?Q0x(+n~kMFQF+qp6qFJHTnL z0hp$TD1v1tlj1EsX^SIIZWCBt=HE(oIVw3^Ma9adQtyFj4ccdr zUD|t6cJPhxZ)@ZaP=ET8J(zy{y<>(SQq2X16amw`lId7R8=8uoaicDgX8!V3n+E;} z8>Rt6PKdOGRNzGt?4BTS1`uV_y4#quVFt?&+4;K+WP>d^c>FOFkb;KYgccXc;y6R| zy1$4_GaUWA;)ff*s4ibr=HC<=A&y)mm_%}s$(=cAQSx|3lKT^>F(fP?Q!|}OL!0sD z8%{AJ6fZ$3jt7qrLvf3?WTT&=9xS0^P$RrQ8!g~^Qg5pcS7XqU`FxCEPi~AZVxtT zs>VxyxAP>6^-@wY&hjs^6Gl8#NTMYn?wCwFclRVo=YpM*Q_N&r+aLcj?+R_v{sn*7 
zt&!+K(dk|VBKRJKKc_1ZL9YlCI}U$H!CzPKHwZd*BJ9EtlI{#YtMostpkJIo76?{S zE{19-Zn$T`#nhS~Mlp^N4k)oig?8fbbKKq&{$V1-e-O8r<(2NBPWYI#jRrc|V;IgM z8`?yAUwUt<2TrmdYg@A6G~VLzf5{hqU4c`_nv&Th8|!Q{q)1L_H^;8-rs$EP9>1VA z?cu$BxO$ED507~d*W6B_;=@?2Y6a5SpQeFH7O;hG$pST6r(}Uxfy4nu88EQ|fdYwx z)7(tk_BKDZ0(az^Uy9+H3!kh%`QaFgQ38H1h97wuk|bhN;7B5g+$K;!OtA$*JZS!YNW5ZQ*yQpRHGr z^x3T0M2rYtELXhZEI6<2f5(IDM@uW4#e860?G-_>`fPpkN~K(wmH!IIg09TwoA%>L zr$2G6UO|DfwsjgE#&V^6c z{(@QSFTCl%U-P2_vLahb=4D%w(X=#4#!k@Ob@`ZBVGG~OIOjKwp80)D(&Ba8qJNCM z&Pq~8D~TDG;*C#ixMJ~kx=aXsx9P{-Bpo7L_w_(JWa4${Y0fs)zhVEI8{!T6oaJNw zF@nXk0uz8xq8>$`B4}~)#bga7Fs!=yhQx8)pU@bJPjgmVBY%~R7G^eYL%Nt1S2x)_ zyn9k%0{wZkpoHDWBKV&k3-_t-R5Neb(N%Txo^+=-+TAGb&|h%qm{Fv9$j45H4aS&5uI!L|2anbeiu;|1_zT-lUFo8nDb%2mBL ztsmi4Icq*RH^*4NSDhf16X^nx|8#D;~9LphoVJaTJuQSQNf+{SthU^bEVLwV0HSgY4W9D6e zP@u6}S5rFUY^y(4HU%xNmpHUlro);-uO=NE`v#b=?Bf4ato%gXx@CGczlWa!P*@R? z3-y}Uvtgy@cO^2mQvSBvShv}}S9@!vR~tdw#+QG!HvDs?S7U3ZkTEwUV>^xtUQI@x zeeR&w#I4&_|KaHW==pHt>yla*r6jZc%#=N;UAyEPrx2|7eqSz{Id+IpIriwXTv4v^ zgG_WA%D7ij@>(F?E8{F){tf2(zi2Ex`b4H=BYgEu zNjLGYRVv>L8=KxG|A`}fu|WIGq0&2)b632U?vY)!bW#?Xj*cU7 zR=0z!*OLXO0_9rWp4q9Z>rSsD&aSh$x8qo7UxoOcXa=jB!6Ovr!*YlI?axJKQ^zEv zr}K0)hHxjicz62PbsXCH?CQ=EcrG)L+~#E$!rI@OA#okaT2;!(pWy zQP4wR(NTFd+?(7qHdlR?)i}$l+2I#AtE<@JEUrN@_g3MS7n2b%b`Q&eeX@6g<6#GJ zF_fXktKFn{BQ7;-LC(`xV?{W!xz5994VA zXl|bz_xBDAjP?x=3=fP>W*;Te>kTjHYC*w!EsnrOrw|p)GlxU<`H52s^8%h%)(S$J zRgT|aLxQe1%+bZX2cklGeQP%MIGMe+5gOe1AIjgaToUko8dOdEplT=-U~jV6Q0fW$ znKt_!Hv924`(e@eXT0(DvY)}0gcnNGR}Qn`CzVM>C)M(v?dIKi+qtvny{%L3`nHY5q@yIGoj7Y= zxIU(HXNi_*Tq}9y`?DK#Wk(!@a>FpsrZA-mrV(#TcXo9*MLdn$Q{Ktu8C^$b2t0Bd l);ZK{@tsIL$ZilFlf%F6Z|;VZ_;g?Tz}KDregA=v{Xfo(*T4V( diff --git a/recsys/algorithm/matrix.pyc b/recsys/algorithm/matrix.pyc deleted file mode 100644 index f4df053d8a20bc30468603f38761637e803476ba..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8411 zcmc&(ZI2sQ6+Scedc7NO67tqf8o()O>n3y;l_F8mr0oVOid3u2QW{p!(d^7kJjR}J 
z=FWyJ+7(haA3z`^kdXQV_z!$Q{1yHLpAkIIxgOiQ4Q(jcUGKef=gjrobDr~Z&$+Js zV{P@XfBEO#SjK-fT;D{qe@3@|T@sPRlYz+4lO10?-up7B$*?Xvb@4vPUKGz$GNvt^ znAWJKWuDySS8Fm@mSIzNns`-R%!=IeBw3QjB1a-W*;$pOA!b#n%M&W5G%M=LgnCMn zRoU*G0;B1_@YnG$DqHBbM^RzTyHQ!Bj~w+9sD+91%$CJoT!uCsrh}+R%YFBJl`kpH z@V+TaQu^mAE{X6QV;# zPFs;gWu(+ir9dQ=Z>L4sIA=|gQ#v>;NlOQ3Bw5qJSqZi~r@>83!P&^;9W=X!E)h8p z36;%lm9pF6gtCsVn`fq6#vmNdpu{KE4DNMs747UcMw`8SXg1T`BI>2X&G+`>UQ`U< zEAqaHOS`!h#XU2~3lcU*`(-cBUIx^5-)@dC(cpd#sC&arx7Y?|HSyjsOtZ8MLu$~H zHSwM-N~xmsJvs8Aa7sEBh1Lp<21~dqJ)q_m#(rT&MHo^A75kK;YO0B|Ab@>X2zdW! ztPIZR+85BrQI_RpJB3|FS(!$qX}`4#iK4h{Z$;L$?+hYqosFL7xAO9x;b>rnCM!+y zy`snqRjSIZnoXiIS^xmVIW7zs{t`yB;!`JVD*n3|zzX{!Eqi4=q8@Qye1#^u?Vk2) zq#gQl;7ReO9H>vdhWktnmiN9K`OrRJ-A(aLNolSfs$TW4<_uVh)YyQJl#rOr!ploe zEAqW8(HA(lX#XQq#Q0J`CC(oR<%r^nEyCM@np4Rdj;4aUP!4DXoTE zzNu@kqjx)EJGE`qZM%~f?OtTtS>BHG!QL>l*Ish|$e7@qlBS1;;UL;I16%U;$I)QV zXy__ea9iZR2(xHtl9@n8J4om6jv67L`4Bujk0mWNC*EcFQjIMMGmAnM{A-)b`+)8c;iBj8VaKx8_i!)`fuVkl0O2fVf}xP+rdTf z;9?C;Y+$m5<`4Afaa<0u_HEqG#+Z2k^?71*aHT>i_!>rE2t>NMnoR9wx_xyR6UIdr zH;FLRtv3vZd9pXaZNTI!pg9IiMYVMJQL$$yX)+@ zb57%{j^Fl?h1L-@2-$eh z(jW~xg5M_^5W{*^w_|ljX~=yyz`CfH)X2J>Cf`D_>>f%OmDZ`W&IeSki!oLwF~J#+ zU@x|$R>LSs(lX7nXb>7#QMy7?x6Y|T=cY?C`2#ddoWwitt>`DN5aU%5<3zVvRMPWN zz-@G)E=SDAQOOeVD&KxuhQIzrF-)Dm|2Y_bdC?3%g9VB36HN2&4F8OC3|+jch1`J( zoKLyblC=xY5@6(hMTeKc<0kZ4MMr8eOUEu+mtx&R%29kY_B<3r-~HLT95&;R`>M!)3m^_`8zo(#KNE z(Q0ZlrfnP`|vv&1|qxo;%t!_=m;=ddMH zwtGcBbV+(RNNs6dt*^Tfbs2Eqcbh8n4?Tvxmk*LO)2$3gw^@u6CxlLsN4jP4%vO(l zb?LA$!~CIHKG|Kp zMxF;5F^#H6f82Lb4N*kR5vn~=9cZe@6F#bHr70|Ech@N3oYKzx8uMFla8hD&rCz~m z-6^s)dS*O=_(wP_u7o_UN3Eh3o3t>m{{2*lZifeh!3*qOWT$?{BaeW`0KqrdQKcFm z-;M^>Xuci1&Ih!C;0<GT%$>l* z{1X4DfGspjkN^dY`nRHX1Uk9JsUbdmIqhL=G5RN zc3e@RU1Ywe3;q!+R|ZQxveB1RgsT`p1Vn58K!G0N*JsGCesv>S-SRa0QEtmH%Zp(& zNPlTu=uvl6$K+epF?DNuU;FKVYfOz|ZuY?7_b0SU%t}jSUd5z^LZ3hN_%k?n_;|%tB>%IRcjAonY#A~52x#)E?!k%#TxA5F~^deoD{_qDhJrrRX+^Tgu)T7#(OTJ#&aDg`f 
zSpseh?guf=%w7qoWew7?jW>>eqhuO3L)QI2N0}L6Yt#%-chP*8KLBecUbG(&5g#B6B8Znz+h8m0R@&}ipX(d^fa$VeNG6kENW#DC zRp>r9&8Fbgk5qX>U?>}AX6(t?oj7L~F(cCVfp4mrc&_m`NpSFPBxMp=rI*6~`NNU|Z(rUcuR({xN(@S-!CZjw#T zbPv0FEHWX1&B_RJ2oMAa78_s>i-i&7mc8aLF!Bdvoy+E!Ah#HsQ*uy#zgImy!w;=& zBh)D}yQjMPRn@Di_wjqL+WFs%l>cXP{Nsj7|K;)f7B0Pwn}elDf zgPfFes+m`Os*w7gTdlwT9G#Av_N#Z%}zf^_fzSmHPNWiDRr* zhcI671pij57K%w3w~y-0j{kNXN3p3JM4jI3tgkoyUd2amJGbtwzMT~Ca;cuwv#FF& zaf>Z}jhl~22;*eg;@}VR%F9WiK#o}h)(TQ6DzM3#LFEq#vzRv+9hAzl@`j`^th|yG zD#|M>e?)o1>NBj$sPeissAT=D{Gl+aKEpbUD{o91PVg-W#--t;lqX2caVOPhSkx)y zO$qU|R8FhUz|4&DW~6dfIjhwUtpFUmvt?Zu6neO_Y`F?m@w-kN@x@OQ&cW3ygagAU1<=`sdx_cS9V<{a_UpyYPl{J({(Ahnv5u`!5>~+ z;oG?M%eY~edhA;YmM*r_Eyb((RZn3TIb$Q;j?9MwBXYhHh5nHp3Qm1`!x-IlAtTgS z*FB3zUBpdUGuH4o-y)d^x{9CoHc_ykm>Ai0xfR`g;{NdBUdcYag2H}I#q+W;qM6tm zsG1d)xPdfCqPVNF7kUfyV#sI!SN%lWv;}q&i4|m7`$;*YsI}{Hy(Kg?yKd8V+0nW! z_R1JU_ESst+I~tzlC1fP5de`1ZiBy@_@O6CZSL;YJ8#3(eU{rps0MD$e*X=J64O)HZXYJ-`U zK0}iUa}GiIv9ZYMbV$rbNpJaP*N^pzyc+cLf>+nKiKlJ_3Zp3~)@jZLbPZn&@ zL2B%{6WY4ngfB_rdJwvauM>AYYI;FvuLm(C2aYzh6LeMI0D+MW?t_EtIj&SfR|gwm z+^)+hUPDc4C`uOGAD4mKNBIE`+;&dsbFzrJzD2auCwX;{QwO*gsF#)a zWB$PXZRZurTy?y#)HpyISa#K4+*q{j>z@*|F3|BWpndb^#cYf~^z;UX zH8N=0+F_J{Fb0@O>?&|tr|Cf$YyCaBfhV2e_vS?b5PAaL2tKw0ZTqcuvIlITuV%I9 zHDA_3)c5;rG$WCwtSeLb@JcGNsly;7wu zF`f|x1J?r`ggU8*4Om*#J%sX@(Va08XV$XbZqr)w*;q??L$wtAC<~ZSf3Sv_u@teA zD4x0{rK_)dp3xxg6f6q<4NO9L#;hqoqA_dUI)h(c!3TKP;I7iIk>lIA^e=FOl*hA& zV1@;J*n^H1b%{m`SR_wi0l9~SPD2G}Ag1`UscDA^06sx;rjKa&?7@J-sy$Zf03u!# zK`)4)r?t?ZL8%21#2+MhhW4$Bx6-e|Ib;I5N-JmL2JxOSaLJUX^B(LI@v4j|?e92;$jQwv|d9{E2Vhu&-XZaw%=v(;DN1 zvmPP58NaC)PeVe9J8E|N)MfU~9<_M$!^F3u2u42an?RovG&bP+68!jr&HK%)uMKQ5 z{(byb0I;ZxGSJP6kc(Ofm0g324~iI-m(EXNA?S=D?M})4=OMy_OGAKUNksj2Z*I88X6- zhdj+c)uSYRl=$MH%&_)gSnd_Mk5B>QzfoB8eW8)g&}L8}n%plNC{XzQo2^6TCI*nl*#Ej;Y;Ws{K*5^uxbb2jjvoG=E&pA4~{?6T;x6u=A2dcBDqw z8CF{Wzo0WrfhY%yk@apAYv9HDQ 
zua2-ha&)9`#q#IYH(>d5Yv*%oEj6`eST_$G^=FID$i;r1ZlI8tl7x3nh8PLY?MQeWo^f@wqFC#KDnzC!HL0lLe;a^NLIIhmIV7O~;|`|P{%XDqGzn%g?shfjR8J94WiIB=y6GhC9@6rLW(U_UD9t zh3r}Vj`a0=(gvQ;uaG^fUs>?(q8ysZSl@_G+x8Pi6;i1mfBGuHeq^ivG`TpTZy|kF z-?RPO+?x^jLKsQ_G%mJyM(xk2t%+3mth9Dk?axZBY)ZFwPVLW0tt_d9hM+>#dL9@? z5}*H%VZIfH5qJ@37bsgm3c`_wFCO|Zy3t(ZBueT{JM6R``f)E)QqwlxrTC@3;q#3k zNAzVdB4A%F?e0I{lyv5K0V$6HfEprZ0a;ozhJRW=z%%SdfA{G$CLFINBOr16|DDjW zNq>{WZY#zgQj5^{C>uR$L0H1bi=dKRGh%E;I1f9_CAdhSMsc#3>?V?`sDDor-4Qt6 zU8-rq!*cQ#?*06In{t&gSu(?(5TJsAP9u?M-y}3-eLsvlz6sA{k#);%SijmQCn4^@(Ml;fWAKE3> z>!rmQ7AEYix7{x5T&}dkq@eRX-k6($VK8~LbZy;s9b!WU5Ef4HXCsW_?v%`oB>B(W z$Gez?U}__^_tshAynArLlrj?F3W{cs+Jo3G7eU7FqcN4l@pzjE2xr*(9*;iQ z+<*+t4*d+!8I-ytk_E_N$T$EOa}#YUc-zgh97hU6t4kc=YKm-v|IKCgCf5;HIT@CQs$5i-bY z#1TB8K@Z!OiWhpdHvmKtVZ4rxb{6=z26()T%4Gm%U?=8CX%|++A_mk3$*vfCU{uMo zam>be8A%nG6Mv@z8<}jzKC(DS;)sIXaF~-ZsbQ+3^8?-}I(-R{c|00*%v!rilqxI` zJyIc}yPoqVTQ9LSuG2h2b6!LL|Afm#blCd>c6basH*b}#3GBZlKDb1j)@@S$9d6tk zNo8mpQA);;SQ0_fh3@g8;z`-=mTZNE4Rg6+doT312HQA4zl9w%iHxE&#w1A;+K)Da z#wN3H4}I1kA?14)ZRtajW9ZzpHO@23N+4guuiRnhJGhBqg$7pT7<-m&(o)*dniU;`A_JS{brYb81%zJk7Nqc7&O|LxTG{#9wvWV^5q1k z96zzhZbi+*ukuXdm)&O%Tw^nySbO6N@6Z{@g~=(}27F)v?q-+oJlZhK87c|#WwD-+ zp?<2JC)Ryfg|+ZSOmbY7&t-`^VIkK6ijEmzV!WEX3hOODYn|qS<)t}R=)E%5Spt}veVfq`Ok@*h zc^=N>Xgwt=>CsCjI3+)3%S~t6Z`bTqpAQKPw)t{~7JNPMo0tYqi{%N2i999(6h6|w z^zc+pacWev-;vX!llofBI-`GC^4IH~W`cl)r%m(>7sL@lRW~Eba$C`pHktmqX~nv){j1 zHX-jym4TYz1(^vx+>($;CtlP^9G*}+1>OiWBwllIFzLBw<%P6?kGmS$AWOgN}R+#R)Wn(03S>%T_wfRya&+IJiz7t_#atx8Gne{g)has)*pLh;O)X z7=vK!1squt7a>r~QRd-h8PAizNu$|s1(5$To`|4I)?ii|MbL(4w8I-! 
zF!ic{XB*|B^>VI^`#450R~1hZ))6ssDR1Kv5DNdn<4=KG20X#X5L&_WTks%)R(C+1 zDf)2_(pBE4I}!+4SKUwaC-@EshlX-Wo{f{@--j66K_sZ0`@B*9L`uyU9+VWv#qHG5 z?HUGIg_4;sU#KrALp0L_BJ88aXUpeI5A!ZMHoWEOy#&I^74T0zK-GdBRWpYdA&JYon??EK82Fw+#@ z*P5mS{Y`OzZJK^5O$8HU#GNnun*xKFrp5lIz(J&j!KO6BQY19Wc`}h=u+2Giv^_G zIe*04N4(KM8?#JyQnP$SA;i*l!hnX_A@DQFKw8DEC#}81f0?*)T}EQQI(9v9M8=l$zg0HKl(VKlskiDS!UHTJ+#q@v1&|7$fr** zY%pff4E$Npv-G=*AA|DQqQ$R`xPM(MC-Cq8P~IFLY^=x^%Vs~gV_v7#Hkn=;yytD} zHZbOodbVywj(DKua0@Q=$aTewyDq-??lk#GJM6k%)NoygbdE^zC`%K(5q=0*c&FY( zUYa;ezA1F(Sh9KhecpbLH~CWOE=$aRIj{029)LF3+2)NL_JmVPr}G)Rq|gZ_<*Vf) z@)xC{Qn8dT4V3bg3ho1?$+5X}jnc^2i!*;*Dpg)KY_W+^U~yZbXsq*h8$dAt?ph#8 zH#gqiTa58P14J!J`Af>%d|1C&Z*D|L?QgaOy$L(dW#Y2k*Ubjsge)jcgFrx1$&CZd HjOYFrv)e*0 diff --git a/recsys/evaluation/__init__.pyc b/recsys/evaluation/__init__.pyc deleted file mode 100644 index 0c3acb92feecc6b7ce41ead6f886938ec678ce78..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 269 zcmYL@U1~xx5QXP@s}`jTa0mCL%?3o;B7G_<{iFCY#3Yz$^CL+V?s8p2H_%BDIxyeN znK=)q{kfU=aQ;+qsg~zGcb+jcz?Rv79)NCP-@=ynD|qdO{G}12Tvwk_lqrTXVR1-C zlPXn?Stc32DePC>pt`4wm-YKL+$tNR+|0xO)QGP8vWj) z$MmU5=s!MQbcHime=OU>kU~DUt>YO_6M{&=Sce3p7uYg)Nb^SlAZ9etQdx-lA={B>mlq zjFRyrGDaTfT2>ov@$0Xw97RUjk&;J|8OJhJc9bWi$>k)E5;@fQEK8)-Gdq$IBxS5j zA{%vig4@|#<}`eH>SDb_>G4xV>*_qE6~+cj6r1U~<@?h}M=4L&m$&6)OmpYQ($1z6 zWjZp>GaHSwaW>=~Y86e~9%#E$t&(wURU%<~)s^?U!@m5keXS#}cCYPrWcx}-UhaMd zX|E%%JLzhvqwlVC_jVuiDf)IkRmC>230B5%A)Y!tcVX3s)I9Tq?h8wxAcuP5g$KZwT($Q#~_8!jTQKZv{I)9>KYkGI0c%&w| zhN3r}lmD|%wTjK$^ok!y z-dUeP3ZUX)kZzCwgd#MdG<(!kr`O8PbY}WYNU#F42AN#8oppE1Z+W;oHzwk9uj$B$ zL%ORRMc@!Hyr}g|%D-XIr;L2SA8hR5Fmw9X%kxcGP<%Udo60O`ky9<0?>s|(L zQ?rNWDjL!`7H7OOldmrF2ABrm)Bv2t+`&xaXam>h^6)iz3?#=OU0xgf@Xx?{g+`Qq z>92s*Ohh_Px-9?;s>B2Qa(I3~f(uValO8(6kbHx9 z6-9neAy*p00rZIS6b{=JOK?s@`2@!@%tBmvfc}q^0ii3?8AbUhdP+Ldnt_$4jWP9fFv*eE83^moo8vx4dvCV4LYK^?ZvMl=L>p4 zn4$ajq$%|0fJ`Mbh@Mcg#&<7+swU8PxXQSaC~n_hXrdBYw-a~yB4d>|*w2~@fo#~} 
z47Dp=@8)YaveSbBmj=09R<;4OWt3KzO5tHQI>a^ZuA=HfcdyC~4mhC<_y&S;ETTJ_ zPF0q?nla&x8*OntFRigAHh|BfBzZtT1?^q%cB;IoB!Q3UgUXd3G9h^EF6X`#caf7{ zbJwfZLb(H*$1|-}W-BuI3%C6}!{a#4@JLOAbJSE9P4FJN^sFfb=^{rbMm_O`{uuc*RjZVk{^SX=Z}CkHn&O);`5V@_Z<0FYP&m4IL`{jv zyLmdD5#6%ItI|TwC~I_u9-^5FyX6R|YELHRaxyNQ3~q({u2MbVO0~#7Bo&=1o!a>& zgJk?*IbSPZxd`D}f_R}>S|iLSV+pQB3RoHuB{MXI6z z%jil9L*8>3rg<`(po`&QnB+0Z2>d~wsbG_Y04WZWc7W?QU=Du=(z|Fbpm`t7MQSR9 zvh-h;lL0`8owBbQ&4#zN*=#ho%I9>mIpF^@!{^SYiWrKVLn8!N#MvyJ&N<+i;|_Pr-Y;;ku=md5WvUwJ(5d818LKwmB3IT9a6}tq7qUSDiRXTS+mss@A zC?foKAOT1n=sMs~+l9o1)Pt^P+a4qpNUPAT+I9t!8l-jT*0Eg$Rfj_dq8eNPoC7@R z&OlTj_0EiYXCay$^)|-64T$ET)0{_D;=l1}I#@FmQtzZNwl#yTzLJH`v%>bwVUKw5 zZF@WT`kM!v`?j-zojXruGLRHDsK{ z*^zI0%D-RagDmp*i-GC!%r#$Pn}+t4Hr!HTU#>hsU$C`qtzdpUY>ngs8A1etNc-8 z|FrsrisF!5wca0!8ZLeXizYR|2}xbjmipPq-?4&L6}RBjC0QDbHoLHbwi=gA@1Wap z6OJonep#n{#x`w)trdD|lwzj^5rk1f{ zI94c1#u1{G(SA=p#Q=#94^^QuE=`kX(j?ppfC4ui zWkCLBCG;TWf%vjwi{k1%EP5FQIP=bR_kHJzx8&5E51a-3TXEV=QnEY{nM_p>u>C=h z=FuRbCK(h2Q66HQPER|}6o;*#^(VGm!eOD$&tTD{bgfo%<~>$P>~>xc?|#K~88#x% zjfGz4WFU*T4|z6hiID6orko&G3fjPl*WNtc=-PRaZlnm)?Jukl-EK48ZkKdJER-lI zf3B9e@GR>ri5Mj4f8t(k#JcYZZqKo92F&b;b&YA(Jp|+uv2JQPX5Ge!btT>`>uS@i zdzpgFyx+#mbHU6r_NFu}u}9>yro3MA5!J*j2`nH8WQ;w*c?0Ddf~UMgHKr8@2%TfG zNa9@-6O0Lhhgc$UmBcj~#c~<0 Date: Mon, 14 Aug 2017 15:46:43 +0200 Subject: [PATCH 6/7] Update README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 1444df5..a51c658 100644 --- a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ A python library for implementing a recommender system. .. _`Demonstration video is available`: https://youtu.be/tIvQxBfa2d4 --There is also an accompanying `bachelor thesis paper`_ (For those interested) which outlines the background, architecture and discusses the "Folding-in" approach. +- There is also an accompanying `bachelor thesis paper`_ (For those interested) which outlines the background, architecture and discusses the "Folding-in" approach. .. 
_`bachelor thesis paper`: https://drive.google.com/file/d/0BylQe2cRVWE_RmZoUTJYSGZNaXM/view From 986058d9afd677069a9e52be3b2d270d4b146318 Mon Sep 17 00:00:00 2001 From: Ibrahim Abou Elseoud Date: Mon, 14 Aug 2017 16:33:37 +0200 Subject: [PATCH 7/7] Update README.rst --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index a51c658..d09f9a5 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,9 @@ python-recsys A python library for implementing a recommender system. -- Now supports incrementally adding new users or items instead of building the model from scratch for these new users or items via the folding-in technique which was mentioned in Sarwar et al.'s `paper`_ (Titled: Incremental Singular Value Decomposition Algorithms for Highly Scalable Recommender Systems), this latest commit is simply an implementation to it for python-recsys. +Incremental SVD update for python-recsys +======================================== +- python-recsys now supports incrementally adding new users or items instead of building the model from scratch for these new users or items via the folding-in technique which was mentioned in Sarwar et al.'s `paper`_ (Titled: Incremental Singular Value Decomposition Algorithms for Highly Scalable Recommender Systems); this latest commit is simply an implementation of it for python-recsys. .. _`paper`: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.3.7894&rep=rep1&type=pdf @@ -16,7 +18,6 @@ A python library for implementing a recommender system. .. _`bachelor thesis paper`: https://drive.google.com/file/d/0BylQe2cRVWE_RmZoUTJYSGZNaXM/view - Installation ============ @@ -166,7 +167,6 @@ Example (1131, 5.4941438045650068), (2339, 5.4916048051511659)] - Example for incremental update ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1. Load Movielens dataset and prepare for training and testing: