From e2afe8cff849c7540d1eca1896bfbf91ea04b6e6 Mon Sep 17 00:00:00 2001 From: shuix007 <39778055+shuix007@users.noreply.github.com> Date: Mon, 28 Oct 2019 20:17:58 -0500 Subject: [PATCH 1/9] Add 1 vs k prediction --- python-package/SLIM/core.py | 105 +++++++++++++++++++++++++++++------- 1 file changed, 87 insertions(+), 18 deletions(-) diff --git a/python-package/SLIM/core.py b/python-package/SLIM/core.py index 772fde5..936d255 100755 --- a/python-package/SLIM/core.py +++ b/python-package/SLIM/core.py @@ -516,12 +516,15 @@ def mselect(self, params, trndata, tstdata, arrayl1, arrayl2, nrcmds): raise RuntimeError( 'Something went wrong with model estimation or evaluation when l1=%.4f, l2=%.4f. Please check the input matrix.' % (bestl1HR, bestl2HR)) - def predict(self, data, nrcmds=10, outfile=None): + def predict(self, data, nrcmds=10, outfile=None, negitems=None, nnegs=0): ''' @brief predict using the learned SLIM model - @params data: a SLIMatrix object to be predicted - nrcmds: number of recommended items for each user - outfile: a filename to dump the topn lists - @return an numpy ndarray of shape (nUsers, nrcmds) + @params data: a SLIMatrix object to be predicted + nrcmds: number of recommended items for each user + outfile: a filename to dump the topn lists + negitems: negative items + nnegs: number of negative items + @return out: an numpy ndarray of shape (nUsers, nrcmds) with recommended item ids + outscores: an numpy ndarray of shape (nUsers, nrcmds) with recommended scores of the corresponding items ''' if self.ismodel != SLIM_OK: raise TypeError("Model not found. Please train a model.") @@ -531,29 +534,79 @@ def predict(self, data, nrcmds=10, outfile=None): # initialize the result matrix res = np.full(data.nUsers * nrcmds, -1, dtype=np.int32) - - rstatus = self._slim_predict( - nrcmds, - self.handle, - data.handle, - res) - + scores = np.zeros(data.nUsers * nrcmds, dtype=np.float32) + + if negitems != None: + assert nnegs >= nrcmds, \ + 'The number of negative items must be larger than the number of items to be recommended.' + + if isinstance(data.user2id, dict): + assert data.user2id.keys() == negitems.keys(), \ + 'The users in the negative items should be the same with the input matrix.' + else: + assert np.array_equal(data.user2id, np.array(sorted(list(negitems.keys())))), \ + 'The users in the negative items should be the same with the input matrix.' + + slim_negitems = np.full(data.nUsers * nnegs, -1, dtype=np.int32) + nusers = 0 + newitems = 0 + for key, value in negitems.items(): + assert len(value) == nnegs, \ + 'The number of negative items should match nngs.' + for i in range(nnegs): + try: + slim_negitems[nusers * nnegs + i] = self.item2id[value[i]] + except: + newitems += 1 + nusers += 1 + + if newitems > 0: + print('%d negative items not in the training set.' % (newitems)) + + rstatus = self._slim_predict_1vsk( + nrcmds, + nnegs, + self.handle, + data.handle, + slim_negitems, + res, + scores) + + else: + rstatus = self._slim_predict( + nrcmds, + self.handle, + data.handle, + res, + scores) + if rstatus == SLIM_OK: res = self.id2item[res].reshape(data.nUsers, nrcmds) - out = {} - for key, value in data.user2id.items(): - out[key] = res[value, :] + scores = scores.reshape(data.nUsers, nrcmds) + out = dict() + outscores = dict() + + if isinstance(data.user2id, dict): + for key, value in data.user2id.items(): + out[key] = res[value, :] + outscores[key] = scores[value, :] + else: + for key in data.user2id: + out[key] = res[key, :] + outscores[key] = scores[key, :] if outfile: f = open(outfile, 'w') for key, value in out.items(): f.write(str(key) + ': ' + np.array2string(value, max_line_width=np.inf) + '\n') + f.write(str(key) + ': ' + np.array2string(outscores[key], + max_line_width=np.inf) + '\n') else: raise RuntimeError( 'Something went wrong during prediction. Please check 1) if the model is estimated correctly; 2) if the input matrix for prediction is correct.') - return out + return out, outscores def save_model(self, modelfname, mapfname): # save the model if there is one @@ -634,10 +687,26 @@ def _get_slim(self): argtypes=[c_int, # nrcmds c_void_p, # slimhandle c_void_p, # trnhandle - array_1d_int32_t # output + array_1d_int32_t, # output + array_1d_float32_t # scores ] ) - + + # access Py_SLIM_Predict_1vsk from libslim.so + self._slim_predict_1vsk = wrap_function( + slimlib, + "Py_SLIM_Predict_1vsk", + restype=c_int32, # resmat + argtypes=[c_int, # nrcmds + c_int, # nnegs + c_void_p, # slimhandle + c_void_p, # trnhandle + array_1d_int32_t, # negitems + array_1d_int32_t, # output + array_1d_float32_t # scores + ] + ) + # access Py_csr_save from libslim.so self._slim_save = wrap_function( slimlib, From 3f37823b107ad44369828688fb80dbe1cbf9aff4 Mon Sep 17 00:00:00 2001 From: shuix007 <39778055+shuix007@users.noreply.github.com> Date: Tue, 29 Oct 2019 14:56:13 -0500 Subject: [PATCH 2/9] Add 1 vs k prediction --- src/libslim/predict.c | 62 ++++++++++++++++++++++++++++++++ src/libslim/proto.h | 4 +++ src/libslim/pyapi.c | 82 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 147 insertions(+), 1 deletion(-) diff --git a/src/libslim/predict.c b/src/libslim/predict.c index 1849651..e90cd90 100644 --- a/src/libslim/predict.c +++ b/src/libslim/predict.c @@ -69,3 +69,65 @@ int32_t GetRecommendations(params_t *params, gk_csr_t *smat, int32_t nratings, return nrcmds; } + + +/**************************************************************************/ +/*! Get the top-N recommendations given the provided historical data */ +/**************************************************************************/ +int32_t GetRec_1vsk(params_t *params, gk_csr_t *smat, int32_t nratings, + int32_t *itemids, float *ratings, int32_t nrcmds, + int32_t *rids, float *rscores, int32_t nnegs, + int32_t *negitems) { + ssize_t j; + int32_t iR, i, k, ncols, ncand; + ssize_t *rowptr; + int32_t *rowind, *marker; + float *rowval, rating; + gk_fkv_t *cand; + + ncols = smat->ncols; + rowptr = smat->rowptr; + rowind = smat->rowind; + rowval = smat->rowval; + + marker = gk_i32smalloc(ncols, -2, "marker"); + cand = gk_fkvmalloc(ncols, "cand"); + + ncand = 0; + for (iR = 0; iR < nnegs; iR++) { + cand[ncand].val = negitems[iR]; + cand[ncand].key = 0.0; + if (negitems[iR] >= 0 && negitems[iR] < ncols) { + marker[negitems[iR]] = ncand++; + } + else { + ncand++; + } + } + + for (iR = 0; iR < nratings; iR++) { + i = itemids[iR]; + if (i >= ncols && i < 0) + continue; + + rating = (ratings ? ratings[iR] : 1.0); + for (j = rowptr[i]; j < rowptr[i + 1]; j++) { + k = rowind[j]; + if (marker[k] == -2) + continue; /* part of the history */ + + cand[marker[k]].key += rating * rowval[j]; + } + } + + gk_fkvsortd(ncand, cand); + + nrcmds = gk_min(ncand, nrcmds); + for (iR = 0; iR < nrcmds; iR++) { + rids[iR] = cand[iR].val; + rscores[iR] = cand[iR].key; + } + + gk_free((void **)&marker, &cand, LTERM); + return nrcmds; +} \ No newline at end of file diff --git a/src/libslim/proto.h b/src/libslim/proto.h index c87d005..5ade99b 100644 --- a/src/libslim/proto.h +++ b/src/libslim/proto.h @@ -34,6 +34,10 @@ double ComputeAvgZeroScore(gk_csr_t *mat, double *x, double *y, int32_t ntop); int32_t GetRecommendations(params_t *params, gk_csr_t *smat, int32_t nratings, int32_t *itemids, float *ratings, int32_t nrcmds, int32_t *rids, float *rscores); +int32_t GetRec_1vsk(params_t *params, gk_csr_t *smat, int32_t nratings, + int32_t *itemids, float *ratings, int32_t nrcmds, + int32_t *rids, float *rscores, int32_t nnegs, + int32_t *negitems); /* timing.c */ void InitTimers(params_t *params); diff --git a/src/libslim/pyapi.c b/src/libslim/pyapi.c index 3bbd00a..fbe8a21 100644 --- a/src/libslim/pyapi.c +++ b/src/libslim/pyapi.c @@ -392,17 +392,96 @@ int32_t Py_SLIM_GetTopN(slim_t *model, int32_t nratings, int32_t *itemids, return nrcmds; } +int32_t Py_SLIM_GetTopN_1vsk(slim_t *model, int32_t nratings, int32_t *itemids, + float *ratings, int32_t nrcmds, int32_t *rids, + float *rscores, int32_t nnegs, int32_t *negitems, + int32_t dbglvl) { + params_t params; + gk_csr_t *smat; + + /* setup params */ + memset((void *)¶ms, 0, sizeof(params_t)); + + params.dbglvl = dbglvl; + + InitTimers(¶ms); + + /* get the model in the internal form */ + smat = (gk_csr_t *)model; + + /* get the recommendations */ + gk_startwctimer(params.TotalTmr); + nrcmds = GetRec_1vsk(¶ms, smat, nratings, itemids, ratings, nrcmds, + rids, rscores, nnegs, negitems); + gk_stopwctimer(params.TotalTmr); + + if (nrcmds < 0) + return SLIM_ERROR; + else + return nrcmds; +} + +/**************************************************************************/ +/*! @brief predict topn lists + @param nrcmds number of items to be recommended + nnegs number of negative items + slimhandle handle to the training matrix + trnhandle integer training options + negitems pointer to the negative items + output pointer to the output lists + scores pointer to the output scores + @return a flag indicating whether the function succeed +*/ +/**************************************************************************/ +int32_t Py_SLIM_Predict_1vsk(int32_t nrcmds, int32_t nnegs, slim_t *slimhandle, slim_t *trnhandle, + int32_t *negitems, int32_t *output, float *scores) { + int32_t iU, iR, n, nvalid = 0; + int32_t *rids; + float *rscores; + gk_csr_t *model, *trnmat; + + model = (gk_csr_t *)slimhandle; + trnmat = (gk_csr_t *)trnhandle; + + rids = gk_i32malloc(nrcmds, "rids"); + rscores = gk_fmalloc(nrcmds, "rscores"); + + for (iU = 0; iU < trnmat->nrows; iU++) { + n = Py_SLIM_GetTopN_1vsk( + model, trnmat->rowptr[iU + 1] - trnmat->rowptr[iU], + trnmat->rowind + trnmat->rowptr[iU], + (trnmat->rowval ? trnmat->rowval + trnmat->rowptr[iU] : NULL), nrcmds, + rids, rscores, nnegs, negitems + iU * nnegs, 0); + + if (n != SLIM_ERROR) { + for (iR = 0; iR < n; iR++) { + output[iU * nrcmds + iR] = rids[iR]; + scores[iU * nrcmds + iR] = rscores[iR]; + // printf("id: %d", rids[iR]); + } + nvalid += 1; + // printf("---\n"); + } + } + if (nvalid < 1) { + return SLIM_ERROR; + } else { + return SLIM_OK; + } +} + /**************************************************************************/ /*! @brief predict topn lists @param nrcmds number of items to be recommended slimhandle handle to the training matrix trnhandle integer training options output pointer to the output lists + scores pointer to the output scores @return a flag indicating whether the function succeed */ /**************************************************************************/ int32_t Py_SLIM_Predict(int32_t nrcmds, slim_t *slimhandle, slim_t *trnhandle, - int32_t *output) { + int32_t *output, float *scores) { int32_t iU, iR, n, nvalid = 0; int32_t *rids; float *rscores; @@ -424,6 +503,7 @@ int32_t Py_SLIM_Predict(int32_t nrcmds, slim_t *slimhandle, slim_t *trnhandle, if (n != SLIM_ERROR) { for (iR = 0; iR < n; iR++) { output[iU * nrcmds + iR] = rids[iR]; + scores[iU * nrcmds + iR] = rscores[iR]; } nvalid += 1; } From 3b0a1e75cc851e2264069433685083f7e8979fe7 Mon Sep 17 00:00:00 2001 From: shuix007 <39778055+shuix007@users.noreply.github.com> Date: Mon, 4 Nov 2019 12:41:28 -0600 Subject: [PATCH 3/9] Add exportation to scipy csr --- python-package/SLIM/core.py | 73 ++++++++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 6 deletions(-) diff --git a/python-package/SLIM/core.py b/python-package/SLIM/core.py index 936d255..032b848 100755 --- a/python-package/SLIM/core.py +++ b/python-package/SLIM/core.py @@ -516,7 +516,7 @@ def mselect(self, params, trndata, tstdata, arrayl1, arrayl2, nrcmds): raise RuntimeError( 'Something went wrong with model estimation or evaluation when l1=%.4f, l2=%.4f. Please check the input matrix.' % (bestl1HR, bestl2HR)) - def predict(self, data, nrcmds=10, outfile=None, negitems=None, nnegs=0): + def predict(self, data, nrcmds=10, outfile=None, negitems=None, nnegs=0, returnscores=False): ''' @brief predict using the learned SLIM model @params data: a SLIMatrix object to be predicted nrcmds: number of recommended items for each user @@ -600,15 +600,21 @@ def predict(self, data, nrcmds=10, outfile=None, negitems=None, nnegs=0): for key, value in out.items(): f.write(str(key) + ': ' + np.array2string(value, max_line_width=np.inf) + '\n') - f.write(str(key) + ': ' + np.array2string(outscores[key], - max_line_width=np.inf) + '\n') + if returnscores: + f.write(str(key) + ': ' + np.array2string(outscores[key], + max_line_width=np.inf) + '\n') else: raise RuntimeError( 'Something went wrong during prediction. Please check 1) if the model is estimated correctly; 2) if the input matrix for prediction is correct.') - - return out, outscores + + return out, outscores if returnscores else out def save_model(self, modelfname, mapfname): + ''' @brief save the model + @params modelfname: filename to save the model + mapfname: filename to save the item map + @return None + ''' # save the model if there is one if self.ismodel == SLIM_OK: self._slim_save(self.handle, c_char_p(modelfname.encode('utf-8'))) @@ -617,6 +623,11 @@ def save_model(self, modelfname, mapfname): raise RuntimeError("Not exist a model to save.") def load_model(self, modelfname, mapfname): + ''' @brief load a model + @params modelfname: filename of the model + mapfname: filename of the item map + @return None + ''' # if there is a model, destruct the model if self.ismodel == SLIM_OK: self._slim_free(self.handle) @@ -636,7 +647,35 @@ def load_model(self, modelfname, mapfname): if self.ismodel != SLIM_OK: raise RuntimeError("Fail to laod the model.") - + + def to_csr(self, returnmap=False): + ''' @brief export the model as a scipy csr + @params returnmap: return the map or not + @return modelcsr: the model as a scipy csr + itemmap (optional): the item map attached with the model + ''' + if self.ismodel == SLIM_OK: + nnz = c_int(0) + self._slim_stat(self.handle, byref(nnz)) + + indptr = np.zeros(self.nItems + 1, dtype=np.int32) + indices = np.zeros(nnz.value, dtype=np.int32) + data = np.ones(nnz.value, dtype=np.float32) + + self._slim_export(self.handle, indptr, indices, data) + + modelcsr = csr_matrix((data, indices, indptr), shape=(self.nItems, self.nItems)) + + if returnmap: + itemmap = self.id2item[:] + return modelcsr, itemmap + else: + return modelcsr + else: + raise RuntimeError("Not exist a model to export.") + + + def _get_slim(self): ''' @brief wrap up slim functions from c library for python @params None @@ -735,3 +774,25 @@ def _get_slim(self): argtypes=[c_void_p # mathandle ] ) + + # access Py_csr_stat from libslim.so + self._slim_stat = wrap_function( + slimlib, + "Py_csr_stat", + restype=c_int32, # flag + argtypes=[c_void_p, # mathandle + c_void_p # nnz + ] + ) + + # access Py_csr_stat from libslim.so + self._slim_export = wrap_function( + slimlib, + "Py_csr_export", + restype=c_int32, # flag + argtypes=[c_void_p, # mathandle + array_1d_int32_t, # indptr + array_1d_int32_t, # indices + array_1d_float32_t # data + ] + ) \ No newline at end of file From 09ca88b0477c91b9b3f1651dbd7f5c1e4b5d31d1 Mon Sep 17 00:00:00 2001 From: shuix007 <39778055+shuix007@users.noreply.github.com> Date: Mon, 4 Nov 2019 12:42:19 -0600 Subject: [PATCH 4/9] Add exportation to scipy csr --- src/libslim/pyapi.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/libslim/pyapi.c b/src/libslim/pyapi.c index fbe8a21..1b35733 100644 --- a/src/libslim/pyapi.c +++ b/src/libslim/pyapi.c @@ -75,6 +75,53 @@ int32_t Py_csr_free(slim_t *mathandle) { return SLIM_OK; } +/**************************************************************************/ +/*! @brief get the statistics (nnz) of the csr model + @param mathandle handle to the matrix + nnz number of non-zeros in the model + @return a flag indicating whether the function succeed +*/ +/**************************************************************************/ +int32_t Py_csr_stat(slim_t *mathandle, int32_t *nnz) { + gk_csr_t *mat = (gk_csr_t *)mathandle; + *nnz = mat->rowptr[mat->nrows]; + return SLIM_OK; +} + + +/**************************************************************************/ +/*! @brief export the gk_csr matrix to a scipy csr matrix + @param mathandle handle to the matrix + indptr index pointer of the scipy csr matrix + indices index of the scipy csr matrix + data data of the scipy csr matrix + @return a flag indicating whether the function succeed +*/ +/**************************************************************************/ +int32_t Py_csr_export(slim_t *mathandle, int32_t *indptr, int32_t *indices, float *data) { + int32_t nrows, nnz; + + gk_csr_t *mat = (gk_csr_t *)mathandle; + nrows = mat->nrows; + nnz = mat->rowptr[mat->nrows]; + + for (int i = 0; i < nrows + 1; i++) { + indptr[i] = mat->rowptr[i]; + } + + for (int i = 0; i < nnz; i++) { + indices[i] = mat->rowind[i]; + } + + if (mat->rowval) { + for (int i = 0; i < nnz; i++) { + data[i] = mat->rowval[i]; + } + } + + return SLIM_OK; +} + /**************************************************************************/ /*! @brief estimate a slim model and return the model handle to python @param trnhandle handle to the training matrix From 0bd88b46b9792b7247f96a5e2bca6462539e83b1 Mon Sep 17 00:00:00 2001 From: shuix007 <39778055+shuix007@users.noreply.github.com> Date: Mon, 4 Nov 2019 12:55:12 -0600 Subject: [PATCH 5/9] Add exportation to scipy csr --- python-package/SLIM/core.py | 38 ++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/python-package/SLIM/core.py b/python-package/SLIM/core.py index 032b848..5493bfb 100755 --- a/python-package/SLIM/core.py +++ b/python-package/SLIM/core.py @@ -6,6 +6,7 @@ @author: dminerx007 """ +import os import site import time import scipy @@ -629,24 +630,27 @@ def load_model(self, modelfname, mapfname): @return None ''' # if there is a model, destruct the model - if self.ismodel == SLIM_OK: - self._slim_free(self.handle) + if os.path.isfile(modelfname) and os.path.isfile(mapfname): + if self.ismodel == SLIM_OK: + self._slim_free(self.handle) + else: + self.handle = c_void_p() + self.ismodel = self._slim_load( + byref(self.handle), c_char_p(modelfname.encode('utf-8'))) + + try: + self.id2item = np.genfromtxt(mapfname, dtype=np.int32) + except: + self.id2item = np.genfromtxt(mapfname) + self.item2id = {} + for i in range(len(self.id2item)): + self.item2id[self.id2item[i]] = i + self.nItems = len(self.id2item) + + if self.ismodel != SLIM_OK: + raise RuntimeError("Fail to laod the model.") else: - self.handle = c_void_p() - self.ismodel = self._slim_load( - byref(self.handle), c_char_p(modelfname.encode('utf-8'))) - - try: - self.id2item = np.genfromtxt(mapfname, dtype=np.int32) - except: - self.id2item = np.genfromtxt(mapfname) - self.item2id = {} - for i in range(len(self.id2item)): - self.item2id[self.id2item[i]] = i - self.nItems = len(self.id2item) - - if self.ismodel != SLIM_OK: - raise RuntimeError("Fail to laod the model.") + raise RuntimeError('File does not exist or invalid filename.') def to_csr(self, returnmap=False): ''' @brief export the model as a scipy csr From 61a793cbf60b637cc195b2f934a8784493deeaa7 Mon Sep 17 00:00:00 2001 From: shuix007 <39778055+shuix007@users.noreply.github.com> Date: Tue, 5 Nov 2019 18:01:46 -0600 Subject: [PATCH 6/9] Add exportation to scipy csr --- python-package/SLIM/core.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python-package/SLIM/core.py b/python-package/SLIM/core.py index 5493bfb..6c63bba 100755 --- a/python-package/SLIM/core.py +++ b/python-package/SLIM/core.py @@ -608,7 +608,10 @@ def predict(self, data, nrcmds=10, outfile=None, negitems=None, nnegs=0, returns raise RuntimeError( 'Something went wrong during prediction. Please check 1) if the model is estimated correctly; 2) if the input matrix for prediction is correct.') - return out, outscores if returnscores else out + if returnscores: + return out, outscores + else: + return out def save_model(self, modelfname, mapfname): ''' @brief save the model From ba9f0a4dd630cdb76fd71c214d116a0b035054f2 Mon Sep 17 00:00:00 2001 From: shuix007 <39778055+shuix007@users.noreply.github.com> Date: Thu, 7 Nov 2019 19:33:53 -0600 Subject: [PATCH 7/9] Add exportation to scipy csr --- python-package/SLIM/core.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python-package/SLIM/core.py b/python-package/SLIM/core.py index 6c63bba..a452c79 100755 --- a/python-package/SLIM/core.py +++ b/python-package/SLIM/core.py @@ -549,18 +549,17 @@ def predict(self, data, nrcmds=10, outfile=None, negitems=None, nnegs=0, returns 'The users in the negative items should be the same with the input matrix.' slim_negitems = np.full(data.nUsers * nnegs, -1, dtype=np.int32) - nusers = 0 + newitems = 0 for key, value in negitems.items(): assert len(value) == nnegs, \ 'The number of negative items should match nngs.' for i in range(nnegs): try: - slim_negitems[nusers * nnegs + i] = self.item2id[value[i]] + slim_negitems[data.user2id[key] * nnegs + i] = self.item2id[value[i]] except: newitems += 1 - nusers += 1 - + if newitems > 0: print('%d negative items not in the training set.' % (newitems)) @@ -802,4 +801,4 @@ def _get_slim(self): array_1d_int32_t, # indices array_1d_float32_t # data ] - ) \ No newline at end of file + ) From e09cc70a3934d10f9d0b54e50a269b7e975fff4b Mon Sep 17 00:00:00 2001 From: George Karypis Date: Fri, 8 Nov 2019 08:49:07 -0800 Subject: [PATCH 8/9] - Added support for negative test in slim_predict. - Re-organized the information that is displayed by the command-line programs. - Fixed mini-help typos. --- Makefile | 8 +-- include/slim.h | 2 +- src/programs/cmdline_learn.c | 23 ++++----- src/programs/cmdline_predict.c | 27 ++++++++-- src/programs/slim_learn.c | 14 +++--- src/programs/slim_predict.c | 90 +++++++++++++++++++++++++++++++--- src/programs/struct.h | 1 + 7 files changed, 127 insertions(+), 38 deletions(-) diff --git a/Makefile b/Makefile index 10af3fa..ab673bc 100644 --- a/Makefile +++ b/Makefile @@ -10,10 +10,10 @@ gklib_path = not-set bcls_path = not-set shared = not-set with_mkl = not-set -cc = /usr/bin/gcc -cxx = /usr/bin/g++ -#cc = gcc-mp-4.9 -#cxx = g++-mp-4.9 +cc = not-set +cxx = not-set +#cc = /usr/bin/gcc +#cxx = /usr/bin/g++ #=============================================================== # There should be no need to modify beyond this point diff --git a/include/slim.h b/include/slim.h index 6a39230..6fba4f1 100644 --- a/include/slim.h +++ b/include/slim.h @@ -50,7 +50,7 @@ typedef void slim_t; * Constant definitions *-------------------------------------------------------------------------*/ /* SLIM's version number */ -#define SLIM_VERSION "2.0pre1" +#define SLIM_VERSION "2.0" /* The maximum length of the options[] array */ #define SLIM_NOPTIONS 40 diff --git a/src/programs/cmdline_learn.c b/src/programs/cmdline_learn.c index 6199b5e..c2f9ec6 100644 --- a/src/programs/cmdline_learn.c +++ b/src/programs/cmdline_learn.c @@ -73,24 +73,18 @@ static char helpstr[][512] = { " csrnv - CSR format without ratings.", " cluto - Format used by CLUTO.", " ijv - One (row#, col#, val) per line.", - " " + " ", " -binarize", " Specifies that the ratings should be binarized.", " ", " -l1r=double", - " Specifies the L1 regularization parameter. The default value is " - "1.0.", - " ", - " -ipmdlfile=string", - " Specifies the file used to initialize the model.", + " Specifies the L1 regularization parameter. The default value is 1.0.", " ", " -l2r=double", - " Specifies the L2 regularization parameter. The default value is " - "1.0.", + " Specifies the L2 regularization parameter. The default value is 1.0.", " ", " -nnbrs=int", - " Selects FSLIM model and specifies the number of item nearest " - "neighbors", + " Selects FSLIM model and specifies the number of item nearest neighbors", " to be used. The default value is 0.", " ", " -simtype=string", @@ -116,12 +110,13 @@ static char helpstr[][512] = { " ", " -nthreads=int", " Specifies the number of threads to be used for estimation.", - " The default value is maximum number of threads available in the " - "machine.", + " The default value is maximum number of threads available in the machine.", + " ", + " -ipmdlfile=string", + " Specifies the file used to initialize the model.", " ", " -dbglvl=int", - " Specifies the debug level. The default value turns on info and " - "timing.", + " Specifies the debug level. The default value turns on info and timing.", " ", " -help", " Prints this message.", diff --git a/src/programs/cmdline_predict.c b/src/programs/cmdline_predict.c index 5e1535f..3d90f96 100644 --- a/src/programs/cmdline_predict.c +++ b/src/programs/cmdline_predict.c @@ -36,17 +36,27 @@ static gk_StringMap_t ifmt_options[] = { static char helpstr[][512] = { " ", " Usage:", - " slim_predict [options] model-file old-file [test-file]", + " slim_predict [options] model-file old-file [test-file] [neg-file]", " ", " Parameters:", " model-file", " The file that stores the model that was generated by slim_learn.", " ", " old-file", - " The file that stores the historical information for each user.", + " The file that stores the historical information for the users", + " for which recommendations are generated.", " ", " test-file", " The file that stores the hidden items for each user.", + " It is only used to evaluate the quality of the recommendations", + " and it should contain a row for each of the users in the old-file.", + " ", + " neg-file", + " The file that stores the negative items for each user.", + " It is used for evaluation purposes as follows: The hidden items", + " and the negative items are predicted, and the nrcmds highest", + " highest scoring items among them are returned as the recommendations.", + " This is list is then used to evaluate the performance.", " ", " Options:", " -ifmt=string", @@ -55,7 +65,7 @@ static char helpstr[][512] = { " csrnv - CSR format without ratings.", " cluto - Format used by CLUTO.", " ijv - One (row#, col#, val) per line.", - " " + " ", " -binarize", " Specifies that the ratings should be binarized.", " ", @@ -98,6 +108,7 @@ params_t *parse_cmdline(int argc, char *argv[]) { params->binarize = 0; params->outfile = NULL; params->tstfile = NULL; + params->negfile = NULL; params->nrcmds = 10; params->dbglvl = 0; @@ -145,7 +156,7 @@ params_t *parse_cmdline(int argc, char *argv[]) { } /* get the datafile */ - if (argc - gk_optind < 1 || argc - gk_optind > 3) { + if (argc - gk_optind < 1 || argc - gk_optind > 4) { for (int i = 0; strlen(shorthelpstr[i]) > 0; i++) printf("%s\n", shorthelpstr[i]); exit(0); @@ -159,11 +170,17 @@ params_t *parse_cmdline(int argc, char *argv[]) { if (!gk_fexists(params->trnfile)) errexit("Input old file %s does not exist.\n", params->trnfile); - if (argc - gk_optind == 1) { + if (argc - gk_optind >= 1) { params->tstfile = gk_strdup(argv[gk_optind++]); if (!gk_fexists(params->tstfile)) errexit("Input test file %s does not exist.\n", params->tstfile); } + if (argc - gk_optind >= 1) { + params->negfile = gk_strdup(argv[gk_optind++]); + if (!gk_fexists(params->negfile)) + errexit("Input negative file %s does not exist.\n", params->negfile); + } + return params; } diff --git a/src/programs/slim_learn.c b/src/programs/slim_learn.c index 1c87744..3d886a7 100644 --- a/src/programs/slim_learn.c +++ b/src/programs/slim_learn.c @@ -33,12 +33,14 @@ int main(int argc, char *argv[]) { "------------------------------------------------------------------\n"); printf(" trnfile: %s, nrows: %d, ncols: %d, nnz: %zd\n", params->trnfile, tmat->nrows, tmat->ncols, tmat->rowptr[tmat->nrows]); - printf(" l1r: %.2le, l2r: %.2le, optTol: %.2le, niters: %d\n", params->l1r, - params->l2r, params->optTol, params->niters); - printf(" binarize: %d, nnbrs: %d, nthreads: %d, dbglvl: %d\n", - params->binarize, params->nnbrs, params->nthreads, params->dbglvl); - printf(" simtype: %s, mdlfile: %s\n", slim_simtypenames[params->simtype], - params->mdlfile); + printf(" l1r: %.2le, l2r: %.2le, binarize: %s\n", params->l1r, + params->l2r, (params->binarize == 0 ? "No" : "Yes")); + printf(" solver: %s, optTol: %.2le, niters: %d\n", + slim_algonames[params->algo], params->optTol, params->niters); + printf(" mdlfile: %s, nthreads: %d, dbglvl: %d\n", + params->mdlfile, params->nthreads, params->dbglvl); + printf(" simtype: %s, nnbrs: %d\n", + slim_simtypenames[params->simtype], params->nnbrs); printf("\nEstimating model...\n"); /* free any user-supplied ratings if set to be ignored */ diff --git a/src/programs/slim_predict.c b/src/programs/slim_predict.c index 3108ae5..a9590bc 100644 --- a/src/programs/slim_predict.c +++ b/src/programs/slim_predict.c @@ -14,14 +14,15 @@ /*************************************************************************/ int main(int argc, char *argv[]) { ssize_t zI; - int32_t iU, iR, nrcmds, nhits[3], ntrue[2]; + int32_t i, iU, iR, nrcmds, ask_nrcmds, ncands, nhits[3], ntrue[2]; int32_t nvalid, nvalid_head, nvalid_tail; float all_hr, head_hr, tail_hr; int is_tail_u, is_head_u; int32_t *rids, *rmarker, *fmarker; + gk_fkv_t *rcands, cand; float *rscores, hr[3], arhr, larhr, baseline; params_t *params; - gk_csr_t *oldmat, *tstmat = NULL, *model; + gk_csr_t *oldmat, *tstmat = NULL, *negmat = NULL, *model; int32_t ioptions[SLIM_NOPTIONS]; FILE *fpout = NULL; @@ -34,6 +35,8 @@ int main(int argc, char *argv[]) { oldmat = gk_csr_Read(params->trnfile, params->ifmt, params->readvals, 0); if (params->tstfile) tstmat = gk_csr_Read(params->tstfile, params->ifmt, params->readvals, 0); + if (params->negfile) + negmat = gk_csr_Read(params->negfile, params->ifmt, params->readvals, 0); printf( "------------------------------------------------------------------\n"); @@ -47,6 +50,9 @@ int main(int argc, char *argv[]) { if (tstmat) printf(" tstfile: %s, nrows: %d, ncols: %d, nnz: %zd\n", params->tstfile, tstmat->nrows, tstmat->ncols, tstmat->rowptr[tstmat->nrows]); + if (negmat) + printf(" negfile: %s, nrows: %d, ncols: %d, nnz: %zd\n", params->negfile, + negmat->nrows, negmat->ncols, negmat->rowptr[negmat->nrows]); if (params->outfile) printf(" outfile: %s\n", (params->outfile ? params->outfile : "No output")); @@ -62,18 +68,25 @@ int main(int argc, char *argv[]) { gk_free((void **)&oldmat->rowval, LTERM); if (tstmat) gk_free((void **)&tstmat->rowval, LTERM); + if (negmat) + gk_free((void **)&negmat->rowval, LTERM); } SLIM_iSetDefaults(ioptions); ioptions[SLIM_OPTION_DBGLVL] = params->dbglvl; - /* predict for each row in oldmat */ if (params->outfile) fpout = gk_fopen(params->outfile, "w", "outfile"); - rids = gk_i32malloc(params->nrcmds, "rids"); - rscores = gk_fmalloc(params->nrcmds, "rscores"); + /* if we are using a negative test, ask for a score for all non-supplied items */ + ask_nrcmds = (negmat ? model->nrows : params->nrcmds); + + /* allocate neccessary arrays */ + rids = gk_i32malloc(ask_nrcmds, "rids"); + rscores = gk_fmalloc(ask_nrcmds, "rscores"); rmarker = (tstmat ? gk_i32smalloc(model->ncols, -1, "rmarker") : NULL); + rcands = (negmat ? gk_fkvmalloc(model->ncols, "rcands") : NULL); + // get head and tail columns, mark 0 for head items and 1 for items in tail fmarker = (tstmat ? SLIM_DetermineHeadAndTail( oldmat->nrows, gk_max(oldmat->ncols, tstmat->ncols), @@ -84,12 +97,72 @@ int main(int argc, char *argv[]) { arhr = 0.0; nvalid = nvalid_head = nvalid_tail = 0; + + /* predict for each row in oldmat */ for (iU = 0; iU < oldmat->nrows; iU++) { nrcmds = SLIM_GetTopN( model, oldmat->rowptr[iU + 1] - oldmat->rowptr[iU], oldmat->rowind + oldmat->rowptr[iU], - (oldmat->rowval ? oldmat->rowval + oldmat->rowptr[iU] : NULL), ioptions, - params->nrcmds, rids, rscores); + (oldmat->rowval ? oldmat->rowval + oldmat->rowptr[iU] : NULL), + ioptions, ask_nrcmds, rids, rscores); + + /* if negative test items, select the params->nrcmds from neg+pos test */ + if (negmat && nrcmds != SLIM_ERROR) { + for (zI = tstmat->rowptr[iU]; zI < tstmat->rowptr[iU + 1]; zI++) + rmarker[tstmat->rowind[zI]] = -2; + for (zI = negmat->rowptr[iU]; zI < negmat->rowptr[iU + 1]; zI++) + rmarker[negmat->rowind[zI]] = -2; + + /* select the neg+pos that were in the recommended list */ + for (ncands=0, iR=0; iRrowptr[iU]; zI < tstmat->rowptr[iU + 1]; zI++) { + if (rmarker[tstmat->rowind[zI]] != -3) { + rcands[ncands].val = tstmat->rowind[zI]; + rcands[ncands].key = 0.0; + ncands++; + } + rmarker[tstmat->rowind[zI]] = -1; + } + for (zI = negmat->rowptr[iU]; zI < negmat->rowptr[iU + 1]; zI++) { + if (rmarker[negmat->rowind[zI]] != -3) { + rcands[ncands].val = negmat->rowind[zI]; + rcands[ncands].key = 0.0; + ncands++; + } + rmarker[negmat->rowind[zI]] = -1; + } + //printf("ncands: %5d,", ncands); + + + /* shuffle prior to sorting */ + for (iR=0; iRnrcmds); + for (iR=0; iR 0 ? 1.0 * nhits[0] / ntrue[0] : 0.0); @@ -171,7 +245,7 @@ int main(int argc, char *argv[]) { "------------------------------------------------------------------\n"); /* clean up */ - gk_free((void **)&rids, &rscores, &rmarker, &fmarker, LTERM); + gk_free((void **)&rids, &rscores, &rmarker, &fmarker, &rcands, LTERM); SLIM_FreeModel((slim_t **)&model); gk_csr_Free(&oldmat); if (tstmat) diff --git a/src/programs/struct.h b/src/programs/struct.h index 6b22bce..bf82693 100644 --- a/src/programs/struct.h +++ b/src/programs/struct.h @@ -17,6 +17,7 @@ the University of Minnesota typedef struct { char *trnfile; /*!< the file of historical preferences */ char *tstfile; /*!< the file to validate the recommendations */ + char *negfile; /*!< the file containing the negative test instances */ char *l12file; /*!< the file that contains the regularization values over which to search */ char *mdlfile; /*!< the model file during prediction */ From 75da5562c5c0042b94d4c15389d931bd737a024d Mon Sep 17 00:00:00 2001 From: George Karypis Date: Fri, 8 Nov 2019 08:51:39 -0800 Subject: [PATCH 9/9] - Added a missing free --- src/programs/slim_predict.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/programs/slim_predict.c b/src/programs/slim_predict.c index a9590bc..889344a 100644 --- a/src/programs/slim_predict.c +++ b/src/programs/slim_predict.c @@ -250,4 +250,6 @@ int main(int argc, char *argv[]) { gk_csr_Free(&oldmat); if (tstmat) gk_csr_Free(&tstmat); + if (negmat) + gk_csr_Free(&negmat); }