Skip to content

Commit

Permalink
Update for version 1.4.7
Browse files Browse the repository at this point in the history
Fix a NumPy version incompatibility that caused FTRL model serialization errors with newer NumPy versions.

Add apply_groupby.py, which distributes pandas groupby calls.
  • Loading branch information
anttttti committed May 20, 2021
1 parent e2be83b commit 6299a37
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 28 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
===============
Wordbatch 1.4.6
Wordbatch 1.4.7
===============

Overview
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

setup(
name='Wordbatch',
version='1.4.6',
version='1.4.7',
description='Python library for distributed AI processing pipelines, using swappable scheduler backends',
url='https://github.com/anttttti/Wordbatch',
author='Antti Puurula',
Expand Down
2 changes: 1 addition & 1 deletion wordbatch/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
PACKAGE_DIR = os.path.dirname(os.path.abspath(__file__))
__version__ = '1.4.6'
__version__ = '1.4.7'

12 changes: 6 additions & 6 deletions wordbatch/models/fm_ftrl.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,12 @@ cdef void update_single(int* inds, double* vals, int lenn, double e, double ialp
n_fm[i] += e2

cdef class FM_FTRL:
cdef double[:] w
cdef double[:] z
cdef double[:] n
cdef double[:] w_fm
cdef double[:] z_fm
cdef double[:] n_fm
cdef const double[:] w
cdef const double[:] z
cdef const double[:] n
cdef const double[:] w_fm
cdef const double[:] z_fm
cdef const double[:] n_fm

cdef unsigned int threads
cdef unsigned int iters
Expand Down
6 changes: 3 additions & 3 deletions wordbatch/models/ftrl.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ cdef void update_single(int* inds, double* vals, int lenn, double e, double ialp
n[i]+= g2

cdef class FTRL:
cdef double[:] w
cdef double[:] z
cdef double[:] n
cdef const double[:] w
cdef const double[:] z
cdef const double[:] n

cdef unsigned int threads
cdef unsigned int iters
Expand Down
6 changes: 3 additions & 3 deletions wordbatch/models/ftrl32.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ cdef void update_single(int* inds, double* vals, int lenn, double e, double ialp
n[i]+= g2

cdef class FTRL32:
cdef float[:] w
cdef float[:] z
cdef float[:] n
cdef const float[:] w
cdef const float[:] z
cdef const float[:] n

cdef unsigned int threads
cdef unsigned int iters
Expand Down
10 changes: 5 additions & 5 deletions wordbatch/models/nn_relu_h1.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ cdef void update_single(int* inds, double* vals, int lenn, int D, int D_nn, doub
c1[j]+= fabs(dldw1)

cdef class NN_ReLU_H1:
cdef double[:] w0
cdef double[:] w1
cdef double[:] z
cdef double[:] c0
cdef double[:] c1
cdef const double[:] w0
cdef const double[:] w1
cdef const double[:] z
cdef const double[:] c0
cdef const double[:] c1

cdef unsigned int threads
cdef unsigned int iters
Expand Down
16 changes: 8 additions & 8 deletions wordbatch/models/nn_relu_h2.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,14 @@ cdef void update_single(int* inds, double* vals, int lenn, int D, int D_nn, i
c2[k] += fabs(dldw2)

cdef class NN_ReLU_H2:
cdef double[:] w0
cdef double[:] w1
cdef double[:] w2
cdef double[:] z1
cdef double[:] z2
cdef double[:] c0
cdef double[:] c1
cdef double[:] c2
cdef const double[:] w0
cdef const double[:] w1
cdef const double[:] w2
cdef const double[:] z1
cdef const double[:] z2
cdef const double[:] c0
cdef const double[:] c1
cdef const double[:] c2

cdef unsigned int threads
cdef unsigned int iters
Expand Down
34 changes: 34 additions & 0 deletions wordbatch/pipelines/apply_groupby.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import pandas as pd
from wordbatch.pipelines import Apply

class ApplyGroupBy(object):
    """Run a function over binned groups of a pandas object via ``Apply``.

    The distinct values of the ``group`` column are assigned to bins, the
    data is split by bin with ``groupby``, and the resulting partitions are
    handed to ``Apply`` together with the configured batcher.

    Args:
        batcher: Scheduler/batcher object forwarded to ``Apply``.
        function: Callable forwarded to ``Apply``.
        group: Key used as ``data[group]`` to obtain the grouping values.
        rows_per_bin: Threshold on the number of *distinct group values*;
            at or below it everything goes into a single bin, above it
            ``len(unique) // rows_per_bin`` quantile bins are created.
        cache: Forwarded to ``Apply`` unchanged.
        vectorize: Forwarded to ``Apply`` unchanged.
        args: Extra positional arguments forwarded to ``Apply``
            (``None`` means an empty list).
        kwargs: Extra keyword arguments forwarded to ``Apply``
            (``None`` means an empty dict).
    """

    def __init__(self, batcher, function, group, rows_per_bin=200, cache=None, vectorize=None,
                 args=None, kwargs=None):
        self.batcher = batcher
        self.function = function
        self.group = group
        self.rows_per_bin = rows_per_bin
        self.cache = cache
        self.vectorize = vectorize
        # Build fresh containers per instance: a literal []/{} default would
        # be one shared object stored on every instance created without
        # explicit args/kwargs.
        self.args = [[] if args is None else args]
        self.kwargs = [{} if kwargs is None else kwargs]

    def fit(self, data, input_split=False):
        """No-op; returns ``self`` so calls can be chained."""
        return self

    def fit_transform(self, data, input_split=False, merge_output=True):
        """Equivalent to ``transform``; there is nothing to fit."""
        return self.transform(data, input_split, merge_output)

    def transform(self, data, input_split=False, merge_output=True):
        """Split ``data`` into group bins and apply the function to each bin.

        Args:
            data: Object supporting ``data[self.group]`` and ``groupby``
                (presumably a pandas DataFrame -- confirm against callers).
            input_split: Forwarded to ``Apply.transform``.
            merge_output: Forwarded to ``Apply.transform``.

        Returns:
            The non-empty results concatenated with ``pd.concat`` when they
            are pandas objects; otherwise a flat list of their items.
        """
        group_ids = data[self.group].unique()
        if len(group_ids) <= self.rows_per_bin:
            # Few enough distinct groups: keep them all in a single bin.
            group_bins = {group_id: 1 for group_id in group_ids}
        else:
            # NOTE(review): pd.qcut bins by value, so this branch presumably
            # requires orderable (numeric) group ids -- confirm.
            n_bins = len(group_ids) // self.rows_per_bin
            group_bins = dict(zip(group_ids, pd.qcut(group_ids, n_bins)))
        group_bin_col = data[self.group].map(group_bins)

        # Only the per-bin frames are needed; the bin keys are discarded.
        groups = tuple(frame for _, frame in data.groupby(group_bin_col, as_index=False))

        applier = Apply(self.function, self.batcher, *self.args, *self.kwargs, self.cache,
                        self.vectorize)
        results = [result for result in applier.transform(groups, input_split, merge_output)
                   if len(result) > 0]
        try:
            # Results are Series or DataFrame objects.
            return pd.concat(results, sort=False)
        except (TypeError, ValueError):
            # pd.concat rejects non-pandas objects (TypeError) and an empty
            # list (ValueError); fall back to flattening generic iterables.
            return [item for sublist in results for item in sublist]

0 comments on commit 6299a37

Please sign in to comment.