Skip to content

Commit

Permalink
Adding missing scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
AaronWChen committed Mar 21, 2024
1 parent 6950153 commit 2161e1d
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 16 deletions.
10 changes: 5 additions & 5 deletions src/custom_sklearn_text_transformer_mlflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,20 +390,20 @@ def prepare_stanza_pipeline(
# return ngrams_per_line

@classmethod
def ngrams_maker(min_ngram_length, max_ngram_length):
def ngrams_per_line(row):
def ngram_maker(self, min_ngram_length: int, max_ngram_length: int):
def ngrams_per_line(row: str):
for ln in row.split(" brk "):
at_least_two_english_characters_whole_words = r"(?u)\b\w{2,}\b"
terms = re.findall(at_least_two_english_characters_whole_words, ln)
for ngramLength in range(min_ngram_length, max_ngram_length + 1):
for ngram_length in range(min_ngram_length, max_ngram_length + 1):

# find and return all ngrams
# for ngram in zip(*[terms[i:] for i in range(3)]):
# <-- solution without a generator (works the same but has higher memory usage)
for ngram in (
word
for i in range(len(terms) - ngramLength + 1)
for word in (" ".join(terms[i : i + ngramLength]),)
for i in range(len(terms) - ngram_length + 1)
for word in (" ".join(terms[i : i + ngram_length]),)
):
yield ngram

Expand Down
21 changes: 10 additions & 11 deletions src/custom_stanza_mlflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,19 @@ class CustomSKLearnWrapper(mlflow.pyfunc.PythonModel):
custom PythonModel
"""

# def __init__(self, model):
# """
# Constructor method. Initializes the model with a Stanza library language
# type. The default is "en" for English
def __init__(self, model):
"""
Constructor method. Initializes the model with a Stanza library language
type. The default is "en" for English
# model: sklearn.Transformer
# The sklearn text Transformer or Pipeline that ends in a
# Transformer
model: sklearn.Transformer
The sklearn text Transformer or Pipeline that ends in a
Transformer
# later can add functionality to include pretrained models needed for Stanza
later can add functionality to include pretrained models needed for Stanza
# """
# self.model = model
"""
self.model = model

def load_context(self, context):
"""
Expand Down Expand Up @@ -74,7 +74,6 @@ def predict(self, context, model_input, params):
print(model_input.shape)
print(model_input.sample(3, random_state=200))

# response = self.sklearn_transformer.transform(model_input)
response = self.sklearn_transformer.transform(model_input.values)

transformed_recipe = pd.DataFrame(
Expand Down

0 comments on commit 2161e1d

Please sign in to comment.