Skip to content

Commit

Permalink
Merge pull request #96 from AaronWChen/vector-db-1/init-pyvespa
Browse files Browse the repository at this point in the history
BGEM3 embedding test
  • Loading branch information
AaronWChen authored Jul 30, 2024
2 parents e77c209 + b731810 commit c1356a4
Show file tree
Hide file tree
Showing 3 changed files with 1,326 additions and 554 deletions.
30 changes: 16 additions & 14 deletions nbs/13_new_preproc_test.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2842,8 +2842,8 @@
"\n",
"artifacts = {'sklearn_model': sklearn_model_path,\n",
" 'sklearn_transformer': sklearn_transformer_path,\n",
" 'transformed_recipes': transformed_recipes_path,\n",
" # 'combined_data': combined_df_path\n",
" # 'transformed_recipes': transformed_recipes_path,\n",
" 'combined_data': combined_df_path\n",
" }\n"
]
},
Expand Down Expand Up @@ -3400,10 +3400,12 @@
" print('Input Data Shape: ', end='\\n')\n",
" print(model_input.shape)\n",
"\n",
" random_sample = model_input.sample(3, random_state=200)\n",
"\n",
" print('\\n')\n",
" print('-' * 80)\n",
" print('Random 3 Records from Input Data: ', end='\\n')\n",
" print(model_input.sample(3, random_state=200))\n",
" print(random_sample)\n",
"\n",
" # Do fit transform on data\n",
" response = sklearn_transformer.fit_transform(tqdm(model_input)) \n",
Expand Down Expand Up @@ -3431,24 +3433,24 @@
" # artifacts=artifacts\n",
" # )\n",
"\n",
" # combined_df = pd.concat(\n",
" # [transformed_recipe,\n",
" # whole_nlp_df\n",
" # ]\n",
" # , axis=1)\n",
" # print('\\n')\n",
" # print('-' * 80)\n",
" # print('Combined Data:', end='\\n')\n",
" # print(combined_df.head())\n",
" combined_df = pd.concat(\n",
" [transformed_recipe,\n",
" random_sample\n",
" ]\n",
" , axis=1)\n",
" print('\\n')\n",
" print('-' * 80)\n",
" print('Random Sample of Combined Data:', end='\\n')\n",
" print(combined_df.head())\n",
"\n",
" with open(sklearn_transformer_path, \"wb\") as fo:\n",
" pickle.dump(sklearn_transformer, fo)\n",
" \n",
" with open(transformed_recipes_path, \"wb\") as fo:\n",
" pickle.dump(transformed_recipe, fo)\n",
" \n",
" # with open(combined_df_path, 'wb') as fo:\n",
" # pickle.dump(combined_df, fo)\n",
" with open(combined_df_path, 'wb') as fo:\n",
" pickle.dump(combined_df, fo)\n",
"\n",
"\n",
" model_info = mlflow.pyfunc.log_model( \n",
Expand Down
Loading

0 comments on commit c1356a4

Please sign in to comment.