diff --git a/models/content-engine/core/domain/Content_Create_Posts_database.ipynb b/models/content-engine/core/domain/Content_Create_Posts_database.ipynb index adafa93..fc1ed4d 100644 --- a/models/content-engine/core/domain/Content_Create_Posts_database.ipynb +++ b/models/content-engine/core/domain/Content_Create_Posts_database.ipynb @@ -371,26 +371,26 @@ " df_init[\"AUTHOR_URL\"] = tmp_df.loc[0, \"AUTHOR_URL\"]\n", " df_init[\"ENGAGEMENTS\"] = df_init[\"LIKES\"] + df_init[\"COMMENTS\"] + df_init[\"SHARES\"]\n", " \n", - " df_init[\"ID\"] = df_init.apply(lambda row: create_sha_256_hash(str(row[\"URL\"].split(\":activity:\")[1].split(\"/\")[0])), axis=1) \n", + " # Add KG data columns\n", + " col_ref = [\n", + " \"ID\",\n", + " \"CONCEPT\",\n", + " \"SENTIMENT\",\n", + " \"TARGET\",\n", + " \"OBJECTIVE\",\n", + " ] \n", + " for c in col_ref:\n", + " if len(df_init) == 0 and c != \"ID\":\n", + " df[c] = \"TBD\"\n", + " elif not c in df_init.columns:\n", + " df_init[c] = \"TBD\"\n", + " \n", " if len(df_init) > 0:\n", - " # Get meta data from existing people\n", - " col_ref = [\n", - " \"ID\",\n", - " \"CONCEPT\",\n", - " \"SENTIMENT\",\n", - " \"TARGET\",\n", - " \"OBJECTIVE\",\n", - " ]\n", - " for c in col_ref:\n", - " # If columns does not exist, init value to be determined (TBD)\n", - " if not c in df_init.columns:\n", - " df_init[c] = \"TBD\"\n", + " # Merge to get meta data\n", " ref = df_init[col_ref]\n", - " \n", - " # Merge to get meta data\n", - " df = pd.merge(df, ref, on=\"ID\", how=\"left\")\n", - " for c in col_ref:\n", - " df[c] = df[c].fillna(\"TBD\")\n", + " df = pd.merge(df, ref, on=\"ID\", how=\"left\")\n", + " for c in col_ref:\n", + " df[c] = df[c].fillna(\"TBD\")\n", "\n", " # Concat new posts with init\n", " df = pd.concat([df, df_init], axis=0).reset_index(drop=True)\n", diff --git a/models/growth-engine/core/domain/Growth_Create_interactions_db.ipynb b/models/growth-engine/core/domain/Growth_Create_interactions_db.ipynb index 2b866d4..d2e0487 100644 --- a/models/growth-engine/core/domain/Growth_Create_interactions_db.ipynb +++ b/models/growth-engine/core/domain/Growth_Create_interactions_db.ipynb @@ -81,13 +81,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "5fad521a-4a18-4dc7-b13d-98a37172715b", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:36.733735Z", + "iopub.status.busy": "2024-04-25T13:47:36.733297Z", + "iopub.status.idle": "2024-04-25T13:47:39.363011Z", + "shell.execute_reply": "2024-04-25T13:47:39.361985Z", + "shell.execute_reply.started": "2024-04-25T13:47:36.733661Z" + }, "papermill": {}, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ utils file '/home/ftp/abi/utils/data.ipynb' successfully loaded.\n", + "✅ utils file '/home/ftp/abi/utils/llm.ipynb' successfully loaded.\n", + "✅ utils file '/home/ftp/abi/utils/naas_api.ipynb' successfully loaded.\n", + "✅ utils file '/home/ftp/abi/utils/naas_chat_plugin.ipynb' successfully loaded.\n", + "✅ utils file '/home/ftp/abi/utils/naas_lab.ipynb' successfully loaded.\n" + ] + } + ], "source": [ "from naas_drivers import gsheet\n", "import pandas as pd\n", @@ -123,9 +142,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "7c34bff6-9136-4aaf-a692-b38129b7de83", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:39.364954Z", + "iopub.status.busy": "2024-04-25T13:47:39.364641Z", + "iopub.status.idle": "2024-04-25T13:47:43.179902Z", + "shell.execute_reply": "2024-04-25T13:47:43.179036Z", + "shell.execute_reply.started": "2024-04-25T13:47:39.364923Z" + }, "papermill": {}, "tags": [ "parameters" @@ -176,13 +202,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "34407369-03a1-45c4-9768-03222224612b", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:43.183375Z", + "iopub.status.busy": "2024-04-25T13:47:43.183186Z", + "iopub.status.idle": "2024-04-25T13:47:43.830739Z", + "shell.execute_reply": "2024-04-25T13:47:43.830016Z", + "shell.execute_reply.started": "2024-04-25T13:47:43.183353Z" + }, "papermill": {}, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🗂️ Interactions (init): 0\n" + ] + } + ], "source": [ "df_init = gsheet.connect(spreadsheet_url).get(sheet_name=sheet_interaction)\n", "if not isinstance(df_init, pd.DataFrame):\n", @@ -204,13 +245,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "cdb161fa-8e4b-4294-99f0-59c2c2f203e1", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:43.833689Z", + "iopub.status.busy": "2024-04-25T13:47:43.833253Z", + "iopub.status.idle": "2024-04-25T13:47:44.589126Z", + "shell.execute_reply": "2024-04-25T13:47:44.588456Z", + "shell.execute_reply.started": "2024-04-25T13:47:43.833656Z" + }, "papermill": {}, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "- Posts db (init): 1\n" + ] + } + ], "source": [ "df_posts = gsheet.connect(spreadsheet_url).get(sheet_name=sheet_posts)\n", "if not isinstance(df_posts, pd.DataFrame):\n", @@ -230,12 +286,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "4d46b104-7a94-4674-a31c-4b5338d2b243", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:44.590681Z", + "iopub.status.busy": "2024-04-25T13:47:44.590256Z", + "iopub.status.idle": "2024-04-25T13:47:44.641951Z", + "shell.execute_reply": "2024-04-25T13:47:44.641315Z", + "shell.execute_reply.started": "2024-04-25T13:47:44.590618Z" + }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "📁 Files: 1\n", + "⚠️ Limit Date: 2024-04-15\n", + "1- File: /home/ftp/abi/outputs/florent_ravenel/growth-engine/2024-04-25/linkedin_post_reactions.pickle\n", + "👍 Total Reactions: 14\n" + ] + } + ], "source": [ "def get_reactions(\n", " entity_dir,\n", @@ -293,12 +367,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "5bb75165-5195-48a9-9c71-9b648575da46", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:44.643610Z", + "iopub.status.busy": "2024-04-25T13:47:44.643139Z", + "iopub.status.idle": "2024-04-25T13:47:44.754410Z", + "shell.execute_reply": "2024-04-25T13:47:44.747805Z", + "shell.execute_reply.started": "2024-04-25T13:47:44.643577Z" + }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "📁 Files: 1\n", + "⚠️ Limit Date: 2024-04-15\n", + "1- File: /home/ftp/abi/outputs/florent_ravenel/growth-engine/2024-04-25/linkedin_post_comments.pickle\n", + "🗨️ Total Comments: 3\n" + ] + } + ], "source": [ "def get_comments(\n", " entity_dir,\n", @@ -347,12 +439,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "dc7ec5d6-0272-4839-a21c-a4caa6f03f58", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:44.760041Z", + "iopub.status.busy": "2024-04-25T13:47:44.759805Z", + "iopub.status.idle": "2024-04-25T13:47:44.942913Z", + "shell.execute_reply": "2024-04-25T13:47:44.942342Z", + "shell.execute_reply.started": "2024-04-25T13:47:44.760012Z" + }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🗂️ Interactions: 17\n" + ] + } + ], "source": [ "def handle_time_error(df_init, column):\n", " # Handle NonExistentTimeError\n", @@ -445,34 +552,35 @@ " \n", " \n", " # Histo abi version < 1.14.0\n", - " df_gsheet[\"CONTENT_ID\"] = df_gsheet.apply(lambda row: create_sha_256_hash(str(row[\"CONTENT_URL\"].split(\":activity:\")[1].split(\"/\")[0])), axis=1)\n", - " to_rename = {\n", - " \"DATE_INTERACTION\": \"INTERACTION_DATE\",\n", - " \"INTERACTION\": \"TYPE\",\n", - " \"INTERACTION_SCORE\": \"SCORE\",\n", - " \"INTERACTION_CONTENT\": \"CONTENT\",\n", - " \"COMMENT_SENTIMENT\": \"SENTIMENT\",\n", - " }\n", - " df_gsheet = df_gsheet.rename(columns=to_rename)\n", - " to_add = {\n", - " \"COMMENT_COMMENTS_COUNT\": 0,\n", - " \"COMMENT_LIKES_COUNT\": 0,\n", - " \"COMMENT_LANGUAGE\": \"NA\",\n", - " \"SENTIMENT\": \"NA\",\n", - " \"CONTENT_ID\": df_gsheet.apply(lambda row: create_sha_256_hash(str(row[\"CONTENT_URL\"].split(\":activity:\")[1].split(\"/\")[0])), axis=1),\n", - " \"PROFILE_ID\": df_gsheet.apply(lambda row: get_linkedin_id_from_url(row[\"PROFILE_URL\"]), axis=1),\n", - " }\n", - " for k, v in to_add.items():\n", - " if k not in df_gsheet.columns:\n", - " df_gsheet[k] = v\n", - " if k == \"SENTIMENT\":\n", - " df_gsheet[k] = df_gsheet[k].astype(str).replace(\"None\", \"NA\")\n", - " df_gsheet.loc[df_gsheet[\"TYPE\"] == \"POST_COMMENT\", k] = \"TBD\"\n", - " elif k in [\"COMMENT_COMMENTS_COUNT\", \"COMMENT_LIKES_COUNT\"]:\n", - " df_gsheet[k] = df_gsheet[k].astype(str).replace(\"None\", \"0\").astype(int)\n", - " else:\n", - " df_gsheet[k] = df_gsheet[k].astype(str).replace(\"None\", \"NA\")\n", - " df_gsheet[\"ID\"] = df_gsheet.apply(lambda row: create_sha_256_hash(str(row[\"INTERACTION_DATE\"]) + str(row[\"PROFILE_ID\"]) + str(row[\"CONTENT_ID\"]) + str(row[\"CONTENT\"])), axis=1)\n", + " if len(df_gsheet) > 0:\n", + " df_gsheet[\"CONTENT_ID\"] = df_gsheet.apply(lambda row: create_sha_256_hash(str(row[\"CONTENT_URL\"].split(\":activity:\")[1].split(\"/\")[0])), axis=1)\n", + " to_rename = {\n", + " \"DATE_INTERACTION\": \"INTERACTION_DATE\",\n", + " \"INTERACTION\": \"TYPE\",\n", + " \"INTERACTION_SCORE\": \"SCORE\",\n", + " \"INTERACTION_CONTENT\": \"CONTENT\",\n", + " \"COMMENT_SENTIMENT\": \"SENTIMENT\",\n", + " }\n", + " df_gsheet = df_gsheet.rename(columns=to_rename)\n", + " to_add = {\n", + " \"COMMENT_COMMENTS_COUNT\": 0,\n", + " \"COMMENT_LIKES_COUNT\": 0,\n", + " \"COMMENT_LANGUAGE\": \"NA\",\n", + " \"SENTIMENT\": \"NA\",\n", + " \"CONTENT_ID\": df_gsheet.apply(lambda row: create_sha_256_hash(str(row[\"CONTENT_URL\"].split(\":activity:\")[1].split(\"/\")[0])), axis=1),\n", + " \"PROFILE_ID\": df_gsheet.apply(lambda row: get_linkedin_id_from_url(row[\"PROFILE_URL\"]), axis=1),\n", + " }\n", + " for k, v in to_add.items():\n", + " if k not in df_gsheet.columns:\n", + " df_gsheet[k] = v\n", + " if k == \"SENTIMENT\":\n", + " df_gsheet[k] = df_gsheet[k].astype(str).replace(\"None\", \"NA\")\n", + " df_gsheet.loc[df_gsheet[\"TYPE\"] == \"POST_COMMENT\", k] = \"TBD\"\n", + " elif k in [\"COMMENT_COMMENTS_COUNT\", \"COMMENT_LIKES_COUNT\"]:\n", + " df_gsheet[k] = df_gsheet[k].astype(str).replace(\"None\", \"0\").astype(int)\n", + " else:\n", + " df_gsheet[k] = df_gsheet[k].astype(str).replace(\"None\", \"NA\")\n", + " df_gsheet[\"ID\"] = df_gsheet.apply(lambda row: create_sha_256_hash(str(row[\"INTERACTION_DATE\"]) + str(row[\"PROFILE_ID\"]) + str(row[\"CONTENT_ID\"]) + str(row[\"CONTENT\"])), axis=1)\n", " \n", " # Concat dfs\n", " df = pd.concat([df, df_gsheet])\n", @@ -517,9 +625,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "5f80c5c9-0026-4916-8389-e50425a297f7", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:44.945200Z", + "iopub.status.busy": "2024-04-25T13:47:44.944947Z", + "iopub.status.idle": "2024-04-25T13:52:08.357675Z", + "shell.execute_reply": "2024-04-25T13:52:08.357037Z", + "shell.execute_reply.started": "2024-04-25T13:47:44.945168Z" + }, "papermill": { "duration": 1472.148616, "end_time": "2024-04-10T10:33:16.219446", @@ -529,7 +644,142 @@ }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-> Comments to be updated: 3\n", + "1 - Comment 'Thank you for sharing this perspective Florent Ravenel It's true that it's hard to imagine what goes on behind the scenes.' made on 'Many companies ignore the fact that AI assistants require high-quality, organized data.' by 'Anne-Flore Lewi'\n", + "- Sentiment\n", + "Praise: The user is expressing gratitude and agreement with the post's perspective, thus showing admiration or approval.\n", + "\n", + "2 - Comment 'From BI to AI, it feels like we're always repeating ourselves Jérémy Ravenel. Quality data is key! :)' made on 'Many companies ignore the fact that AI assistants require high-quality, organized data.' by 'Florent Ravenel'\n", + "🤖 Extracting Sentiment...\n", + "'completion'\n", + "'completion'\n", + "'completion'\n", + "the JSON object must be str, bytes or bytearray, not NoneType\n", + "\n", + "3 - Comment 'Well said! No useful AI without clean data. It’s always the old same story.' made on 'Many companies ignore the fact that AI assistants require high-quality, organized data.' by 'Jérémy Ravenel'\n", + "🤖 Extracting Sentiment...\n", + "'completion'\n", + "'completion'\n", + "'completion'\n", + "the JSON object must be str, bytes or bytearray, not NoneType\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ENTITYSCENARIOSOURCEINTERACTION_DATEDATEIDTYPECONTENTSENTIMENTSCORE...PUBLIC_IDCONTENT_TITLECONTENT_URLCONTENT_IDPUBLISHED_DATEDATE_EXTRACTINTERACTIONINTERACTION_CONTENTINTERACTION_SCOREDATE_INTERACTION
0Florent RavenelW17-2024LinkedIn2024-04-25 05:06:23+0200Thu. 25 Apr.a10283fabc31b620417f5186c9e20395b5f5073d2f6d73...POST_COMMENTThank you for sharing this perspective Florent...Praise: The user is expressing gratitude and a...3.0...aflewiMany companies ignore the fact that AI assista...https://www.linkedin.com/feed/update/urn:li:ac...5c7d40add88e90d393008095dcc787cefe0e996496d7ae...2024-04-24 21:45:50+02002024-04-25 15:28:53+0200NaNNaNNaNNaN
\n", + "

1 rows × 29 columns

\n", + "
" + ], + "text/plain": [ + " ENTITY SCENARIO SOURCE INTERACTION_DATE \\\n", + "0 Florent Ravenel W17-2024 LinkedIn 2024-04-25 05:06:23+0200 \n", + "\n", + " DATE ID \\\n", + "0 Thu. 25 Apr. a10283fabc31b620417f5186c9e20395b5f5073d2f6d73... \n", + "\n", + " TYPE CONTENT \\\n", + "0 POST_COMMENT Thank you for sharing this perspective Florent... \n", + "\n", + " SENTIMENT SCORE ... PUBLIC_ID \\\n", + "0 Praise: The user is expressing gratitude and a... 3.0 ... aflewi \n", + "\n", + " CONTENT_TITLE \\\n", + "0 Many companies ignore the fact that AI assista... \n", + "\n", + " CONTENT_URL \\\n", + "0 https://www.linkedin.com/feed/update/urn:li:ac... \n", + "\n", + " CONTENT_ID \\\n", + "0 5c7d40add88e90d393008095dcc787cefe0e996496d7ae... \n", + "\n", + " PUBLISHED_DATE DATE_EXTRACT INTERACTION \\\n", + "0 2024-04-24 21:45:50+0200 2024-04-25 15:28:53+0200 NaN \n", + "\n", + " INTERACTION_CONTENT INTERACTION_SCORE DATE_INTERACTION \n", + "0 NaN NaN NaN \n", + "\n", + "[1 rows x 29 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "def enrich_content(\n", " df_init,\n", @@ -663,9 +913,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "2dc65a28-ed03-4fa4-b80c-6ef823639807", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:52:08.359107Z", + "iopub.status.busy": "2024-04-25T13:52:08.358765Z", + "iopub.status.idle": "2024-04-25T13:52:08.434472Z", + "shell.execute_reply": "2024-04-25T13:52:08.433833Z", + "shell.execute_reply.started": "2024-04-25T13:52:08.359075Z" + }, "papermill": {}, "tags": [] }, @@ -687,13 +944,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "32e91b35-c1f8-4fe6-ae78-a2d4b79c8be7", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:52:08.435862Z", + "iopub.status.busy": "2024-04-25T13:52:08.435453Z", + "iopub.status.idle": "2024-04-25T13:52:10.105031Z", + "shell.execute_reply": "2024-04-25T13:52:10.104254Z", + "shell.execute_reply.started": "2024-04-25T13:52:08.435830Z" + }, "papermill": {}, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Invalid data[10]: Range (INTERACTIONS!AC1) exceeds grid limits. Max rows: 3387, max columns: 28\n", + "✅ DataFrame successfully sent to Google Sheets!\n" + ] + } + ], "source": [ "send_data_to_gsheet(df_interactions, df_init, spreadsheet_url, sheet_interaction)" ] @@ -711,9 +984,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "794c1df8-383c-4e58-ae1d-90d59851cc20", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:52:10.106236Z", + "iopub.status.busy": "2024-04-25T13:52:10.106009Z", + "iopub.status.idle": "2024-04-25T13:52:10.193847Z", + "shell.execute_reply": "2024-04-25T13:52:10.193025Z", + "shell.execute_reply.started": "2024-04-25T13:52:10.106213Z" + }, "tags": [] }, "outputs": [],