diff --git a/models/content-engine/core/domain/Content_Create_Posts_database.ipynb b/models/content-engine/core/domain/Content_Create_Posts_database.ipynb index adafa93..fc1ed4d 100644 --- a/models/content-engine/core/domain/Content_Create_Posts_database.ipynb +++ b/models/content-engine/core/domain/Content_Create_Posts_database.ipynb @@ -371,26 +371,26 @@ " df_init[\"AUTHOR_URL\"] = tmp_df.loc[0, \"AUTHOR_URL\"]\n", " df_init[\"ENGAGEMENTS\"] = df_init[\"LIKES\"] + df_init[\"COMMENTS\"] + df_init[\"SHARES\"]\n", " \n", - " df_init[\"ID\"] = df_init.apply(lambda row: create_sha_256_hash(str(row[\"URL\"].split(\":activity:\")[1].split(\"/\")[0])), axis=1) \n", + " # Add KG data columns\n", + " col_ref = [\n", + " \"ID\",\n", + " \"CONCEPT\",\n", + " \"SENTIMENT\",\n", + " \"TARGET\",\n", + " \"OBJECTIVE\",\n", + " ] \n", + " for c in col_ref:\n", + " if len(df_init) == 0 and c != \"ID\":\n", + " df[c] = \"TBD\"\n", + " elif not c in df_init.columns:\n", + " df_init[c] = \"TBD\"\n", + " \n", " if len(df_init) > 0:\n", - " # Get meta data from existing people\n", - " col_ref = [\n", - " \"ID\",\n", - " \"CONCEPT\",\n", - " \"SENTIMENT\",\n", - " \"TARGET\",\n", - " \"OBJECTIVE\",\n", - " ]\n", - " for c in col_ref:\n", - " # If columns does not exist, init value to be determined (TBD)\n", - " if not c in df_init.columns:\n", - " df_init[c] = \"TBD\"\n", + " # Merge to get meta data\n", " ref = df_init[col_ref]\n", - " \n", - " # Merge to get meta data\n", - " df = pd.merge(df, ref, on=\"ID\", how=\"left\")\n", - " for c in col_ref:\n", - " df[c] = df[c].fillna(\"TBD\")\n", + " df = pd.merge(df, ref, on=\"ID\", how=\"left\")\n", + " for c in col_ref:\n", + " df[c] = df[c].fillna(\"TBD\")\n", "\n", " # Concat new posts with init\n", " df = pd.concat([df, df_init], axis=0).reset_index(drop=True)\n", diff --git a/models/growth-engine/core/domain/Growth_Create_interactions_db.ipynb b/models/growth-engine/core/domain/Growth_Create_interactions_db.ipynb index 2b866d4..d2e0487 100644 --- a/models/growth-engine/core/domain/Growth_Create_interactions_db.ipynb +++ b/models/growth-engine/core/domain/Growth_Create_interactions_db.ipynb @@ -81,13 +81,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "5fad521a-4a18-4dc7-b13d-98a37172715b", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:36.733735Z", + "iopub.status.busy": "2024-04-25T13:47:36.733297Z", + "iopub.status.idle": "2024-04-25T13:47:39.363011Z", + "shell.execute_reply": "2024-04-25T13:47:39.361985Z", + "shell.execute_reply.started": "2024-04-25T13:47:36.733661Z" + }, "papermill": {}, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ utils file '/home/ftp/abi/utils/data.ipynb' successfully loaded.\n", + "✅ utils file '/home/ftp/abi/utils/llm.ipynb' successfully loaded.\n", + "✅ utils file '/home/ftp/abi/utils/naas_api.ipynb' successfully loaded.\n", + "✅ utils file '/home/ftp/abi/utils/naas_chat_plugin.ipynb' successfully loaded.\n", + "✅ utils file '/home/ftp/abi/utils/naas_lab.ipynb' successfully loaded.\n" + ] + } + ], "source": [ "from naas_drivers import gsheet\n", "import pandas as pd\n", @@ -123,9 +142,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "7c34bff6-9136-4aaf-a692-b38129b7de83", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:39.364954Z", + "iopub.status.busy": "2024-04-25T13:47:39.364641Z", + "iopub.status.idle": "2024-04-25T13:47:43.179902Z", + "shell.execute_reply": "2024-04-25T13:47:43.179036Z", + "shell.execute_reply.started": "2024-04-25T13:47:39.364923Z" + }, "papermill": {}, "tags": [ "parameters" @@ -176,13 +202,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "34407369-03a1-45c4-9768-03222224612b", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:43.183375Z", + "iopub.status.busy": "2024-04-25T13:47:43.183186Z", + "iopub.status.idle": "2024-04-25T13:47:43.830739Z", + "shell.execute_reply": "2024-04-25T13:47:43.830016Z", + "shell.execute_reply.started": "2024-04-25T13:47:43.183353Z" + }, "papermill": {}, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🗂️ Interactions (init): 0\n" + ] + } + ], "source": [ "df_init = gsheet.connect(spreadsheet_url).get(sheet_name=sheet_interaction)\n", "if not isinstance(df_init, pd.DataFrame):\n", @@ -204,13 +245,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "cdb161fa-8e4b-4294-99f0-59c2c2f203e1", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:43.833689Z", + "iopub.status.busy": "2024-04-25T13:47:43.833253Z", + "iopub.status.idle": "2024-04-25T13:47:44.589126Z", + "shell.execute_reply": "2024-04-25T13:47:44.588456Z", + "shell.execute_reply.started": "2024-04-25T13:47:43.833656Z" + }, "papermill": {}, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "- Posts db (init): 1\n" + ] + } + ], "source": [ "df_posts = gsheet.connect(spreadsheet_url).get(sheet_name=sheet_posts)\n", "if not isinstance(df_posts, pd.DataFrame):\n", @@ -230,12 +286,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "4d46b104-7a94-4674-a31c-4b5338d2b243", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:44.590681Z", + "iopub.status.busy": "2024-04-25T13:47:44.590256Z", + "iopub.status.idle": "2024-04-25T13:47:44.641951Z", + "shell.execute_reply": "2024-04-25T13:47:44.641315Z", + "shell.execute_reply.started": "2024-04-25T13:47:44.590618Z" + }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "📁 Files: 1\n", + "⚠️ Limit Date: 2024-04-15\n", + "1- File: /home/ftp/abi/outputs/florent_ravenel/growth-engine/2024-04-25/linkedin_post_reactions.pickle\n", + "👍 Total Reactions: 14\n" + ] + } + ], "source": [ "def get_reactions(\n", " entity_dir,\n", @@ -293,12 +367,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "5bb75165-5195-48a9-9c71-9b648575da46", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:44.643610Z", + "iopub.status.busy": "2024-04-25T13:47:44.643139Z", + "iopub.status.idle": "2024-04-25T13:47:44.754410Z", + "shell.execute_reply": "2024-04-25T13:47:44.747805Z", + "shell.execute_reply.started": "2024-04-25T13:47:44.643577Z" + }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "📁 Files: 1\n", + "⚠️ Limit Date: 2024-04-15\n", + "1- File: /home/ftp/abi/outputs/florent_ravenel/growth-engine/2024-04-25/linkedin_post_comments.pickle\n", + "🗨️ Total Comments: 3\n" + ] + } + ], "source": [ "def get_comments(\n", " entity_dir,\n", @@ -347,12 +439,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "dc7ec5d6-0272-4839-a21c-a4caa6f03f58", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:44.760041Z", + "iopub.status.busy": "2024-04-25T13:47:44.759805Z", + "iopub.status.idle": "2024-04-25T13:47:44.942913Z", + "shell.execute_reply": "2024-04-25T13:47:44.942342Z", + "shell.execute_reply.started": "2024-04-25T13:47:44.760012Z" + }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🗂️ Interactions: 17\n" + ] + } + ], "source": [ "def handle_time_error(df_init, column):\n", " # Handle NonExistentTimeError\n", @@ -445,34 +552,35 @@ " \n", " \n", " # Histo abi version < 1.14.0\n", - " df_gsheet[\"CONTENT_ID\"] = df_gsheet.apply(lambda row: create_sha_256_hash(str(row[\"CONTENT_URL\"].split(\":activity:\")[1].split(\"/\")[0])), axis=1)\n", - " to_rename = {\n", - " \"DATE_INTERACTION\": \"INTERACTION_DATE\",\n", - " \"INTERACTION\": \"TYPE\",\n", - " \"INTERACTION_SCORE\": \"SCORE\",\n", - " \"INTERACTION_CONTENT\": \"CONTENT\",\n", - " \"COMMENT_SENTIMENT\": \"SENTIMENT\",\n", - " }\n", - " df_gsheet = df_gsheet.rename(columns=to_rename)\n", - " to_add = {\n", - " \"COMMENT_COMMENTS_COUNT\": 0,\n", - " \"COMMENT_LIKES_COUNT\": 0,\n", - " \"COMMENT_LANGUAGE\": \"NA\",\n", - " \"SENTIMENT\": \"NA\",\n", - " \"CONTENT_ID\": df_gsheet.apply(lambda row: create_sha_256_hash(str(row[\"CONTENT_URL\"].split(\":activity:\")[1].split(\"/\")[0])), axis=1),\n", - " \"PROFILE_ID\": df_gsheet.apply(lambda row: get_linkedin_id_from_url(row[\"PROFILE_URL\"]), axis=1),\n", - " }\n", - " for k, v in to_add.items():\n", - " if k not in df_gsheet.columns:\n", - " df_gsheet[k] = v\n", - " if k == \"SENTIMENT\":\n", - " df_gsheet[k] = df_gsheet[k].astype(str).replace(\"None\", \"NA\")\n", - " df_gsheet.loc[df_gsheet[\"TYPE\"] == \"POST_COMMENT\", k] = \"TBD\"\n", - " elif k in [\"COMMENT_COMMENTS_COUNT\", \"COMMENT_LIKES_COUNT\"]:\n", - " df_gsheet[k] = df_gsheet[k].astype(str).replace(\"None\", \"0\").astype(int)\n", - " else:\n", - " df_gsheet[k] = df_gsheet[k].astype(str).replace(\"None\", \"NA\")\n", - " df_gsheet[\"ID\"] = df_gsheet.apply(lambda row: create_sha_256_hash(str(row[\"INTERACTION_DATE\"]) + str(row[\"PROFILE_ID\"]) + str(row[\"CONTENT_ID\"]) + str(row[\"CONTENT\"])), axis=1)\n", + " if len(df_gsheet) > 0:\n", + " df_gsheet[\"CONTENT_ID\"] = df_gsheet.apply(lambda row: create_sha_256_hash(str(row[\"CONTENT_URL\"].split(\":activity:\")[1].split(\"/\")[0])), axis=1)\n", + " to_rename = {\n", + " \"DATE_INTERACTION\": \"INTERACTION_DATE\",\n", + " \"INTERACTION\": \"TYPE\",\n", + " \"INTERACTION_SCORE\": \"SCORE\",\n", + " \"INTERACTION_CONTENT\": \"CONTENT\",\n", + " \"COMMENT_SENTIMENT\": \"SENTIMENT\",\n", + " }\n", + " df_gsheet = df_gsheet.rename(columns=to_rename)\n", + " to_add = {\n", + " \"COMMENT_COMMENTS_COUNT\": 0,\n", + " \"COMMENT_LIKES_COUNT\": 0,\n", + " \"COMMENT_LANGUAGE\": \"NA\",\n", + " \"SENTIMENT\": \"NA\",\n", + " \"CONTENT_ID\": df_gsheet.apply(lambda row: create_sha_256_hash(str(row[\"CONTENT_URL\"].split(\":activity:\")[1].split(\"/\")[0])), axis=1),\n", + " \"PROFILE_ID\": df_gsheet.apply(lambda row: get_linkedin_id_from_url(row[\"PROFILE_URL\"]), axis=1),\n", + " }\n", + " for k, v in to_add.items():\n", + " if k not in df_gsheet.columns:\n", + " df_gsheet[k] = v\n", + " if k == \"SENTIMENT\":\n", + " df_gsheet[k] = df_gsheet[k].astype(str).replace(\"None\", \"NA\")\n", + " df_gsheet.loc[df_gsheet[\"TYPE\"] == \"POST_COMMENT\", k] = \"TBD\"\n", + " elif k in [\"COMMENT_COMMENTS_COUNT\", \"COMMENT_LIKES_COUNT\"]:\n", + " df_gsheet[k] = df_gsheet[k].astype(str).replace(\"None\", \"0\").astype(int)\n", + " else:\n", + " df_gsheet[k] = df_gsheet[k].astype(str).replace(\"None\", \"NA\")\n", + " df_gsheet[\"ID\"] = df_gsheet.apply(lambda row: create_sha_256_hash(str(row[\"INTERACTION_DATE\"]) + str(row[\"PROFILE_ID\"]) + str(row[\"CONTENT_ID\"]) + str(row[\"CONTENT\"])), axis=1)\n", " \n", " # Concat dfs\n", " df = pd.concat([df, df_gsheet])\n", @@ -517,9 +625,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "5f80c5c9-0026-4916-8389-e50425a297f7", "metadata": { + "execution": { + "iopub.execute_input": "2024-04-25T13:47:44.945200Z", + "iopub.status.busy": "2024-04-25T13:47:44.944947Z", + "iopub.status.idle": "2024-04-25T13:52:08.357675Z", + "shell.execute_reply": "2024-04-25T13:52:08.357037Z", + "shell.execute_reply.started": "2024-04-25T13:47:44.945168Z" + }, "papermill": { "duration": 1472.148616, "end_time": "2024-04-10T10:33:16.219446", @@ -529,7 +644,142 @@ }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-> Comments to be updated: 3\n", + "1 - Comment 'Thank you for sharing this perspective Florent Ravenel It's true that it's hard to imagine what goes on behind the scenes.' made on 'Many companies ignore the fact that AI assistants require high-quality, organized data.' by 'Anne-Flore Lewi'\n", + "- Sentiment\n", + "Praise: The user is expressing gratitude and agreement with the post's perspective, thus showing admiration or approval.\n", + "\n", + "2 - Comment 'From BI to AI, it feels like we're always repeating ourselves Jérémy Ravenel. Quality data is key! :)' made on 'Many companies ignore the fact that AI assistants require high-quality, organized data.' by 'Florent Ravenel'\n", + "🤖 Extracting Sentiment...\n", + "'completion'\n", + "'completion'\n", + "'completion'\n", + "the JSON object must be str, bytes or bytearray, not NoneType\n", + "\n", + "3 - Comment 'Well said! No useful AI without clean data. It’s always the old same story.' made on 'Many companies ignore the fact that AI assistants require high-quality, organized data.' by 'Jérémy Ravenel'\n", + "🤖 Extracting Sentiment...\n", + "'completion'\n", + "'completion'\n", + "'completion'\n", + "the JSON object must be str, bytes or bytearray, not NoneType\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " | ENTITY | \n", + "SCENARIO | \n", + "SOURCE | \n", + "INTERACTION_DATE | \n", + "DATE | \n", + "ID | \n", + "TYPE | \n", + "CONTENT | \n", + "SENTIMENT | \n", + "SCORE | \n", + "... | \n", + "PUBLIC_ID | \n", + "CONTENT_TITLE | \n", + "CONTENT_URL | \n", + "CONTENT_ID | \n", + "PUBLISHED_DATE | \n", + "DATE_EXTRACT | \n", + "INTERACTION | \n", + "INTERACTION_CONTENT | \n", + "INTERACTION_SCORE | \n", + "DATE_INTERACTION | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "Florent Ravenel | \n", + "W17-2024 | \n", + "2024-04-25 05:06:23+0200 | \n", + "Thu. 25 Apr. | \n", + "a10283fabc31b620417f5186c9e20395b5f5073d2f6d73... | \n", + "POST_COMMENT | \n", + "Thank you for sharing this perspective Florent... | \n", + "Praise: The user is expressing gratitude and a... | \n", + "3.0 | \n", + "... | \n", + "aflewi | \n", + "Many companies ignore the fact that AI assista... | \n", + "https://www.linkedin.com/feed/update/urn:li:ac... | \n", + "5c7d40add88e90d393008095dcc787cefe0e996496d7ae... | \n", + "2024-04-24 21:45:50+0200 | \n", + "2024-04-25 15:28:53+0200 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
1 rows × 29 columns
\n", + "