From d608f111455c0a213492612703089068b374deb2 Mon Sep 17 00:00:00 2001 From: Varsha Kumar Date: Mon, 8 Jul 2024 23:27:34 +0200 Subject: [PATCH 1/3] feat: instagram extracting profile and posts details --- ...stagram_Extract_details_from_account.ipynb | 803 ++++++++++++++++++ 1 file changed, 803 insertions(+) create mode 100644 Instagram/Instagram_Extract_details_from_account.ipynb diff --git a/Instagram/Instagram_Extract_details_from_account.ipynb b/Instagram/Instagram_Extract_details_from_account.ipynb new file mode 100644 index 0000000000..ab056a5c03 --- /dev/null +++ b/Instagram/Instagram_Extract_details_from_account.ipynb @@ -0,0 +1,803 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "rocky-cardiff", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "\"Instagram.png\"" + ] + }, + { + "cell_type": "markdown", + "id": "judicial-headline", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# Instagram - Extract details from account\n", + "Give Feedback | Bug report" + ] + }, + { + "cell_type": "markdown", + "id": "1cef8cab-e783-4589-b2c4-c21ee380c773", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #instagram #snippet #content" + ] + }, + { + "cell_type": "markdown", + "id": "naas-author", + "metadata": { + "papermill": {}, + "tags": [ + "naas" + ] + }, + "source": [ + "**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)" + ] + }, + { + "cell_type": "markdown", + "id": "8edddd04-a2af-47f7-82aa-a9108cdcd3d4", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2024-07-04 (Created: 2024-07-04)" + ] + }, + { + "cell_type": "markdown", + "id": "naas-description", + "metadata": { + "papermill": {}, + "tags": [ + "description" + ] + }, + "source": [ + "**Description:** This notebook allows users to extract details from an Instagram account." 
+ ] + }, + { + "cell_type": "markdown", + "id": "input_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "import_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d40e70c0-a388-417b-a50f-c50bb82cc0b3", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-08T21:22:37.198608Z", + "iopub.status.busy": "2024-07-08T21:22:37.198348Z", + "iopub.status.idle": "2024-07-08T21:22:37.628862Z", + "shell.execute_reply": "2024-07-08T21:22:37.628308Z", + "shell.execute_reply.started": "2024-07-08T21:22:37.198540Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "import requests\n", + "import pandas as pd\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "id": "5c3c12ca-5f3e-411a-aa54-c2b4b612a91d", + "metadata": { + "execution": { + "iopub.execute_input": "2022-03-17T10:12:43.371273Z", + "iopub.status.busy": "2022-03-17T10:12:43.371011Z", + "iopub.status.idle": "2022-03-17T10:12:43.374551Z", + "shell.execute_reply": "2022-03-17T10:12:43.373882Z", + "shell.execute_reply.started": "2022-03-17T10:12:43.371208Z" + }, + "papermill": {}, + "tags": [] + }, + "source": [ + "### Setup variables\n", + "- `apify_token`: personal token apify creates to access data\n", + "- `instagram_profile_url`: link to the instagram profile" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ce903236-60d1-4087-a31e-9321f2df6112", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-08T21:22:37.630171Z", + "iopub.status.busy": "2024-07-08T21:22:37.629960Z", + "iopub.status.idle": "2024-07-08T21:22:37.633585Z", + "shell.execute_reply": "2024-07-08T21:22:37.632976Z", + "shell.execute_reply.started": "2024-07-08T21:22:37.630145Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "apify_token = 
\"apify_api_gXWnLEPiE7wC8ALUwQkJ0QcdbuQzU847JwNP\"\n", + "instagram_profile_url = \"https://www.instagram.com/naaslife/\"\n", + "output_csv1 = f\"{instagram_profile_url.split('https://www.instagram.com/')[1].replace('/', '_')}instagram_account.csv\"\n", + "output_csv2 = f\"{instagram_profile_url.split('https://www.instagram.com/')[1].replace('/', '_')}instagram_posts.csv\"" + ] + }, + { + "cell_type": "markdown", + "id": "model_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Model" + ] + }, + { + "cell_type": "markdown", + "id": "5e04b21e-8ae9-4081-acc4-73f01fc39f3b", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Scrape instagram data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "legislative-taiwan", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-08T21:22:37.634743Z", + "iopub.status.busy": "2024-07-08T21:22:37.634527Z", + "iopub.status.idle": "2024-07-08T21:22:46.737950Z", + "shell.execute_reply": "2024-07-08T21:22:46.737277Z", + "shell.execute_reply.started": "2024-07-08T21:22:37.634716Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "def get_instagram_data(apify_token, instagram_profile_url):\n", + " # Extract the username from the profile URL\n", + " username = instagram_profile_url.split('/')[-2]\n", + " \n", + " # Define the Apify API URL for the Instagram Profile Scraper\n", + " api_url = \"https://api.apify.com/v2/acts/apify~instagram-profile-scraper/run-sync-get-dataset-items\"\n", + "\n", + " # Define the payload with the necessary parameters\n", + " payload = {\n", + " \"usernames\": [username], # Pass the username as a list\n", + " \"proxyConfig\": {\n", + " \"useApifyProxy\": True\n", + " }\n", + " }\n", + "\n", + " # Define the headers with the Apify API token\n", + " headers = {\n", + " \"Authorization\": f\"Bearer {apify_token}\",\n", + " \"Content-Type\": \"application/json\"\n", + " }\n", + "\n", + " # Make the 
request to the Apify API\n", + " response = requests.post(api_url, json=payload, headers=headers)\n", + "\n", + " # Extract the JSON data from the response\n", + " data = response.json()\n", + " \n", + " return data\n", + "\n", + "# Make posts dataframe structure\n", + "def get_posts(\n", + " ownerUsername,\n", + " ownerId,\n", + " pid,\n", + " post_type,\n", + " caption,\n", + " hashtags,\n", + " mentions,\n", + " url,\n", + " comments_count,\n", + " likes_count,\n", + " timestamp\n", + "):\n", + " return {\n", + " \"OWNER_USERNAME\": ownerUsername,\n", + " \"OWNER_ID\": ownerId,\n", + " \"ID\": pid,\n", + " \"POST_TYPE\": post_type,\n", + " \"CAPTION\": caption,\n", + " \"HASHTAGS\": hashtags,\n", + " \"MENTIONS\": mentions,\n", + " \"URL\": url,\n", + " \"COMMENTS_COUNT\": comments_count,\n", + " \"LIKES_COUNT\": likes_count,\n", + " \"TIMESTAMP\": timestamp\n", + " }\n", + " \n", + "profile_data = get_instagram_data(apify_token, instagram_profile_url)" + ] + }, + { + "cell_type": "markdown", + "id": "output_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Output" + ] + }, + { + "cell_type": "markdown", + "id": "display_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Account dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c7ac65a4-dd93-43c4-8090-c86a2aa28898", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-08T21:22:46.739075Z", + "iopub.status.busy": "2024-07-08T21:22:46.738892Z", + "iopub.status.idle": "2024-07-08T21:22:46.759351Z", + "shell.execute_reply": "2024-07-08T21:22:46.758768Z", + "shell.execute_reply.started": "2024-07-08T21:22:46.739054Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDUSERNAMEURLBIOFOLLOWERSFOLLOWINGPRIVATEPOST_COUNT
049645556825naaslifehttps://www.instagram.com/naaslifeUnlocking the power of data, automation, and A...78102False17
\n", + "
" + ], + "text/plain": [ + " ID USERNAME URL \\\n", + "0 49645556825 naaslife https://www.instagram.com/naaslife \n", + "\n", + " BIO FOLLOWERS FOLLOWING \\\n", + "0 Unlocking the power of data, automation, and A... 78 102 \n", + "\n", + " PRIVATE POST_COUNT \n", + "0 False 17 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data1 = []\n", + "\n", + "data1 = [{\n", + " \"ID\": profile_data[0]['id'],\n", + " \"USERNAME\": profile_data[0]['username'],\n", + " \"URL\": profile_data[0]['url'],\n", + " \"BIO\": profile_data[0]['biography'],\n", + " \"FOLLOWERS\": profile_data[0]['followersCount'],\n", + " \"FOLLOWING\": profile_data[0]['followsCount'],\n", + " \"PRIVATE\": profile_data[0]['private'],\n", + " \"POST_COUNT\": profile_data[0]['postsCount']\n", + "}]\n", + "\n", + "df1 = pd.DataFrame(data1)\n", + "df1" + ] + }, + { + "cell_type": "markdown", + "id": "636d22d3-beac-4de3-9ea8-6232e9cdcc6c", + "metadata": {}, + "source": [ + "### Save first dataframe to csv" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "be26e796-6c38-4152-a0b8-49f51b617a6f", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-08T21:23:24.525210Z", + "iopub.status.busy": "2024-07-08T21:23:24.524992Z", + "iopub.status.idle": "2024-07-08T21:23:24.558533Z", + "shell.execute_reply": "2024-07-08T21:23:24.557920Z", + "shell.execute_reply.started": "2024-07-08T21:23:24.525189Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "df1.to_csv(output_csv1, index=False)" + ] + }, + { + "cell_type": "markdown", + "id": "d2f54970-5afc-45d8-ae52-dfb98f280c74", + "metadata": {}, + "source": [ + "### Posts dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "4472174c-fa1d-49a4-8dfe-b16d3ad39b4c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-08T21:22:58.311571Z", + "iopub.status.busy": "2024-07-08T21:22:58.311357Z", + "iopub.status.idle": 
"2024-07-08T21:22:58.335386Z", + "shell.execute_reply": "2024-07-08T21:22:58.334746Z", + "shell.execute_reply.started": "2024-07-08T21:22:58.311549Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OWNER_USERNAMEOWNER_IDIDPOST_TYPECAPTIONHASHTAGSMENTIONSURLCOMMENTS_COUNTLIKES_COUNTTIMESTAMP
0naaslife496455568253278939566399457755ImageThe real magic happens when you merge this AI ...[data, ai][]https://www.instagram.com/p/C2BIim_Nonb/002024-01-12T22:54:37.000Z
1naaslife496455568253157000429711370041ImageThe Lean Data Journal, article #1 is out! \\n\\n...[1, data, automation, ai, datascience, artific...[]https://www.instagram.com/p/CvP6yoQs6c5/132023-07-28T17:03:20.000Z
2naaslife496455568253023800274288630756ImageIn the desert sands, a pipeline flows\\nOil rus...[][]https://www.instagram.com/p/Cn2slQks9vk/022023-01-25T22:18:24.000Z
3naaslife496455568253023165800621664596ImageA pipeline of water in the jungle,\\nA sight bo...[AvatarWorld, Nature, ManVsWild, JungleLife, W...[]https://www.instagram.com/p/Cn0cUc7KelU/442023-01-25T01:17:49.000Z
4naaslife496455568253023157023520418465ImageEvery data pipeline is different,\\n\\nand that'...[][]https://www.instagram.com/p/Cn0aUunKWqh/022023-01-25T01:00:22.000Z
5naaslife496455568252961429883488333870Image🎃 Happy Halloween to all the Pandas coders who...[halloween, candy, python, coders, jupyternote...[]https://www.instagram.com/p/CkZHMnojHAu/082022-10-31T20:59:34.000Z
6naaslife496455568252923439672380916997ImageAre you interested in data & open source ?\\n\\n...[opensource, data, ai, automation, analytics, ...[]https://www.instagram.com/p/CiSJOSajIUF/052022-09-09T10:59:48.000Z
7naaslife496455568252916247470160561289ImageBack from a break in the mountains and ready t...[][]https://www.instagram.com/p/Ch4l5-IDUSJ/032022-08-30T12:50:10.000Z
8naaslife496455568252884235236414899245Image📊⭐️ Do you want to build dashboards and data a...[][]https://www.instagram.com/p/CgG3KqLLMQt/042022-07-17T08:47:35.000Z
9naaslife496455568252861894946727707225ImageWondering how to read a dataframe from your fi...[aws, cloud, storage, S3bucket, operations, sn...[awscloud]https://www.instagram.com/p/Ce3fkqEMMJZ/042022-06-16T13:01:25.000Z
10naaslife496455568252861827651701360003ImageDo you want to download files in your AWS buck...[aws, cloud, storage, S3bucket, operations, sn...[]https://www.instagram.com/p/Ce3QRYsDMWD/022022-06-16T10:47:42.000Z
11naaslife496455568252861737947777933288ImageDid you know you can send daily billing notifi...[aws, budget, awsbilling, cloudaccounting, clo...[]https://www.instagram.com/p/Ce274BZjx_o/132022-06-16T07:49:29.000Z
\n", + "
" + ], + "text/plain": [ + " OWNER_USERNAME OWNER_ID ID POST_TYPE \\\n", + "0 naaslife 49645556825 3278939566399457755 Image \n", + "1 naaslife 49645556825 3157000429711370041 Image \n", + "2 naaslife 49645556825 3023800274288630756 Image \n", + "3 naaslife 49645556825 3023165800621664596 Image \n", + "4 naaslife 49645556825 3023157023520418465 Image \n", + "5 naaslife 49645556825 2961429883488333870 Image \n", + "6 naaslife 49645556825 2923439672380916997 Image \n", + "7 naaslife 49645556825 2916247470160561289 Image \n", + "8 naaslife 49645556825 2884235236414899245 Image \n", + "9 naaslife 49645556825 2861894946727707225 Image \n", + "10 naaslife 49645556825 2861827651701360003 Image \n", + "11 naaslife 49645556825 2861737947777933288 Image \n", + "\n", + " CAPTION \\\n", + "0 The real magic happens when you merge this AI ... \n", + "1 The Lean Data Journal, article #1 is out! \\n\\n... \n", + "2 In the desert sands, a pipeline flows\\nOil rus... \n", + "3 A pipeline of water in the jungle,\\nA sight bo... \n", + "4 Every data pipeline is different,\\n\\nand that'... \n", + "5 🎃 Happy Halloween to all the Pandas coders who... \n", + "6 Are you interested in data & open source ?\\n\\n... \n", + "7 Back from a break in the mountains and ready t... \n", + "8 📊⭐️ Do you want to build dashboards and data a... \n", + "9 Wondering how to read a dataframe from your fi... \n", + "10 Do you want to download files in your AWS buck... \n", + "11 Did you know you can send daily billing notifi... \n", + "\n", + " HASHTAGS MENTIONS \\\n", + "0 [data, ai] [] \n", + "1 [1, data, automation, ai, datascience, artific... [] \n", + "2 [] [] \n", + "3 [AvatarWorld, Nature, ManVsWild, JungleLife, W... [] \n", + "4 [] [] \n", + "5 [halloween, candy, python, coders, jupyternote... [] \n", + "6 [opensource, data, ai, automation, analytics, ... [] \n", + "7 [] [] \n", + "8 [] [] \n", + "9 [aws, cloud, storage, S3bucket, operations, sn... 
[awscloud] \n", + "10 [aws, cloud, storage, S3bucket, operations, sn... [] \n", + "11 [aws, budget, awsbilling, cloudaccounting, clo... [] \n", + "\n", + " URL COMMENTS_COUNT LIKES_COUNT \\\n", + "0 https://www.instagram.com/p/C2BIim_Nonb/ 0 0 \n", + "1 https://www.instagram.com/p/CvP6yoQs6c5/ 1 3 \n", + "2 https://www.instagram.com/p/Cn2slQks9vk/ 0 2 \n", + "3 https://www.instagram.com/p/Cn0cUc7KelU/ 4 4 \n", + "4 https://www.instagram.com/p/Cn0aUunKWqh/ 0 2 \n", + "5 https://www.instagram.com/p/CkZHMnojHAu/ 0 8 \n", + "6 https://www.instagram.com/p/CiSJOSajIUF/ 0 5 \n", + "7 https://www.instagram.com/p/Ch4l5-IDUSJ/ 0 3 \n", + "8 https://www.instagram.com/p/CgG3KqLLMQt/ 0 4 \n", + "9 https://www.instagram.com/p/Ce3fkqEMMJZ/ 0 4 \n", + "10 https://www.instagram.com/p/Ce3QRYsDMWD/ 0 2 \n", + "11 https://www.instagram.com/p/Ce274BZjx_o/ 1 3 \n", + "\n", + " TIMESTAMP \n", + "0 2024-01-12T22:54:37.000Z \n", + "1 2023-07-28T17:03:20.000Z \n", + "2 2023-01-25T22:18:24.000Z \n", + "3 2023-01-25T01:17:49.000Z \n", + "4 2023-01-25T01:00:22.000Z \n", + "5 2022-10-31T20:59:34.000Z \n", + "6 2022-09-09T10:59:48.000Z \n", + "7 2022-08-30T12:50:10.000Z \n", + "8 2022-07-17T08:47:35.000Z \n", + "9 2022-06-16T13:01:25.000Z \n", + "10 2022-06-16T10:47:42.000Z \n", + "11 2022-06-16T07:49:29.000Z " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data2 = []\n", + "\n", + "for post in profile_data[0]['latestPosts']:\n", + " data_post = get_posts(\n", + " post[\"ownerUsername\"],\n", + " post[\"ownerId\"],\n", + " post[\"id\"],\n", + " post[\"type\"],\n", + " post[\"caption\"],\n", + " post[\"hashtags\"],\n", + " post[\"mentions\"],\n", + " post[\"url\"],\n", + " post[\"commentsCount\"],\n", + " post[\"likesCount\"],\n", + " post[\"timestamp\"]\n", + " )\n", + " data2.append(data_post)\n", + " \n", + "df2 = pd.DataFrame(data2)\n", + "df2" + ] + }, + { + "cell_type": "markdown", + "id": 
"32d3baf0-0abc-4eca-96c8-092fc1f132b7", + "metadata": {}, + "source": [ + "### Save second dataframe to csv" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "29bd55e8-47d6-4ee4-a233-eabf047f4beb", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-08T21:23:01.789193Z", + "iopub.status.busy": "2024-07-08T21:23:01.788854Z", + "iopub.status.idle": "2024-07-08T21:23:02.482024Z", + "shell.execute_reply": "2024-07-08T21:23:02.481431Z", + "shell.execute_reply.started": "2024-07-08T21:23:01.789151Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "df2.to_csv(output_csv2, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e655e0a-ee1f-4d56-a198-0f38d30036e9", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "naas": { + "notebook_id": "38c44121d518d242dcfd1209fca1b300a11475f5836b8ae8f214c0b4524816a9", + "notebook_path": "Instagram/Instagram_Post_image_and_caption.ipynb" + }, + "papermill": { + "default_parameters": {}, + "environment_variables": {}, + "parameters": {}, + "version": "2.3.3" + }, + "toc-autonumbering": false + }, + "nbformat": 4, + "nbformat_minor": 5 +} From d05755e9a7c8c99ad55281b7d833ff7f13e32ef9 Mon Sep 17 00:00:00 2001 From: Varsha Kumar Date: Mon, 8 Jul 2024 23:31:31 +0200 Subject: [PATCH 2/3] fix: without secret --- ...stagram_Extract_details_from_account.ipynb | 80 +++++++++---------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/Instagram/Instagram_Extract_details_from_account.ipynb b/Instagram/Instagram_Extract_details_from_account.ipynb index ab056a5c03..212cbca5f6 100644 --- 
a/Instagram/Instagram_Extract_details_from_account.ipynb +++ b/Instagram/Instagram_Extract_details_from_account.ipynb @@ -99,11 +99,11 @@ "id": "d40e70c0-a388-417b-a50f-c50bb82cc0b3", "metadata": { "execution": { - "iopub.execute_input": "2024-07-08T21:22:37.198608Z", - "iopub.status.busy": "2024-07-08T21:22:37.198348Z", - "iopub.status.idle": "2024-07-08T21:22:37.628862Z", - "shell.execute_reply": "2024-07-08T21:22:37.628308Z", - "shell.execute_reply.started": "2024-07-08T21:22:37.198540Z" + "iopub.execute_input": "2024-07-08T21:29:33.402992Z", + "iopub.status.busy": "2024-07-08T21:29:33.402544Z", + "iopub.status.idle": "2024-07-08T21:29:34.812630Z", + "shell.execute_reply": "2024-07-08T21:29:34.812039Z", + "shell.execute_reply.started": "2024-07-08T21:29:33.402899Z" }, "papermill": {}, "tags": [] @@ -141,18 +141,18 @@ "id": "ce903236-60d1-4087-a31e-9321f2df6112", "metadata": { "execution": { - "iopub.execute_input": "2024-07-08T21:22:37.630171Z", - "iopub.status.busy": "2024-07-08T21:22:37.629960Z", - "iopub.status.idle": "2024-07-08T21:22:37.633585Z", - "shell.execute_reply": "2024-07-08T21:22:37.632976Z", - "shell.execute_reply.started": "2024-07-08T21:22:37.630145Z" + "iopub.execute_input": "2024-07-08T21:29:34.814321Z", + "iopub.status.busy": "2024-07-08T21:29:34.814082Z", + "iopub.status.idle": "2024-07-08T21:29:34.817721Z", + "shell.execute_reply": "2024-07-08T21:29:34.817114Z", + "shell.execute_reply.started": "2024-07-08T21:29:34.814273Z" }, "papermill": {}, "tags": [] }, "outputs": [], "source": [ - "apify_token = \"apify_api_gXWnLEPiE7wC8ALUwQkJ0QcdbuQzU847JwNP\"\n", + "apify_token = \"apify_api_gXWnLEPiE7wC8ALUwQkJ0QcdbuQzU8xxxxxx\"\n", "instagram_profile_url = \"https://www.instagram.com/naaslife/\"\n", "output_csv1 = f\"{instagram_profile_url.split('https://www.instagram.com/')[1].replace('/', '_')}instagram_account.csv\"\n", "output_csv2 = f\"{instagram_profile_url.split('https://www.instagram.com/')[1].replace('/', '_')}instagram_posts.csv\"" @@ 
-186,11 +186,11 @@ "id": "legislative-taiwan", "metadata": { "execution": { - "iopub.execute_input": "2024-07-08T21:22:37.634743Z", - "iopub.status.busy": "2024-07-08T21:22:37.634527Z", - "iopub.status.idle": "2024-07-08T21:22:46.737950Z", - "shell.execute_reply": "2024-07-08T21:22:46.737277Z", - "shell.execute_reply.started": "2024-07-08T21:22:37.634716Z" + "iopub.execute_input": "2024-07-08T21:29:34.819173Z", + "iopub.status.busy": "2024-07-08T21:29:34.818962Z", + "iopub.status.idle": "2024-07-08T21:29:43.408776Z", + "shell.execute_reply": "2024-07-08T21:29:43.408131Z", + "shell.execute_reply.started": "2024-07-08T21:29:34.819148Z" }, "papermill": {}, "tags": [] @@ -285,11 +285,11 @@ "id": "c7ac65a4-dd93-43c4-8090-c86a2aa28898", "metadata": { "execution": { - "iopub.execute_input": "2024-07-08T21:22:46.739075Z", - "iopub.status.busy": "2024-07-08T21:22:46.738892Z", - "iopub.status.idle": "2024-07-08T21:22:46.759351Z", - "shell.execute_reply": "2024-07-08T21:22:46.758768Z", - "shell.execute_reply.started": "2024-07-08T21:22:46.739054Z" + "iopub.execute_input": "2024-07-08T21:29:43.410195Z", + "iopub.status.busy": "2024-07-08T21:29:43.409944Z", + "iopub.status.idle": "2024-07-08T21:29:43.429473Z", + "shell.execute_reply": "2024-07-08T21:29:43.428870Z", + "shell.execute_reply.started": "2024-07-08T21:29:43.410165Z" }, "tags": [] }, @@ -385,15 +385,15 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "id": "be26e796-6c38-4152-a0b8-49f51b617a6f", "metadata": { "execution": { - "iopub.execute_input": "2024-07-08T21:23:24.525210Z", - "iopub.status.busy": "2024-07-08T21:23:24.524992Z", - "iopub.status.idle": "2024-07-08T21:23:24.558533Z", - "shell.execute_reply": "2024-07-08T21:23:24.557920Z", - "shell.execute_reply.started": "2024-07-08T21:23:24.525189Z" + "iopub.execute_input": "2024-07-08T21:29:43.430709Z", + "iopub.status.busy": "2024-07-08T21:29:43.430481Z", + "iopub.status.idle": "2024-07-08T21:29:43.700834Z", + "shell.execute_reply": 
"2024-07-08T21:29:43.700365Z", + "shell.execute_reply.started": "2024-07-08T21:29:43.430681Z" }, "tags": [] }, @@ -412,15 +412,15 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "4472174c-fa1d-49a4-8dfe-b16d3ad39b4c", "metadata": { "execution": { - "iopub.execute_input": "2024-07-08T21:22:58.311571Z", - "iopub.status.busy": "2024-07-08T21:22:58.311357Z", - "iopub.status.idle": "2024-07-08T21:22:58.335386Z", - "shell.execute_reply": "2024-07-08T21:22:58.334746Z", - "shell.execute_reply.started": "2024-07-08T21:22:58.311549Z" + "iopub.execute_input": "2024-07-08T21:29:43.703216Z", + "iopub.status.busy": "2024-07-08T21:29:43.702943Z", + "iopub.status.idle": "2024-07-08T21:29:43.876777Z", + "shell.execute_reply": "2024-07-08T21:29:43.876236Z", + "shell.execute_reply.started": "2024-07-08T21:29:43.703189Z" }, "tags": [] }, @@ -704,7 +704,7 @@ "11 2022-06-16T07:49:29.000Z " ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -742,15 +742,15 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "29bd55e8-47d6-4ee4-a233-eabf047f4beb", "metadata": { "execution": { - "iopub.execute_input": "2024-07-08T21:23:01.789193Z", - "iopub.status.busy": "2024-07-08T21:23:01.788854Z", - "iopub.status.idle": "2024-07-08T21:23:02.482024Z", - "shell.execute_reply": "2024-07-08T21:23:02.481431Z", - "shell.execute_reply.started": "2024-07-08T21:23:01.789151Z" + "iopub.execute_input": "2024-07-08T21:29:43.877843Z", + "iopub.status.busy": "2024-07-08T21:29:43.877624Z", + "iopub.status.idle": "2024-07-08T21:29:44.140133Z", + "shell.execute_reply": "2024-07-08T21:29:44.139642Z", + "shell.execute_reply.started": "2024-07-08T21:29:43.877816Z" }, "tags": [] }, From a25651cf56fbc95988b31901f1fa9c79f4e85d28 Mon Sep 17 00:00:00 2001 From: Varsha Kumar Date: Tue, 9 Jul 2024 11:40:11 +0200 Subject: [PATCH 3/3] feat: procedure to get apify token --- 
...stagram_Extract_details_from_account.ipynb | 42 ++++++++++++++----- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/Instagram/Instagram_Extract_details_from_account.ipynb b/Instagram/Instagram_Extract_details_from_account.ipynb index 212cbca5f6..665e3633d3 100644 --- a/Instagram/Instagram_Extract_details_from_account.ipynb +++ b/Instagram/Instagram_Extract_details_from_account.ipynb @@ -55,7 +55,7 @@ "tags": [] }, "source": [ - "**Last update:** 2024-07-04 (Created: 2024-07-04)" + "**Last update:** 2024-07-09 (Created: 2024-07-04)" ] }, { @@ -71,6 +71,26 @@ "**Description:** This notebook allows users to extract details from an Instagram account." ] }, + { + "cell_type": "markdown", + "id": "88ed8bb2-2694-4848-a3ef-afc0f4e65e07", + "metadata": {}, + "source": [ + "### How to retrieve your API token from Apify" + ] + }, + { + "cell_type": "markdown", + "id": "0fca1344-877b-417d-94f0-1f024a029523", + "metadata": {}, + "source": [ + "1. Go to https://apify.com.\n", + "2. Click \"Sign up for free\" and use your Google account to sign up.\n", + "3. Once your account has been created, navigate to \"Settings\" in the left panel of the screen.\n", + "4. Click the \"Integrations\" tab, where you will find the personal API token that was automatically generated when you signed up.\n", + "5. Copy that token and use it as `apify_token` to extract data!" 
+ ] + }, { "cell_type": "markdown", "id": "input_cell", @@ -99,11 +119,11 @@ "id": "d40e70c0-a388-417b-a50f-c50bb82cc0b3", "metadata": { "execution": { - "iopub.execute_input": "2024-07-08T21:29:33.402992Z", - "iopub.status.busy": "2024-07-08T21:29:33.402544Z", - "iopub.status.idle": "2024-07-08T21:29:34.812630Z", - "shell.execute_reply": "2024-07-08T21:29:34.812039Z", - "shell.execute_reply.started": "2024-07-08T21:29:33.402899Z" + "iopub.execute_input": "2024-07-09T09:36:57.966063Z", + "iopub.status.busy": "2024-07-09T09:36:57.965772Z", + "iopub.status.idle": "2024-07-09T09:36:58.409889Z", + "shell.execute_reply": "2024-07-09T09:36:58.409329Z", + "shell.execute_reply.started": "2024-07-09T09:36:57.965976Z" }, "papermill": {}, "tags": [] @@ -141,11 +161,11 @@ "id": "ce903236-60d1-4087-a31e-9321f2df6112", "metadata": { "execution": { - "iopub.execute_input": "2024-07-08T21:29:34.814321Z", - "iopub.status.busy": "2024-07-08T21:29:34.814082Z", - "iopub.status.idle": "2024-07-08T21:29:34.817721Z", - "shell.execute_reply": "2024-07-08T21:29:34.817114Z", - "shell.execute_reply.started": "2024-07-08T21:29:34.814273Z" + "iopub.execute_input": "2024-07-09T09:36:58.539921Z", + "iopub.status.busy": "2024-07-09T09:36:58.539682Z", + "iopub.status.idle": "2024-07-09T09:36:58.543836Z", + "shell.execute_reply": "2024-07-09T09:36:58.543209Z", + "shell.execute_reply.started": "2024-07-09T09:36:58.539897Z" }, "papermill": {}, "tags": []