diff --git a/TikTok/TikTok_Get_keyword_trending_songs.ipynb b/TikTok/TikTok_Get_keyword_trending_songs.ipynb new file mode 100644 index 0000000000..965b93427e --- /dev/null +++ b/TikTok/TikTok_Get_keyword_trending_songs.ipynb @@ -0,0 +1,437 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "81cc5584", + "metadata": {}, + "source": [ + "\"TikTok.png\"" + ] + }, + { + "cell_type": "markdown", + "id": "compressed-wilson", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# TikTok - Get trending songs by keyword" + ] + }, + { + "cell_type": "markdown", + "id": "religious-programmer", + "metadata": {}, + "source": [ + "**Tags:** #tiktok #content #trends #songs" + ] + }, + { + "cell_type": "markdown", + "id": "1fe9f56e-561c-4f52-aef8-b861c9462107", + "metadata": {}, + "source": [ + "**Author:** [Alex Nodeland](https://www.linkedin.com/in/alexnodeland/)" + ] + }, + { + "cell_type": "markdown", + "id": "0de144be-594d-463e-9849-696fb2f6d1a8", + "metadata": {}, + "source": [ + "**Last update:** 2024-06-28 (Created: 2024-06-20)" + ] + }, + { + "cell_type": "markdown", + "id": "31ea7cdb-e10d-43fc-b026-f69249a59736", + "metadata": {}, + "source": [ + "**Description:** This notebook demonstrates how to extract the top trending songs on top trending platform, TikTok, by keyword. You need to create a free account and [rent an Actor](https://console.apify.com/actors/GdWCkxBtKWOsKjdch/input) to start playing with this template." + ] + }, + { + "cell_type": "markdown", + "id": "d4b89388-5cd5-409a-8169-c53cc8dfab96", + "metadata": {}, + "source": [ + "**References:**\n", + "- [Naas Documentation](https://site.naas.ai/)\n", + "- [Apify TikTok Scraper Documentation](https://apify.com/clockworks/tiktok-scraper)" + ] + }, + { + "cell_type": "markdown", + "id": "distinguished-truth", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "numeric-mediterranean", + "metadata": {}, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "potential-surfing", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "import nest_asyncio\n", + "import pandas as pd\n", + "try:\n", + " from apify_client import ApifyClient\n", + "except:\n", + " !pip install --user apify-client\n", + " from apify_client import ApifyClient\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "id": "aggressive-trustee", + "metadata": {}, + "source": [ + "### Setup variables\n", + "\n", + "**Mandatory**\n", + "- `APIFY_API_TOKEN`: API token required to access the [Apify TikTok Scraper API](https://console.apify.com/settings/integrations)\n", + "- `searchQueries`: Keywords for which you want to extract the top trending songs on TikTok.\n", + "- `csv_output`: CSV file path to be saved as output.\n", + "\n", + "**Optional**\n", + "\n", + "- `hashtags`: Hashtags you want to search for in the TikTok videos. If not provided, the default value is `None`.\n", + "- `resultsPerPage`: Number of results per page. If not provided, the default value is `100`." + ] + }, + { + "cell_type": "markdown", + "id": "323e09ce", + "metadata": {}, + "source": [ + "#### Apify API Token & Query Details" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "continuous-melbourne", + "metadata": {}, + "outputs": [], + "source": [ + "# Mandatory\n", + "APIFY_API_TOKEN = 'apify_api_xxxx'\n", + "searchQueries = ['afrobeat']\n", + "csv_output = 'trending_songs.csv'\n", + "\n", + "# Optional - modify if desired\n", + "hashtags = []\n", + "resultsPerPage = 5\n", + "\n", + "# Prepare the actor input\n", + "RUN_INPUT = {\n", + " \"hashtags\": hashtags,\n", + " \"resultsPerPage\": resultsPerPage,\n", + " \"searchQueries\": searchQueries\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "registered-showcase", + "metadata": {}, + "source": [ + "## Model" + ] + }, + { + "cell_type": "markdown", + "id": "af18cb32", + "metadata": {}, + "source": [ + "#### Create connection object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11dc686a", + "metadata": {}, + "outputs": [], + "source": [ + "client = ApifyClient(APIFY_API_TOKEN)" + ] + }, + { + "cell_type": "markdown", + "id": "tested-astrology", + "metadata": {}, + "source": [ + "### Get data from Apify TikTok Scraper API" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "crude-louisville", + "metadata": { + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "def get_songs_by_keyword(run_input) -> pd.DataFrame:\n", + " \"\"\"\n", + " Get songs by keyword\n", + "\n", + " Args:\n", + " keyword (str): keyword to search for\n", + "\n", + " Returns:\n", + " response (pd.DataFrame): DataFrame with songs\n", + " \"\"\"\n", + " print(f\"Searching for songs with keyword: {run_input['searchQueries']}\")\n", + " \n", + " try:\n", + " # Run the Actor and wait for it to finish\n", + " run = client.actor(\"clockworks/tiktok-scraper\").call(run_input=run_input)\n", + " print(f\"Actor run completed successfully, with id: {run['id']}\")\n", + "\n", + " # Fetch Actor results from the run's dataset\n", + " items = [item for item in client.dataset(run[\"defaultDatasetId\"]).iterate_items()]\n", + "\n", + " # Convert the list of items to a DataFrame\n", + " response = pd.DataFrame(items)\n", + " except Exception as e:\n", + " print(f\"An error occurred while running the actor: {e}\")\n", + " response = pd.DataFrame()\n", + "\n", + " return response" + ] + }, + { + "cell_type": "markdown", + "id": "eb8fe904", + "metadata": {}, + "source": [ + "### Extract data from the response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31247107", + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate the ABI Content Table Data Model as a pandas dataframe\n", + "attributes = {\n", + " \"ENTITY\": [],\n", + " \"SCENARIO\": [],\n", + " \"SOURCE\": [],\n", + " \"PUBLISHED_DATE\": [],\n", + " \"DATE & TIME\": [],\n", + " \"ID\": [],\n", + " \"TITLE\": [],\n", + " \"TEXT\": [],\n", + " \"CONCEPT\": [],\n", + " \"SENTIMENT\": [],\n", + " \"TARGET\": [],\n", + " \"OBJECTIVE\": [],\n", + " \"VIEWS\": [],\n", + " \"LIKES\": [],\n", + " \"COMMENTS\": [],\n", + " \"SHARES\": [],\n", + " \"ENGAGEMENTS\": [],\n", + " \"ENGAGEMENT_SCORE\": [],\n", + " \"TYPE\": [],\n", + " \"AUTHOR_NAME\": [],\n", + " \"AUTHOR_URL\": [],\n", + " \"LENGTH\": [],\n", + " \"PEOPLE_MENTIONED\": [],\n", + " \"ORGANIZATION_MENTIONED\": [],\n", + " \"CONTENT_TITLE_SHARED\": [],\n", + " \"CONTENT_URL_SHARED\": [],\n", + " \"LINKEDIN_LINKS\": [],\n", + " \"IMAGE_SHARED\": [],\n", + " \"TAGS\": [],\n", + " \"URL\": [],\n", + " \"DATE_EXTRACT\": [],\n", + " \"SCENARIO_ORDER\": [],\n", + " \"MUSIC_NAME\": [],\n", + " \"MUSIC_AUTHOR\": [],\n", + " \"MUSIC_ORIGINAL\": [],\n", + " \"MUSIC_ALBUM\": [],\n", + " \"MUSIC_PLAY_URL\": [],\n", + " \"MUSIC_COVER_MEDIUM_URL\": [],\n", + " \"MUSIC_ID\": [],\n", + "}\n", + "content_template = pd.DataFrame(attributes)\n", + "\n", + "def extract_data(response: pd.DataFrame, content_template: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"\n", + " Extract data from response\n", + "\n", + " Args:\n", + " response (pd.DataFrame): DataFrame with songs\n", + "\n", + " Returns:\n", + " content (pd.DataFrame): DataFrame with extracted data\n", + " \"\"\"\n", + " print(f\"Extracting data from response\")\n", + " content = content_template.copy()\n", + " try:\n", + " content = pd.concat([content, response.apply(lambda row: pd.Series({\n", + " \"ENTITY\": row['musicMeta']['musicName'],\n", + " \"SCENARIO\": f\"Trending {row['searchQuery']} Songs on TikTok\",\n", + " \"SOURCE\": \"TikTok\",\n", + " \"PUBLISHED_DATE\": row['createTimeISO'],\n", + " \"DATE & TIME\": datetime.now().strftime(\"%Y-%m-%dT%H:%M:%S.000Z\"),\n", + " \"ID\": row['id'],\n", + " \"TITLE\": row['musicMeta']['musicName'],\n", + " \"TEXT\": row['text'],\n", + " # \"CONCEPT\": \"\",\n", + " # \"SENTIMENT\": \"\",\n", + " # \"TARGET\": \"\",\n", + " # \"OBJECTIVE\": \"\",\n", + " \"VIEWS\": row['playCount'],\n", + " \"LIKES\": row['diggCount'],\n", + " \"COMMENTS\": row['commentCount'],\n", + " \"SHARES\": row['shareCount'],\n", + " \"ENGAGEMENTS\": row['diggCount'] + row['commentCount'] + row['shareCount'] + row['collectCount'],\n", + " # \"ENGAGEMENT_SCORE\": \"\",\n", + " \"TYPE\": \"video\",\n", + " \"AUTHOR_NAME\": row['authorMeta']['nickName'],\n", + " \"AUTHOR_URL\": f\"https://www.tiktok.com/@{row['authorMeta']['name']}\",\n", + " \"LENGTH\": row['videoMeta']['duration'],\n", + " \"PEOPLE_MENTIONED\": \", \".join(row['mentions']) if 'mentions' in row else '',\n", + " # \"ORGANIZATION_MENTIONED\": \"\",\n", + " # \"CONTENT_TITLE_SHARED\": \"\",\n", + " # \"CONTENT_URL_SHARED\": \"\",\n", + " # \"LINKEDIN_LINKS\": \"\",\n", + " \"IMAGE_SHARED\": row['videoMeta']['coverUrl'],\n", + " \"TAGS\": \", \".join([tag['name'] for tag in row['hashtags']]) if 'hashtags' in row else '',\n", + " \"URL\": row['webVideoUrl'],\n", + " \"DATE_EXTRACT\": datetime.now().strftime(\"%Y-%m-%d\"),\n", + " # \"SCENARIO_ORDER\": \"\",\n", + " \"MUSIC_NAME\": row['musicMeta']['musicName'],\n", + " \"MUSIC_AUTHOR\": row['musicMeta']['musicAuthor'],\n", + " \"MUSIC_ORIGINAL\": row['musicMeta']['musicOriginal'],\n", + " \"MUSIC_ALBUM\": row['musicMeta']['musicAlbum'],\n", + " \"MUSIC_PLAY_URL\": row['musicMeta']['playUrl'],\n", + " \"MUSIC_COVER_MEDIUM_URL\": row['musicMeta']['coverMediumUrl'],\n", + " \"MUSIC_ID\": row['musicMeta']['musicId'],\n", + " }), axis=1)], ignore_index=True)\n", + " print(f\"Data extracted successfully\")\n", + " except Exception as e:\n", + " print(f\"An error occurred while extracting data: {e}\")\n", + " return content" + ] + }, + { + "cell_type": "markdown", + "id": "lonely-pacific", + "metadata": { + "execution": { + "iopub.execute_input": "2021-07-02T23:32:10.789097Z", + "iopub.status.busy": "2021-07-02T23:32:10.788829Z", + "iopub.status.idle": "2021-07-02T23:32:10.796900Z", + "shell.execute_reply": "2021-07-02T23:32:10.796358Z", + "shell.execute_reply.started": "2021-07-02T23:32:10.789033Z" + } + }, + "source": [ + "## Output" + ] + }, + { + "cell_type": "markdown", + "id": "890f7c86-b7bb-4f5d-9a1b-e492dd9580fd", + "metadata": {}, + "source": [ + "### Call the function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c4e3b7b-6440-4844-8054-265f1aec65eb", + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"The input is: \\n\", RUN_INPUT)\n", + "response = get_songs_by_keyword(RUN_INPUT)" + ] + }, + { + "cell_type": "markdown", + "id": "2209a3f2", + "metadata": {}, + "source": [ + "### Extract the output from the response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8479fff7", + "metadata": {}, + "outputs": [], + "source": [ + "content = extract_data(response, content_template)\n", + "\n", + "# Save the extracted data to a CSV file\n", + "content.to_csv(csv_output, index=False)\n", + "content" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "papermill": { + "default_parameters": {}, + "environment_variables": {}, + "parameters": {}, + "version": "2.3.3" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}