From 53e1644c37d48cafb1825d7553e19e91b445b3f8 Mon Sep 17 00:00:00 2001 From: Ravindra <42912207+KR-Ravindra@users.noreply.github.com> Date: Thu, 26 Oct 2023 23:30:43 -0700 Subject: [PATCH 01/10] Catch Exceptions --- parser/parser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/parser/parser.py b/parser/parser.py index e8f62a3..ce84a2f 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -308,6 +308,7 @@ def get_top_keywords(keywords_and_count): try: return keywords_and_count[:12] except Exception as exc: + logger.error(f"Error while getting top keywords: {exc}") return exc # ****************************************************************************************************************************************** @@ -342,6 +343,7 @@ async def keyword_api(request: Request): push_to_redis(url + payload["algoChoice"],final_response) return final_response except Exception as e: + logger.error(f"Error while parsing: {e}") raise HTTPException(status_code=503, detail="Hello, I am the parser engine, Scrapper is taking too long, please try again later") @app.post('/api/v1/keyword-recommendations/') @@ -367,6 +369,7 @@ async def keyword_recommendations_api(request: Request): else: raise HTTPException(status_code=503, detail="Scrapper Engine is taking too long, please try again later") except Exception as e: + logger.error(f"Error while generating recommendations: {e}") raise HTTPException(status_code=503, detail="Hello, I am the parser engine, Scrapper is taking too long, please try again later") @app.post('/api/v1/multi-algo/') From 844afa38661a7eb9b37e5e069202e8ce76ff0ff0 Mon Sep 17 00:00:00 2001 From: Ravindra <42912207+KR-Ravindra@users.noreply.github.com> Date: Fri, 27 Oct 2023 00:50:59 -0700 Subject: [PATCH 02/10] Made a big decision; purana paap --- parser/parser.py | 69 +----------------- recommender/recommender.py | 132 +++++++++++++++++++++++++++++++++++ recommender/requirements.txt | 0 3 files changed, 134 insertions(+), 67 deletions(-) create mode 100644 recommender/recommender.py create mode 100644 recommender/requirements.txt diff --git a/parser/parser.py b/parser/parser.py index ce84a2f..5691809 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -212,45 +212,6 @@ def check_in_redis(key): # ************************************************************************************************************************************ -# Recommendations Generator *********************************************************************************************************** -import requests -import better_profanity - -def get_seo_recommendation(keyword): - url = "https://www.spyfu.com/NsaApi/RelatedKeyword/GetPhraseMatchedKeywords" - payload = f"{{\"query\":\"{keyword}\",\"pageSize\":10,\"isOverview\":true,\"countryCode\":\"US\"}}" - headers = { - 'content-type': 'application/json;charset=UTF-8', - 'Cookie': 'ASP.NET_SessionId=rutmlg02sfx4yakg0nd0asxw' - } - - response = requests.request("POST", url, headers=headers, data=payload) - alternate_keywords = [] - for each in response.json()["keywords"]: - if not better_profanity.profanity.contains_profanity(each["keyword"]): - alternate_keywords.append(each["keyword"]) - return alternate_keywords - - -def get_suggested_replacements(keyword): - url = f"https://api.datamuse.com/words?rel_syn={keyword}" - response = requests.get(url) - if response.status_code == 200: - synonyms = [word['word'] for word in response.json()][:2] - return synonyms - else: - return None - -def generate_recommendations(keywords_and_count): - for each in 
keywords_and_count: - each["mostSearchedAlternatives"] = get_seo_recommendation(each["originalKeyword"]) - each["probableReplacements"] = get_suggested_replacements(each["originalKeyword"]) - return keywords_and_count - - -# ************************************************************************************************************************************ - - # Parsing Engine *************************************************************************************************************************** import time def get_keywords(algo_choice, scrapped_content): @@ -268,7 +229,7 @@ def get_keywords(algo_choice, scrapped_content): else: if each_word not in existing_keywords: occurences = search_pattern_with_suffix_array(scrapped_content, each_word, suffix_array) - keywords_and_count.append({"keyword": each_word, "count": occurences}) + keywords_and_count.append({"originalKeyword": each_word, "count": occurences}) existing_keywords.append(each_word) return keywords_and_count, (time.time()-start_time) if algo_choice == "suffix_tree": @@ -286,7 +247,7 @@ def get_keywords(algo_choice, scrapped_content): else: if each_word not in existing_keywords: occurences = suffix_tree(constructed_suffix_tree, each_word) - keywords_and_count.append({"keyword": each_word, "count": occurences}) + keywords_and_count.append({"originalKeyword": each_word, "count": occurences}) existing_keywords.append(each_word) return keywords_and_count, time.time() - start_time except Exception as e: @@ -346,32 +307,6 @@ async def keyword_api(request: Request): logger.error(f"Error while parsing: {e}") raise HTTPException(status_code=503, detail="Hello, I am the parser engine, Scrapper is taking too long, please try again later") -@app.post('/api/v1/keyword-recommendations/') -async def keyword_recommendations_api(request: Request): - payload = await request.json() - url = payload['url'].strip('/') if payload['url'].endswith('/') else payload['url'] - try: - data = check_in_redis(url) - if data: - logger.info("Found in Cache Store, Checking if this algo is already executed") - existing_algo_data = check_in_redis(url + payload["algoChoice"]) - if existing_algo_data: - logger.info("Cache store found this entry, checking if recommendations already exists") - if existing_algo_data["topKeywordListings"][0].get("mostSearchedAlternatives"): - logger.info("Recommendations exist, returning my precious data without changes") - return existing_algo_data - all_keywords = existing_algo_data["topKeywordListings"] - modified_keywords = generate_recommendations(all_keywords) - existing_algo_data["topKeywordListings"] = modified_keywords - logger.info("Revalidating the cache with recommendations") - push_to_redis(url + payload["algoChoice"],existing_algo_data) - return existing_algo_data - else: - raise HTTPException(status_code=503, detail="Scrapper Engine is taking too long, please try again later") - except Exception as e: - logger.error(f"Error while generating recommendations: {e}") - raise HTTPException(status_code=503, detail="Hello, I am the parser engine, Scrapper is taking too long, please try again later") - @app.post('/api/v1/multi-algo/') async def multialgo_api(request: Request): payload = await request.json() diff --git a/recommender/recommender.py b/recommender/recommender.py new file mode 100644 index 0000000..d30a221 --- /dev/null +++ b/recommender/recommender.py @@ -0,0 +1,132 @@ +from fastapi import FastAPI, Request, HTTPException +import uvicorn +from fastapi.middleware.cors import CORSMiddleware +import logging +import requests 
+import better_profanity +import time +import redis +import json + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +app = FastAPI() +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Cache Store ********************************************************************************************************************** + + +def get_redis_connection(): + return redis.Redis(host="redis", port=6379, db=0) + +def push_to_redis(key, response): + logger.info(f"Pushing to Cache Store {response}") + try: + redis_connection = get_redis_connection() + redis_connection.hset(key, "response", json.dumps(response)) + return True + except Exception as e: + logger.error(f"Error while pushing to Redis: {e}") + +def check_in_redis(key): + logger.info("Checking in our precious Cache Store") + try: + redis_connection = get_redis_connection() + response = redis_connection.hget(key, "response") + if response: + logger.info("Match found, returning from Cache Store") + return json.loads(response) + else: + return False + except Exception as e: + logger.error(f"Error while checking in Redis: {e}") + return False + +# ************************************************************************************************************************************ + +# Recommendations Generator *********************************************************************************************************** + +def get_seo_recommendation(keyword): + url = "https://www.spyfu.com/NsaApi/RelatedKeyword/GetPhraseMatchedKeywords" + payload = f"{{\"query\":\"{keyword}\",\"pageSize\":10,\"isOverview\":true,\"countryCode\":\"US\"}}" + headers = { + 'content-type': 'application/json;charset=UTF-8', + 'Cookie': 'ASP.NET_SessionId=rutmlg02sfx4yakg0nd0asxw' + } + + response = requests.request("POST", url, headers=headers, data=payload) + alternate_keywords = [] + for each in response.json()["keywords"]: + if not better_profanity.profanity.contains_profanity(each["keyword"]): + alternate_keywords.append(each["keyword"]) + return alternate_keywords + + +def get_suggested_replacements(keyword): + url = f"https://api.datamuse.com/words?rel_syn={keyword}" + response = requests.get(url) + if response.status_code == 200: + synonyms = [word['word'] for word in response.json()][:2] + return synonyms + else: + return None + +def generate_recommendations(keywords_and_count): + for each in keywords_and_count: + each["mostSearchedAlternatives"] = get_seo_recommendation(each["originalKeyword"]) + each["probableReplacements"] = get_suggested_replacements(each["originalKeyword"]) + return keywords_and_count + + +# ************************************************************************************************************************************ + +# API Endpoints *************************************************************************************************************************** + + +@app.post('/api/v1/keyword-recommendations/') +async def keyword_recommendations_api(request: Request): + payload = await request.json() + url = payload['url'].strip('/') if payload['url'].endswith('/') else payload['url'] + try: + wait_iterator = 0 + while True: + data = check_in_redis(url) + if data: + logger.info("Found in Cache Store, Checking if this algo is already executed") + existing_algo_data = check_in_redis(url + payload["algoChoice"]) + logger.info(f"Existing Algo Data: {existing_algo_data}") + if existing_algo_data: + logger.info("Cache store found 
this entry, checking if recommendations already exists") + if existing_algo_data["topKeywordListings"][0].get("mostSearchedAlternatives"): + logger.info("Recommendations exist, returning my precious data without changes") + return existing_algo_data + logger.info("Recommendations not found, generating recommendations") + all_keywords = existing_algo_data["topKeywordListings"] + modified_keywords = generate_recommendations(all_keywords) + existing_algo_data["topKeywordListings"] = modified_keywords + logger.info("Revalidating the cache with recommendations") + push_to_redis(url + payload["algoChoice"],existing_algo_data) + return existing_algo_data + else: + logger.info("Let's give that scrapper and parser engines, a tad bit more time") + if wait_iterator > 4: + raise HTTPException(status_code=503, detail="Scrapper and Parser Engines are taking too long, please try again later") + wait_iterator += 1 + time.sleep(7) + except Exception as e: + logger.error(f"Error while generating recommendations: {e}") + raise HTTPException(status_code=503, detail="Hello, I am the Recommender, Scrapper and Parser are taking too long, please try again later") + +# ************************************************************************************************************************************ + + +if __name__ == '__main__': + uvicorn.run("recommender:app", host='0.0.0.0', port=8003, reload=True) \ No newline at end of file diff --git a/recommender/requirements.txt b/recommender/requirements.txt new file mode 100644 index 0000000..e69de29 From 8822eb8cf5d20819a90116c082986dc37b9fa314 Mon Sep 17 00:00:00 2001 From: Ravindra <42912207+KR-Ravindra@users.noreply.github.com> Date: Fri, 27 Oct 2023 00:52:21 -0700 Subject: [PATCH 03/10] The dockerfile keeps growing -_- --- Dockerfile | 8 +++++++- docker-compose.yml | 6 ++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 78021ef..510cff2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,4 +20,10 @@ FROM python:3.11.5-bullseye as analyzer ADD . /app WORKDIR /app/analyzer RUN pip install -r requirements.txt -CMD python analyzer.py \ No newline at end of file +CMD python analyzer.py + +FROM python:3.11.5-bullseye as recommender +ADD . 
/app +WORKDIR /app/recommender +RUN pip install -r requirements.txt +CMD python recommender.py \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 02a2157..c3bb66f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -32,6 +32,12 @@ services: image: krravindra/analyzer:latest ports: - 8002:8002 + + + recommender: + image: krravindra/recommender:latest + ports: + - 8003:8003 redis: image: redis:6.2 From 8521536f0f7a9f9ca823b221b7ca812c02a916cd Mon Sep 17 00:00:00 2001 From: Ravindra <42912207+KR-Ravindra@users.noreply.github.com> Date: Fri, 27 Oct 2023 00:53:54 -0700 Subject: [PATCH 04/10] It keeps getting uglier --- recommender/recommender.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/recommender/recommender.py b/recommender/recommender.py index d30a221..106e9b2 100644 --- a/recommender/recommender.py +++ b/recommender/recommender.py @@ -115,6 +115,12 @@ async def keyword_recommendations_api(request: Request): logger.info("Revalidating the cache with recommendations") push_to_redis(url + payload["algoChoice"],existing_algo_data) return existing_algo_data + else: + logger.info("Let's give that scrapper and parser engines, a tad bit more time") + if wait_iterator > 4: + raise HTTPException(status_code=503, detail="Scrapper and Parser Engines are taking too long, please try again later") + wait_iterator += 1 + time.sleep(7) else: logger.info("Let's give that scrapper and parser engines, a tad bit more time") if wait_iterator > 4: From cc9f3afbae6448afac09bced363902cb968ad271 Mon Sep 17 00:00:00 2001 From: Ravindra <42912207+KR-Ravindra@users.noreply.github.com> Date: Fri, 27 Oct 2023 00:58:45 -0700 Subject: [PATCH 05/10] Loosing every possible ability to think straight --- recommender/requirements.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/recommender/requirements.txt b/recommender/requirements.txt index e69de29..8138e76 100644 --- a/recommender/requirements.txt +++ b/recommender/requirements.txt @@ -0,0 +1,18 @@ +annotated-types==0.6.0 +anyio==3.7.1 +better-profanity==0.7.0 +certifi==2023.7.22 +charset-normalizer==3.3.1 +click==8.1.7 +fastapi==0.104.0 +h11==0.14.0 +idna==3.4 +pydantic==2.4.2 +pydantic_core==2.10.1 +redis==5.0.1 +requests==2.31.0 +sniffio==1.3.0 +starlette==0.27.0 +typing_extensions==4.8.0 +urllib3==2.0.7 +uvicorn==0.23.2 From 9915944d8bb2256be45c79bc73a75e4a87bdcb0a Mon Sep 17 00:00:00 2001 From: Ravindra <42912207+KR-Ravindra@users.noreply.github.com> Date: Fri, 27 Oct 2023 01:29:30 -0700 Subject: [PATCH 06/10] Everything HTTPS --- src/api/analyzer.js | 2 +- src/api/keyword.js | 2 +- src/api/multiAlgo.js | 2 +- src/api/recommendation.js | 2 +- src/api/scraperText.js | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/api/analyzer.js b/src/api/analyzer.js index 3ec70c5..ae65c80 100644 --- a/src/api/analyzer.js +++ b/src/api/analyzer.js @@ -1,7 +1,7 @@ async function AnalyzerList(payload) { let final_response try { - const response = await fetch('http://34.105.100.197/api/v1/analyzer/', { + const response = await fetch('https://app.group5.live/api/v1/analyzer/', { method: 'POST', headers: { 'Content-Type': 'application/json', diff --git a/src/api/keyword.js b/src/api/keyword.js index f9dc117..425f150 100644 --- a/src/api/keyword.js +++ b/src/api/keyword.js @@ -1,7 +1,7 @@ async function keywordList(payload) { let final_response try { - const response = await fetch('http://34.105.100.197/api/v1/keyword/', { + const response = await 
fetch('https://app.group5.live/api/v1/keyword/', { method: 'POST', headers: { 'Content-Type': 'application/json', diff --git a/src/api/multiAlgo.js b/src/api/multiAlgo.js index 1803ff4..617d598 100644 --- a/src/api/multiAlgo.js +++ b/src/api/multiAlgo.js @@ -1,7 +1,7 @@ async function multialgoComparision(payload) { let final_response try { - const response = await fetch('http://34.105.100.197/api/v1/multi-algo/', { + const response = await fetch('https://app.group5.live/api/v1/multi-algo/', { method: 'POST', headers: { 'Content-Type': 'application/json', diff --git a/src/api/recommendation.js b/src/api/recommendation.js index faa9c29..05ae51b 100644 --- a/src/api/recommendation.js +++ b/src/api/recommendation.js @@ -1,7 +1,7 @@ async function RecommendationList(payload) { let final_response try { - const response = await fetch('http://34.105.100.197/api/v1/keyword-recommendations/', { + const response = await fetch('https://app.group5.live/api/v1/keyword-recommendations/', { method: 'POST', headers: { 'Content-Type': 'application/json', diff --git a/src/api/scraperText.js b/src/api/scraperText.js index 53bbfef..383b17f 100644 --- a/src/api/scraperText.js +++ b/src/api/scraperText.js @@ -1,7 +1,7 @@ async function scraperText(payload) { let final_response try { - const response = await fetch('http://34.105.100.197/api/v1/scraping/', { + const response = await fetch('https://app.group5.live/api/v1/scraping/', { method: 'POST', headers: { 'Content-Type': 'application/json', From 23b5b5ee3777b54423ed3ff3c06c9740f1f16312 Mon Sep 17 00:00:00 2001 From: Ravindra <42912207+KR-Ravindra@users.noreply.github.com> Date: Fri, 27 Oct 2023 09:43:48 -0700 Subject: [PATCH 07/10] QUICK CHANGES --- src/components/homePage/index.js | 172 +++++++++++--------- src/components/homePage/keywordListFrame.js | 49 +++--- 2 files changed, 128 insertions(+), 93 deletions(-) diff --git a/src/components/homePage/index.js b/src/components/homePage/index.js index 7b576ef..1b7a01d 100644 --- a/src/components/homePage/index.js +++ b/src/components/homePage/index.js @@ -2,16 +2,16 @@ import React, { useState, useEffect } from "react"; import "./index.css"; import ScraperTextFrame from "./scraperTextFrame"; import "./animation.css"; -import html2pdf from 'html2pdf.js'; -import scraperText from '../../api/scraperText'; +import html2pdf from "html2pdf.js"; +import scraperText from "../../api/scraperText"; import MultiAlgoComparision from "../../api/multiAlgo"; import keywordList from "../../api/keyword"; -import KeywordListFrame from './keywordListFrame'; -import AlgoComparision from './algoComparision'; -import TableComponent from './recommendationTableComponent'; +import KeywordListFrame from "./keywordListFrame"; +import AlgoComparision from "./algoComparision"; +import TableComponent from "./recommendationTableComponent"; import RecommendationList from "../../api/recommendation"; -import AnalyzerList from "../../api/analyzer" -import InsightTable from './inSight' +import AnalyzerList from "../../api/analyzer"; +import InsightTable from "./inSight"; function HomePage() { const [urlInput, setUrlInput] = useState(""); @@ -20,8 +20,8 @@ function HomePage() { const [scraperData, setScraperData] = useState(""); const [keywordListData, setKeywordListData] = useState(""); const [multialgo, setMultialgo] = useState(""); - const [recommendationListData, setRecommendationListData]=useState('') - const [analyzerData, setAnalyzerData]= useState(""); + const [recommendationListData, setRecommendationListData] = useState(""); + const 
[analyzerData, setAnalyzerData] = useState(""); const [scrollPosition, setScrollPosition] = useState(0); const [scrollUp, setScrollUp] = useState(false); const [resetScroll, setResetScroll] = useState(false); @@ -63,7 +63,7 @@ function HomePage() { }; // const handleDownload = () => { - // // const element = document.getElementById('pdf-container'); + // // const element = document.getElementById('pdf-container'); // // html2pdf(element); // window.print(); // }; @@ -74,6 +74,11 @@ function HomePage() { }; const handleSubmit = () => { + setScraperData(""); + setKeywordListData(""); + setMultialgo(""); + setRecommendationListData(""); + setAnalyzerData(""); setLoading(true); if (urlInput.trim() === "") { @@ -87,61 +92,62 @@ function HomePage() { }; console.log("Form submitted:", payload); - const scraperTextData= scraperText(payload); - scraperTextData.then((response)=> - setScraperData(response)) - .catch(error => { - console.error('API error:', error); + const scraperTextData = scraperText(payload); + scraperTextData + .then((response) => setScraperData(response)) + .catch((error) => { + console.error("API error:", error); }) .finally(() => { - setLoading(false); + console.log("Finalised"); }); - const keywordListData= keywordList(payload) + const keywordListData = keywordList(payload); - keywordListData.then((response)=> - setKeywordListData(response)) - .catch(error => { - console.error('API error:', error); + keywordListData + .then((response) => setKeywordListData(response)) + .catch((error) => { + console.error("API error:", error); }) .finally(() => { - setLoading(false); + console.log("Finalised"); }); - const recommendationList=RecommendationList(payload); - recommendationList.then((response) => - setRecommendationListData(response)) - .catch(error => { - console.error('API error:', error); + const recommendationList = RecommendationList(payload); + recommendationList + .then((response) => setRecommendationListData(response)) + .catch((error) => { + console.error("API error:", error); }) .finally(() => { - setLoading(false); + console.log("Finalised"); }); - const multialgoComparision = MultiAlgoComparision(payload); - multialgoComparision.then((response) => - setMultialgo(response)) - .catch(error => { - console.error('API error:', error); + multialgoComparision + .then((response) => setMultialgo(response)) + .catch((error) => { + console.error("API error:", error); }) .finally(() => { - setLoading(false); + console.log("Finalised"); }); - const urlAnalyzerData=AnalyzerList(payload) - urlAnalyzerData.then((response) => - setAnalyzerData(response)) - .catch(error => { - console.error('API error:', error); + const urlAnalyzerData = AnalyzerList(payload); + urlAnalyzerData + .then((response) => setAnalyzerData(response)) + .catch((error) => { + console.error("API error:", error); }) .finally(() => { setLoading(false); }); - }; return ( -