From 3ecfaa13637e0665069e03c98f38868d77723465 Mon Sep 17 00:00:00 2001 From: Hugo Prudente Date: Mon, 24 Jul 2023 21:05:19 +0100 Subject: [PATCH] Add support for adv_search --- .flake8 | 3 ++ PyMDL/Infopage.py | 38 +++++++++++-- PyMDL/Search.py | 133 ++++++++++++++++++++++++++++++++++++++++++++-- PyMDL/__init__.py | 3 +- PyMDL/enums.py | 44 +++++++++++++++ README.md | 6 ++- setup.py | 2 +- 7 files changed, 219 insertions(+), 10 deletions(-) create mode 100644 .flake8 create mode 100644 PyMDL/enums.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..02a4a4e --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 160 +ignore = F403,F405,F401 diff --git a/PyMDL/Infopage.py b/PyMDL/Infopage.py index ae700f1..06bb859 100644 --- a/PyMDL/Infopage.py +++ b/PyMDL/Infopage.py @@ -74,6 +74,18 @@ def __init__(self, details: dict): else: if 'aired' in allkeys: self.date = self.details.pop('aired').strip() + if 'episodes' in allkeys: + self.episodes = int(self.details.pop('episodes')) + else: + self.episodes = -1 + if 'where_to_watch' in allkeys: + self.where_to_watch = self.details.pop('where_to_watch') + else: + self.where_to_watch = None + + self.networks = "N/A" + if 'original network' in allkeys: + self.networks = self.details.pop('original network') # Finding recommendations def get_recommendations(self): @@ -123,10 +135,13 @@ def dumps(self) -> dict: 'ratings': self.ratings, 'synopsis': self.synopsis, 'casts': self.casts, + 'where_to_watch': self.where_to_watch, 'native title': self.native, + 'episodes': self.episodes, 'genere': self.genre, 'duration': self.duration, 'country': self.country, + "original network": self.networks, 'aka': self.aka, 'director': self.director, 'screenwriter': self.screenwriter, @@ -160,8 +175,7 @@ def save(self, file: str) -> bool: json.dump(self.dumps(), f, indent=4) return True except Exception as e: - print("Got Exception\n", e) - return False + raise Exception("Got Exception\n", e) def __str__(self): 
return str(self.dumps()) @@ -211,7 +225,7 @@ def info(link: str): details['ratings'] = details['ratings'].find("b").text detailed_info = mainbox.find("div", class_="show-detailsxss").find("ul").find_all("li") - req_info = ['native title', 'also known as', 'director', 'screenwriter', 'screenwriter & director', 'genres'] + req_info = ['native title', 'also known as', 'director', 'screenwriter', 'screenwriter & director', 'genres', 'original network'] for item in detailed_info: try: # if item.text.split(":")[0].lower() == 'tags': @@ -232,6 +246,24 @@ def info(link: str): for item in cast_names: casts.append(item.text) details['casts'] = casts + + try: + where_to_watch_names = soup.find('div', class_='col-lg-8 col-md-8 col-rightx'). \ + find("div", class_="wts").find_all("b") + where_to_watch = [] + for item in where_to_watch_names: + where_to_watch.append(item.text) + details['where_to_watch'] = where_to_watch + except AttributeError: + details['where_to_watch'] = "N/A" + + networks_names = soup.find('div', class_='col-lg-8 col-md-8 col-rightx'). 
\ + find("div", class_="box clear").find("div", class_="p-a-sm").find_all("b") + networks = [] + for item in networks_names: + networks.append(item.text) + details['networks'] = networks + details_box = soup.find("div", class_="box-body light-b").ul.find_all("li") for item in details_box[1:]: details[item.text.split(":")[0].lower()] = item.text.split(":")[1].strip() diff --git a/PyMDL/Search.py b/PyMDL/Search.py index da4df10..c23a876 100644 --- a/PyMDL/Search.py +++ b/PyMDL/Search.py @@ -1,7 +1,10 @@ import bs4 import requests +import warnings from typing import Union, List from .Infopage import info, InfoPage +from datetime import datetime +from .enums import * class SearchResult: @@ -49,7 +52,7 @@ def get_all(self, limit: int = 20) -> List[InfoPage]: except TypeError: limit = 20 for item in list(self.urls.keys())[:limit]: - print('Getting:', item) + print('Getting:', item) # noqa: 201 lst.append(info(self.urls[item])) return lst @@ -58,7 +61,7 @@ def search(name: str, page: int = 1, style: str = None, year=None, eps: int = No match_all: bool = True, max_results: int = 20) -> Union[SearchResult, None]: urls = {} if max_results > 20: - print("Cannot have more than 20 Results!") + warnings.warn("Cannot have more than 20 Results! 
Setting it to 20 and continuing...") max_results = 20 filters_given = any([style, year, eps, score]) url = f"https://mydramalist.com/search?q={name.replace(' ', '+')}&page={page}" @@ -136,7 +139,131 @@ def search(name: str, page: int = 1, style: str = None, year=None, eps: int = No filter_check |= 0b1000 # Add it to list if checks pass - if match_all and filter_check == 15: + elif match_all and filter_check == 15: + urls[curr_title] = curr_url + elif (not match_all) and filter_check != 0: + urls[curr_title] = curr_url + + else: # Directly add if no filters are given + urls[curr_title] = curr_url + if len(urls) >= max_results: + break + if len(urls) > 0: + return SearchResult(urls) + else: + return None + + +def adv_search(name: str = None, page: int = 1, + search_type: SearchType = SearchType.TITLE, + release_date_initial: int = 1890, + release_date_final: int = datetime.now().strftime('%Y'), + title_types: List[TitleType] = None, + title_status: TitleStatus = None, + title_sorted_by: TitleSortedByType = TitleSortedByType.MOST_POPULAR, + style: str = None, + year=None, + eps: int = None, + score: str = None, + match_all: bool = True, + max_results: int = 20) -> Union[SearchResult, None]: + + urls = {} + if max_results > 20: + warnings.warn("Cannot have more than 20 Results! 
Setting it to 20 and continuing...") + max_results = 20 + + title_type_encoded = "" + if title_types is not None: + title_type_encoded = f"&ty={','.join(map(lambda t: str(t), title_types))}" + + title_status_encoded = "" + if title_status is not None: + title_status_encoded = f'&st={title_status}' + + release_date_encoded = "" + if release_date_initial != 1890 or release_date_final != datetime.now().strftime('%Y'): + release_date_encoded = f"&re={release_date_initial},{release_date_final}" + + sorted_encoded = f"&so={title_sorted_by}" + + filters_given = any([style, year, eps, score]) + url = f"https://mydramalist.com/search?adv={search_type}{title_type_encoded}{title_status_encoded}{release_date_encoded}{sorted_encoded}&page={page}" + base = requests.get(url) + soup = bs4.BeautifulSoup(base.text, 'lxml') + results_box = soup.find('div', class_='col-lg-8 col-md-8').find_all('div', class_='box') + for item in results_box: + # Get Title + try: + curr_title = item.find("h6").find('a').text + except AttributeError: + return None + + # Get Category + curr_cateory = item.find('span', class_='text-muted') + + # Check if Category Exists + if curr_cateory: + curr_cateory = curr_cateory.text + else: + continue + + # Get URL + curr_url = item.find("h6").find('a')['href'] + + # Apply filters + if filters_given: + if match_all: + filter_check = 15 # Has to match all filters given + else: + filter_check = 0 # Has to match at least one of the filters given + # In Binary from MSB [0] is style, [1] is year, [2] is eps, [3] is score + + # Check for Score + curr_score = item.find('span', class_='score').text + if score: + if curr_score: + if score.endswith('+'): + if not float(curr_score) >= float(score.rstrip('+')): + filter_check &= 0b1110 + else: + filter_check |= 0b0001 + elif score.endswith('-'): + if not float(curr_score) <= float(score.rstrip('-')): + filter_check &= 0b1110 + else: + filter_check |= 0b0001 + else: + if not curr_score == score: + filter_check &= 0b1110 + else: + 
filter_check |= 0b0001 + else: + filter_check &= 0b1110 + + # Check for Episodes Filter + if eps: + if not ((curr_cateory.split(',')[-1]).startswith(f" {eps} episode")): + filter_check &= 0b1101 + else: + filter_check |= 0b0010 + + # Check for Year Filter + if year: + if not curr_cateory.split(',')[0].split('-')[-1].strip() == str(year): + filter_check &= 0b1011 + else: + filter_check |= 0b0100 + + # Check for Style Filter + if style: + if curr_cateory.find(style) == -1: + filter_check &= 0b0111 + else: + filter_check |= 0b1000 + + # Add it to list if checks pass + elif match_all and filter_check == 15: + urls[curr_title] = curr_url + elif (not match_all) and filter_check != 0: + urls[curr_title] = curr_url + + else: # Directly add if no filters are given + urls[curr_title] = curr_url + if len(urls) >= max_results: + break + if len(urls) > 0: + return SearchResult(urls) + else: + return None diff --git a/PyMDL/__init__.py b/PyMDL/__init__.py index 5fb88cb..9a4f94b 100644 --- a/PyMDL/__init__.py +++ b/PyMDL/__init__.py @@ -3,7 +3,8 @@ from .SearchPeople import search_people, PeopleSearchResult from .Casts import casts, Cast from .exceptions import * +from .enums import * __title__ = 'PyMDL' __license__ = 'MIT' -__version__ = '1.0.1' +__version__ = '1.0.2' diff --git a/PyMDL/enums.py b/PyMDL/enums.py new file mode 100644 index 0000000..46c18a3 --- /dev/null +++ b/PyMDL/enums.py @@ -0,0 +1,44 @@ +from __future__ import annotations +from enum import Enum + +__all__ = ['SearchType', 'LocalEnum', 'TitleType', 'TitleStatus', 'TitleSortedByType'] + + +class LocalEnum(Enum): + def __str__(self): + return f'{self.value}' + + +class SearchType(LocalEnum): + """ + Enumerator representing adv search type + * **TITLE** - Search in titles + * **PEOPLE** - Search in people + * **ARTICLES** - Search in articles + """ + TITLE = 'titles' + PEOPLE = 'people' + ARTICLES = 'articles' + + +class TitleType(LocalEnum): + DRAMAS = 68 + DRAMA_SPECIAL = 83 + TV_SHOWS = 86 + MOVIES = 77 + + +class TitleStatus(LocalEnum): + ONGOING = 1 + COMPLETED = 3 + UPCOMING = 2 + + +class TitleSortedByType(LocalEnum): + RELEVANCE = "relevance" + MOST_POPULAR = "popular" + TOP_RANKED = "top" + TOP_RATED = "rated" + NEWEST = "newest" 
+ RELEASE_DATE = "date" + RECENTLY_ADDED = "recently" diff --git a/README.md b/README.md index 919d5a1..a926e93 100644 --- a/README.md +++ b/README.md @@ -23,13 +23,15 @@ pip install PyMDL ## Authors -Rocker2234 +Rocker2234 RAiU14 ## Last Release Changes * 1.0.1 * Fix more Typos - * Fix incorrect filter when style is used + * Fix incorrect filter when style is used +* 1.0.2 + * Add adv_search method ## License diff --git a/setup.py b/setup.py index 837d3df..010f9c6 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='PyMDL', - version='1.0.1', + version='1.0.2', license='MIT', description='Web Scraping API to fetch data from MDL', url='https://github.com/Rocker2234/Python-MDL-API',