Skip to content

Commit

Permalink
Add support for adv_search
Browse files Browse the repository at this point in the history
  • Loading branch information
hugoprudente committed Jul 24, 2023
1 parent b7d3ac6 commit 3ecfaa1
Show file tree
Hide file tree
Showing 7 changed files with 219 additions and 10 deletions.
3 changes: 3 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[flake8]
max-line-length = 160
ignore = F403,F405,F401
38 changes: 35 additions & 3 deletions PyMDL/Infopage.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,18 @@ def __init__(self, details: dict):
else:
if 'aired' in allkeys:
self.date = self.details.pop('aired').strip()
if 'episodes' in allkeys:
self.episodes = int(self.details.pop('episodes'))
else:
self.episodes = -1
if 'where_to_watch' in allkeys:
self.where_to_watch = self.details.pop('where_to_watch')
else:
self.where_to_watch = None

self.networks = "N/A"
if 'original network' in allkeys:
self.networks = self.details.pop('original network')

# Finding recommendations
def get_recommendations(self):
Expand Down Expand Up @@ -123,10 +135,13 @@ def dumps(self) -> dict:
'ratings': self.ratings,
'synopsis': self.synopsis,
'casts': self.casts,
'where_to_watch': self.where_to_watch,
'native title': self.native,
'episodes': self.episodes,
'genere': self.genre,
'duration': self.duration,
'country': self.country,
"original network": self.networks,
'aka': self.aka,
'director': self.director,
'screenwriter': self.screenwriter,
Expand Down Expand Up @@ -160,8 +175,7 @@ def save(self, file: str) -> bool:
json.dump(self.dumps(), f, indent=4)
return True
except Exception as e:
print("Got Exception\n", e)
return False
raise Exception("Got Exception\n", e)

def __str__(self):
    """Render the page as a string: the stringified detail dict from dumps()."""
    return f"{self.dumps()}"
Expand Down Expand Up @@ -211,7 +225,7 @@ def info(link: str):
details['ratings'] = details['ratings'].find("b").text

detailed_info = mainbox.find("div", class_="show-detailsxss").find("ul").find_all("li")
req_info = ['native title', 'also known as', 'director', 'screenwriter', 'screenwriter & director', 'genres']
req_info = ['native title', 'also known as', 'director', 'screenwriter', 'screenwriter & director', 'genres', 'original network']
for item in detailed_info:
try:
# if item.text.split(":")[0].lower() == 'tags':
Expand All @@ -232,6 +246,24 @@ def info(link: str):
for item in cast_names:
casts.append(item.text)
details['casts'] = casts

try:
where_to_watch_names = soup.find('div', class_='col-lg-8 col-md-8 col-rightx'). \
find("div", class_="wts").find_all("b")
where_to_watch = []
for item in where_to_watch_names:
where_to_watch.append(item.text)
details['where_to_watch'] = where_to_watch
except AttributeError:
details['where_to_watch'] = "N/A"

networks_names = soup.find('div', class_='col-lg-8 col-md-8 col-rightx'). \
find("div", class_="box clear").find("div", class_="p-a-sm").find_all("b")
networks = []
for item in networks_names:
networks.append(item.text)
details['networks'] = networks

details_box = soup.find("div", class_="box-body light-b").ul.find_all("li")
for item in details_box[1:]:
details[item.text.split(":")[0].lower()] = item.text.split(":")[1].strip()
Expand Down
133 changes: 130 additions & 3 deletions PyMDL/Search.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import bs4
import requests
import warnings
from typing import Union, List
from .Infopage import info, InfoPage
from datetime import datetime
from .enums import *


class SearchResult:
Expand Down Expand Up @@ -49,7 +52,7 @@ def get_all(self, limit: int = 20) -> List[InfoPage]:
except TypeError:
limit = 20
for item in list(self.urls.keys())[:limit]:
print('Getting:', item)
print('Getting:', item) # noqa: 201
lst.append(info(self.urls[item]))
return lst

Expand All @@ -58,7 +61,7 @@ def search(name: str, page: int = 1, style: str = None, year=None, eps: int = No
match_all: bool = True, max_results: int = 20) -> Union[SearchResult, None]:
urls = {}
if max_results > 20:
print("Cannot have more than 20 Results!")
warnings.warn("Cannot have more than 20 Results! Setting it to 20 and continuing...")
max_results = 20
filters_given = any([style, year, eps, score])
url = f"https://mydramalist.com/search?q={name.replace(' ', '+')}&page={page}"
Expand Down Expand Up @@ -136,7 +139,131 @@ def search(name: str, page: int = 1, style: str = None, year=None, eps: int = No
filter_check |= 0b1000

# Add it to list if checks pass
if match_all and filter_check == 15:
elif match_all and filter_check == 15:
urls[curr_title] = curr_url
elif (not match_all) and filter_check != 0:
urls[curr_title] = curr_url

else: # Directly add if no filters are given
urls[curr_title] = curr_url
if len(urls) >= max_results:
break
if len(urls) > 0:
return SearchResult(urls)
else:
return None


def adv_search(name: str = None, page: int = 1,
search_type: SearchType = SearchType.TITLE,
release_date_initial: int = 1890,
release_date_final: int = datetime.now().strftime('%Y'),
title_types: List[TitleType] = None,
title_status: TitleStatus = None,
title_sorted_by: TitleSortedByType = TitleSortedByType.MOST_POPULAR,
style: str = None,
year=None,
eps: int = None,
score: str = None,
match_all: bool = True,
max_results: int = 20) -> Union[SearchResult, None]:

urls = {}
if max_results > 20:
warnings.warn("Cannot have more than 20 Results! Setting it to 20 and continuing...")
max_results = 20

title_type_encoded = ""
if title_types is not None:
title_type_encoded = f"&ty={','.join(map(lambda t: str(t), title_types))}"

title_status_encoded = ""
if title_status is not None:
title_status_encoded = f'&st={title_status}'

release_date_encoded = ""
if release_date_initial != 1890 or release_date_final != datetime.now().strftime('%Y'):
release_date_encoded = f"&re={release_date_initial},{release_date_final}"

sorted_encoded = f"&so={title_sorted_by}"

filters_given = any([style, year, eps, score])
url = f"https://mydramalist.com/search?adv={search_type}{title_type_encoded}{title_status_encoded}{release_date_encoded}{sorted_encoded}&page={page}"
base = requests.get(url)
soup = bs4.BeautifulSoup(base.text, 'lxml')
results_box = soup.find('div', class_='col-lg-8 col-md-8').find_all('div', class_='box')
for item in results_box:
# Get Title
try:
curr_title = item.find("h6").find('a').text
except AttributeError:
return None

# Get Category
curr_cateory = item.find('span', class_='text-muted')

# Check if Ctegory Exists
if curr_cateory:
curr_cateory = curr_cateory.text
else:
continue

# Get URL
curr_url = item.find("h6").find('a')['href']

# Apply filters
if filters_given:
if match_all:
filter_check = 15 # Has to match all filters given
else:
filter_check = 0 # Has to match atleast one of the filters given
# In Binary from MSB [0] is style, [1] is year, [2] is eps, [3] is score

# Check for Score
curr_score = item.find('span', class_='score').text
if score:
if curr_score:
if score.endswith('+'):
if not float(curr_score) >= float(score.rstrip('+')):
filter_check &= 0b1110
else:
filter_check |= 0b0001
elif score.endswith('-'):
if not float(curr_score) <= float(score.rstrip('-')):
filter_check &= 0b1110
else:
filter_check |= 0b0001
else:
if not curr_score == score:
filter_check &= 0b1110
else:
filter_check |= 0b0001
else:
filter_check &= 0b1110

# Check for Episodes Filter
if eps:
if not ((curr_cateory.split(',')[-1]).startswith(f" {eps} episode")):
filter_check &= 0b1101
else:
filter_check |= 0b0010

# Check for Year Filter
if year:
if not curr_cateory.split(',')[0].split('-')[-1].strip() == str(year):
filter_check &= 0b1011
else:
filter_check |= 0b0100

# Check for Style Filter
if style:
if curr_cateory.find(style) == -1:
filter_check &= 0b0111
else:
filter_check |= 0b1000

# Add it to list if checks pass
elif match_all and filter_check == 15:
urls[curr_title] = curr_url
elif (not match_all) and filter_check != 0:
urls[curr_title] = curr_url
Expand Down
3 changes: 2 additions & 1 deletion PyMDL/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from .SearchPeople import search_people, PeopleSearchResult
from .Casts import casts, Cast
from .exceptions import *
from .enums import *

__title__ = 'PyMDL'
__license__ = 'MIT'
__version__ = '1.0.1'
__version__ = '1.0.2'
44 changes: 44 additions & 0 deletions PyMDL/enums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from __future__ import annotations
from enum import Enum

__all__ = ['SearchType', 'LocalEnum', 'TitleType', 'TitleStatus', 'TitleSortedByType']


class LocalEnum(Enum):
    """Base enum whose string form is the member's raw value.

    Subclasses stringify as their ``value`` (handy when interpolating a
    member straight into a URL query string) instead of the default
    ``ClassName.MEMBER`` representation.
    """

    def __str__(self):
        # Equivalent to the f-string form f'{self.value}'.
        return str(self.value)


class SearchType(LocalEnum):
    """
    Enumerator selecting what an advanced search targets
    (sent as the ``adv`` URL query parameter).

    * **TITLE** - search drama/movie titles
    * **PEOPLE** - search people
    * **ARTICLES** - search articles
    """
    # NOTE: the previous docstring described colors (White/Gray/Black) —
    # a copy-paste leftover unrelated to these members; fixed above.
    TITLE = 'titles'
    PEOPLE = 'people'
    ARTICLES = 'articles'


class TitleType(LocalEnum):
    """Title-kind filter for adv_search, encoded into the ``ty`` URL parameter."""
    # Values are numeric category ids expected by the site's search URL —
    # TODO confirm against mydramalist.com query format.
    DRAMAS = 68
    DRAMA_SPECIAL = 83
    TV_SHOWS = 86
    MOVIES = 77


class TitleStatus(LocalEnum):
    """Airing-status filter for adv_search, encoded into the ``st`` URL parameter."""
    # NOTE(review): values are deliberately non-sequential in declaration
    # order (1, 3, 2) — presumably matching the site's status codes; verify.
    ONGOING = 1
    COMPLETED = 3
    UPCOMING = 2


class TitleSortedByType(LocalEnum):
    """Result-ordering option for adv_search, encoded into the ``so`` URL parameter."""
    RELEVANCE = "relevance"
    MOST_POPULAR = "popular"  # default used by adv_search
    TOP_RANKED = "top"
    TOP_RATED = "rated"
    NEWEST = "newest"
    RELEASE_DATE = "date"
    RECENTLY_ADDED = "recently"
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@ pip install PyMDL

## Authors

Rocker2234
Rocker2234
RAiU14

## Last Release Changes
* 1.0.1
* Fix more Typos
* Fix incorrect filter when style is used
* Fix incorrect filter when style is used
* 1.0.2
* Add adv_search method

## License

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name='PyMDL',
version='1.0.1',
version='1.0.2',
license='MIT',
description='Web Scraping API to fetch data from MDL',
url='https://github.com/Rocker2234/Python-MDL-API',
Expand Down

0 comments on commit 3ecfaa1

Please sign in to comment.