Add support for adv_search #4

Draft · wants to merge 1 commit into base: main
3 changes: 3 additions & 0 deletions .flake8
@@ -0,0 +1,3 @@
[flake8]
max-line-length = 160
ignore = F403,F405,F401
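The three ignored codes cover import-style checks: F401 is "imported but unused", F403 is "star import used", and F405 is "may be undefined, or defined from star imports". A minimal file that trips each one (an assumed illustration, not taken from this PR):

import json              # F401: imported but unused
from os.path import *    # F403: star import
print(join("a", "b"))    # F405: 'join' may come from the star import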
38 changes: 35 additions & 3 deletions PyMDL/Infopage.py
@@ -74,6 +74,18 @@ def __init__(self, details: dict):
else:
if 'aired' in allkeys:
self.date = self.details.pop('aired').strip()
if 'episodes' in allkeys:
self.episodes = int(self.details.pop('episodes'))
else:
self.episodes = -1
if 'where_to_watch' in allkeys:
self.where_to_watch = self.details.pop('where_to_watch')
else:
self.where_to_watch = None

self.networks = "N/A"
if 'original network' in allkeys:
self.networks = self.details.pop('original network')

# Finding recommendations
def get_recommendations(self):
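A short usage sketch of the new attributes and their fallbacks (the URL is hypothetical; info is this module's existing entry point):

from PyMDL import info

page = info("https://mydramalist.com/1-example-drama")  # hypothetical URL
print(page.episodes)        # int; -1 when the page lists no episode count
print(page.where_to_watch)  # list of providers, "N/A", or None depending on the page
print(page.networks)        # original network string; "N/A" when not listed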
@@ -123,10 +135,13 @@ def dumps(self) -> dict:
'ratings': self.ratings,
'synopsis': self.synopsis,
'casts': self.casts,
'where_to_watch': self.where_to_watch,
'native title': self.native,
'episodes': self.episodes,
'genere': self.genre,
'duration': self.duration,
'country': self.country,
"original network": self.networks,
'aka': self.aka,
'director': self.director,
'screenwriter': self.screenwriter,
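With these keys added, dumps() output now looks roughly like this (abbreviated; the values are assumed examples):

{
    'where_to_watch': ['Netflix', 'Viki'],  # or "N/A" when the section is missing
    'episodes': 16,                         # -1 when not listed
    'original network': 'tvN',              # "N/A" when not listed
}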
@@ -160,8 +175,7 @@ def save(self, file: str) -> bool:
json.dump(self.dumps(), f, indent=4)
return True
except Exception as e:
print("Got Exception\n", e)
return False
raise Exception("Got Exception\n", e)

def __str__(self):
return str(self.dumps())
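Because save() now raises instead of printing and returning False, callers that branched on the boolean return need a try/except instead; a minimal sketch:

try:
    page.save("drama.json")
except Exception as e:  # save() now re-raises with context instead of returning False
    print("Could not save:", e)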
@@ -211,7 +225,7 @@ def info(link: str):
details['ratings'] = details['ratings'].find("b").text

detailed_info = mainbox.find("div", class_="show-detailsxss").find("ul").find_all("li")
req_info = ['native title', 'also known as', 'director', 'screenwriter', 'screenwriter & director', 'genres']
req_info = ['native title', 'also known as', 'director', 'screenwriter', 'screenwriter & director', 'genres', 'original network']
for item in detailed_info:
try:
# if item.text.split(":")[0].lower() == 'tags':
@@ -232,6 +246,24 @@
for item in cast_names:
casts.append(item.text)
details['casts'] = casts

try:
where_to_watch_names = soup.find('div', class_='col-lg-8 col-md-8 col-rightx'). \
find("div", class_="wts").find_all("b")
where_to_watch = []
for item in where_to_watch_names:
where_to_watch.append(item.text)
details['where_to_watch'] = where_to_watch
except AttributeError:
details['where_to_watch'] = "N/A"

networks_names = soup.find('div', class_='col-lg-8 col-md-8 col-rightx'). \
find("div", class_="box clear").find("div", class_="p-a-sm").find_all("b")
networks = []
for item in networks_names:
networks.append(item.text)
details['networks'] = networks

details_box = soup.find("div", class_="box-body light-b").ul.find_all("li")
for item in details_box[1:]:
details[item.text.split(":")[0].lower()] = item.text.split(":")[1].strip()
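Note that, unlike the where_to_watch block above, the networks lookup is not guarded, so a page missing that box would raise AttributeError mid-scrape. A guarded sketch of the same lookup, under that assumption:

try:
    networks_names = soup.find('div', class_='col-lg-8 col-md-8 col-rightx') \
        .find("div", class_="box clear").find("div", class_="p-a-sm").find_all("b")
    details['networks'] = [b.text for b in networks_names]
except AttributeError:  # one of the finds returned None
    details['networks'] = "N/A"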
133 changes: 130 additions & 3 deletions PyMDL/Search.py
@@ -1,7 +1,10 @@
import bs4
import requests
import warnings
from typing import Union, List
from .Infopage import info, InfoPage
from datetime import datetime
from .enums import *


class SearchResult:
@@ -49,7 +52,7 @@ def get_all(self, limit: int = 20) -> List[InfoPage]:
except TypeError:
limit = 20
for item in list(self.urls.keys())[:limit]:
print('Getting:', item)
print('Getting:', item) # noqa: 201
lst.append(info(self.urls[item]))
return lst

@@ -58,7 +61,7 @@ def search(name: str, page: int = 1, style: str = None, year=None, eps: int = None,
match_all: bool = True, max_results: int = 20) -> Union[SearchResult, None]:
urls = {}
if max_results > 20:
print("Cannot have more than 20 Results!")
warnings.warn("Cannot have more than 20 Results! Setting it to 20 and continuing...")
max_results = 20
filters_given = any([style, year, eps, score])
url = f"https://mydramalist.com/search?q={name.replace(' ', '+')}&page={page}"
@@ -136,7 +139,131 @@ def search(name: str, page: int = 1, style: str = None, year=None, eps: int = None,
filter_check |= 0b1000

# Add it to list if checks pass
if match_all and filter_check == 15:
elif match_all and filter_check == 15:
urls[curr_title] = curr_url
elif (not match_all) and filter_check != 0:
urls[curr_title] = curr_url

else: # Directly add if no filters are given
urls[curr_title] = curr_url
if len(urls) >= max_results:
break
if len(urls) > 0:
return SearchResult(urls)
else:
return None


def adv_search(name: str = None, page: int = 1,
search_type: SearchType = SearchType.TITLE,
release_date_initial: int = 1890,
release_date_final: int = datetime.now().year,
title_types: List[TitleType] = None,
title_status: TitleStatus = None,
title_sorted_by: TitleSortedByType = TitleSortedByType.MOST_POPULAR,
style: str = None,
year=None,
eps: int = None,
score: str = None,
match_all: bool = True,
max_results: int = 20) -> Union[SearchResult, None]:

urls = {}
if max_results > 20:
warnings.warn("Cannot have more than 20 Results! Setting it to 20 and continuing...")
max_results = 20

title_type_encoded = ""
if title_types is not None:
title_type_encoded = f"&ty={','.join(map(str, title_types))}"

title_status_encoded = ""
if title_status is not None:
title_status_encoded = f'&st={title_status}'

release_date_encoded = ""
if release_date_initial != 1890 or release_date_final != datetime.now().year:
release_date_encoded = f"&re={release_date_initial},{release_date_final}"

sorted_encoded = f"&so={title_sorted_by}"

filters_given = any([style, year, eps, score])
url = f"https://mydramalist.com/search?adv={search_type}{title_type_encoded}{title_status_encoded}{release_date_encoded}{sorted_encoded}&page={page}"
base = requests.get(url)
soup = bs4.BeautifulSoup(base.text, 'lxml')
results_box = soup.find('div', class_='col-lg-8 col-md-8').find_all('div', class_='box')
for item in results_box:
# Get Title
try:
curr_title = item.find("h6").find('a').text
except AttributeError:
return None

# Get Category
curr_category = item.find('span', class_='text-muted')

# Check if Category exists
if curr_category:
curr_category = curr_category.text
else:
continue

# Get URL
curr_url = item.find("h6").find('a')['href']

# Apply filters
if filters_given:
if match_all:
filter_check = 15 # Has to match all filters given
else:
filter_check = 0 # Has to match at least one of the filters given
# In binary, from MSB: bit [0] is style, [1] is year, [2] is eps, [3] is score
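# Worked example: style and year given with match_all=True starts at 0b1111;
# a failed year check does filter_check &= 0b1011 -> 0b1011 (11 != 15),
# so the title is dropped even though the unused eps/score bits stayed set.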

# Check for Score
curr_score = item.find('span', class_='score').text
if score:
if curr_score:
if score.endswith('+'):
if not float(curr_score) >= float(score.rstrip('+')):
filter_check &= 0b1110
else:
filter_check |= 0b0001
elif score.endswith('-'):
if not float(curr_score) <= float(score.rstrip('-')):
filter_check &= 0b1110
else:
filter_check |= 0b0001
else:
if not curr_score == score:
filter_check &= 0b1110
else:
filter_check |= 0b0001
else:
filter_check &= 0b1110

# Check for Episodes Filter
if eps:
if not ((curr_category.split(',')[-1]).startswith(f" {eps} episode")):
filter_check &= 0b1101
else:
filter_check |= 0b0010

# Check for Year Filter
if year:
if not curr_category.split(',')[0].split('-')[-1].strip() == str(year):
filter_check &= 0b1011
else:
filter_check |= 0b0100

# Check for Style Filter
if style:
if curr_category.find(style) == -1:
filter_check &= 0b0111
else:
filter_check |= 0b1000

# Add it to list if checks pass
elif match_all and filter_check == 15:
urls[curr_title] = curr_url
elif (not match_all) and filter_check != 0:
urls[curr_title] = curr_url
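A usage sketch of the new function, based on the signature and enums in this diff (the query values are assumed, and adv_search is assumed to be exported from the package like search):

from PyMDL import adv_search, SearchType, TitleType, TitleStatus, TitleSortedByType

results = adv_search(
    search_type=SearchType.TITLE,
    title_types=[TitleType.DRAMAS, TitleType.TV_SHOWS],
    title_status=TitleStatus.COMPLETED,
    title_sorted_by=TitleSortedByType.TOP_RANKED,
    release_date_initial=2015,
    release_date_final=2020,
    max_results=5,
)
if results:  # adv_search returns None when nothing matched
    for page in results.get_all(limit=5):
        print(page)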
3 changes: 2 additions & 1 deletion PyMDL/__init__.py
@@ -3,7 +3,8 @@
from .SearchPeople import search_people, PeopleSearchResult
from .Casts import casts, Cast
from .exceptions import *
from .enums import *

__title__ = 'PyMDL'
__license__ = 'MIT'
__version__ = '1.0.1'
__version__ = '1.0.2'
44 changes: 44 additions & 0 deletions PyMDL/enums.py
@@ -0,0 +1,44 @@
from __future__ import annotations
from enum import Enum

__all__ = ['SearchType', 'LocalEnum', 'TitleType', 'TitleStatus', 'TitleSortedByType']


class LocalEnum(Enum):
def __str__(self):
return f'{self.value}'


class SearchType(LocalEnum):
"""
Enumerator representing adv search type
* **TITLE** - White
* **PEOPLE** - Gray
* **ARTICLES** - Black
"""
TITLE = 'titles'
PEOPLE = 'people'
ARTICLES = 'articles'


class TitleType(LocalEnum):
DRAMAS = 68
DRAMA_SPECIAL = 83
TV_SHOWS = 86
MOVIES = 77


class TitleStatus(LocalEnum):
ONGOING = 1
COMPLETED = 3
UPCOMING = 2


class TitleSortedByType(LocalEnum):
RELEVANCE = "relevance"
MOST_POPULAR = "popular"
TOP_RANKED = "top"
TOP_RATED = "rated"
NEWEST = "newest"
RELEASE_DATE = "date"
RECENTLY_ADDED = "recently"
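LocalEnum.__str__ returns the bare value, which is what the URL building in adv_search relies on; a quick check:

from PyMDL.enums import TitleType, TitleSortedByType

print(TitleSortedByType.TOP_RANKED)  # -> top
print(f"&ty={','.join(map(str, [TitleType.DRAMAS, TitleType.MOVIES]))}")  # -> &ty=68,77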
6 changes: 4 additions & 2 deletions README.md
@@ -23,13 +23,15 @@ pip install PyMDL

## Authors

Rocker2234
Rocker2234
RAiU14

## Last Release Changes
* 1.0.1
* Fix more Typos
* Fix incorrect filter when style is used
* Fix incorrect filter when style is used
* 1.0.2
* Add adv_search method

## License

2 changes: 1 addition & 1 deletion setup.py
@@ -7,7 +7,7 @@

setup(
name='PyMDL',
version='1.0.1',
version='1.0.2',
license='MIT',
description='Web Scraping API to fetch data from MDL',
url='https://github.com/Rocker2234/Python-MDL-API',