diff --git a/espncricinfo/match.py b/espncricinfo/match.py index e58753b..e96bc8a 100644 --- a/espncricinfo/match.py +++ b/espncricinfo/match.py @@ -9,6 +9,7 @@ def __init__(self, match_id): self.match_id = match_id self.match_url = "https://www.espncricinfo.com/matches/engine/match/{0}.html".format(str(match_id)) self.json_url = "https://www.espncricinfo.com/matches/engine/match/{0}.json".format(str(match_id)) + self.headers = {'user-agent': 'Mozilla/5.0'} self.json = self.get_json() self.html = self.get_html() self.comms_json = self.get_comms_json() @@ -89,7 +90,7 @@ def __repr__(self): return (f'{self.__class__.__name__}('f'{self.match_id!r})') def get_json(self): - r = requests.get(self.json_url) + r = requests.get(self.json_url,headers=self.headers) if r.status_code == 404: raise MatchNotFoundError elif 'Scorecard not yet available' in r.text: @@ -98,7 +99,7 @@ def get_json(self): return r.json() def get_html(self): - r = requests.get(self.match_url) + r = requests.get(self.match_url,headers=self.headers) if r.status_code == 404: raise MatchNotFoundError else: @@ -431,6 +432,6 @@ def get_recent_matches(date=None): url = "https://www.espncricinfo.com/ci/engine/match/index.html?date=%sview=week" % date else: url = "https://www.espncricinfo.com/ci/engine/match/index.html?view=week" - r = requests.get(url) + r = requests.get(url,headers={'user-agent': 'Mozilla/5.0'}) soup = BeautifulSoup(r.text, 'html.parser') return [x['href'].split('/',4)[4].split('.')[0] for x in soup.findAll('a', href=True, text='Scorecard')] diff --git a/espncricinfo/player.py b/espncricinfo/player.py index dece182..6b33b7e 100644 --- a/espncricinfo/player.py +++ b/espncricinfo/player.py @@ -12,6 +12,7 @@ def __init__(self, player_id): self.url = "https://www.espncricinfo.com/player/player-name-{0}".format(str(player_id)) self.json_url = "http://core.espnuk.org/v2/sports/cricket/athletes/{0}".format(str(player_id)) self.new_json_url = 
"https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId={0}".format(str(player_id)) + self.headers = {'user-agent': 'Mozilla/5.0'} self.parsed_html = self.get_html() self.json = self.get_json() self.new_json = self.get_new_json() @@ -28,21 +29,21 @@ def __init__(self, player_id): self.major_teams = self._major_teams() def get_html(self): - r = requests.get(self.url) + r = requests.get(self.url, headers=self.headers) if r.status_code == 404: raise PlayerNotFoundError else: return BeautifulSoup(r.text, 'html.parser') def get_json(self): - r = requests.get(self.json_url) + r = requests.get(self.json_url, headers=self.headers) if r.status_code == 404: raise PlayerNotFoundError else: return r.json() def get_new_json(self): - r = requests.get(self.new_json_url, headers={'user-agent': 'Mozilla/5.0'}) + r = requests.get(self.new_json_url, headers=self.headers) if r.status_code == 404: raise PlayerNotFoundError else: @@ -126,7 +127,7 @@ def get_career_averages(self, file_name=None, match_format=11, data_type='allrou self.file_name = f"{self.player_id}_{self.match_format}_{self.data_type}_career_averages.csv" self.url=f"https://stats.espncricinfo.com/ci/engine/player/{self.player_id}.html?class={self.match_format};template=results;type={self.data_type}" - html_doc = requests.get(self.url) + html_doc = requests.get(self.url, headers=self.headers) soup = BeautifulSoup(html_doc.text, 'html.parser') tables = soup.find_all("table")[2] table_rows = tables.find_all("tr") @@ -158,7 +159,7 @@ def get_career_summary(self, file_name=None, match_format=11, data_type='allroun self.file_name = f"{self.player_id}_{self.match_format}_{self.data_type}_career_summary.csv" self.url=f"https://stats.espncricinfo.com/ci/engine/player/{self.player_id}.html?class={self.match_format};template=results;type={self.data_type}" - html_doc = requests.get(self.url) + html_doc = requests.get(self.url, headers=self.headers) soup = BeautifulSoup(html_doc.text, 'html.parser') tables = 
soup.find_all("table")[3] table_rows = tables.find_all("tr") @@ -192,7 +193,7 @@ def get_data(self, file_name=None, match_format=11, data_type='allround', view=' self.file_name = f"{self.player_id}_{self.match_format}_{self.data_type}_{self.view}.csv" self.url=f"https://stats.espncricinfo.com/ci/engine/player/{self.player_id}.html?class={self.match_format};template=results;type={self.data_type};view={self.view}" - html_doc = requests.get(self.url) + html_doc = requests.get(self.url, headers=self.headers) soup = BeautifulSoup(html_doc.text, 'html.parser') tables = soup.find_all("table")[3] table_rows = tables.find_all("tr") diff --git a/espncricinfo/series.py b/espncricinfo/series.py index 0d90c50..7999d5e 100644 --- a/espncricinfo/series.py +++ b/espncricinfo/series.py @@ -9,6 +9,7 @@ def __init__(self, series_id): self.json_url = "http://core.espnuk.org/v2/sports/cricket/leagues/{0}/".format(str(series_id)) self.events_url = "http://core.espnuk.org/v2/sports/cricket/leagues/{0}/events".format(str(series_id)) self.seasons_url = "http://core.espnuk.org/v2/sports/cricket/leagues/{0}/seasons".format(str(series_id)) + self.headers = {'user-agent': 'Mozilla/5.0'} self.json = self.get_json(self.json_url) self.seasons = self._get_seasons() self.years = self._get_years_from_seasons() @@ -25,7 +26,7 @@ def __init__(self, series_id): self.events = self._build_events() def get_json(self, url): - r = requests.get(url) + r = requests.get(url,headers=self.headers) if r.status_code == 404: raise "Not Found" else: diff --git a/espncricinfo/summary.py b/espncricinfo/summary.py index d64f39f..eca242f 100644 --- a/espncricinfo/summary.py +++ b/espncricinfo/summary.py @@ -6,12 +6,13 @@ class Summary(object): def __init__(self): self.url = "http://static.cricinfo.com/rss/livescores.xml" + self.headers = {'user-agent': 'Mozilla/5.0'} self.xml = self.get_xml() self.match_ids = self._match_ids() self.matches = self._build_matches() def get_xml(self): - r = 
requests.get(self.url, headers=self.headers) if r.status_code == 404: raise MatchNotFoundError else: