Skip to content

Commit

Permalink
add user header to all requests.get calls
Browse files Browse the repository at this point in the history
  • Loading branch information
RNerd12 committed Mar 7, 2024
1 parent 80878bb commit 1c06603
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 11 deletions.
7 changes: 4 additions & 3 deletions espncricinfo/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def __init__(self, match_id):
self.match_id = match_id
self.match_url = "https://www.espncricinfo.com/matches/engine/match/{0}.html".format(str(match_id))
self.json_url = "https://www.espncricinfo.com/matches/engine/match/{0}.json".format(str(match_id))
self.headers = {'user-agent': 'Mozilla/5.0'}
self.json = self.get_json()
self.html = self.get_html()
self.comms_json = self.get_comms_json()
Expand Down Expand Up @@ -89,7 +90,7 @@ def __repr__(self):
return (f'{self.__class__.__name__}('f'{self.match_id!r})')

def get_json(self):
r = requests.get(self.json_url)
r = requests.get(self.json_url,headers=self.headers)
if r.status_code == 404:
raise MatchNotFoundError
elif 'Scorecard not yet available' in r.text:
Expand All @@ -98,7 +99,7 @@ def get_json(self):
return r.json()

def get_html(self):
r = requests.get(self.match_url)
r = requests.get(self.match_url,headers=self.headers)
if r.status_code == 404:
raise MatchNotFoundError
else:
Expand Down Expand Up @@ -431,6 +432,6 @@ def get_recent_matches(date=None):
url = "https://www.espncricinfo.com/ci/engine/match/index.html?date=%sview=week" % date
else:
url = "https://www.espncricinfo.com/ci/engine/match/index.html?view=week"
r = requests.get(url)
r = requests.get(url,headers={'user-agent': 'Mozilla/5.0'})
soup = BeautifulSoup(r.text, 'html.parser')
return [x['href'].split('/',4)[4].split('.')[0] for x in soup.findAll('a', href=True, text='Scorecard')]
13 changes: 7 additions & 6 deletions espncricinfo/player.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def __init__(self, player_id):
self.url = "https://www.espncricinfo.com/player/player-name-{0}".format(str(player_id))
self.json_url = "http://core.espnuk.org/v2/sports/cricket/athletes/{0}".format(str(player_id))
self.new_json_url = "https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId={0}".format(str(player_id))
self.headers = {'user-agent': 'Mozilla/5.0'}
self.parsed_html = self.get_html()
self.json = self.get_json()
self.new_json = self.get_new_json()
Expand All @@ -28,21 +29,21 @@ def __init__(self, player_id):
self.major_teams = self._major_teams()

def get_html(self):
    """Fetch the player's profile page and return it parsed.

    Returns:
        BeautifulSoup: parsed HTML document for ``self.url``.

    Raises:
        PlayerNotFoundError: if the page responds with HTTP 404.
    """
    # Single request only — the stale pre-commit call without headers is
    # dropped (it was a dead assignment that fired an extra HTTP request).
    # The browser-like user-agent in self.headers is required because
    # ESPN blocks the default python-requests user agent.
    r = requests.get(self.url, headers=self.headers)
    if r.status_code == 404:
        raise PlayerNotFoundError
    return BeautifulSoup(r.text, 'html.parser')

def get_json(self):
    """Fetch the player's record from the core JSON API.

    Returns:
        dict: decoded JSON body from ``self.json_url``.

    Raises:
        PlayerNotFoundError: if the endpoint responds with HTTP 404.
    """
    # Single request only — the duplicated header-less call shown in the
    # diff hunk is removed; it would otherwise issue a throwaway HTTP
    # request whose response is immediately overwritten.
    r = requests.get(self.json_url, headers=self.headers)
    if r.status_code == 404:
        raise PlayerNotFoundError
    return r.json()

def get_new_json(self):
r = requests.get(self.new_json_url, headers={'user-agent': 'Mozilla/5.0'})
r = requests.get(self.new_json_url, headers=self.headers)
if r.status_code == 404:
raise PlayerNotFoundError
else:
Expand Down Expand Up @@ -126,7 +127,7 @@ def get_career_averages(self, file_name=None, match_format=11, data_type='allrou
self.file_name = f"{self.player_id}_{self.match_format}_{self.data_type}_career_averages.csv"

self.url=f"https://stats.espncricinfo.com/ci/engine/player/{self.player_id}.html?class={self.match_format};template=results;type={self.data_type}"
html_doc = requests.get(self.url)
html_doc = requests.get(self.url, headers=self.headers)
soup = BeautifulSoup(html_doc.text, 'html.parser')
tables = soup.find_all("table")[2]
table_rows = tables.find_all("tr")
Expand Down Expand Up @@ -158,7 +159,7 @@ def get_career_summary(self, file_name=None, match_format=11, data_type='allroun
self.file_name = f"{self.player_id}_{self.match_format}_{self.data_type}_career_summary.csv"

self.url=f"https://stats.espncricinfo.com/ci/engine/player/{self.player_id}.html?class={self.match_format};template=results;type={self.data_type}"
html_doc = requests.get(self.url)
html_doc = requests.get(self.url, headers=self.headers)
soup = BeautifulSoup(html_doc.text, 'html.parser')
tables = soup.find_all("table")[3]
table_rows = tables.find_all("tr")
Expand Down Expand Up @@ -192,7 +193,7 @@ def get_data(self, file_name=None, match_format=11, data_type='allround', view='
self.file_name = f"{self.player_id}_{self.match_format}_{self.data_type}_{self.view}.csv"

self.url=f"https://stats.espncricinfo.com/ci/engine/player/{self.player_id}.html?class={self.match_format};template=results;type={self.data_type};view={self.view}"
html_doc = requests.get(self.url)
html_doc = requests.get(self.url, headers=self.headers())
soup = BeautifulSoup(html_doc.text, 'html.parser')
tables = soup.find_all("table")[3]
table_rows = tables.find_all("tr")
Expand Down
3 changes: 2 additions & 1 deletion espncricinfo/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def __init__(self, series_id):
self.json_url = "http://core.espnuk.org/v2/sports/cricket/leagues/{0}/".format(str(series_id))
self.events_url = "http://core.espnuk.org/v2/sports/cricket/leagues/{0}/events".format(str(series_id))
self.seasons_url = "http://core.espnuk.org/v2/sports/cricket/leagues/{0}/seasons".format(str(series_id))
self.headers = {'user-agent': 'Mozilla/5.0'}
self.json = self.get_json(self.json_url)
self.seasons = self._get_seasons()
self.years = self._get_years_from_seasons()
Expand All @@ -25,7 +26,7 @@ def __init__(self, series_id):
self.events = self._build_events()

def get_json(self, url):
r = requests.get(url)
r = requests.get(url,headers=self.headers)
if r.status_code == 404:
raise "Not Found"
else:
Expand Down
3 changes: 2 additions & 1 deletion espncricinfo/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ class Summary(object):

def __init__(self):
self.url = "http://static.cricinfo.com/rss/livescores.xml"
self.headers = {'user-agent': 'Mozilla/5.0'}
self.xml = self.get_xml()
self.match_ids = self._match_ids()
self.matches = self._build_matches()

def get_xml(self):
r = requests.get(self.url)
r = requests.get(self.url, headers=self.headers)
if r.status_code == 404:
raise MatchNotFoundError
else:
Expand Down

0 comments on commit 1c06603

Please sign in to comment.