Commit 66fcb48

add: flush video data every 5 pages worth of scraping
Varstahl committed Nov 6, 2019
Parent: 6a7d5ef · Commit: 66fcb48
Showing 3 changed files with 23 additions and 17 deletions.
German translation file:

@@ -570,9 +570,9 @@ msgid "Connection error"
 msgstr "Verbindungsfehler"
 
 msgctxt "#30252"
-msgid "Loading next page…"
-msgstr "Lade nächste Seite…"
+msgid "Loading page {0}…"
+msgstr "Lade Seite {0}…"
 
 msgctxt "#30253"
-msgid "Found “{0}"
-msgstr "Gefunden “{0}"
+msgid "{0} seasons"
+msgstr "{0} Staffeln"
Second translation file (msgstr entries still untranslated):

@@ -610,9 +610,9 @@ msgid "Connection error"
 msgstr ""
 
 msgctxt "#30252"
-msgid "Loading next page…"
+msgid "Loading page {0}…"
 msgstr ""
 
 msgctxt "#30253"
-msgid "Found “{0}"
+msgid "{0} seasons"
 msgstr ""
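The new msgid for #30252 contains a {0} placeholder that the plugin fills at runtime with str.format (see the primevideo.py hunk below), so translations need to keep the placeholder intact. A minimal illustration, using the German value from the hunk above:

# getString(30252) returns the localized template; the plugin then formats in the page number
loading = 'Lade Seite {0}…'   # German value of string #30252
print(loading.format(7))      # -> 'Lade Seite 7…'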
plugin.video.amazon-test/resources/lib/primevideo.py (28 changes: 17 additions & 11 deletions):
@@ -99,12 +99,13 @@ def __init__(self, globalsInstance, settingsInstance):
         }
         self._LoadCache()
 
-    def _Flush(self, FlushVideoData=False):
+    def _Flush(self, bFlushCacheData=True, bFlushVideoData=False):
         """ Cache catalog and video data """
 
-        with open(self._catalogCache, 'wb+') as fp:
-            pickle.dump(self._catalog, fp)
-        if FlushVideoData:
+        if bFlushCacheData:
+            with open(self._catalogCache, 'wb+') as fp:
+                pickle.dump(self._catalog, fp)
+        if bFlushVideoData:
             with open(self._videodataCache, 'w+') as fp:
                 json.dump(self._videodata, fp)
 
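With the refactored signature, the pickled catalog and the JSON video data can be written independently. A small, illustrative sketch of the call patterns this enables, on the same instance as in the hunk above:

self._Flush()                                               # default: flush only the pickled catalog
self._Flush(bFlushCacheData=False, bFlushVideoData=True)    # mid-scrape checkpoint: video data only
self._Flush(bFlushVideoData=True)                           # flush both catalog and video data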

@@ -950,6 +951,7 @@ def ParseSinglePage(oid, o, bCacheRefresh, data=None, url=None):
         amzLang = amzLang if amzLang else 'en_US'
 
         bUpdatedVideoData = False  # Whether or not the pvData has been updated
+        pageNumber = 1  # Page number
 
         while 0 < len(requestURLs):
             requestURL = requestURLs.pop(0)  # rULRs: FIFO stack
@@ -1043,19 +1045,23 @@ def ParseSinglePage(oid, o, bCacheRefresh, data=None, url=None):
 
             # Pagination
             if 'pagination' in cnt:
-                page = None
+                nextPage = None
                 if 'apiUrl' in cnt['pagination']:
-                    page = cnt['pagination']['apiUrl']
+                    nextPage = cnt['pagination']['apiUrl']
                 elif 'paginator' in cnt['pagination']:
-                    page = next((x['href'] for x in cnt['pagination']['paginator'] if 'atv.wps.PaginatorNext' == x['*className*']), None)
-                if page:
-                    requestURLs.append(page)
+                    nextPage = next((x['href'] for x in cnt['pagination']['paginator'] if 'atv.wps.PaginatorNext' == x['*className*']), None)
+                if nextPage:
+                    requestURLs.append(nextPage)
                 else:
                     Log('Unknown error while parsing pagination', Log.ERROR)
 
             # Notify new page
             if 0 < len(requestURLs):
-                NotifyUser(getString(30252))
+                if (0 == (pageNumber % 5)) and bUpdatedVideoData:
+                    self._Flush(bFlushCacheData=False, bFlushVideoData=True)
+                    bUpdatedVideoData = False
+                pageNumber += 1
+                NotifyUser(getString(30252).format(pageNumber))
 
         # Flush catalog and data
-        self._Flush(bCacheRefresh or bUpdatedVideoData)
+        self._Flush(bFlushVideoData=bCacheRefresh or bUpdatedVideoData)
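Read together, the changes make the scraping loop checkpoint the video data every fifth page instead of writing it only once at the end. A condensed sketch of that pattern, where parse_page is a hypothetical stand-in for the per-page parsing the plugin actually does:

pageNumber = 1
bUpdatedVideoData = False
while requestURLs:
    url = requestURLs.pop(0)                      # FIFO: oldest queued page first
    bUpdatedVideoData |= parse_page(url)          # hypothetical: returns True if video data changed
    if requestURLs:                               # more pages still queued
        if (0 == (pageNumber % 5)) and bUpdatedVideoData:
            self._Flush(bFlushCacheData=False, bFlushVideoData=True)   # periodic checkpoint, video data only
            bUpdatedVideoData = False             # data on disk is now current
        pageNumber += 1
self._Flush(bFlushVideoData=bCacheRefresh or bUpdatedVideoData)        # final flush after the loop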
