Skip to content

Commit

Permalink
! chunk the all-items recursive parsing into batches
Browse files Browse the repository at this point in the history
  • Loading branch information
ALERTua committed Mar 24, 2024
1 parent 490dee8 commit 1d0a0a8
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 17 deletions.
15 changes: 11 additions & 4 deletions rozetka/entities/supercategory.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,7 @@ def get_fat_menu_categories():
return output


def get_all_items_recursively(loop=False) -> List[Item]:
if loop:
return list()

def get_all_item_ids_recursively():
_ = get_super_category_ids()
categories = list(get_all_categories_recursively())
all_categories = list(set(categories))
Expand All @@ -85,6 +82,16 @@ def get_all_items_recursively(loop=False) -> List[Item]:
items_ids = tools.fncs_map((_._get_item_ids for _ in all_categories)) or []
items_ids = list(set(chain(*items_ids)))
LOG.green(f"Got {len(items_ids)} item ids from {all_categories_len} categories")
return items_ids, all_categories_len


def get_all_items_recursively(loop=False, items_ids=None, all_categories_len=None) -> List[Item]:
if loop:
return list()

if items_ids is None:
items_ids, all_categories_len = get_all_item_ids_recursively()

items = Item.parse_multiple(*items_ids, parse_subitems=False)
LOG.green(f"Got {len(items)} items from {all_categories_len} categories")

Expand Down
30 changes: 17 additions & 13 deletions rozetka/runners/parse_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from rozetka.entities.item import Item
from rozetka.entities.point import Point
from rozetka.entities.supercategory import get_all_items_recursively
from rozetka.entities.supercategory import get_all_items_recursively, get_all_item_ids_recursively
from rozetka.tools import db, constants, tools

LOG = Log.get_logger()
Expand Down Expand Up @@ -61,19 +61,23 @@ def _main():
start = pendulum.now()
LOG.verbose = constants.VERBOSE

all_items = get_all_items_recursively()

LOG.green(f"Building points for {len(all_items)} items")
points = list(map(build_item_point, all_items))
LOG.green(f"Dumping {len(points)} points")
# https://docs.influxdata.com/influxdb/v2.4/write-data/best-practices/optimize-writes/
chunked_points = tools.slice_list(points, 5000)
for chunked_points_item in Bar(f"Dumping {len(chunked_points)} point chunks").iter(chunked_points):
asyncio.run(db.dump_points_async(record=chunked_points_item))

all_item_ids, _ = get_all_item_ids_recursively()
chunked_items_ids = tools.slice_list(all_item_ids, 10000)
overal_length = 0
for chunked_item_ids in Bar(f"Dumping {len(chunked_items_ids)} point chunks").iter(chunked_items_ids):
all_items = get_all_items_recursively(chunked_item_ids)
LOG.green(f"Building points for {len(all_items)} items")
points = list(map(build_item_point, all_items))
LOG.green(f"Dumping {len(points)} points")
# https://docs.influxdata.com/influxdb/v2.4/write-data/best-practices/optimize-writes/
chunked_points = tools.slice_list(points, 5000)
for chunked_points_item in Bar(f"Dumping {len(chunked_points)} point chunks").iter(chunked_points):
asyncio.run(db.dump_points_async(record=chunked_points_item))

overal_length += len(points)
duration = pendulum.now().diff_for_humans(start)
LOG.green(f"Duration: {duration}")
return len(points)
LOG.green(f"Points: {overal_length}, Duration: {duration}")
return overal_length


def main():
Expand Down

0 comments on commit 1d0a0a8

Please sign in to comment.