Skip to content

Commit

Permalink
Add logging of album/photo counters, extract directory creation to a …
Browse files Browse the repository at this point in the history
…utility function
  • Loading branch information
geeeezmo committed May 25, 2023
1 parent d328908 commit 9d431c0
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 39 deletions.
81 changes: 43 additions & 38 deletions leave_vk.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os.path
import sys
import json
import re
import requests

import vk_api
Expand All @@ -13,12 +14,14 @@


def get_paginated(community, method, **kwargs):
data = {'profiles': [], 'groups': [], 'items': []}
data = {'profiles': [], 'groups': [], 'items': [], 'count': 0}

while True:
print('offset', len(data['items']))
more = method(count=50, offset=len(data['items']), **kwargs)

data['count'] = data['count'] + more['count']

if not more['items']:
break

Expand All @@ -27,7 +30,8 @@ def get_paginated(community, method, **kwargs):
ensure_attachment(community, att)

for k in data.keys():
data[k] += more.get(k, [])
if k != 'count':
data[k] += more.get(k, [])

data['profiles'] = {p['id']: p for p in data['profiles']}
data['groups'] = {p['id']: p for p in data['groups']}
Expand All @@ -41,22 +45,28 @@ def download_album(community, owner_id, album):
# ,
# ownder_id=attachment['album']['owner_id'],
# )
data = {'items': []}
data = {'items': [], 'count': 0}

while True:
print('album offset', len(data['items']))
# print('album offset', len(data['items']))
more = vk.photos.get(
count=50, offset=len(data['items']), owner_id=owner_id, album_id=album['id']
)

if not more['items']:
break

album_title = re.sub(r'[^\w\-_]', '_', album['title'], flags=re.I)

if data['count'] == 0:
data['count'] = more['count']
print('downloading %i photos from album: %s' % (data['count'], album_title))

for i in more['items']:
t, p = ensure_photo('albums/%s_%s' % (album['id'], album['title']), i)
t, p = ensure_photo('albums/%s_%s' % (album['id'], album_title), i)
data['items'].append((t, p))

return data['items']
return data


def get_community_info(community):
Expand Down Expand Up @@ -84,26 +94,37 @@ def get_all_posts(community):


def get_all_albums(community, community_id):
    """Download every photo album of a community, logging progress counters.

    Fetches the album list via ``get_paginated`` (system albums included
    through ``need_system=1``), then downloads each album with
    ``download_album`` and prints running "(done/total)" counters for both
    albums and photos.

    Args:
        community: Community short name; forwarded as the ``domain`` argument.
        community_id: Positive numeric community ID; negated to form the
            ``owner_id`` passed to the API call.

    Returns:
        The dict returned by ``get_paginated`` for ``vk.photos.getAlbums``.
    """
    albums_data = get_paginated(
        community,
        vk.photos.getAlbums,
        extended=1,
        domain=community,
        owner_id=-community_id,
        need_system=1,
    )
    albums = albums_data['items']

    # get_paginated adds the API's `count` on every page request (including
    # the final empty page), so albums_data['count'] over-states the number of
    # albums. Use the number of fetched items as the progress denominator.
    total_albums_count = len(albums)
    total_photos_count = sum(album['size'] for album in albums)

    if total_albums_count > 0:
        print('starting to download %i albums' % total_albums_count)

    downloaded_photos_count = 0
    for downloaded_album_count, album in enumerate(albums, start=1):
        album_result = download_album(
            community,
            owner_id=album['owner_id'],
            album=album,
        )
        # Read the counter by key instead of unpacking .values(), which
        # silently depends on the dict's insertion order.
        downloaded_photos_count += album_result['count']
        print('(%i/%i) album with ID %s downloaded' % (downloaded_album_count, total_albums_count, album['id']))
        print('(%i/%i) total community photos downloaded' % (downloaded_photos_count, total_photos_count))

    return albums_data


def ensure_photo(photoDir, photo):
full_dir = '%s/%s' % (dir, photoDir)
fname = '%s/%s_%i.jpg' % (photoDir, utils.timestamp_to_moscow_datetime(photo['date']).strftime('%Y-%m-%d %H-%M'), photo['id'])
full_fname = '%s/%s' % (dir, fname)

try:
os.mkdir(full_dir)
except FileExistsError:
pass
utils.create_dir(full_dir)

if not os.path.isfile(full_fname):
r = requests.get(max(photo['sizes'], key=lambda s: s['height'])['url'])
Expand All @@ -115,7 +136,7 @@ def ensure_photo(photoDir, photo):

def ensure_doc(doc):
fname = 'attachments/%i.%s' % (doc['id'], doc['ext'])
full_fname = dir + fname
full_fname = '%s/%s' % (dir, fname)

if not os.path.isfile(full_fname):
r = requests.get(doc['url'])
Expand All @@ -140,11 +161,11 @@ def ensure_attachment(community, attachment):
return

if attachment['type'] == 'album':
photos = download_album(
photos, photos_count = download_album(
community,
owner_id=attachment['album']['owner_id'],
album=attachment['album'],
)
).values()
attachment['rendered'] = "Album: %s\n" + "\n".join(
"![%s](%s)" % (t, f) for (t, f) in photos
)
Expand All @@ -165,29 +186,13 @@ def ensure_attachment(community, attachment):
assert len(sys.argv) == 2, sys.argv
community = sys.argv[-1]
assert community.startswith("https://vk.com/")
community = community.replace("https://vk.com/", "")

dir = "docs/%s/" % community

try:
os.mkdir('docs')
except FileExistsError:
pass

try:
os.mkdir(dir)
except FileExistsError:
pass
community = community.replace("https://vk.com/", "").removesuffix('/')

try:
os.mkdir(dir + "attachments/")
except FileExistsError:
pass
dir = "docs/%s" % community
print('dir: %s' % dir)

try:
os.mkdir(dir + "albums/")
except FileExistsError:
pass
utils.create_dir(dir + "/attachments")
utils.create_dir(dir + "/albums")

community_data = get_community_info(community)

Expand Down
10 changes: 9 additions & 1 deletion utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
import datetime
import pytz
import os

def timestamp_to_moscow_datetime(ts):
    """Convert a POSIX timestamp to a timezone-aware Europe/Moscow datetime.

    Args:
        ts: Seconds since the Unix epoch.

    Returns:
        An aware ``datetime`` expressed in the ``Europe/Moscow`` time zone.
    """
    local_tz = pytz.timezone('Europe/Moscow')
    # Build the aware UTC datetime directly; datetime.utcfromtimestamp() is
    # deprecated since Python 3.12 and yields a naive value that must be patched.
    dt = datetime.datetime.fromtimestamp(ts, tz=datetime.timezone.utc)
    return dt.astimezone(local_tz)

def create_dir(dir):
    """Create directory *dir*, including any missing parent directories.

    An already existing directory is left untouched. If *dir* exists but is
    not a directory, ``FileExistsError`` propagates instead of being silently
    ignored, so the problem surfaces here rather than on a later file write.

    Args:
        dir: Path of the directory to create.

    Raises:
        FileExistsError: If *dir* exists and is not a directory.
    """
    os.makedirs(dir, exist_ok=True)

0 comments on commit 9d431c0

Please sign in to comment.