From 3b60dabfc84c5e038827323eedf9e6fb06d9a318 Mon Sep 17 00:00:00 2001 From: Braden Hilton Date: Sat, 4 Nov 2023 16:38:44 +0000 Subject: [PATCH] [weverse] add extractors --- docs/configuration.rst | 20 ++++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/weverse.py | 180 +++++++++++++++++++++++++++++++ 3 files changed, 201 insertions(+) create mode 100644 gallery_dl/extractor/weverse.py diff --git a/docs/configuration.rst b/docs/configuration.rst index 23cc8f5b314..4a92d111bcb 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -3613,6 +3613,26 @@ Description Download video files. +extractor.weverse.access-token +------------------------------ +Type + ``string`` +Default + ``null`` +Description + Your Weverse account access token. + + The token can be found in the ``we2_access_token`` cookie in the + ``.weverse.io`` cookie domain after logging in to your account. + + An invalid or not up-to-date value + will result in ``401 Unauthorized`` errors. + + If this option is unset, and the cookie is not used, an extra HTTP + request will be sent with your ``username`` and ``password`` to + attempt to fetch a new token. + + extractor.ytdl.enabled ---------------------- Type diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 22e4fe34123..08b56155220 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -168,6 +168,7 @@ "webmshare", "webtoons", "weibo", + "weverse", "wikiart", "wikifeet", "xhamster", diff --git a/gallery_dl/extractor/weverse.py b/gallery_dl/extractor/weverse.py new file mode 100644 index 00000000000..e66e99fdbca --- /dev/null +++ b/gallery_dl/extractor/weverse.py @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
"""Extractors for https://weverse.io/"""

from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
import binascii
import hashlib
import hmac
import time
import urllib.parse
import uuid

BASE_PATTERN = r"(?:https?://)?(?:m\.)?weverse\.io"
COMMUNITY_PATTERN = BASE_PATTERN + r"/(\w+)"

MEMBER_ID_PATTERN = r"([a-f0-9]{32})"
POST_ID_PATTERN = r"(\d-\d{9})"


class WeverseExtractor(Extractor):
    """Base class for weverse extractors"""
    category = "weverse"
    cookies_domain = ".weverse.io"
    cookies_names = ("we2_access_token",)
    root = "https://weverse.io"
    request_interval = (1.0, 2.0)

    def _init(self):
        """Resolve an access token and build the API client.

        Raises exception.AuthenticationError when no token could be
        obtained, since every API request needs a Bearer token.
        """
        self.login()
        if not self.access_token:
            # Without this guard 'self.api' would stay undefined and the
            # first use in items() would fail with an AttributeError.
            raise exception.AuthenticationError()
        self.api = WeverseAPI(self, self.access_token)

    def login(self):
        """Set 'self.access_token' from config, cookies, or credentials.

        Order of precedence: the 'access-token' option, then the
        'we2_access_token' cookie, then a username/password login whose
        result is stored as that cookie.
        """
        token = self.config("access-token")
        if token:
            self.access_token = token
            return

        if not self.cookies_check(self.cookies_names):
            username, password = self._get_auth_info()
            if username:
                self.cookies_update(
                    self._login_impl(username, password),
                    self.cookies_domain)

        self.access_token = self.cookies.get(self.cookies_names[0])

    @cache(maxage=365*24*3600, keyarg=1)
    def _login_impl(self, username, password):
        """Exchange credentials for an access token.

        Returns a one-item dict mapping the token cookie name to the
        token value. Raises exception.AuthenticationError when the
        response carries no 'accessToken'.
        """
        endpoint = ("https://accountapi.weverse.io"
                    "/web/api/v2/auth/token/by-credentials")
        data = {"email": username, "password": password}
        headers = {
            "x-acc-app-secret": "5419526f1c624b38b10787e5c10b2a7a",
            "x-acc-app-version": "2.2.20-alpha.0",
            "x-acc-language": "en",
            "x-acc-service-id": "weverse",
            # uuid.uuid64() does not exist; use a random (version 4) UUID
            "x-acc-trace-id": str(uuid.uuid4()),
        }
        res = self.request(
            endpoint, method="POST", data=data, headers=headers).json()
        if "accessToken" not in res:
            raise exception.AuthenticationError()
        return {self.cookies_names[0]: res["accessToken"]}


class WeversePostExtractor(WeverseExtractor):
    """Extractor for weverse posts"""
    subcategory = "post"
    directory_fmt = ("{category}", "{community[communityName]}",
                     "{author_name}", "{postId}")
    filename_fmt = "{category}_{filename}.{extension}"
    archive_fmt = "{postId}"
    pattern = COMMUNITY_PATTERN + r"/(?:artist|fanpost)/" + POST_ID_PATTERN
    example = "https://weverse.io/abcdef/artist/1-123456789"

    def __init__(self, match):
        WeverseExtractor.__init__(self, match)
        self.community_keyword = match.group(1)
        self.post_id = match.group(2)

    def items(self):
        """Yield the directory message and one URL message per media file."""
        data = self.api.post(self.post_id)
        if not data:
            # WeverseAPI._call() already logged the HTTP error
            return

        if "publishedAt" in data:
            # 'publishedAt' is divided by 1000 before parsing,
            # i.e. it is treated as milliseconds
            data["date"] = text.parse_timestamp(data["publishedAt"] / 1000)

        # Remove both keys from the metadata; "extension" is re-set below
        # for each file and must not leak the post-level value.
        extension = data.pop("extension", None)
        attachments = data.pop("attachment", None) or {}

        # skip posts with no media
        if not extension and not attachments:
            return

        author = data["author"]
        official = author.get("artistOfficialProfile") or {}
        data["author_name"] = (
            official.get("officialName") or author["profileName"])

        yield Message.Directory, data
        for kind, media in attachments.items():
            if kind == "photo":
                for photo in media.values():
                    url = photo["url"]
                    data["filename"] = photo["photoId"]
                    data["extension"] = text.ext_from_url(url)
                    yield Message.Url, url, data
            elif kind == "video":
                for video in media.values():
                    video_id = video["videoId"]
                    best_video = self.api.video(video_id)
                    if not best_video:
                        self.log.warning(
                            "No download info for video %s", video_id)
                        continue
                    url = best_video["url"]
                    data["filename"] = video_id
                    data["extension"] = text.ext_from_url(url)
                    yield Message.Url, url, data


class WeverseAPI:
    """Interface for the Weverse API"""

    BASE_API_URL = "https://global.apis.naver.com"

    def __init__(self, extractor, access_token):
        self.extractor = extractor
        self.headers = {"Authorization": "Bearer " + access_token}

    def _endpoint_with_params(self, endpoint, params):
        """Return 'endpoint' with 'params' appended as a query string."""
        delimiter = "&" if "?" in endpoint else "?"
        return endpoint + delimiter + urllib.parse.urlencode(query=params)

    def _message_digest(self, endpoint, params, timestamp):
        """Return the base64 HMAC-SHA1 signature for a request.

        The signed message is the first 255 characters of the endpoint
        (including its query string) followed by the timestamp.
        """
        key = "1b9cb6378d959b45714bec49971ade22e6e24e42".encode()
        url = self._endpoint_with_params(endpoint, params)
        message = "{}{}".format(url[:255], timestamp).encode()
        digest = hmac.new(key, message, hashlib.sha1).digest()
        return binascii.b2a_base64(digest).rstrip().decode()

    def post(self, post_id):
        """Return the post object for 'post_id', or None on HTTP error."""
        endpoint = "/post/v1.0/post-{}".format(post_id)
        params = {"fieldSet": "postV1"}
        return self._call(endpoint, params)

    def video(self, video_id):
        """Return the highest-resolution download entry for 'video_id'.

        Returns None when the request failed or no entries are listed.
        """
        endpoint = "/cvideo/v1.0/cvideo-{}/downloadInfo".format(video_id)
        response = self._call(endpoint)
        if not response:
            return None
        videos = response.get("downloadInfo") or ()
        # NOTE(review): assumes "resolution" values compare in quality
        # order; if they are strings such as "720P"/"1080P" the comparison
        # is lexicographic - confirm against a real response.
        return max(
            videos, key=lambda video: video["resolution"], default=None)

    def _call(self, endpoint, params=None):
        """Send a signed GET request to 'endpoint' and return its JSON.

        HTTP errors are logged as warnings and result in None.
        """
        if params is None:
            params = {}
        params = util.combine_dict({
            "appId": "be4d79eb8fc7bd008ee82c8ec4ff6fd4",
            "language": "en",
            "platform": "WEB",
            "wpf": "pc",
        }, params)
        # the signature covers the parameters above, not "wmsgpad"/"wmd"
        timestamp = int(time.time() * 1000)
        message_digest = self._message_digest(endpoint, params, timestamp)
        params = util.combine_dict(params, {
            "wmsgpad": timestamp,
            "wmd": message_digest,
        })
        try:
            return self.extractor.request(
                self.BASE_API_URL + "/weverse/wevweb" + endpoint,
                params=params, headers=self.headers,
            ).json()
        except exception.HttpError as exc:
            self.extractor.log.warning(exc)
            return None