Skip to content

Commit

Permalink
[weverse] add extractors
Browse files Browse the repository at this point in the history
  • Loading branch information
bradenhilton committed Nov 4, 2023
1 parent caf31e7 commit 4b512b6
Show file tree
Hide file tree
Showing 3 changed files with 200 additions and 0 deletions.
20 changes: 20 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3613,6 +3613,26 @@ Description
Download video files.


extractor.weverse.access-token
------------------------------
Type
``string``
Default
``null``
Description
Your Weverse account access token.

The token can be found in the ``we2_access_token`` cookie in the
``.weverse.io`` cookie domain after logging in to your account.

An invalid or not up-to-date value
will result in ``401 Unauthorized`` errors.

If this option is unset, and the cookie is not used, an extra HTTP
request will be sent with your ``username`` and ``password`` to
attempt to fetch a new token.


extractor.ytdl.enabled
----------------------
Type
Expand Down
1 change: 1 addition & 0 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@
"webmshare",
"webtoons",
"weibo",
"weverse",
"wikiart",
"wikifeet",
"xhamster",
Expand Down
179 changes: 179 additions & 0 deletions gallery_dl/extractor/weverse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://weverse.io/"""

from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
import binascii
import hashlib
import hmac
import time
import urllib.parse
import uuid

# URL prefix shared by all patterns: optional scheme and optional "m." host
BASE_PATTERN = r"(?:https?://)?(?:m\.)?weverse\.io"
# Captures the first path segment after the host (the community keyword)
COMMUNITY_PATTERN = BASE_PATTERN + r"/(\w+)"

# 32 lowercase hexadecimal characters
MEMBER_ID_PATTERN = r"([a-f0-9]{32})"
# One digit, a hyphen, and nine digits, e.g. "1-123456789"
POST_ID_PATTERN = r"(\d-\d{9})"


class WeverseExtractor(Extractor):
    """Base class for weverse extractors"""
    category = "weverse"
    cookies_domain = ".weverse.io"
    cookies_names = ("we2_access_token",)
    root = "https://weverse.io"
    request_interval = (1.0, 2.0)

    def _init(self):
        """Resolve an access token and create the API interface"""
        self.login()
        self.api = WeverseAPI(self, self.access_token)

    def login(self):
        """Set 'self.access_token' from config, cookies, or credentials

        Sources are tried in this order:
        1. the 'access-token' config option
        2. an existing 'we2_access_token' cookie
        3. a token requested with the configured username and password,
           which is then stored as that cookie

        'self.access_token' is None if none of them provides a value.
        """
        token = self.config("access-token")
        if token:
            self.access_token = token
            return

        if not self.cookies_check(self.cookies_names):
            username, password = self._get_auth_info()
            if username:
                self.cookies_update(
                    self._login_impl(username, password), self.cookies_domain)

        self.access_token = self.cookies.get(self.cookies_names[0])

    # NOTE(review): presumably caches the result per username (argument
    # index 1) for one year -- confirm against the 'cache' decorator
    @cache(maxage=365*24*3600, keyarg=1)
    def _login_impl(self, username, password):
        """Request an access token with the given account credentials

        Returns a dict mapping the access token cookie name to the token.
        Raises AuthenticationError if the response has no 'accessToken'.
        """
        endpoint = ("https://accountapi.weverse.io"
                    "/web/api/v2/auth/token/by-credentials")
        data = {"email": username, "password": password}
        headers = {
            "x-acc-app-secret": "5419526f1c624b38b10787e5c10b2a7a",
            "x-acc-app-version": "2.2.20-alpha.0",
            "x-acc-language": "en",
            "x-acc-service-id": "weverse",
            # random per-request ID; the 'uuid' module has no 'uuid64'
            "x-acc-trace-id": str(uuid.uuid4()),
        }
        res = self.request(
            endpoint, method="POST", data=data, headers=headers).json()
        if "accessToken" not in res:
            raise exception.AuthenticationError()
        return {self.cookies_names[0]: res["accessToken"]}


class WeversePostExtractor(WeverseExtractor):
    """Extractor for weverse posts"""
    subcategory = "post"
    directory_fmt = ("{category}", "{community[communityName]}",
                     "{author_name}", "{postId}")
    filename_fmt = "{category}_{filename}.{extension}"
    archive_fmt = "{postId}"
    pattern = COMMUNITY_PATTERN + r"/(?:artist|fanpost)/" + POST_ID_PATTERN
    example = "https://weverse.io/abcdef/artist/1-123456789"

    def __init__(self, match):
        WeverseExtractor.__init__(self, match)
        # group 1: community keyword, group 2: post ID
        self.community_keyword = match.group(1)
        self.post_id = match.group(2)

    def items(self):
        """Yield a directory message and one URL message per media file

        Nothing is yielded when the post could not be fetched or when it
        has neither an 'extension' nor an 'attachment' payload.
        """
        data = self.api.post(self.post_id)

        # the API interface returns None after a failed HTTP request
        if not data:
            return

        if "publishedAt" in data:
            # 'publishedAt' is divided by 1000 before being parsed
            data["date"] = text.parse_timestamp(data["publishedAt"] / 1000)

        # remove both payloads from the metadata;
        # 'extension' is reused below for the file extension
        extension = data.pop("extension", None)
        attachments = data.pop("attachment", None) or {}

        # skip posts with no media
        if not extension and not attachments:
            return

        author = data["author"]
        data["author_name"] = author.get("artistOfficialProfile", {}).get(
            "officialName") or author["profileName"]

        yield Message.Directory, data
        for kind, entries in attachments.items():
            if not entries:
                continue

            if kind == "photo":
                for photo in entries.values():
                    url = photo["url"]
                    data["filename"] = photo["photoId"]
                    data["extension"] = text.ext_from_url(url)
                    yield Message.Url, url, data

            elif kind == "video":
                for video in entries.values():
                    best_video = self.api.video(video["videoId"])
                    if not best_video:
                        # no download info available for this video
                        continue
                    url = best_video["url"]
                    data["filename"] = video["videoId"]
                    data["extension"] = text.ext_from_url(url)
                    yield Message.Url, url, data


class WeverseAPI():
    """Interface for the Weverse API"""

    BASE_API_URL = "https://global.apis.naver.com"

    def __init__(self, extractor, access_token):
        """Store the extractor and build the request headers

        The 'Authorization' header is only added when 'access_token'
        is non-empty, so a missing token does not raise a TypeError.
        """
        self.extractor = extractor
        self.headers = {}
        if access_token:
            self.headers["Authorization"] = "Bearer " + access_token

    def _endpoint_with_params(self, endpoint, params):
        """Return 'endpoint' with 'params' appended as a query string"""
        # continue an existing query string instead of starting a new one
        delimiter = "&" if "?" in endpoint else "?"
        return endpoint + delimiter + urllib.parse.urlencode(params)

    def _message_digest(self, endpoint, params, timestamp):
        """Return the base64-encoded HMAC-SHA1 digest for a request

        The signed message consists of the first 255 characters of the
        endpoint including its query string, followed by 'timestamp'.
        """
        key = "1b9cb6378d959b45714bec49971ade22e6e24e42".encode()
        url = self._endpoint_with_params(endpoint, params)
        message = "{}{}".format(url[:255], timestamp).encode()
        digest = hmac.new(key, message, hashlib.sha1).digest()
        # b2a_base64() appends a newline, which rstrip() removes
        return binascii.b2a_base64(digest).rstrip().decode()

    def post(self, post_id):
        """Return the API response for the post with ID 'post_id'"""
        endpoint = "/post/v1.0/post-{}".format(post_id)
        params = {"fieldSet": "postV1"}
        return self._call(endpoint, params)

    def video(self, video_id):
        """Return the 'downloadInfo' entry with the highest 'resolution'

        Returns None if the request failed or no entries are listed.
        """
        endpoint = "/cvideo/v1.0/cvideo-{}/downloadInfo".format(video_id)
        response = self._call(endpoint)
        if not response:
            return None
        videos = response.get("downloadInfo") or ()
        return max(
            videos, key=lambda video: video["resolution"], default=None)

    def _call(self, endpoint, params=None):
        """Send a signed GET request to 'endpoint' and return its JSON

        Returns None after logging a warning if the request raises
        an HttpError.
        """
        params = util.combine_dict({
            "appId": "be4d79eb8fc7bd008ee82c8ec4ff6fd4",
            "language": "en",
            "platform": "WEB",
            "wpf": "pc"
        }, params or {})

        # the digest is computed before 'wmsgpad' and 'wmd' are added,
        # i.e. it only covers the parameters above
        timestamp = int(time.time() * 1000)
        message_digest = self._message_digest(endpoint, params, timestamp)
        params = util.combine_dict(params, {
            "wmsgpad": timestamp,
            "wmd": message_digest
        })

        try:
            return self.extractor.request(
                self.BASE_API_URL + "/weverse/wevweb" + endpoint,
                params=params, headers=self.headers,
            ).json()
        except exception.HttpError as exc:
            self.extractor.log.warning(exc)
            return None

0 comments on commit 4b512b6

Please sign in to comment.