Skip to content

Commit

Permalink
github ext broke so direct upload go brr
Browse files Browse the repository at this point in the history
Added:
    * Threads! Download time is now greatly reduced (my 66-song playlist went from about 40 minutes to about 8 minutes on average)
    * Changed the filename used when saving album cover images
    * Attempted to fine-tune filtering, no success so far (live versions still appear randomly)
  • Loading branch information
akuyuu authored Aug 13, 2023
1 parent e768e7d commit 106fb15
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 67 deletions.
9 changes: 8 additions & 1 deletion cliargs.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,17 @@ def cli_init():
)

parser.add_argument('--precise',
help='Precise search, might be considerably slower, but results are more precise, False by default',
help='Precise search, might be considerably slower, False by default',
type=bool,
action=argparse.BooleanOptionalAction,
#nargs='?'
)

parser.add_argument('--workers',
help='Amount of threads to be launched for download, 5 by default',
type=int,
default=5,
nargs='?'
)

return parser
12 changes: 8 additions & 4 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
from spoton.connector import Connector


parser = cliargs.cli_init()
args = parser.parse_args()
from datetime import datetime

env = dotenv.load_dotenv()

Expand All @@ -15,6 +14,8 @@
print('.env was not detected, creating it - fill in CLIENT_ID and CLIENT_SECRET')
os._exit(1)

parser = cliargs.cli_init()
args = parser.parse_args()

client_id = os.getenv('CLIENT_ID')
client_secret = os.getenv('CLIENT_SECRET')
Expand All @@ -27,6 +28,9 @@
link = args.link
download_path = args.path
precise = args.precise
workers = args.workers

Connector = Connector(client_id, client_secret)
Connector.process(type, link, download_path, precise)
Connector = Connector(client_id, client_secret, workers)
print(datetime.now())
Connector.process(type, link, download_path, precise)
print(datetime.now())
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
music_tag==0.4.3
python-dotenv==1.0.0
Requests==2.31.0
yt_dlp==2023.3.4
yt_dlp==2023.7.6
14 changes: 7 additions & 7 deletions spoton/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,20 @@


class Connector:
def __init__(self,
client_id: str,
client_secret: str,
):
def __init__(self, client_id: str, client_secret: str, workers: int):
'''
client_id - spotify client id
client_secret - spotify client secret
^ get both at https://developer.spotify.com/documentation/web-api/concepts/apps
workers - amount of threads to be launched during download
'''

self.spotify_obj = spotify.Spotify(client_id, client_secret)
self.workers = workers

def process(self, type: str, link: str, download_path: str, precise: bool):
download_id = self.get_id(link)

match type:
case 'playlist':
responses = self.spotify_obj.get_playlist(download_id)
Expand All @@ -33,10 +33,10 @@ def process(self, type: str, link: str, download_path: str, precise: bool):
case _:
raise(Exception('Invalid download type'))

Downloader = downloader.Downloader(download_path)
Downloader = downloader.Downloader(download_path, self.workers)
Downloader.batch_download(tracks, precise)


def get_id(self, link):
def get_id(self, link: str):
parts = link.split('/')
return parts[-1] if parts[-1] != '' else parts[-2]
52 changes: 30 additions & 22 deletions spoton/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,62 +12,67 @@
import music_tag
import requests

import concurrent.futures as cf

class Downloader:
def __init__(self, download_path):
def __init__(self, download_path: str, workers: int):
self.ytdlp_options = {
'default_search': 'ytsearch',
'default_search': 'auto',
'format': 'bestaudio/best',
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': '192',
}],
'postprocessor_args': ['-hide_banner'],
'geo_bypass': True,
'overwrites': True,
'geo_bypass_country': 'US',
'quiet': True,
'cookies-from-browser': 'chrome',
'user-agent': 'US'
'noprogress': True,
'nooverwrites': True,
}

#structure for saving album covers as album_name: imgpath
self.album_covers = {}

self.download_path = f'{download_path}'
self.__create_dir(download_path)


self.pool = cf.ThreadPoolExecutor(max_workers=workers)


def batch_download(self, tracks: list[structs.Track], precise=False):
download_func = self.download_track
if precise:
download_func = self.precise_download

for i in tracks:
download_func(i)
self.__cleanup()
self.pool.submit(download_func, i)

self.pool.shutdown(wait=True)
self.__cleanup()

def download_track(self, track: structs.Track):
artists = ", ".join(track.track_artists)
request_query = f'{artists} - {track.track_name} song HD'
request_query = f'{artists} - {track.track_name} song HQ'

savepath = os.path.join(self.download_path, track.track_name)
ytdlp_options = self.ytdlp_options
ytdlp_options = self.ytdlp_options.copy()

ytdlp_options['outtmpl'] = savepath

with yt_dlp.YoutubeDL(ytdlp_options) as ydl:
ydl.download([request_query])

self.__change_metadata(savepath, track)

#has duration filter
#has duration filter (kinda)
def precise_download(self, track: structs.Track):
artists = ", ".join(track.track_artists)
request_query = f'{artists} - {track.track_name} song HD'
request_query = f'{artists} - {track.track_name} song'

ytdlp_options = self.ytdlp_options
ytdlp_options = self.ytdlp_options.copy()

#to get first 10 results, if it was ytsearch then it would give single one only
ytdlp_options['default_search'] = 'ytsearch10'
# to get first 7 results, if it was ytsearch then it would give single one only
# why 7? because 10 was too high, 5 too low, although further finetuning is required
ytdlp_options['default_search'] = 'ytsearch7'

duration_s = track.duration_ms // 1000

Expand All @@ -81,7 +86,7 @@ def precise_download(self, track: structs.Track):
entries.sort(key=lambda x, dur=duration_s:(abs(x['duration'] - dur), x['view_count']))

url = entries[0]['webpage_url']

ytdlp_options['default_search'] = 'auto'

savepath = os.path.join(self.download_path, track.track_name)
Expand All @@ -92,9 +97,10 @@ def precise_download(self, track: structs.Track):

self.__change_metadata(savepath, track)

def __change_metadata(self, audio_path, track: structs.Track):
def __change_metadata(self, audio_path: str, track: structs.Track):
imgpath = self.__get_album_image(track)
f = music_tag.load_file(audio_path + '.mp3')

if not f:
raise(Exception(f'Failed to change metadata for {track.track_name} at {audio_path}'))
else:
Expand All @@ -118,7 +124,8 @@ def __change_metadata(self, audio_path, track: structs.Track):

def __get_album_image(self, track: structs.Track) -> str:
if track.album_name not in self.album_covers:
filename = track.image_url.split('/')[-1]

filename = f'{track.album_artists} - {track.album_name}'
resp = requests.get(track.image_url)
imgpath = os.path.join(self.download_path, filename) + '.jpg'

Expand All @@ -130,14 +137,15 @@ def __get_album_image(self, track: structs.Track) -> str:

self.album_covers[track.album_name] = imgpath


return self.album_covers[track.album_name]

def __cleanup(self):
for i in self.album_covers:
os.remove(self.album_covers[i])
self.album_covers = {}

def __create_dir(self, path):
def __create_dir(self, path: str):
if not os.path.exists(f'{path}'):
os.mkdir(f'{path}')

49 changes: 17 additions & 32 deletions spoton/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,42 +4,27 @@
* Parsing list[dict] response from endpoint/playlists or endpoint/albums into list[Track]
* Building list[Track] from album response requires not only api response with elements in album, but also album info
'''
from dataclasses import dataclass
from typing import Optional

@dataclass
class Album:
def __init__(self,
name: str,
artists: list[str],
image_url: str,
total_tracks: int,
):

self.name = name
self.artists = artists
self.image_url = image_url
self.total_tracks = total_tracks
name: str
artists: list[str]
image_url: str
total_tracks: int

@dataclass
class Track:
def __init__(self,
track_name: str,
album_name: str,
image_url: str,
album_artists: list[str],
track_artists: list[str],
duration_ms: int,
disk_number=None,
track_number=None,
total_tracks=None,
):

self.track_name = track_name
self.album_name = album_name
self.image_url = image_url
self.album_artists = album_artists
self.track_artists = track_artists
self.duration_ms = duration_ms
self.disk_number = disk_number
self.track_number = track_number
self.total_tracks = total_tracks
track_name: str
album_name: str
image_url: str
album_artists: list[str]
track_artists: list[str]
duration_ms: int
disk_number: Optional[int] = None
track_number: Optional[int] = None
total_tracks: Optional[int] = None


def tracks_from_playlist(responses: list[dict]) -> list[Track]:
Expand Down

0 comments on commit 106fb15

Please sign in to comment.