kopia lustrzana https://github.com/Michael-K-Stein/SpotiFile
Implement user playlist scraping & changed track scrapers to generators
rodzic
a0f85e6ca5
commit
3706a095a9
|
@ -65,7 +65,7 @@ def download_track_list(download_dir: str, track_list: list, recursive_artist: b
|
|||
track.download_to_file(scraper, track_path)
|
||||
console.happy(f'Thread<{my_thread_id}> | Downloaded: {track.preview_title()}')
|
||||
if (recursive_album or recursive) and len(track_list) < recursive_limit:
|
||||
new_tracks = scraper.scrape_album_tracks(track.album.spotify_id)
|
||||
new_tracks = list(scraper.scrape_album_tracks(track.album.spotify_id))
|
||||
for new_track in new_tracks:
|
||||
if new_track not in track_list and len(track_list) < recursive_limit:
|
||||
track_list.append(new_track)
|
||||
|
@ -84,11 +84,11 @@ def download_track_list(download_dir: str, track_list: list, recursive_artist: b
|
|||
|
||||
if (recursive_artist or recursive) and len(track_list) < recursive_limit:
|
||||
old_size = len(track_list)
|
||||
track_list += scraper.scrape_artist_tracks(artist.spotify_id)
|
||||
track_list += list(scraper.scrape_artist_tracks(artist.spotify_id))
|
||||
if recursive_artist:
|
||||
albums = scraper.scrape_artist_albums(artist.spotify_id)
|
||||
albums = list(scraper.scrape_artist_albums(artist.spotify_id))
|
||||
for album in albums:
|
||||
track_list += scraper.scrape_album_tracks(album['id'])
|
||||
track_list += list(scraper.scrape_album_tracks(album['id']))
|
||||
console.log(f'Thread<{my_thread_id}> | Scraped {len(track_list) - old_size} new songs through recursive artist!')
|
||||
except Exception as ex:
|
||||
console.error(f'Thread<{my_thread_id}> | Exception: {ex}')
|
||||
|
@ -129,15 +129,16 @@ def full_download(download_dir: str, identifier: str, recursive_artist: bool=Fal
|
|||
|
||||
client.refresh_tokens()
|
||||
console.log(f'Recieved scrape command on identifier: {identifier}, {recursive=}, {recursive_artist=}, {recursive_album=}, {recursive_limit=}, {thread_count=}')
|
||||
track_list = scraper.scrape_tracks(identifier, console=console)
|
||||
console.log(f'Scraping on identifier: {identifier} yielded {len(track_list)} tracks!')
|
||||
#console.log(f'Scraping on identifier: {identifier} yielded {len(track_list)} tracks!')
|
||||
download_threads = []
|
||||
thread_subsection_size = int(len(track_list) / thread_count)
|
||||
for i in range(thread_count - 1):
|
||||
download_threads.append(Thread(target=download_track_list, args=(download_dir, track_list[thread_subsection_size * i : (thread_subsection_size * i) + thread_subsection_size], recursive_artist, recursive_album, recursive, recursive_limit)))
|
||||
download_threads[-1].start()
|
||||
sleep(0.05)
|
||||
download_threads.append(Thread(target=download_track_list, args=(download_dir, track_list[thread_subsection_size * (thread_count - 1):], recursive_artist, recursive_album, recursive, recursive_limit)))
|
||||
track_list = []
|
||||
for track in scraper.scrape_tracks(identifier, console=console):
|
||||
track_list.append(track)
|
||||
if len(track_list) == recursive_limit / thread_count:
|
||||
download_threads.append(Thread(target=download_track_list, args=(download_dir, list(track_list), recursive_artist, recursive_album, recursive, recursive_limit)))
|
||||
download_threads[-1].start()
|
||||
sleep(0.05)
|
||||
download_threads.append(Thread(target=download_track_list, args=(download_dir, list(track_list), recursive_artist, recursive_album, recursive, recursive_limit)))
|
||||
download_threads[-1].start()
|
||||
|
||||
[x.join() for x in download_threads]
|
||||
|
@ -165,7 +166,7 @@ def download_all_categories_playlists(download_meta_data_only=True):
|
|||
playlist = scraper.get_playlist(playlist_id)
|
||||
playlist.export_to_file()
|
||||
if not download_meta_data_only:
|
||||
full_download(f'{settings.DEFAULT_DOWNLOAD_DIRECTORY}', identifier=playlist.href)
|
||||
full_download(f'{settings.DEFAULT_DOWNLOAD_DIRECTORY}', identifier=playlist.href, thread_count=15)
|
||||
except Exception as ex:
|
||||
console.error(f'Scraping categories exception: {ex}')
|
||||
except Exception as ex:
|
||||
|
|
|
@ -13,6 +13,7 @@ class SpotifyScraper:
|
|||
Album = 1
|
||||
Artist = 2
|
||||
Track = 3
|
||||
User = 4
|
||||
Unknown = -1
|
||||
|
||||
def __init__(self, sp_dc=None, sp_key=None, client=None) -> None:
|
||||
|
@ -30,6 +31,8 @@ class SpotifyScraper:
|
|||
return self.IDTypes.Artist
|
||||
elif 'track' in link.lower():
|
||||
return self.IDTypes.Track
|
||||
elif 'user' in link.lower():
|
||||
return self.IDTypes.User
|
||||
return self.IDTypes.Unknown
|
||||
|
||||
def extract_id_from_link(self, link: str) -> str:
|
||||
|
@ -45,6 +48,8 @@ class SpotifyScraper:
|
|||
return self.scrape_artist_tracks(self.extract_id_from_link(link), intense=True, console=console)
|
||||
elif id_type == self.IDTypes.Track:
|
||||
return [SpotifyTrack(self.get(f'https://api.spotify.com/v1/tracks/{self.extract_id_from_link(link)}').json())]
|
||||
elif id_type == self.IDTypes.User:
|
||||
return self.scrape_user_items(self.extract_id_from_link(link))
|
||||
|
||||
def scrape_playlist(self, playlist_id: str):
    """Request the playlist resource for ``playlist_id``.

    Returns whatever the client's response object produces from ``.json()``.
    """
    endpoint = f'https://api.spotify.com/v1/playlists/{playlist_id}'
    response = self._client.get(endpoint)
    return response.json()
|
||||
|
@ -54,17 +59,22 @@ class SpotifyScraper:
|
|||
limit = 100
|
||||
playlist_data = self._client.get(f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks?offset={offset}&limit={limit}&market=from_token').json()
|
||||
tracks = playlist_data['items']
|
||||
for track_data in playlist_data['items']:
|
||||
yield SpotifyTrack(track_data)
|
||||
while playlist_data['next'] is not None:
|
||||
offset += limit
|
||||
playlist_data = self._client.get(f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks?offset={offset}&limit={limit}&market=from_token').json()
|
||||
tracks += playlist_data['items']
|
||||
for track_data in playlist_data['items']:
|
||||
yield SpotifyTrack(track_data)
|
||||
if len(tracks) != int(playlist_data['total']):
|
||||
print(f'Warning: track count does not match! {len(tracks)} != {int(playlist_data["tracks"]["total"])}')
|
||||
for track_data in tracks:
|
||||
yield SpotifyTrack(track_data)
|
||||
spotify_tracks = [SpotifyTrack(track_data) for track_data in tracks]
|
||||
if settings.AUTO_DOWNLOAD_PLAYLIST_METADATA:
|
||||
playlist = SpotifyPlaylist(playlist_id, spotify_tracks, self.get_playlist_data(playlist_id))
|
||||
playlist.export_to_file()
|
||||
return spotify_tracks
|
||||
|
||||
def scrape_album(self, album_id: str):
    """Request the album resource for ``album_id``.

    Returns whatever the client's response object produces from ``.json()``.
    """
    endpoint = f'https://api.spotify.com/v1/albums/{album_id}'
    response = self._client.get(endpoint)
    return response.json()
|
||||
|
@ -73,18 +83,13 @@ class SpotifyScraper:
|
|||
limit = 50
|
||||
offset = 0
|
||||
ret = self._client.get(f'https://api.spotify.com/v1/albums/{album_id}/tracks?limit={limit}').json()
|
||||
tracks = ret['items']
|
||||
for track in ret['items']:
|
||||
yield SpotifyTrack(self.get(track['href']).json())
|
||||
while ret['next'] is not None:
|
||||
offset += limit
|
||||
ret = self._client.get(f'https://api.spotify.com/v1/albums/{album_id}/tracks?offset={offset}&limit={limit}').json()
|
||||
tracks += ret['items']
|
||||
if len(tracks) != int(ret['total']):
|
||||
print(f'Warning: track count does not match! {len(tracks)} != {int(ret["total"])}')
|
||||
processed_tracks = []
|
||||
for track_data in tracks:
|
||||
processed_tracks.append(SpotifyTrack(self.get(track_data['href']).json()))
|
||||
return processed_tracks
|
||||
|
||||
for track in ret['items']:
|
||||
yield SpotifyTrack(self.get(track['href']).json())
|
||||
|
||||
def scrape_artist(self, artist_id: str):
    """Request the top-tracks listing for ``artist_id``.

    The request goes through ``self.get`` with ``market=from_token`` and the
    result of the response's ``.json()`` is returned unchanged.
    """
    endpoint = f'https://api.spotify.com/v1/artists/{artist_id}/top-tracks?market=from_token'
    response = self.get(endpoint)
    return response.json()
|
||||
|
@ -107,15 +112,16 @@ class SpotifyScraper:
|
|||
except:
|
||||
artist_name = 'Unknown'
|
||||
proccessed_tracks = [SpotifyTrack(track_data) for track_data in tracks]
|
||||
yield proccessed_tracks
|
||||
if intense:
|
||||
albums = self.scrape_artist_albums(artist_id)
|
||||
proccessed_album_count = 0
|
||||
for album in albums:
|
||||
proccessed_tracks += self.scrape_album_tracks(album['id'])
|
||||
for track in self.scrape_album_tracks(album['id']):
|
||||
yield track
|
||||
proccessed_album_count += 1
|
||||
if console is not None:
|
||||
console.log(f'Scraping {artist_name}\'s albums: {proccessed_album_count} / {len(albums)}')
|
||||
return proccessed_tracks
|
||||
|
||||
def get(self, url: str) -> Response:
    """Issue a GET for ``url`` through the wrapped client and hand back its response."""
    response = self._client.get(url)
    return response
|
||||
|
@ -168,3 +174,15 @@ class SpotifyScraper:
|
|||
playlist_data = self.get_playlist_data(playlist_id)
|
||||
tracks = self.scrape_playlist_tracks(playlist_id)
|
||||
return SpotifyPlaylist(spotify_id=playlist_id, tracks=tracks, data=playlist_data)
|
||||
|
||||
def scrape_user_items(self, user_id: str, limit: int = 50):
    """Yield every track from every playlist listed for a user.

    This is a generator: tracks are produced one at a time, in playlist
    order, by delegating to ``self.scrape_playlist_tracks``.

    Args:
        user_id: Spotify user id whose playlists are enumerated.
        limit: Page size requested for the playlist listing.

    Yields:
        Each item produced by ``scrape_playlist_tracks`` for each playlist.
    """
    playlist_ids = []
    page_url = f'https://api.spotify.com/v1/users/{user_id}/playlists?limit={limit}'
    # Follow the ``next`` link of each page. Re-requesting the first URL (as
    # the previous version did) never advances, so it looped forever for any
    # user whose playlists did not fit in a single page.
    while page_url:
        page = self.get(page_url).json()
        playlist_ids.extend(playlist['id'] for playlist in page['items'])
        page_url = page['next']

    for playlist_id in playlist_ids:
        yield from self.scrape_playlist_tracks(playlist_id)
|
||||
|
|
Ładowanie…
Reference in New Issue