-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
138 lines (128 loc) · 5.94 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import json
import os
import spotipy
import re
from typing import List, Dict, Any, Tuple
with open('api_key_stuff.json') as file:
info = json.load(file)
token = spotipy.util.prompt_for_user_token(
info['Username'],
info['Scope'],
info['Client ID'],
info['Client Secret'],
info['Redirect URI']
)
s = spotipy.Spotify(auth=token)
def same_song(song1: Dict[str, Any], song2: Dict[str, Any]) -> bool:
""" Given two songs (each as dictionaries containing the title and artists),
returns whether the songs are the same, even if they have different IDs """
# List of all artists for each song:
artists: List[List[str]] = [song['Artists'] for song in [song1, song2]]
artists_overlap: bool = len(set(artists[0]).intersection(set(artists[1]))) > 0
titles = [song['Title'] for song in [song1, song2]]
# If only one of the songs has a unique identifier, like "reprise", they are not the same song
identifiers = [
'Reprise',
'Nightcore Edit',
'feat. UCLA Bruin Marching Band'
]
for identifier in identifiers:
if len([title for title in titles if identifier in title]) == 1:
return False
# The pattern ' - ' is usually followed by 'Remastered <year>' or 'Radio edit' or something
titles = [title.split(' - ')[0] for title in titles]
# Also, anything enclosed in parentheses is unnecessary
titles = [re.sub('\(.*\)', '', title) for title in titles]
titles = [re.sub('\[.*\]', '', title) for title in titles]
titles = [title.rstrip() for title in titles]
return artists_overlap and titles[0] == titles[1]
def sort_key(song):
title = song['Title'].lower()
# TODO: remove articles and non-alphanumeric chars from beginning and middle or string
return title
def get_playlist(playlist_id: str, sort=True) -> List[Dict[str, Any]]:
""" Returns useful information for each song on a playlist """
songs = []
offset = 0
# Retrieve 100 songs at a time until there aren't any left
while True:
# The offset tells it which index to start at
fields="items.track.name, items.track.id, items.track.artists.name, items.track.duration_ms"
successfully_fetched_songs = False
while not successfully_fetched_songs:
try:
new_songs = s.playlist_items(playlist_id, fields=fields, offset=offset, limit=100)['items']
successfully_fetched_songs = True
except socket.Timeouterror:
print("Socket timed out, trying again")
offset += 100
songs.extend(new_songs)
if len(new_songs) < 100:
break
# Extract the useful information from everything Spotify gave
songs = [song['track'] for song in songs]
songs = [song for song in songs if song is not None]
songs = [{'ID': song['id'],
'Title': song['name'],
'Artists': [artist['name'] for artist in song['artists']],
'Duration': song['duration_ms'] / 1000,
} for song in songs]
if sort:
# By default, sort alphabetically
songs.sort(key = sort_key)
return songs
def get_playlist_name(id):
""" Returns the name of a playlist whose ID is given """
return s.playlist(id, fields='name')['name']
def add_songs(playlist, songs):
""" Given a playlist ID & a list of song IDs, adds all those songs to the playlist """
# Can only add 100 at a time, so split songs into chunks of 100
while len(songs) > 0:
s.playlist_add_items(playlist, songs[:100])
del songs[:100]
def remove_songs(playlist, songs):
""" Given a playlist ID & a list of song IDs, removes all those songs from the playlist """
# Can only remove 100 at a time, so split songs into chunks of 100
while len(songs) > 0:
s.playlist_remove_all_occurrences_of_items(playlist, songs[:100])
del songs[:100]
def copy(main, feeders):
""" Copies all unique songs from feeder playlists to a main playlist, and removes songs that are no longer in any feeders """
main_playlist_name = get_playlist_name(main)
new_songs = []
old_songs = get_playlist(main)
for feeder in feeders:
feeder_name = get_playlist_name(feeder)
print(f'Fetching songs from "{feeder_name}" to copy to "{main_playlist_name}"')
for new_song in get_playlist(feeder):
unique = True
for song in new_songs:
if same_song(song, new_song):
unique = False
break
if unique:
new_songs.append(new_song)
print(f'Removing old songs from "{main_playlist_name}"')
remove_songs(main, [song['ID'] for song in old_songs])
print(f'Adding {len(new_songs)} songs to "{main_playlist_name}"')
add_songs(main, [song['ID'] for song in new_songs])
def warn_of_dupes(songs: List[Dict[str, Any]], playlist_name: str) -> None:
""" Given a list of songs, prints any that appear to be duplicates """
# It would be really nice to use sets here, but dictionaries are unhashable, so we can't
unique_songs = []
# Instead of printing dupes, store them in a list, to be counted and then printed
dupes_to_print = []
for new_song in songs:
for existing_song in unique_songs:
if same_song(new_song, existing_song):
dupes_to_print.append(f'\033[0;31m{new_song}\033[0;35m (similar to\n{existing_song})\n')
break
unique_songs.append(new_song)
print(f'\033[0;1mFound {len(dupes_to_print)} duplicate songs in "{playlist_name}":')
for thing_to_print in dupes_to_print: print(thing_to_print)
def restore_playlist(playlist_id):
playlist_name = get_playlist_name(playlist_id)
with open(f'playlists/{playlist_name}') as file:
# This is not formatted as a JSON, so we have to extract the ID manually
song_IDs = [line[8:30] for line in file.read().split('\n')]
add_songs(playlist_id, song_IDs)