"""Fetch song lyrics from Genius and merge them into one cleaned text file.

Run as a script, the module sanitises every ``*.json`` file found in the
current directory and writes the combined lyrics to ``./lyrics.txt``.
"""
import json
import os
import re

# Compiled once at import time instead of on every sanitize_lyrics() call.
# Raw strings keep the regex escapes (\[, \], \+, \() away from Python's own
# string-escape processing; the pattern text is unchanged.
#
# Bracketed header such as "[Verse 1]": ASCII letters, digits, space and
# ": ( ) &" plus the "+-." range (i.e. + , - .) between square brackets,
# optionally followed by a " + (name" tail and a single newline.
_NOTES_RE = re.compile(
    r'((?:\[[0-9a-zA-Z :()&+-.]+\])(?: \+ \([a-zA-Z -.]+)?(?:\n)?)'
)
# Footer text "EmbedShare URLCopyEmbedCopy" (any number of spaces between).
_FOOTER_RE = re.compile('((?:EmbedShare)[ ]*(?:URLCopyEmbedCopy))')
# Three or more consecutive newlines.
_MULTILINE_RE = re.compile(r'(\n){3,}')


def get_songs(artists=("Notorious B.I.G", "outkast", "nwa")):
    """Search Genius for each artist and save the lyrics that were found.

    Args:
        artists: Iterable of artist names to search for. The default is an
            immutable tuple so it cannot be mutated between calls.

    For every artist, up to 75 songs (sorted by title) are requested and
    ``save_lyrics()`` is called on the result. Artists for which the search
    returns nothing are reported and skipped.
    """
    # Imported here so the sanitising helpers below remain importable on a
    # machine where the third-party lyricsgenius package is not installed.
    from lyricsgenius import Genius

    # NOTE(security): an API token is committed in source. The environment
    # variable takes precedence; the literal is kept only as a fallback so
    # existing setups keep working. It should be rotated and removed.
    GENIUS_TOKEN = os.environ.get(
        "GENIUS_ACCESS_TOKEN",
        "gMnJyj87FvjyP2W093rQ_mjo5ZwwLw1u2r0AmcVqYcJ8kkjjW6ZbObeGnS726SrH",
    )
    session = Genius(GENIUS_TOKEN, retries=2, timeout=20, sleep_time=0.3)

    for artist in artists:
        songlist = session.search_artist(artist, max_songs=75, sort='title')
        if songlist is None:
            # Without this guard an unmatched artist raised AttributeError
            # on None and aborted the remaining downloads.
            print(f"No Genius result for {artist!r}; skipping")
            continue
        songlist.save_lyrics()


def sanitize_lyrics(input: str) -> str:
    """Return *input* with section headers and footer text removed.

    Three substitutions are applied in order:

    1. bracketed headers such as ``[Verse 1]`` (and the newline right after
       them) are deleted;
    2. the ``EmbedShare URLCopyEmbedCopy`` footer is deleted;
    3. runs of three or more newlines are collapsed to exactly two.

    The parameter keeps its original name ``input`` (which shadows the
    builtin inside this function) so keyword callers are not broken.
    """
    sanitized_input = _NOTES_RE.sub('', input)
    sanitized_input = _FOOTER_RE.sub('', sanitized_input)
    sanitized_input = _MULTILINE_RE.sub('\n\n', sanitized_input)
    return sanitized_input


def get_lyrics_from_json(json_file) -> list:
    """Load an artist JSON document and return its sanitised lyrics.

    Args:
        json_file: Open, readable file object containing a JSON object with
            a ``"songs"`` list whose items each have a ``"lyrics"`` string.

    Returns:
        A list with one sanitised lyrics string per song, in file order.

    Raises:
        KeyError: If ``"songs"`` or a song's ``"lyrics"`` key is missing.
    """
    artist_dict = json.load(json_file)
    ready_lyrics = []
    print(artist_dict.keys())
    # A plain loop is kept because each iteration also prints the result.
    for song in artist_dict['songs']:
        sanitized_lyrics = sanitize_lyrics(song['lyrics'])
        print(sanitized_lyrics)
        ready_lyrics.append(sanitized_lyrics)
    return ready_lyrics


def save_sanitized_lyrics(source_dir="./", output_path="./lyrics.txt") -> None:
    """Sanitise every ``.json`` file in *source_dir* into one text file.

    Args:
        source_dir: Directory scanned (non-recursively) for ``.json`` files.
            Defaults to the current directory, as before.
        output_path: File that receives the lyrics, one entry per
            ``write`` followed by a newline. Defaults to ``./lyrics.txt``.

    Files are processed in sorted name order: ``os.listdir`` returns entries
    in arbitrary order, which made the output order vary between runs.
    """
    sanitized_lyrics_list = []
    for file in sorted(os.listdir(source_dir)):
        if not file.endswith(".json"):
            continue
        json_path = os.path.join(source_dir, file)
        with open(json_path, 'r', encoding="utf-8") as read_file:
            sanitized_lyrics_list.extend(get_lyrics_from_json(read_file))
    print(sanitized_lyrics_list)

    # The file is only written, never read back, so plain "w" is sufficient.
    with open(output_path, 'w', encoding="utf-8") as lyrics_file:
        for lyrics in sanitized_lyrics_list:
            print(lyrics)
            lyrics_file.write(f"{lyrics}\n")


if __name__ == "__main__":
    # Only run the pipeline when executed as a script, not on import.
    save_sanitized_lyrics()