Init ML scripts
50 modules/WillieTimer/lyrics.py Normal file
@@ -0,0 +1,50 @@
from lyricsgenius import Genius
import json
import re
import os


def get_songs(artists=("Notorious B.I.G", "outkast", "nwa")):
    # NOTE: the token is hardcoded here; reading it from the environment
    # would keep it out of version control.
    GENIUS_TOKEN = "gMnJyj87FvjyP2W093rQ_mjo5ZwwLw1u2r0AmcVqYcJ8kkjjW6ZbObeGnS726SrH"
    session = Genius(GENIUS_TOKEN, retries=2, timeout=20, sleep_time=0.3)

    # Fetch up to 75 songs per artist; save_lyrics() writes each artist's
    # songs as a JSON file in the working directory.
    for artist in artists:
        songlist = session.search_artist(artist, max_songs=75, sort='title')
        songlist.save_lyrics()


def sanitize_lyrics(text):
    # Section markers such as "[Verse 1]" or "[Chorus: Artist]", optionally
    # followed by a "+ (...)" credit and a trailing newline.
    notes_re = re.compile(r'(?:\[[0-9a-zA-Z :()&+\-.]+\])(?: \+ \([a-zA-Z \-.]+)?(?:\n)?')
    # Genius page footer left behind in scraped lyrics.
    footer_re = re.compile(r'(?:EmbedShare) *(?:URLCopyEmbedCopy)')
    # Collapse runs of three or more newlines to a single blank line.
    multiline_re = re.compile(r'(\n){3,}')

    sanitized = notes_re.sub('', text)
    sanitized = footer_re.sub('', sanitized)
    sanitized = multiline_re.sub('\n\n', sanitized)
    return sanitized
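
# A quick sanity check of sanitize_lyrics on a hypothetical snippet (not real
# Genius output): section headers are stripped and the blank run collapses.
#   sanitize_lyrics("[Verse 1]\nline one\n\n\n\n[Hook]\nline two")
#   -> "line one\n\nline two"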


def get_lyrics_from_json(json_file):
    artist_dict = json.load(json_file)
    ready_lyrics = []
    for song in artist_dict['songs']:
        ready_lyrics.append(sanitize_lyrics(song['lyrics']))
    return ready_lyrics


def save_sanitized_lyrics():
    # Collect every saved .json file in the working directory and write
    # the cleaned lyrics into a single training file.
    sanitized_lyrics_list = []
    for file in os.listdir("./"):
        if file.endswith(".json"):
            with open(file, 'r', encoding="utf-8") as read_file:
                sanitized_lyrics_list.extend(get_lyrics_from_json(read_file))

    with open('./lyrics.txt', 'w', encoding="utf-8") as lyrics_file:
        for lyrics in sanitized_lyrics_list:
            lyrics_file.write(f"{lyrics}\n")


save_sanitized_lyrics()
121 modules/WillieTimer/phraseGenerator.py Normal file
@@ -0,0 +1,121 @@
import os

import numpy
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint


class PhraseGenerator():
    def __init__(self):
        self.training_file = "./lyrics.txt"
        # Read the file's contents up front; passing the file object itself
        # to tokenize_words would tokenize its repr(), not the lyrics.
        with open(self.training_file, 'r', encoding='utf-8') as file:
            self.processed_inputs = self.tokenize_words(file.read())
        self.model = Sequential()

        self.chars = sorted(set(self.processed_inputs))

        self.input_len = len(self.processed_inputs)
        self.vocab_len = len(self.chars)

        self.seq_length = 100
        self.x_data = []
        self.y_data = []

    def nums_to_chars(self):
        return dict((i, c) for i, c in enumerate(self.chars))

    def chars_to_nums(self):
        return dict((c, i) for i, c in enumerate(self.chars))

    def tokenize_words(self, text):
        text = text.lower()

        # Keep only word characters, then drop English stopwords.
        tokenizer = RegexpTokenizer(r'\w+')
        tokens = tokenizer.tokenize(text)

        stop_words = set(stopwords.words('english'))
        filtered = (token for token in tokens if token not in stop_words)

        return " ".join(filtered)

    def train(self):
        char_to_num = self.chars_to_nums()

        print("Total number of characters:", self.input_len)
        print("Total vocab:", self.vocab_len)

        # Slide a window over the text: each input sequence is seq_length
        # characters, and the target is the character that follows it.
        for i in range(0, self.input_len - self.seq_length, 1):
            in_seq = self.processed_inputs[i:i + self.seq_length]
            out_seq = self.processed_inputs[i + self.seq_length]

            # Convert characters to integers using the lookup built above.
            self.x_data.append([char_to_num[char] for char in in_seq])
            self.y_data.append(char_to_num[out_seq])

        n_patterns = len(self.x_data)
        print("Total Patterns:", n_patterns)

        # Reshape to (samples, timesteps, features) and scale to [0, 1].
        X = numpy.reshape(self.x_data, (n_patterns, self.seq_length, 1))
        X = X / float(self.vocab_len)

        # One-hot encode the target characters.
        y = np_utils.to_categorical(self.y_data)
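        # With the shapes above, X is (n_patterns, 100, 1) scaled to [0, 1],
        # and y has one one-hot row per pattern, one column per target class.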

        # Three stacked LSTM layers with dropout, then a softmax over the vocabulary.
        self.model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
        self.model.add(Dropout(0.2))
        self.model.add(LSTM(256, return_sequences=True))
        self.model.add(Dropout(0.2))
        self.model.add(LSTM(128))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(y.shape[1], activation='softmax'))

        filepath = "model_weights_saved.hdf5"
        if os.path.exists(filepath):
            # Resume from a previous checkpoint if one exists; on the first
            # run there is nothing to load yet.
            self.model.load_weights(filepath)
        self.model.compile(loss='categorical_crossentropy', optimizer='adam')

        checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
        desired_callbacks = [checkpoint]

        self.model.fit(X, y, epochs=500, batch_size=256, callbacks=desired_callbacks)

        # Reload the best checkpoint saved during training.
        self.model.load_weights(filepath)
        self.model.compile(loss='categorical_crossentropy', optimizer='adam')

    def generate_text(self):
        num_to_char = self.nums_to_chars()

        # Seed with a random training pattern; copy it so the loop below
        # does not mutate x_data in place.
        start = numpy.random.randint(0, len(self.x_data) - 1)
        pattern = list(self.x_data[start])
        print("Random Seed:")
        print("\"", ''.join(num_to_char[value] for value in pattern), "\"")

        output_string = ""
        for _ in range(500):
            x = numpy.reshape(pattern, (1, len(pattern), 1))
            x = x / float(self.vocab_len)
            prediction = self.model.predict(x, verbose=0)
            index = numpy.argmax(prediction)
            result = num_to_char[index]

            output_string += result

            # Slide the window forward by one predicted character.
            pattern.append(index)
            pattern = pattern[1:]

        print(output_string)


print('Starting')
bot = PhraseGenerator()
print('Training')
bot.train()
print("Generating Text")
bot.generate_text()
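
# Note: "model_weights_saved.hdf5" persists between runs, so re-running the
# script resumes from the last saved checkpoint instead of starting fresh.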