From 861eb3d808add4f9343462ee9c2556f1790e7527 Mon Sep 17 00:00:00 2001
From: Logan Cusano <logan@simplestepsolutions.com>
Date: Wed, 22 Dec 2021 03:06:21 -0500
Subject: [PATCH 1/5] Updated to not save development files

---
 modules/WillieTimer/.gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/WillieTimer/.gitignore b/modules/WillieTimer/.gitignore
index 80c7d3c..4a8f97d 100644
--- a/modules/WillieTimer/.gitignore
+++ b/modules/WillieTimer/.gitignore
@@ -1,2 +1,3 @@
 lyrics.py
-phraseGenerator.py
\ No newline at end of file
+phraseGenerator.py
+/lyrics.txt

From e3bcc124e1c05994367609d8f6fc5787fa691441 Mon Sep 17 00:00:00 2001
From: Logan Cusano <logan@simplestepsolutions.com>
Date: Wed, 22 Dec 2021 16:50:09 -0500
Subject: [PATCH 2/5] Update gitignore

---
 .gitignore                     | 3 ---
 modules/WillieTimer/.gitignore | 4 ++--
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0893afc..4471aed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,7 +4,4 @@
 config.ini
 *.7z
 *.bat
-*.hdf5
-*.json
-/modules/*.txt
 /DSDPlus/
diff --git a/modules/WillieTimer/.gitignore b/modules/WillieTimer/.gitignore
index 4a8f97d..700d7b9 100644
--- a/modules/WillieTimer/.gitignore
+++ b/modules/WillieTimer/.gitignore
@@ -1,3 +1,3 @@
-lyrics.py
-phraseGenerator.py
 /lyrics.txt
+*.hdf5
+*.json

From 24cfac7c33983e9a4776882c457096dbfa66f699 Mon Sep 17 00:00:00 2001
From: Logan Cusano <logan@simplestepsolutions.com>
Date: Wed, 22 Dec 2021 16:50:35 -0500
Subject: [PATCH 3/5] Init ML scripts

---
 modules/WillieTimer/lyrics.py          |  50 ++++++++++
 modules/WillieTimer/phraseGenerator.py | 121 +++++++++++++++++++++++++
 2 files changed, 171 insertions(+)
 create mode 100644 modules/WillieTimer/lyrics.py
 create mode 100644 modules/WillieTimer/phraseGenerator.py

diff --git a/modules/WillieTimer/lyrics.py b/modules/WillieTimer/lyrics.py
new file mode 100644
index 0000000..2f988eb
--- /dev/null
+++ b/modules/WillieTimer/lyrics.py
@@ -0,0 +1,50 @@
+from lyricsgenius import Genius
+import json
+import re
+import os
+
+
+def get_songs(artists=("Notorious B.I.G", "outkast", "nwa")):
+    """Search Genius for each artist (up to 75 songs, sorted by title) and save the results via save_lyrics()."""
+    # The token is read from the environment instead of being committed; revoke the previously hard-coded one.
+    session = Genius(os.environ["GENIUS_ACCESS_TOKEN"], retries=2, timeout=20, sleep_time=0.3)
+
+    for artist in artists:
+        songlist = session.search_artist(artist, max_songs=75, sort='title')
+        # NOTE(review): guard assumes search_artist can come back empty for an unknown artist - confirm in docs.
+        if songlist is not None:
+            songlist.save_lyrics()
+
+def sanitize_lyrics(input):
+    """Remove bracketed section notes and the Genius embed footer, then collapse 3+ newlines into two."""
+    # Raw strings keep the regex escapes (\[, \+, \() from being parsed as invalid string escapes.
+    notes_re = re.compile(r'((?:\[[0-9a-zA-Z :()&+-.]+\])(?: \+ \([a-zA-Z -.]+)?(?:\n)?)')
+    footer_re = re.compile(r'((?:EmbedShare)[ ]*(?:URLCopyEmbedCopy))')
+    multiline_re = re.compile(r'(\n){3,}')
+    without_notes = footer_re.sub('', notes_re.sub('', input))
+    return multiline_re.sub('\n\n', without_notes)
+
+def get_lyrics_from_json(json_file):
+    """Parse the open JSON file, sanitize the 'lyrics' of each entry under 'songs', print and return them."""
+    artist_data = json.load(json_file)
+    print(artist_data.keys())
+    cleaned = []
+    for entry in artist_data['songs']:
+        cleaned.append(sanitize_lyrics(entry['lyrics']))
+        print(cleaned[-1])
+    return cleaned
+
+def save_sanitized_lyrics():
+    """Sanitize the lyrics of every *.json file in the working directory and write them to ./lyrics.txt."""
+    collected = []
+    for name in (entry for entry in os.listdir("./") if entry.endswith(".json")):
+        with open(name, 'r', encoding="utf-8") as source:
+            collected.extend(get_lyrics_from_json(source))
+            print(collected)
+    with open('./lyrics.txt', 'w+', encoding="utf-8") as lyrics_file:
+        for text in collected:
+            print(text)
+            lyrics_file.write(f"{text}\n")
+
+# NOTE: module-level call - it runs on import as well as when the file is executed as a script.
+save_sanitized_lyrics()
diff --git a/modules/WillieTimer/phraseGenerator.py b/modules/WillieTimer/phraseGenerator.py
new file mode 100644
index 0000000..d21edf4
--- /dev/null
+++ b/modules/WillieTimer/phraseGenerator.py
@@ -0,0 +1,121 @@
+import numpy
+import sys
+from nltk.tokenize import RegexpTokenizer
+from nltk.corpus import stopwords
+from keras.models import Sequential, load_model
+from keras.layers import Dense, Dropout, LSTM
+from keras.utils import np_utils
+from keras.callbacks import ModelCheckpoint
+
+
+class PhraseGenerator():
+    def __init__(self):
+        self.training_file = "./lyrics.txt"
+        self.file = open(self.training_file, 'r', encoding='utf-8')
+        self.model = Sequential()
+
+        self.processed_inputs = self.tokenize_words(self.file)
+        self.chars = sorted(list(set(self.processed_inputs)))
+
+        self.input_len = len(self.processed_inputs)
+        self.vocab_len = len(self.chars)
+
+        self.seq_length = 100
+        self.x_data = []
+        self.y_data = []
+
+    def nums_to_chars(self):
+        return dict((i, c) for i, c in enumerate(self.chars))
+
+    def chars_to_nums(self):
+        return dict((c, i) for i, c in enumerate(self.chars))
+
+    def tokenize_words(self, input):
+        input = str(input).lower()
+
+        tokenizer = RegexpTokenizer(r'\w+')
+        tokens = tokenizer.tokenize(input)
+
+        filtered = filter(lambda token: token not in stopwords.words('english'), tokens)
+
+        return " ".join(filtered)
+
+    def train(self):
+        char_to_num = self.chars_to_nums()
+
+        print("Total number of characters:", self.input_len)
+        print("Total vocab:", self.vocab_len)
+
+        for i in range(0, self.input_len - self.seq_length, 1):
+            print(i)
+            # Define input and output sequences
+            # Input is the current character plus desired sequence length
+            in_seq = self.processed_inputs[i:i + self.seq_length]
+
+            # Out sequence is the initial character plus total sequence length
+            out_seq = self.processed_inputs[i + self.seq_length]
+
+            # We now convert list of characters to integers based on
+            # previously and add the values to our lists
+            self.x_data.append([char_to_num[char] for char in in_seq])
+            self.y_data.append(char_to_num[out_seq])
+            print(f"X-Data:\t{self.x_data}\nY-Data:\t{self.y_data}")
+
+        n_patterns = len(self.x_data)
+        print("Total Patterns:", n_patterns)
+
+        X = numpy.reshape(self.x_data, (n_patterns, self.seq_length, 1))
+        X = X/float(self.vocab_len)
+
+        y = np_utils.to_categorical(self.y_data)
+
+        self.model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
+        self.model.add(Dropout(0.2))
+        self.model.add(LSTM(256, return_sequences=True))
+        self.model.add(Dropout(0.2))
+        self.model.add(LSTM(128))
+        self.model.add(Dropout(0.2))
+        self.model.add(Dense(y.shape[1], activation='softmax'))
+
+        filepath = "model_weights_saved.hdf5"
+        self.model.load_weights(filepath)
+        self.model.compile(loss='categorical_crossentropy', optimizer='adam')
+
+        checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
+        desired_callbacks = [checkpoint]
+
+        self.model.fit(X, y, epochs=500, batch_size=256, callbacks=desired_callbacks)
+
+        self.model.load_weights(filepath)
+        self.model.compile(loss='categorical_crossentropy', optimizer='adam')
+
+    def generate_text(self):
+        num_to_char = self.nums_to_chars()
+        start = numpy.random.randint(0, len(self.x_data) - 1)
+        pattern = self.x_data[start]
+        print(pattern)
+        print("Random Seed:")
+        print("\"", ''.join([num_to_char[value] for value in pattern]), "\"")
+
+        output_string = ""
+        for i in range(500):
+            x = numpy.reshape(pattern, (1, len(pattern), 1))
+            x = x / float(self.vocab_len)
+            prediction = self.model.predict(x, verbose=0)
+            index = numpy.argmax(prediction)
+            result = num_to_char[index]
+
+            output_string += str(result)
+
+            pattern.append(index)
+            pattern = pattern[1:len(pattern)]
+        print(output_string)
+        print(pattern)
+
+print('Starting')
+bot = PhraseGenerator()
+print('Training')
+bot.train()
+print("Generating Text")
+bot.generate_text()
+

From 7ed60e9f705840197b9c899750d114543206ba3f Mon Sep 17 00:00:00 2001
From: Logan Cusano <logan@simplestepsolutions.com>
Date: Wed, 22 Dec 2021 17:50:16 -0500
Subject: [PATCH 4/5] Ignore Mac system files

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 4471aed..89abbc6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ config.ini
 *.7z
 *.bat
 /DSDPlus/
+._.DS_Store

From d41919e32b62503451dce018175f7077c8942a4b Mon Sep 17 00:00:00 2001
From: Logan Cusano <logan@simplestepsolutions.com>
Date: Mon, 27 Dec 2021 01:09:48 -0500
Subject: [PATCH 5/5] Created AI text generator function to fit into WillieBot

---
 modules/WillieTimer/phraseGenerator.py | 150 +++++++------------------
 1 file changed, 40 insertions(+), 110 deletions(-)

diff --git a/modules/WillieTimer/phraseGenerator.py b/modules/WillieTimer/phraseGenerator.py
index d21edf4..c183add 100644
--- a/modules/WillieTimer/phraseGenerator.py
+++ b/modules/WillieTimer/phraseGenerator.py
@@ -1,121 +1,51 @@
-import numpy
-import sys
-from nltk.tokenize import RegexpTokenizer
-from nltk.corpus import stopwords
-from keras.models import Sequential, load_model
-from keras.layers import Dense, Dropout, LSTM
-from keras.utils import np_utils
-from keras.callbacks import ModelCheckpoint
+import os
+import argparse
+from textgenrnn import textgenrnn
 
 
-class PhraseGenerator():
-    def __init__(self):
-        self.training_file = "./lyrics.txt"
-        self.file = open(self.training_file, 'r', encoding='utf-8')
-        self.model = Sequential()
+class PhraseGenerator(textgenrnn):
+    """textgenrnn subclass that keeps a training file path, weights file path, epoch count and temperature."""
+    def __init__(self, input_training_file_path='./lyrics.txt', input_epochs=1, input_temperature=.5,
+                 input_model_file_path='./textgenrnn_weights.hdf5'):
+        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Set logging for Tensorflow
 
-        self.processed_inputs = self.tokenize_words(self.file)
-        self.chars = sorted(list(set(self.processed_inputs)))
+        self.training_file_path = input_training_file_path
+        self.model_file_path = input_model_file_path  # NOTE(review): not passed to pg_train() - confirm save path
+        self.epochs = input_epochs
+        self.temperature = input_temperature
 
-        self.input_len = len(self.processed_inputs)
-        self.vocab_len = len(self.chars)
+        super().__init__(weights_path=self.model_file_path, allow_growth=True, name='WillieBotModel')
 
-        self.seq_length = 100
-        self.x_data = []
-        self.y_data = []
+    def pg_train(self):
+        """Call train_from_file() on the stored training file for the stored number of epochs."""
+        self.train_from_file(self.training_file_path, num_epochs=self.epochs, verbose=0, top_n=5, return_as_list=True)
 
-    def nums_to_chars(self):
-        return dict((i, c) for i, c in enumerate(self.chars))
+    def pg_generate(self):
+        """Call generate(1, ...) at the stored temperature and print the first returned item."""
+        generated_text = self.generate(1, temperature=self.temperature, return_as_list=True)
+        print(generated_text[0])
 
-    def chars_to_nums(self):
-        return dict((c, i) for i, c in enumerate(self.chars))
 
-    def tokenize_words(self, input):
-        input = str(input).lower()
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Train the WillieBot phrase generator and/or generate text')
+    parser.add_argument('-t', '--train', action='store_true', help='Train the model')
+    parser.add_argument('-g', '--generate', action='store_true', help='Generate text')
+    parser.add_argument('-e', '--epochs', type=int, default=1, help='Set amount of epochs (defaults to 1)')
+    # Temperature is fractional (the default is .5), so it is parsed as float; type=int rejected input like 0.7
+    parser.add_argument('-p', '--temp', type=float, default=.5,
+                        help='Set temperature for generation (defaults to .5)')
+    parser.add_argument('-f', '--training_file', type=str, default='./lyrics.txt',
+                        help='Set the training file (defaults to \'./lyrics.txt\')')
+    args = vars(parser.parse_args())
+    print(args)
+    print('Starting')
 
-        tokenizer = RegexpTokenizer(r'\w+')
-        tokens = tokenizer.tokenize(input)
+    # Defaults now come from argparse, so explicit falsy values (e.g. --temp 0) are passed through unchanged
+    pg = PhraseGenerator(input_epochs=args['epochs'], input_training_file_path=args['training_file'],
+                         input_temperature=args['temp'])
 
-        filtered = filter(lambda token: token not in stopwords.words('english'), tokens)
-
-        return " ".join(filtered)
-
-    def train(self):
-        char_to_num = self.chars_to_nums()
-
-        print("Total number of characters:", self.input_len)
-        print("Total vocab:", self.vocab_len)
-
-        for i in range(0, self.input_len - self.seq_length, 1):
-            print(i)
-            # Define input and output sequences
-            # Input is the current character plus desired sequence length
-            in_seq = self.processed_inputs[i:i + self.seq_length]
-
-            # Out sequence is the initial character plus total sequence length
-            out_seq = self.processed_inputs[i + self.seq_length]
-
-            # We now convert list of characters to integers based on
-            # previously and add the values to our lists
-            self.x_data.append([char_to_num[char] for char in in_seq])
-            self.y_data.append(char_to_num[out_seq])
-            print(f"X-Data:\t{self.x_data}\nY-Data:\t{self.y_data}")
-
-        n_patterns = len(self.x_data)
-        print("Total Patterns:", n_patterns)
-
-        X = numpy.reshape(self.x_data, (n_patterns, self.seq_length, 1))
-        X = X/float(self.vocab_len)
-
-        y = np_utils.to_categorical(self.y_data)
-
-        self.model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
-        self.model.add(Dropout(0.2))
-        self.model.add(LSTM(256, return_sequences=True))
-        self.model.add(Dropout(0.2))
-        self.model.add(LSTM(128))
-        self.model.add(Dropout(0.2))
-        self.model.add(Dense(y.shape[1], activation='softmax'))
-
-        filepath = "model_weights_saved.hdf5"
-        self.model.load_weights(filepath)
-        self.model.compile(loss='categorical_crossentropy', optimizer='adam')
-
-        checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
-        desired_callbacks = [checkpoint]
-
-        self.model.fit(X, y, epochs=500, batch_size=256, callbacks=desired_callbacks)
-
-        self.model.load_weights(filepath)
-        self.model.compile(loss='categorical_crossentropy', optimizer='adam')
-
-    def generate_text(self):
-        num_to_char = self.nums_to_chars()
-        start = numpy.random.randint(0, len(self.x_data) - 1)
-        pattern = self.x_data[start]
-        print(pattern)
-        print("Random Seed:")
-        print("\"", ''.join([num_to_char[value] for value in pattern]), "\"")
-
-        output_string = ""
-        for i in range(500):
-            x = numpy.reshape(pattern, (1, len(pattern), 1))
-            x = x / float(self.vocab_len)
-            prediction = self.model.predict(x, verbose=0)
-            index = numpy.argmax(prediction)
-            result = num_to_char[index]
-
-            output_string += str(result)
-
-            pattern.append(index)
-            pattern = pattern[1:len(pattern)]
-        print(output_string)
-        print(pattern)
-
-print('Starting')
-bot = PhraseGenerator()
-print('Training')
-bot.train()
-print("Generating Text")
-bot.generate_text()
+    if args['train']:
+        pg.pg_train()
 
+    if args['generate']:
+        pg.pg_generate()