diff --git a/utterance/config.json b/utterance/config.json index af3ee8e..422789a 100644 --- a/utterance/config.json +++ b/utterance/config.json @@ -2,18 +2,28 @@ "voices": [ { "name": "Ralph", - "model_dir": "../data/models/Emerson-Nature.txt", - "tokeniser_file": "../data/tokens/Emerson-Nature.txt.tokenizer.json" + "model_dir": "../data/tokens+models/Emerson-Nature.txt_bs=64_ns=8000_vs=5000", + "tokeniser_file": "../data/tokens+models/Emerson-Nature.txt_bs=64_ns=8000_vs=5000/Emerson-Nature.txt_ns=5000.tokenizer.json", + "temperature": "0.9" }, { "name": "Jean", - "model_dir": "../data/models/Lafontaine-Fables[english].txt", - "tokeniser_file": "../data/tokens/Lafontaine-Fables[english].txt.tokenizer.json" + "model_dir": "../data/tokens+models/Lafontaine-Fables[english].txt_bs=64_ns=8000_vs=5000", + "tokeniser_file": "../data/tokens+models/Lafontaine-Fables[english].txt_bs=64_ns=8000_vs=5000/Lafontaine-Fables[english].txt_ns=5000.tokenizer.json", + "temperature": "1.2" }, { "name": "Blake", - "model_dir": "../data/models/Blake-Songs-of-Innocence-and-of-Experience.txt", - "tokeniser_file": "../data/tokens/Blake-Songs-of-Innocence-and-of-Experience.txt.tokenizer.json" + "model_dir": "../data/tokens+models/Blake-Songs-of-Innocence-and-of-Experience.txt_bs=64_ns=8000_vs=5000", + "tokeniser_file": "../data/tokens+models/Blake-Songs-of-Innocence-and-of-Experience.txt_bs=64_ns=8000_vs=5000/Blake-Songs-of-Innocence-and-of-Experience.txt_ns=5000.tokenizer.json", + "temperature": "1.5" + }, + { + "name": "Friedrich", + "model_dir": "../data/tokens+models/Schelling-ON-THE-RELATION-OF-THE-PLASTIC-ARTS-TO-NATURE.txt_bs=64_ns=8000_vs=5000", + "tokeniser_file": "../data/tokens+models/Schelling-ON-THE-RELATION-OF-THE-PLASTIC-ARTS-TO-NATURE.txt_bs=64_ns=8000_vs=5000/Schelling-ON-THE-RELATION-OF-THE-PLASTIC-ARTS-TO-NATURE.txt_ns=5000.tokenizer.json", + "temperature": "1.5" } + ] } \ No newline at end of file diff --git a/utterance/speak.py b/utterance/speak.py index 
4af96e7..4607f9e 100644 --- a/utterance/speak.py +++ b/utterance/speak.py @@ -1,7 +1,26 @@ import argparse, json, sys, time, random import spacy from aitextgen import aitextgen +import string +def clean(text: str) -> str: + + s = text.split('\n') + + if(len(s) > 0): + tok_1 = s[0].split(' ') + if len(tok_1) > 0 and tok_1[0].strip() in string.punctuation: + s_1 = ' '.join(tok_1[1:]) + s[0] = s_1.capitalize() + else: + s[0] = s[0].capitalize() + + return '\n'.join(s) + +def format(text: str) -> str: + + return text.replace('\r\n', '\n').replace('\n\n', '\n') + def main() -> int: p = argparse.ArgumentParser() @@ -17,7 +36,7 @@ def main() -> int: voices = [] for v in conf['voices']: a = aitextgen(model_folder=v['model_dir'], tokenizer_file=v['tokeniser_file']) - voices.append({"name": v["name"].upper(), "a": a}) + voices.append({"name": v["name"].upper(), "a": a, "temp": float(v["temperature"])}) nbr_voices = len(voices) current_voice = "" @@ -28,10 +47,13 @@ def main() -> int: print("==========") print(v['name'] + ":") current_voice = v['name'] - t = v['a'].generate_one().strip() - print(t) + t = v['a'].generate(n=1, max_length=32, temperature=v['temp'], return_as_list=True)[0] + if t is not None: + t = clean(t) + t = format(t) + print(t) - time.sleep(1) + time.sleep(4) if __name__ == '__main__':