utterance work

This commit is contained in:
gauthiier 2022-02-26 15:28:46 +01:00
parent 97bae0b66b
commit abf2f2f720
2 changed files with 42 additions and 10 deletions

View File

@ -2,18 +2,28 @@
"voices": [
{
"name": "Ralph",
"model_dir": "../data/models/Emerson-Nature.txt",
"tokeniser_file": "../data/tokens/Emerson-Nature.txt.tokenizer.json"
"model_dir": "../data/tokens+models/Emerson-Nature.txt_bs=64_ns=8000_vs=5000",
"tokeniser_file": "../data/tokens+models/Emerson-Nature.txt_bs=64_ns=8000_vs=5000/Emerson-Nature.txt_ns=5000.tokenizer.json",
"temperature": "0.9"
},
{
"name": "Jean",
"model_dir": "../data/models/Lafontaine-Fables[english].txt",
"tokeniser_file": "../data/tokens/Lafontaine-Fables[english].txt.tokenizer.json"
"model_dir": "../data/tokens+models/Lafontaine-Fables[english].txt_bs=64_ns=8000_vs=5000",
"tokeniser_file": "../data/tokens+models/Lafontaine-Fables[english].txt_bs=64_ns=8000_vs=5000/Lafontaine-Fables[english].txt_ns=5000.tokenizer.json",
"temperature": "1.2"
},
{
"name": "Blake",
"model_dir": "../data/models/Blake-Songs-of-Innocence-and-of-Experience.txt",
"tokeniser_file": "../data/tokens/Blake-Songs-of-Innocence-and-of-Experience.txt.tokenizer.json"
"model_dir": "../data/tokens+models/Blake-Songs-of-Innocence-and-of-Experience.txt_bs=64_ns=8000_vs=5000",
"tokeniser_file": "../data/tokens+models/Blake-Songs-of-Innocence-and-of-Experience.txt_bs=64_ns=8000_vs=5000/Blake-Songs-of-Innocence-and-of-Experience.txt_ns=5000.tokenizer.json",
"temperature": "1.5"
},
{
"name": "Friedrich",
"model_dir": "../data/tokens+models/Schelling-ON-THE-RELATION-OF-THE-PLASTIC-ARTS-TO-NATURE.txt_bs=64_ns=8000_vs=5000",
"tokeniser_file": "../data/tokens+models/Schelling-ON-THE-RELATION-OF-THE-PLASTIC-ARTS-TO-NATURE.txt_bs=64_ns=8000_vs=5000/Schelling-ON-THE-RELATION-OF-THE-PLASTIC-ARTS-TO-NATURE.txt_ns=5000.tokenizer.json",
"temperature": "1.5"
}
]
}

View File

@ -1,6 +1,25 @@
import argparse, json, sys, time, random
import spacy
from aitextgen import aitextgen
import string
def clean(text: str) -> str:
    """Tidy the first line of a generated utterance.

    If the first space-separated token of the first line is empty or made
    up entirely of punctuation characters (e.g. ",", "...", "?!"), that
    token is dropped. The first line is then passed through
    ``str.capitalize()``, which upper-cases its first character and
    lower-cases the rest. Lines after the first are returned untouched.

    Args:
        text: Raw text, possibly spanning several '\\n'-separated lines.

    Returns:
        The text with its first line cleaned and capitalised.
    """
    lines = text.split('\n')
    tokens = lines[0].split(' ')
    first_token = tokens[0].strip()
    # Check every character rather than using a substring test against
    # string.punctuation: the substring test misses tokens such as "..."
    # or "?!" whose characters are not adjacent in that constant.
    # all() over an empty token is True, so a leading space is still
    # dropped exactly as before.
    if all(ch in string.punctuation for ch in first_token):
        lines[0] = ' '.join(tokens[1:]).capitalize()
    else:
        lines[0] = lines[0].capitalize()
    return '\n'.join(lines)
def format(text: str) -> str:
    """Normalise line endings and remove blank lines between text lines.

    Windows line endings ('\\r\\n') are converted to '\\n', then every run
    of consecutive newlines is collapsed to a single '\\n'.

    NOTE: this name shadows the builtin ``format``; it is kept unchanged so
    existing callers keep working.

    Args:
        text: The text to normalise.

    Returns:
        The text with '\\n' line endings and no empty lines.
    """
    text = text.replace('\r\n', '\n')
    # A single replace() pass only halves each run of newlines, so three or
    # more in a row would still leave a blank line; repeat until none remain.
    while '\n\n' in text:
        text = text.replace('\n\n', '\n')
    return text
def main() -> int:
@ -17,7 +36,7 @@ def main() -> int:
voices = []
for v in conf['voices']:
a = aitextgen(model_folder=v['model_dir'], tokenizer_file=v['tokeniser_file'])
voices.append({"name": v["name"].upper(), "a": a})
voices.append({"name": v["name"].upper(), "a": a, "temp": float(v["temperature"])})
nbr_voices = len(voices)
current_voice = ""
@ -28,10 +47,13 @@ def main() -> int:
print("==========")
print(v['name'] + ":")
current_voice = v['name']
t = v['a'].generate_one().strip()
print(t)
t = v['a'].generate(n=1, max_lenght=32, temperature=v['temp'], return_as_list=True)[0]
if t != None:
t = clean(t)
t = format(t)
print(t)
time.sleep(1)
time.sleep(4)
if __name__ == '__main__':