From e2f841465c99f384effc1c314b99e20a9f7a414c Mon Sep 17 00:00:00 2001 From: NATURESPEAK Date: Mon, 11 Apr 2022 13:09:01 +0200 Subject: [PATCH] fix endings etc. --- run.sh | 2 ++ speak.py | 4 +-- speak_broadcast.py | 84 +++++++++++++++++++++++++++++++++++++++++++--- utterance/utils.py | 56 +++++++++++++++++++++++++++++++ utterance/voice.py | 2 +- 5 files changed, 141 insertions(+), 7 deletions(-) create mode 100755 run.sh diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..a3f9479 --- /dev/null +++ b/run.sh @@ -0,0 +1,2 @@ +. venv/bin/activate +python speak_broadcast.py -c ../assets/voice.config.json \ No newline at end of file diff --git a/speak.py b/speak.py index 3197565..486926f 100644 --- a/speak.py +++ b/speak.py @@ -2,7 +2,7 @@ import argparse, json, sys, time, random import utterance.voice import utterance.utils -UTTERANCE_LEN = 16 #<--------------- these should be in config +UTTERANCE_LEN = 64 #<--------------- these should be in config def main() -> int: @@ -19,7 +19,7 @@ def main() -> int: voices = [] for v in conf['voices']: model = v['model'] - voice = utterance.voice.Voice(name=v["name"].upper(), model=model['model_dir'], tokenizer=model['tokeniser_file'], temp=float(model["temperature"]), lenght=UTTERANCE_LEN) + voice = utterance.voice.Voice(name=v["name"].upper(), model=model['model_dir'], tokenizer=model['tokeniser_file'], temp=float(model["temperature"]), length=UTTERANCE_LEN) voices.append(voice) current_voice_name = "" diff --git a/speak_broadcast.py b/speak_broadcast.py index 729e812..b80a222 100644 --- a/speak_broadcast.py +++ b/speak_broadcast.py @@ -1,4 +1,4 @@ -import argparse, json, sys, time, random, logging, signal, threading +import argparse, json, sys, time, random, logging, signal, threading, string import utterance.voice import utterance.utils import utterance.oscosc @@ -17,6 +17,8 @@ exit = False terminal = False debug = False state = "METRIC" +B_SKIP = [] +B_SWAPS = {} def format_str(text) -> str: t = utterance.utils.clean(text) @@ -60,8 +62,58 @@ def terminal_utterance(utterance): if terminal: print(utterance, end="") +def fix_ending(frags): + + result = frags.copy() + + end = result[-1] + end = end.translate(str.maketrans('', '', string.punctuation)) + + fix = utterance.utils.fix_sentence(end) + + if fix is None or len(fix) == 0: + result = result[:-1] + # result[-1] = result[-1] + else: + result[-1] = fix + + result[-1] = utterance.utils.fix_punctuation(result[-1]) + + print(result) + + return result + +def fix_beginning(frags): + + global B_SKIP, B_SWAPS + + result = frags.copy() + + beginnig = result[0] + toks = beginnig.split() + + if len(toks) > 0: + f = toks[0].lower() + if f[0] in string.punctuation: + f = f[1:] + if f in B_SKIP: + if len(toks) > 2: + result[0] = " ".join(toks[1:]).capitalize() + "\n" + else: + if result[1][0] == ' ': + result[1] = result[1][1:] + result[1] = result[1].capitalize() + return result[1:] + elif toks[0] in B_SWAPS: + result[0] = result[0].replace(toks[0], B_SWAPS[toks[0]]) + + return result + + def broadcast_utterance(v, utterance): + print(utterance) + global broadcast, exit, debug # Send all text to server to calculate bounds in advance @@ -74,10 +126,15 @@ def broadcast_utterance(v, utterance): frags = v.fragments(utterance) + frags = fix_beginning(frags) + frags = fix_ending(frags) + for f in frags: + + terminal_utterance(f) + text += f broadcast.utterance(text, v.channel) - terminal_utterance(f) # sleep_time = 2 # toks = tokenise_str(f) @@ -129,7 +186,7 @@ def signal_terminate(signum, frame): def main() -> int: - global broadcast, metric, terminal, debug, state + global broadcast, metric, terminal, debug, state, UTTERANCE_LEN, NUM_METRIC_GEN, NUM_SAMPLE_VOICES, RANDOM_SEED_TIMER_MIN, STATE_TRANSITION_TIMER_MIN, B_SKIP, B_SWAPS p = argparse.ArgumentParser() p.add_argument("-c", "--config", type=str, default="voice.config.json", help="configuratin file") @@ -146,6 +203,20 @@ def main() -> int: terminal = args.terminal + #--------------------# + # CONFIGS + #--------------------# + + u_conf = conf['utterance_configuration'] + UTTERANCE_LEN = u_conf['UTTERANCE_LEN'] + NUM_METRIC_GEN = u_conf['NUM_METRIC_GEN'] + NUM_SAMPLE_VOICES = u_conf['NUM_SAMPLE_VOICES'] + RANDOM_SEED_TIMER_MIN = u_conf['RANDOM_SEED_TIMER_MIN'] + STATE_TRANSITION_TIMER_MIN = u_conf['STATE_TRANSITION_TIMER_MIN'] + + B_SKIP = u_conf['b_skip'] + B_SWAPS = u_conf['b_swaps'] + #--------------------# # VOICES #--------------------# @@ -272,7 +343,12 @@ def main() -> int: t.start() logging.info(f"METRIC::broadcasting {v.name}") - broadcast_utterance(v, uv) + + try: + broadcast_utterance(v, uv) + except Exception as e: + logging.error(e) + pass t.join() diff --git a/utterance/utils.py b/utterance/utils.py index c5089b8..3352a02 100644 --- a/utterance/utils.py +++ b/utterance/utils.py @@ -1,5 +1,6 @@ import string, regex from gensim.utils import tokenize +import nltk def clean(text: str) -> str: @@ -42,16 +43,71 @@ def fragments(utterance: str): if k in skip_punctuation: continue else: + # if cum[0] not in [' ', '\n']: + # cum = ' ' + cum frags.append(cum) cum = "" cum += '\n' frags.append(cum) + # get rid of newline (2x) + if len(frags) > 0: + if frags[-1] == '\n': + frags = frags[:-1] + if len(frags) > 0: + if frags[-1] == '\n': + frags = frags[:-1] + return frags + def tokenise(utterance: str): return list(tokenize(utterance, lower=True)) +def fix_sentence(s: str) -> str: + + if len(s.strip()) <= 1: + return None + + text = nltk.word_tokenize(s) + tags = nltk.pos_tag(text) + + if len(tags) == 0: + return None + elif len(tags) == 1 and tags[0][1] in ['DT', 'WDT', 'IN']: + return None + elif len(tags) == 1 and len(tags[0][0]) == 1 and tags[0][1] in ['NN']: + return None + + if tags[-1][1] in ['DT', 'WDT', 'IN', 'CC']: + tags = tags[:-1] + return " ".join([x[0] for x in tags]) + elif tags[-1][1] in ['NN'] and len(tags[-1][0]) == 1: + tags = tags[:-1] + return " ".join([x[0] for x in tags]) + + return s + +def fix_punctuation(s: str) -> str: + + if len(s.strip()) == 0: + return "" + + if len(s) == 1 and s in string.punctuation: + if s != ',': + return s + else: + return "" + + e = s.rstrip() + if e[-1] in string.punctuation: + if e[-1] in [',', ':']: + s = e[:-1] + '.' + else: + s = e + '.' + + return s + diff --git a/utterance/voice.py b/utterance/voice.py index a42ffae..99b676a 100644 --- a/utterance/voice.py +++ b/utterance/voice.py @@ -3,7 +3,7 @@ import utterance.utils import gensim, regex, string, time UTTERANCE_MEMORY_LEN = 15 -UTTERANCE_MEMORY_MIN_DIST = 0.2 +UTTERANCE_MEMORY_MIN_DIST = 0.85 class Voice: