fix endings etc.

This commit is contained in:
NATURESPEAK 2022-04-11 13:09:01 +02:00
parent 9673a3d4f0
commit e2f841465c
5 changed files with 141 additions and 7 deletions

2
run.sh Executable file
View File

@ -0,0 +1,2 @@
. venv/bin/activate
python speak_broadcast.py -c ../assets/voice.config.json

View File

@ -2,7 +2,7 @@ import argparse, json, sys, time, random
import utterance.voice import utterance.voice
import utterance.utils import utterance.utils
UTTERANCE_LEN = 16 #<--------------- these should be in config UTTERANCE_LEN = 64 #<--------------- these should be in config
def main() -> int: def main() -> int:
@ -19,7 +19,7 @@ def main() -> int:
voices = [] voices = []
for v in conf['voices']: for v in conf['voices']:
model = v['model'] model = v['model']
voice = utterance.voice.Voice(name=v["name"].upper(), model=model['model_dir'], tokenizer=model['tokeniser_file'], temp=float(model["temperature"]), lenght=UTTERANCE_LEN) voice = utterance.voice.Voice(name=v["name"].upper(), model=model['model_dir'], tokenizer=model['tokeniser_file'], temp=float(model["temperature"]), length=UTTERANCE_LEN)
voices.append(voice) voices.append(voice)
current_voice_name = "" current_voice_name = ""

View File

@ -1,4 +1,4 @@
import argparse, json, sys, time, random, logging, signal, threading import argparse, json, sys, time, random, logging, signal, threading, string
import utterance.voice import utterance.voice
import utterance.utils import utterance.utils
import utterance.oscosc import utterance.oscosc
@ -17,6 +17,8 @@ exit = False
terminal = False terminal = False
debug = False debug = False
state = "METRIC" state = "METRIC"
B_SKIP = []
B_SWAPS = {}
def format_str(text) -> str: def format_str(text) -> str:
t = utterance.utils.clean(text) t = utterance.utils.clean(text)
@ -60,8 +62,58 @@ def terminal_utterance(utterance):
if terminal: if terminal:
print(utterance, end="") print(utterance, end="")
def fix_ending(frags):
    """Tidy the last fragment of an utterance so it does not end abruptly.

    The final fragment is stripped of ASCII punctuation and passed to
    ``utterance.utils.fix_sentence``.  When that yields nothing usable
    (``None`` or an empty string) the fragment is dropped; otherwise it is
    replaced by the repaired text.  Whatever fragment ends up last is then
    passed through ``utterance.utils.fix_punctuation``.

    Args:
        frags: list of text fragments; it is copied, never modified in place.

    Returns:
        A new list of fragments.  It is empty when ``frags`` is empty or when
        its only fragment had to be dropped.
    """
    result = frags.copy()
    if not result:
        # Nothing to repair; indexing result[-1] below would raise.
        return result

    # Remove punctuation first so the sentence check only sees the words.
    end = result[-1].translate(str.maketrans('', '', string.punctuation))
    fix = utterance.utils.fix_sentence(end)
    if fix:
        result[-1] = fix
    else:
        # The ending could not be salvaged: drop the whole fragment.
        result = result[:-1]

    # Dropping the only fragment leaves an empty list, so guard the index.
    if result:
        result[-1] = utterance.utils.fix_punctuation(result[-1])

    logging.debug("fix_ending: %s", result)
    return result
def fix_beginning(frags):
    """Tidy the first fragment of an utterance.

    The first whitespace-separated token of the first fragment is lowercased
    and, if it starts with an ASCII punctuation character, has that character
    removed.  Two module-level tables then drive the repair:

    * ``B_SKIP``: if the normalised token is listed, the token is removed.
      When the fragment has more than two tokens the remainder is kept,
      capitalised, and terminated with a newline.  Otherwise the whole first
      fragment is dropped and the following fragment (if any) loses one
      leading space and is capitalised.
    * ``B_SWAPS``: otherwise, if the raw first token is a key, its leading
      occurrence is replaced by the mapped value.

    Args:
        frags: list of text fragments; it is copied, never modified in place.

    Returns:
        A new list of fragments, possibly shorter than ``frags`` (and empty
        when the only fragment was dropped).
    """
    result = frags.copy()
    if not result:
        return result

    toks = result[0].split()
    if not toks:
        # Whitespace-only first fragment: nothing to inspect.
        return result

    first = toks[0].lower()
    if first[0] in string.punctuation:
        first = first[1:]

    if first in B_SKIP:
        if len(toks) > 2:
            result[0] = " ".join(toks[1:]).capitalize() + "\n"
            return result
        # Too little left in the first fragment: drop it entirely and make
        # the next fragment (when there is one) the new beginning.
        rest = result[1:]
        if rest:
            if rest[0].startswith(' '):
                rest[0] = rest[0][1:]
            rest[0] = rest[0].capitalize()
        return rest

    if toks[0] in B_SWAPS:
        # count=1: only the leading token is swapped, not later repeats of
        # the same character sequence further into the fragment.
        result[0] = result[0].replace(toks[0], B_SWAPS[toks[0]], 1)
    return result
def broadcast_utterance(v, utterance): def broadcast_utterance(v, utterance):
print(utterance)
global broadcast, exit, debug global broadcast, exit, debug
# Send all text to server to calculate bounds in advance # Send all text to server to calculate bounds in advance
@ -74,10 +126,15 @@ def broadcast_utterance(v, utterance):
frags = v.fragments(utterance) frags = v.fragments(utterance)
frags = fix_beginning(frags)
frags = fix_ending(frags)
for f in frags: for f in frags:
terminal_utterance(f)
text += f text += f
broadcast.utterance(text, v.channel) broadcast.utterance(text, v.channel)
terminal_utterance(f)
# sleep_time = 2 # sleep_time = 2
# toks = tokenise_str(f) # toks = tokenise_str(f)
@ -129,7 +186,7 @@ def signal_terminate(signum, frame):
def main() -> int: def main() -> int:
global broadcast, metric, terminal, debug, state global broadcast, metric, terminal, debug, state, UTTERANCE_LEN, NUM_METRIC_GEN, NUM_SAMPLE_VOICES, RANDOM_SEED_TIMER_MIN, STATE_TRANSITION_TIMER_MIN, B_SKIP, B_SWAPS
p = argparse.ArgumentParser() p = argparse.ArgumentParser()
p.add_argument("-c", "--config", type=str, default="voice.config.json", help="configuratin file") p.add_argument("-c", "--config", type=str, default="voice.config.json", help="configuratin file")
@ -146,6 +203,20 @@ def main() -> int:
terminal = args.terminal terminal = args.terminal
#--------------------#
# CONFIGS
#--------------------#
u_conf = conf['utterance_configuration']
UTTERANCE_LEN = u_conf['UTTERANCE_LEN']
NUM_METRIC_GEN = u_conf['NUM_METRIC_GEN']
NUM_SAMPLE_VOICES = u_conf['NUM_SAMPLE_VOICES']
RANDOM_SEED_TIMER_MIN = u_conf['RANDOM_SEED_TIMER_MIN']
STATE_TRANSITION_TIMER_MIN = u_conf['STATE_TRANSITION_TIMER_MIN']
B_SKIP = u_conf['b_skip']
B_SWAPS = u_conf['b_swaps']
#--------------------# #--------------------#
# VOICES # VOICES
#--------------------# #--------------------#
@ -272,7 +343,12 @@ def main() -> int:
t.start() t.start()
logging.info(f"METRIC::broadcasting {v.name}") logging.info(f"METRIC::broadcasting {v.name}")
broadcast_utterance(v, uv)
try:
broadcast_utterance(v, uv)
except Exception as e:
logging.error(e)
pass
t.join() t.join()

View File

@ -1,5 +1,6 @@
import string, regex import string, regex
from gensim.utils import tokenize from gensim.utils import tokenize
import nltk
def clean(text: str) -> str: def clean(text: str) -> str:
@ -42,16 +43,71 @@ def fragments(utterance: str):
if k in skip_punctuation: if k in skip_punctuation:
continue continue
else: else:
# if cum[0] not in [' ', '\n']:
# cum = ' ' + cum
frags.append(cum) frags.append(cum)
cum = "" cum = ""
cum += '\n' cum += '\n'
frags.append(cum) frags.append(cum)
# get rid of newline (2x)
if len(frags) > 0:
if frags[-1] == '\n':
frags = frags[:-1]
if len(frags) > 0:
if frags[-1] == '\n':
frags = frags[:-1]
return frags return frags
def tokenise(utterance: str): def tokenise(utterance: str):
return list(tokenize(utterance, lower=True)) return list(tokenize(utterance, lower=True))
def fix_sentence(s: str) -> "str | None":
    """Remove a dangling final word from ``s``.

    The text is tokenised and part-of-speech tagged with NLTK.  The last
    token is considered dangling when its tag is one of ``DT``, ``WDT``,
    ``IN`` or ``CC``, or when it is a single-character token tagged ``NN``.

    Args:
        s: the text to check.

    Returns:
        ``None`` when ``s`` has at most one non-whitespace character, when
        tagging yields no tokens, or when removing the dangling token leaves
        nothing.  Otherwise the remaining tokens joined by single spaces if
        a dangling token was removed, or ``s`` unchanged if the ending was
        already acceptable.
    """
    if len(s.strip()) <= 1:
        return None

    tags = nltk.pos_tag(nltk.word_tokenize(s))
    if not tags:
        return None

    last_word, last_tag = tags[-1]
    dangling = (
        last_tag in ('DT', 'WDT', 'IN', 'CC')
        or (last_tag == 'NN' and len(last_word) == 1)
    )
    if not dangling:
        return s

    kept = tags[:-1]
    if not kept:
        # The text was a single dangling token: nothing worth keeping.
        return None
    return " ".join(word for word, _ in kept)
def fix_punctuation(s: str) -> str:
    """Make sure ``s`` finishes with sentence-ending punctuation.

    * Blank or whitespace-only input gives ``""``.
    * A lone ASCII punctuation character is returned as is, except a lone
      comma, which gives ``""``.
    * Text whose last non-whitespace character is not punctuation gets a
      ``.`` appended (trailing whitespace is discarded).
    * A trailing ``,`` or ``:`` is replaced by ``.`` (trailing whitespace is
      discarded).
    * Any other trailing punctuation leaves ``s`` untouched.
    """
    if not s.strip():
        return ""

    if len(s) == 1 and s in string.punctuation:
        return "" if s == ',' else s

    trimmed = s.rstrip()
    last = trimmed[-1]
    if last not in string.punctuation:
        return trimmed + '.'
    if last in (',', ':'):
        return trimmed[:-1] + '.'
    return s

View File

@ -3,7 +3,7 @@ import utterance.utils
import gensim, regex, string, time import gensim, regex, string, time
UTTERANCE_MEMORY_LEN = 15 UTTERANCE_MEMORY_LEN = 15
UTTERANCE_MEMORY_MIN_DIST = 0.2 UTTERANCE_MEMORY_MIN_DIST = 0.85
class Voice: class Voice: