fix endings etc.

2022-04-11 13:09:01 +02:00 · 2022-04-11 13:09:01 +02:00 · e2f841465c
commit e2f841465c
parent 9673a3d4f0
5 changed files with 141 additions and 7 deletions
--- a/run.sh
+++ b/run.sh
@ -0,0 +1,2 @@
+. venv/bin/activate
+python speak_broadcast.py -c ../assets/voice.config.json
--- a/speak.py
+++ b/speak.py
@ -2,7 +2,7 @@ import argparse, json, sys, time, random
 import utterance.voice
 import utterance.utils

-UTTERANCE_LEN = 16				#<--------------- these should be in config
+UTTERANCE_LEN = 64				#<--------------- these should be in config
 				
 def main() -> int:

@ -19,7 +19,7 @@ def main() -> int:
 	voices = []
 	for v in conf['voices']:
 		model = v['model']
-		voice = utterance.voice.Voice(name=v["name"].upper(), model=model['model_dir'], tokenizer=model['tokeniser_file'], temp=float(model["temperature"]), lenght=UTTERANCE_LEN)
+		voice = utterance.voice.Voice(name=v["name"].upper(), model=model['model_dir'], tokenizer=model['tokeniser_file'], temp=float(model["temperature"]), length=UTTERANCE_LEN)
 		voices.append(voice)

 	current_voice_name = ""
--- a/speak_broadcast.py
+++ b/speak_broadcast.py
@ -1,4 +1,4 @@
-import argparse, json, sys, time, random, logging, signal, threading
+import argparse, json, sys, time, random, logging, signal, threading, string
 import utterance.voice
 import utterance.utils
 import utterance.oscosc
@ -17,6 +17,8 @@ exit = False
 terminal = False
 debug = False
 state = "METRIC"
+B_SKIP = []
+B_SWAPS = {}

 def format_str(text) -> str:
 	t = utterance.utils.clean(text)
@ -60,8 +62,58 @@ def terminal_utterance(utterance):
 	if terminal:
 		print(utterance, end="")

+def fix_ending(frags):
+	"""Tidy the last fragment of an utterance.
+
+	All punctuation is stripped from the last fragment, which is then passed
+	to utterance.utils.fix_sentence. If that returns None or an empty string
+	the fragment is dropped; otherwise it is replaced by the returned text.
+	Whatever fragment is last afterwards is passed through
+	utterance.utils.fix_punctuation.
+
+	frags: list of fragment strings; it is not modified.
+	Returns a new list, which is empty when frags is empty or when its only
+	fragment was dropped.
+	"""
+
+	# Nothing to fix; indexing [-1] below would raise IndexError.
+	if not frags:
+		return []
+
+	result = frags.copy()
+
+	end = result[-1].translate(str.maketrans('', '', string.punctuation))
+	fix = utterance.utils.fix_sentence(end)
+
+	if fix:
+		result[-1] = fix
+	else:
+		result = result[:-1]
+		# The only fragment was dropped: there is no new last fragment to punctuate.
+		if not result:
+			return result
+
+	result[-1] = utterance.utils.fix_punctuation(result[-1])
+
+	# Debug trace goes through logging instead of a bare print to stdout.
+	logging.debug(result)
+
+	return result
+
+def fix_beginning(frags):
+	"""Tidy the first fragment of an utterance.
+
+	The first whitespace-separated token of frags[0] is lower-cased and, if it
+	starts with a punctuation character, that character is removed. If the
+	resulting word is in B_SKIP:
+	  - with more than two tokens, the first token is removed and the rest is
+	    re-joined with single spaces, capitalized and terminated with a newline;
+	  - otherwise the whole first fragment is dropped and the next fragment
+	    (minus one leading space) is capitalized and becomes the first one.
+	If the word is not in B_SKIP but the raw first token is a key of B_SWAPS,
+	that leading token is replaced by its B_SWAPS value.
+
+	frags: list of fragment strings; it is not modified.
+	Returns a new list (empty when frags is empty).
+	"""
+
+	# Nothing to fix; indexing [0] below would raise IndexError.
+	if not frags:
+		return []
+
+	result = frags.copy()
+
+	toks = result[0].split()
+	if not toks:
+		return result
+
+	first = toks[0]
+	key = first.lower()
+	if key[0] in string.punctuation:
+		key = key[1:]
+
+	if key in B_SKIP:
+		# NOTE(review): a two-token fragment is dropped entirely (threshold is > 2,
+		# not > 1) - kept as in the original; confirm this is intended.
+		if len(toks) > 2:
+			result[0] = " ".join(toks[1:]).capitalize() + "\n"
+		elif len(result) > 1:
+			following = result[1]
+			# startswith() is safe on an empty fragment, unlike indexing [0].
+			if following.startswith(' '):
+				following = following[1:]
+			result[1] = following.capitalize()
+			return result[1:]
+		# No following fragment to promote: leave the utterance untouched
+		# rather than raising IndexError.
+	elif first in B_SWAPS:
+		# count=1: only the leading token is swapped. Without it every later
+		# occurrence of the same characters (even inside other words) would be
+		# replaced as well.
+		result[0] = result[0].replace(first, B_SWAPS[first], 1)
+
+	return result
+
+
 def broadcast_utterance(v, utterance):	

+	print(utterance)
+
 	global broadcast, exit, debug

 	# Send all text to server to calculate bounds in advance
@ -74,10 +126,15 @@ def broadcast_utterance(v, utterance):

 	frags = v.fragments(utterance)

+	frags = fix_beginning(frags)
+	frags = fix_ending(frags)
+
 	for f in frags:
+
+		terminal_utterance(f)
+
 		text += f
 		broadcast.utterance(text, v.channel)
-		terminal_utterance(f)

 		# sleep_time = 2
 		# toks = tokenise_str(f)
@ -129,7 +186,7 @@ def signal_terminate(signum, frame):

 def main() -> int:

-	global broadcast, metric, terminal, debug, state
+	global broadcast, metric, terminal, debug, state, UTTERANCE_LEN, NUM_METRIC_GEN, NUM_SAMPLE_VOICES, RANDOM_SEED_TIMER_MIN, STATE_TRANSITION_TIMER_MIN, B_SKIP, B_SWAPS

 	p = argparse.ArgumentParser()
 	p.add_argument("-c", "--config", type=str, default="voice.config.json", help="configuratin file")
@ -146,6 +203,20 @@ def main() -> int:

 	terminal = args.terminal

+	#--------------------#
+	# 		CONFIGS
+	#--------------------#
+
+	u_conf = conf['utterance_configuration']
+	UTTERANCE_LEN = u_conf['UTTERANCE_LEN']
+	NUM_METRIC_GEN = u_conf['NUM_METRIC_GEN']
+	NUM_SAMPLE_VOICES = u_conf['NUM_SAMPLE_VOICES']
+	RANDOM_SEED_TIMER_MIN = u_conf['RANDOM_SEED_TIMER_MIN']
+	STATE_TRANSITION_TIMER_MIN = u_conf['STATE_TRANSITION_TIMER_MIN']
+
+	B_SKIP = u_conf['b_skip']
+	B_SWAPS = u_conf['b_swaps']
+
 	#--------------------#
 	# 		VOICES
 	#--------------------#
@ -272,7 +343,12 @@ def main() -> int:
 			t.start()

 			logging.info(f"METRIC::broadcasting {v.name}")
+
+			try:
 				broadcast_utterance(v, uv)
+			except Exception as e:
+				logging.error(e)
+				pass
 			
 			t.join()

--- a/utterance/utils.py
+++ b/utterance/utils.py
@ -1,5 +1,6 @@
 import string, regex
 from gensim.utils import tokenize
+import nltk

 def clean(text: str) -> str:

@ -42,16 +43,71 @@ def fragments(utterance: str):
 				if k in skip_punctuation:
 					continue
 				else:
+					# if cum[0] not in [' ', '\n']:
+					# 	cum = ' ' + cum
 					frags.append(cum)
 					cum = ""
 		cum += '\n'
 		frags.append(cum)

+	# get rid of newline (2x)
+	if len(frags) > 0: 
+		if frags[-1] == '\n':
+			frags = frags[:-1]
+		if len(frags) > 0:
+			if frags[-1] == '\n':
+				frags = frags[:-1]
+
 	return frags

+
 def tokenise(utterance: str):
 	return list(tokenize(utterance, lower=True))

+def fix_sentence(s: str) -> str:
+	"""Trim a dangling last word from a sentence fragment.
+
+	The text is tokenized with nltk.word_tokenize and tagged with nltk.pos_tag.
+	A last token is treated as dangling when its tag is DT, WDT, IN or CC, or
+	when it is a single-character token tagged NN.
+
+	Returns:
+	  - None when s has at most one non-whitespace-padded character, when
+	    tagging yields no tokens, or when the only token is tagged DT/WDT/IN
+	    or is a single-character NN;
+	  - the tokens without the dangling last one, joined by single spaces
+	    (this can be an empty string);
+	  - s unchanged otherwise.
+	"""
+
+	if len(s.strip()) <= 1:
+		return None
+
+	tagged = nltk.pos_tag(nltk.word_tokenize(s))
+	if not tagged:
+		return None
+
+	last_word, last_tag = tagged[-1][0], tagged[-1][1]
+	lone_letter_noun = last_tag in ['NN'] and len(last_word) == 1
+
+	# A fragment consisting of nothing but one dangling word is rejected outright.
+	# A lone CC is not part of this check; it falls through to the trimming below.
+	if len(tagged) == 1 and (last_tag in ['DT', 'WDT', 'IN'] or lone_letter_noun):
+		return None
+
+	if last_tag in ['DT', 'WDT', 'IN', 'CC'] or lone_letter_noun:
+		return " ".join(pair[0] for pair in tagged[:-1])
+
+	return s
+
+def fix_punctuation(s: str) -> str:
+	"""Make sure a fragment ends with sentence-final punctuation.
+
+	Returns:
+	  - "" when s is empty or whitespace only, or is a lone comma;
+	  - s unchanged when it is any other single punctuation character, or when
+	    its last non-whitespace character is punctuation other than ',' or ':';
+	  - otherwise s without trailing whitespace, with a trailing ',' or ':'
+	    replaced by '.', or with '.' appended when there is no final punctuation.
+	"""
+
+	if not s.strip():
+		return ""
+
+	# A lone punctuation mark is kept as it is, except for a bare comma.
+	if len(s) == 1 and s in string.punctuation:
+		return "" if s == ',' else s
+
+	trimmed = s.rstrip()
+	last = trimmed[-1]
+
+	if last not in string.punctuation:
+		return trimmed + '.'
+
+	if last in [',', ':']:
+		return trimmed[:-1] + '.'
+
+	# Any other closing punctuation: the input is returned untouched,
+	# trailing whitespace included.
+	return s
+



--- a/utterance/voice.py
+++ b/utterance/voice.py
@ -3,7 +3,7 @@ import utterance.utils
 import gensim, regex, string, time

 UTTERANCE_MEMORY_LEN = 15
-UTTERANCE_MEMORY_MIN_DIST = 0.2
+UTTERANCE_MEMORY_MIN_DIST = 0.85

 class Voice: