From e2f841465c99f384effc1c314b99e20a9f7a414c Mon Sep 17 00:00:00 2001
From: NATURESPEAK <naturespeak@NATURESPEAK.local>
Date: Mon, 11 Apr 2022 13:09:01 +0200
Subject: [PATCH] fix endings etc.

---
 run.sh             |  2 ++
 speak.py           |  4 +--
 speak_broadcast.py | 84 +++++++++++++++++++++++++++++++++++++++++++---
 utterance/utils.py | 56 +++++++++++++++++++++++++++++++
 utterance/voice.py |  2 +-
 5 files changed, 141 insertions(+), 7 deletions(-)
 create mode 100755 run.sh

diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..a3f9479
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,2 @@
+. venv/bin/activate
+python speak_broadcast.py -c ../assets/voice.config.json
\ No newline at end of file
diff --git a/speak.py b/speak.py
index 3197565..486926f 100644
--- a/speak.py
+++ b/speak.py
@@ -2,7 +2,7 @@ import argparse, json, sys, time, random
 import utterance.voice
 import utterance.utils
 
-UTTERANCE_LEN = 16				#<--------------- these should be in config
+UTTERANCE_LEN = 64				#<--------------- these should be in config
 				
 def main() -> int:
 
@@ -19,7 +19,7 @@ def main() -> int:
 	voices = []
 	for v in conf['voices']:
 		model = v['model']
-		voice = utterance.voice.Voice(name=v["name"].upper(), model=model['model_dir'], tokenizer=model['tokeniser_file'], temp=float(model["temperature"]), lenght=UTTERANCE_LEN)
+		voice = utterance.voice.Voice(name=v["name"].upper(), model=model['model_dir'], tokenizer=model['tokeniser_file'], temp=float(model["temperature"]), length=UTTERANCE_LEN)
 		voices.append(voice)
 
 	current_voice_name = ""
diff --git a/speak_broadcast.py b/speak_broadcast.py
index 729e812..b80a222 100644
--- a/speak_broadcast.py
+++ b/speak_broadcast.py
@@ -1,4 +1,4 @@
-import argparse, json, sys, time, random, logging, signal, threading
+import argparse, json, sys, time, random, logging, signal, threading, string
 import utterance.voice
 import utterance.utils
 import utterance.oscosc
@@ -17,6 +17,8 @@ exit = False
 terminal = False
 debug = False
 state = "METRIC"
+B_SKIP = []
+B_SWAPS = {}
 
 def format_str(text) -> str:
 	t = utterance.utils.clean(text)
@@ -60,8 +62,58 @@ def terminal_utterance(utterance):
 	if terminal:
 		print(utterance, end="")
 
+def fix_ending(frags):
+	"""Return a copy of frags with its final fragment repaired and punctuated.
+
+	The last fragment is stripped of punctuation and run through
+	utterance.utils.fix_sentence; if nothing usable comes back it is dropped.
+	The resulting last fragment goes through utterance.utils.fix_punctuation.
+	"""
+	result = frags.copy()
+	if not result:  # nothing to fix; avoids IndexError on result[-1]
+		return result
+	end = result[-1].translate(str.maketrans('', '', string.punctuation))
+	fix = utterance.utils.fix_sentence(end)
+	if fix:
+		result[-1] = fix
+	else:
+		result.pop()
+	if result:  # the only fragment may just have been dropped
+		result[-1] = utterance.utils.fix_punctuation(result[-1])
+	logging.debug(result)
+	return result
+
+def fix_beginning(frags):
+	"""Return a copy of frags with a skip/swap rule applied to its first word.
+
+	If the first word (lowercased, one leading punctuation mark removed) is in
+	B_SKIP it is removed: fragments of more than two words keep the rest,
+	shorter ones are dropped and the next fragment is capitalized instead.
+	Otherwise a first word found in B_SWAPS is replaced by its mapped value.
+	"""
+	result = frags.copy()
+	toks = result[0].split() if result else []
+	if not toks:
+		return result
+	first = toks[0].lower()
+	if first[0] in string.punctuation:
+		first = first[1:]
+	if first in B_SKIP:
+		if len(toks) > 2:
+			result[0] = " ".join(toks[1:]).capitalize() + "\n"
+		elif len(result) > 1:  # only promote the next fragment if there is one
+			result[1] = (result[1][1:] if result[1][:1] == ' ' else result[1]).capitalize()
+			return result[1:]
+	elif toks[0] in B_SWAPS:
+		# count=1: swap only the leading word, not later repeats of it
+		result[0] = result[0].replace(toks[0], B_SWAPS[toks[0]], 1)
+	return result
+
+
 def broadcast_utterance(v, utterance):	
 
+	print(utterance)
+
 	global broadcast, exit, debug
 
 	# Send all text to server to calculate bounds in advance
@@ -74,10 +126,15 @@ def broadcast_utterance(v, utterance):
 
 	frags = v.fragments(utterance)
 
+	frags = fix_beginning(frags)
+	frags = fix_ending(frags)
+
 	for f in frags:
+
+		terminal_utterance(f)
+
 		text += f
 		broadcast.utterance(text, v.channel)
-		terminal_utterance(f)
 
 		# sleep_time = 2
 		# toks = tokenise_str(f)
@@ -129,7 +186,7 @@ def signal_terminate(signum, frame):
 
 def main() -> int:
 
-	global broadcast, metric, terminal, debug, state
+	global broadcast, metric, terminal, debug, state, UTTERANCE_LEN, NUM_METRIC_GEN, NUM_SAMPLE_VOICES, RANDOM_SEED_TIMER_MIN, STATE_TRANSITION_TIMER_MIN, B_SKIP, B_SWAPS
 
 	p = argparse.ArgumentParser()
 	p.add_argument("-c", "--config", type=str, default="voice.config.json", help="configuratin file")
@@ -146,6 +203,20 @@ def main() -> int:
 
 	terminal = args.terminal
 
+	#--------------------#
+	# 		CONFIGS
+	#--------------------#
+
+	u_conf = conf['utterance_configuration']
+	UTTERANCE_LEN = u_conf['UTTERANCE_LEN']
+	NUM_METRIC_GEN = u_conf['NUM_METRIC_GEN']
+	NUM_SAMPLE_VOICES = u_conf['NUM_SAMPLE_VOICES']
+	RANDOM_SEED_TIMER_MIN = u_conf['RANDOM_SEED_TIMER_MIN']
+	STATE_TRANSITION_TIMER_MIN = u_conf['STATE_TRANSITION_TIMER_MIN']
+
+	B_SKIP = u_conf['b_skip']
+	B_SWAPS = u_conf['b_swaps']
+
 	#--------------------#
 	# 		VOICES
 	#--------------------#
@@ -272,7 +343,12 @@ def main() -> int:
 			t.start()
 
 			logging.info(f"METRIC::broadcasting {v.name}")
-			broadcast_utterance(v, uv)
+
+			try:
+				broadcast_utterance(v, uv)
+			except Exception as e:
+				logging.error(e)
+				pass
 			
 			t.join()
 
diff --git a/utterance/utils.py b/utterance/utils.py
index c5089b8..3352a02 100644
--- a/utterance/utils.py
+++ b/utterance/utils.py
@@ -1,5 +1,6 @@
 import string, regex
 from gensim.utils import tokenize
+import nltk
 
 def clean(text: str) -> str:
 
@@ -42,16 +43,71 @@ def fragments(utterance: str):
 				if k in skip_punctuation:
 					continue
 				else:
+					# if cum[0] not in [' ', '\n']:
+					# 	cum = ' ' + cum
 					frags.append(cum)
 					cum = ""
 		cum += '\n'
 		frags.append(cum)
 
+	# get rid of newline (2x)
+	if len(frags) > 0: 
+		if frags[-1] == '\n':
+			frags = frags[:-1]
+		if len(frags) > 0:
+			if frags[-1] == '\n':
+				frags = frags[:-1]
+
 	return frags
 
+
 def tokenise(utterance: str):
 	return list(tokenize(utterance, lower=True))
 
+def fix_sentence(s: str) -> str:
+	"""Trim one dangling final word from s, judged by its NLTK POS tag.
+
+	Returns None when s is at most one character after stripping, or is a
+	lone word tagged DT, WDT or IN, or a lone one-letter NN. A trailing DT, WDT,
+	IN or CC word, or a trailing one-letter NN, is removed and the remaining
+	tokens are re-joined with single spaces. Anything else returns s unchanged.
+	"""
+	if len(s.strip()) <= 1:
+		return None
+
+	tagged = nltk.pos_tag(nltk.word_tokenize(s))
+	if not tagged:
+		return None
+
+	word, tag = tagged[-1]
+	stray_letter = tag == 'NN' and len(word) == 1
+	if len(tagged) == 1 and (tag in ('DT', 'WDT', 'IN') or stray_letter):
+		return None
+
+	if tag in ('DT', 'WDT', 'IN', 'CC') or stray_letter:
+		return " ".join(w for w, _ in tagged[:-1])
+	return s
+
+def fix_punctuation(s: str) -> str:
+	"""Make s end like a sentence.
+
+	Blank input and a lone comma give ""; any other lone punctuation mark is
+	returned as-is. A trailing ',' or ':' (ignoring trailing whitespace) is
+	replaced by '.', text without final punctuation gets '.' appended (both
+	drop trailing whitespace), and other final punctuation leaves s untouched.
+	"""
+	trimmed = s.rstrip()
+	if not trimmed:
+		return ""
+	if len(s) == 1 and s in string.punctuation:
+		return "" if s == ',' else s
+	last = trimmed[-1]
+	if last not in string.punctuation:
+		return trimmed + '.'
+	if last in (',', ':'):
+		return trimmed[:-1] + '.'
+	return s
+
 
 
 
diff --git a/utterance/voice.py b/utterance/voice.py
index a42ffae..99b676a 100644
--- a/utterance/voice.py
+++ b/utterance/voice.py
@@ -3,7 +3,7 @@ import utterance.utils
 import gensim, regex, string, time
 
 UTTERANCE_MEMORY_LEN = 15
-UTTERANCE_MEMORY_MIN_DIST = 0.2
+UTTERANCE_MEMORY_MIN_DIST = 0.85
 
 class Voice: