49 lines
816 B
Python
49 lines
816 B
Python
import string, regex
|
|
|
|
def clean(text: str) -> str:
    """Capitalize the first line of *text*, dropping a leading punctuation token.

    The first line is split at its first space.  If the token before that
    space (with surrounding whitespace stripped) is empty or consists solely
    of ASCII punctuation characters, the token is removed before the line is
    capitalized.  All following lines are returned unchanged.

    Note that ``str.capitalize`` also lower-cases every character after the
    first one on that line.

    Args:
        text: Input text whose lines are separated by newline characters.

    Returns:
        The text with its first line cleaned and capitalized.
    """
    first_line, newline, remainder = text.partition('\n')
    head, _, tail = first_line.partition(' ')

    # Check each character individually: ``token in string.punctuation`` is a
    # substring test, so runs such as "..." or "?!" would not be recognised.
    # ``all`` is true for an empty token, so a leading space is still dropped.
    if all(ch in string.punctuation for ch in head.strip()):
        first_line = tail

    return first_line.capitalize() + newline + remainder
|
|
|
|
def format(text: str) -> str:
    r"""Normalize line endings and collapse doubled newlines.

    Windows line endings (``\r\n``) are converted to ``\n`` first; then each
    non-overlapping pair ``\n\n`` is replaced by a single ``\n``.  The
    collapse is a single pass, so a run of three newlines becomes two.

    Args:
        text: The text to normalize.

    Returns:
        The normalized text.
    """
    unix_text = text.replace('\r\n', '\n')
    collapsed = unix_text.replace('\n\n', '\n')
    return collapsed
|
|
|
|
def fragments(utterance: str):
    r"""Split *utterance* into punctuation-terminated fragments.

    Each line of the input is split around every Unicode punctuation
    character (``\p{Punctuation}``).  The resulting pieces are accumulated
    until a one-character piece that occurs in ``string.punctuation`` (ASCII
    only) is seen; that piece closes the current fragment.  Whatever is left
    at the end of a line, possibly nothing, is emitted with a newline
    appended, so every input line contributes a final fragment ending in
    a newline.

    Args:
        utterance: Text to split; it is broken into lines with
            ``str.splitlines``.

    Returns:
        A list of fragment strings in input order.
    """
    punct_splitter = regex.compile(r'(\p{Punctuation})')
    collected = []

    for line in utterance.splitlines():
        pending = ""
        for piece in punct_splitter.split(line):
            if not piece:
                # The split yields empty strings between adjacent matches.
                continue
            pending += piece
            # Only a lone ASCII punctuation character ends a fragment;
            # longer pieces and other single characters just accumulate.
            if len(piece) == 1 and piece in string.punctuation:
                collected.append(pending)
                pending = ""
        # Close the line: the remainder (maybe empty) plus a newline.
        collected.append(pending + '\n')

    return collected
|
|
|
|
|
|
|
|
|