"""Helpers for tidying utterance text and splitting it into fragments."""

import string
import unicodedata

try:
    import regex
except ImportError:  # third-party package; a stdlib fallback is used below
    regex = None

# Compiled once at import time rather than on every fragments() call.
_PUNCT_RE = regex.compile(r'(\p{Punctuation})') if regex is not None else None

# Single-character lookups against the ASCII punctuation set.
_ASCII_PUNCTUATION = frozenset(string.punctuation)

# Punctuation that stays inside a fragment instead of terminating it.
_SKIP_PUNCTUATION = frozenset("'")


def clean(text: str) -> str:
    """Capitalize the first line of *text*, dropping a leading punctuation token.

    The first line is split at its first space. When the token before that
    space is made up solely of ASCII punctuation (for example ``-``, ``...``
    or ``?!``), or is empty because the line starts with whitespace, the
    token is removed. The first line is then passed through
    ``str.capitalize``, which upper-cases its first character and
    lower-cases the rest. All following lines are returned unchanged.
    """
    first_line, newline, remainder = text.partition('\n')
    lead_token, _, after_token = first_line.partition(' ')
    # Test every character: a plain ``token in string.punctuation`` is a
    # substring test and misses tokens such as "..." or "?!".
    if all(ch in _ASCII_PUNCTUATION for ch in lead_token.strip()):
        first_line = after_token
    return first_line.capitalize() + newline + remainder


def format(text: str) -> str:  # noqa: A001 - public name kept for callers
    """Normalize line endings and curly single quotes in *text*.

    CRLF pairs become LF, each non-overlapping pair of consecutive newlines
    is replaced by one newline, and the left/right curly single quotes are
    replaced by a straight apostrophe. The newline collapsing is a single
    pass, so runs of three or more newlines are shortened but not reduced
    to a single newline.
    """
    return (
        text.replace('\r\n', '\n')
        .replace('\n\n', '\n')
        .replace('‘', "'")
        .replace("’", "'")
    )


def _split_on_punctuation(line: str) -> list:
    """Split *line* around each Unicode punctuation character, keeping it.

    Uses the compiled ``regex`` pattern when that package is importable;
    otherwise walks the string and classifies characters with
    ``unicodedata``, producing the same list (including empty strings
    between adjacent punctuation characters).
    """
    if _PUNCT_RE is not None:
        return _PUNCT_RE.split(line)
    pieces = []
    start = 0
    for index, ch in enumerate(line):
        # General categories starting with "P" are the punctuation classes.
        if unicodedata.category(ch).startswith('P'):
            pieces.append(line[start:index])
            pieces.append(ch)
            start = index + 1
    pieces.append(line[start:])
    return pieces


def fragments(utterance: str):
    """Split *utterance* into a list of punctuation-terminated fragments.

    Each line (as given by ``str.splitlines``) is cut after every ASCII
    punctuation character except the apostrophe. Whatever is left of the
    line, possibly nothing, is emitted as a final fragment with a trailing
    newline, so a line that ends in punctuation also yields a fragment
    holding only the newline. An empty utterance yields an empty list.
    """
    frags = []
    for line in utterance.splitlines():
        current = ""
        for piece in _split_on_punctuation(line):
            if not piece:
                continue
            current += piece
            ends_fragment = (
                len(piece) == 1
                and piece in _ASCII_PUNCTUATION
                and piece not in _SKIP_PUNCTUATION
            )
            if ends_fragment:
                frags.append(current)
                current = ""
        # The tail of the line is always emitted, even when it is empty.
        frags.append(current + '\n')
    return frags