revert demo

This commit is contained in:
gauthiier 2022-02-10 13:46:30 +01:00
parent d62e500c17
commit 0f5f06359c

View File

@ -19,18 +19,14 @@ def train(filepath: str, ouputdir: Path, blocksize: int, vocabsize: int, num_ste
from aitextgen.utils import build_gpt2_config from aitextgen.utils import build_gpt2_config
from aitextgen import aitextgen from aitextgen import aitextgen
exts = ['.json', '.gz'] files = [x for x in ouputdir.glob('*') if x.name.endswith(".tokenizer.json")]
files = [x for x in ouputdir.glob('*') if x.suffix in exts and x.name != "config.json"]
print(files) print(files)
if len(files) == 2: if len(files) == 1:
if files[0].suffix == '.json':
tok = str(files[0]) tok = str(files[0])
dat = str(files[1])
else: else:
tok = str(files[1]) return "No valid tokenizer in " + str(ouputdir)
dat = str(files[0])
config = build_gpt2_config(vocab_size=vocabsize, max_lenght=blocksize, dropout=0.0, n_embd=256, n_layer=8, n_head=8) config = build_gpt2_config(vocab_size=vocabsize, max_lenght=blocksize, dropout=0.0, n_embd=256, n_layer=8, n_head=8)