revert demo
This commit is contained in:
parent
d62e500c17
commit
0f5f06359c
@ -19,18 +19,14 @@ def train(filepath: str, ouputdir: Path, blocksize: int, vocabsize: int, num_ste
|
|||||||
from aitextgen.utils import build_gpt2_config
|
from aitextgen.utils import build_gpt2_config
|
||||||
from aitextgen import aitextgen
|
from aitextgen import aitextgen
|
||||||
|
|
||||||
exts = ['.json', '.gz']
|
files = [x for x in ouputdir.glob('*') if x.name.endswith(".tokenizer.json")]
|
||||||
files = [x for x in ouputdir.glob('*') if x.suffix in exts and x.name != "config.json"]
|
|
||||||
|
|
||||||
print(files)
|
print(files)
|
||||||
|
|
||||||
if len(files) == 2:
|
if len(files) == 1:
|
||||||
if files[0].suffix == '.json':
|
|
||||||
tok = str(files[0])
|
tok = str(files[0])
|
||||||
dat = str(files[1])
|
|
||||||
else:
|
else:
|
||||||
tok = str(files[1])
|
return "No valid tokenizer in " + str(ouputdir)
|
||||||
dat = str(files[0])
|
|
||||||
|
|
||||||
config = build_gpt2_config(vocab_size=vocabsize, max_lenght=blocksize, dropout=0.0, n_embd=256, n_layer=8, n_head=8)
|
config = build_gpt2_config(vocab_size=vocabsize, max_lenght=blocksize, dropout=0.0, n_embd=256, n_layer=8, n_head=8)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user