diff --git a/tokenise+train.py b/tokenise+train.py index 720bf91..cf59b64 100644 --- a/tokenise+train.py +++ b/tokenise+train.py @@ -66,6 +66,7 @@ def encode(filepath: str, blocksize: int, vocabsize: int, ouputdir: Path, verbos from aitextgen.TokenDataset import TokenDataset from aitextgen.tokenizers import train_tokenizer + from transformers import GPT2Config #NOTE: vocab_size is fixed since this is not yet in train_tokenizer #see https://github.com/minimaxir/aitextgen/blob/master/aitextgen/tokenizers.py