diff --git a/tokenise+train.py b/tokenise+train.py index 51ec87e..720bf91 100644 --- a/tokenise+train.py +++ b/tokenise+train.py @@ -28,7 +28,19 @@ def train(ouputdir: Path, blocksize: int, vocabsize: int, num_steps: int, gpu: b tok = str(files[1]) dat = str(files[0]) - config = build_gpt2_config(vocab_size=vocabsize, max_lenght=blocksize) + # config = build_gpt2_config(vocab_size=vocabsize, max_lenght=blocksize) + + config = GPT2Config( + vocab_size=vocabsize, + n_positions=blocksize, + n_ctx=blocksize, + resid_pdrop=0.0, + embd_pdrop=0.0, + attn_pdrop=0.0, + summary_first_dropout=0.0, + bos_token_id=0, + eos_token_id=0 + ) print(config)