From 33d0fda2a22d6e7cdf5ef84a7be92e6a2782bdf0 Mon Sep 17 00:00:00 2001 From: gauthiier Date: Thu, 10 Feb 2022 14:38:39 +0100 Subject: [PATCH] revert demo --- tokenise+train.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/tokenise+train.py b/tokenise+train.py index 4c24966..35b227f 100644 --- a/tokenise+train.py +++ b/tokenise+train.py @@ -28,19 +28,24 @@ def train(filepath: str, ouputdir: Path, blocksize: int, vocabsize: int, num_ste else: return "No valid tokenizer in " + str(ouputdir) - config = build_gpt2_config(vocab_size=vocabsize, max_lenght=blocksize, dropout=0.0, n_embd=256, n_layer=8, n_head=8) + # config = build_gpt2_config(vocab_size=vocabsize, max_lenght=blocksize, dropout=0.0, n_embd=256, n_layer=8, n_head=8) -# config = GPT2Config( - # vocab_size=vocabsize, - # n_positions=blocksize, - # n_ctx=blocksize, - # resid_pdrop=0.0, - # embd_pdrop=0.0, - # attn_pdrop=0.0, - # summary_first_dropout=0.0, - # bos_token_id=0, - # eos_token_id=0 - # ) + config = GPT2Config( + vocab_size=vocabsize, + max_lenght=blocksize, + n_positions=blocksize, + n_ctx=blocksize, + n_embd=256, + n_layer=8, + n_head=8, + dropout=0.0, + resid_pdrop=0.0, + embd_pdrop=0.0, + attn_pdrop=0.0, + summary_first_dropout=0.0, + bos_token_id=0, + eos_token_id=0 + ) print(config)