GPT2Config
This commit is contained in:
parent
240e9c4535
commit
aff1c17d2e
@ -15,7 +15,8 @@ def suffix(bs: int, ns: int, vs: int) -> str:
|
|||||||
def train(ouputdir: Path, blocksize: int, vocabsize: int, num_steps: int, gpu: bool = False) -> str:
|
def train(ouputdir: Path, blocksize: int, vocabsize: int, num_steps: int, gpu: bool = False) -> str:
|
||||||
|
|
||||||
from aitextgen.TokenDataset import TokenDataset
|
from aitextgen.TokenDataset import TokenDataset
|
||||||
from aitextgen.utils import build_gpt2_config
|
from transformers import GPT2Config
|
||||||
|
# from aitextgen.utils import build_gpt2_config
|
||||||
from aitextgen import aitextgen
|
from aitextgen import aitextgen
|
||||||
|
|
||||||
exts = ['.json', '.gz']
|
exts = ['.json', '.gz']
|
||||||
@ -65,8 +66,7 @@ def encode(filepath: str, blocksize: int, vocabsize: int, ouputdir: Path, verbos
|
|||||||
return "text input is not valid"
|
return "text input is not valid"
|
||||||
|
|
||||||
from aitextgen.TokenDataset import TokenDataset
|
from aitextgen.TokenDataset import TokenDataset
|
||||||
from aitextgen.tokenizers import train_tokenizer
|
from aitextgen.tokenizers import train_tokenizer
|
||||||
from transformers import GPT2Config
|
|
||||||
|
|
||||||
#NOTE: vocab_size is fixed since this is not yet in train_tokenizer
|
#NOTE: vocab_size is fixed since this is not yet in train_tokenizer
|
||||||
#see https://github.com/minimaxir/aitextgen/blob/master/aitextgen/tokenizers.py
|
#see https://github.com/minimaxir/aitextgen/blob/master/aitextgen/tokenizers.py
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user