pip install tinyllama
The readers module parses single or multiple files. To parse a single file:
# ".txt" files
from tinyllama.readers import get_text
corpus = get_text("./txt_path")
# ".pdf" files
from tinyllama.readers import get_pdf_text
corpus = get_pdf_text("./pdf_path")
To parse multiple files:
# ".txt" files
from tinyllama.readers import get_text
corpus = ''.join(get_text(txt_path) for txt_path in txt_paths)
# ".pdf" files
from tinyllama.readers import get_pdf_text
corpus = ''.join(get_pdf_text(pdf_path) for pdf_path in pdf_paths)
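In the snippets above, txt_paths and pdf_paths are assumed to be plain lists of file paths; a minimal sketch of building them with the standard library (the ./data directory is hypothetical):
import glob
from tinyllama.readers import get_text, get_pdf_text
# collect every ".txt" and ".pdf" file under a data directory
txt_paths = sorted(glob.glob("./data/*.txt"))
pdf_paths = sorted(glob.glob("./data/*.pdf"))
# concatenate everything into one corpus string
corpus = ''.join(get_text(p) for p in txt_paths) + ''.join(get_pdf_text(p) for p in pdf_paths)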
Tokenization is done with a simple character-level tokenizer:
from tinyllama.tokenizers import CharacterTokenizer
tokenizer = CharacterTokenizer()
To turn a corpus into tokens:
tokens = tokenizer.tokenize(corpus)
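For intuition, a character-level tokenizer just maps each distinct character of the corpus to an integer id; a standalone sketch of the idea (not the CharacterTokenizer implementation itself):
# build the vocabulary from the unique characters of the corpus
chars = sorted(set(corpus))
stoi = {ch: i for i, ch in enumerate(chars)}   # char -> id
itos = {i: ch for ch, i in stoi.items()}       # id -> char
ids = [stoi[ch] for ch in corpus]              # encode
restored = ''.join(itos[i] for i in ids)       # decode back to the original text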
from tinyllama import Llama
# standard multi-head attention
model = Llama(context_window=500, emb_dim=10, n_heads=2, n_blocks=2, vocab_size=tokenizer.vocab_size)
# grouped-query attention
model = Llama(context_window=500, emb_dim=10, n_heads=2, n_blocks=2, gq_ratio=2, vocab_size=tokenizer.vocab_size)
The gq_ratio parameter is the ratio of the number of attention heads to the number of key/value heads (grouped-query attention).
The configuration above builds a Llama model with twice as many attention heads as key/value heads.
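As an illustration of the idea behind grouped-query attention (a generic sketch, not tinyllama's internals): only n_heads // gq_ratio key/value heads are computed, and each of them is shared by gq_ratio query heads.
import torch
n_heads, gq_ratio, head_dim, seq_len = 2, 2, 5, 8
n_kv_heads = n_heads // gq_ratio                  # 1 key/value head shared by 2 query heads
q = torch.randn(seq_len, n_heads, head_dim)       # one query per head
kv = torch.randn(seq_len, n_kv_heads, head_dim)   # fewer key/value heads
k = kv.repeat_interleave(gq_ratio, dim=1)         # expand each kv head to its query group
print(q.shape, k.shape)                           # both torch.Size([8, 2, 5])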
from tinyllama import TrainConfig, Trainer
train_config = TrainConfig(batch_size=32, epochs=50, lr=1e-3, log_interval=15)
trainer = Trainer(train_config)
trainer.run(model, tokens)
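For intuition about what a training job does here, a generic next-token training step in PyTorch looks roughly like this (a sketch assuming tokens is a 1-D tensor of token ids and the model returns logits of shape (batch, context, vocab); it is not Trainer's actual code):
import torch
import torch.nn.functional as F

def train_step(model, tokens, optimizer, context_window=500, batch_size=32):
    # sample random (input, next-token target) windows from the token stream
    starts = torch.randint(0, len(tokens) - context_window - 1, (batch_size,)).tolist()
    x = torch.stack([tokens[s:s + context_window] for s in starts])
    y = torch.stack([tokens[s + 1:s + context_window + 1] for s in starts])
    logits = model(x)
    loss = F.cross_entropy(logits.view(-1, logits.size(-1)), y.view(-1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()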
A Diagnose class runs a training job on a copy of the model and returns training information that may be useful to the user.
LrDiagnose returns a plot of the loss obtained for each learning rate; the scale for the start and end arguments is logarithmic.
from tinyllama.diagnosis import LrDiagnose
lr_diagnose = LrDiagnose(start=-5, end=0, n_lrs=50)
lr_diagnose.run(model, tokens, train_config)
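The logarithmic scale presumably means the candidate learning rates are spread evenly in exponent space; with numpy that spacing looks like this (a sketch of the convention, assuming base-10 exponents):
import numpy as np
lrs = np.logspace(-5, 0, num=50)   # 50 learning rates from 1e-5 to 1e0, evenly spaced in log space
print(lrs[0], lrs[-1])             # 1e-05 1.0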
GradDiagnose returns a histogram of the distribution of the gradients; it does not run any additional training job.
from tinyllama.diagnosis import GradDiagnose
grad_diagnose = GradDiagnose(num_params_to_track=1500)
grad_diagnose.run(model)
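The same kind of histogram can be built by hand after a backward pass, which is roughly what is being visualised (a generic PyTorch/matplotlib sketch, not GradDiagnose's code):
import torch
import matplotlib.pyplot as plt
# flatten the gradients of every parameter that received one
grads = torch.cat([p.grad.flatten() for p in model.parameters() if p.grad is not None])
plt.hist(grads.detach().cpu().numpy(), bins=100)
plt.xlabel("gradient value")
plt.ylabel("count")
plt.show()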
SwigluDiagnose returns a histogram of the distribution of the activations of the SwiGLU layers.
from tinyllama.diagnosis import SwigluDiagnose, SwigluPath
# forward activations
swiglu_diagnose = SwigluDiagnose(track_direction=SwigluPath.FORWARD)
# backward activations (gradients)
swiglu_diagnose = SwigluDiagnose(track_direction=SwigluPath.BACKWARD)
swiglu_diagnose.run(model, tokens, train_config)
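For reference, the SwiGLU feed-forward activation whose values these histograms describe is commonly defined as Swish(xW) multiplied elementwise by a second projection xV; a generic sketch (the layer sizes are illustrative, not tinyllama's):
import torch
import torch.nn as nn
import torch.nn.functional as F

class SwiGLU(nn.Module):
    def __init__(self, dim, hidden_dim):
        super().__init__()
        self.w = nn.Linear(dim, hidden_dim, bias=False)   # gate projection
        self.v = nn.Linear(dim, hidden_dim, bias=False)   # value projection

    def forward(self, x):
        return F.silu(self.w(x)) * self.v(x)              # Swish(xW) * xV

out = SwiGLU(dim=10, hidden_dim=40)(torch.randn(4, 10))   # shape (4, 40)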
GdrDiagnose returns a plot of the gradient/data ratio at each step of the training.
from tinyllama.diagnosis import GdrDiagnose
gdr_diagnose = GdrDiagnose(num_params_to_track=5, num_iters=150)
gdr_diagnose.run(model, tokens, train_config)
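The gradient/data ratio for a parameter is usually taken as the scale of its learning-rate-sized update relative to the scale of its values; a generic sketch of the quantity being plotted (an assumption about the definition, not GdrDiagnose's exact code):
import torch
lr = 1e-3
with torch.no_grad():
    ratios = {
        name: (lr * p.grad.std() / p.data.std()).log10().item()
        for name, p in model.named_parameters()
        if p.grad is not None and p.ndim >= 2   # weight matrices only
    }
print(ratios)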
GPTune facilitates hyperparameter tuning by using Gaussian processes to optimize the search.
from tinyllama.gptuner import GPTuneConfig, GPTune
gptune_config = GPTuneConfig(num_training_samples=100, hyperparams_to_tune=["epochs", "n_heads"], l_bounds=[10, 2], u_bounds=[50, 5], num_evaluations=500)
gptune = GPTune(gptune_config)
gptune.run(model, tokens, train_config)
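The underlying idea is to fit a Gaussian-process surrogate to the losses of a handful of sampled configurations, then query the surrogate cheaply over many candidates; a generic sketch with scikit-learn (evaluate_loss is a hypothetical stand-in for whatever training-and-scoring GPTune performs, and this is not GPTune's implementation):
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

rng = np.random.default_rng(0)
l_bounds, u_bounds = np.array([10, 2]), np.array([50, 5])

# sample a few (epochs, n_heads) configurations and measure the loss of each
X = rng.uniform(l_bounds, u_bounds, size=(20, 2))
y = np.array([evaluate_loss(epochs=int(e), n_heads=int(h)) for e, h in X])  # hypothetical helper

# fit the surrogate, then rank many cheap candidates with it
gp = GaussianProcessRegressor().fit(X, y)
candidates = rng.uniform(l_bounds, u_bounds, size=(500, 2))
mean, std = gp.predict(candidates, return_std=True)
print(candidates[np.argmin(mean)])   # most promising configuration under the surrogate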
Generates a response to a prompt.
from tinyllama import generate
# kv_cache is set to True by default.
generate(model, prompt, max_tokens=900, kv_cache=True)
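For intuition about kv_cache: without a cache, every new token re-runs attention over the whole prefix, while a key/value cache stores past keys and values so each step only processes the newest token. A generic greedy-decoding loop looks like this (a sketch assuming the model maps token ids to logits; it is not tinyllama's generate):
import torch

@torch.no_grad()
def greedy_generate(model, ids, max_tokens, context_window=500):
    # ids: 1-D tensor of prompt token ids
    for _ in range(max_tokens):
        logits = model(ids[-context_window:].unsqueeze(0))   # (1, seq, vocab)
        next_id = logits[0, -1].argmax()                     # most likely next token
        ids = torch.cat([ids, next_id.unsqueeze(0)])
    return ids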