Preprocessing & tokenization
def generate(model, tokenizer, prompt, max_new_tokens=50, temperature=0.8): model.eval() input_ids = tokenizer.encode(prompt) for _ in range(max_new_tokens): logits = model(input_ids[-256:]) # crop to context length next_token_logits = logits[0, -1, :] / temperature probs = F.softmax(next_token_logits, dim=-1) next_token = torch.multinomial(probs, num_samples=1) input_ids.append(next_token.item()) if next_token == tokenizer.eos_token_id: break return tokenizer.decode(input_ids) build a large language model %28from scratch%29 pdf