import subprocess
import sys

# Colab bootstrap: quietly install/upgrade the pinned training stack.
# Using sys.executable ensures the packages land in the running interpreter's
# environment rather than whatever "pip" happens to be on PATH.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-U",
    "torchao>=0.16",
    "trl>=0.20",
    "transformers>=4.45",
    "datasets",
    "peft>=0.13",
    "accelerate",
    "bitsandbytes",
])
import sys as _sys

# Drop any stale torchao/peft modules already loaded so the freshly
# installed versions are re-imported instead of the cached ones.
for _m in [m for m in list(_sys.modules) if m.startswith(("torchao", "peft"))]:
    _sys.modules.pop(_m, None)

try:
    import torchao
except Exception:
    # Fallback: torchao failed to import (e.g. binary incompatibility).
    # Register a minimal stub carrying only __version__ so downstream
    # version checks (trl/peft) don't crash at import time.
    import types
    _fake = types.ModuleType("torchao")
    _fake.__version__ = "0.16.1"
    _sys.modules["torchao"] = _fake
import os
import gc
import warnings
import torch  # NOTE(review): source line was garbled; torch is used below so it was presumably imported here — confirm

# Silence noisy library output for a clean notebook log.
warnings.filterwarnings("ignore")
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["WANDB_DISABLED"] = "true"
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig

# Report the runtime we actually got (torch version, CUDA, GPU + VRAM).
print(f"torch={torch.__version__} cuda={torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)} "
          f"({torch.cuda.get_device_properties(0).total_memory/1e9:.1f} GB)")
# Base model and runtime capability flags.
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# bf16 only when the GPU supports it (T4 does not; A100/L4 do).
BF16_OK = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

# Shared LoRA adapter config: rank-8 adapters on the attention projections.
LORA_CFG = LoraConfig(
    r=8, lora_alpha=16, lora_dropout=0.05, bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
def cleaning():
    """Release VRAM between training stages (Colab T4 is tight).

    Runs the Python garbage collector first so dead tensors are freed,
    then asks the CUDA caching allocator to return unused blocks.
    Returns None; safe to call on CPU-only runtimes.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
def chat_generate(model, tokenizer, prompt, max_new_tokens=120):
    """Helper: format as chat, generate, decode just the assistant turn.

    Args:
        model: causal LM exposing `.device` and `.generate(...)`.
        tokenizer: tokenizer exposing `apply_chat_template` and `decode`.
        prompt: user message text.
        max_new_tokens: generation budget (default 120).

    Returns:
        The decoded assistant completion (prompt tokens stripped).
    """
    msgs = [{"role": "user", "content": prompt}]
    ids = tokenizer.apply_chat_template(
        msgs, return_tensors="pt", add_generation_prompt=True
    ).to(model.device)
    with torch.no_grad():  # inference only — no autograd graph needed
        out = model.generate(
            ids, max_new_tokens=max_new_tokens,
            do_sample=True, temperature=0.7, top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    # generate() returns prompt + completion; slice off the prompt tokens.
    return tokenizer.decode(out[0][ids.shape[-1]:], skip_special_tokens=True)
Trending
- Build a single-cell RNA-seq analysis pipeline with Scanpy to perform PBMC clustering, annotation, and trajectory discovery
- OpenAI’s AI Agent can now access LinkedIn, Salesforce, Gmail, and internal tools via sign-in sessions.
- Nick Bostrom Has a Plan for Humanity’s ‘Big Retirement’
- A long shot proposal to protect California workers from AI
- AI Kids’ Toys: The New Wild West
- Natural Language Autoencoders by Anthropic Convert Claude’s internal activations directly into human-readable text explanations
- OpenAI Releases Three Realtime Audio Models: GPT-Realtime-2, GPT-Realtime-Translate, and GPT-Realtime-Whisper in the Realtime API
- LightSeek Foundation releases TokenSpeed, an open-source LLM inference engine targeting TensorRT-LLM level performance for agentic workloads

