LLM_Engineering_OLD/community-contributions/sach91-bootcamp/week4-exercise.ipynb
{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"id":"xsWGWo7YrSPA"},"outputs":[],"source":["# A HuggingFace LLAMA code generator and validator."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ZzWgGqk2qPNP"},"outputs":[],"source":["import gradio as gr\n","import torch\n","from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer\n","from threading import Thread\n","from huggingface_hub import login\n","from google.colab import userdata"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"5sYVSW-eqdYj"},"outputs":[],"source":["# Load model and tokenizer\n","model_name = \"meta-llama/Llama-3.2-1B-Instruct\"\n","print(f\"Loading {model_name}...\")\n","\n","# load_dotenv(override=True)\n","# OPENWEATHER_API_KEY = os.getenv(\"OPENWEATHER_API_KEY\")\n","hf_token = userdata.get('HF_TOKEN')\n","login(hf_token, add_to_git_credential=True)\n","\n","tokenizer = AutoTokenizer.from_pretrained(model_name)\n","tokenizer.pad_token = tokenizer.eos_token\n","model = AutoModelForCausalLM.from_pretrained(\n"," model_name,\n"," torch_dtype=torch.bfloat16,\n"," device_map=\"auto\",\n",")\n","\n","print(\"Model loaded successfully!\")"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"PmgoWUVNqgL7"},"outputs":[],"source":["def apply_docstrings(code):\n"," \"\"\"\n"," Translator function to format the response.\n"," \"\"\"\n"," sys_msg = \"\"\"\n"," You are a technical assistant that documents Python code.\n"," Your task is below:\n"," - Add concise, clear, and informative docstrings to functions, classes, and modules.\n"," - Add inline comments only where they improve readability or clarify intent.\n"," - Do not modify the code logic or structure.\n"," - Give only the Python code and docstrings.\n"," \"\"\"\n","\n"," usr_msg = f\"\"\"\n"," Add docstrings and comments to the following Python code.\\n\n"," {code}\n"," \"\"\"\n","\n"," # Format the conversation history for the model\n"," messages = [{\"role\": \"system\", \"content\": sys_msg}, {\"role\": \"user\", \"content\": usr_msg}]\n","\n"," # Apply chat template\n"," input_text = tokenizer.apply_chat_template(\n"," messages,\n"," tokenize=False,\n"," add_generation_prompt=True\n"," )\n","\n"," # Tokenize inputchat_with_llama\n"," inputs = tokenizer(input_text, return_tensors=\"pt\").to(model.device)\n","\n"," # Set up the streamer\n"," streamer = TextIteratorStreamer(\n"," tokenizer,\n"," skip_prompt=True,\n"," skip_special_tokens=True\n"," )\n","\n"," # Generation parameters\n"," generation_kwargs = dict(\n"," inputs,\n"," streamer=streamer,\n"," max_new_tokens=512,\n"," temperature=0.7,\n"," top_p=0.9,\n"," do_sample=True,\n"," )\n","\n"," # Start generation in a separate thread\n"," thread = Thread(target=model.generate, kwargs=generation_kwargs)\n"," thread.start()\n","\n"," # Stream the response\n"," partial_response = ''\n"," for new_text in streamer:\n"," partial_response += new_text\n"," yield partial_response\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"pONxPaeYqkzg"},"outputs":[],"source":["def chat_with_llama(message, history):\n"," \"\"\"\n"," Chat function that streams responses from the Llama model.\n"," Args:\n"," message: The user's current message\n"," history: List of [user_message, assistant_message] pairs\n"," Yields:\n"," Partial responses as they are generated\n"," \"\"\"\n","\n"," sys_msg = \"\"\"\n"," You are a expert python coder for a software company.\n"," You write python code for the specified problem.\n"," You never write 
# %%
def chat_with_llama(message, history):
    """
    Chat function that streams responses from the Llama model.
    Args:
        message: The user's current message
        history: List of [user_message, assistant_message] pairs
    Yields:
        Partial responses as they are generated
    """

    sys_msg = """
    You are an expert Python coder for a software company.
    You write Python code for the specified problem.
    You never write comments in the code. Just provide raw and succinct Python code.
    """
    # Start the conversation with the system prompt
    messages = [{"role": "system", "content": sys_msg}]

    # Add the conversation history
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current message
    messages.append({"role": "user", "content": message})

    # Apply the chat template
    input_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Tokenize the input
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Generate the raw, uncommented code
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    # Decode only the newly generated tokens
    response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)

    # Run the generated code through the documenter and stream its output
    yield from apply_docstrings(response)
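# %% [markdown]
# The launch output below records Gradio's deprecation warning for tuple-style
# chat history. With gr.ChatInterface(..., type="messages"), `history` instead
# arrives as a list of openai-style {"role": ..., "content": ...} dicts and can
# be appended directly. A sketch of that variant — not a drop-in change, since
# `chat_with_llama` above still unpacks tuple pairs:

# %%
def build_messages(message, history, sys_msg):
    """Assemble chat-template messages from openai-style history dicts."""
    messages = [{"role": "system", "content": sys_msg}]
    # History entries already use the {"role", "content"} shape that
    # tokenizer.apply_chat_template expects, so no re-pairing is needed.
    messages.extend({"role": m["role"], "content": m["content"]} for m in history)
    messages.append({"role": "user", "content": message})
    return messages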
# %%
# Create the Gradio interface
w_model = model_name.split('/')[-1]
demo = gr.ChatInterface(
    fn=chat_with_llama,
    title=f"🦙 {w_model} Chat",
    description=f"Chat with Meta's {w_model} model with streaming responses",
    examples=[
        "What is the capital of France?",
        "I want to travel to America",
        "What are some tips for learning a new language?"
    ],
    theme=gr.themes.Soft()
)

demo.launch(share=True, debug=True)

# %% [markdown]
# Recorded output (abridged): Gradio warned that the 'tuples' chatbot format is
# deprecated in favor of type='messages'; the app launched in Colab debug mode
# on a temporary https://*.gradio.live share URL (expires in one week, with a
# pointer to `gradio deploy` for permanent hosting on Hugging Face Spaces); and
# generation logged "Setting `pad_token_id` to `eos_token_id`:128001 for
# open-end generation."
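# %% [markdown]
# The notebook assumes Colab throughout: the HF token comes from
# google.colab.userdata. Outside Colab, one alternative (a sketch, assuming an
# HF_TOKEN variable is exported in the shell) is to read the token from the
# environment instead:

# %%
import os
from huggingface_hub import login

hf_token = os.environ.get("HF_TOKEN")
if hf_token is None:
    raise RuntimeError("Set the HF_TOKEN environment variable first.")
login(hf_token)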