LLM_Engineering_OLD/community-contributions/sach91-bootcamp/week4-exercise.ipynb


			
				
					
					
						
						
							
							
							{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"id":"xsWGWo7YrSPA"},"outputs":[],"source":["# A HuggingFace LLAMA code generator and validator."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ZzWgGqk2qPNP"},"outputs":[],"source":["import gradio as gr\n","import torch\n","from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer\n","from threading import Thread\n","from huggingface_hub import login\n","from google.colab import userdata"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"5sYVSW-eqdYj"},"outputs":[],"source":["# Load model and tokenizer\n","model_name = \"meta-llama/Llama-3.2-1B-Instruct\"\n","print(f\"Loading {model_name}...\")\n","\n","# load_dotenv(override=True)\n","# OPENWEATHER_API_KEY = os.getenv(\"OPENWEATHER_API_KEY\")\n","hf_token = userdata.get('HF_TOKEN')\n","login(hf_token, add_to_git_credential=True)\n","\n","tokenizer = AutoTokenizer.from_pretrained(model_name)\n","tokenizer.pad_token = tokenizer.eos_token\n","model = AutoModelForCausalLM.from_pretrained(\n","    model_name,\n","    torch_dtype=torch.bfloat16,\n","    device_map=\"auto\",\n",")\n","\n","print(\"Model loaded successfully!\")"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"PmgoWUVNqgL7"},"outputs":[],"source":["def apply_docstrings(code):\n","    \"\"\"\n","    Translator function to format the response.\n","    \"\"\"\n","    sys_msg = \"\"\"\n","      You are a technical assistant that documents Python code.\n","      Your task is below:\n","      - Add concise, clear, and informative docstrings to functions, classes, and modules.\n","      - Add inline comments only where they improve readability or clarify intent.\n","      - Do not modify the code logic or structure.\n","      - Give only the Python code and docstrings.\n","    \"\"\"\n","\n","    usr_msg = f\"\"\"\n","    Add docstrings and comments to the following Python code.\\n\n","    {code}\n","    \"\"\"\n","\n","    # Format the conversation history for the model\n","    messages = [{\"role\": \"system\", \"content\": sys_msg}, {\"role\": \"user\", \"content\": usr_msg}]\n","\n","    # Apply chat template\n","    input_text = tokenizer.apply_chat_template(\n","        messages,\n","        tokenize=False,\n","        add_generation_prompt=True\n","    )\n","\n","    # Tokenize inputchat_with_llama\n","    inputs = tokenizer(input_text, return_tensors=\"pt\").to(model.device)\n","\n","    # Set up the streamer\n","    streamer = TextIteratorStreamer(\n","        tokenizer,\n","        skip_prompt=True,\n","        skip_special_tokens=True\n","    )\n","\n","    # Generation parameters\n","    generation_kwargs = dict(\n","        inputs,\n","        streamer=streamer,\n","        max_new_tokens=512,\n","        temperature=0.7,\n","        top_p=0.9,\n","        do_sample=True,\n","    )\n","\n","    # Start generation in a separate thread\n","    thread = Thread(target=model.generate, kwargs=generation_kwargs)\n","    thread.start()\n","\n","    # Stream the response\n","    partial_response = ''\n","    for new_text in streamer:\n","        partial_response += new_text\n","        yield partial_response\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"pONxPaeYqkzg"},"outputs":[],"source":["def chat_with_llama(message, history):\n","    \"\"\"\n","    Chat function that streams responses from the Llama model.\n","    Args:\n","        message: The user's current message\n","        history: List of [user_message, assistant_message] pairs\n","    Yields:\n","        Partial responses as they are generated\n","    \"\"\"\n","\n","    sys_msg = \"\"\"\n","      You are a expert python coder for a software company.\n","      You write python code for the specified problem.\n","      You never write comment in the code. Just provide raw and succinct python code.\n","      \"\"\"\n","    # Format the conversation history for the model\n","    messages = [{\"role\": \"system\", \"content\": sys_msg}]\n","\n","    # Add conversation history\n","    for user_msg, assistant_msg in history:\n","        messages.append({\"role\": \"user\", \"content\": user_msg})\n","        messages.append({\"role\": \"assistant\", \"content\": assistant_msg})\n","\n","    # Add the current message\n","    messages.append({\"role\": \"user\", \"content\": message})\n","\n","    # Apply chat template\n","    input_text = tokenizer.apply_chat_template(\n","        messages,\n","        tokenize=False,\n","        add_generation_prompt=True\n","    )\n","\n","    # Tokenize inputchat_with_llama\n","    inputs = tokenizer(input_text, return_tensors=\"pt\").to(model.device)\n","\n","    # Generate response\n","    with torch.no_grad():\n","        outputs = model.generate(\n","            **inputs,\n","            max_new_tokens=512,\n","            temperature=0.7,\n","            top_p=0.9,\n","            do_sample=True,\n","        )\n","\n","    # Decode and return the response\n","    response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)\n","\n","    yield from apply_docstrings(response)\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true},"id":"Z6qAHIq1Wbqc"},"outputs":[{"name":"stderr","output_type":"stream","text":["/usr/local/lib/python3.12/dist-packages/gradio/chat_interface.py:347: UserWarning: The 'tuples' format for chatbot messages is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style 'role' and 'content' keys.\n","  self.chatbot = Chatbot(\n"]},{"name":"stdout","output_type":"stream","text":["Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().\n","* Running on public URL: https://eb2c5482d76228fa43.gradio.live\n","\n","This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"]},{"data":{"text/html":["\u003cdiv\u003e\u003ciframe src=\"https://eb2c5482d76228fa43.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen\u003e\u003c/iframe\u003e\u003c/div\u003e"],"text/plain":["\u003cIPython.core.display.HTML object\u003e"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"]}],"source":["# Create the Gradio interface\n","w_model = model_name.split('/')[-1]\n","demo = gr.ChatInterface(\n","    fn=chat_with_llama,\n","    title = f\"🦙 {w_model} Chat\",\n","    description = f\"Chat with Meta's {w_model} model with streaming responses\",\n","    examples=[\n","        \"What is the capital of France?\",\n","        \"I want to travel to America\",\n","        \"What are some tips for learning a new language?\"\n","    ],\n","    theme=gr.themes.Soft()\n",")\n","\n","demo.launch(share=True, debug=True)\n","\n"]}],"metadata":{"accelerator":"GPU","colab":{"authorship_tag":"ABX9TyME0tqBDXAuteAd1LCi9aKv","gpuType":"T4","name":"","provenance":[{"file_id":"1SDSiYyKWeT0brcZhYgqCU_us_UEsKZiv","timestamp":1761535394656}],"version":""},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0}