diff --git a/community-contributions/sach91-bootcamp/week3-exercise.ipynb b/community-contributions/sach91-bootcamp/week3-exercise.ipynb
new file mode 100644
index 0000000..4f04b7e
--- /dev/null
+++ b/community-contributions/sach91-bootcamp/week3-exercise.ipynb
@@ -0,0 +1 @@
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4","authorship_tag":"ABX9TyOvR3p3rMyPRwqLuduIBmR0"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"code","source":["# A HuggingFace Llama travel agent biased toward one particular destination, with translation support."],"metadata":{"id":"xsWGWo7YrSPA"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["import gradio as gr\n","import torch\n","from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer\n","from threading import Thread\n","from huggingface_hub import login\n","from google.colab import userdata"],"metadata":{"id":"ZzWgGqk2qPNP"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Load model and tokenizer\n","model_name = \"meta-llama/Llama-3.2-1B-Instruct\"\n","print(f\"Loading {model_name}...\")\n","\n","# Authenticate with HuggingFace using the token stored in Colab secrets\n","hf_token = userdata.get('HF_TOKEN')\n","login(hf_token, add_to_git_credential=True)\n","\n","tokenizer = AutoTokenizer.from_pretrained(model_name)\n","tokenizer.pad_token = tokenizer.eos_token\n","model = AutoModelForCausalLM.from_pretrained(\n","    model_name,\n","    torch_dtype=torch.bfloat16,\n","    device_map=\"auto\",\n",")\n","\n","print(\"Model loaded successfully!\")"],"metadata":{"id":"5sYVSW-eqdYj"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["def do_translate(msg):\n","    \"\"\"\n","    Stream a Hindi translation of the given English text,\n","    yielding the original text followed by the translation.\n","    \"\"\"\n","    sys_msg = \"\"\"\n","    You are an expert translator who can translate the given English text to Hindi.\n","    \"\"\"\n","\n","    usr_msg = f\"\"\"\n","    Translate the given English text to Hindi.\n","    {msg}\n","    \"\"\"\n","\n","    # Build the message list for the model\n","    messages = [{\"role\": \"system\", \"content\": sys_msg}, {\"role\": \"user\", \"content\": usr_msg}]\n","\n","    # Apply chat template\n","    input_text = tokenizer.apply_chat_template(\n","        messages,\n","        tokenize=False,\n","        add_generation_prompt=True\n","    )\n","\n","    # Tokenize the input\n","    inputs = tokenizer(input_text, return_tensors=\"pt\").to(model.device)\n","\n","    # Set up the streamer\n","    streamer = TextIteratorStreamer(\n","        tokenizer,\n","        skip_prompt=True,\n","        skip_special_tokens=True\n","    )\n","\n","    # Generation parameters\n","    generation_kwargs = dict(\n","        inputs,\n","        streamer=streamer,\n","        max_new_tokens=512,\n","        temperature=0.7,\n","        top_p=0.9,\n","        do_sample=True,\n","    )\n","\n","    # Start generation in a separate thread\n","    thread = Thread(target=model.generate, kwargs=generation_kwargs)\n","    thread.start()\n","\n","    # Stream the response, prefixed with the original English text\n","    partial_response = msg + '\\n**Hindi**: '\n","    yield partial_response\n","    for new_text in streamer:\n","        partial_response += new_text\n","        yield partial_response\n"],"metadata":{"id":"PmgoWUVNqgL7"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["def chat_with_llama(message, history):\n","    \"\"\"\n","    Chat function that streams responses from the Llama model.\n","\n","    Args:\n","        message: The user's current message\n","        history: List of [user_message, assistant_message] pairs\n","\n","    Yields:\n","        Partial responses as they are generated\n","    \"\"\"\n","\n","    sys_msg = \"\"\"\n","    You are a manipulative assistant for a travel company.\n","    Give short answers.\n","    Your goal is to steer the user toward travelling to Spain rather than any other country.\n","    \"\"\"\n","\n","    # Start the conversation with the system prompt\n","    messages = [{\"role\": \"system\", \"content\": sys_msg}]\n","\n","    # Add conversation history, stripping the Hindi translation from past replies\n","    for user_msg, assistant_msg in history:\n","        messages.append({\"role\": \"user\", \"content\": user_msg})\n","        messages.append({\"role\": \"assistant\", \"content\": assistant_msg.split('\\n**Hindi**: ')[0]})\n","\n","    # Add the current message\n","    messages.append({\"role\": \"user\", \"content\": message})\n","\n","    # Apply chat template\n","    input_text = tokenizer.apply_chat_template(\n","        messages,\n","        tokenize=False,\n","        add_generation_prompt=True\n","    )\n","\n","    # Tokenize the input\n","    inputs = tokenizer(input_text, return_tensors=\"pt\").to(model.device)\n","\n","    # Generate response\n","    with torch.no_grad():\n","        outputs = model.generate(\n","            **inputs,\n","            max_new_tokens=512,\n","            temperature=0.7,\n","            top_p=0.9,\n","            do_sample=True,\n","        )\n","\n","    # Decode only the newly generated tokens\n","    response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)\n","\n","    # Stream the English response followed by its Hindi translation\n","    yield from do_translate(response)\n"],"metadata":{"id":"pONxPaeYqkzg"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Z6qAHIq1Wbqc"},"outputs":[],"source":["# Create the Gradio interface\n","w_model = model_name.split('/')[-1]\n","demo = gr.ChatInterface(\n","    fn=chat_with_llama,\n","    type=\"tuples\",  # history arrives as [user_message, assistant_message] pairs\n","    title=f\"🦙 {w_model} Chat\",\n","    description=f\"Chat with Meta's {w_model} model, with streaming responses\",\n","    examples=[\n","        \"What is the capital of France?\",\n","        \"I want to travel to America\",\n","        \"What are some tips for learning a new language?\"\n","    ],\n","    theme=gr.themes.Soft()\n",")\n","\n","demo.launch(share=True, debug=True)\n","\n"]}]}
\ No newline at end of file