445 lines
16 KiB
Plaintext
445 lines
16 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Code tested in google colab with T4 GPU"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "bK-WkZmI_L4S"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Install the notebook's dependencies into the running kernel's environment.\n",
"%pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai httpx==0.27.2"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "Anm0jUBC_dQF"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import requests\n",
|
|
"from IPython.display import Markdown, display, update_display\n",
|
|
"from openai import OpenAI\n",
|
|
"from google.colab import drive\n",
|
|
"from huggingface_hub import login\n",
|
|
"from google.colab import userdata\n",
|
|
"from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
|
|
"import torch\n",
|
|
"import json"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "GsKyLRam_hpi"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Model identifier passed to get_tokenizer() and get_model() in a later cell.\n",
"LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "AWo-zjJJAIIF"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Read the secret named HF_TOKEN from Colab's userdata store and use it to log in\n",
"# through huggingface_hub, so the token is never written into the notebook itself.\n",
"hf_token = userdata.get('HF_TOKEN')\n",
|
|
"login(hf_token, add_to_git_credential=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "TSeMKC4S7Ip6"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Utility Functions ::\n",
|
|
"\n",
|
|
"def get_tokenizer(model_name):\n",
"    \"\"\"\n",
"    Load the tokenizer that belongs to a model checkpoint.\n",
"\n",
"    Args:\n",
"        model_name: LLM model name\n",
"\n",
"    Returns:\n",
"        Tokenizer whose pad token is set to its end-of-sequence token\n",
"    \"\"\"\n",
"    loaded = AutoTokenizer.from_pretrained(model_name)\n",
"    # Reuse the end-of-sequence token for padding.\n",
"    loaded.pad_token = loaded.eos_token\n",
"    return loaded\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_model(model_name):\n",
"    \"\"\"\n",
"    Load a causal language model with 4-bit bitsandbytes quantization.\n",
"\n",
"    Args:\n",
"        model_name: LLM model name\n",
"\n",
"    Returns:\n",
"        Model loaded with device_map=\"auto\" and the quantization settings below\n",
"    \"\"\"\n",
"    # Quantization settings, collected in one place so they are easy to scan.\n",
"    bnb_settings = {\n",
"        \"load_in_4bit\": True,\n",
"        \"bnb_4bit_use_double_quant\": True,\n",
"        \"bnb_4bit_compute_dtype\": torch.bfloat16,\n",
"        \"bnb_4bit_quant_type\": \"nf4\",\n",
"    }\n",
"\n",
"    return AutoModelForCausalLM.from_pretrained(\n",
"        model_name,\n",
"        device_map=\"auto\",\n",
"        quantization_config=BitsAndBytesConfig(**bnb_settings),\n",
"    )\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "O_mUtibSJFWt"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Load the tokenizer and the quantized model once; parse_response() and get_llm_response()\n",
"# in the following cell read these two module-level names directly.\n",
"tokenizer = get_tokenizer(LLAMA)\n",
|
|
"model = get_model(LLAMA)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "Mc0RE4XM_2hW"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"def parse_response(inputs, outputs):\n",
"    \"\"\"\n",
"    Extract only the newly generated text from a `generate` result.\n",
"\n",
"    Args:\n",
"        inputs: Input Tokens, pt tensors (the last dimension is the prompt length)\n",
"        outputs: Model generated output token ids (prompt tokens followed by the new tokens)\n",
"\n",
"    Returns:\n",
"        response: decoded text of the first sequence's new tokens, special tokens removed\n",
"    \"\"\"\n",
"    # Cut at the token level. Slicing the decoded string by the length of the decoded\n",
"    # prompt silently returns the wrong text whenever decoding the prompt on its own and\n",
"    # decoding it as part of the full sequence do not produce identical characters.\n",
"    prompt_length = inputs.shape[-1]\n",
"    generated_tokens = outputs[0][prompt_length:]\n",
"\n",
"    return tokenizer.decode(generated_tokens, skip_special_tokens=True)\n",
|
|
"\n",
|
|
"# Tools Functions:\n",
|
|
"\n",
|
|
"def get_tools_response(tools_params):\n",
"    \"\"\"\n",
"    Run the tool named in a parsed tool call and hand back its result.\n",
"\n",
"    Args:\n",
"        tools_params: Dict with a 'name' entry (tool name) and an 'arguments' entry (dict holding 'location')\n",
"\n",
"    Returns:\n",
"        The selected tool's return value, or None when the name matches no known tool\n",
"    \"\"\"\n",
"    requested_tool = tools_params['name']\n",
"    location = tools_params['arguments']['location']\n",
"\n",
"    if requested_tool == 'get_current_temperature':\n",
"        return get_current_temperature(location)\n",
"    if requested_tool == 'get_current_wind_speed':\n",
"        return get_current_wind_speed(location)\n",
"    # Unknown tool name: nothing to run.\n",
"    return None\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_current_temperature(location: str) -> float:\n",
"    \"\"\"\n",
"    Get the current temperature at a location.\n",
"\n",
"    Args:\n",
"        location: The location to get the temperature for, in the format \"City, Country\"\n",
"    Returns:\n",
"        The current temperature at the specified location in the specified units, as a float.\n",
"    \"\"\"\n",
"    # NOTE: this function is handed to apply_chat_template as a tool, so the signature and\n",
"    # docstring above are kept word for word; edit them only when the tool contract changes.\n",
"    if location == \"Bangalore, India\":\n",
"        return 22.0  # for testing purpose, please replace with your logic\n",
"    # The stub has no data for any other location: callers receive None.\n",
"    return None\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_current_wind_speed(location: str) -> float:\n",
"    \"\"\"\n",
"    Get the current wind speed at a given location.\n",
"\n",
"    Args:\n",
"        location: The location to get the wind speed for, in the format \"City, Country\"\n",
"    Returns:\n",
"        The current wind speed at the given location in the specified units, as a float.\n",
"    \"\"\"\n",
"    # The declared return type and the Returns text now match the numeric value that is\n",
"    # actually returned (the stub previously claimed a string while returning a number).\n",
"    return 6.0  # for testing purpose, please replace with your logic\n",
|
|
"\n",
|
|
"\n",
|
|
"def _generate_reply(messages, max_new_tokens, tools=None):\n",
"    \"\"\"Render `messages` with the chat template, generate, and return only the new text.\"\"\"\n",
"    # Follow the model's device instead of hard-coding \"cuda\" (the model is loaded with device_map=\"auto\").\n",
"    inputs = tokenizer.apply_chat_template(\n",
"        messages, tools=tools, add_generation_prompt=True, return_tensors=\"pt\"\n",
"    ).to(model.device)\n",
"\n",
"    outputs = model.generate(\n",
"        inputs,\n",
"        # A single unpadded prompt: every position is a real token. Passing the mask and\n",
"        # the pad id explicitly removes the warnings generate() otherwise prints per call.\n",
"        attention_mask=torch.ones_like(inputs),\n",
"        pad_token_id=tokenizer.pad_token_id,\n",
"        max_new_tokens=max_new_tokens,\n",
"    )\n",
"\n",
"    return parse_response(inputs, outputs)\n",
"\n",
"\n",
"def get_llm_response(message):\n",
"    \"\"\"\n",
"    Answer a weather or wind query, running one tool call when the model asks for it.\n",
"\n",
"    Args:\n",
"        message: The user's query text\n",
"\n",
"    Returns:\n",
"        The model's final reply as text. When the first generation is not a JSON tool\n",
"        call, that text is returned unchanged instead of raising.\n",
"    \"\"\"\n",
"    system_prompt = \"\"\"\n",
" You are an helpful assistant that responds to weather and wind queries. Please provide accurate answers.\n",
" Follow instructions:\n",
" 1. Please provide courteous answers.\n",
" 2. If you do not know the answer say so.\n",
" 3. Do not provide any explanations or suggestions.\n",
" 4. Provide the response in no more than 1 sentence.\n",
" \"\"\"\n",
"    messages = [\n",
"        {\"role\": \"system\", \"content\": system_prompt},\n",
"        {\"role\": \"user\", \"content\": message},\n",
"    ]\n",
"\n",
"    # First pass: the tools are offered, so the model is expected to reply with a JSON tool call.\n",
"    model_response = _generate_reply(\n",
"        messages,\n",
"        max_new_tokens=2000,\n",
"        tools=[get_current_temperature, get_current_wind_speed],\n",
"    )\n",
"\n",
"    try:\n",
"        tool_call = json.loads(model_response)\n",
"    except json.JSONDecodeError:\n",
"        # The model answered in plain text; there is no tool to run.\n",
"        return model_response\n",
"    if not isinstance(tool_call, dict) or \"name\" not in tool_call:\n",
"        return model_response\n",
"\n",
"    # The model emits its arguments under \"parameters\" while the rest of this cell reads\n",
"    # \"arguments\". Rename the key on the parsed object: a text-level replace would also\n",
"    # rewrite the word wherever it appears inside an argument value.\n",
"    if \"parameters\" in tool_call:\n",
"        tool_call[\"arguments\"] = tool_call.pop(\"parameters\")\n",
"\n",
"    messages.append({\"role\": \"assistant\", \"tool_calls\": [{\"type\": \"function\", \"function\": tool_call}]})\n",
"\n",
"    response = get_tools_response(tool_call)\n",
"    messages.append({\"role\": \"tool\", \"name\": tool_call[\"name\"], \"content\": response})\n",
"\n",
"    # Second pass: the model turns the tool result into the final answer.\n",
"    return _generate_reply(messages, max_new_tokens=200)\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "AKGLvmnsCrnk",
|
|
"outputId": "f3e5448a-76a5-47a6-a5e8-491ef25b27c2"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
|
"Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n",
|
|
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
|
"Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{\"name\": \"get_current_wind_speed\", \"parameters\": {\"location\": \"Bangalore, India\"}}\n",
|
|
"6\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
|
"Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The current wind speed in Bangalore is approximately 6 km/h.\n",
|
|
"####################################################################################################\n",
|
|
"Query: what is the wind speed in Bangalore?\n",
|
|
"Response: The current wind speed in Bangalore is approximately 6 km/h.\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
|
"Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{\"name\": \"get_current_temperature\", \"parameters\": {\"location\": \"Bangalore, India\"}}\n",
|
|
"22\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
|
"Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"I don't have real-time information, but Bangalore's average temperature is around 22 degrees Celsius.\n",
|
|
"####################################################################################################\n",
|
|
"Query: what is the temperature of Bangalore?\n",
|
|
"Response: I don't have real-time information, but Bangalore's average temperature is around 22 degrees Celsius.\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
|
"Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{\"name\": \"get_current_temperature\", \"parameters\": {\"location\": \"Delhi, India\"}}\n",
|
|
"None\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
|
"Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"I don't have the current temperature in Delhi.\n",
|
|
"####################################################################################################\n",
|
|
"Query: temperature in Delhi?\n",
|
|
"Response: I don't have the current temperature in Delhi.\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
|
"Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{\"name\": \"get_current_wind_speed\", \"parameters\": {\"location\": \"Goa\"}}\n",
|
|
"6\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
|
"Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The wind speed in Goa is approximately 6 km/h.\n",
|
|
"####################################################################################################\n",
|
|
"Query: wind speed in Goa?\n",
|
|
"Response: The wind speed in Goa is approximately 6 km/h.\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
|
"Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{\"name\": \"get_current_temperature\", \"parameters\": {\"location\": \"Chennai, India\"}}\n",
|
|
"None\n",
|
|
"I don't have the current temperature of Chennai.\n",
|
|
"####################################################################################################\n",
|
|
"Query: Chennai's temperature?\n",
|
|
"Response: I don't have the current temperature of Chennai.\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"\n",
|
|
"data = [\n",
"    \"what is the wind speed in Bangalore?\",\n",
"    \"what is the temperature of Bangalore?\",\n",
"    \"temperature in Delhi?\",\n",
"    \"wind speed in Goa?\",\n",
"    \"Chennai's temperature?\",\n",
"]\n",
"\n",
"# Send each sample query through get_llm_response and print it under a banner line.\n",
"for query in data:\n",
"    response = get_llm_response(query)\n",
"    print(\"#\" * 100, f\"Query: {query}\\nResponse: {response}\\n\\n\", sep=\"\\n\")\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"accelerator": "GPU",
|
|
"colab": {
|
|
"gpuType": "T4",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"name": "python"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0
|
|
}
|