LLM_Engineering_OLD/week2/community-contributions/day5_llama3.1_tools_usecase.ipynb

{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Code tested in Google Colab with a T4 GPU"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "bK-WkZmI_L4S"
      },
      "outputs": [],
      "source": [
        "# Install the notebook's dependencies. %pip (rather than !pip) installs into the environment of the running kernel.\n",
        "%pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai httpx==0.27.2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Anm0jUBC_dQF"
      },
      "outputs": [],
      "source": [
        "import os\n",
        "import requests\n",
        "from IPython.display import Markdown, display, update_display\n",
        "from openai import OpenAI\n",
        "from google.colab import drive\n",
        "from huggingface_hub import login\n",
        "from google.colab import userdata\n",
        "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
        "import torch\n",
        "import json"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "GsKyLRam_hpi"
      },
      "outputs": [],
      "source": [
        "# Model identifier passed to get_tokenizer() and get_model() further down.\n",
        "LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "AWo-zjJJAIIF"
      },
      "outputs": [],
      "source": [
        "# Read the token stored under the name HF_TOKEN via google.colab.userdata.\n",
        "hf_token = userdata.get('HF_TOKEN')\n",
        "# Log in to the Hugging Face Hub with that token (add_to_git_credential=True is forwarded to huggingface_hub.login).\n",
        "login(hf_token, add_to_git_credential=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "TSeMKC4S7Ip6"
      },
      "outputs": [],
      "source": [
        "# Utility Functions ::\n",
        "\n",
        "def get_tokenizer(model_name):\n",
        "  \"\"\"\n",
        "  Load the tokenizer for a model and use its end-of-sequence token for padding.\n",
        "\n",
        "  Args:\n",
        "    model_name: LLM model name\n",
        "\n",
        "  Returns:\n",
        "    Tokenizer whose pad_token has been set to its eos_token\n",
        "  \"\"\"\n",
        "  tok = AutoTokenizer.from_pretrained(model_name)\n",
        "  # Reuse the end-of-sequence token as the padding token.\n",
        "  tok.pad_token = tok.eos_token\n",
        "  return tok\n",
        "\n",
        "\n",
        "def get_model(model_name):\n",
        "    \"\"\"\n",
        "    Load a causal language model with a 4-bit BitsAndBytes quantization config.\n",
        "\n",
        "    Args:\n",
        "      model_name: LLM model name\n",
        "\n",
        "    Returns:\n",
        "      Model loaded through AutoModelForCausalLM with device_map=\"auto\"\n",
        "    \"\"\"\n",
        "    # 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.\n",
        "    four_bit_settings = BitsAndBytesConfig(\n",
        "        load_in_4bit=True,\n",
        "        bnb_4bit_quant_type=\"nf4\",\n",
        "        bnb_4bit_use_double_quant=True,\n",
        "        bnb_4bit_compute_dtype=torch.bfloat16,\n",
        "    )\n",
        "\n",
        "    return AutoModelForCausalLM.from_pretrained(\n",
        "        model_name,\n",
        "        quantization_config=four_bit_settings,\n",
        "        device_map=\"auto\",\n",
        "    )\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "O_mUtibSJFWt"
      },
      "outputs": [],
      "source": [
        "# Notebook-level tokenizer and model; parse_response() and get_llm_response() read these globals.\n",
        "tokenizer = get_tokenizer(LLAMA)\n",
        "model = get_model(LLAMA)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Mc0RE4XM_2hW"
      },
      "outputs": [],
      "source": [
        "\n",
        "def parse_response(inputs, outputs):\n",
        "  \"\"\"\n",
        "  Return only the text the model generated, without the prompt.\n",
        "\n",
        "  The prompt is removed by token position before decoding. Cutting the decoded\n",
        "  string by the character length of the decoded prompt is fragile, because the\n",
        "  decoded prompt is not guaranteed to be an exact prefix of the decoded full sequence.\n",
        "\n",
        "  Args:\n",
        "    inputs: Input Tokens, pt tensors (a batch of prompt token ids; only the first row is used)\n",
        "    outputs: Model generated output token ids; assumes each row is the prompt followed by the new tokens\n",
        "\n",
        "  Returns:\n",
        "    response: decoded text of the newly generated tokens of the first sequence, special tokens skipped\n",
        "  \"\"\"\n",
        "  # Number of prompt tokens in the first sequence of the batch.\n",
        "  prompt_length = len(inputs[0])\n",
        "  # Everything after the prompt is what generate() added.\n",
        "  generated_tokens = outputs[0][prompt_length:]\n",
        "  response = tokenizer.decode(generated_tokens, skip_special_tokens=True)\n",
        "\n",
        "  return response\n",
        "\n",
        "# Tools Functions:\n",
        "\n",
        "def get_tools_response(tools_params):\n",
        "  \"\"\"\n",
        "  Run the tool named in a parsed tool call and return its result.\n",
        "\n",
        "  Args:\n",
        "    tools_params: dict with the tool 'name' and its 'arguments'; 'arguments' must contain 'location'\n",
        "\n",
        "  Returns:\n",
        "    Result of the matching tool function, or None when the name matches no known tool\n",
        "  \"\"\"\n",
        "  tool_name = tools_params['name']\n",
        "  place = tools_params['arguments']['location']\n",
        "\n",
        "  if tool_name == 'get_current_temperature':\n",
        "    return get_current_temperature(place)\n",
        "  if tool_name == 'get_current_wind_speed':\n",
        "    return get_current_wind_speed(place)\n",
        "  return None\n",
        "\n",
        "\n",
        "def get_current_temperature(location: str) -> float:\n",
        "    \"\"\"\n",
        "    Get the current temperature at a location.\n",
        "\n",
        "    Args:\n",
        "        location: The location to get the temperature for, in the format \"City, Country\"\n",
        "    Returns:\n",
        "        The current temperature at the specified location in the specified units, as a float.\n",
        "    \"\"\"\n",
        "    if location ==  \"Bangalore, India\":  \n",
        "      return 22 # for testing purpose, please replace with your logic\n",
        "\n",
        "\n",
        "def get_current_wind_speed(location: str) -> str:\n",
        "    \"\"\"\n",
        "    Get the current wind speed at a given location.\n",
        "\n",
        "    Args:\n",
        "        location: The location to get the wind speed for, in the format \"City, Country\"\n",
        "    Returns:\n",
        "        The current wind speed at the given location in the specified units, as a string.\n",
        "    \"\"\"\n",
        "    return 6 # for testing purpose, please replace with your logic\n",
        "\n",
        "\n",
        "def get_llm_response(message):\n",
        "    \"\"\"\n",
        "    Answer a weather / wind query, letting the model call one of the local tools.\n",
        "\n",
        "    Steps: build the chat (system prompt + user message), generate once with the tool\n",
        "    functions attached, parse the generated text as a JSON tool call, run the tool,\n",
        "    append the call and its result to the chat, then generate the final answer.\n",
        "\n",
        "    Args:\n",
        "      message: the user's query as text\n",
        "\n",
        "    Returns:\n",
        "      The text of the second generation. If the first generation cannot be parsed as a\n",
        "      JSON object with a \"name\" key, that first generation is returned as-is.\n",
        "    \"\"\"\n",
        "\n",
        "    messages = [\n",
        "              {\"role\": \"system\", \"content\": \"\"\"\n",
        "                        You are an helpful assistant that responds to weather and wind queries. Please provide accurate answers.\n",
        "                        Follow instructions:\n",
        "                        1. Please provide courteous answers.\n",
        "                        2. If you do not know the answer say so.\n",
        "                        3. Do not provide any explanations or suggestions.\n",
        "                        4. Provide the response in no more than 1 sentence.\n",
        "                    \"\"\"\n",
        "                    },\n",
        "    ]\n",
        "    messages.append({\"role\": \"user\", \"content\": message})\n",
        "\n",
        "    # return_dict=True also returns the attention mask, so generate() receives it explicitly;\n",
        "    # the tensors are moved to wherever the model lives instead of a hard-coded \"cuda\".\n",
        "    inputs = tokenizer.apply_chat_template(\n",
        "        messages,\n",
        "        tools=[get_current_temperature, get_current_wind_speed],\n",
        "        add_generation_prompt=True,\n",
        "        return_dict=True,\n",
        "        return_tensors=\"pt\",\n",
        "    ).to(model.device)\n",
        "\n",
        "    # For debugging, a TextStreamer(tokenizer) can be passed to generate() as streamer=...\n",
        "    outputs = model.generate(**inputs, max_new_tokens=2000, pad_token_id=tokenizer.pad_token_id)\n",
        "\n",
        "    model_response = parse_response(inputs[\"input_ids\"], outputs)\n",
        "\n",
        "    # The first generation is expected to be a JSON tool call; if it is not, treat it as the answer.\n",
        "    try:\n",
        "        tool_call = json.loads(model_response)\n",
        "    except json.JSONDecodeError:\n",
        "        tool_call = None\n",
        "    if not isinstance(tool_call, dict) or \"name\" not in tool_call:\n",
        "        return model_response\n",
        "\n",
        "    # get_tools_response() reads the key \"arguments\". Rename only the top-level \"parameters\" key:\n",
        "    # a text replace on the raw JSON would also rewrite that word inside argument values.\n",
        "    if \"arguments\" not in tool_call and \"parameters\" in tool_call:\n",
        "        tool_call[\"arguments\"] = tool_call.pop(\"parameters\")\n",
        "\n",
        "    messages.append({\"role\": \"assistant\", \"tool_calls\": [{\"type\": \"function\", \"function\": tool_call}]})\n",
        "\n",
        "    response = get_tools_response(tool_call)\n",
        "\n",
        "    messages.append({\"role\": \"tool\", \"name\": tool_call[\"name\"], \"content\": response})\n",
        "\n",
        "    # Second pass: the chat now contains the tool result, so the model can phrase the final answer.\n",
        "    inputs = tokenizer.apply_chat_template(\n",
        "        messages,\n",
        "        add_generation_prompt=True,\n",
        "        return_dict=True,\n",
        "        return_tensors=\"pt\",\n",
        "    ).to(model.device)\n",
        "    outputs = model.generate(**inputs, max_new_tokens=200, pad_token_id=tokenizer.pad_token_id)\n",
        "\n",
        "    response = parse_response(inputs[\"input_ids\"], outputs)\n",
        "\n",
        "    return response\n",
        "\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "AKGLvmnsCrnk",
        "outputId": "f3e5448a-76a5-47a6-a5e8-491ef25b27c2"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n",
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "{\"name\": \"get_current_wind_speed\", \"parameters\": {\"location\": \"Bangalore, India\"}}\n",
            "6\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "The current wind speed in Bangalore is approximately 6 km/h.\n",
            "####################################################################################################\n",
            "Query: what is the wind speed in Bangalore?\n",
            "Response: The current wind speed in Bangalore is approximately 6 km/h.\n",
            "\n",
            "\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "{\"name\": \"get_current_temperature\", \"parameters\": {\"location\": \"Bangalore, India\"}}\n",
            "22\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "I don't have real-time information, but Bangalore's average temperature is around 22 degrees Celsius.\n",
            "####################################################################################################\n",
            "Query: what is the temperature of Bangalore?\n",
            "Response: I don't have real-time information, but Bangalore's average temperature is around 22 degrees Celsius.\n",
            "\n",
            "\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "{\"name\": \"get_current_temperature\", \"parameters\": {\"location\": \"Delhi, India\"}}\n",
            "None\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "I don't have the current temperature in Delhi.\n",
            "####################################################################################################\n",
            "Query: temperature in Delhi?\n",
            "Response: I don't have the current temperature in Delhi.\n",
            "\n",
            "\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "{\"name\": \"get_current_wind_speed\", \"parameters\": {\"location\": \"Goa\"}}\n",
            "6\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "The wind speed in Goa is approximately 6 km/h.\n",
            "####################################################################################################\n",
            "Query: wind speed in Goa?\n",
            "Response: The wind speed in Goa is approximately 6 km/h.\n",
            "\n",
            "\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
            "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "{\"name\": \"get_current_temperature\", \"parameters\": {\"location\": \"Chennai, India\"}}\n",
            "None\n",
            "I don't have the current temperature of Chennai.\n",
            "####################################################################################################\n",
            "Query: Chennai's temperature?\n",
            "Response: I don't have the current temperature of Chennai.\n",
            "\n",
            "\n"
          ]
        }
      ],
      "source": [
        "\n",
        "\n",
        "# Sample queries covering both tools, including locations the temperature stub does not know.\n",
        "data = [\n",
        "    \"what is the wind speed in Bangalore?\",\n",
        "    \"what is the temperature of Bangalore?\",\n",
        "    \"temperature in Delhi?\",\n",
        "    \"wind speed in Goa?\",\n",
        "    \"Chennai's temperature?\",\n",
        "]\n",
        "\n",
        "# Send each query through get_llm_response() and print it next to the answer, after a separator line.\n",
        "for query in data:\n",
        "  response = get_llm_response(query)\n",
        "  print(\"#\"*100)\n",
        "  print(f\"Query: {query}\\nResponse: {response}\\n\\n\")\n",
        "\n"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "T4",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}