LLM_Engineering_OLD/week1/community-contributions/llava-For-Image-week1.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f97c7598-f571-4ea1-838c-e9158f729c3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import ollama\n",
    "import base64\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9fc1393c-f0b1-4982-94a2-bfd502e85b23",
   "metadata": {},
   "outputs": [],
   "source": [
    "def encode_image(image_path):\n",
    "    with open(image_path, 'rb') as f:\n",
    "        return base64.b64encode(f.read()).decode('utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53cca1fa-6db2-4fe4-8990-ffd98423964a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# image_path = r\"C:\\Users\\LAKSHYA\\OneDrive\\Pictures\\Camera Roll\\WIN_20250614_02_46_47_Pro.jpg\"\n",
    "# image_base64 = encode_image(image_path)\n",
    "# print(image_base64[:100]) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71146ccf-25af-48d3-8068-ee3c9008cebf",
   "metadata": {},
   "outputs": [],
   "source": [
    "image_list = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f8801a8-0c30-4199-a334-587096e6edeb",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee3c5d82-e530-40f5-901a-681421f21d1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def put_image():\n",
    "    global image_list\n",
    "    user_input_image = input(\"Enter image path or press enter to skip: \").strip()\n",
    "    \n",
    "    if not user_input_image:\n",
    "        print(\"No image inserted\")\n",
    "        return image_list\n",
    "\n",
    "    image_path = os.path.normpath(user_input_image)\n",
    "    \n",
    "    if not os.path.exists(image_path):\n",
    "        print(\"Image path not found! Try again or enter to leave blank\")\n",
    "        return put_image()  # Continue to allow more inputs\n",
    "        \n",
    "\n",
    "\n",
    "\n",
    "        \n",
    "    image_base64 = encode_image(image_path)\n",
    "    image_list.append(image_base64)\n",
    "    \n",
    "    # Detect file extension for MIME type\n",
    "    # ext = os.path.splitext(image_path)[-1].lower()\n",
    "    # mime_type = 'image/jpeg' if ext in ['.jpg', '.jpeg'] else 'image/png'  # Extend if needed\n",
    "\n",
    "\n",
    "    return image_list\n",
    "    \n",
    "    # return f\"data:{mime_type};base64,{image_base64[:100]}\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "032f1abb-ca6c-4f03-bda1-1a0a62f2ec43",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt=  (\"System prompt: (You are a compassionate and intelligent visual assistant designed to help people who are blind or visually impaired. \"\n",
    "    \"Your job is to look at an image and describe it in a way that helps the user understand the scene clearly. \"\n",
    "    \"Use simple, descriptive language and avoid technical terms. Describe what is happening in the image, people's body language, clothing, facial expressions, objects, and surroundings. \"\n",
    "    \"Be vivid and precise, as if you are painting a picture with words. \"\n",
    "    \"Also, take into account any personal instructions or questions provided by the user—such as describing a specific person, activity, or object. \"\n",
    "    \"If the user includes a specific prompt, prioritize that in your description.)\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "29494db0-4770-4689-9904-8eebc4390e7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def put_prompt():\n",
    "    global prompt\n",
    "    user_input = input(\"Put new prompt: \")\n",
    "    if not user_input:\n",
    "        print(\"please enter a prompt\")\n",
    "        return put_prompt()\n",
    "    prompt += \"\\nUser: \" + user_input\n",
    "    return prompt\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d286369c-e6ef-4a20-a3a8-3563af28940a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def image_description():\n",
    "    global prompt\n",
    "\n",
    "    put_image()\n",
    "    if not image_list: \n",
    "        return \"No images available. Skipping...\"\n",
    "\n",
    "    user_prompt = put_prompt()\n",
    "    full_answer = \"\"\n",
    "\n",
    "    for chunk in ollama.generate(\n",
    "        model='llava:7b-v1.6',\n",
    "        prompt=user_prompt,\n",
    "        images=image_list,\n",
    "        stream=True\n",
    "    ):\n",
    "        content = chunk.get(\"response\", \"\")\n",
    "        print(\"\\n\\n Final Answer:\",content, end=\"\", flush=True)  # Live stream to console\n",
    "        full_answer += content\n",
    "\n",
    "    prompt += \"\\nUser: \" + user_prompt + \"\\nAssistant: \" + full_answer\n",
    "    return full_answer\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cbda35a3-45ed-4509-ab41-6827eacd922c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def call_llava():\n",
    "    image_list.clear()\n",
    "    for i in range(5):\n",
    "        print(f\"\\n Iteration {i+1}\")\n",
    "        answer = image_description()\n",
    "        print(\"\\n\\n Final Answer:\", answer)\n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15518865-6c59-4029-bc2d-42d313eb78bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "call_llava()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c77bd493-f893-402e-b4e3-64854e9d2e19",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}