{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "overview-md",
   "metadata": {},
   "source": [
    "# Image description assistant (LLaVA via Ollama)\n",
    "\n",
    "Interactive console helper that sends an image and a question to the `llava:7b-v1.6` model through `ollama.generate` and prints a description aimed at blind or visually impaired users.\n",
    "\n",
    "**How it works**\n",
    "\n",
    "1. `put_image()` asks for an image path and base64-encodes the file.\n",
    "2. `put_prompt()` asks for the user's instruction and adds it to the running transcript.\n",
    "3. `image_description()` streams the model's reply and records it in the transcript.\n",
    "4. `call_llava()` resets the session and repeats the turn (5 times by default).\n",
    "\n",
    "Assumes the `ollama` Python package is installed and the model is available to it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f97c7598-f571-4ea1-838c-e9158f729c3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import base64\n",
    "import os\n",
    "\n",
    "import ollama"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9fc1393c-f0b1-4982-94a2-bfd502e85b23",
   "metadata": {},
   "outputs": [],
   "source": [
    "def encode_image(image_path):\n",
    "    \"\"\"Return the contents of ``image_path`` as a base64-encoded string.\n",
    "\n",
    "    Args:\n",
    "        image_path: Path of the file to read (opened in binary mode).\n",
    "\n",
    "    Returns:\n",
    "        str: Base64 text of the file's bytes.\n",
    "\n",
    "    Raises:\n",
    "        OSError: If the file cannot be opened or read.\n",
    "    \"\"\"\n",
    "    with open(image_path, 'rb') as image_file:\n",
    "        raw_bytes = image_file.read()\n",
    "    return base64.b64encode(raw_bytes).decode('utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71146ccf-25af-48d3-8068-ee3c9008cebf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Vision model requested from Ollama on every turn.\n",
    "MODEL_NAME = 'llava:7b-v1.6'\n",
    "\n",
    "# Base64-encoded images attached to each request in the current session.\n",
    "image_list = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee3c5d82-e530-40f5-901a-681421f21d1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def put_image():\n",
    "    \"\"\"Ask for an image path and append the encoded image to ``image_list``.\n",
    "\n",
    "    Re-prompts until the user names an existing file or presses Enter to skip.\n",
    "    Quotes wrapped around a pasted path are removed before it is checked.\n",
    "\n",
    "    Returns:\n",
    "        list: The module-level ``image_list`` (unchanged when skipped).\n",
    "    \"\"\"\n",
    "    # Loop instead of recursing so repeated bad input cannot exhaust the call stack.\n",
    "    while True:\n",
    "        user_input_image = input(\"Enter image path or press enter to skip: \").strip()\n",
    "\n",
    "        if not user_input_image:\n",
    "            print(\"No image inserted\")\n",
    "            return image_list\n",
    "\n",
    "        unquoted_path = user_input_image.strip('\"').strip(\"'\")\n",
    "        image_path = os.path.normpath(unquoted_path)\n",
    "\n",
    "        # isfile rather than exists: a directory passes exists() but cannot be opened as a file.\n",
    "        if not os.path.isfile(image_path):\n",
    "            print(\"Image path not found! Try again or enter to leave blank\")\n",
    "            continue\n",
    "\n",
    "        try:\n",
    "            image_base64 = encode_image(image_path)\n",
    "        except OSError as error:\n",
    "            # The file exists but could not be read (e.g. permissions); ask again.\n",
    "            print(f\"Could not read image: {error}\")\n",
    "            continue\n",
    "\n",
    "        image_list.append(image_base64)\n",
    "        return image_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "032f1abb-ca6c-4f03-bda1-1a0a62f2ec43",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instructions placed at the start of every session's transcript.\n",
    "SYSTEM_PROMPT = (\n",
    "    \"System prompt: (You are a compassionate and intelligent visual assistant designed to help people who are blind or visually impaired. \"\n",
    "    \"Your job is to look at an image and describe it in a way that helps the user understand the scene clearly. \"\n",
    "    \"Use simple, descriptive language and avoid technical terms. Describe what is happening in the image, people's body language, clothing, facial expressions, objects, and surroundings. \"\n",
    "    \"Be vivid and precise, as if you are painting a picture with words. \"\n",
    "    \"Also, take into account any personal instructions or questions provided by the user—such as describing a specific person, activity, or object. \"\n",
    "    \"If the user includes a specific prompt, prioritize that in your description.)\"\n",
    ")\n",
    "\n",
    "# Running transcript: the system prompt followed by the User/Assistant turns.\n",
    "prompt = SYSTEM_PROMPT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "29494db0-4770-4689-9904-8eebc4390e7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def put_prompt():\n",
    "    \"\"\"Ask the user for an instruction and append it to the transcript.\n",
    "\n",
    "    Re-prompts until a non-blank instruction is entered, then records it in\n",
    "    the module-level ``prompt`` as a ``User:`` turn.\n",
    "\n",
    "    Returns:\n",
    "        str: The full transcript, including the new user turn.\n",
    "    \"\"\"\n",
    "    global prompt\n",
    "\n",
    "    # Loop instead of recursing so repeated blank input cannot exhaust the call stack.\n",
    "    while True:\n",
    "        user_input = input(\"Put new prompt: \").strip()\n",
    "        if user_input:\n",
    "            break\n",
    "        print(\"please enter a prompt\")\n",
    "\n",
    "    prompt += \"\\nUser: \" + user_input\n",
    "    return prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d286369c-e6ef-4a20-a3a8-3563af28940a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def image_description():\n",
    "    \"\"\"Run one turn: collect an image and an instruction, then query the model.\n",
    "\n",
    "    The reply is streamed to the console as it arrives and appended to the\n",
    "    module-level ``prompt`` as an ``Assistant:`` turn, so the next turn's\n",
    "    request includes it.\n",
    "\n",
    "    Returns:\n",
    "        str: The model's complete reply, or a notice when ``image_list`` is empty.\n",
    "    \"\"\"\n",
    "    global prompt\n",
    "\n",
    "    put_image()\n",
    "    if not image_list:\n",
    "        return \"No images available. Skipping...\"\n",
    "\n",
    "    # put_prompt() has already added the \"User:\" turn to the global transcript\n",
    "    # and returns the whole transcript, which is sent as the request prompt.\n",
    "    user_prompt = put_prompt()\n",
    "    answer_parts = []\n",
    "\n",
    "    for chunk in ollama.generate(\n",
    "        model=MODEL_NAME,\n",
    "        prompt=user_prompt,\n",
    "        images=image_list,\n",
    "        stream=True,\n",
    "    ):\n",
    "        content = chunk.get(\"response\", \"\")\n",
    "        print(content, end=\"\", flush=True)  # Live stream to console\n",
    "        answer_parts.append(content)\n",
    "    print()  # terminate the streamed line\n",
    "\n",
    "    full_answer = \"\".join(answer_parts)\n",
    "\n",
    "    # Append only the reply: user_prompt is the entire transcript, so adding it\n",
    "    # again would duplicate the whole conversation on every turn.\n",
    "    prompt += \"\\nAssistant: \" + full_answer\n",
    "    return full_answer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cbda35a3-45ed-4509-ab41-6827eacd922c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def call_llava(max_turns=5):\n",
    "    \"\"\"Start a fresh session and run ``max_turns`` conversation turns.\n",
    "\n",
    "    Images added in earlier turns stay in ``image_list``, so pressing Enter at\n",
    "    the image prompt sends the next instruction with the images already attached.\n",
    "\n",
    "    Args:\n",
    "        max_turns (int): Number of turns to run. Defaults to 5.\n",
    "    \"\"\"\n",
    "    global prompt\n",
    "\n",
    "    # Reset both pieces of session state so a previous run's images and\n",
    "    # transcript do not carry over into this one.\n",
    "    image_list.clear()\n",
    "    prompt = SYSTEM_PROMPT\n",
    "\n",
    "    for i in range(max_turns):\n",
    "        print(f\"\\n Iteration {i+1}\")\n",
    "        answer = image_description()\n",
    "        print(\"\\n\\n Final Answer:\", answer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15518865-6c59-4029-bc2d-42d313eb78bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "call_llava()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}