{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f97c7598-f571-4ea1-838c-e9158f729c3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import ollama\n",
    "import base64\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9fc1393c-f0b1-4982-94a2-bfd502e85b23",
   "metadata": {},
   "outputs": [],
   "source": [
    "def encode_image(image_path):\n",
    "    # Read the image file and return its contents as a base64-encoded string\n",
    "    with open(image_path, 'rb') as f:\n",
    "        return base64.b64encode(f.read()).decode('utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53cca1fa-6db2-4fe4-8990-ffd98423964a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Quick manual test of encode_image on a local file\n",
    "# image_path = r\"C:\\Users\\LAKSHYA\\OneDrive\\Pictures\\Camera Roll\\WIN_20250614_02_46_47_Pro.jpg\"\n",
    "# image_base64 = encode_image(image_path)\n",
    "# print(image_base64[:100])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71146ccf-25af-48d3-8068-ee3c9008cebf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Base64-encoded images collected for the current run\n",
    "image_list = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f8801a8-0c30-4199-a334-587096e6edeb",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee3c5d82-e530-40f5-901a-681421f21d1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def put_image():\n",
    "    # Ask the user for an image path and append its base64 encoding to image_list\n",
    "    global image_list\n",
    "    user_input_image = input(\"Enter image path or press Enter to skip: \").strip()\n",
    "\n",
    "    if not user_input_image:\n",
    "        print(\"No image inserted\")\n",
    "        return image_list\n",
    "\n",
    "    image_path = os.path.normpath(user_input_image)\n",
    "\n",
    "    if not os.path.exists(image_path):\n",
    "        print(\"Image path not found! Try again, or press Enter to leave it blank\")\n",
    "        return put_image()  # Re-prompt until a valid path or a blank entry is given\n",
    "\n",
    "    image_base64 = encode_image(image_path)\n",
    "    image_list.append(image_base64)\n",
    "\n",
    "    # Detect file extension for MIME type\n",
    "    # ext = os.path.splitext(image_path)[-1].lower()\n",
    "    # mime_type = 'image/jpeg' if ext in ['.jpg', '.jpeg'] else 'image/png'  # Extend if needed\n",
    "    # return f\"data:{mime_type};base64,{image_base64[:100]}\"\n",
    "\n",
    "    return image_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "032f1abb-ca6c-4f03-bda1-1a0a62f2ec43",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = (\"System prompt: (You are a compassionate and intelligent visual assistant designed to help people who are blind or visually impaired. \"\n",
    "          \"Your job is to look at an image and describe it in a way that helps the user understand the scene clearly. \"\n",
    "          \"Use simple, descriptive language and avoid technical terms. Describe what is happening in the image, people's body language, clothing, facial expressions, objects, and surroundings. \"\n",
    "          \"Be vivid and precise, as if you are painting a picture with words. \"\n",
    "          \"Also, take into account any personal instructions or questions provided by the user—such as describing a specific person, activity, or object. \"\n",
    "          \"If the user includes a specific prompt, prioritize that in your description.)\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "29494db0-4770-4689-9904-8eebc4390e7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def put_prompt():\n",
    "    # Ask the user for a question or instruction and append it to the running prompt\n",
    "    global prompt\n",
    "    user_input = input(\"Put new prompt: \").strip()\n",
    "    if not user_input:\n",
    "        print(\"Please enter a prompt\")\n",
    "        return put_prompt()\n",
    "    prompt += \"\\nUser: \" + user_input\n",
    "    return prompt\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d286369c-e6ef-4a20-a3a8-3563af28940a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def image_description():\n",
    "    global prompt\n",
    "\n",
    "    put_image()\n",
    "    if not image_list:\n",
    "        return \"No images available. Skipping...\"\n",
    "\n",
    "    user_prompt = put_prompt()  # Full conversation so far, including the new user turn\n",
    "    full_answer = \"\"\n",
    "\n",
    "    for chunk in ollama.generate(\n",
    "        model='llava:7b-v1.6',\n",
    "        prompt=user_prompt,\n",
    "        images=image_list,\n",
    "        stream=True\n",
    "    ):\n",
    "        content = chunk.get(\"response\", \"\")\n",
    "        print(content, end=\"\", flush=True)  # Live stream to console\n",
    "        full_answer += content\n",
    "\n",
    "    # put_prompt() already appended the user turn, so only record the assistant's reply\n",
    "    prompt += \"\\nAssistant: \" + full_answer\n",
    "    return full_answer\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cbda35a3-45ed-4509-ab41-6827eacd922c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def call_llava():\n",
    "    image_list.clear()\n",
    "    for i in range(5):  # Up to five describe-an-image rounds per run\n",
    "        print(f\"\\nIteration {i+1}\")\n",
    "        answer = image_description()\n",
    "        print(\"\\n\\nFinal Answer:\", answer)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15518865-6c59-4029-bc2d-42d313eb78bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "call_llava()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c77bd493-f893-402e-b4e3-64854e9d2e19",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}