{ "cells": [ { "cell_type": "markdown", "id": "44d517c6", "metadata": {}, "source": [ "# Code Documenter\n", "This application documents a code module:\n", "Generates a DocString\n", "Inserts comments where applicable\n", "\n", "Output is in TextBox and can be written to a file.\n", "\n", "Offeres model selection" ] }, { "cell_type": "code", "execution_count": null, "id": "c349e47b", "metadata": {}, "outputs": [], "source": [ "# Import Libraries\n", "\n", "from dotenv import load_dotenv\n", "import os\n", "#import requests\n", "from IPython.display import Markdown, display, update_display\n", "from openai import OpenAI\n", "import anthropic\n", "from google import genai\n", "from google.genai import types\n", "# from google.colab import drive\n", "from huggingface_hub import login\n", "#from google.colab import userdata\n", "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig, TextIteratorStreamer\n", "import torch\n", "import gradio as gr\n", "#import threading " ] }, { "cell_type": "code", "execution_count": null, "id": "46caf8e5", "metadata": {}, "outputs": [], "source": [ "# Constants - model names\n", "LLAMA_MODEL = \"codellama-7b-kstack\" # we will be using LM_Studio for this model\n", "QWEN_MODEL = \"qwen2.5-coder-14b-instruct\" # we will be using LM_Studio for this model, might be too large for some systems\n", "OPENAI_MODEL = \"gpt-4o\"\n", "ANTHROPIC_MODEL = \"claude-3-5-haiku-latest\"\n", "GOOGLE_MODEL = \"gemini-2.5-pro\"\n", "model_choices = [LLAMA_MODEL, QWEN_MODEL, OPENAI_MODEL, ANTHROPIC_MODEL, GOOGLE_MODEL]" ] }, { "cell_type": "code", "execution_count": null, "id": "6df20bf1", "metadata": {}, "outputs": [], "source": [ "# Load environment variables and set up API connections\n", "load_dotenv(override=True)\n", "openai_api_key = os.getenv('OPENAI_API_KEY')\n", "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n", "google_api_key = os.getenv('GOOGLE_API_KEY')\n", "hf_api_key = os.getenv('HF_API_KEY')\n", "\n", "\n", "if openai_api_key:\n", " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n", " openai = OpenAI(api_key=openai_api_key)\n", "else:\n", " print(\"OpenAI API Key not set\")\n", " \n", "if anthropic_api_key:\n", " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n", " claude = anthropic.Anthropic(api_key=anthropic_api_key)\n", "else:\n", " print(\"Anthropic API Key not set\")\n", "\n", "if google_api_key:\n", " print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n", " gemini = genai.Client(api_key=google_api_key) \n", "else:\n", " print(\"Google API Key not set\")\n", "\n", "if hf_api_key:\n", " print(f\"HuggingFace API Key exists and begins {hf_api_key[:7]}\")\n", " login(hf_api_key, add_to_git_credential=True)\n", "else:\n", " print(\"HuggingFace API Key not set\")\n", "\n", "# Set up LM Studio connection\n", "lm_studio_via_openai = OpenAI(base_url=\"http://127.0.0.1:1234/v1\", api_key=\"lmstudio\")" ] }, { "cell_type": "code", "execution_count": null, "id": "4c9178a6", "metadata": {}, "outputs": [], "source": [ "# Model invocation function\n", "# This function will handle the invocation of different models based on the selected model name.\n", "\n", "def invoke_model(model_name, prompt, max_tokens=1000, temperature=0.4):\n", " if model_name == OPENAI_MODEL:\n", " stream = openai.chat.completions.create(\n", " model=OPENAI_MODEL,\n", " messages=prompt,\n", " max_tokens=max_tokens,\n", " temperature=temperature,\n", " stream=True\n", " )\n", " #return 
 { "cell_type": "code", "execution_count": null, "id": "4c9178a6", "metadata": {}, "outputs": [], "source": [
 "# Model invocation function\n",
 "# Streams the response from whichever model was selected, yielding text chunks.\n",
 "\n",
 "def invoke_model(model_name, prompt, max_tokens=1000, temperature=0.4):\n",
 "    \"\"\"Stream a completion for the given chat-style prompt from the selected model.\"\"\"\n",
 "    if model_name == OPENAI_MODEL:\n",
 "        # OpenAI: stream chat completion chunks\n",
 "        stream = openai.chat.completions.create(\n",
 "            model=OPENAI_MODEL,\n",
 "            messages=prompt,\n",
 "            max_tokens=max_tokens,\n",
 "            temperature=temperature,\n",
 "            stream=True\n",
 "        )\n",
 "        for chunk in stream:\n",
 "            yield chunk.choices[0].delta.content or ''\n",
 "\n",
 "    elif model_name == ANTHROPIC_MODEL:\n",
 "        # Claude expects the system prompt separately from the user messages\n",
 "        try:\n",
 "            # Use a context manager for proper stream handling\n",
 "            with claude.messages.stream(\n",
 "                model=ANTHROPIC_MODEL,\n",
 "                system=prompt[0]['content'],\n",
 "                messages=[prompt[1]],\n",
 "                max_tokens=max_tokens,\n",
 "                temperature=temperature\n",
 "            ) as stream:\n",
 "                for chunk in stream.text_stream:\n",
 "                    if chunk:  # only yield non-empty chunks\n",
 "                        yield chunk\n",
 "        except Exception as e:\n",
 "            print(f\"Error invoking Claude model: {e}\")\n",
 "            yield f\"Error invoking Claude model: {e}\"\n",
 "            return\n",
 "\n",
 "    elif model_name == GOOGLE_MODEL:\n",
 "        # Gemini takes the system prompt via the generation config\n",
 "        stream = gemini.models.generate_content_stream(\n",
 "            model=GOOGLE_MODEL,\n",
 "            contents=prompt[1]['content'],\n",
 "            config=types.GenerateContentConfig(\n",
 "                temperature=temperature,\n",
 "                max_output_tokens=max_tokens,\n",
 "                system_instruction=prompt[0]['content'],\n",
 "            )\n",
 "        )\n",
 "        for chunk in stream:\n",
 "            yield chunk.text or ''\n",
 "\n",
 "    elif model_name in (LLAMA_MODEL, QWEN_MODEL):\n",
 "        # Local models served by LM Studio through its OpenAI-compatible API\n",
 "        stream = lm_studio_via_openai.chat.completions.create(\n",
 "            model=model_name,\n",
 "            messages=prompt,\n",
 "            max_tokens=max_tokens,\n",
 "            temperature=temperature,\n",
 "            stream=True\n",
 "        )\n",
 "        for chunk in stream:\n",
 "            yield chunk.choices[0].delta.content or ''\n",
 "    else:\n",
 "        raise ValueError(f\"Unsupported model name: {model_name}\")" ] },
 { "cell_type": "code", "execution_count": null, "id": "d781b697", "metadata": {}, "outputs": [], "source": [
 "# Save text to a file\n",
 "# Writes the generated text to output.txt in the current directory.\n",
 "def save_text_to_selected_location(text_content):\n",
 "    if not text_content.strip():\n",
 "        return \"No content to save\"\n",
 "\n",
 "    save_path = \"output.txt\"\n",
 "\n",
 "    try:\n",
 "        with open(save_path, 'w', encoding='utf-8') as f:\n",
 "            f.write(text_content)\n",
 "        return f\"Successfully saved to: {save_path}\"\n",
 "    except Exception as e:\n",
 "        return f\"Error saving file: {str(e)}\"\n",
 "\n",
 "# Set up event handlers\n",
 "def generate_response(system_input, prompt, max_tokens, temperature, model_name):\n",
 "    if system_input == \"Documentation\":\n",
 "        system_prompt = \"\"\"You are an experienced coding assistant. You will identify the programming language used in a provided code snippet and generate documentation for the code.\n",
 "        Ensure generally accepted documentation standards are followed. Also generate short inline comments where applicable to explain complicated code. Respond ONLY with the updated code\n",
 "        with documentation and comments, do not include any other preamble or explanation.\"\"\"\n",
 "    elif system_input == \"Test Code Generation\":\n",
 "        system_prompt = \"\"\"You are an experienced coding assistant. You will identify the programming language used in a provided code function and generate test code for it.\n",
 "        The code should test against normal and edge cases, and ensure proper error handling.\"\"\"\n",
 "    messages = [{\"role\": \"system\", \"content\": system_prompt}, {\"role\": \"user\", \"content\": f\"This is the code to process: ```\\n{prompt}\\n```\"}]\n",
 "    try:\n",
 "        accumulated_response = \"\"\n",
 "        for chunk in invoke_model(model_name=model_name, prompt=messages, max_tokens=max_tokens, temperature=temperature):\n",
 "            accumulated_response += chunk\n",
 "            yield accumulated_response\n",
 "    except Exception as e:\n",
 "        yield f\"Error: {str(e)}\"" ] },
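 { "cell_type": "markdown", "id": "e4f5a6b7", "metadata": {}, "source": [
 "Optional smoke test (an added sketch, not part of the UI flow): it assumes at least one configured model is usable - OPENAI_MODEL here, but any entry in model_choices works - and streams documentation for a tiny sample function to confirm the plumbing before launching Gradio." ] },
 { "cell_type": "code", "execution_count": null, "id": "a7b8c9d0", "metadata": {}, "outputs": [], "source": [
 "# Optional smoke test outside the Gradio UI.\n",
 "# Assumes the OpenAI key above is set; swap model_name for LLAMA_MODEL to use the local LM Studio server instead.\n",
 "sample_code = \"def add(a, b):\\n    return a + b\"\n",
 "final_text = \"\"\n",
 "for partial in generate_response(\"Documentation\", sample_code, max_tokens=300, temperature=0.4, model_name=OPENAI_MODEL):\n",
 "    final_text = partial  # each yield is the accumulated response so far\n",
 "print(final_text)" ] },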
 { "cell_type": "code", "execution_count": null, "id": "a06d6af3", "metadata": {}, "outputs": [], "source": [
 "with gr.Blocks() as ui:\n",
 "    gr.Markdown(\"# Coding Assistant\\n Choose a model and a task: documentation generation or test code generation.\")\n",
 "    with gr.Row():\n",
 "        system_input = gr.Dropdown(label=\"Task Type\", choices=[\"Documentation\", \"Test Code Generation\"], value=\"Documentation\", interactive=True, visible=True)\n",
 "    with gr.Row():\n",
 "        prompt_input = gr.Textbox(label=\"Code to process\", placeholder=\"Paste the code to document or test here...\", lines=4)\n",
 "        response_output = gr.Textbox(label=\"Model Response\", lines=10, interactive=False)\n",
 "    with gr.Row():\n",
 "        max_tokens_input = gr.Slider(minimum=1, maximum=4096, value=1000, step=1, label=\"Max Tokens\")\n",
 "        temperature_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label=\"Temperature\")\n",
 "        model_selector = gr.Dropdown(\n",
 "            label=\"Select Model\",\n",
 "            choices=model_choices,\n",
 "            value=LLAMA_MODEL,\n",
 "            interactive=True\n",
 "        )\n",
 "    with gr.Row():\n",
 "        generate_button = gr.Button(\"Generate Response\", visible=True)\n",
 "        download_button = gr.Button(\"Save Response to File\", visible=True)\n",
 "    with gr.Row():\n",
 "        save_status = gr.Markdown()\n",
 "\n",
 "    generate_button.click(\n",
 "        fn=generate_response,\n",
 "        inputs=[system_input, prompt_input, max_tokens_input, temperature_input, model_selector],\n",
 "        outputs=response_output\n",
 "    )\n",
 "    download_button.click(\n",
 "        fn=save_text_to_selected_location,\n",
 "        inputs=[response_output],\n",
 "        outputs=save_status\n",
 "    )\n",
 "\n",
 "# Launch the UI\n",
 "ui.launch(inbrowser=True)" ] } ],
 "metadata": { "kernelspec": { "display_name": "llms", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" } }, "nbformat": 4, "nbformat_minor": 5 }