diff --git a/week4/community-contributions/week4-lchanio-code-documenter.ipynb b/week4/community-contributions/week4-lchanio-code-documenter.ipynb
new file mode 100644
index 0000000..aee045d
--- /dev/null
+++ b/week4/community-contributions/week4-lchanio-code-documenter.ipynb
@@ -0,0 +1,300 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "44d517c6",
+   "metadata": {},
+   "source": [
+    "# Code Documenter\n",
+    "This application documents a code module:\n",
+    "\n",
+    "- Generates a docstring\n",
+    "- Inserts comments where applicable\n",
+    "\n",
+    "Output appears in a textbox and can be written to a file.\n",
+    "\n",
+    "Offers model selection."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c349e47b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import Libraries\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "import os\n",
+    "from IPython.display import Markdown, display, update_display\n",
+    "from openai import OpenAI\n",
+    "import anthropic\n",
+    "from google import genai\n",
+    "from google.genai import types\n",
+    "from huggingface_hub import login\n",
+    "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig, TextIteratorStreamer\n",
+    "import torch\n",
+    "import gradio as gr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "46caf8e5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Constants - model names\n",
+    "LLAMA_MODEL = \"codellama-7b-kstack\"  # we will be using LM Studio for this model\n",
+    "QWEN_MODEL = \"qwen2.5-coder-14b-instruct\"  # we will be using LM Studio for this model; might be too large for some systems\n",
+    "OPENAI_MODEL = \"gpt-4o\"\n",
+    "ANTHROPIC_MODEL = \"claude-3-5-haiku-latest\"\n",
+    "GOOGLE_MODEL = \"gemini-2.5-pro\"\n",
+    "model_choices = [LLAMA_MODEL, QWEN_MODEL, OPENAI_MODEL, ANTHROPIC_MODEL, GOOGLE_MODEL]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6df20bf1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load environment variables and set up API connections\n",
+    "load_dotenv(override=True)\n",
+    "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
+    "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
+    "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
+    "hf_api_key = os.getenv('HF_API_KEY')\n",
+    "\n",
+    "if openai_api_key:\n",
+    "    print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
+    "    openai = OpenAI(api_key=openai_api_key)\n",
+    "else:\n",
+    "    print(\"OpenAI API Key not set\")\n",
+    "\n",
+    "if anthropic_api_key:\n",
+    "    print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
+    "    claude = anthropic.Anthropic(api_key=anthropic_api_key)\n",
+    "else:\n",
+    "    print(\"Anthropic API Key not set\")\n",
+    "\n",
+    "if google_api_key:\n",
+    "    print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
+    "    gemini = genai.Client(api_key=google_api_key)\n",
+    "else:\n",
+    "    print(\"Google API Key not set\")\n",
+    "\n",
+    "if hf_api_key:\n",
+    "    print(f\"HuggingFace API Key exists and begins {hf_api_key[:7]}\")\n",
+    "    login(hf_api_key, add_to_git_credential=True)\n",
+    "else:\n",
+    "    print(\"HuggingFace API Key not set\")\n",
+    "\n",
+    "# Set up LM Studio connection\n",
+    "lm_studio_via_openai = OpenAI(base_url=\"http://127.0.0.1:1234/v1\", api_key=\"lmstudio\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4c9178a6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Model invocation function\n",
+    "# This function handles invocation of the different models based on the selected model name.\n",
+    "\n",
+    "def invoke_model(model_name, prompt, max_tokens=1000, temperature=0.4):\n",
+    "    \"\"\"Stream the selected model's response, yielding text chunks as they arrive.\"\"\"\n",
+    "    if model_name == OPENAI_MODEL:\n",
+    "        stream = openai.chat.completions.create(\n",
+    "            model=OPENAI_MODEL,\n",
+    "            messages=prompt,\n",
+    "            max_tokens=max_tokens,\n",
+    "            temperature=temperature,\n",
+    "            stream=True\n",
+    "        )\n",
+    "        for chunk in stream:\n",
+    "            yield chunk.choices[0].delta.content or ''\n",
+    "\n",
+    "    elif model_name == ANTHROPIC_MODEL:\n",
+    "        # Claude takes the system prompt separately from the user messages\n",
+    "        try:\n",
+    "            # Use a context manager for proper stream handling\n",
+    "            with claude.messages.stream(\n",
+    "                model=ANTHROPIC_MODEL,\n",
+    "                system=prompt[0]['content'],\n",
+    "                messages=[prompt[1]],\n",
+    "                max_tokens=max_tokens,\n",
+    "                temperature=temperature\n",
+    "            ) as stream:\n",
+    "                for chunk in stream.text_stream:\n",
+    "                    if chunk:  # Only yield non-empty chunks\n",
+    "                        yield chunk\n",
+    "        except Exception as e:\n",
+    "            print(f\"Error invoking Claude model: {e}\")\n",
+    "            yield f\"Error invoking Claude model: {e}\"\n",
+    "            return\n",
+    "\n",
+    "    elif model_name == GOOGLE_MODEL:\n",
+    "        # Gemini also takes the system prompt separately, via the generation config\n",
+    "        stream = gemini.models.generate_content_stream(\n",
+    "            model=GOOGLE_MODEL,\n",
+    "            contents=prompt[1]['content'],\n",
+    "            config=types.GenerateContentConfig(\n",
+    "                temperature=temperature,\n",
+    "                max_output_tokens=max_tokens,\n",
+    "                system_instruction=prompt[0]['content'],\n",
+    "            )\n",
+    "        )\n",
+    "        for chunk in stream:\n",
+    "            yield chunk.text or ''\n",
+    "\n",
+    "    elif model_name == LLAMA_MODEL or model_name == QWEN_MODEL:\n",
+    "        # Invoke a local model served by LM Studio through its OpenAI-compatible endpoint\n",
+    "        stream = lm_studio_via_openai.chat.completions.create(\n",
+    "            model=model_name,\n",
+    "            messages=prompt,\n",
+    "            max_tokens=max_tokens,\n",
+    "            temperature=temperature,\n",
+    "            stream=True\n",
+    "        )\n",
+    "        for chunk in stream:\n",
+    "            yield chunk.choices[0].delta.content or ''\n",
+    "    else:\n",
+    "        raise ValueError(\"Unsupported model name\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d781b697",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save text to a file\n",
+    "# This function saves the generated text to output.txt in the current directory\n",
+    "def save_text_to_selected_location(text_content):\n",
+    "    if not text_content.strip():\n",
+    "        return \"No content to save\"\n",
+    "\n",
+    "    save_path = \"output.txt\"\n",
+    "\n",
+ " # If no file is selected, save to current directory\n", + " try:\n", + " with open(save_path, 'w', encoding='utf-8') as f:\n", + " f.write(text_content)\n", + " return f\"Successfully saved to: {save_path}\"\n", + " except Exception as e:\n", + " return f\"Error saving file: {str(e)}\"\n", + " \n", + "# Set up event handlers\n", + "def generate_response(system_input, prompt, max_tokens, temperature, model_name):\n", + " if system_input == \"Documentation\":\n", + " system_prompt = \"\"\"You are an experienced coding assistant. You will identify the programming language used in a provided code snippet and generate documentation for the code. \n", + " Ensure generally acceptable documentation standards are followed. Also generate short inline comments where applicable to explain complicated code. Respond ONLY with the updated code \n", + " with documentation and comments, do not include any other preamble or explanation.\"\"\"\n", + " elif system_input == \"Test Code Generation\":\n", + " system_prompt = \"\"\"You are an experienced coding assistant. You will identify the programming language used in a provided code function and generate test code for it. \" \\\n", + " \"The code should test against normal and edge cases, and ensure proper error handling:\"\"\"\n", + " messages=[{\"role\": \"system\", \"content\": system_prompt}, {\"role\": \"user\", \"content\": f\"This is the code to process: ```\\n{prompt}\\n```\"}]\n", + " try:\n", + " acumulated_response = \"\"\n", + " for chunk in invoke_model(model_name=model_name, prompt=messages, max_tokens=max_tokens, temperature=temperature):\n", + " acumulated_response += chunk\n", + " yield acumulated_response\n", + " except Exception as e:\n", + " return f\"Error: {str(e)}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a06d6af3", + "metadata": {}, + "outputs": [], + "source": [ + "with gr.Blocks() as ui:\n", + " gr.Markdown(\"# Coding Assistant\\n Choose from the available models to generate responses. 
Choose from either Documentation generation, or Test Code generation.\")\n", + " with gr.Row():\n", + " system_input = gr.Dropdown(label=\"Task Type\", choices=[\"Documentation\", \"Test Code Generation\"], value=\"Documentation\", interactive=True, visible=True)\n", + " with gr.Row():\n", + " prompt_input= gr.Textbox(label=\"Enter your prompt\", placeholder=\"Type your prompt here...\", lines=4)\n", + " response_output = gr.Textbox(label=\"Model Response\", lines=10, interactive=False)\n", + " with gr.Row():\n", + " max_tokens_input = gr.Slider(minimum=1, maximum=4096, value=1000, step=1, label=\"Max Tokens\")\n", + " temperature_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label=\"Temperature\")\n", + " model_selector = gr.Dropdown(\n", + " label=\"Select Model\", \n", + " choices=model_choices, \n", + " value=LLAMA_MODEL, \n", + " interactive=True\n", + " )\n", + " with gr.Row():\n", + " generate_button = gr.Button(\"Generate Response\", visible=True)\n", + " download_button = gr.Button(\"Download Response\", visible=True)\n", + " \n", + " generate_button.click(\n", + " fn=generate_response,\n", + " inputs=[system_input, prompt_input, max_tokens_input, temperature_input, model_selector],\n", + " outputs=response_output\n", + " )\n", + " download_button.click(\n", + " fn=save_text_to_selected_location,\n", + " inputs=[response_output],\n", + " outputs=None\n", + " )\n", + "\n", + "# Launch the UI\n", + "ui.launch(inbrowser=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llms", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}
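
For a quick check of the local-model path without launching the Gradio UI, a sketch along the following lines exercises the same LM Studio endpoint that `invoke_model` uses. It assumes LM Studio is running locally on port 1234 with the `qwen2.5-coder-14b-instruct` model loaded; the sample snippet and prompt wording are illustrative, not part of the notebook.

```python
# Minimal sketch: stream a documentation request through LM Studio's
# OpenAI-compatible endpoint, mirroring the notebook's local-model path.
# Assumes LM Studio is serving http://127.0.0.1:1234/v1 with
# qwen2.5-coder-14b-instruct loaded; adjust to your setup.
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:1234/v1", api_key="lmstudio")

system_prompt = (
    "You are an experienced coding assistant. Identify the programming language "
    "of the provided code and return it with a docstring and inline comments. "
    "Respond ONLY with the updated code."
)
code_snippet = "def add(a, b):\n    return a + b"  # illustrative input

stream = client.chat.completions.create(
    model="qwen2.5-coder-14b-instruct",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"This is the code to process: ```\n{code_snippet}\n```"},
    ],
    max_tokens=1000,
    temperature=0.4,
    stream=True,
)

# Print the documented code as it streams in
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="", flush=True)
```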
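
Since the notebook's second task type generates test code, a small test for its own save helper fits the theme. A sketch, assuming the notebook's functions have been copied into an importable module; the module name `code_documenter` is hypothetical.

```python
# pytest sketch for save_text_to_selected_location; "code_documenter" is a
# hypothetical module holding the notebook's functions.
from code_documenter import save_text_to_selected_location


def test_save_writes_file(tmp_path, monkeypatch):
    # Run in a temporary directory so output.txt does not land in the repo
    monkeypatch.chdir(tmp_path)
    result = save_text_to_selected_location("print('hello')")
    assert result == "Successfully saved to: output.txt"
    assert (tmp_path / "output.txt").read_text(encoding="utf-8") == "print('hello')"


def test_save_rejects_empty_content(tmp_path, monkeypatch):
    monkeypatch.chdir(tmp_path)
    assert save_text_to_selected_location("   ") == "No content to save"
    assert not (tmp_path / "output.txt").exists()
```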