{ "cells": [ { "cell_type": "markdown", "id": "830865a5", "metadata": {}, "source": [ "# Code Generator - Windows\n", "\n", "Re-codes day4 excercise to build on windows\n", "\n", "### Section 1: Manually Generate and Execute CPP Code" ] }, { "cell_type": "code", "execution_count": null, "id": "f8559090", "metadata": {}, "outputs": [], "source": [ "# imports\n", "\n", "import os\n", "import io\n", "import sys\n", "import json\n", "import requests\n", "from dotenv import load_dotenv\n", "from openai import OpenAI\n", "import google.generativeai\n", "import anthropic\n", "from IPython.display import Markdown, display, update_display\n", "import gradio as gr\n", "import subprocess\n", "from huggingface_hub import login, InferenceClient\n", "from transformers import AutoTokenizer\n", "import platform" ] }, { "cell_type": "code", "execution_count": null, "id": "858b5e7b", "metadata": {}, "outputs": [], "source": [ "# environment\n", "\n", "load_dotenv(override=True)\n", "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n", "os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')\n", "os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')" ] }, { "cell_type": "code", "execution_count": null, "id": "98c45ca8", "metadata": {}, "outputs": [], "source": [ "# initialize\n", "\n", "openai = OpenAI()\n", "claude = anthropic.Anthropic()\n", "hf_token = os.environ['HF_TOKEN']\n", "login(hf_token, add_to_git_credential=True)\n", "\n", "# models\n", "OPENAI_MODEL = \"gpt-4o\"\n", "CLAUDE_MODEL = \"claude-3-5-sonnet-20240620\"\n", "CODE_QWEN_MODEL = \"Qwen/CodeQwen1.5-7B-Chat\"\n", "CODE_GEMMA_MODEL = \"Gemini/CodeGemma-7B-IT\"\n", "\n", "# huggingface inference clients\n", "# CODE_QWEN_URL = \"https://h1vdol7jxhje3mpn.us-east-1.aws.endpoints.huggingface.cloud\"\n", "CODE_QWEN_URL = \"https://mb4mgfmpql2yrady.us-east-1.aws.endpoints.huggingface.cloud\"\n", "CODE_GEMMA_URL = 
\"https://c5hggiyqachmgnqg.us-east-1.aws.endpoints.huggingface.cloud\"\n", "\n", "# path to your visual studio build tools VsDevCmd.bat file - initialize environment\n", "VISUAL_STUDIO_BUILD_TOOLS_PATH = \"C:\\\\Program Files (x86)\\\\Microsoft Visual Studio\\\\2022\\\\BuildTools\\\\Common7\\\\Tools\\\\VsDevCmd.bat\"\n", "\n", "# prefix of the output files, e.g., \"optimized_d4win.cpp\", \"optimized_d4win.exe\", etc.\n", "OUTPUT_FILE_NAME_BASE = \"optimized_d4win\"" ] }, { "cell_type": "code", "execution_count": null, "id": "ac5efb13", "metadata": {}, "outputs": [], "source": [ "# System message\n", "\n", "system_message = \"You are an assistant that reimplements Python code in high performance C++ for an M1 Mac. \"\n", "system_message += \"Respond only with C++ code; use comments sparingly and do not provide any explanation other than occasional comments. \"\n", "system_message += \"The C++ response needs to produce an identical output in the fastest possible time. Keep implementations of random number generators identical so that results match exactly.\"" ] }, { "cell_type": "code", "execution_count": null, "id": "ce3ce9da", "metadata": {}, "outputs": [], "source": [ "# build user prompts and message structures for openai and claude models\n", "\n", "def user_prompt_for(python):\n", " user_prompt = \"Rewrite this Python code in C++ with the fastest possible implementation that produces identical output in the least time. \"\n", " user_prompt += \"Respond only with C++ code; do not explain your work other than a few comments. \"\n", " user_prompt += \"Pay attention to number types to ensure no int overflows. 
# write to / read from the generated C++ source file

def write_output(cpp, filename_base=None):
    """Strip Markdown code fences from `cpp` and save it as ``<filename_base>.cpp``.

    Args:
        cpp: Model reply containing C++ source, possibly wrapped in
            ```` ```cpp ```` / ```` ``` ```` fences.
        filename_base: Path of the output file without the ``.cpp`` extension.
            Defaults to the notebook-level ``OUTPUT_FILE_NAME_BASE``.
    """
    if filename_base is None:
        filename_base = OUTPUT_FILE_NAME_BASE
    code = cpp.replace("```cpp", "").replace("```", "")
    # Explicit UTF-8: the locale default (e.g. cp1252 on Windows) raises
    # UnicodeEncodeError when the reply contains characters outside that code page.
    with open(f"{filename_base}.cpp", "w", encoding="utf-8") as f:
        f.write(code)


def read_output(filename_base=None):
    """Return the text of ``<filename_base>.cpp``.

    Args:
        filename_base: Path of the file without the ``.cpp`` extension.
            Defaults to the notebook-level ``OUTPUT_FILE_NAME_BASE``.

    Returns:
        The file contents as a string.
    """
    if filename_base is None:
        filename_base = OUTPUT_FILE_NAME_BASE
    # Must use the same encoding as write_output so the text round-trips.
    with open(f"{filename_base}.cpp", "r", encoding="utf-8") as f:
        return f.read()
# Borrowed from @CloudLlama's contribution!

def run_cmd(command_to_run):
    """Run a command and return what it printed to stdout.

    Args:
        command_to_run: Argument list handed to ``subprocess.run``.

    Returns:
        The captured stdout; ``"SUCCESS"`` if the command succeeded without
        printing anything; ``""`` if the command list is empty, the program
        could not be started, or it exited with a non-zero status.
    """
    if not command_to_run:
        return ""
    try:
        run_result = subprocess.run(command_to_run, check=True, text=True, capture_output=True)
    except (subprocess.SubprocessError, OSError, ValueError):
        # Failure is reported as an empty string, but KeyboardInterrupt and
        # programming errors are no longer swallowed by a bare `except`.
        return ""
    return run_result.stdout if run_result.stdout else "SUCCESS"


def c_compiler_cmd(filename_base):
    """Pick the C++ compile command for the current platform.

    Args:
        filename_base: Source/executable name without extension; the command
            compiles ``<filename_base>.cpp``.

    Returns:
        A three-item list ``[platform_label, compiler_label, compile_cmd]``.
        ``compile_cmd`` is an argument list for ``subprocess.run``. When no
        compiler is found, ``compiler_label`` is ``"Unavailable"`` and
        ``compile_cmd`` is ``[]``. Unrecognized platforms yield
        ``["Unknown", "Unavailable", []]``.
    """
    import shutil  # local import: shutil is not part of the notebook's import cell

    my_platform = platform.system()
    source_file = f"{filename_base}.cpp"

    if my_platform == "Windows":
        if os.path.isfile(VISUAL_STUDIO_BUILD_TOOLS_PATH):
            # VsDevCmd.bat sets up the MSVC environment, then cl builds
            # <filename_base>.exe. /O2 optimizes for speed, /EHsc enables
            # standard C++ exception handling.
            compile_cmd = ["cmd", "/c", VISUAL_STUDIO_BUILD_TOOLS_PATH, "&", "cl", "/EHsc", "/O2", source_file]
            return ["Windows", "Visual Studio Build Tools", compile_cmd]
        return [my_platform, "Unavailable", []]

    if my_platform == "Linux":
        # Prefer g++, fall back to clang++; only offer a compiler that is on PATH.
        for compiler_label, binary in (("GCC (g++)", "g++"), ("Clang++", "clang++")):
            if shutil.which(binary):
                return ["Linux", compiler_label, [binary, "-O3", source_file, "-o", filename_base]]
        return [my_platform, "Unavailable", []]

    if my_platform == "Darwin":
        if shutil.which("clang++"):
            compile_cmd = ["clang++", "-Ofast", "-std=c++17"]
            if platform.machine() == "arm64":
                # Apple-silicon tuning flags are only passed on arm64 hardware.
                compile_cmd += ["-march=armv8.5-a", "-mtune=apple-m1", "-mcpu=apple-m1"]
            compile_cmd += ["-o", filename_base, source_file]
            return ["Macintosh", "Clang++", compile_cmd]
        return [my_platform, "Unavailable", []]

    return ["Unknown", "Unavailable", []]


# compile and execute the C++ code

def execute_cpp(code):
    """Write `code` to the output .cpp file, compile it, run it, return its stdout.

    NOTE(security): this compiles and runs whatever source it is given with the
    kernel's privileges; only use it with code you are willing to execute.

    Args:
        code: C++ source text (Markdown fences are stripped by ``write_output``).

    Returns:
        The program's stdout on success, otherwise a string starting with
        ``"An error occurred:"`` followed by the compiler/program diagnostics.
    """
    filename_base = OUTPUT_FILE_NAME_BASE
    platform_label, _compiler_label, compile_cmd = c_compiler_cmd(filename_base)
    write_output(code)

    if not compile_cmd:
        return f"An error occurred:\nNo C++ compiler is available on this platform ({platform_label})"

    try:
        # Remove stale binaries so a failed build can never run an old executable.
        for stale_binary in (f"./{filename_base}.exe", f"./{filename_base}"):
            if os.path.isfile(stale_binary):
                os.remove(stale_binary)
        subprocess.run(compile_cmd, check=True, text=True, capture_output=True)
        # MSVC produces <base>.exe, g++/clang++ produce <base>.
        binary_name = f"{filename_base}.exe" if os.path.isfile(f"{filename_base}.exe") else filename_base
        run_result = subprocess.run([os.path.join(".", binary_name)], check=True, text=True, capture_output=True)
        return run_result.stdout
    except subprocess.CalledProcessError as e:
        # cl.exe reports errors on stdout, g++/clang++ on stderr: show both.
        details = "\n".join(part for part in (e.stderr, e.stdout) if part)
        return f"An error occurred:\n{details}"
    except OSError as e:
        # The compiler or the freshly built binary could not be started.
        return f"An error occurred:\n{e}"
"metadata": {}, "outputs": [], "source": [ "# optimize the simple python code with claude\n", "\n", "optimize_claude(pi)" ] }, { "cell_type": "code", "execution_count": null, "id": "110a7c11", "metadata": {}, "outputs": [], "source": [ "# compile and run the claude optimized code\n", "\n", "cpp_code = read_output()\n", "execute_cpp(cpp_code)" ] }, { "cell_type": "code", "execution_count": null, "id": "dceaa0db", "metadata": {}, "outputs": [], "source": [ "# hard pypton code to optimize\n", "\n", "pi_hard = \"\"\"# Be careful to support large number sizes\n", "\n", "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n", " value = seed\n", " while True:\n", " value = (a * value + c) % m\n", " yield value\n", " \n", "def max_subarray_sum(n, seed, min_val, max_val):\n", " lcg_gen = lcg(seed)\n", " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n", " max_sum = float('-inf')\n", " for i in range(n):\n", " current_sum = 0\n", " for j in range(i, n):\n", " current_sum += random_numbers[j]\n", " if current_sum > max_sum:\n", " max_sum = current_sum\n", " return max_sum\n", "\n", "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n", " total_sum = 0\n", " lcg_gen = lcg(initial_seed)\n", " for _ in range(20):\n", " seed = next(lcg_gen)\n", " total_sum += max_subarray_sum(n, seed, min_val, max_val)\n", " return total_sum\n", "\n", "# Parameters\n", "n = 10000 # Number of random numbers\n", "initial_seed = 42 # Initial seed for the LCG\n", "min_val = -10 # Minimum value of random numbers\n", "max_val = 10 # Maximum value of random numbers\n", "\n", "# Timing the function\n", "import time\n", "start_time = time.time()\n", "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n", "end_time = time.time()\n", "\n", "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n", "print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": null, "id": 
"371edd71", "metadata": {}, "outputs": [], "source": [ "# execute the simple python code\n", "\n", "exec(pi_hard)" ] }, { "cell_type": "code", "execution_count": null, "id": "9b0a7233", "metadata": {}, "outputs": [], "source": [ "# optimize the hard python code with openai\n", "\n", "optimize_gpt(pi_hard)" ] }, { "cell_type": "code", "execution_count": null, "id": "281e959c", "metadata": {}, "outputs": [], "source": [ "# compile and run the openai optimized code\n", "\n", "cpp_code = read_output()\n", "execute_cpp(cpp_code)" ] }, { "cell_type": "code", "execution_count": null, "id": "8d2712ce", "metadata": {}, "outputs": [], "source": [ "# optimize the hard python code with claude\n", "\n", "optimize_claude(pi_hard)" ] }, { "cell_type": "code", "execution_count": null, "id": "444549a7", "metadata": {}, "outputs": [], "source": [ "# compile and run the openai optimized code\n", "\n", "cpp_code = read_output()\n", "execute_cpp(cpp_code)" ] }, { "cell_type": "markdown", "id": "6aec3927", "metadata": {}, "source": [ "### Section 2: Using Gradio Interface to Generate and Execute CPP Code" ] }, { "cell_type": "code", "execution_count": null, "id": "6607743c", "metadata": {}, "outputs": [], "source": [ "# configure streaming responses from openai and claude\n", "\n", "def stream_gpt(python): \n", " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n", " reply = \"\"\n", " for chunk in stream:\n", " fragment = chunk.choices[0].delta.content or \"\"\n", " reply += fragment\n", " yield reply.replace('```cpp\\n','').replace('```','')\n", " \n", "def stream_claude(python):\n", " result = claude.messages.stream(\n", " model=CLAUDE_MODEL,\n", " max_tokens=2000,\n", " system=system_message,\n", " messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n", " )\n", " reply = \"\"\n", " with result as stream:\n", " for text in stream.text_stream:\n", " reply += text\n", " yield 
# execute python code

def execute_python(code):
    """Run `code` with exec() and return everything it printed to stdout.

    NOTE(security): exec() runs arbitrary code with the kernel's privileges;
    only expose this to users you trust.

    Args:
        code: Python source text to execute.

    Returns:
        The text the code wrote to ``sys.stdout``.

    Raises:
        Exception: whatever the executed code raises is propagated after
            stdout has been restored.
    """
    output = io.StringIO()
    # One dict used as both globals and locals, like a real module. With a bare
    # exec(code) inside a function, names defined by the code land in a
    # separate locals mapping, so functions defined by the code cannot see
    # each other (NameError) unless the names happen to exist in the notebook.
    namespace = {"__name__": "__main__"}
    # Restore whatever stream was active before. Under Jupyter sys.stdout is
    # the kernel's stream, not sys.__stdout__, so resetting to sys.__stdout__
    # would silence later cell output.
    previous_stdout = sys.stdout
    sys.stdout = output
    try:
        exec(code, namespace)
    finally:
        sys.stdout = previous_stdout
    return output.getvalue()
gr.Row():\n", " python = gr.Textbox(label=\"Python code:\", value=pi, lines=10)\n", " cpp = gr.Textbox(label=\"C++ code:\", lines=10)\n", " with gr.Row():\n", " model = gr.Dropdown([\"GPT\", \"Claude\"], label=\"Select model\", value=\"GPT\")\n", " with gr.Row():\n", " convert = gr.Button(\"Convert code\")\n", " with gr.Row():\n", " python_run = gr.Button(\"Run Python\")\n", " cpp_run = gr.Button(\"Run C++\")\n", " with gr.Row():\n", " python_out = gr.TextArea(label=\"Python result:\", elem_classes=[\"python\"])\n", " cpp_out = gr.TextArea(label=\"C++ result:\", elem_classes=[\"cpp\"])\n", "\n", " convert.click(optimize, inputs=[python, model], outputs=[cpp])\n", " python_run.click(execute_python, inputs=[python], outputs=[python_out])\n", " cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])\n", "\n", "ui.launch(inbrowser=True)" ] }, { "cell_type": "markdown", "id": "b4d29c1c", "metadata": {}, "source": [ "### Section 3: Add HuggingFace-hosted Open Source Models to Gradio App" ] }, { "cell_type": "code", "execution_count": null, "id": "8ec5e4cb", "metadata": {}, "outputs": [], "source": [ "# initilize code qwen huggingface endpoint\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(CODE_QWEN_MODEL)\n", "messages = messages_for(pi)\n", "text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "4c3ac865", "metadata": {}, "outputs": [], "source": [ "# inspect the message template (text)\n", "\n", "print(text)" ] }, { "cell_type": "code", "execution_count": null, "id": "8a1de956", "metadata": {}, "outputs": [], "source": [ "# manually call the huggingface endpoint, generate c++ with code qwen\n", "\n", "client = InferenceClient(CODE_QWEN_URL, token=hf_token)\n", "stream = client.text_generation(text, stream=True, details=True, max_new_tokens=3000)\n", "for r in stream:\n", " print(r.token.text, end = \"\")" ] }, { "cell_type": "code", "execution_count": null, "id": 
"20d310dc", "metadata": {}, "outputs": [], "source": [ "# configure streaming responses from code qwen\n", "\n", "def stream_code_qwen(python):\n", " tokenizer = AutoTokenizer.from_pretrained(CODE_QWEN_MODEL)\n", " messages = messages_for(python)\n", " text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n", " client = InferenceClient(CODE_QWEN_URL, token=hf_token)\n", " stream = client.text_generation(text, stream=True, details=True, max_new_tokens=3000)\n", " result = \"\"\n", " for r in stream:\n", " result += r.token.text\n", " yield result " ] }, { "cell_type": "code", "execution_count": null, "id": "1340cd15", "metadata": {}, "outputs": [], "source": [ "# update optimize function to include code qwen\n", "\n", "def optimize(python, model):\n", " if model==\"GPT\":\n", " result = stream_gpt(python)\n", " elif model==\"Claude\":\n", " result = stream_claude(python)\n", " elif model==\"CodeQwen\":\n", " result = stream_code_qwen(python)\n", " else:\n", " raise ValueError(\"Unknown model\")\n", " for stream_so_far in result:\n", " yield stream_so_far " ] }, { "cell_type": "code", "execution_count": null, "id": "6761f4ea", "metadata": {}, "outputs": [], "source": [ "# select the python code sample and return the code\n", "\n", "def select_sample_program(sample_program):\n", " if sample_program==\"pi\":\n", " return pi\n", " elif sample_program==\"pi_hard\":\n", " return pi_hard\n", " else:\n", " return \"Type your Python program here\"" ] }, { "cell_type": "code", "execution_count": null, "id": "74ecb73b", "metadata": {}, "outputs": [], "source": [ "# run the updated gradio UI with code qwen\n", "\n", "compiler_cmd = c_compiler_cmd(\"optimized\")\n", "\n", "with gr.Blocks(css=css) as ui:\n", " gr.Markdown(\"## Convert code from Python to C++\")\n", " with gr.Row():\n", " python = gr.Textbox(label=\"Python code:\", value=pi, lines=10)\n", " cpp = gr.Textbox(label=\"C++ code:\", lines=10)\n", " with gr.Row():\n", " with 
gr.Column():\n", " sample_program = gr.Radio([\"pi\", \"pi_hard\"], label=\"Sample program\", value=\"pi\")\n", " model = gr.Dropdown([\"GPT\", \"Claude\", \"CodeQwen\"], label=\"Select model\", value=\"GPT\")\n", " with gr.Column():\n", " architecture = gr.Radio([compiler_cmd[0]], label=\"Architecture\", interactive=False, value=compiler_cmd[0])\n", " compiler = gr.Radio([compiler_cmd[1]], label=\"Compiler\", interactive=False, value=compiler_cmd[1])\n", " with gr.Row():\n", " convert = gr.Button(\"Convert code\")\n", " with gr.Row():\n", " python_run = gr.Button(\"Run Python\")\n", " if not compiler_cmd[1] == \"Unavailable\":\n", " cpp_run = gr.Button(\"Run C++\")\n", " else:\n", " cpp_run = gr.Button(\"No compiler to run C++\", interactive=False)\n", " with gr.Row():\n", " python_out = gr.TextArea(label=\"Python result:\", elem_classes=[\"python\"])\n", " cpp_out = gr.TextArea(label=\"C++ result:\", elem_classes=[\"cpp\"])\n", "\n", " sample_program.change(select_sample_program, inputs=[sample_program], outputs=[python])\n", " convert.click(optimize, inputs=[python, model], outputs=[cpp])\n", " python_run.click(execute_python, inputs=[python], outputs=[python_out])\n", " cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])\n", "\n", "ui.launch(inbrowser=True)" ] } ], "metadata": { "kernelspec": { "display_name": "llms", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.12" } }, "nbformat": 4, "nbformat_minor": 5 }