{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "830865a5",
   "metadata": {},
   "source": [
    "# Code Generator - Windows\n",
    "\n",
    "Re-codes the day 4 exercise so that it builds and runs on Windows.\n",
    "\n",
    "### Section 1: Manually Generate and Execute C++ Code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8559090",
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "\n",
    "import os\n",
    "import io\n",
    "import sys\n",
    "import json\n",
    "import requests\n",
    "from dotenv import load_dotenv\n",
    "from openai import OpenAI\n",
    "import google.generativeai\n",
    "import anthropic\n",
    "from IPython.display import Markdown, display, update_display\n",
    "import gradio as gr\n",
    "import subprocess\n",
    "from huggingface_hub import login, InferenceClient\n",
    "from transformers import AutoTokenizer\n",
    "import platform"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "858b5e7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# environment\n",
    "\n",
    "load_dotenv(override=True)\n",
    "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
    "os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')\n",
    "os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98c45ca8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# initialize\n",
    "\n",
    "openai = OpenAI()\n",
    "claude = anthropic.Anthropic()\n",
    "hf_token = os.environ['HF_TOKEN']\n",
    "login(hf_token, add_to_git_credential=True)\n",
    "\n",
    "# models\n",
    "OPENAI_MODEL = \"gpt-4o\"\n",
    "CLAUDE_MODEL = \"claude-3-5-sonnet-20240620\"\n",
    "CODE_QWEN_MODEL = \"Qwen/CodeQwen1.5-7B-Chat\"\n",
    "CODE_GEMMA_MODEL = \"google/codegemma-7b-it\"\n",
    "\n",
    "# huggingface inference endpoints\n",
    "# CODE_QWEN_URL = \"https://h1vdol7jxhje3mpn.us-east-1.aws.endpoints.huggingface.cloud\"\n",
    "CODE_QWEN_URL = \"https://mb4mgfmpql2yrady.us-east-1.aws.endpoints.huggingface.cloud\"\n",
    "CODE_GEMMA_URL = \"https://c5hggiyqachmgnqg.us-east-1.aws.endpoints.huggingface.cloud\"\n",
    "\n",
    "# path to your Visual Studio Build Tools VsDevCmd.bat file, which initializes the build environment\n",
    "VISUAL_STUDIO_BUILD_TOOLS_PATH = \"C:\\\\Program Files (x86)\\\\Microsoft Visual Studio\\\\2022\\\\BuildTools\\\\Common7\\\\Tools\\\\VsDevCmd.bat\"\n",
    "\n",
    "# prefix of the output files, e.g., \"optimized_d4win.cpp\", \"optimized_d4win.exe\", etc.\n",
    "OUTPUT_FILE_NAME_BASE = \"optimized_d4win\""
   ]
  },
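  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0fa7d1c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# optional sanity check (an addition, not part of the original flow): warn early\n",
    "# if VsDevCmd.bat isn't where we expect, rather than discovering it later when\n",
    "# the compile step silently reports the compiler as 'Unavailable'\n",
    "\n",
    "if platform.system() == \"Windows\" and not os.path.isfile(VISUAL_STUDIO_BUILD_TOOLS_PATH):\n",
    "    print(f\"Warning: VsDevCmd.bat not found at {VISUAL_STUDIO_BUILD_TOOLS_PATH}\")\n",
    "    print(\"Update VISUAL_STUDIO_BUILD_TOOLS_PATH above to match your Build Tools install\")"
   ]
  },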
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ac5efb13",
   "metadata": {},
   "outputs": [],
   "source": [
    "# System message\n",
    "\n",
    "system_message = \"You are an assistant that reimplements Python code in high performance C++ for a Windows PC. \"\n",
    "system_message += \"Respond only with C++ code; use comments sparingly and do not provide any explanation other than occasional comments. \"\n",
    "system_message += \"The C++ response needs to produce an identical output in the fastest possible time. Keep implementations of random number generators identical so that results match exactly.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce3ce9da",
   "metadata": {},
   "outputs": [],
   "source": [
    "# build user prompts and message structures for the openai and claude models\n",
    "\n",
    "def user_prompt_for(python):\n",
    "    user_prompt = \"Rewrite this Python code in C++ with the fastest possible implementation that produces identical output in the least time. \"\n",
    "    user_prompt += \"Respond only with C++ code; do not explain your work other than a few comments. \"\n",
    "    user_prompt += \"Pay attention to number types to ensure no int overflows. Remember to #include all necessary C++ headers such as iomanip.\\n\\n\"\n",
    "    user_prompt += python\n",
    "    return user_prompt\n",
    "\n",
    "def messages_for(python):\n",
    "    return [\n",
    "        {\"role\": \"system\", \"content\": system_message},\n",
    "        {\"role\": \"user\", \"content\": user_prompt_for(python)}\n",
    "    ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1ea427ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "# write to a file called <OUTPUT_FILE_NAME_BASE>.cpp, stripping any markdown code fences\n",
    "\n",
    "def write_output(cpp):\n",
    "    code = cpp.replace(\"```cpp\",\"\").replace(\"```\",\"\")\n",
    "    with open(f\"{OUTPUT_FILE_NAME_BASE}.cpp\", \"w\") as f:\n",
    "        f.write(code)"
   ]
  },
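  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1b8e2f4a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# a more defensive fence-stripper (a sketch, not the approach used above):\n",
    "# extract only the first fenced code block if one exists, so that backticks\n",
    "# appearing elsewhere in the reply can't corrupt the generated file\n",
    "\n",
    "import re\n",
    "\n",
    "def extract_cpp(reply):\n",
    "    match = re.search(r\"```(?:cpp)?\\n(.*?)```\", reply, re.DOTALL)\n",
    "    return match.group(1) if match else reply"
   ]
  },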
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d1198b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# read the contents of the output file called <OUTPUT_FILE_NAME_BASE>.cpp\n",
    "def read_output():\n",
    "    with open(f\"{OUTPUT_FILE_NAME_BASE}.cpp\", \"r\") as f:\n",
    "        return f.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d3d9650",
   "metadata": {},
   "outputs": [],
   "source": [
    "# optimize code using openai and claude\n",
    "\n",
    "def optimize_gpt(python):\n",
    "    stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n",
    "    reply = \"\"\n",
    "    for chunk in stream:\n",
    "        fragment = chunk.choices[0].delta.content or \"\"\n",
    "        reply += fragment\n",
    "        print(fragment, end='', flush=True)\n",
    "    write_output(reply)\n",
    "\n",
    "def optimize_claude(python):\n",
    "    result = claude.messages.stream(\n",
    "        model=CLAUDE_MODEL,\n",
    "        max_tokens=2000,\n",
    "        system=system_message,\n",
    "        messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n",
    "    )\n",
    "    reply = \"\"\n",
    "    with result as stream:\n",
    "        for text in stream.text_stream:\n",
    "            reply += text\n",
    "            print(text, end=\"\", flush=True)\n",
    "    write_output(reply)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "523d1481",
   "metadata": {},
   "outputs": [],
   "source": [
    "# generate C++ compile and run commands based on your platform\n",
    "# Borrowed from @CloudLlama's contribution!\n",
    "\n",
    "def run_cmd(command_to_run):\n",
    "    # returns stdout (or \"SUCCESS\") if the command works, \"\" if it doesn't -\n",
    "    # used below to probe whether a compiler is actually installed\n",
    "    try:\n",
    "        run_result = subprocess.run(command_to_run, check=True, text=True, capture_output=True)\n",
    "        return run_result.stdout if run_result.stdout else \"SUCCESS\"\n",
    "    except (subprocess.CalledProcessError, FileNotFoundError):\n",
    "        return \"\"\n",
    "\n",
    "def c_compiler_cmd(filename_base):\n",
    "    my_platform = platform.system()\n",
    "    my_compiler = []\n",
    "\n",
    "    try:\n",
    "        if my_platform == \"Windows\":\n",
    "            if os.path.isfile(VISUAL_STUDIO_BUILD_TOOLS_PATH):\n",
    "                compile_cmd = [\"cmd\", \"/c\", VISUAL_STUDIO_BUILD_TOOLS_PATH, \"&\", \"cl\", f\"{filename_base}.cpp\"]\n",
    "                my_compiler = [\"Windows\", \"Visual Studio Build Tools\", compile_cmd]\n",
    "            if not my_compiler:\n",
    "                my_compiler = [my_platform, \"Unavailable\", []]\n",
    "\n",
    "        elif my_platform == \"Linux\":\n",
    "            if run_cmd([\"g++\", \"--version\"]):\n",
    "                compile_cmd = [\"g++\", f\"{filename_base}.cpp\", \"-o\", filename_base]\n",
    "                my_compiler = [\"Linux\", \"GCC (g++)\", compile_cmd]\n",
    "            elif run_cmd([\"clang++\", \"--version\"]):\n",
    "                compile_cmd = [\"clang++\", f\"{filename_base}.cpp\", \"-o\", filename_base]\n",
    "                my_compiler = [\"Linux\", \"Clang++\", compile_cmd]\n",
    "            if not my_compiler:\n",
    "                my_compiler = [my_platform, \"Unavailable\", []]\n",
    "\n",
    "        elif my_platform == \"Darwin\":\n",
    "            if run_cmd([\"clang++\", \"--version\"]):\n",
    "                compile_cmd = [\"clang++\", \"-Ofast\", \"-std=c++17\", \"-march=armv8.5-a\", \"-mtune=apple-m1\", \"-mcpu=apple-m1\", \"-o\", filename_base, f\"{filename_base}.cpp\"]\n",
    "                my_compiler = [\"Macintosh\", \"Clang++\", compile_cmd]\n",
    "            if not my_compiler:\n",
    "                my_compiler = [my_platform, \"Unavailable\", []]\n",
    "    except Exception:\n",
    "        my_compiler = [my_platform, \"Unavailable\", []]\n",
    "\n",
    "    return my_compiler if my_compiler else [\"Unknown\", \"Unavailable\", []]"
   ]
  },
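  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c9f3a5b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# quick usage check (an added example): see which platform and compiler the\n",
    "# detection above settled on before relying on it in execute_cpp below\n",
    "\n",
    "platform_name, compiler_name, compile_cmd = c_compiler_cmd(OUTPUT_FILE_NAME_BASE)\n",
    "print(f\"Platform: {platform_name}\")\n",
    "print(f\"Compiler: {compiler_name}\")\n",
    "print(f\"Compile command: {compile_cmd}\")"
   ]
  },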
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "43d84759",
   "metadata": {},
   "outputs": [],
   "source": [
    "# compile and execute the C++ code\n",
    "\n",
    "def execute_cpp(code):\n",
    "    filename_base = OUTPUT_FILE_NAME_BASE\n",
    "    compiler_cmd = c_compiler_cmd(filename_base)\n",
    "    write_output(code)\n",
    "\n",
    "    if not compiler_cmd[2]:\n",
    "        return \"No C++ compiler available on this platform\"\n",
    "\n",
    "    try:\n",
    "        # remove stale binaries from a previous run\n",
    "        if os.path.isfile(f\"./{filename_base}.exe\"):\n",
    "            os.remove(f\"./{filename_base}.exe\")\n",
    "        if os.path.isfile(f\"./{filename_base}\"):\n",
    "            os.remove(f\"./{filename_base}\")\n",
    "        compile_result = subprocess.run(compiler_cmd[2], check=True, text=True, capture_output=True)\n",
    "        # on Windows, CreateProcess appends .exe automatically, so this also finds the cl-built binary\n",
    "        run_cmd = [f\"./{filename_base}\"]\n",
    "        run_result = subprocess.run(run_cmd, check=True, text=True, capture_output=True)\n",
    "        return run_result.stdout\n",
    "    except subprocess.CalledProcessError as e:\n",
    "        return f\"An error occurred:\\n{e.stderr}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a658a7ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# simple Python code to optimize\n",
    "\n",
    "pi = \"\"\"\n",
    "import time\n",
    "\n",
    "def calculate(iterations, param1, param2):\n",
    "    result = 1.0\n",
    "    for i in range(1, iterations+1):\n",
    "        j = i * param1 - param2\n",
    "        result -= (1/j)\n",
    "        j = i * param1 + param2\n",
    "        result += (1/j)\n",
    "    return result\n",
    "\n",
    "start_time = time.time()\n",
    "result = calculate(100_000_000, 4, 1) * 4\n",
    "end_time = time.time()\n",
    "\n",
    "print(f\"Result: {result:.12f}\")\n",
    "print(f\"Execution Time: {(end_time - start_time):.6f} seconds\")\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "876bfe68",
   "metadata": {},
   "outputs": [],
   "source": [
    "# execute the simple python code\n",
    "\n",
    "exec(pi)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98c553c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# optimize the simple python code with openai\n",
    "\n",
    "optimize_gpt(pi)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db5a1fc3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# compile and run the openai optimized code\n",
    "\n",
    "cpp_code = read_output()\n",
    "execute_cpp(cpp_code)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3cd5c138",
   "metadata": {},
   "outputs": [],
   "source": [
    "# optimize the simple python code with claude\n",
    "\n",
    "optimize_claude(pi)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "110a7c11",
   "metadata": {},
   "outputs": [],
   "source": [
    "# compile and run the claude optimized code\n",
    "\n",
    "cpp_code = read_output()\n",
    "execute_cpp(cpp_code)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dceaa0db",
   "metadata": {},
   "outputs": [],
   "source": [
    "# harder Python code to optimize\n",
    "\n",
    "pi_hard = \"\"\"# Be careful to support large number sizes\n",
    "\n",
    "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n",
    "    value = seed\n",
    "    while True:\n",
    "        value = (a * value + c) % m\n",
    "        yield value\n",
    "\n",
    "def max_subarray_sum(n, seed, min_val, max_val):\n",
    "    lcg_gen = lcg(seed)\n",
    "    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n",
    "    max_sum = float('-inf')\n",
    "    for i in range(n):\n",
    "        current_sum = 0\n",
    "        for j in range(i, n):\n",
    "            current_sum += random_numbers[j]\n",
    "            if current_sum > max_sum:\n",
    "                max_sum = current_sum\n",
    "    return max_sum\n",
    "\n",
    "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n",
    "    total_sum = 0\n",
    "    lcg_gen = lcg(initial_seed)\n",
    "    for _ in range(20):\n",
    "        seed = next(lcg_gen)\n",
    "        total_sum += max_subarray_sum(n, seed, min_val, max_val)\n",
    "    return total_sum\n",
    "\n",
    "# Parameters\n",
    "n = 10000 # Number of random numbers\n",
    "initial_seed = 42 # Initial seed for the LCG\n",
    "min_val = -10 # Minimum value of random numbers\n",
    "max_val = 10 # Maximum value of random numbers\n",
    "\n",
    "# Timing the function\n",
    "import time\n",
    "start_time = time.time()\n",
    "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n",
    "end_time = time.time()\n",
    "\n",
    "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n",
    "print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "371edd71",
   "metadata": {},
   "outputs": [],
   "source": [
    "# execute the hard python code\n",
    "\n",
    "exec(pi_hard)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b0a7233",
   "metadata": {},
   "outputs": [],
   "source": [
    "# optimize the hard python code with openai\n",
    "\n",
    "optimize_gpt(pi_hard)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "281e959c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# compile and run the openai optimized code\n",
    "\n",
    "cpp_code = read_output()\n",
    "execute_cpp(cpp_code)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d2712ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "# optimize the hard python code with claude\n",
    "\n",
    "optimize_claude(pi_hard)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "444549a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# compile and run the claude optimized code\n",
    "\n",
    "cpp_code = read_output()\n",
    "execute_cpp(cpp_code)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6aec3927",
   "metadata": {},
   "source": [
    "### Section 2: Using a Gradio Interface to Generate and Execute C++ Code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6607743c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# configure streaming responses from openai and claude\n",
    "\n",
    "def stream_gpt(python):\n",
    "    stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python), stream=True)\n",
    "    reply = \"\"\n",
    "    for chunk in stream:\n",
    "        fragment = chunk.choices[0].delta.content or \"\"\n",
    "        reply += fragment\n",
    "        yield reply.replace('```cpp\\n','').replace('```','')\n",
    "\n",
    "def stream_claude(python):\n",
    "    result = claude.messages.stream(\n",
    "        model=CLAUDE_MODEL,\n",
    "        max_tokens=2000,\n",
    "        system=system_message,\n",
    "        messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n",
    "    )\n",
    "    reply = \"\"\n",
    "    with result as stream:\n",
    "        for text in stream.text_stream:\n",
    "            reply += text\n",
    "            yield reply.replace('```cpp\\n','').replace('```','')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "052a713e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# optimize code based on selected model\n",
    "\n",
    "def optimize(python, model):\n",
    "    if model==\"GPT\":\n",
    "        result = stream_gpt(python)\n",
    "    elif model==\"Claude\":\n",
    "        result = stream_claude(python)\n",
    "    else:\n",
    "        raise ValueError(\"Unknown model\")\n",
    "    for stream_so_far in result:\n",
    "        yield stream_so_far"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27479cab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# use gradio to optimize python code from the UI\n",
    "\n",
    "with gr.Blocks() as ui:\n",
    "    with gr.Row():\n",
    "        python = gr.Textbox(label=\"Python code:\", lines=10, value=pi)\n",
    "        cpp = gr.Textbox(label=\"C++ code:\", lines=10)\n",
    "    with gr.Row():\n",
    "        model = gr.Dropdown([\"GPT\", \"Claude\"], label=\"Select model\", value=\"GPT\")\n",
    "        convert = gr.Button(\"Convert code\")\n",
    "\n",
    "    convert.click(optimize, inputs=[python, model], outputs=[cpp])\n",
    "\n",
    "ui.launch(inbrowser=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b39125a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# execute python code\n",
    "\n",
    "def execute_python(code):\n",
    "    try:\n",
    "        output = io.StringIO()\n",
    "        sys.stdout = output\n",
    "        # run in a single fresh namespace so functions defined by the code\n",
    "        # (e.g. in pi_hard) can see each other when exec runs inside a function\n",
    "        namespace = {}\n",
    "        exec(code, namespace)\n",
    "    finally:\n",
    "        sys.stdout = sys.__stdout__\n",
    "    return output.getvalue()"
   ]
  },
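  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3daf4b6c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# an alternative sketch using the standard library's contextlib.redirect_stdout,\n",
    "# which restores sys.stdout automatically even if exec raises - same behavior as\n",
    "# execute_python above, just expressed as a context manager\n",
    "\n",
    "from contextlib import redirect_stdout\n",
    "\n",
    "def execute_python_v2(code):\n",
    "    output = io.StringIO()\n",
    "    with redirect_stdout(output):\n",
    "        exec(code, {})  # fresh namespace, as in execute_python\n",
    "    return output.getvalue()"
   ]
  },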
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5cb0f55",
   "metadata": {},
   "outputs": [],
   "source": [
    "# css to color by code type\n",
    "\n",
    "css = \"\"\"\n",
    ".python {background-color: #306998;}\n",
    ".cpp {background-color: #050;}\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "66e5ef37",
   "metadata": {},
   "outputs": [],
   "source": [
    "# add ability to run python code, compile and run c++ code in gradio\n",
    "\n",
    "with gr.Blocks(css=css) as ui:\n",
    "    gr.Markdown(\"## Convert code from Python to C++\")\n",
    "    with gr.Row():\n",
    "        python = gr.Textbox(label=\"Python code:\", value=pi, lines=10)\n",
    "        cpp = gr.Textbox(label=\"C++ code:\", lines=10)\n",
    "    with gr.Row():\n",
    "        model = gr.Dropdown([\"GPT\", \"Claude\"], label=\"Select model\", value=\"GPT\")\n",
    "    with gr.Row():\n",
    "        convert = gr.Button(\"Convert code\")\n",
    "    with gr.Row():\n",
    "        python_run = gr.Button(\"Run Python\")\n",
    "        cpp_run = gr.Button(\"Run C++\")\n",
    "    with gr.Row():\n",
    "        python_out = gr.TextArea(label=\"Python result:\", elem_classes=[\"python\"])\n",
    "        cpp_out = gr.TextArea(label=\"C++ result:\", elem_classes=[\"cpp\"])\n",
    "\n",
    "    convert.click(optimize, inputs=[python, model], outputs=[cpp])\n",
    "    python_run.click(execute_python, inputs=[python], outputs=[python_out])\n",
    "    cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])\n",
    "\n",
    "ui.launch(inbrowser=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b4d29c1c",
   "metadata": {},
   "source": [
    "### Section 3: Add HuggingFace-hosted Open Source Models to the Gradio App"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8ec5e4cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# initialize the code qwen tokenizer and apply its chat template to the messages\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(CODE_QWEN_MODEL)\n",
    "messages = messages_for(pi)\n",
    "text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c3ac865",
   "metadata": {},
   "outputs": [],
   "source": [
    "# inspect the message template (text)\n",
    "\n",
    "print(text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8a1de956",
   "metadata": {},
   "outputs": [],
   "source": [
    "# manually call the huggingface endpoint to generate C++ with code qwen\n",
    "\n",
    "client = InferenceClient(CODE_QWEN_URL, token=hf_token)\n",
    "stream = client.text_generation(text, stream=True, details=True, max_new_tokens=3000)\n",
    "for r in stream:\n",
    "    print(r.token.text, end=\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20d310dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# configure streaming responses from code qwen\n",
    "\n",
    "def stream_code_qwen(python):\n",
    "    tokenizer = AutoTokenizer.from_pretrained(CODE_QWEN_MODEL)\n",
    "    messages = messages_for(python)\n",
    "    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n",
    "    client = InferenceClient(CODE_QWEN_URL, token=hf_token)\n",
    "    stream = client.text_generation(text, stream=True, details=True, max_new_tokens=3000)\n",
    "    result = \"\"\n",
    "    for r in stream:\n",
    "        result += r.token.text\n",
    "        yield result"
   ]
  },
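  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ebf5c7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# a parallel sketch for the CodeGemma endpoint, mirroring stream_code_qwen\n",
    "# (untested here - it assumes CODE_GEMMA_URL is a running endpoint; note that\n",
    "# Gemma chat templates typically reject a separate system role, so the system\n",
    "# message is folded into the user turn). To expose it in the UI, add a\n",
    "# \"CodeGemma\" branch to optimize() below and to the model dropdown.\n",
    "\n",
    "def stream_code_gemma(python):\n",
    "    tokenizer = AutoTokenizer.from_pretrained(CODE_GEMMA_MODEL)\n",
    "    messages = [{\"role\": \"user\", \"content\": system_message + \"\\n\\n\" + user_prompt_for(python)}]\n",
    "    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n",
    "    client = InferenceClient(CODE_GEMMA_URL, token=hf_token)\n",
    "    stream = client.text_generation(text, stream=True, details=True, max_new_tokens=3000)\n",
    "    result = \"\"\n",
    "    for r in stream:\n",
    "        result += r.token.text\n",
    "        yield result"
   ]
  },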
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1340cd15",
   "metadata": {},
   "outputs": [],
   "source": [
    "# update optimize function to include code qwen\n",
    "\n",
    "def optimize(python, model):\n",
    "    if model==\"GPT\":\n",
    "        result = stream_gpt(python)\n",
    "    elif model==\"Claude\":\n",
    "        result = stream_claude(python)\n",
    "    elif model==\"CodeQwen\":\n",
    "        result = stream_code_qwen(python)\n",
    "    else:\n",
    "        raise ValueError(\"Unknown model\")\n",
    "    for stream_so_far in result:\n",
    "        yield stream_so_far"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6761f4ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "# select the python code sample and return the code\n",
    "\n",
    "def select_sample_program(sample_program):\n",
    "    if sample_program==\"pi\":\n",
    "        return pi\n",
    "    elif sample_program==\"pi_hard\":\n",
    "        return pi_hard\n",
    "    else:\n",
    "        return \"Type your Python program here\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "74ecb73b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# run the updated gradio UI with code qwen\n",
    "\n",
    "compiler_cmd = c_compiler_cmd(\"optimized\")\n",
    "\n",
    "with gr.Blocks(css=css) as ui:\n",
    "    gr.Markdown(\"## Convert code from Python to C++\")\n",
    "    with gr.Row():\n",
    "        python = gr.Textbox(label=\"Python code:\", value=pi, lines=10)\n",
    "        cpp = gr.Textbox(label=\"C++ code:\", lines=10)\n",
    "    with gr.Row():\n",
    "        with gr.Column():\n",
    "            sample_program = gr.Radio([\"pi\", \"pi_hard\"], label=\"Sample program\", value=\"pi\")\n",
    "            model = gr.Dropdown([\"GPT\", \"Claude\", \"CodeQwen\"], label=\"Select model\", value=\"GPT\")\n",
    "        with gr.Column():\n",
    "            architecture = gr.Radio([compiler_cmd[0]], label=\"Architecture\", interactive=False, value=compiler_cmd[0])\n",
    "            compiler = gr.Radio([compiler_cmd[1]], label=\"Compiler\", interactive=False, value=compiler_cmd[1])\n",
    "    with gr.Row():\n",
    "        convert = gr.Button(\"Convert code\")\n",
    "    with gr.Row():\n",
    "        python_run = gr.Button(\"Run Python\")\n",
    "        if not compiler_cmd[1] == \"Unavailable\":\n",
    "            cpp_run = gr.Button(\"Run C++\")\n",
    "        else:\n",
    "            cpp_run = gr.Button(\"No compiler to run C++\", interactive=False)\n",
    "    with gr.Row():\n",
    "        python_out = gr.TextArea(label=\"Python result:\", elem_classes=[\"python\"])\n",
    "        cpp_out = gr.TextArea(label=\"C++ result:\", elem_classes=[\"cpp\"])\n",
    "\n",
    "    sample_program.change(select_sample_program, inputs=[sample_program], outputs=[python])\n",
    "    convert.click(optimize, inputs=[python, model], outputs=[cpp])\n",
    "    python_run.click(execute_python, inputs=[python], outputs=[python_out])\n",
    "    cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])\n",
    "\n",
    "ui.launch(inbrowser=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llms",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}