added day4 exercise with inference provider

2025-02-14 14:40:44 -05:00
parent 289a1b9bd1
commit d57d084d27
1 changed files with 871 additions and 0 deletions
--- a/week4/community-contributions/day4_with_inference_provider.ipynb
+++ b/week4/community-contributions/day4_with_inference_provider.ipynb
@@ -0,0 +1,871 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "4a6ab9a2-28a2-445d-8512-a0dc8d1b54e9",
+   "metadata": {},
+   "source": [
+    "# Code Generator\n",
+    "\n",
+    "The requirement: use an Open Source model to generate high performance C++ code from Python code\n",
+    "\n",
+    "To replicate this, you'll need to set up a HuggingFace endpoint as I do in the video. It's simple to do, and it's quite satisfying to see the results!\n",
+    "\n",
+    "It's also an important part of your learning; this is the first example of deploying an open source model to be behind an API. We'll return to this in Week 8, but this should plant a seed in your mind for what's involved in moving open source models into production.\n",
+    "\n",
+    "This version adds the use of Inference Providers, recently introduced by Hugging Face, to convert the code.\n",
+    "It also improves the user prompt to emphasize algorithmic efficiency and performance optimization.\n",
+    "\n",
+    "Note: the C++ compile and run commands in this notebook assume a Windows environment."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "id": "e610bf56-a46e-4aff-8de1-ab49d62b1ad3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# imports\n",
+    "\n",
+    "import os\n",
+    "import io\n",
+    "import sys\n",
+    "import json\n",
+    "import requests\n",
+    "from dotenv import load_dotenv\n",
+    "from openai import OpenAI\n",
+    "import google.generativeai\n",
+    "import anthropic\n",
+    "from IPython.display import Markdown, display, update_display\n",
+    "import gradio as gr\n",
+    "import subprocess"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "4f672e1c-87e9-4865-b760-370fa605e614",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# environment\n",
+    "\n",
+    "load_dotenv()\n",
+    "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
+    "os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')\n",
+    "os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "id": "8aa149ed-9298-4d69-8fe2-8f5de0f667da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# initialize\n",
+    "\n",
+    "openai = OpenAI()\n",
+    "claude = anthropic.Anthropic()\n",
+    "OPENAI_MODEL = \"gpt-4o\"\n",
+    "CLAUDE_MODEL = \"claude-3-5-sonnet-20240620\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "id": "6896636f-923e-4a2c-9d6c-fac07828a201",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# System prompt shared by every model: asks for C++-only replies whose output matches the Python program.\n",
+    "system_message = (\n",
+    "    \"You are an assistant that reimplements Python code in high performance C++ for an Windows intel i7. \"\n",
+    "    \"Respond only with C++ code; use comments sparingly and do not provide any explanation other than occasional comments. \"\n",
+    "    \"The C++ response needs to produce an identical output in the fastest possible time. Keep implementations of random number generators identical so that results match exactly.\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "id": "70583432-e851-40d1-a219-2fb32b830dc8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Prompt updated from the original to stress algorithmic efficiency and performance optimization.\n",
+    "def user_prompt_for(python: str) -> str:\n",
+    "    \"\"\"Return the user prompt: fixed conversion instructions followed by the Python source.\"\"\"\n",
+    "    instructions = (\n",
+    "        \"First, analyze the given Python code to understand its core purpose and algorithmic approach. \"\n",
+    "        \"Then, implement a C++ solution that achieves the same output while prioritizing:\\n\"\n",
+    "        \"1. **Algorithmic Efficiency** - Optimize time and space complexity, even if it means using a different approach.\\n\"\n",
+    "        \"2. **Numerical Correctness** - Prevent integer overflows, use appropriate data types (`int64_t`, `uint64_t`, `double`, etc.), \"\n",
+    "        \"and ensure correct handling of edge cases.\\n\"\n",
+    "        \"3. **Performance Optimization** - Utilize C++-specific features (e.g., `std::vector` with preallocation, SIMD optimizations, cache-friendly structures).\\n\\n\"\n",
+    "        \"### **Important Notes:**\\n\"\n",
+    "        \"- Use `int64_t` instead of `int` where necessary to prevent overflows.\\n\"\n",
+    "        \"- Ensure random number generation in C++ matches Python's output as closely as possible.\\n\"\n",
+    "        \"- Avoid undefined behavior, such as bit shifts that exceed type width (`1UL << 32` is incorrect for `uint32_t`).\\n\"\n",
+    "        \"- Comment on key optimizations and complexity improvements in the C++ code.\\n\\n\"\n",
+    "        \"### **Expected Response:**\\n\"\n",
+    "        \"Respond **only with C++ code**, including all necessary headers and ensuring the output matches the Python version exactly.\\n\\n\"\n",
+    "        \"Here's the Python code to analyze and optimize:\\n\\n\"\n",
+    "    )\n",
+    "    # The Python program is appended verbatim after the instructions.\n",
+    "    return instructions + python"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "id": "c6190659-f54c-4951-bef4-4960f8e51cc4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def messages_for(python):\n",
+    "    \"\"\"Build the two-message chat payload (system prompt, then user prompt) for `python`.\"\"\"\n",
+    "    system_turn = {\"role\": \"system\", \"content\": system_message}\n",
+    "    user_turn = {\"role\": \"user\", \"content\": user_prompt_for(python)}\n",
+    "    return [system_turn, user_turn]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "id": "71e1ba8c-5b05-4726-a9f3-8d8c6257350b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# write to a file called optimized.cpp\n",
+    "\n",
+    "def write_output(cpp):\n",
+    "    \"\"\"Strip Markdown code fences from `cpp` and save the result as optimized.cpp.\n",
+    "\n",
+    "    The file is written as UTF-8 explicitly so that non-ASCII characters in a\n",
+    "    model's reply do not depend on the platform's default text encoding.\n",
+    "    \"\"\"\n",
+    "    code = cpp.replace(\"```cpp\",\"\").replace(\"```\",\"\")\n",
+    "    with open(\"optimized.cpp\", \"w\", encoding=\"utf-8\") as f:\n",
+    "        f.write(code)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "id": "e7d2fea8-74c6-4421-8f1e-0e76d5b201b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def optimize_gpt(python):\n",
+    "    \"\"\"Stream a GPT conversion of `python` to stdout, then save the full reply via write_output.\"\"\"\n",
+    "    pieces = []\n",
+    "    response = openai.chat.completions.create(\n",
+    "        model=OPENAI_MODEL,\n",
+    "        messages=messages_for(python),\n",
+    "        stream=True,\n",
+    "    )\n",
+    "    for event in response:\n",
+    "        # A chunk's content may be None; treat that as an empty fragment.\n",
+    "        text = event.choices[0].delta.content or \"\"\n",
+    "        pieces.append(text)\n",
+    "        print(text, end='', flush=True)\n",
+    "    write_output(\"\".join(pieces))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "id": "7cd84ad8-d55c-4fe0-9eeb-1895c95c4a9d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def optimize_claude(python):\n",
+    "    \"\"\"Stream a Claude conversion of `python` to stdout, then save the full reply via write_output.\"\"\"\n",
+    "    pieces = []\n",
+    "    request = claude.messages.stream(\n",
+    "        model=CLAUDE_MODEL,\n",
+    "        max_tokens=2000,\n",
+    "        system=system_message,\n",
+    "        messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n",
+    "    )\n",
+    "    with request as events:\n",
+    "        for piece in events.text_stream:\n",
+    "            pieces.append(piece)\n",
+    "            print(piece, end=\"\", flush=True)\n",
+    "    write_output(\"\".join(pieces))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "id": "a1cbb778-fa57-43de-b04b-ed523f396c38",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pi = \"\"\"\n",
+    "import time\n",
+    "\n",
+    "def calculate(iterations, param1, param2):\n",
+    "    result = 1.0\n",
+    "    for i in range(1, iterations+1):\n",
+    "        j = i * param1 - param2\n",
+    "        result -= (1/j)\n",
+    "        j = i * param1 + param2\n",
+    "        result += (1/j)\n",
+    "    return result\n",
+    "\n",
+    "start_time = time.time()\n",
+    "result = calculate(100_000_000, 4, 1) * 4\n",
+    "end_time = time.time()\n",
+    "\n",
+    "print(f\"Result: {result:.12f}\")\n",
+    "print(f\"Execution Time: {(end_time - start_time):.6f} seconds\")\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "id": "7fe1cd4b-d2c5-4303-afed-2115a3fef200",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "exec(pi)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "105db6f9-343c-491d-8e44-3a5328b81719",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimize_gpt(pi)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf26ee95-0c77-491d-9a91-579a1e96a8a3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "exec(pi)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4194e40c-04ab-4940-9d64-b4ad37c5bb40",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!g++ -O3 -std=c++17 -march=native -o optimized optimized.cpp\n",
+    "!optimized.exe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "983a11fe-e24d-4c65-8269-9802c5ef3ae6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimize_claude(pi)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d5a766f9-3d23-4bb4-a1d4-88ec44b61ddf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!g++ -O3 -std=c++17 -march=native -o optimized optimized.cpp\n",
+    "!optimized.exe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "id": "c3b497b3-f569-420e-b92e-fb0f49957ce0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "python_hard = \"\"\"# Be careful to support large number sizes\n",
+    "\n",
+    "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n",
+    "    value = seed\n",
+    "    while True:\n",
+    "        value = (a * value + c) % m\n",
+    "        yield value\n",
+    "        \n",
+    "def max_subarray_sum(n, seed, min_val, max_val):\n",
+    "    lcg_gen = lcg(seed)\n",
+    "    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n",
+    "    max_sum = float('-inf')\n",
+    "    for i in range(n):\n",
+    "        current_sum = 0\n",
+    "        for j in range(i, n):\n",
+    "            current_sum += random_numbers[j]\n",
+    "            if current_sum > max_sum:\n",
+    "                max_sum = current_sum\n",
+    "    return max_sum\n",
+    "\n",
+    "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n",
+    "    total_sum = 0\n",
+    "    lcg_gen = lcg(initial_seed)\n",
+    "    for _ in range(20):\n",
+    "        seed = next(lcg_gen)\n",
+    "        total_sum += max_subarray_sum(n, seed, min_val, max_val)\n",
+    "    return total_sum\n",
+    "\n",
+    "# Parameters\n",
+    "n = 10000         # Number of random numbers\n",
+    "initial_seed = 42 # Initial seed for the LCG\n",
+    "min_val = -10     # Minimum value of random numbers\n",
+    "max_val = 10      # Maximum value of random numbers\n",
+    "\n",
+    "# Timing the function\n",
+    "import time\n",
+    "start_time = time.time()\n",
+    "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n",
+    "end_time = time.time()\n",
+    "\n",
+    "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n",
+    "print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "dab5e4bc-276c-4555-bd4c-12c699d5e899",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "exec(python_hard)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e8d24ed5-2c15-4f55-80e7-13a3952b3cb8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimize_gpt(python_hard)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0b3d073-88a2-40b2-831c-6f0c345c256f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!g++ -O3 -std=c++17 -march=native -o optimized optimized.cpp\n",
+    "!optimized.exe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e9305446-1d0c-4b51-866a-b8c1e299bf5c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimize_claude(python_hard)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0c181036-8193-4fdd-aef3-fc513b218d43",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!g++ -O3 -std=c++17 -march=native -o optimized optimized.cpp\n",
+    "!optimized.exe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "id": "0be9f47d-5213-4700-b0e2-d444c7c738c0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def stream_gpt(python):\n",
+    "    \"\"\"Yield the cumulative GPT reply, with Markdown code fences removed, after each streamed chunk.\"\"\"\n",
+    "    response = openai.chat.completions.create(\n",
+    "        model=OPENAI_MODEL,\n",
+    "        messages=messages_for(python),\n",
+    "        stream=True,\n",
+    "    )\n",
+    "    so_far = \"\"\n",
+    "    for event in response:\n",
+    "        so_far += event.choices[0].delta.content or \"\"\n",
+    "        cleaned = so_far.replace('```cpp\\n','').replace('```','')\n",
+    "        yield cleaned"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "id": "8669f56b-8314-4582-a167-78842caea131",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def stream_claude(python):\n",
+    "    \"\"\"Yield the cumulative Claude reply, with Markdown code fences removed, after each streamed piece.\"\"\"\n",
+    "    request = claude.messages.stream(\n",
+    "        model=CLAUDE_MODEL,\n",
+    "        max_tokens=2000,\n",
+    "        system=system_message,\n",
+    "        messages=[{\"role\": \"user\", \"content\": user_prompt_for(python)}],\n",
+    "    )\n",
+    "    so_far = \"\"\n",
+    "    with request as events:\n",
+    "        for piece in events.text_stream:\n",
+    "            so_far += piece\n",
+    "            cleaned = so_far.replace('```cpp\\n','').replace('```','')\n",
+    "            yield cleaned"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "id": "2f1ae8f5-16c8-40a0-aa18-63b617df078d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def optimize(python, model):\n",
+    "    \"\"\"Yield the progressively growing C++ translation of `python` from the selected model.\n",
+    "\n",
+    "    Raises ValueError(\"Unknown model\") once iteration starts if `model` is neither \"GPT\" nor \"Claude\".\n",
+    "    \"\"\"\n",
+    "    if model not in (\"GPT\", \"Claude\"):\n",
+    "        raise ValueError(\"Unknown model\")\n",
+    "    chunks = stream_gpt(python) if model == \"GPT\" else stream_claude(python)\n",
+    "    yield from chunks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "f1ddb38e-6b0a-4c37-baa4-ace0b7de887a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Minimal UI: Python source in one textbox, generated C++ in the other.\n",
+    "with gr.Blocks() as ui:\n",
+    "    with gr.Row():\n",
+    "        python = gr.Textbox(label=\"Python code:\", lines=10, value=python_hard)\n",
+    "        cpp = gr.Textbox(label=\"C++ code:\", lines=10)\n",
+    "    with gr.Row():\n",
+    "        model = gr.Dropdown([\"GPT\", \"Claude\"], label=\"Select model\", value=\"GPT\")\n",
+    "        convert = gr.Button(\"Convert code\")\n",
+    "\n",
+    "    # optimize is a generator; each string it yields is routed to the cpp textbox.\n",
+    "    convert.click(optimize, inputs=[python, model], outputs=[cpp])\n",
+    "\n",
+    "ui.launch(inbrowser=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "id": "19bf2bff-a822-4009-a539-f003b1651383",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def execute_python(code):\n",
+    "    \"\"\"Execute `code` with exec() and return everything it wrote to stdout.\n",
+    "\n",
+    "    The code runs in a fresh namespace, so functions it defines can call each\n",
+    "    other without relying on names already present in the notebook. Exceptions\n",
+    "    raised by `code` propagate to the caller after stdout has been restored.\n",
+    "    \"\"\"\n",
+    "    # NOTE(review): exec() runs whatever text is passed in with this kernel's privileges; only use with trusted input.\n",
+    "    namespace = {\"__name__\": \"__main__\"}\n",
+    "    captured = io.StringIO()\n",
+    "    # Remember the stream that is active now: it may not be sys.__stdout__ (e.g. inside a notebook kernel),\n",
+    "    # so resetting to sys.__stdout__ could silently redirect later output.\n",
+    "    previous_stdout = sys.stdout\n",
+    "    sys.stdout = captured\n",
+    "    try:\n",
+    "        exec(code, namespace)\n",
+    "    finally:\n",
+    "        sys.stdout = previous_stdout\n",
+    "    return captured.getvalue()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "id": "77f3ab5d-fcfb-4d3f-8728-9cacbf833ea6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def execute_cpp(code):\n",
+    "    \"\"\"Write `code` to optimized.cpp, compile it with g++, run the binary and return its stdout.\n",
+    "\n",
+    "    Returns the captured stdout of optimized.exe on success. If the compile or the run\n",
+    "    exits with a non-zero status, returns \"An error occurred:\" followed by that step's stderr.\n",
+    "    \"\"\"\n",
+    "    write_output(code)\n",
+    "    # Spelled out explicitly, using the same flags as the `!g++` cells in this notebook.\n",
+    "    compile_cmd = [\"g++\", \"-O3\", \"-std=c++17\", \"-march=native\", \"-o\", \"optimized\", \"optimized.cpp\"]\n",
+    "    try:\n",
+    "        compile_result = subprocess.run(compile_cmd, check=True, text=True, capture_output=True)\n",
+    "        run_cmd = [\"optimized.exe\"]\n",
+    "        run_result = subprocess.run(run_cmd, check=True, text=True, capture_output=True)\n",
+    "        return run_result.stdout\n",
+    "    except subprocess.CalledProcessError as e:\n",
+    "        return f\"An error occurred:\\n{e.stderr}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "id": "9a2274f1-d03b-42c0-8dcc-4ce159b18442",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "css = \"\"\"\n",
+    ".python {background-color: #306998;}\n",
+    ".cpp {background-color: #050;}\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "f1303932-160c-424b-97a8-d28c816721b2",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7863/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Full UI: convert Python to C++, then run either version and compare the printed results.\n",
+    "with gr.Blocks(css=css) as ui:\n",
+    "    gr.Markdown(\"## Convert code from Python to C++\")\n",
+    "    with gr.Row():\n",
+    "        python = gr.Textbox(label=\"Python code:\", value=python_hard, lines=10)\n",
+    "        cpp = gr.Textbox(label=\"C++ code:\", lines=10)\n",
+    "    with gr.Row():\n",
+    "        model = gr.Dropdown([\"GPT\", \"Claude\"], label=\"Select model\", value=\"GPT\")\n",
+    "    with gr.Row():\n",
+    "        convert = gr.Button(\"Convert code\")\n",
+    "    with gr.Row():\n",
+    "        python_run = gr.Button(\"Run Python\")\n",
+    "        cpp_run = gr.Button(\"Run C++\")\n",
+    "    with gr.Row():\n",
+    "        # elem_classes match the .python / .cpp selectors defined in the css string.\n",
+    "        python_out = gr.TextArea(label=\"Python result:\", elem_classes=[\"python\"])\n",
+    "        cpp_out = gr.TextArea(label=\"C++ result:\", elem_classes=[\"cpp\"])\n",
+    "\n",
+    "    # Wiring: each button passes the current textbox contents to its handler and shows the return value.\n",
+    "    convert.click(optimize, inputs=[python, model], outputs=[cpp])\n",
+    "    python_run.click(execute_python, inputs=[python], outputs=[python_out])\n",
+    "    cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])\n",
+    "\n",
+    "ui.launch(inbrowser=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "id": "bb8c5b4e-ec51-4f21-b3f8-6aa94fede86d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from huggingface_hub import login, InferenceClient"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "id": "13347633-4606-4e38-9927-80c39e65c1f1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
+     ]
+    }
+   ],
+   "source": [
+    "hf_token = os.environ['HF_TOKEN']\n",
+    "login(hf_token)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "id": "ef60a4df-6267-4ebd-8eed-dcb917af0a5e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "code_qwen = \"Qwen/CodeQwen1.5-7B-Chat\"\n",
+    "code_gemma = \"google/codegemma-7b-it\"\n",
+    "messages=messages_for(pi)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "id": "3825d77a-03c6-42b2-89bc-ccbcb1585740",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Smoke test of the inference provider: stream a conversion of the `messages` built above and print it.\n",
+    "client = InferenceClient(\n",
+    "    provider=\"sambanova\",\n",
+    "    api_key=hf_token\n",
+    ")\n",
+    "stream = client.chat.completions.create(\n",
+    "    model=\"Qwen/Qwen2.5-Coder-32B-Instruct\",\n",
+    "    messages=messages,\n",
+    "    max_tokens=500,\n",
+    "    stream=True\n",
+    ")\n",
+    "\n",
+    "for chunk in stream:\n",
+    "    # Skip chunks with no choices or no text so that \"None\" is never printed and indexing cannot fail.\n",
+    "    if chunk.choices and chunk.choices[0].delta.content:\n",
+    "        print(chunk.choices[0].delta.content, end=\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "id": "cc0c3e9c-2572-41d1-a476-6eae96b20695",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# using inference providers\n",
+    "def stream_code_qwen(python):\n",
+    "    \"\"\"Yield the cumulative Qwen2.5-Coder reply for `python`, with Markdown code fences removed.\n",
+    "\n",
+    "    The request goes through huggingface_hub's InferenceClient using the \"sambanova\" provider\n",
+    "    and the module-level `hf_token`. Chunks without choices or without text are skipped.\n",
+    "    \"\"\"\n",
+    "    messages = messages_for(python)\n",
+    "    client = InferenceClient(\n",
+    "        provider=\"sambanova\",\n",
+    "        api_key=hf_token\n",
+    "    )\n",
+    "    stream = client.chat.completions.create(\n",
+    "        model=\"Qwen/Qwen2.5-Coder-32B-Instruct\",\n",
+    "        messages=messages,\n",
+    "        # NOTE(review): 500 is far below the 2000 used for Claude and may truncate longer programs - confirm.\n",
+    "        max_tokens=500,\n",
+    "        stream=True\n",
+    "    )\n",
+    "    result = \"\"\n",
+    "    for chunk in stream:\n",
+    "        if chunk.choices and chunk.choices[0].delta.content:\n",
+    "            result += chunk.choices[0].delta.content\n",
+    "            # Remove code fences the same way stream_gpt and stream_claude do, so the UI shows plain C++.\n",
+    "            yield result.replace('```cpp\\n','').replace('```','')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "id": "a82387d1-7651-4923-995b-fe18356fcaa6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def optimize(python, model):\n",
+    "    \"\"\"Yield the progressively growing C++ translation of `python` from the selected model.\n",
+    "\n",
+    "    `model` must be \"GPT\", \"Claude\" or \"CodeQwen\"; anything else raises\n",
+    "    ValueError(\"Unknown model\") once iteration starts.\n",
+    "    \"\"\"\n",
+    "    if model not in (\"GPT\", \"Claude\", \"CodeQwen\"):\n",
+    "        raise ValueError(\"Unknown model\")\n",
+    "    # Built after the guard so only a valid choice ever reaches the lookup.\n",
+    "    streamers = {\"GPT\": stream_gpt, \"Claude\": stream_claude, \"CodeQwen\": stream_code_qwen}\n",
+    "    yield from streamers[model](python)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "id": "4ba311ec-c16a-4fe0-946b-4b940704cf65",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def select_sample_program(sample_program):\n",
+    "    \"\"\"Return the sample source named by `sample_program`, or placeholder text for any other value.\"\"\"\n",
+    "    if sample_program not in (\"pi\", \"python_hard\"):\n",
+    "        return \"Type your Python program here\"\n",
+    "    return pi if sample_program == \"pi\" else python_hard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "id": "06148e88-501b-4686-a41d-c3be528d8e6f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def execute_cpp(code):\n",
+    "    \"\"\"Write `code` to optimized.cpp, compile it with g++, run the binary and return its stdout.\n",
+    "\n",
+    "    Returns the program's captured stdout on success. If the compile or the run exits with a\n",
+    "    non-zero status, returns \"An error occurred:\" followed by that step's stderr; if g++ or the\n",
+    "    compiled binary cannot be started at all, the same prefix is followed by the OS error text.\n",
+    "    \"\"\"\n",
+    "    write_output(code)\n",
+    "    compile_cmd = [\"g++\", \"-Ofast\", \"-std=c++17\", \"-march=native\", \"-mtune=intel\", \"-o\", \"optimized\", \"optimized.cpp\"]\n",
+    "    # On Windows g++ produces optimized.exe; on other systems the binary is ./optimized.\n",
+    "    run_cmd = [\"optimized.exe\"] if os.name == \"nt\" else [\"./optimized\"]\n",
+    "    try:\n",
+    "        subprocess.run(compile_cmd, check=True, text=True, capture_output=True)\n",
+    "        run_result = subprocess.run(run_cmd, check=True, text=True, capture_output=True)\n",
+    "        return run_result.stdout\n",
+    "    except subprocess.CalledProcessError as e:\n",
+    "        return f\"An error occurred:\\n{e.stderr}\"\n",
+    "    except FileNotFoundError as e:\n",
+    "        # Raised when the compiler (or the compiled binary) is not found; report it in the UI instead of crashing.\n",
+    "        return f\"An error occurred:\\n{e}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "id": "f9ca2e6f-60c1-4e5f-b570-63c75b2d189b",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7867/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 71,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\huggingface_hub\\utils\\_http.py\", line 406, in hf_raise_for_status\n",
+      "    response.raise_for_status()\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\requests\\models.py\", line 1024, in raise_for_status\n",
+      "    raise HTTPError(http_error_msg, response=self)\n",
+      "requests.exceptions.HTTPError: 402 Client Error: Payment Required for url: https://huggingface.co/api/inference-proxy/sambanova/v1/chat/completions\n",
+      "\n",
+      "The above exception was the direct cause of the following exception:\n",
+      "\n",
+      "Traceback (most recent call last):\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\gradio\\queueing.py\", line 625, in process_events\n",
+      "    response = await route_utils.call_process_api(\n",
+      "               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\gradio\\route_utils.py\", line 322, in call_process_api\n",
+      "    output = await app.get_blocks().process_api(\n",
+      "             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\gradio\\blocks.py\", line 2088, in process_api\n",
+      "    result = await self.call_function(\n",
+      "             ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\gradio\\blocks.py\", line 1647, in call_function\n",
+      "    prediction = await utils.async_iteration(iterator)\n",
+      "                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\gradio\\utils.py\", line 728, in async_iteration\n",
+      "    return await anext(iterator)\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\gradio\\utils.py\", line 722, in __anext__\n",
+      "    return await anyio.to_thread.run_sync(\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\anyio\\to_thread.py\", line 56, in run_sync\n",
+      "    return await get_async_backend().run_sync_in_worker_thread(\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\anyio\\_backends\\_asyncio.py\", line 2505, in run_sync_in_worker_thread\n",
+      "    return await future\n",
+      "           ^^^^^^^^^^^^\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\anyio\\_backends\\_asyncio.py\", line 1005, in run\n",
+      "    result = context.run(func, *args)\n",
+      "             ^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\gradio\\utils.py\", line 705, in run_sync_iterator_async\n",
+      "    return next(iterator)\n",
+      "           ^^^^^^^^^^^^^^\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\gradio\\utils.py\", line 866, in gen_wrapper\n",
+      "    response = next(iterator)\n",
+      "               ^^^^^^^^^^^^^^\n",
+      "  File \"C:\\Users\\danym\\AppData\\Local\\Temp\\ipykernel_16896\\2223836700.py\", line 10, in optimize\n",
+      "    for stream_so_far in result:\n",
+      "  File \"C:\\Users\\danym\\AppData\\Local\\Temp\\ipykernel_16896\\2217507934.py\", line 8, in stream_code_qwen\n",
+      "    stream = client.chat.completions.create(\n",
+      "             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\huggingface_hub\\inference\\_client.py\", line 970, in chat_completion\n",
+      "    data = self._inner_post(request_parameters, stream=stream)\n",
+      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\huggingface_hub\\inference\\_client.py\", line 327, in _inner_post\n",
+      "    hf_raise_for_status(response)\n",
+      "  File \"C:\\Users\\danym\\anaconda3\\envs\\llms\\Lib\\site-packages\\huggingface_hub\\utils\\_http.py\", line 477, in hf_raise_for_status\n",
+      "    raise _format(HfHubHTTPError, str(e), response) from e\n",
+      "huggingface_hub.errors.HfHubHTTPError: 402 Client Error: Payment Required for url: https://huggingface.co/api/inference-proxy/sambanova/v1/chat/completions (Request ID: Root=1-67af964d-18ce264b79019ea460d62fd1;041b0bf3-9206-4a8a-aa61-f493ff9b1f8a)\n",
+      "\n",
+      "You have exceeded your monthly included credits for Inference Endpoints. Subscribe to PRO to get 20x more monthly allowance.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Final UI: adds a sample-program picker and the CodeQwen option to the conversion app.\n",
+    "with gr.Blocks(css=css) as ui:\n",
+    "    gr.Markdown(\"## Convert code from Python to C++\")\n",
+    "    with gr.Row():\n",
+    "        python = gr.Textbox(label=\"Python code:\", value=python_hard, lines=10)\n",
+    "        cpp = gr.Textbox(label=\"C++ code:\", lines=10)\n",
+    "    with gr.Row():\n",
+    "        with gr.Column():\n",
+    "            sample_program = gr.Radio([\"pi\", \"python_hard\"], label=\"Sample program\", value=\"python_hard\")\n",
+    "            model = gr.Dropdown([\"GPT\", \"Claude\", \"CodeQwen\"], label=\"Select model\", value=\"GPT\")\n",
+    "    with gr.Row():\n",
+    "        convert = gr.Button(\"Convert code\")\n",
+    "    with gr.Row():\n",
+    "        python_run = gr.Button(\"Run Python\")\n",
+    "        cpp_run = gr.Button(\"Run C++\")\n",
+    "    with gr.Row():\n",
+    "        # elem_classes match the .python / .cpp selectors defined in the css string.\n",
+    "        python_out = gr.TextArea(label=\"Python result:\", elem_classes=[\"python\"])\n",
+    "        cpp_out = gr.TextArea(label=\"C++ result:\", elem_classes=[\"cpp\"])\n",
+    "\n",
+    "    # Changing the radio selection replaces the Python textbox with the chosen sample.\n",
+    "    sample_program.change(select_sample_program, inputs=[sample_program], outputs=[python])\n",
+    "    # Each button passes the current textbox contents to its handler and shows the return value.\n",
+    "    convert.click(optimize, inputs=[python, model], outputs=[cpp])\n",
+    "    python_run.click(execute_python, inputs=[python], outputs=[python_out])\n",
+    "    cpp_run.click(execute_cpp, inputs=[cpp], outputs=[cpp_out])\n",
+    "\n",
+    "ui.launch(inbrowser=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9d0ad093-425b-488e-8c3f-67f729dd9c06",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}