diff --git a/week3/community-contributions/anime_audio_translator.colab.ipynb b/week3/community-contributions/anime_audio_translator.colab.ipynb new file mode 100644 index 0000000..0ad7b9b --- /dev/null +++ b/week3/community-contributions/anime_audio_translator.colab.ipynb @@ -0,0 +1,292 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kayiMLgsBnVt" + }, + "outputs": [], + "source": [ + "!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai gradio" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "executionInfo": { + "elapsed": 15255, + "status": "ok", + "timestamp": 1744678358807, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "ByKEQHyhiLl7" + }, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI\n", + "from google.colab import drive, userdata\n", + "from huggingface_hub import login\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer\n", + "import torch\n", + "import gradio as gr" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "executionInfo": { + "elapsed": 2, + "status": "ok", + "timestamp": 1744678358815, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "9tzK_t3jiOo1" + }, + "outputs": [], + "source": [ + "AUDIO_MODEL = 'whisper-1'\n", + "LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "executionInfo": { + "elapsed": 737, + "status": "ok", + "timestamp": 1744678360474, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "PYNmGaQniW73" + }, + "outputs": [], + "source": [ + "hf_token = userdata.get('HF_TOKEN')\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "executionInfo": { + "elapsed": 555, + "status": "ok", + "timestamp": 1744678362522, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "yGjVTeMEig-b" + }, + "outputs": [], + "source": [ + "openai_api_key = userdata.get(\"OPENAI_API_KEY\")\n", + "openai = OpenAI(api_key=openai_api_key)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "executionInfo": { + "elapsed": 9, + "status": "ok", + "timestamp": 1744679561600, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "6jboyASHilLz" + }, + "outputs": [], + "source": [ + "def message_prompt(transciption):\n", + " system_message = \"\"\"\n", + " You are an assistant that translate japanese text into two different languages like 'English' and 'Filipino',\n", + " please display the translated text into markdown and include the original text from japanese using 'Romaji',\n", + " sample format would be - original text (converted to romaji): orignal_translated_text_here \\n\\n translated to english: translated_english_text_here \\n\\n translated to filipino: translated_filipino_text_here\"\n", + " \"\"\"\n", + "\n", + " user_propmpt = f\"Here is the transcripted japanese audio and translate it into two languages: '{transciption}'. No explaination just the translated languages only.\"\n", + "\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_propmpt}\n", + " ]\n", + "\n", + " return messages" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "executionInfo": { + "elapsed": 7, + "status": "ok", + "timestamp": 1744678366113, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "nYrf_wKmmoUs" + }, + "outputs": [], + "source": [ + "quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_quant_type=\"nf4\",\n", + " bnb_4bit_compute_dtype=torch.bfloat16\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "executionInfo": { + "elapsed": 7, + "status": "ok", + "timestamp": 1744678367778, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "ESlOaRGioqUQ" + }, + "outputs": [], + "source": [ + "def translation(messages):\n", + " tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n", + " tokenizer.pad_token = tokenizer.eos_token\n", + " inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n", + " streamer = TextStreamer(tokenizer)\n", + " model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n", + " outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)\n", + "\n", + " return tokenizer.decode(outputs[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "executionInfo": { + "elapsed": 6, + "status": "ok", + "timestamp": 1744679567326, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "FSGFTvIEys0j" + }, + "outputs": [], + "source": [ + "def translate_text(file):\n", + " try:\n", + " audio_file = open(file, \"rb\")\n", + "\n", + " transciption = openai.audio.transcriptions.create(\n", + " model=AUDIO_MODEL,\n", + " file=audio_file,\n", + " response_format=\"text\",\n", + " language=\"ja\"\n", + " )\n", + "\n", + " messages = message_prompt(transciption)\n", + " response = translation(messages)\n", + "\n", + " return response\n", + " except Exception as e:\n", + " return f\"Unexpected error: {str(e)}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bexgSsWuvUmU" + }, + "outputs": [], + "source": [ + "with gr.Blocks() as demo:\n", + " gr.Markdown(\"# 🎙️ Anime Audio Translator\")\n", + " with gr.Row():\n", + " with gr.Column():\n", + " audio_file = gr.Audio(type=\"filepath\", label=\"Upload Audio\")\n", + " button = gr.Button(\"Translate\", variant=\"primary\")\n", + "\n", + " with gr.Column():\n", + " gr.Label(value=\"Result of translated text to 'English' and 'Filipino'\", label=\"Character\")\n", + " output_text = gr.Markdown()\n", + "\n", + " button.click(\n", + " fn=translate_text,\n", + " inputs=audio_file,\n", + " outputs=output_text,\n", + " trigger_mode=\"once\"\n", + " )\n", + "demo.launch()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "authorship_tag": "ABX9TyO+HrhlkaVchpoGIfmYAHdf", + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python [conda env:base] *", + "language": "python", + "name": "conda-base-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/week4/community-contributions/code_conversion.ipynb b/week4/community-contributions/code_conversion.ipynb new file mode 100644 index 0000000..c718abe --- /dev/null +++ b/week4/community-contributions/code_conversion.ipynb @@ -0,0 +1,440 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "8dee7381-2291-4202-a6e6-9eb94e896141", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "\n", + "import os\n", + "import io\n", + "import sys\n", + "from dotenv import load_dotenv\n", + "from openai import OpenAI\n", + "import google.generativeai\n", + "import anthropic\n", + "from IPython.display import Markdown, display, update_display\n", + "import gradio as gr\n", + "import subprocess\n", + "import platform\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc145e4c-1e06-4414-aa2b-1ea1862b4600", + "metadata": {}, + "outputs": [], + "source": [ + "# environment\n", + "\n", + "load_dotenv(override=True)\n", + "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n", + "os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bfaf8584-a10f-43f0-b550-f1b2b6f07160", + "metadata": {}, + "outputs": [], + "source": [ + "# initialize\n", + "\n", + "openai = OpenAI()\n", + "claude = anthropic.Anthropic()\n", + "\n", + "OPENAI_MODEL = \"gpt-4o-mini\"\n", + "CLAUDE_MODEL = \"claude-3-haiku-20240307\"\n", + "\n", + "# OPENAI_MODEL = \"gpt-4o\"\n", + "# CLAUDE_MODEL = \"claude-3-5-sonnet-20240620\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b47508e-dc60-4db5-a29c-f3f0ed57d894", + "metadata": {}, + "outputs": [], + "source": [ + "processor = platform.machine()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ee9ec20-3b1d-4a15-9ab3-b2fbb93296b4", + "metadata": {}, + "outputs": [], + "source": [ + "def get_name_by_extension(extension):\n", + " for lang in programming_languages:\n", + " if lang[\"extension\"] == extension:\n", + " return lang[\"name\"]\n", + " return None " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee408ffd-fde2-4c1e-b87f-c8dce2ad49bc", + "metadata": {}, + "outputs": [], + "source": [ + "def get_system_message(prog_lang):\n", + " name = get_name_by_extension(prog_lang)\n", + " \n", + " system_message = f\"You are an assistant that reimplements Python code to {name} for an {processor} device. \"\n", + " system_message += f\"Respond only with code; use comments sparingly and do not provide any explanation other than occasional comments. \"\n", + " system_message += f\"The {name} response needs to produce an identical output in the fastest possible time.\"\n", + " system_message += f\"If the used function does not exists for {name} language interchange it for its compatibility and if not throw an error\"\n", + "\n", + " return system_message" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac8d5d3b-a018-4b94-8080-9b18f5634dc7", + "metadata": {}, + "outputs": [], + "source": [ + "def user_prompt_for(python, prog_lang):\n", + " name = get_name_by_extension(prog_lang)\n", + " \n", + " user_prompt = f\"Rewrite this Python code in {name} with the fastest possible implementation that produces identical output in the least time. \"\n", + " user_prompt += f\"Respond only with {name} code; do not explain your work other than a few comments. \"\n", + " user_prompt += \"Pay attention to number types to ensure no int overflows\\n\\n\"\n", + " user_prompt += python\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23c58e61-5fdd-41f5-9e60-a0847f4bf86f", + "metadata": {}, + "outputs": [], + "source": [ + "def messages_for(python, prog_lang):\n", + " system_message = get_system_message(prog_lang)\n", + " \n", + " return [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt_for(python, prog_lang)}\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e193cd6-16f4-440a-9376-6041672f91fc", + "metadata": {}, + "outputs": [], + "source": [ + "# write to a file called optimized.cpp\n", + "\n", + "def write_output(content, prog_lang):\n", + " code = content.replace(\"```cpp\",\"\").replace(\"javascript\",\"\").replace(\"```\",\"\")\n", + " \n", + " with open(f\"optimized.{prog_lang}\", \"w\") as f:\n", + " f.write(code)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28b0be5e-73b6-49d8-8ef6-8209eace5ee6", + "metadata": {}, + "outputs": [], + "source": [ + "python_hard = \"\"\"# Be careful to support large number sizes\n", + "\n", + "def lcg(seed, a=1664525, c=1013904223, m=2**32):\n", + " value = seed\n", + " while True:\n", + " value = (a * value + c) % m\n", + " yield value\n", + " \n", + "def max_subarray_sum(n, seed, min_val, max_val):\n", + " lcg_gen = lcg(seed)\n", + " random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]\n", + " max_sum = float('-inf')\n", + " for i in range(n):\n", + " current_sum = 0\n", + " for j in range(i, n):\n", + " current_sum += random_numbers[j]\n", + " if current_sum > max_sum:\n", + " max_sum = current_sum\n", + " return max_sum\n", + "\n", + "def total_max_subarray_sum(n, initial_seed, min_val, max_val):\n", + " total_sum = 0\n", + " lcg_gen = lcg(initial_seed)\n", + " for _ in range(20):\n", + " seed = next(lcg_gen)\n", + " total_sum += max_subarray_sum(n, seed, min_val, max_val)\n", + " return total_sum\n", + "\n", + "# Parameters\n", + "n = 10000 # Number of random numbers\n", + "initial_seed = 42 # Initial seed for the LCG\n", + "min_val = -10 # Minimum value of random numbers\n", + "max_val = 10 # Maximum value of random numbers\n", + "\n", + "# Timing the function\n", + "import time\n", + "start_time = time.time()\n", + "result = total_max_subarray_sum(n, initial_seed, min_val, max_val)\n", + "end_time = time.time()\n", + "\n", + "print(\"Total Maximum Subarray Sum (20 runs):\", result)\n", + "print(\"Execution Time: {:.6f} seconds\".format(end_time - start_time))\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2818063c-008e-4029-851a-959f63d3f0fc", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_gpt(python, prog_lang): \n", + " stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages_for(python, prog_lang), stream=True)\n", + " reply = \"\"\n", + " for chunk in stream:\n", + " fragment = chunk.choices[0].delta.content or \"\"\n", + " reply += fragment\n", + " yield reply.replace('```cpp\\n','').replace('javascript\\n','').replace('```','')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e3e0502-8550-46fe-bd2f-394078db6576", + "metadata": {}, + "outputs": [], + "source": [ + "def stream_claude(python, prog_lang):\n", + " system_message = get_system_message(prog_lang)\n", + " \n", + " result = claude.messages.stream(\n", + " model=CLAUDE_MODEL,\n", + " max_tokens=2000,\n", + " system=system_message,\n", + " messages=[{\"role\": \"user\", \"content\": user_prompt_for(python, prog_lang)}],\n", + " )\n", + " reply = \"\"\n", + " with result as stream:\n", + " for text in stream.text_stream:\n", + " reply += text\n", + " yield reply.replace('```cpp\\n','').replace('javascript\\n','').replace('```','')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10accbb2-b56d-4c79-beef-928c2a3b58f0", + "metadata": {}, + "outputs": [], + "source": [ + "def optimize(python, model, prog_lang):\n", + " if model==\"GPT\":\n", + " result = stream_gpt(python, prog_lang)\n", + " elif model==\"Claude\":\n", + " result = stream_claude(python, prog_lang)\n", + " else:\n", + " raise ValueError(\"Unknown model\")\n", + " for stream_so_far in result:\n", + " yield stream_so_far " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1acb130-8b5c-4199-818a-3afa89c342cb", + "metadata": {}, + "outputs": [], + "source": [ + "def execute_python(code):\n", + " try:\n", + " output = io.StringIO()\n", + " sys.stdout = output\n", + "\n", + " namespace = {}\n", + " exec(code, namespace)\n", + " finally:\n", + " sys.stdout = sys.__stdout__\n", + " return output.getvalue()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e901e81-61d8-4ab2-9e16-f70c8ee6bdbe", + "metadata": {}, + "outputs": [], + "source": [ + "css = \"\"\"\n", + ".python {background-color: #306998;}\n", + ".cpp {background-color: #050;}\n", + ".php {background-color: #cb7afa;}\n", + ".js {background-color: #f4ff78;}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e0dfe2e-a87d-4595-b4ef-72797bd1ad44", + "metadata": {}, + "outputs": [], + "source": [ + "def execute_cpp(code):\n", + " try:\n", + " compile_cmd = [\"clang++\", \"-Ofast\", \"-std=c++17\", \"-o\", \"optimized\", \"optimized.cpp\"]\n", + " compile_result = subprocess.run(compile_cmd, shell=True, text=True, capture_output=True)\n", + " run_cmd = [\"./optimized\"]\n", + " run_result = subprocess.run(run_cmd, check=True, text=True, capture_output=True)\n", + " return run_result.stdout\n", + " except subprocess.CalledProcessError as e:\n", + " return f\"An error occurred:\\n{e.stderr}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91ba8a3c-8686-4636-bf21-efc861f3a2b7", + "metadata": {}, + "outputs": [], + "source": [ + "def execute_js(code):\n", + " try:\n", + " run_result = subprocess.run([\"node\", \"optimized.js\"], check=True, text=True, capture_output=True)\n", + " return run_result.stdout\n", + " except subprocess.CalledProcessError as e:\n", + " return f\"An error occurred:\\n{e.stderr}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9006f67-f631-4ad4-bf45-b9366c822a04", + "metadata": {}, + "outputs": [], + "source": [ + "def execute_php(code):\n", + " try:\n", + " run_result = subprocess.run([\"php\", \"optimized.php\"], check=True, text=True, capture_output=True)\n", + " return run_result.stdout\n", + " except subprocess.CalledProcessError as e:\n", + " return f\"An error occurred:\\n{e.stderr}\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3991a09-f60d-448a-8e92-2561296d05cf", + "metadata": {}, + "outputs": [], + "source": [ + "def handle_execution(code, prog_lang):\n", + " write_output(code, prog_lang)\n", + "\n", + " index = next((i for i, lang in enumerate(programming_languages) if lang[\"extension\"] == prog_lang), -1)\n", + " return programming_languages[index][\"fn\"](code)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c127bbc9-ef4d-40e4-871a-85873fc9e406", + "metadata": {}, + "outputs": [], + "source": [ + "programming_languages = [\n", + " {\"name\": \"C++\", \"extension\": \"cpp\", \"fn\": execute_cpp},\n", + " {\"name\": \"Javascript\", \"extension\": \"js\", \"fn\": execute_js},\n", + " {\"name\": \"Php\", \"extension\": \"php\", \"fn\": execute_php}\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "126636a1-4315-4811-9de9-61ee032effc8", + "metadata": {}, + "outputs": [], + "source": [ + "def create_prog_lang_ui(lang, model):\n", + " prog_name = lang[\"name\"]\n", + " extension = lang[\"extension\"]\n", + " fn = lang[\"fn\"]\n", + "\n", + " with gr.Row():\n", + " with gr.Column():\n", + " convert = gr.Button(f\"Convert to {prog_name}\")\n", + " converted_code = gr.Textbox(label=f\"Converted {prog_name} code:\", lines=10)\n", + "\n", + " with gr.Column():\n", + " prog_run = gr.Button(f\"Run {prog_name}\")\n", + " prog_out = gr.TextArea(label=f\"{prog_name} result:\", elem_classes=[extension])\n", + "\n", + " current_selected = gr.Dropdown([extension], value=extension, visible=False)\n", + " \n", + " convert.click(optimize, inputs=[python, model, current_selected], outputs=[converted_code])\n", + " prog_run.click(handle_execution, inputs=[converted_code, current_selected], outputs=[prog_out])\n", + "\n", + "with gr.Blocks(css=css) as ui:\n", + " gr.Markdown(\"# Convert code from Python to any Programming Language\")\n", + " with gr.Row():\n", + " with gr.Column():\n", + " python = gr.Textbox(label=\"Python code:\", value=python_hard, lines=10)\n", + " with gr.Column():\n", + " python_run = gr.Button(f\"Run Python\")\n", + " python_out = gr.TextArea(label=f\"Python result:\", elem_classes=[\"python\"])\n", + " \n", + " with gr.Row():\n", + " model = gr.Dropdown([\"GPT\", \"Claude\"], label=\"Select model\", value=\"GPT\")\n", + "\n", + " python_run.click(execute_python, inputs=[python], outputs=[python_out]) \n", + "\n", + "\n", + " for lang in programming_languages:\n", + " create_prog_lang_ui(lang, model)\n", + "\n", + "ui.launch(\n", + " inbrowser=True\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:base] *", + "language": "python", + "name": "conda-base-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}