From f581e7e027de0d936aedf680c040684574f37b0e Mon Sep 17 00:00:00 2001 From: ken Date: Fri, 25 Apr 2025 21:33:25 +0800 Subject: [PATCH] (fix) space issue removed --- .../anime_audio_translator.colab.ipynb | 293 +++++++++++++++++- 1 file changed, 292 insertions(+), 1 deletion(-) diff --git a/week3/community-contributions/anime_audio_translator.colab.ipynb b/week3/community-contributions/anime_audio_translator.colab.ipynb index 734d9b0..0ad7b9b 100644 --- a/week3/community-contributions/anime_audio_translator.colab.ipynb +++ b/week3/community-contributions/anime_audio_translator.colab.ipynb @@ -1 +1,292 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4","authorship_tag":"ABX9TyO+HrhlkaVchpoGIfmYAHdf"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"code","execution_count":null,"metadata":{"id":"kayiMLgsBnVt"},"outputs":[],"source":["!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai gradio"]},{"cell_type":"code","source":["import os\n","import requests\n","from IPython.display import Markdown, display, update_display\n","from openai import OpenAI\n","from google.colab import drive, userdata\n","from huggingface_hub import login\n","from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer\n","import torch\n","import gradio as gr"],"metadata":{"id":"ByKEQHyhiLl7","executionInfo":{"status":"ok","timestamp":1744678358807,"user_tz":-480,"elapsed":15255,"user":{"displayName":"Kenneth Andales","userId":"04047926009324958530"}}},"execution_count":2,"outputs":[]},{"cell_type":"code","source":["AUDIO_MODEL = 'whisper-1'\n","LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\""],"metadata":{"id":"9tzK_t3jiOo1","executionInfo":{"status":"ok","timestamp":1744678358815,"user_tz":-480,"elapsed":2,"user":{"displayName":"Kenneth Andales","userId":"04047926009324958530"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["hf_token = userdata.get('HF_TOKEN')\n","login(hf_token, add_to_git_credential=True)"],"metadata":{"id":"PYNmGaQniW73","executionInfo":{"status":"ok","timestamp":1744678360474,"user_tz":-480,"elapsed":737,"user":{"displayName":"Kenneth Andales","userId":"04047926009324958530"}}},"execution_count":4,"outputs":[]},{"cell_type":"code","source":["openai_api_key = userdata.get(\"OPENAI_API_KEY\")\n","openai = OpenAI(api_key=openai_api_key)"],"metadata":{"id":"yGjVTeMEig-b","executionInfo":{"status":"ok","timestamp":1744678362522,"user_tz":-480,"elapsed":555,"user":{"displayName":"Kenneth Andales","userId":"04047926009324958530"}}},"execution_count":5,"outputs":[]},{"cell_type":"code","source":["def message_prompt(transciption):\n"," system_message = \"\"\"\n"," You are an assistant that translate japanese text into two different languages like 'English' and 'Filipino',\n"," please display the translated text into markdown and include the original text from japanese using 'Romaji',\n"," sample format would be - original text (converted to romaji): orignal_translated_text_here \\n\\n translated to english: translated_english_text_here \\n\\n translated to filipino: translated_filipino_text_here\"\n"," \"\"\"\n","\n"," user_propmpt = f\"Here is the transcripted japanese audio and translate it into two languages: '{transciption}'. No explaination just the translated languages only.\"\n","\n"," messages = [\n"," {\"role\": \"system\", \"content\": system_message},\n"," {\"role\": \"user\", \"content\": user_propmpt}\n"," ]\n","\n"," return messages"],"metadata":{"id":"6jboyASHilLz","executionInfo":{"status":"ok","timestamp":1744679561600,"user_tz":-480,"elapsed":9,"user":{"displayName":"Kenneth Andales","userId":"04047926009324958530"}}},"execution_count":36,"outputs":[]},{"cell_type":"code","source":["quant_config = BitsAndBytesConfig(\n"," load_in_4bit=True,\n"," bnb_4bit_use_double_quant=True,\n"," bnb_4bit_quant_type=\"nf4\",\n"," bnb_4bit_compute_dtype=torch.bfloat16\n",")"],"metadata":{"id":"nYrf_wKmmoUs","executionInfo":{"status":"ok","timestamp":1744678366113,"user_tz":-480,"elapsed":7,"user":{"displayName":"Kenneth Andales","userId":"04047926009324958530"}}},"execution_count":7,"outputs":[]},{"cell_type":"code","source":["def translation(messages):\n"," tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n"," tokenizer.pad_token = tokenizer.eos_token\n"," inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n"," streamer = TextStreamer(tokenizer)\n"," model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n"," outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)\n","\n"," return tokenizer.decode(outputs[0])"],"metadata":{"id":"ESlOaRGioqUQ","executionInfo":{"status":"ok","timestamp":1744678367778,"user_tz":-480,"elapsed":7,"user":{"displayName":"Kenneth Andales","userId":"04047926009324958530"}}},"execution_count":8,"outputs":[]},{"cell_type":"code","source":["def translate_text(file):\n"," try:\n"," audio_file = open(file, \"rb\")\n","\n"," transciption = openai.audio.transcriptions.create(\n"," model=AUDIO_MODEL,\n"," file=audio_file,\n"," response_format=\"text\",\n"," language=\"ja\"\n"," )\n","\n"," messages = message_prompt(transciption)\n"," response = translation(messages)\n","\n"," return response\n"," except Exception as e:\n"," return f\"Unexpected error: {str(e)}\""],"metadata":{"id":"FSGFTvIEys0j","executionInfo":{"status":"ok","timestamp":1744679567326,"user_tz":-480,"elapsed":6,"user":{"displayName":"Kenneth Andales","userId":"04047926009324958530"}}},"execution_count":37,"outputs":[]},{"cell_type":"code","source":["with gr.Blocks() as demo:\n"," gr.Markdown(\"# 🎙️ Anime Audio Translator\")\n"," with gr.Row():\n"," with gr.Column():\n"," audio_file = gr.Audio(type=\"filepath\", label=\"Upload Audio\")\n"," button = gr.Button(\"Translate\", variant=\"primary\")\n","\n"," with gr.Column():\n"," gr.Label(value=\"Result of translated text to 'English' and 'Filipino'\", label=\"Character\")\n"," output_text = gr.Markdown()\n","\n"," button.click(\n"," fn=translate_text,\n"," inputs=audio_file,\n"," outputs=output_text,\n"," trigger_mode=\"once\"\n"," )\n","demo.launch(\n"," # share=True\n",")"],"metadata":{"id":"bexgSsWuvUmU"},"execution_count":null,"outputs":[]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kayiMLgsBnVt" + }, + "outputs": [], + "source": [ + "!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai gradio" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "executionInfo": { + "elapsed": 15255, + "status": "ok", + "timestamp": 1744678358807, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "ByKEQHyhiLl7" + }, + "outputs": [], + "source": [ + "import os\n", + "import requests\n", + "from IPython.display import Markdown, display, update_display\n", + "from openai import OpenAI\n", + "from google.colab import drive, userdata\n", + "from huggingface_hub import login\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer\n", + "import torch\n", + "import gradio as gr" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "executionInfo": { + "elapsed": 2, + "status": "ok", + "timestamp": 1744678358815, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "9tzK_t3jiOo1" + }, + "outputs": [], + "source": [ + "AUDIO_MODEL = 'whisper-1'\n", + "LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "executionInfo": { + "elapsed": 737, + "status": "ok", + "timestamp": 1744678360474, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "PYNmGaQniW73" + }, + "outputs": [], + "source": [ + "hf_token = userdata.get('HF_TOKEN')\n", + "login(hf_token, add_to_git_credential=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "executionInfo": { + "elapsed": 555, + "status": "ok", + "timestamp": 1744678362522, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "yGjVTeMEig-b" + }, + "outputs": [], + "source": [ + "openai_api_key = userdata.get(\"OPENAI_API_KEY\")\n", + "openai = OpenAI(api_key=openai_api_key)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "executionInfo": { + "elapsed": 9, + "status": "ok", + "timestamp": 1744679561600, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "6jboyASHilLz" + }, + "outputs": [], + "source": [ + "def message_prompt(transciption):\n", + " system_message = \"\"\"\n", + " You are an assistant that translate japanese text into two different languages like 'English' and 'Filipino',\n", + " please display the translated text into markdown and include the original text from japanese using 'Romaji',\n", + " sample format would be - original text (converted to romaji): orignal_translated_text_here \\n\\n translated to english: translated_english_text_here \\n\\n translated to filipino: translated_filipino_text_here\"\n", + " \"\"\"\n", + "\n", + " user_propmpt = f\"Here is the transcripted japanese audio and translate it into two languages: '{transciption}'. No explaination just the translated languages only.\"\n", + "\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_propmpt}\n", + " ]\n", + "\n", + " return messages" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "executionInfo": { + "elapsed": 7, + "status": "ok", + "timestamp": 1744678366113, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "nYrf_wKmmoUs" + }, + "outputs": [], + "source": [ + "quant_config = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_quant_type=\"nf4\",\n", + " bnb_4bit_compute_dtype=torch.bfloat16\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "executionInfo": { + "elapsed": 7, + "status": "ok", + "timestamp": 1744678367778, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "ESlOaRGioqUQ" + }, + "outputs": [], + "source": [ + "def translation(messages):\n", + " tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n", + " tokenizer.pad_token = tokenizer.eos_token\n", + " inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n", + " streamer = TextStreamer(tokenizer)\n", + " model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n", + " outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)\n", + "\n", + " return tokenizer.decode(outputs[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "executionInfo": { + "elapsed": 6, + "status": "ok", + "timestamp": 1744679567326, + "user": { + "displayName": "Kenneth Andales", + "userId": "04047926009324958530" + }, + "user_tz": -480 + }, + "id": "FSGFTvIEys0j" + }, + "outputs": [], + "source": [ + "def translate_text(file):\n", + " try:\n", + " audio_file = open(file, \"rb\")\n", + "\n", + " transciption = openai.audio.transcriptions.create(\n", + " model=AUDIO_MODEL,\n", + " file=audio_file,\n", + " response_format=\"text\",\n", + " language=\"ja\"\n", + " )\n", + "\n", + " messages = message_prompt(transciption)\n", + " response = translation(messages)\n", + "\n", + " return response\n", + " except Exception as e:\n", + " return f\"Unexpected error: {str(e)}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bexgSsWuvUmU" + }, + "outputs": [], + "source": [ + "with gr.Blocks() as demo:\n", + " gr.Markdown(\"# 🎙️ Anime Audio Translator\")\n", + " with gr.Row():\n", + " with gr.Column():\n", + " audio_file = gr.Audio(type=\"filepath\", label=\"Upload Audio\")\n", + " button = gr.Button(\"Translate\", variant=\"primary\")\n", + "\n", + " with gr.Column():\n", + " gr.Label(value=\"Result of translated text to 'English' and 'Filipino'\", label=\"Character\")\n", + " output_text = gr.Markdown()\n", + "\n", + " button.click(\n", + " fn=translate_text,\n", + " inputs=audio_file,\n", + " outputs=output_text,\n", + " trigger_mode=\"once\"\n", + " )\n", + "demo.launch()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "authorship_tag": "ABX9TyO+HrhlkaVchpoGIfmYAHdf", + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python [conda env:base] *", + "language": "python", + "name": "conda-base-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}