{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "kayiMLgsBnVt"
},
"outputs": [],
"source": [
"!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai gradio"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"executionInfo": {
"elapsed": 15255,
"status": "ok",
"timestamp": 1744678358807,
"user": {
"displayName": "Kenneth Andales",
"userId": "04047926009324958530"
},
"user_tz": -480
},
"id": "ByKEQHyhiLl7"
},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI\n",
"from google.colab import drive, userdata\n",
"from huggingface_hub import login\n",
"from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer\n",
"import torch\n",
"import gradio as gr"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"executionInfo": {
"elapsed": 2,
"status": "ok",
"timestamp": 1744678358815,
"user": {
"displayName": "Kenneth Andales",
"userId": "04047926009324958530"
},
"user_tz": -480
},
"id": "9tzK_t3jiOo1"
},
"outputs": [],
"source": [
"AUDIO_MODEL = 'whisper-1'\n",
"LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"executionInfo": {
"elapsed": 737,
"status": "ok",
"timestamp": 1744678360474,
"user": {
"displayName": "Kenneth Andales",
"userId": "04047926009324958530"
},
"user_tz": -480
},
"id": "PYNmGaQniW73"
},
"outputs": [],
"source": [
"hf_token = userdata.get('HF_TOKEN')\n",
"login(hf_token, add_to_git_credential=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"executionInfo": {
"elapsed": 555,
"status": "ok",
"timestamp": 1744678362522,
"user": {
"displayName": "Kenneth Andales",
"userId": "04047926009324958530"
},
"user_tz": -480
},
"id": "yGjVTeMEig-b"
},
"outputs": [],
"source": [
"openai_api_key = userdata.get(\"OPENAI_API_KEY\")\n",
"openai = OpenAI(api_key=openai_api_key)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"executionInfo": {
"elapsed": 9,
"status": "ok",
"timestamp": 1744679561600,
"user": {
"displayName": "Kenneth Andales",
"userId": "04047926009324958530"
},
"user_tz": -480
},
"id": "6jboyASHilLz"
},
"outputs": [],
"source": [
"def message_prompt(transciption):\n",
" system_message = \"\"\"\n",
" You are an assistant that translate japanese text into two different languages like 'English' and 'Filipino',\n",
" please display the translated text into markdown and include the original text from japanese using 'Romaji',\n",
" sample format would be - original text (converted to romaji): orignal_translated_text_here \\n\\n translated to english: translated_english_text_here \\n\\n translated to filipino: translated_filipino_text_here\"\n",
" \"\"\"\n",
"\n",
" user_propmpt = f\"Here is the transcripted japanese audio and translate it into two languages: '{transciption}'. No explaination just the translated languages only.\"\n",
"\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": system_message},\n",
" {\"role\": \"user\", \"content\": user_propmpt}\n",
" ]\n",
"\n",
" return messages"
]
},
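{
"cell_type": "markdown",
"metadata": {},
"source": [
"Quick illustrative check (added as a sketch, not part of the original notebook): build the chat messages for a short sample line and inspect them. The Japanese sample text below is hypothetical."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative example only: inspect the messages produced for a sample transcription\n",
"sample_messages = message_prompt(\"お前はもう死んでいる。\")  # hypothetical sample line\n",
"for m in sample_messages:\n",
"    print(f\"{m['role']}: {m['content'][:80]}...\")"
]
},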
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"executionInfo": {
"elapsed": 7,
"status": "ok",
"timestamp": 1744678366113,
"user": {
"displayName": "Kenneth Andales",
"userId": "04047926009324958530"
},
"user_tz": -480
},
"id": "nYrf_wKmmoUs"
},
"outputs": [],
"source": [
"quant_config = BitsAndBytesConfig(\n",
" load_in_4bit=True,\n",
" bnb_4bit_use_double_quant=True,\n",
" bnb_4bit_quant_type=\"nf4\",\n",
" bnb_4bit_compute_dtype=torch.bfloat16\n",
")"
]
},
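{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rough back-of-the-envelope memory estimate (a sketch added here, not from the original notebook): with NF4 quantization the weights take roughly 0.5 bytes per parameter, so the 8B checkpoint should fit comfortably in a T4's ~16 GB of VRAM, with the KV cache and activations adding overhead on top."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Rough estimate only: 4-bit (NF4) weights use about 0.5 bytes per parameter\n",
"params = 8e9                    # Llama 3.1 8B, approximate parameter count\n",
"weight_gb = params * 0.5 / 1e9  # roughly 4 GB of quantized weights\n",
"print(f\"Approximate 4-bit weight footprint: {weight_gb:.1f} GB (a T4 has ~16 GB VRAM)\")"
]
},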
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"executionInfo": {
"elapsed": 7,
"status": "ok",
"timestamp": 1744678367778,
"user": {
"displayName": "Kenneth Andales",
"userId": "04047926009324958530"
},
"user_tz": -480
},
"id": "ESlOaRGioqUQ"
},
"outputs": [],
"source": [
"def translation(messages):\n",
" tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
" tokenizer.pad_token = tokenizer.eos_token\n",
" inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n",
" streamer = TextStreamer(tokenizer)\n",
" model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n",
" outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)\n",
"\n",
" return tokenizer.decode(outputs[0])"
]
},
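{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optional refactor sketch (my addition, not the original author's code): `translation` above reloads the tokenizer and the quantized model on every call, so each Gradio click pays the full loading cost again. The variant below, assuming the same `LLAMA` and `quant_config` defined earlier, caches them once per session and strips the prompt from the decoded output. The names `load_llama` and `translation_cached` are hypothetical."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch only: cache the tokenizer/model so they are loaded once per session\n",
"from functools import lru_cache\n",
"\n",
"@lru_cache(maxsize=1)\n",
"def load_llama():\n",
"    tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
"    tokenizer.pad_token = tokenizer.eos_token\n",
"    model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n",
"    return tokenizer, model\n",
"\n",
"def translation_cached(messages):\n",
"    tokenizer, model = load_llama()\n",
"    inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors=\"pt\").to(model.device)\n",
"    streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream only the newly generated text\n",
"    outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)\n",
"    # Drop the prompt tokens so only the model's answer is returned\n",
"    return tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)"
]
},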
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"executionInfo": {
"elapsed": 6,
"status": "ok",
"timestamp": 1744679567326,
"user": {
"displayName": "Kenneth Andales",
"userId": "04047926009324958530"
},
"user_tz": -480
},
"id": "FSGFTvIEys0j"
},
"outputs": [],
"source": [
"def translate_text(file):\n",
" try:\n",
" audio_file = open(file, \"rb\")\n",
"\n",
" transciption = openai.audio.transcriptions.create(\n",
" model=AUDIO_MODEL,\n",
" file=audio_file,\n",
" response_format=\"text\",\n",
" language=\"ja\"\n",
" )\n",
"\n",
" messages = message_prompt(transciption)\n",
" response = translation(messages)\n",
"\n",
" return response\n",
" except Exception as e:\n",
" return f\"Unexpected error: {str(e)}\""
]
},
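{
"cell_type": "markdown",
"metadata": {},
"source": [
"Quick manual test outside Gradio (the file path below is hypothetical; point it at an audio file you have uploaded to the Colab runtime)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Manual test (hypothetical path): run the full transcribe-then-translate pipeline on one file\n",
"sample_path = \"/content/sample_anime_clip.mp3\"  # assumed example file, not included with the notebook\n",
"print(translate_text(sample_path))"
]
},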
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "bexgSsWuvUmU"
},
"outputs": [],
"source": [
"with gr.Blocks() as demo:\n",
" gr.Markdown(\"# 🎙️ Anime Audio Translator\")\n",
" with gr.Row():\n",
" with gr.Column():\n",
" audio_file = gr.Audio(type=\"filepath\", label=\"Upload Audio\")\n",
" button = gr.Button(\"Translate\", variant=\"primary\")\n",
"\n",
" with gr.Column():\n",
" gr.Label(value=\"Result of translated text to 'English' and 'Filipino'\", label=\"Character\")\n",
" output_text = gr.Markdown()\n",
"\n",
" button.click(\n",
" fn=translate_text,\n",
" inputs=audio_file,\n",
" outputs=output_text,\n",
" trigger_mode=\"once\"\n",
" )\n",
"demo.launch()"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"authorship_tag": "ABX9TyO+HrhlkaVchpoGIfmYAHdf",
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python [conda env:base] *",
"language": "python",
"name": "conda-base-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}