Bootcamp: Solisoma (week3-assessment)
This commit is contained in:
@@ -0,0 +1,244 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"id": "c861645d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" WARNING: The script isympy.exe is installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
|
||||||
|
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n",
|
||||||
|
" WARNING: The scripts f2py.exe and numpy-config.exe are installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
|
||||||
|
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n",
|
||||||
|
" WARNING: The script normalizer.exe is installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
|
||||||
|
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n",
|
||||||
|
" WARNING: The script tqdm.exe is installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
|
||||||
|
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n",
|
||||||
|
" WARNING: The scripts torchfrtrace.exe and torchrun.exe are installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
|
||||||
|
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n",
|
||||||
|
" WARNING: The scripts hf.exe, huggingface-cli.exe and tiny-agents.exe are installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
|
||||||
|
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n",
|
||||||
|
" WARNING: The scripts accelerate-config.exe, accelerate-estimate-memory.exe, accelerate-launch.exe, accelerate-merge-weights.exe and accelerate.exe are installed in 'C:\\Users\\hp\\AppData\\Roaming\\Python\\Python314\\Scripts' which is not on PATH.\n",
|
||||||
|
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# %pip (not !pip) installs into the *kernel's* environment — the stderr above shows\n",
"# !pip targeting a different Python (Python314) than the .venv 3.12 kernel, which is\n",
"# why the later BitsAndBytesConfig cell fails with PackageNotFoundError: bitsandbytes\n",
"%pip install -q --upgrade bitsandbytes accelerate"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "ba0f9487",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Standard library\n",
"import os\n",
"import threading\n",
"\n",
"# Third-party\n",
"import requests\n",
"import torch\n",
"import gradio as gr\n",
"from dotenv import load_dotenv\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI\n",
"from huggingface_hub import login\n",
"from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, BitsAndBytesConfig"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"id": "70cc41a4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"load_dotenv(override=True)\n",
"\n",
"# Fail fast with a clear message rather than passing None into login()\n",
"hf_token = os.getenv('HF_TOKEN')\n",
"if not hf_token:\n",
"    raise ValueError(\"HF_TOKEN is not set — add it to your .env file\")\n",
"\n",
"login(hf_token, add_to_git_credential=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 14,
|
||||||
|
"id": "a197a483",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "PackageNotFoundError",
|
||||||
|
"evalue": "No package metadata was found for bitsandbytes",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||||
|
"\u001b[31mStopIteration\u001b[39m Traceback (most recent call last)",
|
||||||
|
"\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\importlib\\metadata\\__init__.py:397\u001b[39m, in \u001b[36mDistribution.from_name\u001b[39m\u001b[34m(cls, name)\u001b[39m\n\u001b[32m 396\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m397\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mnext\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mdiscover\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 398\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m:\n",
|
||||||
|
"\u001b[31mStopIteration\u001b[39m: ",
|
||||||
|
"\nDuring handling of the above exception, another exception occurred:\n",
|
||||||
|
"\u001b[31mPackageNotFoundError\u001b[39m Traceback (most recent call last)",
|
||||||
|
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[14]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;43;01mclass\u001b[39;49;00m\u001b[38;5;250;43m \u001b[39;49m\u001b[34;43;01mGenerateMinute\u001b[39;49;00m\u001b[43m:\u001b[49m\n\u001b[32m 2\u001b[39m \u001b[43m \u001b[49m\u001b[43maudio_model\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mopenai/whisper-medium.en\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\n\u001b[32m 3\u001b[39m \u001b[43m \u001b[49m\u001b[43mllm_model\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmeta-llama/Llama-3.2-3B-Instruct\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\n",
|
||||||
|
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[14]\u001b[39m\u001b[32m, line 4\u001b[39m, in \u001b[36mGenerateMinute\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 2\u001b[39m audio_model = \u001b[33m\"\u001b[39m\u001b[33mopenai/whisper-medium.en\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 3\u001b[39m llm_model = \u001b[33m\"\u001b[39m\u001b[33mmeta-llama/Llama-3.2-3B-Instruct\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m quant_config = \u001b[43mBitsAndBytesConfig\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[43mload_in_4bit\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mbnb_4bit_use_double_quant\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[43mbnb_4bit_compute_dtype\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtorch\u001b[49m\u001b[43m.\u001b[49m\u001b[43mbfloat16\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 8\u001b[39m \u001b[43m \u001b[49m\u001b[43mbnb_4bit_quant_type\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mnf4\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\n\u001b[32m 9\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, progress, audio_model=audio_model, llm_model=llm_model):\n\u001b[32m 12\u001b[39m \u001b[38;5;28mself\u001b[39m.progress = progress\n",
|
||||||
|
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\hp\\projects\\gen-ai\\llm_engineering\\.venv\\Lib\\site-packages\\transformers\\utils\\quantization_config.py:510\u001b[39m, in \u001b[36mBitsAndBytesConfig.__init__\u001b[39m\u001b[34m(self, load_in_8bit, load_in_4bit, llm_int8_threshold, llm_int8_skip_modules, llm_int8_enable_fp32_cpu_offload, llm_int8_has_fp16_weight, bnb_4bit_compute_dtype, bnb_4bit_quant_type, bnb_4bit_use_double_quant, bnb_4bit_quant_storage, **kwargs)\u001b[39m\n\u001b[32m 507\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m kwargs:\n\u001b[32m 508\u001b[39m logger.info(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mUnused kwargs: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlist\u001b[39m(kwargs.keys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m. These kwargs are not used in \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.\u001b[34m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m510\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mpost_init\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
|
"\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\hp\\projects\\gen-ai\\llm_engineering\\.venv\\Lib\\site-packages\\transformers\\utils\\quantization_config.py:568\u001b[39m, in \u001b[36mBitsAndBytesConfig.post_init\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 565\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m.bnb_4bit_use_double_quant, \u001b[38;5;28mbool\u001b[39m):\n\u001b[32m 566\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33mbnb_4bit_use_double_quant must be a boolean\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m568\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.load_in_4bit \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m version.parse(\u001b[43mimportlib\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m.\u001b[49m\u001b[43mversion\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mbitsandbytes\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m) >= version.parse(\n\u001b[32m 569\u001b[39m \u001b[33m\"\u001b[39m\u001b[33m0.39.0\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 570\u001b[39m ):\n\u001b[32m 571\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 572\u001b[39m \u001b[33m\"\u001b[39m\u001b[33m4 bit quantization requires bitsandbytes>=0.39.0 - please upgrade your bitsandbytes version\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 573\u001b[39m )\n",
|
||||||
|
"\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\importlib\\metadata\\__init__.py:889\u001b[39m, in \u001b[36mversion\u001b[39m\u001b[34m(distribution_name)\u001b[39m\n\u001b[32m 882\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mversion\u001b[39m(distribution_name):\n\u001b[32m 883\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Get the version string for the named package.\u001b[39;00m\n\u001b[32m 884\u001b[39m \n\u001b[32m 885\u001b[39m \u001b[33;03m :param distribution_name: The name of the distribution package to query.\u001b[39;00m\n\u001b[32m 886\u001b[39m \u001b[33;03m :return: The version string for the package as defined in the package's\u001b[39;00m\n\u001b[32m 887\u001b[39m \u001b[33;03m \"Version\" metadata key.\u001b[39;00m\n\u001b[32m 888\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m889\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdistribution\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdistribution_name\u001b[49m\u001b[43m)\u001b[49m.version\n",
|
||||||
|
"\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\importlib\\metadata\\__init__.py:862\u001b[39m, in \u001b[36mdistribution\u001b[39m\u001b[34m(distribution_name)\u001b[39m\n\u001b[32m 856\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mdistribution\u001b[39m(distribution_name):\n\u001b[32m 857\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Get the ``Distribution`` instance for the named package.\u001b[39;00m\n\u001b[32m 858\u001b[39m \n\u001b[32m 859\u001b[39m \u001b[33;03m :param distribution_name: The name of the distribution package as a string.\u001b[39;00m\n\u001b[32m 860\u001b[39m \u001b[33;03m :return: A ``Distribution`` instance (or subclass thereof).\u001b[39;00m\n\u001b[32m 861\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m862\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mDistribution\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_name\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdistribution_name\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
|
"\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\importlib\\metadata\\__init__.py:399\u001b[39m, in \u001b[36mDistribution.from_name\u001b[39m\u001b[34m(cls, name)\u001b[39m\n\u001b[32m 397\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28mcls\u001b[39m.discover(name=name))\n\u001b[32m 398\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m399\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m PackageNotFoundError(name)\n",
|
||||||
|
"\u001b[31mPackageNotFoundError\u001b[39m: No package metadata was found for bitsandbytes"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"class GenerateMinute:\n",
"    \"\"\"Transcribe a meeting recording and stream generated meeting minutes.\n",
"\n",
"    Speech-to-text uses a Whisper ASR pipeline; minutes are generated by a\n",
"    4-bit quantized Llama model and streamed token-by-token via a background\n",
"    generation thread.\n",
"    \"\"\"\n",
"\n",
"    audio_model = \"openai/whisper-medium.en\"\n",
"    llm_model = \"meta-llama/Llama-3.2-3B-Instruct\"\n",
"    # 4-bit NF4 quantization; requires bitsandbytes>=0.39.0 in the kernel's environment\n",
"    quant_config = BitsAndBytesConfig(\n",
"        load_in_4bit=True,\n",
"        bnb_4bit_use_double_quant=True,\n",
"        bnb_4bit_compute_dtype=torch.bfloat16,\n",
"        bnb_4bit_quant_type=\"nf4\"\n",
"    )\n",
"\n",
"    def __init__(self, progress, audio_model=audio_model, llm_model=llm_model):\n",
"        \"\"\"Load tokenizer and quantized LLM. `progress` is a Gradio-style progress callback.\"\"\"\n",
"        self.progress = progress\n",
"        self.audio_model = audio_model\n",
"        self.llm_model = llm_model\n",
"        self.tokenizer = AutoTokenizer.from_pretrained(self.llm_model)\n",
"        self.tokenizer.pad_token = self.tokenizer.eos_token\n",
"        self.model = AutoModelForCausalLM.from_pretrained(\n",
"            self.llm_model, quantization_config=self.quant_config, device_map=\"auto\"\n",
"        )\n",
"\n",
"    def audio_to_text(self, audio_filepath):\n",
"        \"\"\"Transcribe `audio_filepath` to text; raises ValueError on any failure.\"\"\"\n",
"        self.progress(0.4, desc=\"Transcribing audio...\")\n",
"        try:\n",
"            if audio_filepath is None:\n",
"                raise ValueError(\"No audio file provided\")\n",
"\n",
"            if not os.path.exists(audio_filepath):\n",
"                # BUG FIX: was a plain string with a stale \"{file_path}\" placeholder\n",
"                raise ValueError(f\"Audio file not found: {audio_filepath}\")\n",
"\n",
"            pipe = pipeline(\n",
"                \"automatic-speech-recognition\",\n",
"                model=self.audio_model,\n",
"                chunk_length_s=30,\n",
"                device=\"cuda\" if torch.cuda.is_available() else \"cpu\",  # was hardcoded to cuda\n",
"                return_timestamps=True\n",
"            )\n",
"\n",
"            response = pipe(audio_filepath)\n",
"\n",
"            # BUG FIX: the ASR pipeline returns a dict — the transcript lives under \"text\";\n",
"            # response.strip() would raise AttributeError\n",
"            text = response[\"text\"].strip()\n",
"\n",
"            if not text:\n",
"                raise ValueError(\"No speech detected in audio\")\n",
"\n",
"            return text\n",
"\n",
"        except Exception as e:\n",
"            # Chain the original exception so the real traceback is preserved\n",
"            raise ValueError(e) from e\n",
"\n",
"    def create_minute(self, transcription):\n",
"        \"\"\"Generator: yields markdown chunks of the meeting minutes as they stream.\"\"\"\n",
"        self.progress(0.7, desc=\"Generating meeting minutes...\")\n",
"\n",
"        system_message = \"\"\"\n",
"            You produce minutes of meetings from transcripts, with summary, key discussion points,\n",
"            takeaways and action items with owners, in markdown format without code blocks.\n",
"        \"\"\"\n",
"\n",
"        user_prompt = f\"\"\"\n",
"            Below is an extract transcript of a Denver council meeting.\n",
"            Please write minutes in markdown without code blocks, including:\n",
"            - a summary with attendees, location and date\n",
"            - discussion points\n",
"            - takeaways\n",
"            - action items with owners\n",
"\n",
"            Transcription:\n",
"            {transcription}\n",
"        \"\"\"\n",
"\n",
"        messages = [\n",
"            {\"role\": \"system\", \"content\": system_message},\n",
"            {\"role\": \"user\", \"content\": user_prompt}\n",
"        ]\n",
"\n",
"        # BUG FIX: chat messages must be rendered with apply_chat_template;\n",
"        # calling tokenizer() on a list of role dicts does not produce valid input_ids\n",
"        inputs = self.tokenizer.apply_chat_template(\n",
"            messages, return_tensors=\"pt\", add_generation_prompt=True\n",
"        ).to(self.model.device)\n",
"        streamer = TextIteratorStreamer(self.tokenizer)\n",
"\n",
"        # Run generation on a worker thread so we can consume the streamer here\n",
"        thread = threading.Thread(\n",
"            target=self.model.generate,\n",
"            kwargs={\n",
"                \"input_ids\": inputs,\n",
"                \"max_new_tokens\": 2000,\n",
"                \"streamer\": streamer\n",
"            }\n",
"        )\n",
"\n",
"        thread.start()\n",
"        started = False\n",
"\n",
"        for new_text in streamer:\n",
"            # Skip everything up to the assistant header, then filter special tokens\n",
"            if not started:\n",
"                if \"<|start_header_id|>assistant<|end_header_id|>\" in new_text:\n",
"                    started = True\n",
"                    new_text = new_text.split(\"<|start_header_id|>assistant<|end_header_id|>\")[-1].strip()\n",
"\n",
"            if started:\n",
"                if \"<|eot_id|>\" in new_text:\n",
"                    new_text = new_text.replace(\"<|eot_id|>\", \"\")  # Remove the unwanted token\n",
"\n",
"                if new_text.strip():  # Only yield non-empty chunks\n",
"                    yield new_text\n",
"\n",
"    def process_meeting(self, audio_filepath, audio_model, llm_model):\n",
"        \"\"\"Generator: transcribe the audio, then yield the minutes as a growing string.\"\"\"\n",
"        # NOTE(review): reassigning the model names here does NOT reload self.model /\n",
"        # self.tokenizer built in __init__ — confirm whether a reload is intended\n",
"        self.audio_model = audio_model\n",
"        self.llm_model = llm_model\n",
"        self.progress(0.2, desc=\"Processing audio file...\")\n",
"        try:\n",
"            transcription = self.audio_to_text(audio_filepath)\n",
"            minute = self.create_minute(transcription)\n",
"\n",
"            response = \"\"\n",
"\n",
"            for chunk in minute:\n",
"                response += chunk\n",
"                yield response\n",
"\n",
"        except Exception as e:\n",
"            yield f\"Error processing meeting: {e}\""
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": ".venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.12"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user