{
"cells": [
{
"cell_type": "markdown",
"id": "18b82c6b-10dc-4d94-b8dc-592ff011ce2b",
"metadata": {},
"source": [
"# Meeting minutes creator\n",
"\n",
"In this Colab, we build a meeting minutes program.\n",
"\n",
"It includes useful code to connect your Google Drive to your Colab notebook.\n",
"\n",
"Upload your own audio to make this work!\n",
"\n",
"https://colab.research.google.com/drive/13wR4Blz3Ot_x0GOpflmvvFffm5XU3Kct?usp=sharing\n",
"\n",
"This should run nicely on a low-cost or free T4 box.\n",
"\n",
"## **Assignment:**\n",
"Put everything into a nice Gradio UI (similar to last week).\n",
"Input the file name of the audio file to process.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e9289ba7-200c-43a9-b67a-c5ce826c9537",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"import re, requests, json, tempfile, gradio as gr, torch, os\n",
"from bs4 import BeautifulSoup\n",
"from IPython.display import Markdown, display, update_display\n",
"from google.colab import drive, userdata\n",
"from huggingface_hub import login\n",
"from openai import OpenAI\n",
"from pydub import AudioSegment\n",
"from pydub.playback import play\n",
"from io import BytesIO\n",
"from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
"\n",
"# Sign in to HuggingFace Hub\n",
|
|
"hf_token = userdata.get('HF_TOKEN')\n",
|
|
"login(hf_token, add_to_git_credential=True)\n",
|
|
"\n",
|
|
"# Sign in to OpenAI using Secrets in Colab\n",
|
|
"openai_api_key = userdata.get('OPENAI_API_KEY')\n",
|
|
"\n",
|
|
"# Initialize client\n",
|
|
"try:\n",
|
|
" openai = OpenAI(api_key=openai_api_key)\n",
|
|
"except Exception as e:\n",
|
|
" openai = None\n",
|
|
" print(f\"OpenAI client not initialized: {e}\")\n",
|
|
"\n",
|
|
"# Constants\n",
|
|
"AUDIO_MODEL = \"whisper-1\"\n",
|
|
"LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n",
|
|
"\n",
|
|
"# Google Drive\n",
|
|
"drive.mount(\"/content/drive\")\n",
|
|
"\n",
|
|
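"# Note: transcribe_audio() below looks for audio files under /content/drive/MyDrive/llms on the\n",
"# mounted Drive, so place uploaded recordings there (or pass a full local path instead).\n",
"\n",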
"# Local LLM setup (Llama 3.1)\n",
|
|
"try:\n",
|
|
" quant_config = BitsAndBytesConfig(\n",
|
|
" load_in_4bit=True,\n",
|
|
" bnb_4bit_use_double_quant=True,\n",
|
|
" bnb_4bit_compute_dtype=torch.bfloat16,\n",
|
|
" bnb_4bit_quant_type=\"nf4\"\n",
|
|
" )\n",
|
|
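"    # 4-bit NF4 quantization (with double quantization) shrinks the 8B model's weights to\n",
"    # roughly 5-6 GB, so it fits in a T4's 16 GB of VRAM; compute is done in bfloat16.\n",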
" tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
|
|
"\n",
|
|
" # Set the pad token to the end-of-sequence token for generation\n",
|
|
" tokenizer.pad_token = tokenizer.eos_token\n",
|
|
"\n",
|
|
" model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n",
|
|
" # model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", torch_dtype=torch.bfloat16, quantization_config=quant_config, trust_remote_code=True)\n",
|
|
"\n",
|
|
" model.eval() # Set model to evaluation mode\n",
|
|
"except Exception as e:\n",
|
|
" # If the local model fails to load, set variables to None\n",
|
|
" model = None\n",
|
|
" tokenizer = None\n",
|
|
" print(f\"Failed to load local model: {e}\")\n",
|
|
"\n",
|
|
"# Updated function to handle audio transcription\n",
|
|
"def transcribe_audio(audio_file):\n",
|
|
" \"\"\"\n",
|
|
" Transcribes an audio file to text using OpenAI's Whisper model.\n",
|
|
" Handles both local file paths and mounted Google Drive file paths.\n",
|
|
" \"\"\"\n",
|
|
" if not openai:\n",
|
|
" return \"OpenAI client not initialized. Please check your API key.\"\n",
|
|
"\n",
|
|
" if audio_file is None:\n",
|
|
" return \"No audio input provided.\"\n",
|
|
"\n",
|
|
" # Check if the file exists before attempting to open it\n",
|
|
" # Construct the expected path in Google Drive\n",
|
|
" # If the input is from the microphone, it will be a temporary file path\n",
|
|
" # If the input is from the textbox, it could be a full path or just a filename\n",
|
|
" if audio_file.startswith(\"/content/drive/MyDrive/llms/\"):\n",
|
|
" file_path_to_open = audio_file\n",
|
|
" else:\n",
|
|
" # Assume it's either a local path or just a filename in MyDrive/llms\n",
|
|
" # We'll prioritize checking MyDrive/llms first\n",
|
|
" gdrive_path_attempt = os.path.join(\"/content/drive/MyDrive/llms\", os.path.basename(audio_file))\n",
|
|
" if os.path.exists(gdrive_path_attempt):\n",
|
|
" file_path_to_open = gdrive_path_attempt\n",
|
|
" elif os.path.exists(audio_file):\n",
|
|
" file_path_to_open = audio_file\n",
|
|
" else:\n",
|
|
" return f\"File not found: {audio_file}. Please ensure the file exists in your Google Drive at /content/drive/MyDrive/llms/ or is a valid local path.\"\n",
|
|
"\n",
|
|
"\n",
|
|
" if not os.path.exists(file_path_to_open):\n",
|
|
" return f\"File not found: {file_path_to_open}. Please ensure the file exists.\"\n",
|
|
"\n",
|
|
"\n",
|
|
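"    # With response_format=\"text\", the Whisper API returns the transcript as a plain string.\n",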
" try:\n",
|
|
" with open(file_path_to_open, \"rb\") as f:\n",
|
|
" transcription = openai.audio.transcriptions.create(\n",
|
|
" model=AUDIO_MODEL,\n",
|
|
" file=f,\n",
|
|
" response_format=\"text\"\n",
|
|
" )\n",
|
|
" return transcription\n",
|
|
" except Exception as e:\n",
|
|
" return f\"An error occurred during transcription: {e}\"\n",
|
|
"\n",
|
|
"def generate_minutes(transcription):\n",
|
|
" \"\"\"\n",
|
|
" Generates meeting minutes from a transcript using a local Llama model.\n",
|
|
" Format the input, generate a response, and return the complete text string.\n",
|
|
" \"\"\"\n",
|
|
" # Check if the local model and tokenizer were successfully loaded\n",
|
|
" if not model or not tokenizer:\n",
|
|
" return \"Local Llama model not loaded. Check model paths and hardware compatibility.\"\n",
|
|
"\n",
|
|
" system_message = \"You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown.\"\n",
|
|
" user_prompt = f\"Below is an extract transcript of an Audio recording. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\\n{transcription}\"\n",
|
|
"\n",
|
|
" messages = [\n",
|
|
" {\"role\": \"system\", \"content\": system_message},\n",
|
|
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
|
" ]\n",
|
|
"\n",
|
|
" try:\n",
|
|
" # Apply the chat template to format the messages for the model\n",
|
|
" inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n",
|
|
"\n",
|
|
" # Generate the output. max_new_tokens controls the length of the generated text.\n",
|
|
" outputs = model.generate(inputs, max_new_tokens=2000)\n",
|
|
"\n",
|
|
" # Decode only the new tokens generated by the model (not the input tokens) to a human-readable string\n",
|
|
" response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
|
|
"\n",
|
|
" # The model's response will contain the full conversation.\n",
|
|
" # Extract only the assistant's part!\n",
|
|
" assistant_start = \"<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n\"\n",
|
|
" if assistant_start in response_text:\n",
|
|
" response_text = response_text.split(assistant_start)[-1]\n",
|
|
"\n",
|
|
" return response_text\n",
|
|
"\n",
|
|
" except Exception as e:\n",
|
|
" return f\"An error occurred during local model generation: {e}\"\n",
|
|
"\n",
|
|
"# Gradio UI components\n",
|
|
"with gr.Blocks() as ui:\n",
|
|
" gr.Markdown(\"# Meeting Minutes Generator\")\n",
|
|
" with gr.Row():\n",
|
|
" chatbot = gr.Chatbot(height=500, label=\"AI Assistant\")\n",
|
|
" with gr.Row():\n",
|
|
" entry = gr.Textbox(label=\"Provide the filename or path of the audio file to transcribe:\", scale=4)\n",
|
|
" submit_btn = gr.Button(\"Generate Minutes\", scale=1)\n",
|
|
" with gr.Row():\n",
|
|
" audio_input = gr.Audio(sources=[\"microphone\"], type=\"filepath\", label=\"Or speak to our AI Assistant to transcribe\", scale=4)\n",
|
|
" submit_audio_btn = gr.Button(\"Transcribe Audio\", scale=1)\n",
|
|
"\n",
|
|
" with gr.Row():\n",
|
|
" clear = gr.Button(\"Clear\")\n",
|
|
"\n",
|
|
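"    # The handlers below return the updated chat history as a list of\n",
"    # [user_message, assistant_message] pairs, the format this gr.Chatbot displays.\n",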
" def process_file_and_generate(file_path, history):\n",
|
|
" transcribed_text = transcribe_audio(file_path)\n",
|
|
" minutes = generate_minutes(transcribed_text)\n",
|
|
" new_history = history + [[f\"Transcription of '{os.path.basename(file_path)}':\\n{transcribed_text}\", minutes]]\n",
|
|
" return new_history\n",
|
|
"\n",
|
|
" def process_audio_and_generate(audio_file, history):\n",
|
|
" transcribed_text = transcribe_audio(audio_file)\n",
|
|
" minutes = generate_minutes(transcribed_text)\n",
|
|
" new_history = history + [[f\"Transcription of your recording:\\n{transcribed_text}\", minutes]]\n",
|
|
" return new_history\n",
|
|
"\n",
|
|
"\n",
|
|
" submit_btn.click(\n",
|
|
" process_file_and_generate,\n",
|
|
" inputs=[entry, chatbot],\n",
|
|
" outputs=[chatbot],\n",
|
|
" queue=False\n",
|
|
" )\n",
|
|
"\n",
|
|
" submit_audio_btn.click(\n",
|
|
" process_audio_and_generate,\n",
|
|
" inputs=[audio_input, chatbot],\n",
|
|
" outputs=[chatbot],\n",
|
|
" queue=False\n",
|
|
" )\n",
|
|
"\n",
|
|
" clear.click(lambda: None, inputs=None, outputs=[chatbot], queue=False)\n",
|
|
"\n",
|
|
"ui.launch(inbrowser=True, debug=True)"
|
|
]
|
|
},
|
|
{
"cell_type": "code",
"execution_count": null,
"id": "cd2020d3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}