Merge pull request #697 from srbmisc/week3_day5_srb
Week 3 Day 5 meeting minutes generator using gradio.
This commit is contained in:
@@ -0,0 +1,167 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7ce4a475",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Week 3, day 5, meeting minutes generator. Code for Google Colab."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2abc87f0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124\n",
|
||||
"!pip install -q requests bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "83fa62ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from huggingface_hub import login\n",
|
||||
"from google.colab import userdata\n",
|
||||
"from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
|
||||
"import torch\n",
|
||||
"import gradio as gr"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e5ee86f8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sign in to HuggingFace Hub\n",
|
||||
"\n",
|
||||
"hf_token = userdata.get('HF_TOKEN')\n",
|
||||
"login(hf_token, add_to_git_credential=True)\n",
|
||||
"\n",
|
||||
"# Sign in to OpenAI using Secrets in Colab\n",
|
||||
"\n",
|
||||
"openai_api_key = userdata.get('OPENAI_API_KEY')\n",
|
||||
"openai = OpenAI(api_key=openai_api_key)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AUDIO_MODEL = \"whisper-1\"\n",
|
||||
"LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f0dd5a0d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def generate_meeting_minutes(file_data):\n",
|
||||
" print(\"calling openai to generate transcript\")\n",
|
||||
" transcription = openai.audio.transcriptions.create(model=AUDIO_MODEL, file=file_data, response_format=\"text\")\n",
|
||||
" print(\"transcript generated\")\n",
|
||||
" print(transcription)\n",
|
||||
" system_message = \"You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown.\"\n",
|
||||
" user_prompt = f\"Below is an extract transcript of a Denver council meeting. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\\n{transcription}\"\n",
|
||||
"\n",
|
||||
" messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": system_message},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ]\n",
|
||||
" quant_config = BitsAndBytesConfig(\n",
|
||||
" load_in_4bit=True,\n",
|
||||
" bnb_4bit_use_double_quant=True,\n",
|
||||
" bnb_4bit_compute_dtype=torch.bfloat16,\n",
|
||||
" bnb_4bit_quant_type=\"nf4\"\n",
|
||||
" )\n",
|
||||
" print(\"calling tokenizer\")\n",
|
||||
" tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
|
||||
" tokenizer.pad_token = tokenizer.eos_token\n",
|
||||
" inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n",
|
||||
" streamer = TextStreamer(tokenizer)\n",
|
||||
" print(\"calling model\")\n",
|
||||
" model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n",
|
||||
" outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)\n",
|
||||
" return tokenizer.decode(outputs[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a938b681",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def process_binary_file(file_data):\n",
|
||||
" \"\"\"\n",
|
||||
" This function receives the uploaded file content as a bytes object.\n",
|
||||
" You can then process the binary data as needed.\n",
|
||||
" \"\"\"\n",
|
||||
" if file_data:\n",
|
||||
" # Example: Get the size of the binary data\n",
|
||||
" print(f\"processing file\")\n",
|
||||
" with open(file_data.name, \"rb\") as f:\n",
|
||||
" return generate_meeting_minutes(f)\n",
|
||||
"\n",
|
||||
" return \"No file uploaded or file is empty.\"\n",
|
||||
"\n",
|
||||
"with gr.Blocks() as demo:\n",
|
||||
" gr.Markdown(\"Upload a file and its binary content will be processed.\")\n",
|
||||
"\n",
|
||||
" # gr.File with type=\"filepath\" hands the handler a path to the uploaded file\n",
|
||||
" file_input = gr.File(type=\"filepath\", file_count=\"single\", file_types=[\".mp3\"], label=\"Upload a meeting file\")\n",
|
||||
"\n",
|
||||
" # Or use gr.UploadButton with type=\"binary\"\n",
|
||||
" # upload_button = gr.UploadButton(\"Click to Upload Binary File\", type=\"binary\")\n",
|
||||
"\n",
|
||||
" output_text = gr.Textbox(label=\"Processing Result\")\n",
|
||||
"\n",
|
||||
" # Link the file_input to the processing function\n",
|
||||
" file_input.upload(process_binary_file, inputs=file_input, outputs=output_text)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6baed239",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"demo.launch(debug=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user