Improvements to descriptions and links
@@ -1,150 +1,160 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "6gGKXU5RXORf"
},
"outputs": [],
"source": [
"# getting the latest transformers first, since this will require a restart\n",
"\n",
"!pip install git+https://github.com/huggingface/transformers.git"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "yCRrF4aiXPPo"
},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import torch\n",
"from google.colab import userdata\n",
"from huggingface_hub import login\n",
"from transformers import AutoProcessor, AutoModelForImageTextToText\n",
"from google.colab import files"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "AAlOQuCbXcrv"
},
"outputs": [],
"source": [
"# logging in to HF\n",
"\n",
"hf_token = userdata.get('HF_TOKEN')\n",
"login(hf_token, add_to_git_credential=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_RRVc2j2Vun-"
},
"outputs": [],
"source": [
"# this will start an input prompt for uploading local files\n",
"\n",
"uploaded = files.upload()\n",
"print(uploaded.keys()) # this will look something like dict_keys([\"note2.jpg\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "V_UAuSSkXBKh"
},
"outputs": [],
"source": [
"'''\n",
"ChatGPT and Gemini explain the following part roughly like so:\n",
"The string contained in image_path is the key of the entry in the dictionary of uploaded files (see box above).\n",
"The value for that key contains the image in binary format.\n",
"The \"with open(image_path, \"wb\") as f\" part means: Create a new file \"note2.jpg\" on the server, and write to it in binary mode (\"wb\").\n",
"f.write(image) writes the binary image to that new file. \"note2.jpg\" aka image_path will now contain the image.\n",
"'''\n",
"\n",
"image_path = \"note2.jpg\" # update this string depending on the printout in the previous cell!\n",
"image = uploaded[image_path]\n",
"with open(image_path, \"wb\") as f:\n",
"    f.write(image)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "AiFP-mQtXrpV"
},
"outputs": [],
"source": [
"# from HF model instructions\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"model = AutoModelForImageTextToText.from_pretrained(\"stepfun-ai/GOT-OCR-2.0-hf\", device_map=device)\n",
"processor = AutoProcessor.from_pretrained(\"stepfun-ai/GOT-OCR-2.0-hf\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "7Adr8HB_YNf5"
},
"outputs": [],
"source": [
"# also from HF documentation about this model, see https://huggingface.co/stepfun-ai/GOT-OCR-2.0-hf\n",
"\n",
"image = image_path\n",
"inputs = processor(image, return_tensors=\"pt\").to(device)\n",
"\n",
"ocr = model.generate(\n",
"    **inputs,\n",
"    do_sample=False,\n",
"    tokenizer=processor.tokenizer,\n",
"    stop_strings=\"<|im_end|>\",\n",
"    max_new_tokens=4096,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "nRsRUIIuYdJ9"
},
"outputs": [],
"source": [
"# prints out the recognized text. This can read my handwriting pretty well! And it works super quickly on the free T4 GPU server here.\n",
"\n",
"print(processor.decode(ocr[0, inputs[\"input_ids\"].shape[1]:], skip_special_tokens=True))"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"authorship_tag": "ABX9TyPtAT7Yq5xd4vDcJEZtg69J",
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
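The notebook above boils down to a short OCR pipeline. For reference, here is a condensed sketch of the same steps as a standalone script, assuming the image file note2.jpg already exists on disk and that the from-source transformers build installed in the first cell exposes AutoModelForImageTextToText for this checkpoint:

# Condensed sketch of the notebook's GOT-OCR-2.0 pipeline, minus the Colab upload step.
# Assumes "note2.jpg" exists locally and transformers is installed from source as above.
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText

device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForImageTextToText.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf", device_map=device)
processor = AutoProcessor.from_pretrained("stepfun-ai/GOT-OCR-2.0-hf")

inputs = processor("note2.jpg", return_tensors="pt").to(device)
ocr = model.generate(
    **inputs,
    do_sample=False,
    tokenizer=processor.tokenizer,
    stop_strings="<|im_end|>",
    max_new_tokens=4096,
)
# Slice off the prompt tokens so only the newly generated text is decoded.
print(processor.decode(ocr[0, inputs["input_ids"].shape[1]:], skip_special_tokens=True))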
@@ -1,302 +1,312 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "It89APiAtTUF"
},
"source": [
"# Create meeting minutes from an audio file\n",
"\n",
"I downloaded some Denver City Council meeting minutes and selected a portion of the meeting for us to transcribe. You can download it here: \n",
"https://drive.google.com/file/d/1N_kpSojRR5RYzupz6nqM8hMSoEF_R7pU/view?usp=sharing\n",
"\n",
"If you'd rather work with the original data, the HuggingFace dataset is [here](https://huggingface.co/datasets/huuuyeah/meetingbank) and the audio can be downloaded [here](https://huggingface.co/datasets/huuuyeah/MeetingBank_Audio/tree/main).\n",
"\n",
"The goal of this project is to use the audio to generate meeting minutes, including action items.\n",
"\n",
"For this project, you can either use the Denver meeting minutes, or you can record something of your own!\n",
"\n",
"## Please note:\n",
"\n",
"When you run the pip installs in the first cell below, you might get this error - it can be safely ignored - it sounds quite severe, but it doesn't seem to affect anything else in this project!\n",
"\n",
"\n",
"> ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\n",
"\n"
]
},
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Create meeting minutes from an Audio file\n",
|
||||
"\n",
|
||||
"I downloaded some Denver City Council meeting minutes and selected a portion of the meeting for us to transcribe. You can download it here: \n",
|
||||
"https://drive.google.com/file/d/1N_kpSojRR5RYzupz6nqM8hMSoEF_R7pU/view?usp=sharing\n",
|
||||
"\n",
|
||||
"If you'd rather work with the original data, the HuggingFace dataset is [here](https://huggingface.co/datasets/huuuyeah/meetingbank) and the audio can be downloaded [here](https://huggingface.co/datasets/huuuyeah/MeetingBank_Audio/tree/main).\n",
|
||||
"\n",
|
||||
"The goal of this product is to use the Audio to generate meeting minutes, including actions.\n",
|
||||
"\n",
|
||||
"For this project, you can either use the Denver meeting minutes, or you can record something of your own!\n",
|
||||
"\n",
|
||||
"## Please note:\n",
|
||||
"\n",
|
||||
"When you run the pip installs in the first cell below, you might get this error - it can be safely ignored - it sounds quite severe, but it doesn't seem to affect anything else in this project!\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"> ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
||||
"gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\n",
|
||||
"\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "It89APiAtTUF"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai httpx==0.27.2 gradio"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "f2vvgnFpHpID"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "FW8nl3XRFrz0"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from google.colab import drive\n",
|
||||
"from huggingface_hub import login\n",
|
||||
"from google.colab import userdata\n",
|
||||
"from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
|
||||
"import torch\n",
|
||||
"import gradio as gr"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Constants\n",
|
||||
"\n",
|
||||
"AUDIO_MODEL = \"whisper-1\"\n",
|
||||
"LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\""
|
||||
],
|
||||
"metadata": {
|
||||
"id": "q3D1_T0uG_Qh"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# New capability - connect this Colab to my Google Drive\n",
|
||||
"# See immediately below this for instructions to obtain denver_extract.mp3\n",
|
||||
"\n",
|
||||
"drive.mount(\"/content/drive\")\n",
|
||||
"audio_filename = \"/content/drive/MyDrive/llms/denver_extract.mp3\""
|
||||
],
|
||||
"metadata": {
|
||||
"id": "Es9GkQ0FGCMt"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Download denver_extract.mp3\n",
|
||||
"\n",
|
||||
"You can either use the same file as me, the extract from Denver city council minutes, or you can try your own..\n",
|
||||
"\n",
|
||||
"If you want to use the same as me, then please download my extract here, and put this on your Google Drive: \n",
|
||||
"https://drive.google.com/file/d/1N_kpSojRR5RYzupz6nqM8hMSoEF_R7pU/view?usp=sharing\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "HTl3mcjyzIEE"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Sign in to HuggingFace Hub\n",
|
||||
"\n",
|
||||
"hf_token = userdata.get('HF_TOKEN')\n",
|
||||
"login(hf_token, add_to_git_credential=True)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "xYW8kQYtF-3L"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Sign in to OpenAI using Secrets in Colab\n",
|
||||
"\n",
|
||||
"openai_api_key = userdata.get('OPENAI_API_KEY')\n",
|
||||
"openai = OpenAI(api_key=openai_api_key)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "qP6OB2OeGC2C"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Initialize Llama model and tokenizer\n",
|
||||
"\n",
|
||||
"quant_config = BitsAndBytesConfig(\n",
|
||||
" load_in_4bit=True,\n",
|
||||
" bnb_4bit_use_double_quant=True,\n",
|
||||
" bnb_4bit_compute_dtype=torch.bfloat16,\n",
|
||||
" bnb_4bit_quant_type=\"nf4\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
|
||||
"tokenizer.pad_token = tokenizer.eos_token\n",
|
||||
"\n",
|
||||
"model = AutoModelForCausalLM.from_pretrained(\n",
|
||||
" LLAMA,\n",
|
||||
" device_map=\"auto\",\n",
|
||||
" quantization_config=quant_config\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "hgQBeIYUyaqj"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Generate meeting minutes\n",
|
||||
"\n",
|
||||
"def generate_minutes(transcription, model, tokenizer, progress=gr.Progress()):\n",
|
||||
" progress(0.6, desc=\"Generating meeting minutes from transcript...\")\n",
|
||||
"\n",
|
||||
" system_message = \"You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown.\"\n",
|
||||
" user_prompt = f\"Below is an extract transcript of a meeting. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\\n{transcription}\"\n",
|
||||
"\n",
|
||||
" messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": system_message},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n",
|
||||
" outputs = model.generate(inputs, max_new_tokens=2000)\n",
|
||||
" response = tokenizer.decode(outputs[0])\n",
|
||||
"\n",
|
||||
" # Clean up the response, keep only the minutes\n",
|
||||
" progress(0.9, desc=\"Cleaning and formatting minutes...\")\n",
|
||||
" response = response.split(\"<|end_header_id|>\")[-1].strip().replace(\"<|eot_id|>\",\"\")\n",
|
||||
"\n",
|
||||
" return response"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "u9aFA7tjy3Ri"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Transcribe the uploaded audio file using OpenAI's Whisper model\n",
|
||||
"\n",
|
||||
"def transcribe_audio(audio_path, progress=gr.Progress()):\n",
|
||||
" progress(0.3, desc=\"Creating transcript from audio...\")\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" with open(audio_path, \"rb\") as audio_file:\n",
|
||||
" transcription = openai.audio.transcriptions.create(\n",
|
||||
" model=AUDIO_MODEL,\n",
|
||||
" file=audio_file,\n",
|
||||
" response_format=\"text\"\n",
|
||||
" )\n",
|
||||
" return transcription\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"Error during transcription: {str(e)}\""
|
||||
],
|
||||
"metadata": {
|
||||
"id": "OEuqR90Vy4AZ"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Process the uploaded audio file, transcribe it, and generate meeting minutes\n",
|
||||
"\n",
|
||||
"def process_upload(audio_file, progress=gr.Progress()):\n",
|
||||
" progress(0.1, desc=\"Starting process...\")\n",
|
||||
"\n",
|
||||
" if audio_file is None:\n",
|
||||
" return \"Please upload an audio file.\"\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" # Check file format\n",
|
||||
" if not str(audio_file).lower().endswith('.mp3'):\n",
|
||||
" return \"Please upload an MP3 file.\"\n",
|
||||
"\n",
|
||||
" # Get transcription\n",
|
||||
" transcription = transcribe_audio(audio_file)\n",
|
||||
" if transcription.startswith(\"Error\"):\n",
|
||||
" return transcription\n",
|
||||
"\n",
|
||||
" # Generate minutes\n",
|
||||
" minutes = generate_minutes(transcription, model, tokenizer)\n",
|
||||
" progress(1.0, desc=\"Process complete!\")\n",
|
||||
" return minutes\n",
|
||||
"\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"Error processing file: {str(e)}\""
|
||||
],
|
||||
"metadata": {
|
||||
"id": "lmdsy2iDy5d7"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Create Gradio interface\n",
|
||||
"\n",
|
||||
"interface = gr.Interface(\n",
|
||||
" fn=process_upload,\n",
|
||||
" inputs=gr.Audio(type=\"filepath\", label=\"Upload MP3 File\", format=\"mp3\"),\n",
|
||||
" outputs=gr.Markdown(label=\"Meeting Minutes\", min_height=60),\n",
|
||||
" title=\"Meeting Minutes Generator\",\n",
|
||||
" description=\"Upload an MP3 recording of your meeting to get AI-generated meeting minutes. This process may take a few minutes.\",\n",
|
||||
" flagging_mode=\"never\"\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "k2U2bWtey7Yo"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Launch Gradio interface\n",
|
||||
"\n",
|
||||
"interface.launch()"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "X3JbzRNRy9oG"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
}
|
||||
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "f2vvgnFpHpID"
},
"outputs": [],
"source": [
"!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai httpx==0.27.2 gradio"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "FW8nl3XRFrz0"
},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import requests\n",
"from openai import OpenAI\n",
"from google.colab import drive\n",
"from huggingface_hub import login\n",
"from google.colab import userdata\n",
"from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
"import torch\n",
"import gradio as gr"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "q3D1_T0uG_Qh"
},
"outputs": [],
"source": [
"# Constants\n",
"\n",
"AUDIO_MODEL = \"whisper-1\"\n",
"LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Es9GkQ0FGCMt"
},
"outputs": [],
"source": [
"# New capability - connect this Colab to my Google Drive\n",
"# See immediately below this for instructions to obtain denver_extract.mp3\n",
"\n",
"drive.mount(\"/content/drive\")\n",
"audio_filename = \"/content/drive/MyDrive/llms/denver_extract.mp3\""
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "HTl3mcjyzIEE"
},
"source": [
"# Download denver_extract.mp3\n",
"\n",
"You can either use the same file as me, the extract from Denver city council minutes, or you can try your own.\n",
"\n",
"If you want to use the same as me, then please download my extract here, and put this on your Google Drive: \n",
"https://drive.google.com/file/d/1N_kpSojRR5RYzupz6nqM8hMSoEF_R7pU/view?usp=sharing\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xYW8kQYtF-3L"
},
"outputs": [],
"source": [
"# Sign in to HuggingFace Hub\n",
"\n",
"hf_token = userdata.get('HF_TOKEN')\n",
"login(hf_token, add_to_git_credential=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qP6OB2OeGC2C"
},
"outputs": [],
"source": [
"# Sign in to OpenAI using Secrets in Colab\n",
"\n",
"openai_api_key = userdata.get('OPENAI_API_KEY')\n",
"openai = OpenAI(api_key=openai_api_key)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "hgQBeIYUyaqj"
},
"outputs": [],
"source": [
"# Initialize Llama model and tokenizer\n",
"\n",
"quant_config = BitsAndBytesConfig(\n",
"    load_in_4bit=True,\n",
"    bnb_4bit_use_double_quant=True,\n",
"    bnb_4bit_compute_dtype=torch.bfloat16,\n",
"    bnb_4bit_quant_type=\"nf4\"\n",
")\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
"tokenizer.pad_token = tokenizer.eos_token\n",
"\n",
"model = AutoModelForCausalLM.from_pretrained(\n",
"    LLAMA,\n",
"    device_map=\"auto\",\n",
"    quantization_config=quant_config\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u9aFA7tjy3Ri"
},
"outputs": [],
"source": [
"# Generate meeting minutes\n",
"\n",
"def generate_minutes(transcription, model, tokenizer, progress=gr.Progress()):\n",
"    progress(0.6, desc=\"Generating meeting minutes from transcript...\")\n",
"\n",
"    system_message = \"You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown.\"\n",
"    user_prompt = f\"Below is an extract transcript of a meeting. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\\n{transcription}\"\n",
"\n",
"    messages = [\n",
"        {\"role\": \"system\", \"content\": system_message},\n",
"        {\"role\": \"user\", \"content\": user_prompt}\n",
"    ]\n",
"\n",
"    inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n",
"    outputs = model.generate(inputs, max_new_tokens=2000)\n",
"    response = tokenizer.decode(outputs[0])\n",
"\n",
"    # Clean up the response, keep only the minutes\n",
"    progress(0.9, desc=\"Cleaning and formatting minutes...\")\n",
"    response = response.split(\"<|end_header_id|>\")[-1].strip().replace(\"<|eot_id|>\",\"\")\n",
"\n",
"    return response"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "OEuqR90Vy4AZ"
},
"outputs": [],
"source": [
"# Transcribe the uploaded audio file using OpenAI's Whisper model\n",
"\n",
"def transcribe_audio(audio_path, progress=gr.Progress()):\n",
"    progress(0.3, desc=\"Creating transcript from audio...\")\n",
"\n",
"    try:\n",
"        with open(audio_path, \"rb\") as audio_file:\n",
"            transcription = openai.audio.transcriptions.create(\n",
"                model=AUDIO_MODEL,\n",
"                file=audio_file,\n",
"                response_format=\"text\"\n",
"            )\n",
"        return transcription\n",
"    except Exception as e:\n",
"        return f\"Error during transcription: {str(e)}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "lmdsy2iDy5d7"
},
"outputs": [],
"source": [
"# Process the uploaded audio file, transcribe it, and generate meeting minutes\n",
"\n",
"def process_upload(audio_file, progress=gr.Progress()):\n",
"    progress(0.1, desc=\"Starting process...\")\n",
"\n",
"    if audio_file is None:\n",
"        return \"Please upload an audio file.\"\n",
"\n",
"    try:\n",
"        # Check file format\n",
"        if not str(audio_file).lower().endswith('.mp3'):\n",
"            return \"Please upload an MP3 file.\"\n",
"\n",
"        # Get transcription\n",
"        transcription = transcribe_audio(audio_file)\n",
"        if transcription.startswith(\"Error\"):\n",
"            return transcription\n",
"\n",
"        # Generate minutes\n",
"        minutes = generate_minutes(transcription, model, tokenizer)\n",
"        progress(1.0, desc=\"Process complete!\")\n",
"        return minutes\n",
"\n",
"    except Exception as e:\n",
"        return f\"Error processing file: {str(e)}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "k2U2bWtey7Yo"
},
"outputs": [],
"source": [
"# Create Gradio interface\n",
"\n",
"interface = gr.Interface(\n",
"    fn=process_upload,\n",
"    inputs=gr.Audio(type=\"filepath\", label=\"Upload MP3 File\", format=\"mp3\"),\n",
"    outputs=gr.Markdown(label=\"Meeting Minutes\", min_height=60),\n",
"    title=\"Meeting Minutes Generator\",\n",
"    description=\"Upload an MP3 recording of your meeting to get AI-generated meeting minutes. This process may take a few minutes.\",\n",
"    flagging_mode=\"never\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "X3JbzRNRy9oG"
},
"outputs": [],
"source": [
"# Launch Gradio interface\n",
"\n",
"interface.launch()"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
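The Gradio app in this notebook is a thin wrapper around two functions. The same transcribe-then-summarize flow can be driven directly, reusing transcribe_audio(), generate_minutes(), and the already-loaded model and tokenizer from the cells above. A minimal sketch, assuming the gr.Progress() defaults are harmless no-ops outside a Gradio event and that audio_filename points at the mounted Denver extract:

# Hedged sketch: drive the pipeline without the UI, using the objects defined above.
transcript = transcribe_audio(audio_filename)      # Whisper via the OpenAI API
if not transcript.startswith("Error"):
    # Summarize the transcript into markdown minutes with the quantized Llama model
    print(generate_minutes(transcript, model, tokenizer))
else:
    print(transcript)                              # surface the transcription error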
@@ -1,322 +1,332 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "yqlQTsxNdKrN"
},
"outputs": [],
"source": [
"!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai httpx==0.27.2 gradio"
]
},
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "yqlQTsxNdKrN"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai httpx==0.27.2 gradio"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from IPython.display import Markdown, display, update_display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from google.colab import drive\n",
|
||||
"from huggingface_hub import login\n",
|
||||
"from google.colab import userdata\n",
|
||||
"from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
|
||||
"import torch\n",
|
||||
"import gradio as gr\n",
|
||||
"import re"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "eyfvQrLxdkGT"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# one can always add more models, of course\n",
|
||||
"\n",
|
||||
"LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n",
|
||||
"OPENAI_MODEL = \"gpt-4o-mini\""
|
||||
],
|
||||
"metadata": {
|
||||
"id": "WW-cSZk7dnp6"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"hf_token = userdata.get('HF_TOKEN')\n",
|
||||
"login(hf_token, add_to_git_credential=True)\n",
|
||||
"openai_api_key = userdata.get('OPENAI_API_KEY')\n",
|
||||
"openai = OpenAI(api_key=openai_api_key)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "XG7Iam6Rdw8F"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"force_dark_mode = \"\"\"\n",
|
||||
"function refresh() {\n",
|
||||
" const url = new URL(window.location);\n",
|
||||
" if (url.searchParams.get('__theme') !== 'dark') {\n",
|
||||
" url.searchParams.set('__theme', 'dark');\n",
|
||||
" window.location.href = url.href;\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\"\"\""
|
||||
],
|
||||
"metadata": {
|
||||
"id": "Ov7WSdx9dzSt"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def dataset_generator(model, nature, shots, volume, language):\n",
|
||||
"\n",
|
||||
" examples = \"Instruction: 'Make a random sentence.'\\nAnswer: 'When I got home last night, I couldn't believe my eyes: All the pineapples had been removed from the pizza.'\"\n",
|
||||
" system_message = \"You are a random sentence generator. Generate 10 diverse English sentences.\"\n",
|
||||
" user_prompt = f\"Generate 10 random English sentences, like so:\\n{examples}\"\n",
|
||||
" sentences = \"\"\n",
|
||||
"\n",
|
||||
" if language == \"English\":\n",
|
||||
"\n",
|
||||
" for shot in list(shots.keys()):\n",
|
||||
" examples += f\"\\nExample instruction: '{shot}'\\nExample answer: '{shots[shot]}'\\n\"\n",
|
||||
"\n",
|
||||
" system_message = f\"You are a state-of-the art linguistic dataset compiler. You are given a 'Type' of sentence to create. \\\n",
|
||||
"Within the bounds of that type, create {volume} diverse sentences with differing structures and lengths. Make the sentences plausible, \\\n",
|
||||
"but be creative in filling them with random concrete information, names, and data. Here are some examples for how to go about that:\\n{examples}\\n\\\n",
|
||||
"Just output one sentence per line. Do not comment or format yor output in any way, shape, or form.\"\n",
|
||||
"\n",
|
||||
" user_prompt = f\"Generate {volume} English sentences of the following Type: {nature}. Just output one sentence per line. \\\n",
|
||||
"Do not comment or format yor output in any way, shape, or form.\"\n",
|
||||
"\n",
|
||||
" elif language == \"German\":\n",
|
||||
"\n",
|
||||
" for shot in list(shots.keys()):\n",
|
||||
" examples += f\"\\nAnweisung: '{shot}'\\nAntwort: '{shots[shot]}'\\n\"\n",
|
||||
"\n",
|
||||
" system_message = f\"Du bist ein weltklasse Datensatz-Sammler für Sprachdaten. Du erhältst einen 'Typ' von Sätzen, die du erstellen sollst. \\\n",
|
||||
"Im Rahmen dieses Typs, generiere {volume} untereinander verschiedene Sätze mit unterschiedlichen Satzlängen und -strukturen. Mache die Beispielsätze \\\n",
|
||||
"plausibel, aber fülle sie kreativ mit willkürlichen Informationen, Namen, und Daten aller Art. Hier sind ein paar Beispiel, wie du vorgehen sollst:\\n{examples}\\n\\\n",
|
||||
"Gib einfach einen Satz pro Zeile aus. Kommentiere oder formatiere deine Antwort in keinster Weise.\"\n",
|
||||
"\n",
|
||||
" user_prompt = f\"Generiere {volume} deutsche Sätze des folgenden Typs: {nature}. Gib einfach einen Satz pro Zeile aus. \\\n",
|
||||
"Kommentiere oder formatiere deine Antwort in keiner Weise.\"\n",
|
||||
"\n",
|
||||
" elif language == \"French\":\n",
|
||||
"\n",
|
||||
" for shot in list(shots.keys()):\n",
|
||||
" examples += f\"\\nConsigne: '{shot}'\\nRéponse: '{shots[shot]}'\\n\"\n",
|
||||
"\n",
|
||||
" system_message = f\"Tu es un outil linguistique de pointe, à savoir, un genérateur de données linguistiques. Tu seras assigné un 'Type' de phrases à créer. \\\n",
|
||||
"Dans le cadre de ce type-là, crée {volume} phrases diverses, avec des structures et longueurs qui varient. Génère des phrases qui soient plausibles, \\\n",
|
||||
"mais sois créatif, et sers-toi de données, noms, et informations aléatoires pour rendre les phrases plus naturelles. Voici quelques examples comment faire:\\n{examples}\\n\\\n",
|
||||
"Sors une seule phrase par ligne. Ne formatte ni commente ta réponse en aucune manière que ce soit.\"\n",
|
||||
"\n",
|
||||
" user_prompt = f\"S'il te plaît, crée {volume} phrases en français du Type suivant: {nature}. Sors une seule phrase par ligne. \\\n",
|
||||
"Ne formatte ni commente ta réponse en aucune manière que ce soit.\"\n",
|
||||
"\n",
|
||||
" messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": system_message},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" if model == \"Llama\":\n",
|
||||
"\n",
|
||||
" quant_config = BitsAndBytesConfig(\n",
|
||||
" load_in_4bit=True,\n",
|
||||
" bnb_4bit_use_double_quant=True,\n",
|
||||
" bnb_4bit_compute_dtype=torch.bfloat16,\n",
|
||||
" bnb_4bit_quant_type=\"nf4\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
|
||||
" tokenizer.pad_token = tokenizer.eos_token\n",
|
||||
" inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n",
|
||||
" streamer = TextStreamer(tokenizer)\n",
|
||||
" model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n",
|
||||
" outputs = model.generate(inputs, max_new_tokens=10000)\n",
|
||||
"\n",
|
||||
" response = tokenizer.decode(outputs[0])\n",
|
||||
" sentences = list(re.finditer(\"(?:<\\|end_header_id\\|>)([^<]+)(?:<\\|eot_id\\|>)\", str(response), re.DOTALL))[-1].group(1)\n",
|
||||
"\n",
|
||||
" elif model == \"OpenAI\":\n",
|
||||
" response = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages)\n",
|
||||
" sentences = response.choices[0].message.content\n",
|
||||
"\n",
|
||||
" return sentences"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "bEF8w_Mdd2Nb"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"global data\n",
|
||||
"data = \"\"\n",
|
||||
"\n",
|
||||
"with gr.Blocks(\n",
|
||||
" css=\"\"\"\n",
|
||||
" .red-button {\n",
|
||||
" background-color: darkred !important;\n",
|
||||
" border-color: red !important;\n",
|
||||
" }\n",
|
||||
" .blue-button {\n",
|
||||
" background-color: darkblue !important;\n",
|
||||
" border-color: blue !important;\n",
|
||||
" }\n",
|
||||
" .green-button {\n",
|
||||
" background-color: green !important;\n",
|
||||
" border-color: green !important;\n",
|
||||
" }\n",
|
||||
" \"\"\"\n",
|
||||
") as view:\n",
|
||||
" with gr.Row():\n",
|
||||
" title = gr.HTML(\"<h1><big>D</big>ataset Generator <small>PLUS</small></h1><h2>for English, German, and French</h2>\")\n",
|
||||
" subtitle = gr.HTML(\"<h3>Instructions:</h3><ol><li>Pick the language</li>\\\n",
|
||||
"<li>Select a model</li><li>Indicate how many sentences you need</li>\\\n",
|
||||
"<li>Describe the type of sentence you're looking for</li><li>Give up to three examples of the desired output sentence, and describe each of them briefly</li>\\\n",
|
||||
"<li>Hit <q>Create Dataset</q></li>\\\n",
|
||||
"<li>Save the output (.txt) to your Google Drive</li>\")\n",
|
||||
" with gr.Row():\n",
|
||||
" language_choice = gr.Dropdown(choices=[\"English\", \"German\", \"French\"], label=\"Select language\", value=\"English\", interactive=True)\n",
|
||||
" model_choice = gr.Dropdown(choices=[\"Llama\", \"OpenAI\"], label=\"Select model\", value=\"Llama\", interactive=True)\n",
|
||||
" volume = gr.Textbox(label=\"Required number of sentences\", interactive=True)\n",
|
||||
" with gr.Row():\n",
|
||||
" typeInput = gr.Textbox(label=\"Short description of the kind of sentence you need\", interactive=True)\n",
|
||||
" with gr.Row():\n",
|
||||
" sentence_1 = gr.Textbox(label=\"Example sentence 1\", interactive=True)\n",
|
||||
" instruction_1 = gr.Textbox(label=\"Description\", interactive=True)\n",
|
||||
" with gr.Row():\n",
|
||||
" sentence_2 = gr.Textbox(label=\"Example sentence 2\", interactive=True)\n",
|
||||
" instruction_2 = gr.Textbox(label=\"Description\", interactive=True)\n",
|
||||
" with gr.Row():\n",
|
||||
" sentence_3 = gr.Textbox(label=\"Example sentence 3\", interactive=True)\n",
|
||||
" instruction_3 = gr.Textbox(label=\"Description\", interactive=True)\n",
|
||||
" with gr.Row():\n",
|
||||
" liveSentences = gr.Markdown(\n",
|
||||
" value='<div style=\"color: #999; padding: 10px;\">Your sentences will be displayed here …</div>',\n",
|
||||
" label=\"Generated sentences:\",\n",
|
||||
" min_height=60,\n",
|
||||
" max_height=200\n",
|
||||
" )\n",
|
||||
" with gr.Row():\n",
|
||||
" generate = gr.Button(value=\"Generate sentences\", elem_classes=\"blue-button\")\n",
|
||||
" with gr.Row():\n",
|
||||
" clear = gr.Button(value=\"Clear everything\", elem_classes=\"red-button\")\n",
|
||||
" with gr.Row():\n",
|
||||
" outputPath = gr.Textbox(label=\"Specify the desired name and location on your Google Drive for the sentences (plain text) to be saved\", interactive=True)\n",
|
||||
" with gr.Row():\n",
|
||||
" save = gr.Button(value=\"Save generated data\", elem_classes=\"blue-button\")\n",
|
||||
"\n",
|
||||
" def generateSentences(typeInput, s1, i1, s2, i2, s3, i3, volume, language, model):\n",
|
||||
" global data\n",
|
||||
" nature = \"\"\n",
|
||||
" shots = {}\n",
|
||||
" amount = int(volume) if re.search(\"^[0-9]+$\", volume) is not None else 10\n",
|
||||
"\n",
|
||||
" if typeInput != None:\n",
|
||||
" nature = typeInput\n",
|
||||
" else:\n",
|
||||
" nature = \"Random sentences of mixed nature\"\n",
|
||||
"\n",
|
||||
" if s1 != None:\n",
|
||||
" if i1 != None:\n",
|
||||
" shots[i1] = s1\n",
|
||||
" else:\n",
|
||||
" shots[\"A medium-long random sentence about anything\"] = s1\n",
|
||||
" else:\n",
|
||||
" shots[\"A medium-long random sentence about anything\"] = \"Paul, waking up out of his half-drunken haze, clearly couldn't tell left from right and ran right into the door.\"\n",
|
||||
"\n",
|
||||
" if s2 != None:\n",
|
||||
" if i2 != None:\n",
|
||||
" shots[i2] = s2\n",
|
||||
" else:\n",
|
||||
" shots[\"A medium-long random sentence about anything\"] = s2\n",
|
||||
"\n",
|
||||
" if s3 != None:\n",
|
||||
" if i3 != None:\n",
|
||||
" shots[i3] = s3\n",
|
||||
" else:\n",
|
||||
" shots[\"A medium-long random sentence about anything\"] = s3\n",
|
||||
"\n",
|
||||
" sentences = dataset_generator(model, nature, shots, amount, language)\n",
|
||||
" data = sentences\n",
|
||||
"\n",
|
||||
" return sentences\n",
|
||||
"\n",
|
||||
" def saveData(path):\n",
|
||||
" global data\n",
|
||||
" drive.mount(\"/content/drive\")\n",
|
||||
"\n",
|
||||
" dir_path = os.path.dirname(\"/content/drive/MyDrive/\" + path)\n",
|
||||
"\n",
|
||||
" if not os.path.exists(dir_path):\n",
|
||||
" os.makedirs(dir_path)\n",
|
||||
"\n",
|
||||
" with open(\"/content/drive/MyDrive/\" + path, \"w\", encoding=\"utf-8\") as f:\n",
|
||||
" f.write(data)\n",
|
||||
"\n",
|
||||
" generate.click(generateSentences, inputs=[typeInput, sentence_1, instruction_1, sentence_2, instruction_2, sentence_3, instruction_3, volume, language_choice, model_choice], outputs=liveSentences)\n",
|
||||
" clear.click(\n",
|
||||
" lambda: [\n",
|
||||
" gr.update(value=\"\"),\n",
|
||||
" gr.update(value=\"\"),\n",
|
||||
" gr.update(value=\"\"),\n",
|
||||
" gr.update(value=\"\"),\n",
|
||||
" gr.update(value=\"\"),\n",
|
||||
" gr.update(value=\"\"),\n",
|
||||
" gr.update(value=\"\"),\n",
|
||||
" gr.update(value=\"\"),\n",
|
||||
" gr.update(value='<div style=\"color: #999; padding: 10px;\">Your sentences will be displayed here …</div>'),\n",
|
||||
" gr.update(value=\"\"),\n",
|
||||
" gr.update(value=\"Save generated data\", elem_classes=\"blue-button\")],\n",
|
||||
" None,\n",
|
||||
" [volume, typeInput, sentence_1, instruction_1, sentence_2, instruction_2,\n",
|
||||
" sentence_3, instruction_3, liveSentences, outputPath, save],\n",
|
||||
" queue=False\n",
|
||||
" )\n",
|
||||
" save.click(saveData, inputs=outputPath, outputs=None).then(lambda: gr.update(value=\"Your data has been saved\", elem_classes=\"green-button\"), [], [save])\n",
|
||||
"\n",
|
||||
"view.launch(share=True) #, debug=True)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "VRKdu0fEt8mg"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
}
|
||||
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "eyfvQrLxdkGT"
},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"from IPython.display import Markdown, display, update_display\n",
"from openai import OpenAI\n",
"from google.colab import drive\n",
"from huggingface_hub import login\n",
"from google.colab import userdata\n",
"from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig\n",
"import torch\n",
"import gradio as gr\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WW-cSZk7dnp6"
},
"outputs": [],
"source": [
"# one can always add more models, of course\n",
"\n",
"LLAMA = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n",
"OPENAI_MODEL = \"gpt-4o-mini\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "XG7Iam6Rdw8F"
},
"outputs": [],
"source": [
"hf_token = userdata.get('HF_TOKEN')\n",
"login(hf_token, add_to_git_credential=True)\n",
"openai_api_key = userdata.get('OPENAI_API_KEY')\n",
"openai = OpenAI(api_key=openai_api_key)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ov7WSdx9dzSt"
},
"outputs": [],
"source": [
"force_dark_mode = \"\"\"\n",
"function refresh() {\n",
"    const url = new URL(window.location);\n",
"    if (url.searchParams.get('__theme') !== 'dark') {\n",
"        url.searchParams.set('__theme', 'dark');\n",
"        window.location.href = url.href;\n",
"    }\n",
"}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "bEF8w_Mdd2Nb"
},
"outputs": [],
"source": [
"def dataset_generator(model, nature, shots, volume, language):\n",
"\n",
"    examples = \"Instruction: 'Make a random sentence.'\\nAnswer: 'When I got home last night, I couldn't believe my eyes: All the pineapples had been removed from the pizza.'\"\n",
"    system_message = \"You are a random sentence generator. Generate 10 diverse English sentences.\"\n",
"    user_prompt = f\"Generate 10 random English sentences, like so:\\n{examples}\"\n",
"    sentences = \"\"\n",
"\n",
"    if language == \"English\":\n",
"\n",
"        for shot in list(shots.keys()):\n",
"            examples += f\"\\nExample instruction: '{shot}'\\nExample answer: '{shots[shot]}'\\n\"\n",
"\n",
"        system_message = f\"You are a state-of-the-art linguistic dataset compiler. You are given a 'Type' of sentence to create. \\\n",
"Within the bounds of that type, create {volume} diverse sentences with differing structures and lengths. Make the sentences plausible, \\\n",
"but be creative in filling them with random concrete information, names, and data. Here are some examples for how to go about that:\\n{examples}\\n\\\n",
"Just output one sentence per line. Do not comment or format your output in any way, shape, or form.\"\n",
"\n",
"        user_prompt = f\"Generate {volume} English sentences of the following Type: {nature}. Just output one sentence per line. \\\n",
"Do not comment or format your output in any way, shape, or form.\"\n",
"\n",
"    elif language == \"German\":\n",
"\n",
"        for shot in list(shots.keys()):\n",
"            examples += f\"\\nAnweisung: '{shot}'\\nAntwort: '{shots[shot]}'\\n\"\n",
"\n",
"        system_message = f\"Du bist ein weltklasse Datensatz-Sammler für Sprachdaten. Du erhältst einen 'Typ' von Sätzen, die du erstellen sollst. \\\n",
"Im Rahmen dieses Typs, generiere {volume} untereinander verschiedene Sätze mit unterschiedlichen Satzlängen und -strukturen. Mache die Beispielsätze \\\n",
"plausibel, aber fülle sie kreativ mit willkürlichen Informationen, Namen, und Daten aller Art. Hier sind ein paar Beispiele, wie du vorgehen sollst:\\n{examples}\\n\\\n",
"Gib einfach einen Satz pro Zeile aus. Kommentiere oder formatiere deine Antwort in keinster Weise.\"\n",
"\n",
"        user_prompt = f\"Generiere {volume} deutsche Sätze des folgenden Typs: {nature}. Gib einfach einen Satz pro Zeile aus. \\\n",
"Kommentiere oder formatiere deine Antwort in keiner Weise.\"\n",
"\n",
"    elif language == \"French\":\n",
"\n",
"        for shot in list(shots.keys()):\n",
"            examples += f\"\\nConsigne: '{shot}'\\nRéponse: '{shots[shot]}'\\n\"\n",
"\n",
"        system_message = f\"Tu es un outil linguistique de pointe, à savoir, un générateur de données linguistiques. Tu seras assigné un 'Type' de phrases à créer. \\\n",
"Dans le cadre de ce type-là, crée {volume} phrases diverses, avec des structures et longueurs qui varient. Génère des phrases qui soient plausibles, \\\n",
"mais sois créatif, et sers-toi de données, noms, et informations aléatoires pour rendre les phrases plus naturelles. Voici quelques exemples comment faire:\\n{examples}\\n\\\n",
"Sors une seule phrase par ligne. Ne formatte ni commente ta réponse en aucune manière que ce soit.\"\n",
"\n",
"        user_prompt = f\"S'il te plaît, crée {volume} phrases en français du Type suivant: {nature}. Sors une seule phrase par ligne. \\\n",
"Ne formatte ni commente ta réponse en aucune manière que ce soit.\"\n",
"\n",
"    messages = [\n",
"        {\"role\": \"system\", \"content\": system_message},\n",
"        {\"role\": \"user\", \"content\": user_prompt}\n",
"    ]\n",
"\n",
"    if model == \"Llama\":\n",
"\n",
"        quant_config = BitsAndBytesConfig(\n",
"            load_in_4bit=True,\n",
"            bnb_4bit_use_double_quant=True,\n",
"            bnb_4bit_compute_dtype=torch.bfloat16,\n",
"            bnb_4bit_quant_type=\"nf4\"\n",
"        )\n",
"\n",
"        tokenizer = AutoTokenizer.from_pretrained(LLAMA)\n",
"        tokenizer.pad_token = tokenizer.eos_token\n",
"        inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\").to(\"cuda\")\n",
"        streamer = TextStreamer(tokenizer)\n",
"        model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map=\"auto\", quantization_config=quant_config)\n",
"        outputs = model.generate(inputs, max_new_tokens=10000)\n",
"\n",
"        response = tokenizer.decode(outputs[0])\n",
"        sentences = list(re.finditer(\"(?:<\\\|end_header_id\\\|>)([^<]+)(?:<\\\|eot_id\\\|>)\", str(response), re.DOTALL))[-1].group(1)\n",
"\n",
"    elif model == \"OpenAI\":\n",
"        response = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages)\n",
"        sentences = response.choices[0].message.content\n",
"\n",
"    return sentences"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "VRKdu0fEt8mg"
},
"outputs": [],
"source": [
"global data\n",
"data = \"\"\n",
"\n",
"with gr.Blocks(\n",
"    css=\"\"\"\n",
"    .red-button {\n",
"        background-color: darkred !important;\n",
"        border-color: red !important;\n",
"    }\n",
"    .blue-button {\n",
"        background-color: darkblue !important;\n",
"        border-color: blue !important;\n",
"    }\n",
"    .green-button {\n",
"        background-color: green !important;\n",
"        border-color: green !important;\n",
"    }\n",
"    \"\"\"\n",
") as view:\n",
"    with gr.Row():\n",
"        title = gr.HTML(\"<h1><big>D</big>ataset Generator <small>PLUS</small></h1><h2>for English, German, and French</h2>\")\n",
"        subtitle = gr.HTML(\"<h3>Instructions:</h3><ol><li>Pick the language</li>\\\n",
"<li>Select a model</li><li>Indicate how many sentences you need</li>\\\n",
"<li>Describe the type of sentence you're looking for</li><li>Give up to three examples of the desired output sentence, and describe each of them briefly</li>\\\n",
"<li>Hit <q>Create Dataset</q></li>\\\n",
"<li>Save the output (.txt) to your Google Drive</li>\")\n",
"    with gr.Row():\n",
"        language_choice = gr.Dropdown(choices=[\"English\", \"German\", \"French\"], label=\"Select language\", value=\"English\", interactive=True)\n",
"        model_choice = gr.Dropdown(choices=[\"Llama\", \"OpenAI\"], label=\"Select model\", value=\"Llama\", interactive=True)\n",
"        volume = gr.Textbox(label=\"Required number of sentences\", interactive=True)\n",
"    with gr.Row():\n",
"        typeInput = gr.Textbox(label=\"Short description of the kind of sentence you need\", interactive=True)\n",
"    with gr.Row():\n",
"        sentence_1 = gr.Textbox(label=\"Example sentence 1\", interactive=True)\n",
"        instruction_1 = gr.Textbox(label=\"Description\", interactive=True)\n",
"    with gr.Row():\n",
"        sentence_2 = gr.Textbox(label=\"Example sentence 2\", interactive=True)\n",
"        instruction_2 = gr.Textbox(label=\"Description\", interactive=True)\n",
"    with gr.Row():\n",
"        sentence_3 = gr.Textbox(label=\"Example sentence 3\", interactive=True)\n",
"        instruction_3 = gr.Textbox(label=\"Description\", interactive=True)\n",
"    with gr.Row():\n",
"        liveSentences = gr.Markdown(\n",
"            value='<div style=\"color: #999; padding: 10px;\">Your sentences will be displayed here …</div>',\n",
"            label=\"Generated sentences:\",\n",
"            min_height=60,\n",
"            max_height=200\n",
"        )\n",
"    with gr.Row():\n",
"        generate = gr.Button(value=\"Generate sentences\", elem_classes=\"blue-button\")\n",
"    with gr.Row():\n",
"        clear = gr.Button(value=\"Clear everything\", elem_classes=\"red-button\")\n",
"    with gr.Row():\n",
"        outputPath = gr.Textbox(label=\"Specify the desired name and location on your Google Drive for the sentences (plain text) to be saved\", interactive=True)\n",
"    with gr.Row():\n",
"        save = gr.Button(value=\"Save generated data\", elem_classes=\"blue-button\")\n",
"\n",
"    def generateSentences(typeInput, s1, i1, s2, i2, s3, i3, volume, language, model):\n",
"        global data\n",
"        nature = \"\"\n",
"        shots = {}\n",
"        amount = int(volume) if re.search(\"^[0-9]+$\", volume) is not None else 10\n",
"\n",
"        if typeInput != None:\n",
"            nature = typeInput\n",
"        else:\n",
"            nature = \"Random sentences of mixed nature\"\n",
"\n",
"        if s1 != None:\n",
"            if i1 != None:\n",
"                shots[i1] = s1\n",
"            else:\n",
"                shots[\"A medium-long random sentence about anything\"] = s1\n",
"        else:\n",
"            shots[\"A medium-long random sentence about anything\"] = \"Paul, waking up out of his half-drunken haze, clearly couldn't tell left from right and ran right into the door.\"\n",
"\n",
"        if s2 != None:\n",
"            if i2 != None:\n",
"                shots[i2] = s2\n",
"            else:\n",
"                shots[\"A medium-long random sentence about anything\"] = s2\n",
"\n",
"        if s3 != None:\n",
"            if i3 != None:\n",
"                shots[i3] = s3\n",
"            else:\n",
"                shots[\"A medium-long random sentence about anything\"] = s3\n",
"\n",
"        sentences = dataset_generator(model, nature, shots, amount, language)\n",
"        data = sentences\n",
"\n",
"        return sentences\n",
"\n",
"    def saveData(path):\n",
"        global data\n",
"        drive.mount(\"/content/drive\")\n",
"\n",
"        dir_path = os.path.dirname(\"/content/drive/MyDrive/\" + path)\n",
"\n",
"        if not os.path.exists(dir_path):\n",
"            os.makedirs(dir_path)\n",
"\n",
"        with open(\"/content/drive/MyDrive/\" + path, \"w\", encoding=\"utf-8\") as f:\n",
"            f.write(data)\n",
"\n",
"    generate.click(generateSentences, inputs=[typeInput, sentence_1, instruction_1, sentence_2, instruction_2, sentence_3, instruction_3, volume, language_choice, model_choice], outputs=liveSentences)\n",
"    clear.click(\n",
"        lambda: [\n",
"            gr.update(value=\"\"),\n",
"            gr.update(value=\"\"),\n",
"            gr.update(value=\"\"),\n",
"            gr.update(value=\"\"),\n",
"            gr.update(value=\"\"),\n",
"            gr.update(value=\"\"),\n",
"            gr.update(value=\"\"),\n",
"            gr.update(value=\"\"),\n",
"            gr.update(value='<div style=\"color: #999; padding: 10px;\">Your sentences will be displayed here …</div>'),\n",
"            gr.update(value=\"\"),\n",
"            gr.update(value=\"Save generated data\", elem_classes=\"blue-button\")],\n",
"        None,\n",
"        [volume, typeInput, sentence_1, instruction_1, sentence_2, instruction_2,\n",
"         sentence_3, instruction_3, liveSentences, outputPath, save],\n",
"        queue=False\n",
"    )\n",
"    save.click(saveData, inputs=outputPath, outputs=None).then(lambda: gr.update(value=\"Your data has been saved\", elem_classes=\"green-button\"), [], [save])\n",
"\n",
"view.launch(share=True) #, debug=True)"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"authorship_tag": "ABX9TyPxJzufoQPtui+nhl1J1xiR",
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
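The Blocks UI in this notebook is only a front end for dataset_generator(), which can also be called directly. A hedged sketch follows, where the type description "Polite requests in a restaurant" and the shots dict are made-up illustrative values in the same shape generateSentences() assembles above:

# Direct call, bypassing the Gradio UI. "shots" maps an instruction (description)
# to an example sentence, exactly as generateSentences() builds it.
shots = {
    "A medium-long random sentence about anything":
        "Paul, waking up out of his half-drunken haze, clearly couldn't tell left from right and ran right into the door.",
}
# "OpenAI" routes to gpt-4o-mini; "Llama" would load the 4-bit quantized model instead.
print(dataset_generator("OpenAI", "Polite requests in a restaurant", shots, 5, "English"))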
@@ -387,7 +387,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "llm_engineering-yg2xCEUG",
+"display_name": "Python 3 (ipykernel)",
 "language": "python",
 "name": "python3"
 },
@@ -401,9 +401,9 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.8"
+"version": "3.11.11"
 }
 },
 "nbformat": 4,
-"nbformat_minor": 2
+"nbformat_minor": 4
 }