Merge pull request #497 from AniketKakde04/community-contributions-branch
Added my contributions to community-contributions
This commit is contained in:
143
week2/community-contributions/Agent_translate_gemini.ipynb
Normal file
143
week2/community-contributions/Agent_translate_gemini.ipynb
Normal file
@@ -0,0 +1,143 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Additional End of week Exercise - week 2\n",
|
||||
"\n",
|
||||
"Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in Week 1 Exercise.\n",
|
||||
"\n",
|
||||
"This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n",
|
||||
"\n",
|
||||
"If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.\n",
|
||||
"\n",
|
||||
"I will publish a full solution here soon - unless someone beats me to it...\n",
|
||||
"\n",
|
||||
"There are so many commercial applications for this, from a language tutor, to a company onboarding solution, to a companion AI to a course (like this one!) I can't wait to see your results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a07e7793-b8f5-44f4-aded-5562f633271a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Agent that can listen for audio and convert it to text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "da58ed0f-f781-4c51-8e5d-fdb05db98c8c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import gradio as gr\n",
|
||||
"import google.generativeai as genai\n",
|
||||
"from dotenv import load_dotenv\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "078cf34a-881e-44f4-9947-c45d7fe992a3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"load_dotenv()\n",
|
||||
"\n",
|
||||
"google_api_key = os.getenv('GOOGLE_API_KEY')\n",
|
||||
"if google_api_key:\n",
|
||||
" print(f\"Google API Key exists and begins {google_api_key[:8]}\")\n",
|
||||
"else:\n",
|
||||
" print(\"Google API Key not set\")\n",
|
||||
"\n",
|
||||
"genai.configure(api_key=google_api_key)\n",
|
||||
"model = genai.GenerativeModel(\"gemini-2.0-flash\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f77228ea-d0e1-4434-9191-555a6d680625",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def transcribe_translate_with_gemini(audio_file_path):\n",
|
||||
" if not audio_file_path:\n",
|
||||
" return \"⚠️ No audio file received.\"\n",
|
||||
"\n",
|
||||
" prompt = (\n",
|
||||
" \"You're an AI that listens to a voice message in any language and returns the English transcription. \"\n",
|
||||
" \"Please transcribe and translate the following audio to English. If already in English, just transcribe it.\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" uploaded_file = genai.upload_file(audio_file_path)\n",
|
||||
"\n",
|
||||
" # 🔁 Send prompt + uploaded audio reference to Gemini\n",
|
||||
" response = model.generate_content(\n",
|
||||
" contents=[\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"parts\": [\n",
|
||||
" {\"text\": prompt},\n",
|
||||
" uploaded_file \n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" return response.text.strip()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eb6c6d1e-1be3-404d-83f3-fc0855dc9f67",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gr.Interface(\n",
|
||||
" fn=transcribe_translate_with_gemini,\n",
|
||||
" inputs=gr.Audio(label=\"Record voice\", type=\"filepath\"),\n",
|
||||
" outputs=\"text\",\n",
|
||||
" title=\"🎙️ Voice-to-English Translator (Gemini Only)\",\n",
|
||||
" description=\"Speak in any language and get the English transcription using Gemini multimodal API.\"\n",
|
||||
").launch()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8b105082-e388-44bc-9617-1a81f38e2f3f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user