diff --git a/week3/community-contributions/kachaje-genai-bootcamp/week3.ipynb b/week3/community-contributions/kachaje-genai-bootcamp/week3.ipynb new file mode 100644 index 0000000..f585299 --- /dev/null +++ b/week3/community-contributions/kachaje-genai-bootcamp/week3.ipynb @@ -0,0 +1,211 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e568e8cc", + "metadata": {}, + "source": [ + "# Synthetic Data Generator\n", + "\n", + "Tool for generating sample synthetic data using a local Llama model" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "4191b928", + "metadata": {}, + "outputs": [], + "source": [ + "# imports \n", + "\n", + "from openai import OpenAI\n", + "import json\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "93d63879", + "metadata": {}, + "outputs": [], + "source": [ + "openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "0b9821dc", + "metadata": {}, + "outputs": [], + "source": [ + "# model\n", + "\n", + "MODEL = \"llama3.2\"" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "5fe77aa5", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_synthetic_data(user_prompt = (\n", + " \"Generate 5 realistic customer reviews for a product. \"\n", + " \"The review should be 1-2 sentences long and contain a mix of positive and negative comments. \"\n", + " \"The review should be formatted as a JSON object with the following fields: \"\n", + " \"review: a string containing the review text\"\n", + " )):\n", + " \n", + " system_message = (\n", + " \"You are a helpful assistant that generates synthetic data.\"\n", + " )\n", + " response = openai.chat.completions.create(\n", + " model=MODEL,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ],\n", + " response_format={\"type\": \"json_object\"}\n", + " )\n", + " result = json.loads(response.choices[0].message.content)\n", + " return result\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "047309d4", + "metadata": {}, + "outputs": [], + "source": [ + "result = generate_synthetic_data()\n", + "\n", + "formatted_json_result = json.dumps(result, indent=4)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "07124b11", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"review1\": \"I'm really impressed with how easy the setup was for this product! It only took me about 10 minutes to get everything up and running.\",\n", + " \"review2\": \"The quality of the material is top-notch, but I've noticed a few scratches after using it for a week.\",\n", + " \"review3\": \"I was skeptical at first, but this product has truly exceeded my expectations - it's even more functional than I thought it would be!\",\n", + " \"review4\": \"Unfortunately, the battery life could be longer. It's fine for occasional use, but it doesn't hold up as well during extended periods.\",\n", + " \"review5\": \"I love how compact and lightweight this product is - perfect for my morning commute! The only reason I'm giving 4 stars instead of 5 is because the charging port can get a bit finicky.\"\n", + "}\n" + ] + } + ], + "source": [ + "print(formatted_json_result)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "a937ac81", + "metadata": {}, + "outputs": [], + "source": [ + "user_prompt = \"\"\"\n", + "Generate a dataset of 5 employees with name, department, salary, and years of experience.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "2cef4545", + "metadata": {}, + "outputs": [], + "source": [ + "result = generate_synthetic_data(user_prompt)\n", + "\n", + "formatted_json_result = json.dumps(result, indent=4)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "f7d64ed3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"employees\": [\n", + " {\n", + " \"name\": \"John Doe\",\n", + " \"department\": \"Marketing\",\n", + " \"salary\": 60000,\n", + " \"years_of_experience\": 8\n", + " },\n", + " {\n", + " \"name\": \"Jane Smith\",\n", + " \"department\": \"IT\",\n", + " \"salary\": 70000,\n", + " \"years_of_experience\": 5\n", + " },\n", + " {\n", + " \"name\": \"Bob Johnson\",\n", + " \"department\": \"Sales\",\n", + " \"salary\": 55000,\n", + " \"years_of_experience\": 10\n", + " },\n", + " {\n", + " \"name\": \"Emily Chen\",\n", + " \"department\": \"Marketing\",\n", + " \"salary\": 65000,\n", + " \"years_of_experience\": 6\n", + " },\n", + " {\n", + " \"name\": \"Michael Davis\",\n", + " \"department\": \"IT\",\n", + " \"salary\": 75000,\n", + " \"years_of_experience\": 7\n", + " }\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "print(formatted_json_result)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}