Week3 GenAi Andela bootcamp project
This commit is contained in:
211
week3/community-contributions/kachaje-genai-bootcamp/week3.ipynb
Normal file
211
week3/community-contributions/kachaje-genai-bootcamp/week3.ipynb
Normal file
@@ -0,0 +1,211 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e568e8cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Synthetic Data Generator\n",
|
||||
"\n",
|
||||
"Tool for generating sample synthetic data using a local Llama model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"id": "4191b928",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports \n",
|
||||
"\n",
|
||||
"from openai import OpenAI\n",
|
||||
"import json\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"id": "93d63879",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Client pointed at a local OpenAI-compatible endpoint\n",
"openai = OpenAI(\n",
"    base_url='http://localhost:11434/v1',\n",
"    api_key='ollama',\n",
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"id": "0b9821dc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Name of the model requested from the local endpoint\n",
|
||||
"\n",
|
||||
"MODEL = \"llama3.2\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"id": "5fe77aa5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def generate_synthetic_data(user_prompt=(\n",
"    \"Generate 5 realistic customer reviews for a product. \"\n",
"    \"The review should be 1-2 sentences long and contain a mix of positive and negative comments. \"\n",
"    \"The review should be formatted as a JSON object with the following fields: \"\n",
"    \"review: a string containing the review text\"\n",
")):\n",
"    \"\"\"Ask the chat model for synthetic data and return the parsed JSON.\n",
"\n",
"    Args:\n",
"        user_prompt: Description of the data to generate. Defaults to a\n",
"            request for five short customer reviews.\n",
"\n",
"    Returns:\n",
"        The Python object decoded from the model's JSON reply.\n",
"\n",
"    Raises:\n",
"        ValueError: If the reply is empty or is not valid JSON\n",
"            (json.JSONDecodeError is a subclass of ValueError).\n",
"    \"\"\"\n",
"    # JSON mode only constrains the output format; the model must still be\n",
"    # told to produce JSON, and a caller's prompt may not mention it.\n",
"    system_message = (\n",
"        \"You are a helpful assistant that generates synthetic data. \"\n",
"        \"Always respond with a single valid JSON object.\"\n",
"    )\n",
"    response = openai.chat.completions.create(\n",
"        model=MODEL,\n",
"        messages=[\n",
"            {\"role\": \"system\", \"content\": system_message},\n",
"            {\"role\": \"user\", \"content\": user_prompt},\n",
"        ],\n",
"        response_format={\"type\": \"json_object\"},\n",
"    )\n",
"    content = response.choices[0].message.content\n",
"    # Fail with a clear message instead of the TypeError json.loads(None) gives.\n",
"    if not content:\n",
"        raise ValueError(\"The model returned an empty response; nothing to parse.\")\n",
"    return json.loads(content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"id": "047309d4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Generate with the built-in default prompt, then serialise with indentation for printing\n",
"result = generate_synthetic_data()\n",
"formatted_json_result = json.dumps(result, indent=4)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"id": "07124b11",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"review1\": \"I'm really impressed with how easy the setup was for this product! It only took me about 10 minutes to get everything up and running.\",\n",
|
||||
" \"review2\": \"The quality of the material is top-notch, but I've noticed a few scratches after using it for a week.\",\n",
|
||||
" \"review3\": \"I was skeptical at first, but this product has truly exceeded my expectations - it's even more functional than I thought it would be!\",\n",
|
||||
" \"review4\": \"Unfortunately, the battery life could be longer. It's fine for occasional use, but it doesn't hold up as well during extended periods.\",\n",
|
||||
" \"review5\": \"I love how compact and lightweight this product is - perfect for my morning commute! The only reason I'm giving 4 stars instead of 5 is because the charging port can get a bit finicky.\"\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(formatted_json_result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"id": "a937ac81",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"user_prompt = \"\"\"\n",
|
||||
"Generate a dataset of 5 employees with name, department, salary, and years of experience.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"id": "2cef4545",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Generate from the custom prompt defined in the previous cell, then serialise for printing\n",
"result = generate_synthetic_data(user_prompt)\n",
"formatted_json_result = json.dumps(result, indent=4)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"id": "f7d64ed3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"employees\": [\n",
|
||||
" {\n",
|
||||
" \"name\": \"John Doe\",\n",
|
||||
" \"department\": \"Marketing\",\n",
|
||||
" \"salary\": 60000,\n",
|
||||
" \"years_of_experience\": 8\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"Jane Smith\",\n",
|
||||
" \"department\": \"IT\",\n",
|
||||
" \"salary\": 70000,\n",
|
||||
" \"years_of_experience\": 5\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"Bob Johnson\",\n",
|
||||
" \"department\": \"Sales\",\n",
|
||||
" \"salary\": 55000,\n",
|
||||
" \"years_of_experience\": 10\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"Emily Chen\",\n",
|
||||
" \"department\": \"Marketing\",\n",
|
||||
" \"salary\": 65000,\n",
|
||||
" \"years_of_experience\": 6\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"Michael Davis\",\n",
|
||||
" \"department\": \"IT\",\n",
|
||||
" \"salary\": 75000,\n",
|
||||
" \"years_of_experience\": 7\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(formatted_json_result)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user