{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "83f28feb",
   "metadata": {},
   "source": [
    "### Synthetic Dataset Generator with LLMs (Anthropic API)\n",
    "\n",
    "Everything runs with your Anthropic API key — no model downloads."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7510bec6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports and API setup\n",
    "\n",
    "import os\n",
    "import json\n",
    "import requests\n",
    "import gradio as gr\n",
    "from dotenv import load_dotenv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5abc2ed3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "API key loaded successfully!\n"
     ]
    }
   ],
   "source": [
    "# Load variables from the .env file into the process environment\n",
    "load_dotenv()\n",
    "\n",
    "# Read the Anthropic API key.\n",
    "# `API_KEY` is the variable name this notebook has always used; `ANTHROPIC_API_KEY`\n",
    "# (the name Anthropic's own SDKs read) is accepted as a fallback.\n",
    "# Whitespace is stripped because the key is sent verbatim in the `x-api-key` header,\n",
    "# where a stray space or newline copied into .env would make every request fail.\n",
    "API_KEY = (os.getenv(\"API_KEY\") or os.getenv(\"ANTHROPIC_API_KEY\") or \"\").strip()\n",
    "\n",
    "# Fail fast here rather than with an opaque 401 from the first API call\n",
    "if not API_KEY:\n",
    "    raise ValueError(\n",
    "        \"API_KEY (or ANTHROPIC_API_KEY) not found. Check your .env file\"\n",
    "    )\n",
    "\n",
    "print(\"API key loaded successfully!\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e49ec675",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'data': [{'type': 'model', 'id': 'claude-haiku-4-5-20251001', 'display_name': 'Claude Haiku 4.5', 'created_at': '2025-10-15T00:00:00Z'}, {'type': 'model', 'id': 'claude-sonnet-4-5-20250929', 'display_name': 'Claude Sonnet 4.5', 'created_at': '2025-09-29T00:00:00Z'}, {'type': 'model', 'id': 'claude-opus-4-1-20250805', 'display_name': 'Claude Opus 4.1', 'created_at': '2025-08-05T00:00:00Z'}, {'type': 'model', 'id': 'claude-opus-4-20250514', 'display_name': 'Claude Opus 4', 'created_at': '2025-05-22T00:00:00Z'}, {'type': 'model', 'id': 'claude-sonnet-4-20250514', 'display_name': 'Claude Sonnet 4', 'created_at': '2025-05-22T00:00:00Z'}, {'type': 'model', 'id': 'claude-3-7-sonnet-20250219', 'display_name': 'Claude Sonnet 3.7', 'created_at': '2025-02-24T00:00:00Z'}, {'type': 
'model', 'id': 'claude-3-5-haiku-20241022', 'display_name': 'Claude Haiku 3.5', 'created_at': '2024-10-22T00:00:00Z'}, {'type': 'model', 'id': 'claude-3-haiku-20240307', 'display_name': 'Claude Haiku 3', 'created_at': '2024-03-07T00:00:00Z'}], 'has_more': False, 'first_id': 'claude-haiku-4-5-20251001', 'last_id': 'claude-3-haiku-20240307'}\n"
     ]
    }
   ],
   "source": [
    "# Anthropic endpoint and shared request settings\n",
    "API_URL = \"https://api.anthropic.com/v1/messages\"\n",
    "ANTHROPIC_VERSION = \"2023-06-01\"\n",
    "REQUEST_TIMEOUT_S = 120  # seconds; `requests` waits indefinitely when no timeout is given\n",
    "\n",
    "# List the models this API key has access to\n",
    "r = requests.get(\n",
    "    \"https://api.anthropic.com/v1/models\",\n",
    "    headers={\n",
    "        \"x-api-key\": API_KEY,\n",
    "        \"anthropic-version\": ANTHROPIC_VERSION,\n",
    "    },\n",
    "    timeout=REQUEST_TIMEOUT_S,\n",
    ")\n",
    "print(r.json() if r.ok else r.text)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1b886ff2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Models to compare: display name -> Anthropic model ID.\n",
    "# Tier guide: Haiku = fastest / cheapest, Sonnet = balanced,\n",
    "# Opus = most capable (slowest / priciest).\n",
    "MODELS = {\n",
    "    \"Claude 3 Haiku\": \"claude-3-haiku-20240307\",  # Haiku: fast & cheap\n",
    "    \"Claude Haiku 4.5\": \"claude-haiku-4-5-20251001\",  # Haiku: fast & cheap\n",
    "    \"Claude Sonnet 4.5\": \"claude-sonnet-4-5-20250929\",  # Sonnet: balanced\n",
    "    \"Claude Opus 4.1\": \"claude-opus-4-1-20250805\",  # Opus: most capable\n",
    "    \"Claude Opus 4\": \"claude-opus-4-20250514\",  # Opus: most capable\n",
    "    \"Claude Sonnet 4\": \"claude-sonnet-4-20250514\",  # Sonnet: balanced\n",
    "    \"Claude Sonnet 3.7\": \"claude-3-7-sonnet-20250219\",  # Sonnet: balanced (older)\n",
    "}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "464ddf4c",
   "metadata": {},
   "source": [
    "#### Synthetic Dataset Generation Function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d64bca8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset generator\n",
    "\n",
    "def generate_dataset(topic, n_records, model_choice, max_tokens=4096):\n",
    "    \"\"\"Ask an Anthropic model for a synthetic dataset and return its reply as text.\n",
    "\n",
    "    Args:\n",
    "        topic: Subject of the records; inserted verbatim into the prompt.\n",
    "        n_records: Number of records to request. Converted with int(), so a\n",
    "            float such as 10.0 coming from a UI number widget reads as 10.\n",
    "        model_choice: Either a key of MODELS (display name) or a raw model ID.\n",
    "        max_tokens: Upper bound on generated tokens. This used to be a fixed\n",
    "            500, which cut the JSON array off mid-record for all but tiny\n",
    "            requests. NOTE(review): 4096 is assumed to be within the output\n",
    "            limit of every model in MODELS - re-check when adding models.\n",
    "\n",
    "    Returns:\n",
    "        The model's text reply (expected to be a JSON array), or a string\n",
    "        starting with \"Error:\" when the request could not be completed or\n",
    "        the reply was truncated.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        n_records = int(n_records)\n",
    "    except (TypeError, ValueError):\n",
    "        return f\"Error: n_records must be a whole number, got {n_records!r}\"\n",
    "    if n_records < 1:\n",
    "        return \"Error: n_records must be at least 1\"\n",
    "\n",
    "    prompt = f\"\"\"\n",
    "You are a data generator creating synthetic datasets.\n",
    "Generate {n_records} records about {topic}.\n",
    "Output only a valid JSON array (no explanations or markdown).\n",
    "Each record should have 4–6 fields and look realistic but fake.\n",
    "\"\"\"\n",
    "\n",
    "    headers = {\n",
    "        \"x-api-key\": API_KEY,\n",
    "        \"content-type\": \"application/json\",\n",
    "        \"anthropic-version\": ANTHROPIC_VERSION,\n",
    "    }\n",
    "\n",
    "    payload = {\n",
    "        # Accept a display name from MODELS, or pass a raw model ID through unchanged\n",
    "        \"model\": MODELS.get(model_choice, model_choice),\n",
    "        \"max_tokens\": max_tokens,\n",
    "        \"temperature\": 0.7,\n",
    "        \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n",
    "    }\n",
    "\n",
    "    # Network failures (DNS, refused connection, timeout) become an error string\n",
    "    # instead of an exception, matching how API errors are already reported.\n",
    "    try:\n",
    "        response = requests.post(\n",
    "            API_URL, headers=headers, json=payload, timeout=REQUEST_TIMEOUT_S\n",
    "        )\n",
    "    except requests.RequestException as exc:\n",
    "        return f\"Error: request failed: {exc}\"\n",
    "\n",
    "    try:\n",
    "        result = response.json()\n",
    "    except ValueError:\n",
    "        # Body was not JSON (for example an HTML error page from a gateway)\n",
    "        return f\"Error: HTTP {response.status_code}: {response.text[:500]}\"\n",
    "\n",
    "    content = result.get(\"content\") if isinstance(result, dict) else None\n",
    "    if not content:\n",
    "        # API-level errors arrive as JSON without a \"content\" list\n",
    "        return f\"Error: {result}\"\n",
    "\n",
    "    # Join every text block rather than assuming the first block is text\n",
    "    text = \"\".join(\n",
    "        block.get(\"text\", \"\") for block in content if block.get(\"type\") == \"text\"\n",
    "    )\n",
    "\n",
    "    if result.get(\"stop_reason\") == \"max_tokens\":\n",
    "        # The reply hit the token cap, so the JSON array is almost certainly incomplete\n",
    "        return (\n",
    "            f\"Error: output was cut off at max_tokens={max_tokens}; \"\n",
    "            \"request fewer records or raise max_tokens. Partial output follows:\\n\"\n",
    "            f\"{text}\"\n",
    "        )\n",
    "\n",
    "    return text\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bac01702",
   "metadata": {},
   "source": [
    "#### Gradio UI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "857d078d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "* Running on local URL: http://127.0.0.1:7864\n",
      "* To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "
" ], "text/plain": [ "