diff --git a/community-contributions/Cosmus_Week3_exercise.ipynb b/community-contributions/Cosmus_Week3_exercise.ipynb new file mode 100644 index 0000000..04dd692 --- /dev/null +++ b/community-contributions/Cosmus_Week3_exercise.ipynb @@ -0,0 +1,282 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "id": "83f28feb", + "metadata": {}, + "source": [ + "### Synthetic Dataset Generator with LLMs (Anthropic API)\n\nEverything runs with your Anthropic API key — no model downloads." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7510bec6", + "metadata": {}, + "outputs": [], + "source": [ + "# Imports and API setup\n", + "\n", + "import os\n", + "import json\n", + "import requests\n", + "import gradio as gr\n", + "from dotenv import load_dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5abc2ed3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "API key loaded successfully!\n" + ] + } + ], + "source": [ + "# Load variables from .env file\n", + "load_dotenv()\n", + "\n", + "# Get your Anthropic API key\n", + "API_KEY = os.getenv(\"API_KEY\")\n", + "\n", + "if not API_KEY:\n", + " raise ValueError(\" API_KEY not found. 
Check your .env file\")\n", + "\n", + "print(\"API key loaded successfully!\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e49ec675", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'data': [{'type': 'model', 'id': 'claude-haiku-4-5-20251001', 'display_name': 'Claude Haiku 4.5', 'created_at': '2025-10-15T00:00:00Z'}, {'type': 'model', 'id': 'claude-sonnet-4-5-20250929', 'display_name': 'Claude Sonnet 4.5', 'created_at': '2025-09-29T00:00:00Z'}, {'type': 'model', 'id': 'claude-opus-4-1-20250805', 'display_name': 'Claude Opus 4.1', 'created_at': '2025-08-05T00:00:00Z'}, {'type': 'model', 'id': 'claude-opus-4-20250514', 'display_name': 'Claude Opus 4', 'created_at': '2025-05-22T00:00:00Z'}, {'type': 'model', 'id': 'claude-sonnet-4-20250514', 'display_name': 'Claude Sonnet 4', 'created_at': '2025-05-22T00:00:00Z'}, {'type': 'model', 'id': 'claude-3-7-sonnet-20250219', 'display_name': 'Claude Sonnet 3.7', 'created_at': '2025-02-24T00:00:00Z'}, {'type': 'model', 'id': 'claude-3-5-haiku-20241022', 'display_name': 'Claude Haiku 3.5', 'created_at': '2024-10-22T00:00:00Z'}, {'type': 'model', 'id': 'claude-3-haiku-20240307', 'display_name': 'Claude Haiku 3', 'created_at': '2024-03-07T00:00:00Z'}], 'has_more': False, 'first_id': 'claude-haiku-4-5-20251001', 'last_id': 'claude-3-haiku-20240307'}\n" + ] + } + ], + "source": [ + "# Anthropic endpoint\n", + "API_URL = \"https://api.anthropic.com/v1/messages\"\n", + "\n", + "#see the models i can have access to\n", + "r = requests.get(\n", + " \"https://api.anthropic.com/v1/models\",\n", + " headers={\n", + " \"x-api-key\": API_KEY,\n", + " \"anthropic-version\": \"2023-06-01\"\n", + " },\n", + ")\n", + "print(r.json() if r.ok else r.text)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b886ff2", + "metadata": {}, + "outputs": [], + "source": [ + "# Models to compare (variety)\n", + "MODELS = {\n", + " \"Claude 3 Haiku\": 
\"claude-3-haiku-20240307\", # fast & cheap\n", + " \"Claude Haiku 4.5\": \"claude-haiku-4-5-20251001\",\n", + " \"Claude Sonnet 4.5\": \"claude-sonnet-4-5-20250929\", # fast & cheap\n", + " \"Claude Opus 4.1\": \"claude-opus-4-1-20250805\",\n", + " \"Claude Opus 4\": \"claude-opus-4-20250514\", # fast & cheap\n", + " \"Claude Sonnet 4\": \"claude-sonnet-4-20250514\", # balanced\n", + " \"Claude Sonnet 3.7\": \"claude-3-7-sonnet-20250219\" # powerful (slowest)\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "id": "464ddf4c", + "metadata": {}, + "source": [ + "Synthetic Dataset Generation Function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d64bca8", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset generator\n", + "\n", + "def generate_dataset(topic, n_records, model_choice):\n", + " prompt = f\"\"\"\n", + "You are a data generator creating synthetic datasets.\n", + "Generate {n_records} records about {topic}.\n", + "Output only a valid JSON array (no explanations or markdown).\n", + "Each record should have 4–6 fields and look realistic but fake.\n", + "\"\"\"\n", + "\n", + " headers = {\n", + " \"x-api-key\": API_KEY,\n", + " \"content-type\": \"application/json\",\n", + " \"anthropic-version\": \"2023-06-01\",\n", + " }\n", + "\n", + " payload = {\n", + " \"model\": model_choice,\n", + " \"max_tokens\": 500,\n", + " \"temperature\": 0.7,\n", + " \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n", + " }\n", + "\n", + " response = requests.post(API_URL, headers=headers, data=json.dumps(payload))\n", + " result = response.json()\n", + "\n", + " if \"content\" in result and len(result[\"content\"]) > 0:\n", + " return result[\"content\"][0][\"text\"]\n", + " else:\n", + " return f\"Error: {result}\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "bac01702", + "metadata": {}, + "source": [ + "Gradio UI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "857d078d", + "metadata": {}, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Running on local URL: http://127.0.0.1:7864\n", + "* To create a public link, set `share=True` in `launch()`.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "