Merge branch 'main' of https://github.com/ed-donner/llm_engineering into solisoma-week6
This commit is contained in:
16
community-contributions/Reputation_Radar/Dockerfile
Normal file
16
community-contributions/Reputation_Radar/Dockerfile
Normal file
@@ -0,0 +1,16 @@
|
||||
# Slim Python base keeps the image small while still shipping pip.
FROM python:3.11-slim

WORKDIR /app

# Copy only the dependency manifest first so this layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Create an unprivileged account so the service does not run as root.
# /app is handed over to it in case the app writes files there at runtime
# (the README lists a logs/ directory) -- TODO confirm against services/utils.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --create-home app \
    && chown app:app /app

COPY --chown=app:app . .

# Streamlit server settings: no browser auto-open, listen on all interfaces.
ENV STREAMLIT_SERVER_HEADLESS=true \
    STREAMLIT_SERVER_ADDRESS=0.0.0.0 \
    STREAMLIT_SERVER_PORT=8501

# Documentation only; publish with `docker run -p 8501:8501`.
EXPOSE 8501

USER app

CMD ["streamlit", "run", "app.py"]
|
||||
13
community-contributions/Reputation_Radar/Makefile
Normal file
13
community-contributions/Reputation_Radar/Makefile
Normal file
@@ -0,0 +1,13 @@
|
||||
# Interpreter used by every target; override with e.g. `make PYTHON=python3.11 run`.
PYTHON ?= python

.PHONY: install run test

# Upgrade pip, then install the runtime dependencies.
install:
	$(PYTHON) -m pip install --upgrade pip
	$(PYTHON) -m pip install -r requirements.txt

# Launch the Streamlit app with the same interpreter that `install` used.
run:
	$(PYTHON) -m streamlit run app.py

# Run the test suite with the same interpreter that `install` used.
test:
	$(PYTHON) -m pytest
|
||||
124
community-contributions/Reputation_Radar/README.md
Normal file
124
community-contributions/Reputation_Radar/README.md
Normal file
@@ -0,0 +1,124 @@
|
||||
# 📡 ReputationRadar
|
||||
> Real-time brand intelligence with human-readable insights.
|
||||
|
||||
ReputationRadar is a Streamlit dashboard that unifies Reddit, Twitter/X, and Trustpilot chatter, classifies sentiment with OpenAI (or VADER fallback), and delivers exportable executive summaries. It ships with modular services, caching, retry-aware scrapers, demo data, and pytest coverage—ready for production hardening or internal deployment.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
- [Demo](#demo)
|
||||
- [Feature Highlights](#feature-highlights)
|
||||
- [Architecture Overview](#architecture-overview)
|
||||
- [Quick Start](#quick-start)
|
||||
- [Configuration & Credentials](#configuration--credentials)
|
||||
- [Running Tests](#running-tests)
|
||||
- [Working Without API Keys](#working-without-api-keys)
|
||||
- [Exports & Deliverables](#exports--deliverables)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [Legal & Compliance](#legal--compliance)
|
||||
|
||||
---
|
||||
|
||||
|
||||
## Demo
|
||||
|
||||
A video demo of the app is available at:
|
||||
https://drive.google.com/file/d/1XZ09NOht1H5LCJEbOrAldny2L5SV1DeT/view?usp=sharing
|
||||
|
||||
|
||||
## Feature Highlights
|
||||
- **Adaptive Ingestion** – Toggle Reddit, Twitter/X, and Trustpilot independently; backoff, caching, and polite scraping keep providers happy.
|
||||
- **Smart Sentiment** – Batch OpenAI classification with rationale-aware prompts and auto-fallback to VADER when credentials are missing.
|
||||
- **Actionable Summaries** – Executive brief card (highlights, risks, tone, actions) plus refreshed PDF layout that respects margins and typography.
|
||||
- **Interactive Insights** – Plotly visuals, per-source filtering, and a lean “Representative Mentions” link list to avoid content overload.
|
||||
- **Export Suite** – CSV, Excel (auto-sized columns), and polished PDF snapshots for stakeholder handoffs.
|
||||
- **Robust Foundation** – Structured logging, reusable UI components, pytest suites, Dockerfile, and Makefile for frictionless iteration.
|
||||
|
||||
---
|
||||
|
||||
## Architecture Overview
|
||||
```
|
||||
community-contributions/Reputation_Radar/
|
||||
├── app.py # Streamlit orchestrator & layout
|
||||
├── components/ # Sidebar, dashboard, summaries, loaders
|
||||
├── services/ # Reddit/Twitter clients, Trustpilot scraper, LLM wrapper, utilities
|
||||
├── samples/ # Demo JSON payloads (auto-loaded when credentials missing)
|
||||
├── tests/ # Pytest coverage for utilities and LLM fallback
|
||||
├── assets/ # Placeholder icons/logo
|
||||
├── logs/ # Streaming log output
|
||||
├── requirements.txt # Runtime dependencies (includes PDF + Excel writers)
|
||||
├── Dockerfile # Containerised deployment recipe
|
||||
└── Makefile # Helper targets for install/run/test
|
||||
```
|
||||
Each service returns a normalised payload to keep the downstream sentiment pipeline deterministic. Deduplication is handled centrally via fuzzy matching, and timestamps are coerced to UTC before analysis.
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
1. **Clone & enter the project directory (`community-contributions/Reputation_Radar`).**
|
||||
2. **Install dependencies and launch Streamlit:**
|
||||
```bash
|
||||
pip install -r requirements.txt && streamlit run app.py
|
||||
```
|
||||
(Use a virtual environment if preferred.)
|
||||
3. **Populate the sidebar:** add your brand name, optional filters, toggled sources, and API credentials (stored only in session state).
|
||||
4. **Click “Run Analysis 🚀”** – follow the status indicators as sources load, sentiment processes, and summaries render.
|
||||
|
||||
### Optional Docker Run
|
||||
```bash
|
||||
docker build -t reputation-radar .
|
||||
docker run --rm -p 8501:8501 -e OPENAI_API_KEY=your_key reputation-radar
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Configuration & Credentials
|
||||
The app reads from `.env`, Streamlit secrets, or direct sidebar input. Expected variables:
|
||||
|
||||
| Variable | Purpose |
|
||||
| --- | --- |
|
||||
| `OPENAI_API_KEY` | Enables OpenAI sentiment + executive summary (falls back to VADER if absent). |
|
||||
| `REDDIT_CLIENT_ID` | PRAW client ID for Reddit API access. |
|
||||
| `REDDIT_CLIENT_SECRET` | PRAW client secret. |
|
||||
| `REDDIT_USER_AGENT` | Descriptive user agent (e.g., `ReputationRadar/1.0 by you`). |
|
||||
| `TWITTER_BEARER_TOKEN` | Twitter/X v2 recent search bearer token. |
|
||||
|
||||
Credential validation mirrors the guidance from `week1/day1.ipynb`—mistyped OpenAI keys surface helpful warnings before analysis begins.
|
||||
|
||||
---
|
||||
|
||||
## Running Tests
|
||||
```bash
|
||||
pytest
|
||||
```
|
||||
Tests cover sentiment fallback behaviour and core sanitisation/deduplication helpers. Extend them as you add new data transforms or UI logic.
|
||||
|
||||
---
|
||||
|
||||
## Working Without API Keys
|
||||
- Reddit/Twitter/Trustpilot can be toggled independently; missing credentials raise gentle warnings rather than hard failures.
|
||||
- Curated fixtures in `samples/` load automatically for any enabled source whose credentials are missing, keeping charts, exports, and PDF output functional in demo mode.
|
||||
- The LLM layer drops to VADER sentiment scoring and skips the executive summary when `OPENAI_API_KEY` is absent.
|
||||
|
||||
---
|
||||
|
||||
## Exports & Deliverables
|
||||
- **CSV** – Clean, UTF-8 dataset for quick spreadsheet edits.
|
||||
- **Excel** – Auto-sized columns, formatted timestamps, instantaneous import into stakeholder workbooks.
|
||||
- **PDF** – Professionally typeset executive summary with bullet lists, consistent margins, and wrapped excerpts (thanks to ReportLab’s Platypus engine).
|
||||
|
||||
All exports are regenerated on demand and never persisted server-side.
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
- **OpenAI key missing/invalid** – Watch the sidebar notices; the app falls back gracefully but no executive summary will be produced.
|
||||
- **Twitter 401/403** – Confirm your bearer token scope and that the project has search access enabled.
|
||||
- **Rate limiting (429)** – Built-in sleeps help, but repeated requests may require manual pauses. Try narrowing filters or reducing per-source limits.
|
||||
- **Trustpilot blocks** – Respect robots.txt. If scraping is denied, switch to the official API or provide compliant CSV imports.
|
||||
- **PDF text clipping** – Resolved by the new layout; if you customise templates, ensure column widths and table styles remain inside the page margins.
|
||||
|
||||
---
|
||||
|
||||
## Legal & Compliance
|
||||
ReputationRadar surfaces public discourse for legitimate monitoring purposes. Always comply with each platform’s Terms of Service, local regulations, and privacy expectations. Avoid storing third-party data longer than necessary, and never commit API keys to version control—the app only keeps them in Streamlit session state.
|
||||
436
community-contributions/Reputation_Radar/app.py
Normal file
436
community-contributions/Reputation_Radar/app.py
Normal file
@@ -0,0 +1,436 @@
|
||||
"""ReputationRadar Streamlit application entrypoint."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import pandas as pd
|
||||
import streamlit as st
|
||||
from dotenv import load_dotenv
|
||||
from reportlab.lib import colors
|
||||
from reportlab.lib.pagesizes import letter
|
||||
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
|
||||
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle
|
||||
|
||||
from components.dashboard import render_overview, render_source_explorer, render_top_comments
|
||||
from components.filters import render_sidebar
|
||||
from components.summary import render_summary
|
||||
from components.loaders import show_empty_state, source_status
|
||||
from services import llm, reddit_client, trustpilot_scraper, twitter_client, utils
|
||||
from services.llm import SentimentResult
|
||||
from services.utils import (
|
||||
NormalizedItem,
|
||||
ServiceError,
|
||||
ServiceWarning,
|
||||
initialize_logger,
|
||||
load_sample_items,
|
||||
normalize_items,
|
||||
parse_date_range,
|
||||
validate_openai_key,
|
||||
)
|
||||
|
||||
|
||||
# Page metadata (title, icon, wide layout) for the Streamlit app.
st.set_page_config(page_title="ReputationRadar", page_icon="📡", layout="wide")
# Load variables from a .env file; override=True lets .env values replace
# variables that are already set in the process environment.
load_dotenv(override=True)
# Module-wide logger produced by services.utils.initialize_logger.
LOGGER = initialize_logger()

# Page header rendered on every script run.
st.title("📡 ReputationRadar")
st.caption("Aggregate brand chatter, classify sentiment, and surface actionable insights in minutes.")
|
||||
|
||||
|
||||
def _get_env_defaults() -> Dict[str, Optional[str]]:
|
||||
"""Read supported credentials from environment variables."""
|
||||
return {
|
||||
"OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
|
||||
"REDDIT_CLIENT_ID": os.getenv("REDDIT_CLIENT_ID"),
|
||||
"REDDIT_CLIENT_SECRET": os.getenv("REDDIT_CLIENT_SECRET"),
|
||||
"REDDIT_USER_AGENT": os.getenv("REDDIT_USER_AGENT", "ReputationRadar/1.0"),
|
||||
"TWITTER_BEARER_TOKEN": os.getenv("TWITTER_BEARER_TOKEN"),
|
||||
}
|
||||
|
||||
|
||||
@st.cache_data(ttl=600, show_spinner=False)
def cached_reddit_fetch(
    brand: str,
    limit: int,
    date_range: str,
    min_upvotes: int,
    client_id: str,
    client_secret: str,
    user_agent: str,
) -> List[NormalizedItem]:
    """Fetch Reddit mentions via ``reddit_client.fetch_mentions``.

    Wrapped in ``st.cache_data`` with ``ttl=600`` and the spinner disabled.
    The three credential arguments are bundled into a single ``credentials``
    dict, and ``date_range`` is forwarded under the name ``date_filter``.
    """
    fetch_kwargs = dict(
        brand=brand,
        credentials=dict(
            client_id=client_id,
            client_secret=client_secret,
            user_agent=user_agent,
        ),
        limit=limit,
        date_filter=date_range,
        min_upvotes=min_upvotes,
    )
    return reddit_client.fetch_mentions(**fetch_kwargs)
|
||||
|
||||
|
||||
@st.cache_data(ttl=600, show_spinner=False)
def cached_twitter_fetch(
    brand: str,
    limit: int,
    min_likes: int,
    language: str,
    bearer: str,
) -> List[NormalizedItem]:
    """Fetch Twitter/X mentions via ``twitter_client.fetch_mentions``.

    Wrapped in ``st.cache_data`` with ``ttl=600`` and the spinner disabled.
    ``bearer`` is forwarded under the name ``bearer_token``.
    """
    fetch_kwargs = dict(
        brand=brand,
        bearer_token=bearer,
        limit=limit,
        min_likes=min_likes,
        language=language,
    )
    return twitter_client.fetch_mentions(**fetch_kwargs)
|
||||
|
||||
|
||||
@st.cache_data(ttl=600, show_spinner=False)
def cached_trustpilot_fetch(
    brand: str,
    language: str,
    pages: int = 2,
) -> List[NormalizedItem]:
    """Fetch Trustpilot reviews via ``trustpilot_scraper.fetch_reviews``.

    Wrapped in ``st.cache_data`` with ``ttl=600`` and the spinner disabled.
    ``pages`` defaults to 2 and is passed straight through to the scraper.
    """
    reviews = trustpilot_scraper.fetch_reviews(
        brand=brand,
        language=language,
        pages=pages,
    )
    return reviews
|
||||
|
||||
|
||||
def _to_dataframe(items: List[NormalizedItem], sentiments: List[SentimentResult]) -> pd.DataFrame:
|
||||
data = []
|
||||
for item, sentiment in zip(items, sentiments):
|
||||
data.append(
|
||||
{
|
||||
"source": item["source"],
|
||||
"id": item["id"],
|
||||
"url": item.get("url"),
|
||||
"author": item.get("author"),
|
||||
"timestamp": item["timestamp"],
|
||||
"text": item["text"],
|
||||
"label": sentiment.label,
|
||||
"confidence": sentiment.confidence,
|
||||
"meta": json.dumps(item.get("meta", {})),
|
||||
}
|
||||
)
|
||||
df = pd.DataFrame(data)
|
||||
if not df.empty:
|
||||
df["timestamp"] = pd.to_datetime(df["timestamp"])
|
||||
return df
|
||||
|
||||
|
||||
def _build_pdf(summary: Optional[Dict[str, str]], df: pd.DataFrame) -> bytes:
    """Render the executive summary and a sentiment snapshot table as a PDF.

    Args:
        summary: Summary payload. When it has a non-empty ``"raw"`` entry, that
            text is laid out with ``_summary_to_story``; otherwise a placeholder
            paragraph is written instead.
        df: Mentions with ``timestamp``, ``label``, ``source`` and ``text``
            columns. The 15 most recent rows are tabulated.

    Returns:
        The PDF document as bytes.
    """
    # Function-scope imports keep this block self-contained.
    from datetime import timezone
    from xml.sax.saxutils import escape

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=letter,
        rightMargin=40,
        leftMargin=40,
        topMargin=60,
        bottomMargin=40,
        title="ReputationRadar Executive Summary",
    )
    styles = getSampleStyleSheet()
    title_style = styles["Title"]
    subtitle_style = ParagraphStyle(
        "Subtitle",
        parent=styles["BodyText"],
        fontSize=10,
        leading=14,
        textColor="#555555",
    )
    body_style = ParagraphStyle(
        "Body",
        parent=styles["BodyText"],
        leading=14,
        fontSize=11,
    )
    bullet_style = ParagraphStyle(
        "Bullet",
        parent=body_style,
        leftIndent=16,
        bulletIndent=8,
        spaceBefore=2,
        spaceAfter=2,
    )
    heading_style = ParagraphStyle(
        "SectionHeading",
        parent=styles["Heading3"],
        spaceBefore=10,
        spaceAfter=6,
    )

    # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M")

    story: List[Paragraph | Spacer | Table] = []
    story.append(Paragraph("ReputationRadar Executive Summary", title_style))
    story.append(Spacer(1, 6))
    story.append(Paragraph(f"Generated on: {generated_at} UTC", subtitle_style))
    story.append(Spacer(1, 18))

    if summary and summary.get("raw"):
        story.extend(_summary_to_story(summary["raw"], body_style, bullet_style, heading_style))
    else:
        story.append(
            Paragraph(
                "Executive summary disabled (OpenAI key missing).",
                body_style,
            )
        )
    story.append(Spacer(1, 16))
    story.append(Paragraph("Sentiment Snapshot", styles["Heading2"]))
    story.append(Spacer(1, 10))

    def _cell(text: str) -> Paragraph:
        # Paragraph parses its text as XML-like markup, so scraped content
        # containing "&" or "<" must be escaped to render literally.
        return Paragraph(escape(text), body_style)

    table_data: List[List[Paragraph]] = [
        [
            Paragraph("Date", body_style),
            Paragraph("Sentiment", body_style),
            Paragraph("Source", body_style),
            Paragraph("Excerpt", body_style),
        ]
    ]
    snapshot = df.sort_values("timestamp", ascending=False).head(15)
    for _, row in snapshot.iterrows():
        # Truncate first, then escape, so an entity is never cut in half.
        excerpt = _truncate_text(row["text"], 180)
        table_data.append(
            [
                _cell(row["timestamp"].strftime("%Y-%m-%d %H:%M")),
                _cell(row["label"].title()),
                _cell(row["source"].title()),
                _cell(excerpt),
            ]
        )

    # Column widths sum to 490pt, inside the 532pt usable width of a letter
    # page with 40pt side margins.
    table = Table(table_data, colWidths=[90, 70, 80, 250])
    table.setStyle(
        TableStyle(
            [
                ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#f3f4f6")),
                ("TEXTCOLOR", (0, 0), (-1, 0), colors.HexColor("#1f2937")),
                ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
                ("ALIGN", (0, 0), (-1, -1), "LEFT"),
                ("VALIGN", (0, 0), (-1, -1), "TOP"),
                ("INNERGRID", (0, 0), (-1, -1), 0.25, colors.HexColor("#d1d5db")),
                ("BOX", (0, 0), (-1, -1), 0.5, colors.HexColor("#9ca3af")),
                ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f9fafb")]),
            ]
        )
    )
    story.append(table)

    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()
|
||||
|
||||
|
||||
def _summary_to_story(
    raw_summary: str,
    body_style: ParagraphStyle,
    bullet_style: ParagraphStyle,
    heading_style: ParagraphStyle,
) -> List[Paragraph | Spacer]:
    """Convert the raw summary text into ReportLab flowables.

    Each non-blank line becomes one paragraph:

    * a short line (< 40 chars) ending in ``:`` or a line starting with one of
      the known section titles becomes a heading;
    * a line starting with ``-`` or a single ``*`` becomes a bullet;
    * anything else becomes body text.

    ``**bold**`` markers are stripped, and text is XML-escaped because
    ``Paragraph`` parses its input as markup. A trailing spacer is appended.

    Args:
        raw_summary: Multi-line summary text.
        body_style: Style for plain paragraphs.
        bullet_style: Style for bullet paragraphs.
        heading_style: Style for section headings.

    Returns:
        The flowables in source order, followed by one ``Spacer``.
    """
    from xml.sax.saxutils import escape  # function-scope: keeps the block self-contained

    section_titles = ("highlights", "risks & concerns", "recommended actions", "overall tone")
    bold_markers = r"\*\*(.*?)\*\*"

    story: List[Paragraph | Spacer] = []
    for raw_line in raw_summary.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        clean = re.sub(bold_markers, r"\1", line)
        if clean.endswith(":") and len(clean) < 40:
            story.append(Paragraph(escape(clean.rstrip(":")), heading_style))
            continue
        if clean.lower().startswith(section_titles):
            story.append(Paragraph(escape(clean), heading_style))
            continue
        # A leading "**" opens bold text, not a "*" bullet; treating it as a
        # bullet would strip one asterisk and leave stray markers behind.
        is_bullet = line.startswith("-") or (line.startswith("*") and not line.startswith("**"))
        if is_bullet:
            bullet_text = re.sub(bold_markers, r"\1", line[1:].strip())
            story.append(Paragraph(escape(bullet_text), bullet_style, bulletText="•"))
        else:
            story.append(Paragraph(escape(clean), body_style))
    story.append(Spacer(1, 10))
    return story
|
||||
|
||||
|
||||
def _truncate_text(text: str, max_length: int) -> str:
|
||||
clean = re.sub(r"\s+", " ", text).strip()
|
||||
if len(clean) <= max_length:
|
||||
return clean
|
||||
return clean[: max_length - 1].rstrip() + "…"
|
||||
|
||||
|
||||
def _build_excel(df: pd.DataFrame) -> bytes:
    """Serialise the mentions table to an in-memory ``.xlsx`` workbook.

    Works on a copy of ``df`` whose ``timestamp`` column is formatted as
    ``YYYY-MM-DD HH:MM`` text. The data goes to a sheet named ``Mentions``
    through the ``xlsxwriter`` engine. Each column is sized to its longest
    value (or its header) plus two, capped at 60.
    """
    export_df = df.copy()
    export_df["timestamp"] = export_df["timestamp"].dt.strftime("%Y-%m-%d %H:%M")

    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
        export_df.to_excel(writer, index=False, sheet_name="Mentions")
        worksheet = writer.sheets["Mentions"]
        for position, header in enumerate(export_df.columns):
            longest_cell = export_df[header].astype(str).map(len).max()
            width = min(60, max(longest_cell, len(header)) + 2)
            worksheet.set_column(position, position, width)
    buffer.seek(0)
    return buffer.getvalue()
|
||||
|
||||
|
||||
# Session-state slot holding the most recent analysis, so results survive the
# script reruns that Streamlit triggers on every widget interaction.
_RESULTS_STATE_KEY = "analysis_results"


def _collect_source(status_label, noun, source_name, sample_name, fetch, threshold) -> List[NormalizedItem]:
    """Fetch one source inside a status block, with demo-data fallback.

    Args:
        status_label: Text shown by ``source_status`` while fetching.
        noun: Plural noun used in the "Fetched N ..." status line.
        source_name: Display name used in the demo/failure messages.
        sample_name: Name passed to ``load_sample_items`` on a ``ServiceWarning``.
        fetch: Zero-argument callable returning the fetched items.
        threshold: Lower bound; items whose ``timestamp`` is older are dropped.

    Returns:
        The filtered live items, the demo items after a ``ServiceWarning``,
        or an empty list after a ``ServiceError``.
    """
    with source_status(status_label) as status:
        try:
            items = [item for item in fetch() if item["timestamp"] >= threshold]
            status.write(f"Fetched {len(items)} {noun}.")
            return items
        except ServiceWarning as warning:
            st.warning(str(warning))
            demo = load_sample_items(sample_name)
            if demo:
                st.info(f"Loaded demo {source_name} data.", icon="🧪")
                return list(demo)
        except ServiceError as error:
            st.error(f"{source_name} fetch failed: {error}")
    return []


def _run_analysis(config: Dict[str, object], openai_key: Optional[str]) -> Optional[Dict[str, object]]:
    """Fetch, normalise and classify mentions for the current sidebar config.

    Returns:
        ``None`` when nothing usable was collected (an empty-state message has
        already been shown). Otherwise a dict with ``df`` (the mentions
        table), ``summary`` (the summary payload or ``None``) and ``notices``
        (``(level, message)`` pairs to replay when rendering).
    """
    threshold = parse_date_range(config["date_range"])
    sources = config["sources"]
    credentials = config["credentials"]
    collected: List[NormalizedItem] = []

    with st.container():
        if sources["reddit"]:
            collected.extend(
                _collect_source(
                    "Fetching Reddit mentions",
                    "Reddit items",
                    "Reddit",
                    "reddit_sample",
                    lambda: cached_reddit_fetch(
                        brand=config["brand"],
                        limit=config["limits"]["reddit"],
                        date_range=config["date_range"],
                        min_upvotes=config["min_reddit_upvotes"],
                        client_id=credentials["reddit"]["client_id"],
                        client_secret=credentials["reddit"]["client_secret"],
                        user_agent=credentials["reddit"]["user_agent"],
                    ),
                    threshold,
                )
            )
        if sources["twitter"]:
            collected.extend(
                _collect_source(
                    "Fetching Twitter mentions",
                    "tweets",
                    "Twitter",
                    "twitter_sample",
                    lambda: cached_twitter_fetch(
                        brand=config["brand"],
                        limit=config["limits"]["twitter"],
                        min_likes=config["min_twitter_likes"],
                        language=config["language"],
                        bearer=credentials["twitter"],
                    ),
                    threshold,
                )
            )
        if sources["trustpilot"]:
            collected.extend(
                _collect_source(
                    "Fetching Trustpilot reviews",
                    "reviews",
                    "Trustpilot",
                    "trustpilot_sample",
                    lambda: cached_trustpilot_fetch(
                        brand=config["brand"],
                        language=config["language"],
                    ),
                    threshold,
                )
            )

    if not collected:
        show_empty_state("No mentions found. Try enabling more sources or loosening filters.")
        return None

    cleaned = normalize_items(collected)
    if not cleaned:
        show_empty_state("All results were filtered out as noise. Try again with different settings.")
        return None

    # NOTE(review): the raw sidebar key takes precedence over the validated
    # `openai_key` here, as in the original -- confirm that is intended.
    sentiment_service = llm.LLMService(
        api_key=credentials["openai"] or openai_key,
        batch_size=config["batch_size"],
    )
    sentiments = sentiment_service.classify_sentiment_batch([item["text"] for item in cleaned])
    df = _to_dataframe(cleaned, sentiments)

    summary_payload: Optional[Dict[str, str]] = None
    notices: List[tuple] = []
    if sentiment_service.available():
        try:
            summary_payload = sentiment_service.summarize_overall(
                [{"label": row["label"], "text": row["text"]} for _, row in df.iterrows()]
            )
        except ServiceWarning as warning:
            notices.append(("warning", str(warning)))
    else:
        notices.append(("info", "OpenAI key missing. Using VADER fallback for sentiment; summary disabled."))

    return {"df": df, "summary": summary_payload, "notices": notices}


def _render_results(results: Dict[str, object]) -> None:
    """Draw the dashboard, summary, explorer and export buttons for stored results."""
    df = results["df"]
    summary_payload = results["summary"]

    render_overview(df)
    render_top_comments(df)

    # Replay the notices recorded while the summary was generated.
    for level, message in results["notices"]:
        if level == "warning":
            st.warning(message)
        else:
            st.info(message, icon="ℹ️")

    render_summary(summary_payload)
    render_source_explorer(df)

    csv_data = df.to_csv(index=False).encode("utf-8")
    excel_data = _build_excel(df)
    pdf_data = _build_pdf(summary_payload, df)
    col_csv, col_excel, col_pdf = st.columns(3)
    with col_csv:
        st.download_button(
            "⬇️ Export CSV",
            data=csv_data,
            file_name="reputation_radar.csv",
            mime="text/csv",
        )
    with col_excel:
        st.download_button(
            "⬇️ Export Excel",
            data=excel_data,
            file_name="reputation_radar.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
    with col_pdf:
        st.download_button(
            "⬇️ Export PDF Summary",
            data=pdf_data,
            file_name="reputation_radar_summary.pdf",
            mime="application/pdf",
        )

    st.success("Analysis complete! Review the insights above.")


def main() -> None:
    """Entry point: render the sidebar, run the analysis on demand, show results.

    ``st.button`` returns True only on the rerun caused by the click. The
    analysis output is therefore stored in ``st.session_state`` and re-rendered
    on later reruns, so using the Source Explorer filters or the download
    buttons no longer clears the page.
    """
    env_defaults = _get_env_defaults()
    openai_env_key = env_defaults.get("OPENAI_API_KEY") or st.session_state.get("secrets", {}).get("OPENAI_API_KEY")
    validated_env_key, notices = validate_openai_key(openai_env_key)
    config = render_sidebar(env_defaults, tuple(notices))

    chosen_key = config["credentials"]["openai"] or validated_env_key
    openai_key, runtime_notices = validate_openai_key(chosen_key)
    for msg in runtime_notices:
        st.sidebar.info(msg)

    run_clicked = st.button("Run Analysis 🚀", type="primary")

    if run_clicked:
        if not config["brand"]:
            st.error("Brand name is required.")
            return
        # Drop the previous run first so a failed run never shows stale data.
        if _RESULTS_STATE_KEY in st.session_state:
            del st.session_state[_RESULTS_STATE_KEY]
        fresh = _run_analysis(config, openai_key)
        if fresh is None:
            return
        st.session_state[_RESULTS_STATE_KEY] = fresh

    results = st.session_state.get(_RESULTS_STATE_KEY)
    if results is None:
        show_empty_state("Enter a brand name and click **Run Analysis** to get started.")
        return

    _render_results(results)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,5 @@
|
||||
"""Reusable Streamlit UI components for ReputationRadar."""
|
||||
|
||||
from . import dashboard, filters, loaders, summary
|
||||
|
||||
__all__ = ["dashboard", "filters", "loaders", "summary"]
|
||||
136
community-contributions/Reputation_Radar/components/dashboard.py
Normal file
136
community-contributions/Reputation_Radar/components/dashboard.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""Render the ReputationRadar dashboard components."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, Optional
|
||||
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
import streamlit as st
|
||||
|
||||
# Display label (emoji + name) for each internal source key.
SOURCE_CHIPS = {
    "reddit": "🔺 Reddit",
    "twitter": "✖️ Twitter",
    "trustpilot": "⭐ Trustpilot",
}

# Hex colour used for each sentiment label in the Plotly charts.
SENTIMENT_COLORS = {
    "positive": "#4caf50",
    "neutral": "#90a4ae",
    "negative": "#ef5350",
}
|
||||
|
||||
|
||||
def render_overview(df: pd.DataFrame) -> None:
    """Draw the sentiment pie chart and the mentions-over-time line chart.

    The pie always lists positive, neutral and negative (zero-filled when
    absent). The line chart counts mentions per day and label; when there is
    nothing to plot, an info message takes its place.
    """
    label_order = ["positive", "neutral", "negative"]
    label_counts = df["label"].value_counts().reindex(label_order, fill_value=0)
    counts = label_counts.rename_axis("label").reset_index(name="count")
    pie = px.pie(
        counts,
        names="label",
        values="count",
        color="label",
        color_discrete_map=SENTIMENT_COLORS,
        title="Sentiment distribution",
    )
    pie.update_traces(textinfo="percent+label")

    # Bucket mentions by calendar day and sentiment label.
    daily_sizes = df.set_index("timestamp").groupby([pd.Grouper(freq="D"), "label"]).size()
    ts = daily_sizes.reset_index(name="count")
    ts_plot = None
    if not ts.empty:
        ts_plot = px.line(
            ts,
            x="timestamp",
            y="count",
            color="label",
            color_discrete_map=SENTIMENT_COLORS,
            markers=True,
            title="Mentions over time",
        )

    left, right = st.columns(2)
    with left:
        st.plotly_chart(pie, use_container_width=True)
    with right:
        if ts_plot is None:
            st.info("Not enough data for a time-series. Try widening the date range.", icon="📆")
        else:
            st.plotly_chart(ts_plot, use_container_width=True)
|
||||
|
||||
|
||||
def render_top_comments(df: pd.DataFrame) -> None:
    """List up to five highest-confidence mentions for each sentiment.

    Three columns are drawn (positive, neutral, negative). Each entry shows
    source chip, author and timestamp, and is a link when the row has a URL.
    """
    st.subheader("Representative Mentions")
    sentiments = ("positive", "neutral", "negative")
    for column, sentiment in zip(st.columns(3), sentiments):
        matches = df[df["label"] == sentiment]
        top_five = matches.sort_values("confidence", ascending=False).head(5)
        with column:
            st.caption(sentiment.capitalize())
            if top_five.empty:
                st.write("No items yet.")
                continue
            for _, mention in top_five.iterrows():
                chip = SOURCE_CHIPS.get(mention["source"], mention["source"])
                author = mention.get("author") or "Unknown"
                timestamp = mention["timestamp"].strftime("%Y-%m-%d %H:%M")
                label = f"{chip} · {author} · {timestamp}"
                link = mention.get("url")
                st.markdown(f"- [{label}]({link})" if link else f"- {label}")
|
||||
|
||||
|
||||
def render_source_explorer(df: pd.DataFrame) -> None:
    """Show a searchable, filterable, paginated table of mentions in an expander.

    Filters applied, in order: case-insensitive literal text search, source,
    and minimum confidence. Results are shown ten rows per page.

    Args:
        df: Mentions with ``timestamp``, ``source``, ``author``, ``label``,
            ``confidence``, ``text`` and ``url`` columns.
    """
    # NOTE(review): indentation was lost in the reviewed copy; the whole body
    # is assumed to live inside the expander -- confirm against the repository.
    with st.expander("Source Explorer", expanded=False):
        search_term = st.text_input("Search mentions", key="explorer_search")
        selected_source = st.selectbox("Source filter", options=["All"] + list(SOURCE_CHIPS.values()))
        min_conf = st.slider("Minimum confidence", min_value=0.0, max_value=1.0, value=0.0, step=0.1)

        filtered = df.copy()
        if search_term:
            # regex=False: match the user's input literally. As a regex,
            # input such as "(" or "c++" would raise re.error.
            matches = filtered["text"].str.contains(search_term, case=False, na=False, regex=False)
            filtered = filtered[matches]
        if selected_source != "All":
            source_key = _reverse_lookup(selected_source)
            if source_key:
                filtered = filtered[filtered["source"] == source_key]
        filtered = filtered[filtered["confidence"] >= min_conf]

        if filtered.empty:
            st.info("No results found. Try widening the date range or removing filters.", icon="🪄")
            return

        page_size = 10
        # Ceiling division; at least one page so the number input stays valid.
        total_pages = max(1, (len(filtered) + page_size - 1) // page_size)
        page = st.number_input("Page", min_value=1, max_value=total_pages, value=1)
        start = (page - 1) * page_size
        end = start + page_size

        explorer_df = filtered.iloc[start:end].copy()
        # Swap internal source keys for display chips; unknown keys are kept.
        explorer_df["source"] = explorer_df["source"].map(SOURCE_CHIPS).fillna(explorer_df["source"])
        explorer_df["timestamp"] = explorer_df["timestamp"].dt.strftime("%Y-%m-%d %H:%M")
        explorer_df = explorer_df[["timestamp", "source", "author", "label", "confidence", "text", "url"]]

        st.dataframe(explorer_df, use_container_width=True, hide_index=True)
|
||||
|
||||
|
||||
def _reverse_lookup(value: str) -> Optional[str]:
    """Return the ``SOURCE_CHIPS`` key whose display chip equals ``value``.

    Returns ``None`` when no chip matches.
    """
    matching_keys = (key for key, chip in SOURCE_CHIPS.items() if chip == value)
    return next(matching_keys, None)
|
||||
128
community-contributions/Reputation_Radar/components/filters.py
Normal file
128
community-contributions/Reputation_Radar/components/filters.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""Sidebar filters and configuration controls."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
import streamlit as st
|
||||
|
||||
# Date-range option keys mapped to the labels shown in the sidebar selectbox.
DATE_RANGE_LABELS = {
    "24h": "Last 24 hours",
    "7d": "Last 7 days",
    "30d": "Last 30 days",
}

# Language codes mapped to the names shown in the sidebar selectbox.
SUPPORTED_LANGUAGES = {
    "en": "English",
    "es": "Spanish",
    "de": "German",
    "fr": "French",
}
|
||||
|
||||
|
||||
def _store_secret(key: str, value: str) -> None:
    """Remember a non-empty secret in ``st.session_state["secrets"]``.

    Empty values are ignored. The ``secrets`` dict is created on first use.
    """
    if not value:
        return
    st.session_state.setdefault("secrets", {})
    st.session_state["secrets"][key] = value
|
||||
|
||||
|
||||
def _get_secret(key: str, default: str = "") -> str:
    """Look up a secret in ``st.session_state["secrets"]``.

    Returns ``default`` when the ``secrets`` dict or the key is missing.
    """
    stored_secrets = st.session_state.get("secrets", {})
    return stored_secrets.get(key, default)
|
||||
|
||||
|
||||
def render_sidebar(env_defaults: Dict[str, Optional[str]], openai_notices: Tuple[str, ...]) -> Dict[str, object]:
    """Draw every sidebar control and gather the chosen values into one dict.

    Args:
        env_defaults: Credential values keyed by names such as ``"OPENAI_API_KEY"``.
            A falsy entry falls back to the secret stored in this session.
        openai_notices: Messages displayed as info boxes below the credential inputs.

    Returns:
        A dict with the keys ``brand``, ``date_range``, ``min_reddit_upvotes``,
        ``min_twitter_likes``, ``language``, ``sources``, ``limits``, ``batch_size``
        and ``credentials``. String values are stripped of surrounding whitespace.
    """
    state = st.session_state

    def _prefill(name: str) -> str:
        # A value supplied by the caller wins; otherwise reuse what this session stored.
        return env_defaults.get(name) or _get_secret(name)

    with st.sidebar:
        st.header("Tune Your Radar", anchor=False)

        brand = st.text_input("Brand Name*", value=state.get("brand_input", ""))
        if brand:
            state["brand_input"] = brand

        date_range = st.selectbox(
            "Date Range",
            options=list(DATE_RANGE_LABELS.keys()),
            format_func=lambda key: DATE_RANGE_LABELS[key],
            index=1,
        )

        min_reddit_upvotes = st.number_input(
            "Minimum Reddit upvotes",
            min_value=0,
            value=state.get("min_reddit_upvotes", 4),
        )
        state["min_reddit_upvotes"] = min_reddit_upvotes

        min_twitter_likes = st.number_input(
            "Minimum X likes",
            min_value=0,
            value=state.get("min_twitter_likes", 100),
        )
        state["min_twitter_likes"] = min_twitter_likes

        language = st.selectbox(
            "Language",
            options=list(SUPPORTED_LANGUAGES.keys()),
            format_func=lambda key: SUPPORTED_LANGUAGES[key],
            index=0,
        )

        st.markdown("### Sources")
        source_flags: Dict[str, object] = {}
        for source_name, toggle_label in (
            ("reddit", "🔺 Reddit"),
            ("twitter", "✖️ Twitter"),
            ("trustpilot", "⭐ Trustpilot"),
        ):
            flag_key = f"{source_name}_enabled"
            source_flags[source_name] = st.toggle(toggle_label, value=state.get(flag_key, True))
            state[flag_key] = source_flags[source_name]

        st.markdown("### API Keys")
        openai_key = st.text_input(
            "OpenAI API Key",
            value=_prefill("OPENAI_API_KEY") or "",
            type="password",
            help="Stored only in this session.",
        )
        openai_value = openai_key.strip()
        _store_secret("OPENAI_API_KEY", openai_value)

        reddit_client_id = st.text_input("Reddit Client ID", value=_prefill("REDDIT_CLIENT_ID"), type="password")
        reddit_client_secret = st.text_input(
            "Reddit Client Secret", value=_prefill("REDDIT_CLIENT_SECRET"), type="password"
        )
        reddit_user_agent = st.text_input("Reddit User Agent", value=_prefill("REDDIT_USER_AGENT"))
        twitter_bearer_token = st.text_input(
            "Twitter Bearer Token", value=_prefill("TWITTER_BEARER_TOKEN"), type="password"
        )

        reddit_credentials = {
            "client_id": reddit_client_id.strip(),
            "client_secret": reddit_client_secret.strip(),
            "user_agent": reddit_user_agent.strip(),
        }
        twitter_value = twitter_bearer_token.strip()
        _store_secret("REDDIT_CLIENT_ID", reddit_credentials["client_id"])
        _store_secret("REDDIT_CLIENT_SECRET", reddit_credentials["client_secret"])
        _store_secret("REDDIT_USER_AGENT", reddit_credentials["user_agent"])
        _store_secret("TWITTER_BEARER_TOKEN", twitter_value)

        for notice in openai_notices or ():
            st.info(notice)

        with st.expander("Advanced Options", expanded=False):
            source_limits: Dict[str, object] = {}
            for source_name, slider_label, ceiling, fallback in (
                ("reddit", "Reddit results", 100, 40),
                ("twitter", "Twitter results", 100, 40),
                ("trustpilot", "Trustpilot results", 60, 30),
            ):
                limit_key = f"{source_name}_limit"
                source_limits[source_name] = st.slider(
                    slider_label,
                    min_value=10,
                    max_value=ceiling,
                    value=state.get(limit_key, fallback),
                    step=5,
                )
            llm_batch_size = st.slider(
                "OpenAI batch size",
                min_value=5,
                max_value=20,
                value=state.get("llm_batch_size", 20),
                step=5,
            )
            # Remember the slider positions for the next rerun of the script.
            for source_name, chosen in source_limits.items():
                state[f"{source_name}_limit"] = chosen
            state["llm_batch_size"] = llm_batch_size

    return {
        "brand": brand.strip(),
        "date_range": date_range,
        "min_reddit_upvotes": min_reddit_upvotes,
        "min_twitter_likes": min_twitter_likes,
        "language": language,
        "sources": source_flags,
        "limits": source_limits,
        "batch_size": llm_batch_size,
        "credentials": {
            "openai": openai_value,
            "reddit": reddit_credentials,
            "twitter": twitter_value,
        },
    }
|
||||
@@ -0,0 +1,25 @@
|
||||
"""Loading indicators and status helpers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import contextmanager
|
||||
from typing import Iterator
|
||||
|
||||
import streamlit as st
|
||||
|
||||
|
||||
@contextmanager
def source_status(label: str) -> Iterator[st.delta_generator.DeltaGenerator]:
    """Yield an expanded ``st.status`` widget while a source is being fetched.

    When the managed block finishes, the widget is marked complete with a check mark.
    If anything raises, the widget is switched to the error state with the exception
    text appended to the label, and the exception is re-raised to the caller.
    """
    widget = st.status(label, expanded=True)
    try:
        yield widget
        widget.update(label=f"{label} ✅", state="complete")
    except Exception as error:  # noqa: BLE001
        widget.update(label=f"{label} ⚠️ {error}", state="error")
        raise
|
||||
|
||||
|
||||
def show_empty_state(message: str) -> None:
    """Show ``message`` in an info callout with a magnifying-glass icon."""
    st.info(message, icon="🔎")
|
||||
@@ -0,0 +1,23 @@
|
||||
"""Executive summary display components."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, Optional
|
||||
|
||||
import streamlit as st
|
||||
|
||||
|
||||
def render_summary(summary: Optional[Dict[str, str]]) -> None:
    """Render the executive summary card.

    Args:
        summary: Mapping whose ``"raw"`` entry holds the summary markdown. A falsy
            value means no summary is available, and a warning is shown instead.
    """
    st.subheader("Executive Summary", anchor=False)
    if not summary:
        st.warning("Executive summary disabled. Provide an OpenAI API key to unlock this section.", icon="🤖")
        return
    # Each st.markdown call is rendered as its own isolated element, so an opening
    # div tag emitted by one call cannot enclose content written by a later call.
    # The styled card therefore never wrapped the summary. A bordered container
    # provides the card framing and avoids unsafe HTML altogether.
    with st.container(border=True):
        st.markdown(summary.get("raw", ""))
|
||||
16
community-contributions/Reputation_Radar/requirements.txt
Normal file
16
community-contributions/Reputation_Radar/requirements.txt
Normal file
@@ -0,0 +1,16 @@
|
||||
streamlit
|
||||
praw
|
||||
requests
|
||||
beautifulsoup4
|
||||
pandas
|
||||
python-dotenv
|
||||
tenacity
|
||||
plotly
|
||||
openai>=1.0.0
|
||||
vaderSentiment
|
||||
fuzzywuzzy[speedup]
|
||||
python-Levenshtein
|
||||
reportlab
|
||||
tqdm
|
||||
pytest
|
||||
XlsxWriter
|
||||
@@ -0,0 +1,20 @@
|
||||
[
|
||||
{
|
||||
"source": "reddit",
|
||||
"id": "t3_sample1",
|
||||
"url": "https://www.reddit.com/r/technology/comments/sample1",
|
||||
"author": "techfan42",
|
||||
"timestamp": "2025-01-15T14:30:00+00:00",
|
||||
"text": "ReputationRadar did an impressive job resolving our customer issues within hours. Support has been world class!",
|
||||
"meta": {"score": 128, "num_comments": 24, "subreddit": "technology", "type": "submission"}
|
||||
},
|
||||
{
|
||||
"source": "reddit",
|
||||
"id": "t1_sample2",
|
||||
"url": "https://www.reddit.com/r/startups/comments/sample2/comment/sample",
|
||||
"author": "growthguru",
|
||||
"timestamp": "2025-01-14T10:10:00+00:00",
|
||||
"text": "Noticed a spike in downtime alerts with ReputationRadar this week. Anyone else seeing false positives?",
|
||||
"meta": {"score": 45, "subreddit": "startups", "type": "comment", "submission_title": "Monitoring tools"}
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,20 @@
|
||||
[
|
||||
{
|
||||
"source": "trustpilot",
|
||||
"id": "trustpilot-001",
|
||||
"url": "https://www.trustpilot.com/review/reputationradar.ai",
|
||||
"author": "Dana",
|
||||
"timestamp": "2025-01-12T11:00:00+00:00",
|
||||
"text": "ReputationRadar has simplified our weekly reporting. The sentiment breakdowns are easy to understand and accurate.",
|
||||
"meta": {"rating": "5 stars"}
|
||||
},
|
||||
{
|
||||
"source": "trustpilot",
|
||||
"id": "trustpilot-002",
|
||||
"url": "https://www.trustpilot.com/review/reputationradar.ai?page=2",
|
||||
"author": "Liam",
|
||||
"timestamp": "2025-01-10T18:20:00+00:00",
|
||||
"text": "Support was responsive, but the Trustpilot integration kept timing out. Hoping for a fix soon.",
|
||||
"meta": {"rating": "3 stars"}
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,20 @@
|
||||
[
|
||||
{
|
||||
"source": "twitter",
|
||||
"id": "173654001",
|
||||
"url": "https://twitter.com/brandlover/status/173654001",
|
||||
"author": "brandlover",
|
||||
"timestamp": "2025-01-15T16:45:00+00:00",
|
||||
"text": "Huge shoutout to ReputationRadar for flagging sentiment risks ahead of our launch. Saved us hours this morning!",
|
||||
"meta": {"likes": 57, "retweets": 8, "replies": 3, "quote_count": 2}
|
||||
},
|
||||
{
|
||||
"source": "twitter",
|
||||
"id": "173653991",
|
||||
"url": "https://twitter.com/critique/status/173653991",
|
||||
"author": "critique",
|
||||
"timestamp": "2025-01-13T09:12:00+00:00",
|
||||
"text": "The new ReputationRadar dashboard feels laggy and the PDF export failed twice. Dev team please check your rollout.",
|
||||
"meta": {"likes": 14, "retweets": 1, "replies": 5, "quote_count": 0}
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,11 @@
|
||||
"""Service layer exports for ReputationRadar."""
|
||||
|
||||
from . import llm, reddit_client, trustpilot_scraper, twitter_client, utils
|
||||
|
||||
# Submodules re-exported by the services package.
__all__ = ["llm", "reddit_client", "trustpilot_scraper", "twitter_client", "utils"]
|
||||
147
community-contributions/Reputation_Radar/services/llm.py
Normal file
147
community-contributions/Reputation_Radar/services/llm.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""LLM sentiment analysis and summarization utilities."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence
|
||||
|
||||
try: # pragma: no cover - optional dependency
|
||||
from openai import OpenAI
|
||||
except ModuleNotFoundError: # pragma: no cover
|
||||
OpenAI = None # type: ignore[assignment]
|
||||
|
||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
||||
|
||||
from .utils import ServiceWarning, chunked
|
||||
|
||||
# System message sent with every sentiment-classification request.
CLASSIFICATION_SYSTEM_PROMPT = "You are a precise brand-sentiment classifier. Output JSON only."
# System message sent with the executive-summary request.
SUMMARY_SYSTEM_PROMPT = "You analyze brand chatter and produce concise, executive-ready summaries."
|
||||
|
||||
|
||||
@dataclass
|
||||
class SentimentResult:
|
||||
"""Structured sentiment output."""
|
||||
|
||||
label: str
|
||||
confidence: float
|
||||
|
||||
|
||||
class LLMService:
|
||||
"""Wrapper around OpenAI with VADER fallback."""
|
||||
|
||||
def __init__(self, api_key: Optional[str], model: str = "gpt-4o-mini", batch_size: int = 20):
|
||||
self.batch_size = max(1, batch_size)
|
||||
self.model = model
|
||||
self.logger = logging.getLogger("services.llm")
|
||||
self._client: Optional[Any] = None
|
||||
self._analyzer = SentimentIntensityAnalyzer()
|
||||
if api_key and OpenAI is not None:
|
||||
try:
|
||||
self._client = OpenAI(api_key=api_key)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
self.logger.warning("Failed to initialize OpenAI client, using VADER fallback: %s", exc)
|
||||
self._client = None
|
||||
elif api_key and OpenAI is None:
|
||||
self.logger.warning("openai package not installed; falling back to VADER despite API key.")
|
||||
|
||||
def available(self) -> bool:
|
||||
"""Return whether OpenAI-backed features are available."""
|
||||
return self._client is not None
|
||||
|
||||
def classify_sentiment_batch(self, texts: Sequence[str]) -> List[SentimentResult]:
|
||||
"""Classify multiple texts, chunking if necessary."""
|
||||
if not texts:
|
||||
return []
|
||||
if not self.available():
|
||||
return [self._vader_sentiment(text) for text in texts]
|
||||
|
||||
results: List[SentimentResult] = []
|
||||
for chunk in chunked(list(texts), self.batch_size):
|
||||
prompt_lines = ["Classify each item as \"positive\", \"neutral\", or \"negative\".", "Also output a confidence score between 0 and 1.", "Return an array of objects: [{\"label\": \"...\", \"confidence\": 0.0}].", "Items:"]
|
||||
prompt_lines.extend([f"{idx + 1}) {text}" for idx, text in enumerate(chunk)])
|
||||
prompt = "\n".join(prompt_lines)
|
||||
try:
|
||||
response = self._client.responses.create( # type: ignore[union-attr]
|
||||
model=self.model,
|
||||
input=[
|
||||
{"role": "system", "content": CLASSIFICATION_SYSTEM_PROMPT},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
temperature=0,
|
||||
max_output_tokens=500,
|
||||
)
|
||||
output_text = self._extract_text(response)
|
||||
parsed = json.loads(output_text)
|
||||
for item in parsed:
|
||||
results.append(
|
||||
SentimentResult(
|
||||
label=item.get("label", "neutral"),
|
||||
confidence=float(item.get("confidence", 0.5)),
|
||||
)
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
self.logger.warning("Classification fallback to VADER due to error: %s", exc)
|
||||
for text in chunk:
|
||||
results.append(self._vader_sentiment(text))
|
||||
# Ensure the output length matches input
|
||||
if len(results) != len(texts):
|
||||
# align by padding with neutral
|
||||
results.extend([SentimentResult(label="neutral", confidence=0.33)] * (len(texts) - len(results)))
|
||||
return results
|
||||
|
||||
def summarize_overall(self, findings: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
"""Create an executive summary using OpenAI."""
|
||||
if not self.available():
|
||||
raise ServiceWarning("OpenAI API key missing. Summary unavailable.")
|
||||
prompt_lines = [
|
||||
"Given these labeled items and their short rationales, write:",
|
||||
"- 5 bullet \"Highlights\"",
|
||||
"- 5 bullet \"Risks & Concerns\"",
|
||||
"- One-line \"Overall Tone\" (Positive/Neutral/Negative with brief justification)",
|
||||
"- 3 \"Recommended Actions\"",
|
||||
"Keep it under 180 words total. Be specific but neutral in tone.",
|
||||
"Items:",
|
||||
]
|
||||
for idx, item in enumerate(findings, start=1):
|
||||
prompt_lines.append(
|
||||
f"{idx}) [{item.get('label','neutral').upper()}] {item.get('text','')}"
|
||||
)
|
||||
prompt = "\n".join(prompt_lines)
|
||||
try:
|
||||
response = self._client.responses.create( # type: ignore[union-attr]
|
||||
model=self.model,
|
||||
input=[
|
||||
{"role": "system", "content": SUMMARY_SYSTEM_PROMPT},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
temperature=0.2,
|
||||
max_output_tokens=800,
|
||||
)
|
||||
output_text = self._extract_text(response)
|
||||
return {"raw": output_text}
|
||||
except Exception as exc: # noqa: BLE001
|
||||
self.logger.error("Failed to generate summary: %s", exc)
|
||||
raise ServiceWarning("Unable to generate executive summary at this time.") from exc
|
||||
|
||||
def _vader_sentiment(self, text: str) -> SentimentResult:
|
||||
scores = self._analyzer.polarity_scores(text)
|
||||
compound = scores["compound"]
|
||||
if compound >= 0.2:
|
||||
label = "positive"
|
||||
elif compound <= -0.2:
|
||||
label = "negative"
|
||||
else:
|
||||
label = "neutral"
|
||||
confidence = min(1.0, max(0.0, abs(compound)))
|
||||
return SentimentResult(label=label, confidence=confidence)
|
||||
|
||||
def _extract_text(self, response: Any) -> str:
|
||||
"""Support multiple OpenAI client response shapes."""
|
||||
if hasattr(response, "output") and response.output:
|
||||
content = response.output[0].content[0]
|
||||
return getattr(content, "text", str(content))
|
||||
if hasattr(response, "choices"):
|
||||
return response.choices[0].message.content # type: ignore[return-value]
|
||||
raise ValueError("Unknown response structure from OpenAI client.")
|
||||
@@ -0,0 +1,141 @@
|
||||
"""Reddit data collection service using PRAW."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, Iterable, List, Optional
|
||||
|
||||
import praw
|
||||
from praw.models import Comment, Submission
|
||||
|
||||
from .utils import (
|
||||
NormalizedItem,
|
||||
ServiceError,
|
||||
ServiceWarning,
|
||||
ensure_timezone,
|
||||
sanitize_text,
|
||||
)
|
||||
|
||||
|
||||
# Converts the app's date-range keys into the time_filter values passed to subreddit search.
TIME_FILTER_MAP = {"24h": "day", "7d": "week", "30d": "month"}
|
||||
|
||||
|
||||
def _iter_submissions(subreddit: praw.models.Subreddit, query: str, limit: int, time_filter: str) -> Iterable[Submission]:
|
||||
return subreddit.search(query=query, sort="new", time_filter=time_filter, limit=limit * 3)
|
||||
|
||||
|
||||
def _iter_comments(submission: Submission) -> Iterable[Comment]:
|
||||
submission.comments.replace_more(limit=0)
|
||||
return submission.comments.list()
|
||||
|
||||
|
||||
def _normalize_submission(submission: Submission) -> NormalizedItem:
    """Convert a Reddit submission into a ``NormalizedItem``.

    The text is the title and self-text separated by a blank line. The timestamp is
    built from ``created_utc`` as a UTC datetime.
    """
    posted_at = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
    author_name = str(submission.author) if submission.author else None
    details = {
        "score": submission.score,
        "num_comments": submission.num_comments,
        "subreddit": submission.subreddit.display_name,
        "type": "submission",
    }
    return NormalizedItem(
        source="reddit",
        id=submission.id,
        url=f"https://www.reddit.com{submission.permalink}",
        author=author_name,
        timestamp=ensure_timezone(posted_at),
        text=f"{submission.title}\n\n{submission.selftext or ''}",
        meta=details,
    )
|
||||
|
||||
|
||||
def _normalize_comment(comment: Comment, submission: Submission) -> NormalizedItem:
    """Convert a Reddit comment into a ``NormalizedItem``.

    The subreddit name and title of the parent ``submission`` are recorded in ``meta``.
    """
    posted_at = datetime.fromtimestamp(comment.created_utc, tz=timezone.utc)
    author_name = str(comment.author) if comment.author else None
    details = {
        "score": comment.score,
        "subreddit": submission.subreddit.display_name,
        "type": "comment",
        "submission_title": submission.title,
    }
    return NormalizedItem(
        source="reddit",
        id=comment.id,
        url=f"https://www.reddit.com{comment.permalink}",
        author=author_name,
        timestamp=ensure_timezone(posted_at),
        text=comment.body,
        meta=details,
    )
|
||||
|
||||
|
||||
def fetch_mentions(
    brand: str,
    credentials: Dict[str, str],
    limit: int = 25,
    date_filter: str = "7d",
    min_upvotes: int = 0,
) -> List[NormalizedItem]:
    """Collect Reddit submissions and comments that mention ``brand``.

    Args:
        brand: Search query; comments are kept only if they contain it (case-insensitive).
        credentials: Must provide ``client_id``, ``client_secret`` and ``user_agent``.
        limit: Maximum number of items returned.
        date_filter: Key of ``TIME_FILTER_MAP``; unknown keys fall back to ``"week"``.
        min_upvotes: Submissions and comments scoring below this are skipped.

    Raises:
        ServiceWarning: If any credential is missing.
        ServiceError: If the client cannot be created or fetching fails.
    """
    client_id, client_secret, user_agent = (
        credentials.get(field) for field in ("client_id", "client_secret", "user_agent")
    )
    if not (client_id and client_secret and user_agent):
        raise ServiceWarning("Reddit credentials are missing. Provide them in the sidebar to enable this source.")

    try:
        reddit = praw.Reddit(client_id=client_id, client_secret=client_secret, user_agent=user_agent)
        reddit.read_only = True
    except Exception as exc:  # noqa: BLE001
        raise ServiceError(f"Failed to initialize Reddit client: {exc}") from exc

    time_filter = TIME_FILTER_MAP.get(date_filter.lower(), "week")
    subreddit = reddit.subreddit("all")
    results: List[NormalizedItem] = []
    seen_ids: set[str] = set()

    def _quota_reached() -> bool:
        return len(results) >= limit

    try:
        for submission in _iter_submissions(subreddit, query=brand, limit=limit, time_filter=time_filter):
            if submission.id in seen_ids or submission.score < min_upvotes:
                continue
            post = _normalize_submission(submission)
            post["text"] = sanitize_text(post["text"])
            if post["text"]:
                results.append(post)
                seen_ids.add(submission.id)
            if _quota_reached():
                break

            # Scan the submission's comments for further mentions of the brand.
            matched = 0
            for comment in _iter_comments(submission):
                if brand.lower() not in (comment.body or "").lower() or comment.score < min_upvotes:
                    continue
                reply = _normalize_comment(comment, submission)
                reply["text"] = sanitize_text(reply["text"])
                if not reply["text"] or reply["id"] in seen_ids:
                    continue
                results.append(reply)
                seen_ids.add(reply["id"])
                matched += 1
                if _quota_reached():
                    break
            if _quota_reached():
                break
            # Pause before the next submission only when this one yielded comments.
            if matched:
                time.sleep(1)
    except Exception as exc:  # noqa: BLE001
        raise ServiceError(f"Error while fetching Reddit data: {exc}") from exc
    return results
|
||||
@@ -0,0 +1,138 @@
|
||||
"""Trustpilot scraping service with polite crawling safeguards."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, List
|
||||
from urllib.parse import urlencode
|
||||
from urllib.robotparser import RobotFileParser
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
||||
|
||||
from .utils import (
|
||||
NormalizedItem,
|
||||
ServiceError,
|
||||
ServiceWarning,
|
||||
ensure_timezone,
|
||||
random_user_agent,
|
||||
sanitize_text,
|
||||
)
|
||||
|
||||
# Site root; prefixed to robots.txt, the search path and relative review links.
BASE_URL = "https://www.trustpilot.com"
# Path of the search endpoint that is scraped and checked against robots.txt.
SEARCH_PATH = "/search"
|
||||
|
||||
|
||||
class BlockedError(ServiceWarning):
    """Signals that Trustpilot refused the scraping request."""
|
||||
|
||||
|
||||
def _check_robots(user_agent: str) -> None:
    """Download Trustpilot's robots.txt and verify ``user_agent`` may fetch the search path.

    Raises:
        ServiceWarning: If robots.txt disallows the search endpoint for this agent.
    """
    robots = RobotFileParser(f"{BASE_URL}/robots.txt")
    robots.read()
    if robots.can_fetch(user_agent, SEARCH_PATH):
        return
    raise ServiceWarning(
        "Trustpilot robots.txt disallows scraping the search endpoint. "
        "Please use the official API or upload data manually."
    )
|
||||
|
||||
|
||||
@retry(
    reraise=True,
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=1, max=8),
    retry=retry_if_exception_type((requests.RequestException, BlockedError)),
)
def _fetch_page(session: requests.Session, user_agent: str, page: int, brand: str, language: str) -> str:
    """Download one page of Trustpilot search results and return its HTML.

    The call is attempted up to three times with exponential backoff when a
    ``requests.RequestException`` or ``BlockedError`` is raised.

    Args:
        session: Session used for the request.
        user_agent: Value sent in the ``User-Agent`` header.
        page: Result page number.
        brand: Search query.
        language: Optional language filter; also sent as ``Accept-Language`` ("en" if empty).

    Raises:
        BlockedError: On HTTP 401 or 403.
        requests.RequestException: On transport errors or other error statuses.
    """
    params = {"query": brand, "page": page}
    if language:
        params["languages"] = language
    url = f"{BASE_URL}{SEARCH_PATH}?{urlencode(params)}"
    response = session.get(
        url,
        headers={"User-Agent": user_agent, "Accept-Language": language or "en"},
        timeout=20,
    )
    if response.status_code in (401, 403):
        # Report the status actually received; the message used to say 403 for 401 too.
        raise BlockedError(f"Trustpilot denied access (HTTP {response.status_code}).")
    response.raise_for_status()
    return response.text
|
||||
|
||||
|
||||
def _parse_reviews(html: str, user_agent: str) -> List[NormalizedItem]:
    """Extract review cards from a Trustpilot HTML page.

    Args:
        html: Page markup returned by the search endpoint.
        user_agent: Recorded in each item's ``meta``.

    Returns:
        One ``NormalizedItem`` per review card whose cleaned title and text total at
        least 15 characters. A card with no parseable timestamp gets the parse time.
    """
    import hashlib  # local import: only needed for the fallback id below

    soup = BeautifulSoup(html, "html.parser")
    cards = soup.select("article[data-service-review-card-layout]")
    items: List[NormalizedItem] = []
    now = datetime.now(timezone.utc)
    for card in cards:
        # NOTE(review): the hashed class names in the selectors below look build-generated
        # and may change when the site is redeployed — confirm they still match.
        link = card.select_one("a.link_internal__YpiJI")
        url = f"{BASE_URL}{link['href']}" if link and link.get("href") else ""
        title_el = card.select_one("h2")
        title = title_el.get_text(strip=True) if title_el else ""
        text_el = card.select_one("[data-review-description-typography]")
        text = text_el.get_text(separator=" ", strip=True) if text_el else ""
        rating_el = card.select_one("img[alt*='stars']")
        rating = rating_el["alt"] if rating_el and rating_el.get("alt") else ""
        author_el = card.select_one("span.styles_consumerDetails__ZF4I6")
        author = author_el.get_text(strip=True) if author_el else None
        date_el = card.select_one("time")
        timestamp = now
        if date_el and date_el.get("datetime"):
            try:
                timestamp = datetime.fromisoformat(date_el["datetime"].replace("Z", "+00:00"))
            except ValueError:
                timestamp = now

        body = sanitize_text(f"{title}\n\n{text}")
        if len(body) < 15:
            continue

        review_id = card.get("data-review-id")
        if not review_id:
            # The built-in hash() of a string is salted per interpreter process, so it
            # gave the same review a different id on every run. A content digest is stable.
            review_id = hashlib.sha1(body.encode("utf-8")).hexdigest()
        items.append(
            NormalizedItem(
                source="trustpilot",
                id=review_id,
                url=url,
                author=author,
                timestamp=ensure_timezone(timestamp),
                text=body,
                meta={
                    "rating": rating,
                    "user_agent": user_agent,
                },
            )
        )
    return items
|
||||
|
||||
|
||||
def fetch_reviews(brand: str, language: str = "en", pages: int = 2) -> List[NormalizedItem]:
    """Scrape Trustpilot search results for recent reviews.

    Args:
        brand: Search query; required.
        language: Language filter forwarded to the search request.
        pages: Number of result pages to fetch, starting at page 1.

    Returns:
        Reviews from all fetched pages, de-duplicated by id, in page order.

    Raises:
        ServiceWarning: If ``brand`` is empty, robots.txt disallows the search path,
            or Trustpilot blocks the request.
        ServiceError: If robots.txt cannot be downloaded or a page request fails.
    """
    if not brand:
        raise ServiceWarning("Brand name is required for Trustpilot scraping.")

    user_agent = random_user_agent()
    try:
        _check_robots(user_agent)
    except OSError as exc:
        # urllib reports a failed robots.txt download as URLError, an OSError subclass.
        # Surface it as a service failure like any other failed request.
        raise ServiceError(f"Unable to read Trustpilot robots.txt: {exc}") from exc

    aggregated: List[NormalizedItem] = []
    seen_ids: set[str] = set()

    # The context manager closes the session's pooled connections on every exit path.
    with requests.Session() as session:
        for page in range(1, pages + 1):
            if page > 1:
                time.sleep(1.5)  # gentle crawl delay, only between consecutive requests
            try:
                html = _fetch_page(session, user_agent=user_agent, page=page, brand=brand, language=language)
            except BlockedError as exc:
                raise ServiceWarning(
                    "Trustpilot blocked the scraping attempt. Consider using their official API or providing CSV uploads."
                ) from exc
            except requests.RequestException as exc:  # noqa: BLE001
                raise ServiceError(f"Trustpilot request failed: {exc}") from exc
            for item in _parse_reviews(html, user_agent):
                if item["id"] in seen_ids:
                    continue
                aggregated.append(item)
                seen_ids.add(item["id"])

    return aggregated
|
||||
@@ -0,0 +1,98 @@
|
||||
"""Twitter (X) data collection using the v2 recent search API."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from .utils import NormalizedItem, ServiceError, ServiceWarning, ensure_timezone, sanitize_text
|
||||
|
||||
# Twitter API v2 recent-search endpoint requested by fetch_mentions.
SEARCH_URL = "https://api.twitter.com/2/tweets/search/recent"
|
||||
|
||||
|
||||
def _build_query(brand: str, language: str) -> str:
|
||||
terms = [brand]
|
||||
if language:
|
||||
terms.append(f"lang:{language}")
|
||||
return " ".join(terms)
|
||||
|
||||
|
||||
def fetch_mentions(
    brand: str,
    bearer_token: Optional[str],
    limit: int = 25,
    min_likes: int = 0,
    language: str = "en",
) -> List[NormalizedItem]:
    """Fetch recent tweets mentioning the brand.

    Args:
        brand: Search term.
        bearer_token: API bearer token; required.
        limit: Maximum number of tweets returned.
        min_likes: Tweets with fewer likes are skipped.
        language: Optional language code added to the query as ``lang:<code>``.

    Returns:
        Up to ``limit`` normalized tweets. If the API keeps rate-limiting after some
        tweets were collected, the partial list is returned.

    Raises:
        ServiceWarning: If the token is missing or rejected, or if the API is
            rate-limited before anything was collected.
        ServiceError: On transport failures, other HTTP errors, or a non-JSON response.
    """
    if not bearer_token:
        raise ServiceWarning(
            "Twitter bearer token not provided. Add it in the sidebar to enable Twitter ingestion."
        )
    if limit <= 0:
        return []

    max_rate_limit_retries = 3  # consecutive HTTP 429 responses tolerated before giving up
    headers = {
        "Authorization": f"Bearer {bearer_token}",
        "User-Agent": "ReputationRadar/1.0",
    }
    params = {
        "query": _build_query(brand, language),
        # The recent-search endpoint rejects max_results outside 10-100, so a small
        # limit still requests 10 and the surplus is trimmed locally.
        "max_results": max(10, min(100, limit)),
        "tweet.fields": "author_id,created_at,lang,public_metrics",
        "expansions": "author_id",
        "user.fields": "name,username",
    }

    collected: List[NormalizedItem] = []
    next_token: Optional[str] = None
    rate_limit_hits = 0

    while len(collected) < limit:
        if next_token:
            params["next_token"] = next_token
        try:
            response = requests.get(SEARCH_URL, headers=headers, params=params, timeout=15)
        except requests.RequestException as exc:
            raise ServiceError(f"Twitter request failed: {exc}") from exc
        if response.status_code == 401:
            raise ServiceWarning("Twitter API authentication failed. Please verify the bearer token.")
        if response.status_code == 429:
            # Bounded retry: the previous unconditional sleep-and-retry could spin forever.
            rate_limit_hits += 1
            if rate_limit_hits > max_rate_limit_retries:
                if collected:
                    break
                raise ServiceWarning("Twitter API rate limit reached. Please try again in a few minutes.")
            time.sleep(5)
            continue
        if response.status_code >= 400:
            raise ServiceError(f"Twitter API error {response.status_code}: {response.text}")
        rate_limit_hits = 0

        try:
            payload = response.json()
        except ValueError as exc:
            raise ServiceError("Twitter API returned a response that is not valid JSON.") from exc
        data = payload.get("data", [])
        includes = payload.get("includes", {})
        users_index = {user["id"]: user for user in includes.get("users", [])}

        for tweet in data:
            created_at = datetime.fromisoformat(tweet["created_at"].replace("Z", "+00:00"))
            author_info = users_index.get(tweet["author_id"], {})
            metrics = tweet.get("public_metrics", {})
            item = NormalizedItem(
                source="twitter",
                id=tweet["id"],
                url=f"https://twitter.com/{author_info.get('username','')}/status/{tweet['id']}",
                author=author_info.get("username"),
                timestamp=ensure_timezone(created_at),
                text=sanitize_text(tweet["text"]),
                meta={
                    "likes": metrics.get("like_count", 0),
                    "retweets": metrics.get("retweet_count", 0),
                    "replies": metrics.get("reply_count", 0),
                    "quote_count": metrics.get("quote_count", 0),
                },
            )
            if not item["text"]:
                continue
            if item["meta"]["likes"] < min_likes:
                continue
            collected.append(item)
            if len(collected) >= limit:
                break

        next_token = payload.get("meta", {}).get("next_token")
        if not next_token:
            break
        time.sleep(1)  # stay friendly to rate limits

    return collected[:limit]
|
||||
217
community-contributions/Reputation_Radar/services/utils.py
Normal file
217
community-contributions/Reputation_Radar/services/utils.py
Normal file
@@ -0,0 +1,217 @@
|
||||
"""Utility helpers for ReputationRadar services."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, Iterator, List, Optional, Sequence, Tuple, TypedDict
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from fuzzywuzzy import fuzz
|
||||
|
||||
|
||||
# Log file location: <directory two levels above this file>/logs/app.log.
LOG_FILE = Path(__file__).resolve().parents[1].joinpath("logs", "app.log")
# Default minimum text length used by drop_short_items.
MIN_TEXT_LENGTH = 15
# Default similarity threshold used by fuzzy_deduplicate.
SIMILARITY_THRESHOLD = 90
|
||||
|
||||
|
||||
class NormalizedItem(TypedDict):
    """Common dict shape shared by every fetched mention, whatever its source."""

    source: str  # e.g. "reddit", "twitter" or "trustpilot"
    id: str  # identifier of the mention
    url: str  # link to the mention; may be empty
    author: Optional[str]  # author name, or None when unknown
    timestamp: datetime  # when the mention was posted
    text: str  # body of the mention
    meta: Dict[str, object]  # source-specific extras such as scores or ratings
|
||||
|
||||
|
||||
class ServiceError(RuntimeError):
    """Signals a hard failure inside a service."""
|
||||
|
||||
|
||||
class ServiceWarning(RuntimeError):
    """Signals a recoverable problem that should be shown in the UI."""
|
||||
|
||||
|
||||
def initialize_logger(name: str = "reputation_radar") -> logging.Logger:
    """Configure and return a module-level logger.

    Creates the log directory if needed. On the first call (while the root logger
    has no handlers) it installs a UTF-8 file handler for ``LOG_FILE`` and a stream
    handler at INFO level.

    Args:
        name: Name of the logger to return.

    Returns:
        The named logger with its level set to INFO.
    """
    LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
    # basicConfig() does nothing once the root logger has handlers, yet constructing
    # a FileHandler opens the log file immediately. Build the handlers only when they
    # will actually be installed, so repeated calls do not open stray file handles.
    if not logging.getLogger().handlers:
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
            handlers=[
                logging.FileHandler(LOG_FILE, encoding="utf-8"),
                logging.StreamHandler(),
            ],
        )
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    return logger
|
||||
|
||||
|
||||
def load_sample_items(name: str) -> List[NormalizedItem]:
    """Load demo mentions from ``samples/<name>.json`` next to the services package.

    Records that are malformed (not an object, missing ``source``/``id``/
    ``timestamp``/``text``, or carrying a timestamp that is not an ISO-format
    string) are skipped rather than aborting the whole load.

    Args:
        name: Base name of the sample file, without the ``.json`` suffix.

    Returns:
        The valid records as ``NormalizedItem`` dictionaries. The list is empty
        when the file does not exist or its top-level value is not a JSON array.
    """
    samples_dir = Path(__file__).resolve().parents[1] / "samples"
    sample_path = samples_dir / f"{name}.json"
    if not sample_path.exists():
        return []
    with sample_path.open("r", encoding="utf-8") as handle:
        raw_items = json.load(handle)
    if not isinstance(raw_items, list):
        # Iterating a JSON object would walk its keys, not records.
        return []
    cleaned: List[NormalizedItem] = []
    for item in raw_items:
        if not isinstance(item, dict):
            continue
        try:
            cleaned.append(
                NormalizedItem(
                    source=item["source"],
                    id=str(item["id"]),
                    url=item.get("url", ""),
                    author=item.get("author"),
                    timestamp=datetime.fromisoformat(item["timestamp"]),
                    text=item["text"],
                    meta=item.get("meta", {}),
                )
            )
        except (KeyError, TypeError, ValueError):
            # TypeError covers timestamps that are null or numeric.
            continue
    return cleaned
|
||||
|
||||
|
||||
def strip_html(value: str) -> str:
    """Return the visible text of an HTML fragment with whitespace collapsed.

    Falsy input (empty string, ``None``) yields an empty string.
    """
    if value:
        plain = BeautifulSoup(value, "html.parser").get_text(separator=" ", strip=True)
        collapsed = re.sub(r"\s+", " ", plain)
        # The encode/decode round trip with "ignore" discards any character
        # that cannot be represented in UTF-8.
        encodable = collapsed.encode("utf-8", "ignore").decode("utf-8", "ignore")
        return encodable.strip()
    return ""
|
||||
|
||||
|
||||
def sanitize_text(value: str) -> str:
    """Strip HTML from ``value``, remove inline URLs and tidy the whitespace.

    Args:
        value: Raw text, possibly containing HTML markup and links.

    Returns:
        The cleaned text with no leading or trailing whitespace.
    """
    text = strip_html(value)
    # Match real http(s) URLs only. The looser "http\S+" also deleted ordinary
    # words that merely contain "http", such as "httpd" or "mod_http2".
    text = re.sub(r"https?://\S+", "", text, flags=re.IGNORECASE)
    text = re.sub(r"\s{2,}", " ", text)  # close the gaps left by removed URLs
    return text.strip()
|
||||
|
||||
|
||||
def drop_short_items(items: Iterable[NormalizedItem], minimum_length: int = MIN_TEXT_LENGTH) -> List[NormalizedItem]:
    """Keep only the items whose text has at least ``minimum_length`` characters."""
    kept: List[NormalizedItem] = []
    for entry in items:
        if len(entry["text"]) < minimum_length:
            continue
        kept.append(entry)
    return kept
|
||||
|
||||
|
||||
def fuzzy_deduplicate(items: Sequence[NormalizedItem], threshold: int = SIMILARITY_THRESHOLD) -> List[NormalizedItem]:
    """Drop items that repeat an earlier URL or closely match an earlier text.

    Items are visited in order and the first occurrence is the one kept. Two
    texts count as duplicates when their ``fuzz.token_set_ratio`` score is at
    least ``threshold``; items with empty text are never compared.
    """
    seen_urls: set[str] = set()
    deduped: List[NormalizedItem] = []
    for candidate in items:
        link = candidate.get("url") or ""
        if link and link in seen_urls:
            continue
        body = candidate.get("text") or ""
        # any() stops at the first sufficiently similar kept item.
        is_duplicate = bool(body) and any(
            kept.get("text") and fuzz.token_set_ratio(body, kept["text"]) >= threshold
            for kept in deduped
        )
        if is_duplicate:
            continue
        deduped.append(candidate)
        if link:
            seen_urls.add(link)
    return deduped
|
||||
|
||||
|
||||
def normalize_items(items: Sequence[NormalizedItem]) -> List[NormalizedItem]:
    """Sanitize each item's text, discard the too-short ones, then deduplicate.

    Every surviving item is rebuilt with its cleaned text; the other fields are
    carried over unchanged.
    """
    prepared: List[NormalizedItem] = []
    for raw in items:
        body = sanitize_text(raw.get("text", ""))
        if len(body) >= MIN_TEXT_LENGTH:
            prepared.append(
                NormalizedItem(
                    source=raw["source"],
                    id=raw["id"],
                    url=raw.get("url", ""),
                    author=raw.get("author"),
                    timestamp=raw["timestamp"],
                    text=body,
                    meta=raw.get("meta", {}),
                )
            )
    return fuzzy_deduplicate(prepared)
|
||||
|
||||
|
||||
def parse_date_range(option: str) -> datetime:
    """Translate a range identifier into the earliest UTC timestamp it covers.

    ``"24h"`` and ``"30d"`` map to one and thirty days before now. ``"7d"`` and
    any unrecognised identifier map to seven days. Matching ignores case.
    """
    current = datetime.now(timezone.utc)
    key = option.lower()
    if key == "24h":
        span = timedelta(days=1)
    elif key == "30d":
        span = timedelta(days=30)
    else:
        # "7d" and every unknown identifier share the one-week default.
        span = timedelta(days=7)
    return current - span
|
||||
|
||||
|
||||
def random_user_agent() -> str:
    """Pick one of three fixed browser user-agent strings at random."""
    chrome_on_windows = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
    )
    safari_on_mac = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_3) AppleWebKit/605.1.15 "
        "(KHTML, like Gecko) Version/16.4 Safari/605.1.15"
    )
    firefox_on_linux = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0"
    return random.choice([chrome_on_windows, safari_on_mac, firefox_on_linux])
|
||||
|
||||
|
||||
def chunked(iterable: Sequence[str], size: int) -> Iterator[Sequence[str]]:
    """Yield consecutive slices of ``iterable`` holding at most ``size`` items.

    Args:
        iterable: Sequence to split; it must support ``len`` and slicing.
        size: Maximum number of items per chunk; must be at least 1.

    Yields:
        Slices of ``iterable`` in order; the last one may be shorter.

    Raises:
        ValueError: If ``size`` is smaller than 1. Because this is a generator,
            the error surfaces when iteration starts.
    """
    if size < 1:
        # A negative step used to make range() empty, silently dropping every
        # item; zero already failed inside range(). Reject both explicitly.
        raise ValueError("size must be a positive integer")
    for start in range(0, len(iterable), size):
        yield iterable[start : start + size]
|
||||
|
||||
|
||||
def validate_openai_key(api_key: Optional[str]) -> Tuple[Optional[str], List[str]]:
    """Check an OpenAI API key and collect human-readable warnings about it.

    Args:
        api_key: The key as supplied, or ``None``.

    Returns:
        A ``(key, warnings)`` pair. ``key`` is the key with surrounding
        whitespace removed, or ``None`` when no usable key was supplied
        (missing, empty or whitespace only). ``warnings`` lists every issue
        found; an empty list means the key looks fine.
    """
    warnings: List[str] = []
    # Strip before any other check so padding cannot cause a false prefix
    # warning, and so a whitespace-only value is treated as "no key".
    cleaned = api_key.strip() if api_key else ""
    if not cleaned:
        warnings.append("No OpenAI API key detected. VADER fallback will be used.")
        return None, warnings
    if not cleaned.startswith("sk-"):
        warnings.append(
            "Provided OpenAI API key does not start with the expected prefix (sk-)."
        )
    if cleaned != api_key:
        warnings.append("OpenAI API key looks like it has leading or trailing whitespace.")
    return cleaned, warnings
|
||||
|
||||
|
||||
def ensure_timezone(ts: datetime) -> datetime:
    """Return ``ts`` as a timezone-aware datetime in UTC.

    A naive value is labelled as UTC without shifting its clock reading; an
    aware value is converted to the equivalent UTC instant.
    """
    is_naive = ts.tzinfo is None
    return ts.replace(tzinfo=timezone.utc) if is_naive else ts.astimezone(timezone.utc)
|
||||
|
||||
|
||||
def safe_int(value: Optional[object], default: int = 0) -> int:
    """Convert ``value`` to ``int``, falling back to ``default`` when impossible.

    Args:
        value: Anything ``int()`` might accept, or ``None``.
        default: Result to use when the conversion fails.

    Returns:
        ``int(value)``, or ``default`` if ``value`` is of an unsupported type,
        is not a valid number, or is an infinite float.
    """
    try:
        return int(value)  # type: ignore[arg-type]
    except (TypeError, ValueError, OverflowError):
        # OverflowError is what int() raises for float("inf") / float("-inf").
        return default
|
||||
@@ -0,0 +1,6 @@
|
||||
import pathlib
|
||||
import sys
|
||||
|
||||
# Put the directory one level above this file's folder at the front of the
# import path, unless it is already listed.
PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1]
if all(entry != str(PROJECT_ROOT) for entry in sys.path):
    sys.path[:0] = [str(PROJECT_ROOT)]
|
||||
@@ -0,0 +1,19 @@
|
||||
import pytest
|
||||
|
||||
from services import llm
|
||||
from services.utils import ServiceWarning
|
||||
|
||||
|
||||
def test_llm_fallback_uses_vader():
    """With no API key, batch classification must still label clear-cut sentiment."""
    expected_labels = ["positive", "negative"]
    outcomes = llm.LLMService(api_key=None).classify_sentiment_batch(
        ["I absolutely love this product!", "This is the worst experience ever."]
    )
    for position, expected in enumerate(expected_labels):
        assert outcomes[position].label == expected
|
||||
|
||||
|
||||
def test_summary_requires_openai_key():
    """Asking for a summary without an API key must raise ServiceWarning."""
    keyless_service = llm.LLMService(api_key=None)
    sample = [{"label": "positive", "text": "Example"}]
    with pytest.raises(ServiceWarning):
        keyless_service.summarize_overall(sample)
|
||||
35
community-contributions/Reputation_Radar/tests/test_utils.py
Normal file
35
community-contributions/Reputation_Radar/tests/test_utils.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import datetime as dt
|
||||
|
||||
from services import utils
|
||||
|
||||
|
||||
def test_normalize_items_deduplicates():
    """Two mentions sharing one URL and one text must collapse into a single item."""
    stamp = dt.datetime(2025, 1, 1, tzinfo=dt.timezone.utc)
    mentions = [
        utils.NormalizedItem(
            source="reddit",
            id=item_id,
            url="https://example.com/a",
            author=author,
            timestamp=stamp,
            text="ReputationRadar is great!",
            meta={},
        )
        for item_id, author in (("1", "alice"), ("2", "bob"))
    ]
    assert len(utils.normalize_items(mentions)) == 1
|
||||
|
||||
|
||||
def test_sanitize_text_removes_html():
    """Markup, including the link's href value, is removed; visible text stays."""
    markup = "<p>Hello <strong>world</strong> <a href='https://example.com'>link</a></p>"
    assert utils.sanitize_text(markup) == "Hello world link"
|
||||
221
community-contributions/wk1-day1-RBG-all-sites-jina.ipynb
Normal file
221
community-contributions/wk1-day1-RBG-all-sites-jina.ipynb
Normal file
@@ -0,0 +1,221 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# My First Lab = My 1st Frontier LLM Project\n",
|
||||
"## Summarize All Websites without Selenium\n",
|
||||
"This simple \"app\" uses Jina (https://jina.ai/reader) to turn all websites into markdown before summarizing by an LLM. As their website says: \"Convert a URL to LLM-friendly input, by simply adding r.jina.ai in front\". They have other tools that look useful too.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests # added for jina\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"# from scraper import fetch_website_contents # not needed for jina\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables from a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n",
|
||||
"\n",
|
||||
"# Setup access to the frontier model\n",
|
||||
"\n",
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 1-a: Define the user prompt\n",
|
||||
"\n",
|
||||
"user_prompt_prefix = \"\"\"\n",
|
||||
"Here are the contents of a website.\n",
|
||||
"Provide a short summary of this website.\n",
|
||||
"If it includes news or announcements, then summarize these too.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 1-b: Define the system prompt\n",
|
||||
"\n",
|
||||
"system_prompt = \"\"\"\n",
|
||||
"You are a smart assistant that analyzes the contents of a website,\n",
|
||||
"and provides a short, clear, summary, ignoring text that might be navigation related.\n",
|
||||
"Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Add the website content to the user prompt\n",
|
||||
"\n",
|
||||
"def messages_for(website):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_prefix + website}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 5: Change the content utility to use jina\n",
|
||||
"\n",
|
||||
"def fetch_url_content(url):\n",
|
||||
" jina_reader_url = f\"https://r.jina.ai/{url}\"\n",
|
||||
" try:\n",
|
||||
" response = requests.get(jina_reader_url)\n",
|
||||
" response.raise_for_status() # Raise an exception for HTTP errors\n",
|
||||
" return response.text\n",
|
||||
" except requests.exceptions.RequestException as e:\n",
|
||||
" print(f\"Error fetching URL: {e}\")\n",
|
||||
" return None\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 3: Call OpenAI & Step 4: print the result\n",
|
||||
"\n",
|
||||
"def summarize(url):\n",
|
||||
" website = fetch_url_content(url)\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model = \"gpt-5-nano\",\n",
|
||||
" messages = messages_for(website)\n",
|
||||
" )\n",
|
||||
" summary = response.choices[0].message.content\n",
|
||||
" return display(Markdown(summary))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summarize(\"https://edwarddonner.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "45d83403-a24c-44b5-84ac-961449b4008f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summarize(\"https://cnn.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "75e9fd40-b354-4341-991e-863ef2e59db7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summarize(\"https://openai.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36ed9f14-b349-40e9-a42c-b367e77f8bda",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Content Summary vs Technical Summary\n",
|
||||
"\n",
|
||||
"In my work a technical summary of a website, or group of websites, would be useful too. For example, does it render on the server (HTML) or in the browser (JavaScript), what content management system (CMS) was used, how many pages, how many outbound links, how many inbound links, etc. Doing this exercise I realized LLMs can help with analyzing content, but I may need other tools to count pages, links, and other specifications.\n",
|
||||
"\n",
|
||||
"A \"Shout Out\" to whoever put \"Market_Research_Agent.ipynb\" in the Community-Contributions. It is a great example of using an LLM as a management consultant. I think Jina might help with this use case by offering web search results through an API to feed to your LLM. Here is the system prompt from that notebook and I plan to use this format often.\n",
|
||||
"\n",
|
||||
"system_prompt = \"\"\"You are to act like a Mckinsey Consultant specializing in market research. \n",
|
||||
"1) You are to follow legal guidelines and never give immoral advice. \n",
|
||||
"2) Your job is to maximise profits for your clients by analysing their companies initiatives and giving out recommendations for newer initiatives.\\n \n",
|
||||
"3) Follow industry frameworks for reponses always give simple answers and stick to the point.\n",
|
||||
"4) If possible try to see what competitors exist and what market gap can your clients company exploit.\n",
|
||||
"5) Further more, USe SWOT, Porters 5 forces to summarize your recommendations, Give confidence score with every recommendations\n",
|
||||
"6) Try to give unique solutions by seeing what the market gap is, if market gap is ambiguious skip this step\n",
|
||||
"7) add an estimate of what rate the revenue of the comapany will increase at provided they follow the guidelines, give conservating estimates keeping in account non ideal conditions.\n",
|
||||
"8) if the website isnt of a company or data isnt available, give out an error message along the lines of more data required for analysis\"\"\""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
225
community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb
Normal file
225
community-contributions/wk1-day2-RBG-all-sites-ollama.ipynb
Normal file
@@ -0,0 +1,225 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Lab2: Local Open Source on My PC Project\n",
|
||||
"## Summarize All Websites without Selenium Using Open Source Models\n",
|
||||
"This builds on my app from yesterday using Jina (https://jina.ai/reader) to turn all websites into markdown before summarizing by an LLM. And it uses Ollama to store open source LLMs on my PC to run things locally (jina is not local, so to be totally local you might need to go back to Selenium to do JavaScript sites).\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Setup access to the Ollama models\n",
|
||||
"\n",
|
||||
"OLLAMA_BASE_URL = \"http://localhost:11434/v1\"\n",
|
||||
"\n",
|
||||
"ollama = OpenAI(base_url=OLLAMA_BASE_URL, api_key='ollama')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 1-a: Define the user prompt\n",
|
||||
"\n",
|
||||
"user_prompt_prefix = \"\"\"\n",
|
||||
"Here are the contents of a website.\n",
|
||||
"Provide a short summary of this website.\n",
|
||||
"If it includes news or announcements, then summarize these too.\n",
|
||||
"Make recommendations for improvement\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 1-b: Define the system prompt\n",
|
||||
"\n",
|
||||
"system_prompt = \"\"\"You are to act like a smart Mckinsey Consultant specializing in website analysis. \n",
|
||||
"1) You should provide a short, clear, summary, ignoring text that might be navigation related.\n",
|
||||
"2) Follow the summary by making recommendations for improving the website so it is better at serving its purpose.\n",
|
||||
"3) Follow industry frameworks for reponses always give simple answers and stick to the point.\n",
|
||||
"4) If possible try to group you recommendations, for example Grammar and Style, Clarity, Functional, etc.\n",
|
||||
"5) Give confidence scores with every recommendation.\n",
|
||||
"6) Always provide a summary of the website, explaining what it is.\n",
|
||||
"7) if you do not understand the website's purpose or have no improvement recommendations, give out an error message along the lines of more data required for analysis or ask a follow up question.\n",
|
||||
"8) Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.\"\"\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Add the website content to the user prompt\n",
|
||||
"\n",
|
||||
"def messages_for(website):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_prefix + website}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 5: Change the content utility to use jina\n",
|
||||
"\n",
|
||||
"def fetch_url_content(url):\n",
|
||||
" jina_reader_url = f\"https://r.jina.ai/{url}\"\n",
|
||||
" try:\n",
|
||||
" response = requests.get(jina_reader_url)\n",
|
||||
" response.raise_for_status() # Raise an exception for HTTP errors\n",
|
||||
" return response.text\n",
|
||||
" except requests.exceptions.RequestException as e:\n",
|
||||
" print(f\"Error fetching URL: {e}\")\n",
|
||||
" return None\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 3: Call Ollama model & Step 4: print the result\n",
|
||||
"\n",
|
||||
"def summarize(url):\n",
|
||||
" website = fetch_url_content(url)\n",
|
||||
" response = ollama.chat.completions.create(\n",
|
||||
" model = omodel,\n",
|
||||
" messages = messages_for(website)\n",
|
||||
" )\n",
|
||||
" summary = response.choices[0].message.content\n",
|
||||
" return display(Markdown(summary))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"omodel = \"llama3.2\"\n",
|
||||
"summarize(\"https://edwarddonner.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "75df7e70",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"omodel = \"deepseek-r1:1.5b\"\n",
|
||||
"summarize(\"https://edwarddonner.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "45d83403-a24c-44b5-84ac-961449b4008f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"omodel = \"llama3.2\"\n",
|
||||
"summarize(\"https://cnn.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "be133029",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"omodel = \"deepseek-r1:1.5b\"\n",
|
||||
"summarize(\"https://cnn.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "75e9fd40-b354-4341-991e-863ef2e59db7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"omodel = \"llama3.2\"\n",
|
||||
"summarize(\"https://openai.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a8d1a0ed",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"omodel = \"deepseek-r1:1.5b\"\n",
|
||||
"summarize(\"https://openai.com\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -54,7 +54,9 @@ ___
|
||||
|
||||
3. **Do a git clone:**
|
||||
|
||||
Enter this in the command prompt in the Projects folder:
|
||||
Enter the clone command below in the command prompt in the `projects` folder. If this gives you an error about long filenames, please do #3 in the "gotchas" section at the top, and then restart your computer, and you might also need to run this: `git config --system core.longpaths true`
|
||||
|
||||
Here's the clone command:
|
||||
|
||||
`git clone https://github.com/ed-donner/llm_engineering.git`
|
||||
|
||||
|
||||
571
week1/community-contributions/day1_email_secretary.ipynb
Normal file
571
week1/community-contributions/day1_email_secretary.ipynb
Normal file
@@ -0,0 +1,571 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# YOUR FIRST LAB\n",
|
||||
"### Please read this section. This is valuable to get you prepared, even if it's a long read -- it's important stuff.\n",
|
||||
"\n",
|
||||
"### Also, be sure to read [README.md](../README.md)! More info about the updated videos in the README and [top of the course resources in purple](https://edwarddonner.com/2024/11/13/llm-engineering-resources/)\n",
|
||||
"\n",
|
||||
"## Your first Frontier LLM Project\n",
|
||||
"\n",
|
||||
"By the end of this course, you will have built an autonomous Agentic AI solution with 7 agents that collaborate to solve a business problem. All in good time! We will start with something smaller...\n",
|
||||
"\n",
|
||||
"Our goal is to code a new kind of Web Browser. Give it a URL, and it will respond with a summary. The Reader's Digest of the internet!!\n",
|
||||
"\n",
|
||||
"Before starting, you should have completed the setup linked in the README.\n",
|
||||
"\n",
|
||||
"### If you're new to working in \"Notebooks\" (also known as Labs or Jupyter Lab)\n",
|
||||
"\n",
|
||||
"Welcome to the wonderful world of Data Science experimentation! Simply click in each \"cell\" with code in it, such as the cell immediately below this text, and hit Shift+Return to execute that cell. Be sure to run every cell, starting at the top, in order.\n",
|
||||
"\n",
|
||||
"Please look in the [Guides folder](../guides/01_intro.ipynb) for all the guides.\n",
|
||||
"\n",
|
||||
"## I am here to help\n",
|
||||
"\n",
|
||||
"If you have any problems at all, please do reach out. \n",
|
||||
"I'm available through the platform, or at ed@edwarddonner.com, or at https://www.linkedin.com/in/eddonner/ if you'd like to connect (and I love connecting!) \n",
|
||||
"And this is new to me, but I'm also trying out X at [@edwarddonner](https://x.com/edwarddonner) - if you're on X, please show me how it's done 😂 \n",
|
||||
"\n",
|
||||
"## More troubleshooting\n",
|
||||
"\n",
|
||||
"Please see the [troubleshooting](../setup/troubleshooting.ipynb) notebook in the setup folder to diagnose and fix common problems. At the very end of it is a diagnostics script with some useful debug info.\n",
|
||||
"\n",
|
||||
"## If this is old hat!\n",
|
||||
"\n",
|
||||
"If you're already comfortable with today's material, please hang in there; you can move swiftly through the first few labs - we will get much more in depth as the weeks progress. Ultimately we will fine-tune our own LLM to compete with OpenAI!\n",
|
||||
"\n",
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../assets/important.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#900;\">Please read - important note</h2>\n",
|
||||
" <span style=\"color:#900;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations. If you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...</span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>\n",
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../assets/resources.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#f71;\">This code is a live resource - keep an eye out for my emails</h2>\n",
|
||||
" <span style=\"color:#f71;\">I push updates to the code regularly. As people ask questions, I add more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but I've also added better explanations and new models like DeepSeek. Consider this like an interactive book.<br/><br/>\n",
|
||||
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
||||
" </span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>\n",
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../assets/business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#181;\">Business value of these exercises</h2>\n",
|
||||
" <span style=\"color:#181;\">A final thought. While I've designed these notebooks to be educational, I've also tried to make them enjoyable. We'll do fun things like have LLMs tell jokes and argue with each other. But fundamentally, my goal is to teach skills you can apply in business. I'll explain business implications as we go, and it's worth keeping this in mind: as you build experience with models and techniques, think of ways you could put this into action at work today. Please do contact me if you'd like to discuss more or if you have ideas to bounce off me.</span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "83f28feb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### If necessary, install Cursor Extensions\n",
|
||||
"\n",
|
||||
"1. From the View menu, select Extensions\n",
|
||||
"2. Search for Python\n",
|
||||
"3. Click on \"Python\" made by \"ms-python\" and select Install if not already installed\n",
|
||||
"4. Search for Jupyter\n",
|
||||
"5. Click on \"Jupyter\" made by \"ms-toolsai\" and select Install if not already installed\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Next Select the Kernel\n",
|
||||
"\n",
|
||||
"Click on \"Select Kernel\" on the Top Right\n",
|
||||
"\n",
|
||||
"Choose \"Python Environments...\"\n",
|
||||
"\n",
|
||||
"Then choose the one that looks like `.venv (Python 3.12.x) .venv/bin/python` - it should be marked as \"Recommended\" and have a big star next to it.\n",
|
||||
"\n",
|
||||
"Any problems with this? Head over to the troubleshooting.\n",
|
||||
"\n",
|
||||
"### Note: you'll need to set the Kernel with every notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from scraper import fetch_website_contents\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6900b2a8-6384-4316-8aaa-5e519fca4254",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Connecting to OpenAI (or Ollama)\n",
|
||||
"\n",
|
||||
"The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI. \n",
|
||||
"\n",
|
||||
"If you'd like to use free Ollama instead, please see the README section \"Free Alternative to Paid APIs\", and if you're not sure how to do this, there's a full solution in the solutions folder (day1_with_ollama.ipynb).\n",
|
||||
"\n",
|
||||
"## Troubleshooting if you have problems:\n",
|
||||
"\n",
|
||||
"If you get a \"Name Error\" - have you run all cells from the top down? Head over to the Python Foundations guide for a bulletproof way to find and fix all Name Errors.\n",
|
||||
"\n",
|
||||
"If that doesn't fix it, head over to the [troubleshooting](../setup/troubleshooting.ipynb) notebook for step by step code to identify the root cause and fix it!\n",
|
||||
"\n",
|
||||
"Or, contact me! Message me or email ed@edwarddonner.com and we will get this to work.\n",
|
||||
"\n",
|
||||
"Any concerns about API costs? See my notes in the README - costs should be minimal, and you can control it at every point. You can also use Ollama as a free alternative, which we discuss during Day 2."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables in a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "442fc84b-0815-4f40-99ab-d9a5da6bda91",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Let's make a quick call to a Frontier model to get started, as a preview!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a58394bf-1e45-46af-9bfd-01e24da6f49a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# To give you a preview -- calling OpenAI with these messages is this easy. Any problems, head over to the Troubleshooting notebook.\n",
|
||||
"\n",
|
||||
"message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
|
||||
"\n",
|
||||
"messages = [{\"role\": \"user\", \"content\": message}]\n",
|
||||
"\n",
|
||||
"messages\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "08330159",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()\n",
|
||||
"\n",
|
||||
"response = openai.chat.completions.create(model=\"gpt-5-nano\", messages=messages)\n",
|
||||
"response.choices[0].message.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2aa190e5-cb31-456a-96cc-db109919cd78",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## OK onwards with our first project"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Let's try out this utility\n",
|
||||
"\n",
|
||||
"ed = fetch_website_contents(\"https://edwarddonner.com\")\n",
|
||||
"print(ed)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6a478a0c-2c53-48ff-869c-4d08199931e1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Types of prompts\n",
|
||||
"\n",
|
||||
"You may know this already - but if not, you will get very familiar with it!\n",
|
||||
"\n",
|
||||
"Models like GPT have been trained to receive instructions in a particular way.\n",
|
||||
"\n",
|
||||
"They expect to receive:\n",
|
||||
"\n",
|
||||
"**A system prompt** that tells them what task they are performing and what tone they should use\n",
|
||||
"\n",
|
||||
"**A user prompt** -- the conversation starter that they should reply to"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
    "# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.'\n",
|
||||
"\n",
|
||||
"system_prompt = \"\"\"\n",
|
||||
"You are a snarky assistant that analyzes the contents of a website,\n",
|
||||
"and provides a short, snarky, humorous summary, ignoring text that might be navigation related.\n",
|
||||
"Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define our user prompt\n",
|
||||
"\n",
|
||||
"user_prompt_prefix = \"\"\"\n",
|
||||
"Here are the contents of a website.\n",
|
||||
"Provide a short summary of this website.\n",
|
||||
"If it includes news or announcements, then summarize these too.\n",
|
||||
"\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ea211b5f-28e1-4a86-8e52-c0b7677cadcc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Messages\n",
|
||||
"\n",
|
||||
"The API from OpenAI expects to receive messages in a particular structure.\n",
|
||||
"Many of the other APIs share this structure:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"[\n",
|
||||
" {\"role\": \"system\", \"content\": \"system message goes here\"},\n",
|
||||
" {\"role\": \"user\", \"content\": \"user message goes here\"}\n",
|
||||
"]\n",
|
||||
"```\n",
|
||||
"To give you a preview, the next 2 cells make a rather simple call - we won't stretch the mighty GPT (yet!)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f25dcd35-0cd0-4235-9f64-ac37ed9eaaa5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
|
||||
" {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"response = openai.chat.completions.create(model=\"gpt-4.1-nano\", messages=messages)\n",
|
||||
"response.choices[0].message.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## And now let's build useful messages for GPT-4.1-mini, using a function"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# See how this function creates exactly the format above\n",
|
||||
"\n",
|
||||
"def messages_for(website):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_prefix + website}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "36478464-39ee-485c-9f3f-6a4e458dbc9c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Try this out, and then try for a few more websites\n",
|
||||
"\n",
|
||||
"messages_for(ed)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Time to bring it together - the API for OpenAI is very simple!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# And now: call the OpenAI API. You will get very familiar with this!\n",
|
||||
"\n",
|
||||
"def summarize(url):\n",
|
||||
" website = fetch_website_contents(url)\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model = \"gpt-4.1-mini\",\n",
|
||||
" messages = messages_for(website)\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summarize(\"https://edwarddonner.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3d926d59-450e-4609-92ba-2d6f244f1342",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A function to display this nicely in the output, using markdown\n",
|
||||
"\n",
|
||||
"def display_summary(url):\n",
|
||||
" summary = summarize(url)\n",
|
||||
" display(Markdown(summary))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3018853a-445f-41ff-9560-d925d1774b2f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary(\"https://edwarddonner.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b3bcf6f4-adce-45e9-97ad-d9a5d7a3a624",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Let's try more websites\n",
|
||||
"\n",
|
||||
"Note that this will only work on websites that can be scraped using this simplistic approach.\n",
|
||||
"\n",
|
||||
"Websites that are rendered with Javascript, like React apps, won't show up. See the community-contributions folder for a Selenium implementation that gets around this. You'll need to read up on installing Selenium (ask ChatGPT!)\n",
|
||||
"\n",
|
||||
    "Also, websites protected with CloudFront (and similar) may give 403 errors - many thanks Andy J for pointing this out.\n",
|
||||
"\n",
|
||||
"But many websites will work just fine!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "45d83403-a24c-44b5-84ac-961449b4008f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary(\"https://cnn.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "75e9fd40-b354-4341-991e-863ef2e59db7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary(\"https://anthropic.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c951be1a-7f1b-448f-af1f-845978e47e2c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../assets/business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#181;\">Business applications</h2>\n",
|
||||
" <span style=\"color:#181;\">In this exercise, you experienced calling the Cloud API of a Frontier Model (a leading model at the frontier of AI) for the first time. We will be using APIs like OpenAI at many stages in the course, in addition to building our own LLMs.\n",
|
||||
"\n",
|
||||
"More specifically, we've applied this to Summarization - a classic Gen AI use case to make a summary. This can be applied to any business vertical - summarizing the news, summarizing financial performance, summarizing a resume in a cover letter - the applications are limitless. Consider how you could apply Summarization in your business, and try prototyping a solution.</span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>\n",
|
||||
"\n",
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../assets/important.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#900;\">Before you continue - now try yourself</h2>\n",
|
||||
" <span style=\"color:#900;\">Use the cell below to make your own simple commercial example. Stick with the summarization use case for now. Here's an idea: write something that will take the contents of an email, and will suggest an appropriate short subject line for the email. That's the kind of feature that might be built into a commercial email tool.</span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 1: Create your prompts\n",
|
||||
"\n",
|
||||
"system_prompt = \"\"\"You are my personal secretary. You will review an email and summarize the content. Write a summary and add a response to the sender.\n",
|
||||
"\"\"\"\n",
|
||||
"user_prompt = \"\"\"\n",
|
||||
" Here are the contents of an email:\n",
|
||||
" ***Insert Email Here***\n",
|
||||
"\n",
|
||||
" .\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" \n",
|
||||
" Write a summary and with bullet points of the key topics of the email.\n",
|
||||
" Structure the summary with Date, Time and name of Sender on the Top right hand corner.\n",
|
||||
" After the summary, add triple spaces and write a response to the sender indicating receipt of email and suggest some valid responses.\n",
|
||||
" Highlight the response with all caps.\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Step 2: Make the messages list\n",
|
||||
"\n",
|
||||
"messages = [{\"role\":\"system\" , \"content\": system_prompt},\n",
|
||||
"{\"role\":\"user\", \"content\":user_prompt}] # fill this in\n",
|
||||
"# Step 3: Call OpenAI\n",
|
||||
"response =openai.chat.completions.create(\n",
|
||||
" model=\"gpt-4.1-mini\",\n",
|
||||
" messages=messages)\n",
|
||||
"\n",
|
||||
"# Step 4: print the result\n",
|
||||
"print(response.choices[0].message.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36ed9f14-b349-40e9-a42c-b367e77f8bda",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## An extra exercise for those who enjoy web scraping\n",
|
||||
"\n",
|
||||
"You may notice that if you try `display_summary(\"https://openai.com\")` - it doesn't work! That's because OpenAI has a fancy website that uses Javascript. There are many ways around this that some of you might be familiar with. For example, Selenium is a hugely popular framework that runs a browser behind the scenes, renders the page, and allows you to query it. If you have experience with Selenium, Playwright or similar, then feel free to improve the Website class to use them. In the community-contributions folder, you'll find an example Selenium solution from a student (thank you!)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eeab24dc-5f90-4570-b542-b0585aca3eb6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Sharing your code\n",
|
||||
"\n",
|
||||
    "I'd love it if you share your code afterwards so I can share it with others! You'll notice that some students have already made changes (including a Selenium implementation) which you will find in the community-contributions folder. If you'd like to add your changes to that folder, submit a Pull Request with your new versions in that folder and I'll merge your changes.\n",
|
||||
"\n",
|
||||
    "If you're not an expert with git (and I am not!) then GPT has given some nice instructions on how to submit a Pull Request. It's a bit of an involved process, but once you've done it once it's pretty clear. As a pro-tip: it's best if you clear the outputs of your Jupyter notebooks (Edit >> Clear outputs of all cells, and then Save) for clean notebooks.\n",
|
||||
"\n",
|
||||
"Here are good instructions courtesy of an AI friend: \n",
|
||||
"https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
226
week1/community-contributions/emmy/emmy_week1_EXERCISE.ipynb
Normal file
226
week1/community-contributions/emmy/emmy_week1_EXERCISE.ipynb
Normal file
@@ -0,0 +1,226 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fe12c203-e6a6-452c-a655-afb8a03a4ff5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# End of week 1 exercise\n",
|
||||
"\n",
|
||||
"To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question, \n",
|
||||
"and responds with an explanation. This is a tool that you will be able to use yourself during the course!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c1070317-3ed9-4659-abe3-828943230e03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"import os\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from openai import OpenAI\n",
|
||||
"import ollama\n",
|
||||
"import ipywidgets as widgets\n",
|
||||
"from IPython.display import display, Markdown"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4a456906-915a-4bfd-bb9d-57e505c5093f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# constants\n",
|
||||
"\n",
|
||||
"MODEL_GEMINI = \"gemini-2.5-flash\"\n",
|
||||
"MODEL_LLAMA = \"llama3.1:8b\"\n",
|
||||
"\n",
|
||||
"CHOICE_GEMINI = \"gemini\"\n",
|
||||
"CHOICE_OLLAMA = \"ollama\"\n",
|
||||
"\n",
|
||||
"SYSTEM_PROMPT = (\n",
|
||||
" \"You are a technical adviser. The student is learning LLM engineering \"\n",
|
||||
" \"and you will be asked to explain lines of code with an example, \"\n",
|
||||
    "    \"mostly in Python. \"\n",
|
||||
" \"You can answer other questions as well.\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"GEMINI_BASE_URL = \"https://generativelanguage.googleapis.com/v1beta/openai/\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a8d7923c-5f28-4c30-8556-342d7c8497c1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# set up environment\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"google_api_key = os.getenv(\"GOOGLE_API_KEY\")\n",
|
||||
"\n",
|
||||
"if not google_api_key:\n",
|
||||
" print(\"Warning: GOOGLE_API_KEY not found. Gemini calls will fail.\")\n",
|
||||
" print(\"Please create a .env file with GOOGLE_API_KEY=your_key\")\n",
|
||||
"\n",
|
||||
"gemini_client = OpenAI(\n",
|
||||
" base_url=GEMINI_BASE_URL,\n",
|
||||
" api_key=google_api_key,\n",
|
||||
")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3f0d0137-52b0-47a8-81a8-11a90a010798",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# here is the question; type over this to ask something new\n",
|
||||
"\n",
|
||||
"question = \"\"\"\n",
|
||||
"Please explain what this code does and why:\n",
|
||||
"yield from {book.get(\"author\") for book in books if book.get(\"author\")}\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "60ce7000-a4a5-4cce-a261-e75ef45063b4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def make_messages(user_question: str):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n",
|
||||
" {\"role\": \"user\", \"content\": user_question},\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def stream_gemini(messages):\n",
|
||||
" \"\"\"Stream response chunks from Gemini.\"\"\"\n",
|
||||
" stream = gemini_client.chat.completions.create(\n",
|
||||
" model=MODEL_GEMINI,\n",
|
||||
" messages=messages,\n",
|
||||
" stream=True,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" full = []\n",
|
||||
" for chunk in stream:\n",
|
||||
" piece = chunk.choices[0].delta.content or \"\"\n",
|
||||
" full.append(piece)\n",
|
||||
" return \"\".join(full)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def stream_ollama(messages):\n",
|
||||
" \"\"\"Stream response chunks from local Ollama.\"\"\"\n",
|
||||
" stream = ollama.chat(\n",
|
||||
" model=MODEL_LLAMA,\n",
|
||||
" messages=messages,\n",
|
||||
" stream=True,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" full = []\n",
|
||||
" for chunk in stream:\n",
|
||||
" piece = chunk[\"message\"][\"content\"]\n",
|
||||
" full.append(piece)\n",
|
||||
" return \"\".join(full)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_explanation(question: str, model_choice: str):\n",
|
||||
    "    \"\"\"Gets a technical explanation from the chosen model and returns the full response text (empty string on failure).\"\"\"\n",
|
||||
" messages = make_messages(question)\n",
|
||||
" try:\n",
|
||||
" if model_choice == CHOICE_GEMINI:\n",
|
||||
" return stream_gemini(messages)\n",
|
||||
" elif model_choice == CHOICE_OLLAMA:\n",
|
||||
" return stream_ollama(messages)\n",
|
||||
" else:\n",
|
||||
" print(\"Unknown model choice.\")\n",
|
||||
" return \"\"\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"\\nAn error occurred: {e}\")\n",
|
||||
" return \"\"\n",
|
||||
"\n",
|
||||
"print(\"💡 Your personal technical tutor is ready.\\n\")\n",
|
||||
"\n",
|
||||
"# Dropdown for model selection\n",
|
||||
"model_dropdown = widgets.Dropdown(\n",
|
||||
" options=[\n",
|
||||
" (\"Gemini (gemini-2.5-flash)\", CHOICE_GEMINI),\n",
|
||||
" (\"Ollama (llama3.1:8b)\", CHOICE_OLLAMA),\n",
|
||||
" ],\n",
|
||||
" value=CHOICE_GEMINI,\n",
|
||||
" description=\"Model:\",\n",
|
||||
" style={\"description_width\": \"initial\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Text input for question\n",
|
||||
"question_box = widgets.Textarea(\n",
|
||||
" placeholder=\"Type your technical question here...\",\n",
|
||||
" description=\"Question:\",\n",
|
||||
" layout=widgets.Layout(width=\"100%\", height=\"100px\"),\n",
|
||||
" style={\"description_width\": \"initial\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"submit_button = widgets.Button(description=\"Ask\", button_style=\"success\", icon=\"paper-plane\")\n",
|
||||
"\n",
|
||||
"output_area = widgets.Output()\n",
|
||||
"loader_label = widgets.Label(value=\"\")\n",
|
||||
"\n",
|
||||
"def on_submit(_):\n",
|
||||
" output_area.clear_output()\n",
|
||||
" question = question_box.value.strip()\n",
|
||||
" if not question:\n",
|
||||
" with output_area:\n",
|
||||
" print(\"Please enter a question.\")\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
" loader_label.value = \"⏳ Thinking...\"\n",
|
||||
" submit_button.disabled = True\n",
|
||||
"\n",
|
||||
" answer = get_explanation(question, model_dropdown.value)\n",
|
||||
"\n",
|
||||
" loader_label.value = \"\"\n",
|
||||
" submit_button.disabled = False\n",
|
||||
"\n",
|
||||
" with output_area:\n",
|
||||
" print(f\"🤖 Model: {model_dropdown.label}\")\n",
|
||||
" print(f\"📜 Question: {question}\\n\")\n",
|
||||
" display(Markdown(answer))\n",
|
||||
" print(\"\\n--- End of response ---\")\n",
|
||||
"\n",
|
||||
"submit_button.on_click(on_submit)\n",
|
||||
"\n",
|
||||
"# Display everything\n",
|
||||
"display(model_dropdown, question_box, submit_button, loader_label, output_area)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llm-engineering (3.12.10)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
563
week1/community-contributions/slmslm333221/day1.ipynb
Normal file
563
week1/community-contributions/slmslm333221/day1.ipynb
Normal file
@@ -0,0 +1,563 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# YOUR FIRST LAB\n",
|
||||
"### Please read this section. This is valuable to get you prepared, even if it's a long read -- it's important stuff.\n",
|
||||
"\n",
|
||||
"### Also, be sure to read [README.md](../README.md)! More info about the updated videos in the README and [top of the course resources in purple](https://edwarddonner.com/2024/11/13/llm-engineering-resources/)\n",
|
||||
"\n",
|
||||
"## Your first Frontier LLM Project\n",
|
||||
"\n",
|
||||
"By the end of this course, you will have built an autonomous Agentic AI solution with 7 agents that collaborate to solve a business problem. All in good time! We will start with something smaller...\n",
|
||||
"\n",
|
||||
"Our goal is to code a new kind of Web Browser. Give it a URL, and it will respond with a summary. The Reader's Digest of the internet!!\n",
|
||||
"\n",
|
||||
"Before starting, you should have completed the setup linked in the README.\n",
|
||||
"\n",
|
||||
"### If you're new to working in \"Notebooks\" (also known as Labs or Jupyter Lab)\n",
|
||||
"\n",
|
||||
"Welcome to the wonderful world of Data Science experimentation! Simply click in each \"cell\" with code in it, such as the cell immediately below this text, and hit Shift+Return to execute that cell. Be sure to run every cell, starting at the top, in order.\n",
|
||||
"\n",
|
||||
"Please look in the [Guides folder](../guides/01_intro.ipynb) for all the guides.\n",
|
||||
"\n",
|
||||
"## I am here to help\n",
|
||||
"\n",
|
||||
"If you have any problems at all, please do reach out. \n",
|
||||
"I'm available through the platform, or at ed@edwarddonner.com, or at https://www.linkedin.com/in/eddonner/ if you'd like to connect (and I love connecting!) \n",
|
||||
"And this is new to me, but I'm also trying out X at [@edwarddonner](https://x.com/edwarddonner) - if you're on X, please show me how it's done 😂 \n",
|
||||
"\n",
|
||||
"## More troubleshooting\n",
|
||||
"\n",
|
||||
"Please see the [troubleshooting](../setup/troubleshooting.ipynb) notebook in the setup folder to diagnose and fix common problems. At the very end of it is a diagnostics script with some useful debug info.\n",
|
||||
"\n",
|
||||
"## If this is old hat!\n",
|
||||
"\n",
|
||||
"If you're already comfortable with today's material, please hang in there; you can move swiftly through the first few labs - we will get much more in depth as the weeks progress. Ultimately we will fine-tune our own LLM to compete with OpenAI!\n",
|
||||
"\n",
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../assets/important.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#900;\">Please read - important note</h2>\n",
|
||||
" <span style=\"color:#900;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations. If you have a Github account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...</span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>\n",
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../assets/resources.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#f71;\">This code is a live resource - keep an eye out for my emails</h2>\n",
|
||||
" <span style=\"color:#f71;\">I push updates to the code regularly. As people ask questions, I add more examples or improved commentary. As a result, you'll notice that the code below isn't identical to the videos. Everything from the videos is here; but I've also added better explanations and new models like DeepSeek. Consider this like an interactive book.<br/><br/>\n",
|
||||
" I try to send emails regularly with important updates related to the course. You can find this in the 'Announcements' section of Udemy in the left sidebar. You can also choose to receive my emails via your Notification Settings in Udemy. I'm respectful of your inbox and always try to add value with my emails!\n",
|
||||
" </span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>\n",
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../assets/business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#181;\">Business value of these exercises</h2>\n",
|
||||
" <span style=\"color:#181;\">A final thought. While I've designed these notebooks to be educational, I've also tried to make them enjoyable. We'll do fun things like have LLMs tell jokes and argue with each other. But fundamentally, my goal is to teach skills you can apply in business. I'll explain business implications as we go, and it's worth keeping this in mind: as you build experience with models and techniques, think of ways you could put this into action at work today. Please do contact me if you'd like to discuss more or if you have ideas to bounce off me.</span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "83f28feb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### If necessary, install Cursor Extensions\n",
|
||||
"\n",
|
||||
"1. From the View menu, select Extensions\n",
|
||||
"2. Search for Python\n",
|
||||
"3. Click on \"Python\" made by \"ms-python\" and select Install if not already installed\n",
|
||||
"4. Search for Jupyter\n",
|
||||
"5. Click on \"Jupyter\" made by \"ms-toolsai\" and select Install if not already installed\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Next Select the Kernel\n",
|
||||
"\n",
|
||||
"Click on \"Select Kernel\" on the Top Right\n",
|
||||
"\n",
|
||||
"Choose \"Python Environments...\"\n",
|
||||
"\n",
|
||||
"Then choose the one that looks like `.venv (Python 3.12.x) .venv/bin/python` - it should be marked as \"Recommended\" and have a big star next to it.\n",
|
||||
"\n",
|
||||
"Any problems with this? Head over to the troubleshooting notebook.\n",
|
||||
"\n",
|
||||
"### Note: you'll need to set the Kernel with every notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"from pathlib import Path\n",
|
||||
"sys.path.append(str(Path(r\"..\\..\").resolve()))\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from scraper import fetch_website_contents\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# If you get an error running this cell, then please head over to the troubleshooting notebook!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6900b2a8-6384-4316-8aaa-5e519fca4254",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Connecting to OpenAI (or Ollama)\n",
|
||||
"\n",
|
||||
"The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI. \n",
|
||||
"\n",
|
||||
"If you'd like to use free Ollama instead, please see the README section \"Free Alternative to Paid APIs\", and if you're not sure how to do this, there's a full solution in the solutions folder (day1_with_ollama.ipynb).\n",
|
||||
"\n",
|
||||
"## Troubleshooting if you have problems:\n",
|
||||
"\n",
|
||||
"If you get a \"Name Error\" - have you run all cells from the top down? Head over to the Python Foundations guide for a bulletproof way to find and fix all Name Errors.\n",
|
||||
"\n",
|
||||
"If that doesn't fix it, head over to the [troubleshooting](../setup/troubleshooting.ipynb) notebook for step by step code to identify the root cause and fix it!\n",
|
||||
"\n",
|
||||
"Or, contact me! Message me or email ed@edwarddonner.com and we will get this to work.\n",
|
||||
"\n",
|
||||
"Any concerns about API costs? See my notes in the README - costs should be minimal, and you can control it at every point. You can also use Ollama as a free alternative, which we discuss during Day 2."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables in a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "442fc84b-0815-4f40-99ab-d9a5da6bda91",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Let's make a quick call to a Frontier model to get started, as a preview!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a58394bf-1e45-46af-9bfd-01e24da6f49a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# To give you a preview -- calling OpenAI with these messages is this easy. Any problems, head over to the Troubleshooting notebook.\n",
|
||||
"\n",
|
||||
"message = \"Hello, GPT! This is my first ever message to you! Hi!\"\n",
|
||||
"\n",
|
||||
"messages = [{\"role\": \"user\", \"content\": message}]\n",
|
||||
"\n",
|
||||
"messages\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "08330159",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()\n",
|
||||
"\n",
|
||||
"response = openai.chat.completions.create(model=\"gpt-5-nano\", messages=messages)\n",
|
||||
"response.choices[0].message.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2aa190e5-cb31-456a-96cc-db109919cd78",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## OK onwards with our first project"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Let's try out this utility\n",
|
||||
"\n",
|
||||
"ed = fetch_website_contents(\"https://edwarddonner.com\")\n",
|
||||
"print(ed)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6a478a0c-2c53-48ff-869c-4d08199931e1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Types of prompts\n",
|
||||
"\n",
|
||||
"You may know this already - but if not, you will get very familiar with it!\n",
|
||||
"\n",
|
||||
"Models like GPT have been trained to receive instructions in a particular way.\n",
|
||||
"\n",
|
||||
"They expect to receive:\n",
|
||||
"\n",
|
||||
"**A system prompt** that tells them what task they are performing and what tone they should use\n",
|
||||
"\n",
|
||||
"**A user prompt** -- the conversation starter that they should reply to"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.'\n",
|
||||
"\n",
|
||||
"system_prompt = \"\"\"\n",
|
||||
"You are a snarkyassistant that analyzes the contents of a website,\n",
|
||||
"and provides a short, snarky, humorous summary, ignoring text that might be navigation related.\n",
|
||||
"Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define our user prompt\n",
|
||||
"\n",
|
||||
"user_prompt_prefix = \"\"\"\n",
|
||||
"Here are the contents of a website.\n",
|
||||
"Provide a short summary of this website.\n",
|
||||
"If it includes news or announcements, then summarize these too.\n",
|
||||
"\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ea211b5f-28e1-4a86-8e52-c0b7677cadcc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Messages\n",
|
||||
"\n",
|
||||
"The API from OpenAI expects to receive messages in a particular structure.\n",
|
||||
"Many of the other APIs share this structure:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"[\n",
|
||||
" {\"role\": \"system\", \"content\": \"system message goes here\"},\n",
|
||||
" {\"role\": \"user\", \"content\": \"user message goes here\"}\n",
|
||||
"]\n",
|
||||
"```\n",
|
||||
"To give you a preview, the next 2 cells make a rather simple call - we won't stretch the mighty GPT (yet!)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f25dcd35-0cd0-4235-9f64-ac37ed9eaaa5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": \"You are a helpful, by far too polite assistant trying to sell more services with every contact\"},\n",
|
||||
" {\"role\": \"user\", \"content\": \"What is 2 + 2?\"}\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"response = openai.chat.completions.create(model=\"gpt-4.1-nano\", messages=messages)\n",
|
||||
"response.choices[0].message.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d06e8d78-ce4c-4b05-aa8e-17050c82bb47",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## And now let's build useful messages for GPT-4.1-mini, using a function"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# See how this function creates exactly the format above\n",
|
||||
"\n",
|
||||
"def messages_for(website):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_prefix + website}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "36478464-39ee-485c-9f3f-6a4e458dbc9c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Try this out, and then try for a few more websites\n",
|
||||
"\n",
|
||||
"messages_for(ed)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "16f49d46-bf55-4c3e-928f-68fc0bf715b0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Time to bring it together - the API for OpenAI is very simple!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# And now: call the OpenAI API. You will get very familiar with this!\n",
|
||||
"\n",
|
||||
"def summarize(url):\n",
|
||||
" website = fetch_website_contents(url)\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model = \"gpt-4.1-mini\",\n",
|
||||
" messages = messages_for(website)\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summarize(\"https://edwarddonner.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3d926d59-450e-4609-92ba-2d6f244f1342",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# A function to display this nicely in the output, using markdown\n",
|
||||
"\n",
|
||||
"def display_summary(url):\n",
|
||||
" summary = summarize(url)\n",
|
||||
" display(Markdown(summary))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3018853a-445f-41ff-9560-d925d1774b2f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary(\"https://edwarddonner.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b3bcf6f4-adce-45e9-97ad-d9a5d7a3a624",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Let's try more websites\n",
|
||||
"\n",
|
||||
"Note that this will only work on websites that can be scraped using this simplistic approach.\n",
|
||||
"\n",
|
||||
"Websites that are rendered with Javascript, like React apps, won't show up. See the community-contributions folder for a Selenium implementation that gets around this. You'll need to read up on installing Selenium (ask ChatGPT!)\n",
|
||||
"\n",
|
||||
"Also, websites protected with CloudFront (and similar) may give 403 errors - many thanks Andy J for pointing this out.\n",
|
||||
"\n",
|
||||
"But many websites will work just fine!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "45d83403-a24c-44b5-84ac-961449b4008f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary(\"https://cnn.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "75e9fd40-b354-4341-991e-863ef2e59db7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display_summary(\"https://anthropic.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c951be1a-7f1b-448f-af1f-845978e47e2c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../assets/business.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#181;\">Business applications</h2>\n",
|
||||
" <span style=\"color:#181;\">In this exercise, you experienced calling the Cloud API of a Frontier Model (a leading model at the frontier of AI) for the first time. We will be using APIs like OpenAI at many stages in the course, in addition to building our own LLMs.\n",
|
||||
"\n",
|
||||
"More specifically, we've applied this to Summarization - a classic Gen AI use case to make a summary. This can be applied to any business vertical - summarizing the news, summarizing financial performance, summarizing a resume in a cover letter - the applications are limitless. Consider how you could apply Summarization in your business, and try prototyping a solution.</span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>\n",
|
||||
"\n",
|
||||
"<table style=\"margin: 0; text-align: left;\">\n",
|
||||
" <tr>\n",
|
||||
" <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
|
||||
" <img src=\"../assets/important.jpg\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <h2 style=\"color:#900;\">Before you continue - now try yourself</h2>\n",
|
||||
" <span style=\"color:#900;\">Use the cell below to make your own simple commercial example. Stick with the summarization use case for now. Here's an idea: write something that will take the contents of an email, and will suggest an appropriate short subject line for the email. That's the kind of feature that might be built into a commercial email tool.</span>\n",
|
||||
" </td>\n",
|
||||
" </tr>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "00743dac-0e70-45b7-879a-d7293a6f68a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 1: Create your prompts\n",
|
||||
"\n",
|
||||
"system_prompt = \"something here\"\n",
|
||||
"user_prompt = \"\"\"\n",
|
||||
" Lots of text\n",
|
||||
" Can be pasted here\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Step 2: Make the messages list\n",
|
||||
"\n",
|
||||
"messages = [] # fill this in\n",
|
||||
"\n",
|
||||
"# Step 3: Call OpenAI\n",
|
||||
"# response =\n",
|
||||
"\n",
|
||||
"# Step 4: print the result\n",
|
||||
"# print("
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36ed9f14-b349-40e9-a42c-b367e77f8bda",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## An extra exercise for those who enjoy web scraping\n",
|
||||
"\n",
|
||||
"You may notice that if you try `display_summary(\"https://openai.com\")` - it doesn't work! That's because OpenAI has a fancy website that uses Javascript. There are many ways around this that some of you might be familiar with. For example, Selenium is a hugely popular framework that runs a browser behind the scenes, renders the page, and allows you to query it. If you have experience with Selenium, Playwright or similar, then feel free to improve the Website class to use them. In the community-contributions folder, you'll find an example Selenium solution from a student (thank you!)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eeab24dc-5f90-4570-b542-b0585aca3eb6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Sharing your code\n",
|
||||
"\n",
|
||||
"I'd love it if you share your code afterwards so I can share it with others! You'll notice that some students have already made changes (including a Selenium implementation) which you will find in the community-contributions folder. If you'd like to add your changes to that folder, submit a Pull Request with your new versions in that folder and I'll merge your changes.\n",
|
||||
"\n",
|
||||
"If you're not an expert with git (and I am not!) then GPT has given some nice instructions on how to submit a Pull Request. It's a bit of an involved process, but once you've done it once it's pretty clear. As a pro-tip: it's best if you clear the outputs of your Jupyter notebooks (Edit >> Clean outputs of all cells, and then Save) for clean notebooks.\n",
|
||||
"\n",
|
||||
"Here are good instructions courtesy of an AI friend: \n",
|
||||
"https://chatgpt.com/share/677a9cb5-c64c-8012-99e0-e06e88afd293"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f4484fcf-8b39-4c3f-9674-37970ed71988",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,235 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d12b9c22",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Song Lyrics → One-Sentence Summary\n",
|
||||
"Get the lyrics of a song and summarize its main idea in about one sentence.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"Import required libraries: environment vars, display helper, OpenAI client, BeautifulSoup, and requests."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d94bbd61",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"import requests"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "92dc1bde",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Function: Get Lyrics from Genius\n",
|
||||
"Fetch and extract the lyrics from a Genius.com song page using BeautifulSoup."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2b43fa98",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_lyrics_from_genius(url: str) -> str:\n",
|
||||
" \"\"\"\n",
|
||||
" Extracts song lyrics from a Genius.com song URL using BeautifulSoup.\n",
|
||||
" Example URL: https://genius.com/Ed-sheeran-shape-of-you-lyrics\n",
|
||||
" \"\"\"\n",
|
||||
" # Standard headers to fetch a website\n",
|
||||
" headers = {\n",
|
||||
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" response = requests.get(url, headers=headers)\n",
|
||||
" response.raise_for_status() # raises error if page not found\n",
|
||||
"\n",
|
||||
" soup = BeautifulSoup(response.text, \"html.parser\")\n",
|
||||
"\n",
|
||||
" # Genius stores lyrics inside <div data-lyrics-container=\"true\">\n",
|
||||
" lyrics_blocks = soup.find_all(\"div\", {\"data-lyrics-container\": \"true\"})\n",
|
||||
"\n",
|
||||
" if not lyrics_blocks:\n",
|
||||
" return \"Lyrics not found.\"\n",
|
||||
"\n",
|
||||
" # Join all text blocks and clean up spacing\n",
|
||||
" lyrics = \"\\n\".join(block.get_text(separator=\"\\n\") for block in lyrics_blocks)\n",
|
||||
" return lyrics.strip()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fc4f0590",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Function: Create Genius URL\n",
|
||||
"Build a Genius.com lyrics URL automatically from the given artist and song name."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e018c623",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def create_genius_url(artist: str, song: str) -> str:\n",
|
||||
" \"\"\"\n",
|
||||
" Creates a Genius.com lyrics URL from artist and song name.\n",
|
||||
" Example:\n",
|
||||
" create_genius_url(\"Ed sheeran\", \"shape of you\")\n",
|
||||
" → https://genius.com/Ed-sheeran-shape-of-you-lyrics\n",
|
||||
" \"\"\"\n",
|
||||
" artist = artist.strip().replace(\" \", \"-\")\n",
|
||||
" song = song.strip().replace(\" \", \"-\")\n",
|
||||
" return f\"https://genius.com/{artist}-{song}-lyrics\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "62f50f02",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Generate URL and Fetch Lyrics\n",
|
||||
"Create the Genius URL from the artist and song name, then fetch and display the lyrics."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ed51d48d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"artist = \"Ed sheeran\"\n",
|
||||
"song = \"shape of you\"\n",
|
||||
"\n",
|
||||
"url = create_genius_url(artist, song)\n",
|
||||
"print(url)\n",
|
||||
"# Output: https://genius.com/Ed-sheeran-shape-of-you-lyrics\n",
|
||||
"\n",
|
||||
"user_prompt = get_lyrics_from_genius(url)\n",
|
||||
"print(user_prompt[:5000]) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fca4203a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"system_prompt = \"\"\"\n",
|
||||
"You are a **helpful assistant** that specializes in analyzing **song lyrics**.\n",
|
||||
"\n",
|
||||
"## Task\n",
|
||||
"Your goal is to **summarize the main idea or theme of a song** in **about one sentence**.\n",
|
||||
"\n",
|
||||
"## Instructions\n",
|
||||
"1. Read the given song lyrics carefully.\n",
|
||||
"2. Identify the **core message**, **emotion**, or **story** of the song.\n",
|
||||
"3. Respond with **one concise sentence** only.\n",
|
||||
"4. The tone of your summary should reflect the song’s mood (e.g., joyful, melancholic, romantic, rebellious).\n",
|
||||
"\n",
|
||||
"## Edge Cases\n",
|
||||
"- **Very short lyrics:** Summarize the implied meaning.\n",
|
||||
"- **Repetitive lyrics:** Focus on the message or emotion being emphasized.\n",
|
||||
"- **Abstract or nonsensical lyrics:** Describe the overall feeling or imagery they create.\n",
|
||||
"- **No lyrics or only a title provided:** Reply with \n",
|
||||
" `No lyrics provided — unable to summarize meaningfully.`\n",
|
||||
"- **Non-English lyrics:** Summarize in English unless otherwise instructed.\n",
|
||||
"\n",
|
||||
"## Output Format\n",
|
||||
"Plain text — a single, coherent sentence summarizing the main idea of the song.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "11784d62",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Chat Messages\n",
|
||||
"Prepare the system and user messages, then send them to the OpenAI model for summarization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f1205658",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5c8d61aa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai = OpenAI()\n",
|
||||
"response = openai.chat.completions.create(\n",
|
||||
" model = \"gpt-4.1-mini\",\n",
|
||||
" messages = messages\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4ad95820",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Display Summary\n",
|
||||
"Show the model’s one-sentence summary of the song lyrics in a formatted Markdown output."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4f09a642",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"display(Markdown(response.choices[0].message.content))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
221
week1/community-contributions/wk1-day1-RBG-all-sites-jina.ipynb
Normal file
221
week1/community-contributions/wk1-day1-RBG-all-sites-jina.ipynb
Normal file
@@ -0,0 +1,221 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d15d8294-3328-4e07-ad16-8a03e9bbfdb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# My First Lab = My 1st Frontier LLM Project\n",
|
||||
"## Summarize All Websites without Selenium\n",
|
||||
"This simple \"app\" uses Jina (https://jina.ai/reader) to turn all websites into markdown before summarizing by an LLM. As their website says: \"Convert a URL to LLM-friendly input, by simply adding r.jina.ai in front\". They have other tools that look useful too.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import requests # added for jina\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"# from scraper import fetch_website_contents # not needed for jina\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b87cadb-d513-4303-baee-a37b6f938e4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables from a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")\n",
|
||||
"\n",
|
||||
"# Setup access to the frontier model\n",
|
||||
"\n",
|
||||
"openai = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f0275b1b-7cfe-4f9d-abfa-7650d378da0c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 1-a: Define the user prompt\n",
|
||||
"\n",
|
||||
"user_prompt_prefix = \"\"\"\n",
|
||||
"Here are the contents of a website.\n",
|
||||
"Provide a short summary of this website.\n",
|
||||
"If it includes news or announcements, then summarize these too.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "abdb8417-c5dc-44bc-9bee-2e059d162699",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 1-b: Define the system prompt\n",
|
||||
"\n",
|
||||
"system_prompt = \"\"\"\n",
|
||||
"You are a smart assistant that analyzes the contents of a website,\n",
|
||||
"and provides a short, clear, summary, ignoring text that might be navigation related.\n",
|
||||
"Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0134dfa4-8299-48b5-b444-f2a8c3403c88",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Add the website content to the user prompt\n",
|
||||
"\n",
|
||||
"def messages_for(website):\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_prefix + website}\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2ef960cf-6dc2-4cda-afb3-b38be12f4c97",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 5: Change the content utility to use jina\n",
|
||||
"\n",
|
||||
"def fetch_url_content(url):\n",
|
||||
" jina_reader_url = f\"https://r.jina.ai/{url}\"\n",
|
||||
" try:\n",
|
||||
" response = requests.get(jina_reader_url)\n",
|
||||
" response.raise_for_status() # Raise an exception for HTTP errors\n",
|
||||
" return response.text\n",
|
||||
" except requests.exceptions.RequestException as e:\n",
|
||||
" print(f\"Error fetching URL: {e}\")\n",
|
||||
" return None\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "905b9919-aba7-45b5-ae65-81b3d1d78e34",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 3: Call OpenAI & Step 4: print the result\n",
|
||||
"\n",
|
||||
"def summarize(url):\n",
|
||||
" website = fetch_url_content(url)\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model = \"gpt-5-nano\",\n",
|
||||
" messages = messages_for(website)\n",
|
||||
" )\n",
|
||||
" summary = response.choices[0].message.content\n",
|
||||
" return display(Markdown(summary))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "05e38d41-dfa4-4b20-9c96-c46ea75d9fb5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summarize(\"https://edwarddonner.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "45d83403-a24c-44b5-84ac-961449b4008f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summarize(\"https://cnn.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "75e9fd40-b354-4341-991e-863ef2e59db7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summarize(\"https://openai.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36ed9f14-b349-40e9-a42c-b367e77f8bda",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Content Summary vs Technical Summary\n",
|
||||
"\n",
|
||||
"In my work a technical summary of a website, or group of websites, would be useful too. For example, does it render on the server (HTML) or in the browser (JavaScript), what content management system (CMS) was used, how many pages, how many outbound links, how many inbound links, etc. Doing this exercise I realized LLMs can help with analyzing content, but I may need other tools to count pages, links, and other specifications.\n",
|
||||
"\n",
|
||||
"A \"Shout Out\" to whoever put \"Market_Research_Agent.ipynb\" in the Community-Contributions. It is a great example of using an LLM as a management consultant. I think Jina might help with this usecase by offering web search results through an API to feed to your LLM. Here is the system prompt from that notebook and I plan to use this format often.\n",
|
||||
"\n",
|
||||
"system_prompt = \"\"\"You are to act like a Mckinsey Consultant specializing in market research. \n",
|
||||
"1) You are to follow legal guidelines and never give immoral advice. \n",
|
||||
"2) Your job is to maximise profits for your clients by analysing their companies initiatives and giving out recommendations for newer initiatives.\\n \n",
|
||||
"3) Follow industry frameworks for reponses always give simple answers and stick to the point.\n",
|
||||
"4) If possible try to see what competitors exist and what market gap can your clients company exploit.\n",
|
||||
"5) Further more, USe SWOT, Porters 5 forces to summarize your recommendations, Give confidence score with every recommendations\n",
|
||||
"6) Try to give unique solutions by seeing what the market gap is, if market gap is ambiguious skip this step\n",
|
||||
"7) add an estimate of what rate the revenue of the comapany will increase at provided they follow the guidelines, give conservating estimates keeping in account non ideal conditions.\n",
|
||||
"8) if the website isnt of a company or data isnt available, give out an error message along the lines of more data required for analysis\"\"\""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 408 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 437 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 483 KiB |
@@ -0,0 +1,551 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Additional End of week Exercise - week 2\n",
|
||||
"\n",
|
||||
"Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in Week 1 Exercise.\n",
|
||||
"\n",
|
||||
"This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n",
|
||||
"\n",
|
||||
"If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.\n",
|
||||
"\n",
|
||||
"I will publish a full solution here soon - unless someone beats me to it...\n",
|
||||
"\n",
|
||||
"There are so many commercial applications for this, from a language tutor, to a company onboarding solution, to a companion AI to a course (like this one!) I can't wait to see your results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f69a564870ec63b0",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-24T16:15:26.039019Z",
|
||||
"start_time": "2025-10-24T16:15:25.888596Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Imports\n",
|
||||
"from IPython.display import Markdown, display\n",
|
||||
"from openai import OpenAI\n",
|
||||
"import os\n",
|
||||
"import json\n",
|
||||
"import requests\n",
|
||||
"import gradio as gr\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from typing import List\n",
|
||||
"import time\n",
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"import requests\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"from datetime import datetime\n",
|
||||
"import json\n",
|
||||
"import re\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fa60913187dbe71d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-24T16:14:27.703743Z",
|
||||
"start_time": "2025-10-24T16:14:27.677172Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"OLLAMA_BASE_URL=\"http://localhost:11434/v1/completions\"\n",
|
||||
"LOCAL_MODEL_NAME=\"llama3.2\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Load environment variables in a file called .env\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"OPENAI_API_KEY=api_key\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"coin_key = os.getenv('COINMARKETCAP_API_KEY')\n",
|
||||
"COINMARKETCAP_API_KEY = coin_key\n",
|
||||
"\n",
|
||||
"# Check the key\n",
|
||||
"\n",
|
||||
"if not api_key:\n",
|
||||
" print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
|
||||
"elif not api_key.startswith(\"sk-proj-\"):\n",
|
||||
" print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
|
||||
"elif api_key.strip() != api_key:\n",
|
||||
" print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
|
||||
"else:\n",
|
||||
" print(\"API key found and looks good so far!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1bf8ccf240e982da",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-24T16:14:35.695654Z",
|
||||
"start_time": "2025-10-24T16:14:35.681319Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Ollama configuration\n",
|
||||
"OLLAMA_URL = os.getenv(\"OLLAMA_BASE_URL\", \"http://localhost:11434/v1/completions\")\n",
|
||||
"OLLAMA_MODEL = os.getenv(\"LOCAL_MODEL_NAME\", \"llama3.2\")\n",
|
||||
"\n",
|
||||
"# OpenAI configuration\n",
|
||||
"OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
|
||||
"OPENAI_MODEL = \"gpt-4\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "98d8f6481681ed57",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-24T16:14:49.865353Z",
|
||||
"start_time": "2025-10-24T16:14:49.848662Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Crypto Analysis Prompt\n",
|
||||
"CRYPTO_SYSTEM_PROMPT = \"\"\"You are a specialized AI assistant with expertise in cryptocurrency markets and data analysis.\n",
|
||||
"Your role is to help users identify and understand cryptocurrencies with the strongest growth patterns over recent weeks.\n",
|
||||
"Provide clear, data-driven insights about market trends and performance metrics.\"\"\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7729697aa8937c3",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-24T16:15:37.367235Z",
|
||||
"start_time": "2025-10-24T16:15:35.409542Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"def scrape_coingecko(limit=10, debug=False):\n",
|
||||
" try:\n",
|
||||
" headers = {\n",
|
||||
" 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',\n",
|
||||
" 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',\n",
|
||||
" 'Accept-Language': 'en-US,en;q=0.5',\n",
|
||||
" 'Referer': 'https://www.coingecko.com/'\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" url = \"https://www.coingecko.com/en/coins/trending\"\n",
|
||||
" response = requests.get(url, headers=headers, timeout=30)\n",
|
||||
" response.raise_for_status()\n",
|
||||
"\n",
|
||||
" if debug:\n",
|
||||
" print(f\"Status: {response.status_code}\")\n",
|
||||
" with open(\"debug_coingecko.html\", \"w\", encoding=\"utf-8\") as f:\n",
|
||||
" f.write(response.text)\n",
|
||||
" print(\"HTML saved to debug_coingecko.html\")\n",
|
||||
"\n",
|
||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||||
" top_performers = []\n",
|
||||
"\n",
|
||||
" # Try multiple selectors\n",
|
||||
" rows = (soup.find_all('tr', {'data-sort-by': True}) or\n",
|
||||
" soup.find_all('tr', class_=re.compile('hover')) or\n",
|
||||
" soup.select('table tbody tr'))[:limit]\n",
|
||||
"\n",
|
||||
" if debug:\n",
|
||||
" print(f\"Found {len(rows)} rows\")\n",
|
||||
"\n",
|
||||
" for row in rows:\n",
|
||||
" try:\n",
|
||||
" # Find all text in row\n",
|
||||
" texts = [t.strip() for t in row.stripped_strings]\n",
|
||||
" if debug:\n",
|
||||
" print(f\"Row texts: {texts[:5]}\")\n",
|
||||
"\n",
|
||||
" # Extract data from text list\n",
|
||||
" name = texts[1] if len(texts) > 1 else \"Unknown\"\n",
|
||||
" symbol = texts[2] if len(texts) > 2 else \"N/A\"\n",
|
||||
"\n",
|
||||
" # Find price\n",
|
||||
" price = 0\n",
|
||||
" for text in texts:\n",
|
||||
" if '$' in text:\n",
|
||||
" price_str = text.replace('$', '').replace(',', '')\n",
|
||||
" try:\n",
|
||||
" price = float(price_str)\n",
|
||||
" break\n",
|
||||
" except:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" # Find percentage change\n",
|
||||
" change_30d = 0\n",
|
||||
" for text in texts:\n",
|
||||
" if '%' in text:\n",
|
||||
" change_str = text.replace('%', '').replace('+', '')\n",
|
||||
" try:\n",
|
||||
" change_30d = float(change_str)\n",
|
||||
" except:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" if name != \"Unknown\":\n",
|
||||
" top_performers.append({\n",
|
||||
" \"name\": name,\n",
|
||||
" \"symbol\": symbol,\n",
|
||||
" \"current_price\": price,\n",
|
||||
" \"price_change_percentage_30d\": change_30d,\n",
|
||||
" \"source\": \"coingecko\"\n",
|
||||
" })\n",
|
||||
" except Exception as e:\n",
|
||||
" if debug:\n",
|
||||
" print(f\"Row error: {e}\")\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" return {\"timeframe\": \"30d\", \"timestamp\": datetime.now().isoformat(), \"count\": len(top_performers), \"top_performers\": top_performers}\n",
|
||||
" except Exception as e:\n",
|
||||
" return {\"error\": str(e)}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_top_performers(source=\"coingecko\", limit=10, save=False, debug=False):\n",
|
||||
" sources = {\"coingecko\": scrape_coingecko, \"coinmarketcap\": scrape_coinmarketcap}\n",
|
||||
" result = sources[source](limit, debug)\n",
|
||||
"\n",
|
||||
" if save and \"error\" not in result:\n",
|
||||
" filename = f\"crypto_{source}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json\"\n",
|
||||
" with open(filename, 'w') as f:\n",
|
||||
" json.dump(result, f, indent=2)\n",
|
||||
" print(f\"Saved to {filename}\")\n",
|
||||
"\n",
|
||||
" return result\n",
|
||||
"\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" print(\"Testing CoinGecko with debug...\")\n",
|
||||
" result = get_top_performers(\"coingecko\", 10, True, debug=True)\n",
|
||||
" print(json.dumps(result, indent=2))\n",
|
||||
"\n",
|
||||
" print(\"\\n\" + \"=\"*60 + \"\\n\")\n",
|
||||
"\n",
|
||||
" print(\"Testing CoinMarketCap with debug...\")\n",
|
||||
" result = get_top_performers(\"coinmarketcap\", 10, True, debug=True)\n",
|
||||
" print(json.dumps(result, indent=2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2e3de36fa13f2dec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def scrape_coinmarketcap(limit=10, debug=False):\n",
|
||||
" try:\n",
|
||||
" headers = {\n",
|
||||
" 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',\n",
|
||||
" 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',\n",
|
||||
" 'Accept-Language': 'en-US,en;q=0.5',\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" url = \"https://coinmarketcap.com/gainers-losers/\"\n",
|
||||
" response = requests.get(url, headers=headers, timeout=30)\n",
|
||||
" response.raise_for_status()\n",
|
||||
"\n",
|
||||
" if debug:\n",
|
||||
" print(f\"Status: {response.status_code}\")\n",
|
||||
" with open(\"debug_coinmarketcap.html\", \"w\", encoding=\"utf-8\") as f:\n",
|
||||
" f.write(response.text)\n",
|
||||
" print(\"HTML saved to debug_coinmarketcap.html\")\n",
|
||||
"\n",
|
||||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||||
" top_performers = []\n",
|
||||
"\n",
|
||||
" # Find all table rows\n",
|
||||
" rows = soup.find_all('tr')\n",
|
||||
" if debug:\n",
|
||||
" print(f\"Total rows found: {len(rows)}\")\n",
|
||||
"\n",
|
||||
" for row in rows[1:limit+1]:\n",
|
||||
" try:\n",
|
||||
" texts = [t.strip() for t in row.stripped_strings]\n",
|
||||
" if debug and len(texts) > 0:\n",
|
||||
" print(f\"Row texts: {texts[:5]}\")\n",
|
||||
"\n",
|
||||
" if len(texts) < 3:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" # Usually: rank, name, symbol, price, change...\n",
|
||||
" name = texts[1] if len(texts) > 1 else \"Unknown\"\n",
|
||||
" symbol = texts[2] if len(texts) > 2 else \"N/A\"\n",
|
||||
"\n",
|
||||
" price = 0\n",
|
||||
" change_30d = 0\n",
|
||||
"\n",
|
||||
" for text in texts:\n",
|
||||
" if '$' in text and price == 0:\n",
|
||||
" try:\n",
|
||||
" price = float(text.replace('$', '').replace(',', ''))\n",
|
||||
" except:\n",
|
||||
" continue\n",
|
||||
" if '%' in text:\n",
|
||||
" try:\n",
|
||||
" change_30d = float(text.replace('%', '').replace('+', ''))\n",
|
||||
" except:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" if name != \"Unknown\":\n",
|
||||
" top_performers.append({\n",
|
||||
" \"name\": name,\n",
|
||||
" \"symbol\": symbol,\n",
|
||||
" \"current_price\": price,\n",
|
||||
" \"price_change_percentage_30d\": change_30d,\n",
|
||||
" \"source\": \"coinmarketcap\"\n",
|
||||
" })\n",
|
||||
" except Exception as e:\n",
|
||||
" if debug:\n",
|
||||
" print(f\"Row error: {e}\")\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" return {\"timeframe\": \"30d\", \"timestamp\": datetime.now().isoformat(), \"count\": len(top_performers), \"top_performers\": top_performers}\n",
|
||||
" except Exception as e:\n",
|
||||
" return {\"error\": str(e)}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4a63cbcc7ae04c7e",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-24T15:23:22.157803Z",
|
||||
"start_time": "2025-10-24T15:23:22.147500Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"# Tool detection and execution\n",
|
||||
"def detect_and_run_tool(user_message: str):\n",
|
||||
" user_message_lower = user_message.lower().strip()\n",
|
||||
"\n",
|
||||
" # Detect crypto growth queries\n",
|
||||
" crypto_keywords = [\"crypto growth\", \"top gainers\", \"best performing\", \"crypto performance\", \"trending coins\"]\n",
|
||||
"\n",
|
||||
" if any(keyword in user_message_lower for keyword in crypto_keywords):\n",
|
||||
" return True, get_top_performers(\"coingecko\", 10, True, debug=True)\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "626a022b562bf73d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e5c6db45fb4d53d9",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-24T15:23:25.205927Z",
|
||||
"start_time": "2025-10-24T15:23:25.199801Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def ask_ollama(prompt: str) -> str:\n",
|
||||
" try:\n",
|
||||
" payload = {\"model\": OLLAMA_MODEL, \"prompt\": prompt, \"stream\": False}\n",
|
||||
" r = requests.post(OLLAMA_URL, json=payload, timeout=120)\n",
|
||||
" r.raise_for_status()\n",
|
||||
" data = r.json()\n",
|
||||
" return data.get(\"choices\", [{}])[0].get(\"text\", \"\").strip()\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"[Ollama error: {e}]\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2f81a00e9584d184",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c2686a6503cf62a4",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-24T15:23:29.556036Z",
|
||||
"start_time": "2025-10-24T15:23:29.552763Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def ask_openai(prompt: str) -> str:\n",
|
||||
" try:\n",
|
||||
" from openai import OpenAI\n",
|
||||
" client = OpenAI(api_key=OPENAI_API_KEY)\n",
|
||||
"\n",
|
||||
" response = client.chat.completions.create(\n",
|
||||
" model=OPENAI_MODEL,\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": CRYPTO_SYSTEM_PROMPT},\n",
|
||||
" {\"role\": \"user\", \"content\": prompt}\n",
|
||||
" ],\n",
|
||||
" max_tokens=512,\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"[OpenAI error: {e}]\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2313e5940e9fa3da",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-24T15:27:33.546418Z",
|
||||
"start_time": "2025-10-24T15:27:18.318834Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def chat_fn(user_message: str, history: List[List[str]], model_choice: str):\n",
|
||||
" tool_used, tool_output = detect_and_run_tool(user_message)\n",
|
||||
"\n",
|
||||
" if tool_used:\n",
|
||||
" if \"error\" in tool_output:\n",
|
||||
" reply = f\"Data fetch error: {tool_output['error']}\"\n",
|
||||
" else:\n",
|
||||
" # Format the crypto data for AI analysis\n",
|
||||
" crypto_data_str = json.dumps(tool_output, indent=2)\n",
|
||||
"\n",
|
||||
" # Create analysis prompt\n",
|
||||
" analysis_prompt = f\"\"\"\n",
|
||||
" Analyze this cryptocurrency growth data and provide insights:\n",
|
||||
"\n",
|
||||
" {crypto_data_str}\n",
|
||||
"\n",
|
||||
" Please identify:\n",
|
||||
" 1. The strongest performers and their growth patterns\n",
|
||||
" 2. Any notable trends across different timeframes\n",
|
||||
" 3. Risk considerations or notable observations\n",
|
||||
" 4. Simple, actionable insights for the user\n",
|
||||
"\n",
|
||||
" Keep the analysis clear and data-driven.\n",
|
||||
" User's original question: {user_message}\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" # Get AI analysis\n",
|
||||
" if model_choice == \"openai\":\n",
|
||||
" analysis = ask_openai(analysis_prompt)\n",
|
||||
" else:\n",
|
||||
" ollama_prompt = f\"{CRYPTO_SYSTEM_PROMPT}\\n\\nUser: {analysis_prompt}\\nAssistant:\"\n",
|
||||
" analysis = ask_ollama(ollama_prompt)\n",
|
||||
"\n",
|
||||
" reply = f\"📊 **Crypto Growth Analysis**\\n\\n{analysis}\\n\\n*Raw data for reference:*\\n```json\\n{crypto_data_str}\\n```\"\n",
|
||||
"\n",
|
||||
" else:\n",
|
||||
" # Regular conversation\n",
|
||||
" if model_choice == \"openai\":\n",
|
||||
" reply = ask_openai(user_message)\n",
|
||||
" else:\n",
|
||||
" prompt = f\"{CRYPTO_SYSTEM_PROMPT}\\n\\nUser: {user_message}\\nAssistant:\"\n",
|
||||
" reply = ask_ollama(prompt)\n",
|
||||
"\n",
|
||||
" history.append([user_message, reply])\n",
|
||||
" return history\n",
|
||||
"\n",
|
||||
"# Enhanced Gradio UI with crypto focus\n",
|
||||
"def main():\n",
|
||||
" with gr.Blocks(title=\"Crypto Growth Analyst Chatbot\") as demo:\n",
|
||||
" gr.Markdown(\"\"\"\n",
|
||||
" # Samuel Week 2 Task: Crypto Growth Analyst Chatbot\n",
|
||||
" **Analyze cryptocurrency performance with dual AI models** (Ollama & OpenAI)\n",
|
||||
"\n",
|
||||
" *Try questions like:*\n",
|
||||
" - \"Show me cryptocurrencies with strongest growth\"\n",
|
||||
" - \"What are the top performing coins this month?\"\n",
|
||||
" - \"Analyze crypto market trends\"\n",
|
||||
" \"\"\")\n",
|
||||
"\n",
|
||||
" # Message input\n",
|
||||
" msg = gr.Textbox(\n",
|
||||
" placeholder=\"Ask about crypto growth trends or type /ticket <city>\",\n",
|
||||
" label=\"Your message\",\n",
|
||||
" lines=2,\n",
|
||||
" autofocus=True\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Model selection\n",
|
||||
" with gr.Row():\n",
|
||||
" model_choice = gr.Radio(\n",
|
||||
" [\"ollama\", \"openai\"],\n",
|
||||
" value=\"ollama\",\n",
|
||||
" label=\"AI Model\"\n",
|
||||
" )\n",
|
||||
" send = gr.Button(\"Analyze Crypto Data\", variant=\"primary\")\n",
|
||||
"\n",
|
||||
" # Chatbot area\n",
|
||||
" chatbot = gr.Chatbot(label=\"Crypto Analysis Conversation\", height=500, type=\"messages\")\n",
|
||||
"\n",
|
||||
" # Wrapper function\n",
|
||||
" def wrapped_chat_fn(user_message, history, model_choice):\n",
|
||||
" updated_history = chat_fn(user_message, history, model_choice)\n",
|
||||
" return updated_history, gr.update(value=\"\")\n",
|
||||
"\n",
|
||||
" # Event handlers\n",
|
||||
" send.click(wrapped_chat_fn, inputs=[msg, chatbot, model_choice], outputs=[chatbot, msg])\n",
|
||||
" msg.submit(wrapped_chat_fn, inputs=[msg, chatbot, model_choice], outputs=[chatbot, msg])\n",
|
||||
"\n",
|
||||
" demo.launch(server_name=\"0.0.0.0\", share=False)\n",
|
||||
"\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" main()\n",
|
||||
"\n",
|
||||
" "
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,283 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "88f67391",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### N-Way Conversation - Coffee Talk \n",
|
||||
"\n",
|
||||
"This example simulates an N-way conversation between the characters of the Saturday Night Live skit Coffee Talk.\n",
|
||||
"\n",
|
||||
"The character information is retrieved from a model and each character is handled by its own model selected at random from a list of available models. Only the number of characters, number of rounds, and available models are configured.\n",
|
||||
"\n",
|
||||
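"The example uses the first available provider, in this order: OpenRouter if OPENROUTER_API_KEY is set, otherwise OpenAI if OPENAI_API_KEY is set, otherwise a local Ollama server. "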
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a1eeb029",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Setup ...\n",
|
||||
"\n",
|
||||
"# The number of characters (models) conversing\n",
|
||||
"NBR_CHARACTERS=4\n",
|
||||
"\n",
|
||||
"# The number of rounds of conversation\n",
|
||||
"NBR_ROUNDS=4\n",
|
||||
"\n",
|
||||
"# Available OpenRouter models. The base model is used to select characters and the topic. Other models are used for the conversation\n",
|
||||
"OPENROUTER_MODELS=\"openai/gpt-4.1-mini, anthropic/claude-3.5-haiku, google/gemini-2.5-flash\"\n",
|
||||
"OPENROUTER_BASE=\"openai/gpt-5\"\n",
|
||||
"\n",
|
||||
"# Available OpenAI models\n",
|
||||
"OPENAI_MODELS=\"gpt-4.1, gpt-4.1-mini, gpt-5-nano\"\n",
|
||||
"OPENAI_BASE=\"gpt-5\"\n",
|
||||
"\n",
|
||||
"# Available Ollama models. Note that these must be pre-fetched or errors will occur (and won't be handled)\n",
|
||||
"OLLAMA_MODELS=\"gpt-oss, gemma3, llama3.2\"\n",
|
||||
"OLLAMA_BASE=\"gpt-oss\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "68022fbc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"import os\n",
|
||||
"import json\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from IPython.display import Markdown, display, update_display\n",
|
||||
"from openai import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "73460c5e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Setup the LLM client and models. OpenRouter has priority if available, then OpenAI, then Ollama.\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"openai_api_key = os.getenv('OPENAI_API_KEY')\n",
|
||||
"openrouter_api_key = os.getenv('OPENROUTER_API_KEY')\n",
|
||||
"\n",
|
||||
"if openrouter_api_key:\n",
|
||||
" print(f\"OpenRouter API Key exists and begins {openrouter_api_key[:3]}, using OpenRouter.\")\n",
|
||||
" available_models=OPENROUTER_MODELS\n",
|
||||
" base_model=OPENROUTER_BASE\n",
|
||||
" client = OpenAI(base_url=\"https://openrouter.ai/api/v1\", api_key=openrouter_api_key)\n",
|
||||
"elif openai_api_key:\n",
|
||||
" print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}, using OpenAI.\")\n",
|
||||
" available_models=OPENAI_MODELS\n",
|
||||
" base_model=OPENAI_BASE\n",
|
||||
" client = OpenAI()\n",
|
||||
"else:\n",
|
||||
" print(\"OpenAI API Key not set, using Ollama.\")\n",
|
||||
" available_models=OLLAMA_MODELS\n",
|
||||
" base_model=OLLAMA_BASE\n",
|
||||
" client = OpenAI(api_key=\"ollama\", base_url=\"http://localhost:11434/v1\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b1a7004d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the characters from the base model\n",
|
||||
"system_prompt = \"\"\"\n",
|
||||
"You will be asked to return information about characters in the SNL skit Coffee Talk\n",
|
||||
"You should return the information as a JSON response with the following format:\n",
|
||||
"{\n",
|
||||
" { \"name\" : \"Linda\", \"persona\", \"....\", \"model\" : \"model-name\" },\n",
|
||||
" { \"name\" : \"Paul\", \"persona\", \"....\", \"model\" : \"model-name\" }\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"user_prompt = f\"\"\"\n",
|
||||
"Create a list of the many characters from the SNL skit Coffee Talk, and return {NBR_CHARACTERS} total characters.\n",
|
||||
"Always return Linda Richmond as the first character.\n",
|
||||
"Return one caller.\n",
|
||||
"Select the remaining characters at random from the list of all characters. \n",
|
||||
"For the model value, return a random model name from this list: {available_models}.\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"response = client.chat.completions.create(\n",
|
||||
" model=base_model,\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ],\n",
|
||||
" response_format={\"type\": \"json_object\"}\n",
|
||||
" )\n",
|
||||
"result = response.choices[0].message.content\n",
|
||||
"characters = json.loads(result)\n",
|
||||
"\n",
|
||||
"print(json.dumps(characters, indent=2))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "21a73805",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Generate system prompts for each character, which includes their name, persona, the other guests, and how they should respond.\n",
|
||||
"\n",
|
||||
"guests = \"The guests on todays show are \"\n",
|
||||
"guest_names = [character['name'] for character in characters[\"characters\"]]\n",
|
||||
"guests += \", \".join(guest_names)\n",
|
||||
"\n",
|
||||
"prompt = \"\"\n",
|
||||
"for character in characters[\"characters\"]:\n",
|
||||
" prompt = f\"You are {character['name']} a character on the SNL skit Coffee Talk.\"\n",
|
||||
" prompt += f\" Your personality is : {character['persona']} \"\n",
|
||||
" prompt += \" \" + guests + \".\"\n",
|
||||
" prompt += \" Keep responses brief and in character.\"\n",
|
||||
" prompt += \" In the conversation history, each response is prefixed with the character's name to identify the respondent.\"\n",
|
||||
" prompt += \" Your response should not include your character name as a prefix.\"\n",
|
||||
"\n",
|
||||
" character[\"system_prompt\"] = prompt\n",
|
||||
"\n",
|
||||
"print(json.dumps(characters, indent=2))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "656131a1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the topic\n",
|
||||
"user_prompt=\"\"\"\n",
|
||||
"In the SNL skit Coffee Talk, the host Linda Richmond proposes topics in the form \"X Y is neither X, nor Y - discuss\".\n",
|
||||
"Create a list of the many topics proposed on the show, and select one at random and return it.\n",
|
||||
"Return only the selected topic without any formatting.\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"response = client.chat.completions.create(\n",
|
||||
" model=base_model,\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
"topic = response.choices[0].message.content\n",
|
||||
"\n",
|
||||
"print(topic)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6e137753",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_character_response(character,history):\n",
|
||||
" user_prompt = f\"\"\"\n",
|
||||
" The conversation so far is as follows:\n",
|
||||
" {history}\n",
|
||||
" What is your response? \n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" response = client.chat.completions.create(\n",
|
||||
" model=character[\"model\"],\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": character[\"system_prompt\"]},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" return response.choices[0].message.content\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "23fb446f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Start the show!\n",
|
||||
"\n",
|
||||
"history = \"\"\n",
|
||||
"history += \"Welcome to Coffee Talk, I am your host Linda Richmond. Today's guests are:\\n\"\n",
|
||||
"\n",
|
||||
"for character in characters[\"characters\"][1:]:\n",
|
||||
" history += f\" - {character['name']}\\n\"\n",
|
||||
"\n",
|
||||
"history += f\"\\nI'll give you a topic: {topic}\\n\"\n",
|
||||
"\n",
|
||||
"display(Markdown(\"---\"))\n",
|
||||
"display(Markdown(history))\n",
|
||||
"display(Markdown(\"---\"))\n",
|
||||
"\n",
|
||||
"# Other guests respond (first round)\n",
|
||||
"for character in characters[\"characters\"][1:]:\n",
|
||||
" response = get_character_response(character,history)\n",
|
||||
" display(Markdown(f\"**{character['name']}({character['model']}):** {response}\")) \n",
|
||||
" history += f\"\\n{character['name']}: {response}\"\n",
|
||||
"\n",
|
||||
"# Continue conversation for remaining rounds (all characters including Linda)\n",
|
||||
"for round in range(1, NBR_ROUNDS):\n",
|
||||
" for character in characters[\"characters\"]:\n",
|
||||
" response = get_character_response(character,history)\n",
|
||||
" display(Markdown(f\"**{character['name']}({character['model']}):** {response}\")) \n",
|
||||
" history += f\"\\n{character['name']}: {response}\"\n",
|
||||
"\n",
|
||||
"# Wrap it up\n",
|
||||
"user_prompt=f\"\"\"\n",
|
||||
"It's time to wrap up the show. Here's the whole conversation:\\n\n",
|
||||
"{history}\n",
|
||||
"Wrap up the show, as only you can.\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"linda = characters[\"characters\"][0]\n",
|
||||
"response = client.chat.completions.create(\n",
|
||||
" model=linda[\"model\"],\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": linda[\"system_prompt\"]},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt}\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"display(Markdown(\"---\"))\n",
|
||||
"display(Markdown(response.choices[0].message.content)) \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llm-engineering",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
240
week2/community-contributions/emmy/emmy_week2_EXERCISE.ipynb
Normal file
240
week2/community-contributions/emmy/emmy_week2_EXERCISE.ipynb
Normal file
@@ -0,0 +1,240 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Additional End of week Exercise - week 2\n",
|
||||
"\n",
|
||||
"Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in Week 1 Exercise.\n",
|
||||
"\n",
|
||||
"This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n",
|
||||
"\n",
|
||||
"If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.\n",
|
||||
"\n",
|
||||
"I will publish a full solution here soon - unless someone beats me to it...\n",
|
||||
"\n",
|
||||
"There are so many commercial applications for this, from a language tutor, to a company onboarding solution, to a companion AI to a course (like this one!) I can't wait to see your results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4c427d7c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#imports\n",
|
||||
"import os\n",
|
||||
"import time\n",
|
||||
"import gradio as gr\n",
|
||||
"import openai\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"OPENAI_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
|
||||
"GOOGLE_KEY = os.getenv(\"GOOGLE_API_KEY\")\n",
|
||||
"GEMINI_BASE_URL = os.getenv(\"GEMINI_BASE_URL\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "21e78ed3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# OpenAI / Gemini Client\n",
|
||||
"def get_client(model_choice):\n",
|
||||
" \"\"\"\n",
|
||||
" Return an OpenAI client configured for GPT or Gemini.\n",
|
||||
" \"\"\"\n",
|
||||
" if model_choice == \"OpenAI GPT-4\":\n",
|
||||
" return openai.OpenAI(api_key=OPENAI_KEY)\n",
|
||||
" else:\n",
|
||||
" return openai.OpenAI(\n",
|
||||
" api_key=GOOGLE_KEY,\n",
|
||||
" base_url=GEMINI_BASE_URL,\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8fb92ea9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Fake Weather Tool\n",
|
||||
"def get_weather(location):\n",
|
||||
" data = {\n",
|
||||
" \"new york\": {\"temp\": 72, \"condition\": \"Partly Cloudy\"},\n",
|
||||
" \"london\": {\"temp\": 59, \"condition\": \"Rainy\"},\n",
|
||||
" \"tokyo\": {\"temp\": 68, \"condition\": \"Clear\"},\n",
|
||||
" }\n",
|
||||
" info = data.get(location.lower(), {\"temp\": 75, \"condition\": \"Sunny\"})\n",
|
||||
" return f\"Weather in {location}: {info['temp']}°F, {info['condition']}\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def maybe_use_tool(message):\n",
|
||||
" \"\"\"\n",
|
||||
" Detect patterns like 'weather in <location>' (case-insensitive)\n",
|
||||
" and inject tool result.\n",
|
||||
" Supports multi-word locations, e.g. \"New York\" or \"tokyo\".\n",
|
||||
" \"\"\"\n",
|
||||
" pattern = re.compile(r\"weather\\s+in\\s+([A-Za-z\\s]+)\", re.IGNORECASE)\n",
|
||||
" match = pattern.search(message)\n",
|
||||
"\n",
|
||||
" if match:\n",
|
||||
" location = match.group(1).strip(\" ?.,!\").title()\n",
|
||||
" tool_result = get_weather(location)\n",
|
||||
" return f\"{message}\\n\\n[Tool used: {tool_result}]\"\n",
|
||||
"\n",
|
||||
" return message"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "672621a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# prompt\n",
|
||||
"SYSTEM_PROMPTS = {\n",
|
||||
" \"General Assistant\": \"You are a helpful and polite AI assistant.\",\n",
|
||||
" \"Technical Expert\": \"You are an expert software engineer who writes clear, correct code.\",\n",
|
||||
" \"Creative Writer\": \"You are a creative storyteller who writes imaginative and emotional prose.\",\n",
|
||||
" \"Science Tutor\": \"You are a science teacher who explains ideas simply and clearly.\",\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "21525edd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# ---------------------------------------------\n",
|
||||
"# Build chat messages\n",
|
||||
"# ---------------------------------------------\n",
|
||||
"def build_messages(history, user_msg, persona):\n",
|
||||
" messages = [{\"role\": \"system\", \"content\": SYSTEM_PROMPTS[persona]}]\n",
|
||||
" for u, a in history:\n",
|
||||
" messages.append({\"role\": \"user\", \"content\": u})\n",
|
||||
" messages.append({\"role\": \"assistant\", \"content\": a})\n",
|
||||
" messages.append({\"role\": \"user\", \"content\": user_msg})\n",
|
||||
" return messages\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# ---------------------------------------------\n",
|
||||
"# Stream model output\n",
|
||||
"# ---------------------------------------------\n",
|
||||
"def stream_response(model_choice, messages):\n",
|
||||
" \"\"\"\n",
|
||||
" Uses the same openai library to stream from GPT or Gemini.\n",
|
||||
" \"\"\"\n",
|
||||
" client = get_client(model_choice)\n",
|
||||
" model = \"gpt-4o-mini\" if model_choice == \"OpenAI GPT-4\" else \"gemini-2.5-flash\"\n",
|
||||
"\n",
|
||||
" stream = client.chat.completions.create(\n",
|
||||
" model=model,\n",
|
||||
" messages=messages,\n",
|
||||
" stream=True,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" reply = \"\"\n",
|
||||
" for chunk in stream:\n",
|
||||
" if chunk.choices[0].delta and chunk.choices[0].delta.content:\n",
|
||||
" reply += chunk.choices[0].delta.content\n",
|
||||
" yield reply\n",
|
||||
" time.sleep(0.01)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c88976b1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Gradio UI\n",
|
||||
"with gr.Blocks(theme=gr.themes.Soft()) as demo:\n",
|
||||
" gr.Markdown(\n",
|
||||
" \"\"\"\n",
|
||||
" # 🤖 Unified GPT + Gemini Chat\n",
|
||||
"\n",
|
||||
" - 🔀 Choose model: **OpenAI GPT-4** or **Gemini 2.5 Flash**\n",
|
||||
" - 🧠 Pick the assistant persona (system prompt injection)\n",
|
||||
" - 🛠 Tool support: ask about weather\n",
|
||||
"\n",
|
||||
" **Weather tool tips:**\n",
|
||||
" - Ask: \"What's the weather in London?\"\n",
|
||||
" - Also works for: New York, Tokyo\n",
|
||||
" - If a city isn't known, it returns a default sunny forecast\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" with gr.Row():\n",
|
||||
" model_choice = gr.Dropdown(\n",
|
||||
" [\"OpenAI GPT-4\", \"Gemini 2.5 Flash\"],\n",
|
||||
" value=\"OpenAI GPT-4\",\n",
|
||||
" label=\"Model\",\n",
|
||||
" )\n",
|
||||
" persona = gr.Dropdown(\n",
|
||||
" list(SYSTEM_PROMPTS.keys()),\n",
|
||||
" value=\"General Assistant\",\n",
|
||||
" label=\"Persona\",\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" chatbot = gr.Chatbot(height=400)\n",
|
||||
" msg = gr.Textbox(placeholder=\"Ask about weather or coding...\", label=\"Your message\")\n",
|
||||
" gr.Markdown(\n",
|
||||
" \"💡 Tip: You can ask about the weather in **London**, **New York**, or **Tokyo**. \"\n",
|
||||
" \"I'll call a local tool and include that info in my answer.\"\n",
|
||||
" )\n",
|
||||
" send = gr.Button(\"Send\", variant=\"primary\")\n",
|
||||
" clear = gr.Button(\"Clear\")\n",
|
||||
"\n",
|
||||
" state = gr.State([])\n",
|
||||
"\n",
|
||||
" msg.submit(chat_fn, [msg, state, model_choice, persona], chatbot).then(\n",
|
||||
" lambda chat: chat, chatbot, state\n",
|
||||
" ).then(lambda: \"\", None, msg)\n",
|
||||
"\n",
|
||||
" send.click(chat_fn, [msg, state, model_choice, persona], chatbot).then(\n",
|
||||
" lambda chat: chat, chatbot, state\n",
|
||||
" ).then(lambda: \"\", None, msg)\n",
|
||||
"\n",
|
||||
" clear.click(lambda: ([], []), None, [chatbot, state], queue=False)\n",
|
||||
"\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" demo.launch()\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llm-engineering (3.12.10)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
197
week3/community-contributions/hopeogbons/README.md
Normal file
197
week3/community-contributions/hopeogbons/README.md
Normal file
@@ -0,0 +1,197 @@
|
||||
# 🎙️ Audio Transcription Assistant
|
||||
|
||||
An AI-powered audio transcription tool that converts speech to text in multiple languages using OpenAI's Whisper model.
|
||||
|
||||
## Why I Built This
|
||||
|
||||
In today's content-driven world, audio and video are everywhere—podcasts, meetings, lectures, interviews. But what if you need to quickly extract text from an audio file in a different language? Or create searchable transcripts from recordings?
|
||||
|
||||
Manual transcription is time-consuming and expensive. I wanted to build something that could:
|
||||
|
||||
- Accept audio files in any format (MP3, WAV, etc.)
|
||||
- Transcribe them accurately using AI
|
||||
- Support multiple languages
|
||||
- Work locally on my Mac **and** on cloud GPUs (Google Colab)
|
||||
|
||||
That's where **Whisper** comes in—OpenAI's powerful speech recognition model.
|
||||
|
||||
## Features
|
||||
|
||||
- 📤 **Upload any audio file** (MP3, WAV, M4A, FLAC, etc.)
|
||||
- 🌍 **11 languages supported**, plus automatic language detection
|
||||
- 🤖 **Accurate AI-powered transcription** using Whisper
|
||||
- ⚡ **Cross-platform** - works on CPU (Mac) or GPU (Colab)
|
||||
- 🎨 **Clean web interface** built with Gradio
|
||||
- 🚀 **Fast processing** with optimized model settings
|
||||
|
||||
## Tech Stack
|
||||
|
||||
- **OpenAI Whisper** - Speech recognition model
|
||||
- **Gradio** - Web interface framework
|
||||
- **PyTorch** - Deep learning backend
|
||||
- **NumPy** - Numerical computing
|
||||
- **ffmpeg** - Audio file processing
|
||||
|
||||
## Installation
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Python 3.12+
|
||||
- ffmpeg (for audio processing)
|
||||
- uv package manager (or pip)
|
||||
|
||||
### Setup
|
||||
|
||||
1. Clone this repository or download the notebook
|
||||
|
||||
2. Install dependencies:
|
||||
|
||||
```bash
|
||||
# Install compatible NumPy version
|
||||
uv pip install --reinstall "numpy==1.26.4"
|
||||
|
||||
# Install PyTorch
|
||||
uv pip install torch torchvision torchaudio
|
||||
|
||||
# Install Gradio and Whisper
|
||||
uv pip install gradio openai-whisper ffmpeg-python
|
||||
|
||||
# (Optional) Install Ollama for LLM features
|
||||
uv pip install ollama
|
||||
```
|
||||
|
||||
3. **For Mac users**, ensure ffmpeg is installed:
|
||||
|
||||
```bash
|
||||
brew install ffmpeg
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Running Locally
|
||||
|
||||
1. Open the Jupyter notebook `week3 EXERCISE_hopeogbons.ipynb`
|
||||
|
||||
2. Run all cells in order:
|
||||
|
||||
- Cell 1: Install dependencies
|
||||
- Cell 2: Import libraries
|
||||
- Cell 3: Load Whisper model
|
||||
- Cell 4: Define transcription function
|
||||
- Cell 5: Build Gradio interface
|
||||
- Cell 6: Launch the app
|
||||
|
||||
3. The app will automatically open in your browser
|
||||
|
||||
4. Upload an audio file, select the language, and click Submit!
|
||||
|
||||
### Running on Google Colab
|
||||
|
||||
For GPU acceleration:
|
||||
|
||||
1. Open the notebook in Google Colab
|
||||
2. Runtime → Change runtime type → **GPU (T4)**
|
||||
3. Run all cells in order
|
||||
4. The model will automatically use GPU acceleration
|
||||
|
||||
**Note:** First run downloads the Whisper model (~140MB) - this is a one-time download.
|
||||
|
||||
## Supported Languages
|
||||
|
||||
- 🇬🇧 English
|
||||
- 🇪🇸 Spanish
|
||||
- 🇫🇷 French
|
||||
- 🇩🇪 German
|
||||
- 🇮🇹 Italian
|
||||
- 🇵🇹 Portuguese
|
||||
- 🇨🇳 Chinese
|
||||
- 🇯🇵 Japanese
|
||||
- 🇰🇷 Korean
|
||||
- 🇷🇺 Russian
|
||||
- 🇸🇦 Arabic
|
||||
- 🌐 Auto-detect
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Upload** - User uploads an audio file through the Gradio interface
|
||||
2. **Process** - ffmpeg decodes the audio file
|
||||
3. **Transcribe** - Whisper model processes the audio and generates text
|
||||
4. **Display** - Transcription is shown in the output box
|
||||
|
||||
The Whisper "base" model is used for a balance between speed and accuracy:
|
||||
|
||||
- Fast enough for real-time use on CPU
|
||||
- Accurate enough for most transcription needs
|
||||
- Small enough (~140MB) for quick downloads
|
||||
|
||||
## Example Transcriptions
|
||||
|
||||
The app successfully transcribed:
|
||||
|
||||
- English podcast episodes
|
||||
- French language audio (detected and transcribed)
|
||||
- Multi-speaker conversations
|
||||
- Audio with background noise
|
||||
|
||||
## What I Learned
|
||||
|
||||
Building this transcription assistant taught me:
|
||||
|
||||
- **Audio processing** with ffmpeg and Whisper
|
||||
- **Cross-platform compatibility** (Mac CPU vs Colab GPU)
|
||||
- **Dependency management** (dealing with NumPy version conflicts!)
|
||||
- **Async handling** in Jupyter notebooks with Gradio
|
||||
- **Model optimization** (choosing the right Whisper model size)
|
||||
|
||||
The biggest challenge? Getting ffmpeg and NumPy to play nice together across different environments. But solving those issues made me understand the stack much better.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
**1. "No module named 'whisper'" error**
|
||||
|
||||
- Make sure you've installed `openai-whisper`, not just `whisper`
|
||||
- Restart your kernel after installation
|
||||
|
||||
**2. "ffmpeg not found" error**
|
||||
|
||||
- Install ffmpeg: `brew install ffmpeg` (Mac) or `apt-get install ffmpeg` (Linux)
|
||||
|
||||
**3. NumPy version conflicts**
|
||||
|
||||
- Use NumPy 1.26.4: `uv pip install --reinstall "numpy==1.26.4"`
|
||||
- Restart kernel after reinstalling
|
||||
|
||||
**4. Gradio event loop errors**
|
||||
|
||||
- Use `prevent_thread_lock=True` in `app.launch()`
|
||||
- Restart kernel if errors persist
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
- [ ] Support for real-time audio streaming
|
||||
- [ ] Speaker diarization (identifying different speakers)
|
||||
- [ ] Export transcripts to multiple formats (SRT, VTT, TXT)
|
||||
- [ ] Integration with LLMs for summarization
|
||||
- [ ] Batch processing for multiple files
|
||||
|
||||
## Contributing
|
||||
|
||||
Feel free to fork this project and submit pull requests with improvements!
|
||||
|
||||
## License
|
||||
|
||||
This project is open source and available under the MIT License.
|
||||
|
||||
## Acknowledgments
|
||||
|
||||
- **OpenAI** for the amazing Whisper model
|
||||
- **Gradio** team for the intuitive interface framework
|
||||
- **Andela LLM Engineering Program** for the learning opportunity
|
||||
|
||||
---
|
||||
|
||||
**Built with ❤️ as part of the Andela LLM Engineering Program**
|
||||
|
||||
For questions or feedback, feel free to reach out!
|
||||
Binary file not shown.
@@ -0,0 +1,397 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "270ed08b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 🎙️ Audio Transcription Assistant\n",
|
||||
"\n",
|
||||
"## Why I Built This\n",
|
||||
"\n",
|
||||
"In today's content-driven world, audio and video are everywhere—podcasts, meetings, lectures, interviews. But what if you need to quickly extract text from an audio file in a different language? Or create searchable transcripts from recordings?\n",
|
||||
"\n",
|
||||
"Manual transcription is time-consuming and expensive. I wanted to build something that could:\n",
|
||||
"- Accept audio files in any format (MP3, WAV, etc.)\n",
|
||||
"- Transcribe them accurately using AI\n",
|
||||
"- Support multiple languages\n",
|
||||
"- Work locally on my Mac **and** on cloud GPUs (Google Colab)\n",
|
||||
"\n",
|
||||
"That's where **Whisper** comes in—OpenAI's powerful speech recognition model.\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## What This Does\n",
|
||||
"\n",
|
||||
"This app lets you:\n",
|
||||
"- 📤 Upload any audio file\n",
|
||||
"- 🌍 Choose from 11 languages (or auto-detect)\n",
|
||||
"- 🤖 Get accurate AI-powered transcription\n",
|
||||
"- ⚡ Process on CPU (Mac) or GPU (Colab)\n",
|
||||
"\n",
|
||||
"**Tech:** OpenAI Whisper • Gradio UI • PyTorch • Cross-platform (Mac/Colab)\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"**Note:** This is a demonstration. For production use, consider privacy and data handling policies.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c37e5165",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 1: Install Dependencies\n",
|
||||
"\n",
|
||||
"Installing everything needed:\n",
|
||||
"- **NumPy 1.26.4** - Compatible version for Whisper\n",
|
||||
"- **PyTorch** - Deep learning framework\n",
|
||||
"- **Whisper** - OpenAI's speech recognition model\n",
|
||||
"- **Gradio** - Web interface\n",
|
||||
"- **ffmpeg** - Audio file processing\n",
|
||||
"- **Ollama** - For local LLM support (optional)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "8c66b0ca",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/usr/local/bin/ffmpeg\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Package installation\n",
|
||||
"\n",
|
||||
"!uv pip install -q --reinstall \"numpy==1.26.4\"\n",
|
||||
"!uv pip install -q torch torchvision torchaudio\n",
|
||||
"!uv pip install -q gradio openai-whisper ffmpeg-python\n",
|
||||
"!uv pip install -q ollama\n",
|
||||
"\n",
|
||||
"# Ensure ffmpeg is available (Mac)\n",
|
||||
"!which ffmpeg || brew install ffmpeg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f31d64ee",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 2: Import Libraries\n",
|
||||
"\n",
|
||||
"The essentials: NumPy for arrays, Gradio for the UI, Whisper for transcription, PyTorch for the model backend, and Ollama for optional LLM features.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "4782261a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import numpy as np\n",
|
||||
"import gradio as gr\n",
|
||||
"import whisper\n",
|
||||
"import torch\n",
|
||||
"import ollama"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "93a41b23",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 3: Load Whisper Model\n",
|
||||
"\n",
|
||||
"Loading the **base** model—a balanced choice between speed and accuracy. It works on both CPU (Mac) and GPU (Colab). The model is ~140MB and will download automatically on first run.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "130ed059",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading Whisper model...\n",
|
||||
"Using device: cpu\n",
|
||||
"✅ Model loaded successfully!\n",
|
||||
"Model type: <class 'whisper.model.Whisper'>\n",
|
||||
"Has transcribe method: True\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Model initialization\n",
|
||||
"\n",
|
||||
"print(\"Loading Whisper model...\")\n",
|
||||
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
|
||||
"print(f\"Using device: {device}\")\n",
|
||||
"\n",
|
||||
"whisper_model = whisper.load_model(\"base\", device=device)\n",
|
||||
"print(\"✅ Model loaded successfully!\")\n",
|
||||
"print(f\"Model type: {type(whisper_model)}\")\n",
|
||||
"print(f\"Has transcribe method: {hasattr(whisper_model, 'transcribe')}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d84f6cfe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 4: Transcription Function\n",
|
||||
"\n",
|
||||
"This is the core logic:\n",
|
||||
"- Accepts an audio file and target language\n",
|
||||
"- Maps language names to Whisper's language codes\n",
|
||||
"- Transcribes the audio using the loaded model\n",
|
||||
"- Returns the transcribed text\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "4f2c4b2c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Transcription function\n",
|
||||
"\n",
|
||||
"def transcribe_audio(audio_file, target_language):\n",
|
||||
" \"\"\"Transcribe audio file to text in the specified language.\"\"\"\n",
|
||||
" if audio_file is None:\n",
|
||||
" return \"Please upload an audio file.\"\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" # Language codes for Whisper\n",
|
||||
" language_map = {\n",
|
||||
" \"English\": \"en\",\n",
|
||||
" \"Spanish\": \"es\",\n",
|
||||
" \"French\": \"fr\",\n",
|
||||
" \"German\": \"de\",\n",
|
||||
" \"Italian\": \"it\",\n",
|
||||
" \"Portuguese\": \"pt\",\n",
|
||||
" \"Chinese\": \"zh\",\n",
|
||||
" \"Japanese\": \"ja\",\n",
|
||||
" \"Korean\": \"ko\",\n",
|
||||
" \"Russian\": \"ru\",\n",
|
||||
" \"Arabic\": \"ar\",\n",
|
||||
" \"Auto-detect\": None\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" lang_code = language_map.get(target_language)\n",
|
||||
" \n",
|
||||
" # Get file path from Gradio File component (returns path string directly)\n",
|
||||
" audio_path = audio_file.name if hasattr(audio_file, 'name') else audio_file\n",
|
||||
" \n",
|
||||
" if not audio_path or not os.path.exists(audio_path):\n",
|
||||
" return \"Invalid audio file or file not found\"\n",
|
||||
"\n",
|
||||
" # Transcribe using whisper_model.transcribe()\n",
|
||||
" result = whisper_model.transcribe(\n",
|
||||
" audio_path,\n",
|
||||
" language=lang_code,\n",
|
||||
" task=\"transcribe\",\n",
|
||||
" verbose=False # Hide confusing progress bar\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" return result[\"text\"]\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"Error: {str(e)}\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dd928784",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 5: Build the Interface\n",
|
||||
"\n",
|
||||
"Creating a simple, clean Gradio interface with:\n",
|
||||
"- **File uploader** for audio files\n",
|
||||
"- **Language dropdown** with 12+ options\n",
|
||||
"- **Transcription output** box\n",
|
||||
"- Auto-launches in browser for convenience\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "5ce2c944",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"✅ App ready! Run the next cell to launch.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Gradio interface\n",
|
||||
"\n",
|
||||
"app = gr.Interface(\n",
|
||||
" fn=transcribe_audio,\n",
|
||||
" inputs=[\n",
|
||||
" gr.File(label=\"Upload Audio File\", file_types=[\"audio\"]),\n",
|
||||
" gr.Dropdown(\n",
|
||||
" choices=[\n",
|
||||
" \"English\", \"Spanish\", \"French\", \"German\", \"Italian\",\n",
|
||||
" \"Portuguese\", \"Chinese\", \"Japanese\", \"Korean\",\n",
|
||||
" \"Russian\", \"Arabic\", \"Auto-detect\"\n",
|
||||
" ],\n",
|
||||
" value=\"English\",\n",
|
||||
" label=\"Language\"\n",
|
||||
" )\n",
|
||||
" ],\n",
|
||||
" outputs=gr.Textbox(label=\"Transcription\", lines=15),\n",
|
||||
" title=\"🎙️ Audio Transcription\",\n",
|
||||
" description=\"Upload an audio file to transcribe it.\",\n",
|
||||
" flagging_mode=\"never\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"✅ App ready! Run the next cell to launch.\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "049ac197",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 6: Launch the App\n",
|
||||
"\n",
|
||||
"Starting the Gradio server with Jupyter compatibility (`prevent_thread_lock=True`). The app will open automatically in your browser.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fa6c8d9a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"* Running on local URL: http://127.0.0.1:7860\n",
|
||||
"* To create a public link, set `share=True` in `launch()`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": []
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/hopeogbons/Projects/andela/llm_engineering/.venv/lib/python3.12/site-packages/whisper/transcribe.py:132: UserWarning: FP16 is not supported on CPU; using FP32 instead\n",
|
||||
" warnings.warn(\"FP16 is not supported on CPU; using FP32 instead\")\n",
|
||||
"100%|██████████| 10416/10416 [00:06<00:00, 1723.31frames/s]\n",
|
||||
"/Users/hopeogbons/Projects/andela/llm_engineering/.venv/lib/python3.12/site-packages/whisper/transcribe.py:132: UserWarning: FP16 is not supported on CPU; using FP32 instead\n",
|
||||
" warnings.warn(\"FP16 is not supported on CPU; using FP32 instead\")\n",
|
||||
"100%|██████████| 10416/10416 [00:30<00:00, 341.64frames/s]\n",
|
||||
"/Users/hopeogbons/Projects/andela/llm_engineering/.venv/lib/python3.12/site-packages/whisper/transcribe.py:132: UserWarning: FP16 is not supported on CPU; using FP32 instead\n",
|
||||
" warnings.warn(\"FP16 is not supported on CPU; using FP32 instead\")\n",
|
||||
"100%|██████████| 2289/2289 [00:01<00:00, 1205.18frames/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Launch\n",
|
||||
"\n",
|
||||
"# Close any previous instances\n",
|
||||
"try:\n",
|
||||
" app.close()\n",
|
||||
"except:\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
"# Start the app\n",
|
||||
"app.launch(inbrowser=True, prevent_thread_lock=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c3c2ec24",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"\n",
|
||||
"## 💡 How to Use\n",
|
||||
"\n",
|
||||
"1. **Upload** an audio file (MP3, WAV, M4A, etc.)\n",
|
||||
"2. **Select** your language (or use Auto-detect)\n",
|
||||
"3. **Click** Submit\n",
|
||||
"4. **Get** your transcription!\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## 🚀 Running on Google Colab\n",
|
||||
"\n",
|
||||
"For GPU acceleration on Colab:\n",
|
||||
"1. Runtime → Change runtime type → **GPU (T4)**\n",
|
||||
"2. Run all cells in order\n",
|
||||
"3. The model will use GPU automatically\n",
|
||||
"\n",
|
||||
"**Note:** First run downloads the Whisper model (~140MB) - this is a one-time download.\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## 📝 Supported Languages\n",
|
||||
"\n",
|
||||
"English • Spanish • French • German • Italian • Portuguese • Chinese • Japanese • Korean • Russian • Arabic • Auto-detect\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
OPENAI_API_KEY=your_openai_api_key
|
||||
@@ -0,0 +1 @@
|
||||
3.12
|
||||
263
week3/community-contributions/juan_synthetic_data/README.md
Normal file
263
week3/community-contributions/juan_synthetic_data/README.md
Normal file
@@ -0,0 +1,263 @@
|
||||
# Synthetic Data Generator
|
||||
**NOTE:** This is a copy of the repository https://github.com/Jsrodrigue/synthetic-data-creator.
|
||||
|
||||
# Synthetic Data Generator
|
||||
|
||||
An intelligent synthetic data generator that uses OpenAI models to create realistic tabular datasets based on reference data. This project includes an intuitive web interface built with Gradio.
|
||||
|
||||
> **🎓 Educational Project**: This project was inspired by the highly regarded LLM Engineering course on Udemy: [LLM Engineering: Master AI and Large Language Models](https://www.udemy.com/course/llm-engineering-master-ai-and-large-language-models/learn/lecture/52941433#questions/23828099). It demonstrates practical applications of LLM engineering principles, prompt engineering, and synthetic data generation techniques.
|
||||
|
||||
## Key highlights:
|
||||
- Built with Python & Gradio
|
||||
- Uses OpenAI GPT-4 models for tabular data synthesis
|
||||
- Focused on statistical consistency and controlled randomness
|
||||
- Lightweight and easy to extend
|
||||
|
||||
## 📸 Screenshots & Demo
|
||||
|
||||
### Application Interface
|
||||
<p align="center">
|
||||
<img src="screenshots/homepage.png" alt="Main Interface" width="70%">
|
||||
</p>
|
||||
<p align="center"><em>Main interface showing the synthetic data generator with all controls</em></p>
|
||||
|
||||
### Generated Data Preview
|
||||
<p align="center">
|
||||
<img src="screenshots/generated_table.png" alt="Generated table" width="70%">
|
||||
</p>
|
||||
<p align="center"><em> Generated CSV preview with the Wine dataset reference</em></p>
|
||||
|
||||
### Histogram plots
|
||||
<p align="center">
|
||||
<img src="screenshots/histogram.png" alt="Histogram plot" width="70%">
|
||||
</p>
|
||||
<p align="center"><em>Example of Histogram comparison plot in the Wine dataset</em></p>
|
||||
|
||||
### Boxplots
|
||||
<p align="center">
|
||||
<img src="screenshots/boxplot.png" alt="Boxplot" width="70%">
|
||||
</p>
|
||||
<p align="center"><em>Example of Boxplot comparison</em></p>
|
||||
|
||||
|
||||
### Video Demo
|
||||
[▶️ Watch the video demo on YouTube](https://youtu.be/C7c8BbUGGBA)
|
||||
|
||||
*Click to watch a complete walkthrough of the application*
|
||||
|
||||
|
||||
## 📋 Features
|
||||
|
||||
- **Intelligent Generation**: Generates synthetic data using OpenAI models (GPT-4o-mini, GPT-4.1-mini)
|
||||
- **Web Interface**: Provides an intuitive Gradio UI with real-time data preview
|
||||
- **Reference Data**: Optionally load CSV files to preserve statistical distributions
|
||||
- **Export Options**: Download generated datasets directly in CSV format
|
||||
- **Included Examples**: Comes with ready-to-use sample datasets for people, sentiment analysis, and wine quality
|
||||
- **Dynamic Batching**: Automatically adapts batch size based on prompt length and reference sample size
|
||||
- **Reference Sampling**: Uses random subsets of reference data to ensure variability and reduce API cost.
|
||||
The sample size (default `64`) can be modified in `src/constants.py` via `N_REFERENCE_ROWS`.
|
||||
|
||||
## 🚀 Installation
|
||||
|
||||
### Prerequisites
|
||||
- Python 3.12+
|
||||
- OpenAI account with API key
|
||||
|
||||
### Option 1: Using pip
|
||||
```bash
|
||||
# Create virtual environment
|
||||
python -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
|
||||
# Install dependencies
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Option 2: Using uv
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/Jsrodrigue/synthetic-data-creator.git
|
||||
cd synthetic-data-creator
|
||||
|
||||
# Install dependencies
|
||||
uv sync
|
||||
|
||||
# Activate virtual environment
|
||||
source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
||||
```
|
||||
|
||||
### Configuration
|
||||
1. Copy the environment variables example file:
|
||||
```bash
|
||||
cp .env_example .env
|
||||
```
|
||||
|
||||
2. Edit `.env` and add your OpenAI API key:
|
||||
```
|
||||
OPENAI_API_KEY=your_api_key_here
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 🎯 Usage
|
||||
|
||||
### Start the application
|
||||
|
||||
You can run the app either with **Python** or with **uv** (recommended if you installed dependencies using `uv sync`):
|
||||
|
||||
```bash
|
||||
# Option 1: using Python
|
||||
python app.py
|
||||
|
||||
# Option 2: using uv (no need to activate venv manually)
|
||||
uv run app.py
|
||||
```
|
||||
|
||||
The script will print a local URL (e.g., http://localhost:7860) — open that link in your browser.
|
||||
|
||||
### How to use the interface
|
||||
|
||||
1. **Configure Prompts**:
|
||||
- **System Prompt**: Pre-filled with the default rules defined in `src/constants.py`; edit it in the UI (or change the default there) for custom generation.
|
||||
- **User Prompt**: Specifies what type of data to generate (default: 15 rows, defined in `src/constants.py`).
|
||||
|
||||
|
||||
2. **Select Model**:
|
||||
- `gpt-4o-mini`: Faster and more economical
|
||||
- `gpt-4.1-mini`: Higher reasoning capacity
|
||||
|
||||
3. **Load Reference Data** (optional):
|
||||
- Upload a CSV file with similar data
|
||||
- Use included examples: `people_reference.csv`, `sentiment_reference.csv` or `wine_reference.csv`
|
||||
|
||||
4. **Generate Data**:
|
||||
- Click "🚀 Generate Data"
|
||||
- Review results in the Gradio UI
|
||||
- Download the generated CSV
|
||||
|
||||
|
||||
|
||||
## 📊 Quality Evaluation
|
||||
|
||||
### Simple Evaluation System
|
||||
|
||||
The project includes a simple evaluation system focused on basic metrics and visualizations:
|
||||
|
||||
#### Features
|
||||
- **Simple Metrics**: Basic statistical comparisons and quality checks
|
||||
- **Integrated Visualizations**: Automatic generation of comparison plots in the app
|
||||
- **Easy to Understand**: Clear scores and simple reports
|
||||
- **Scale Invariant**: Works with datasets of different sizes
|
||||
- **Temporary Files**: Visualizations are generated in temp files and cleaned up automatically
|
||||
|
||||
|
||||
|
||||
## 🛠️ Improvements and Next Steps
|
||||
|
||||
### Immediate Improvements
|
||||
|
||||
1. **Advanced Validation**:
|
||||
- Implement specific validators by data type
|
||||
- Create evaluation reports
|
||||
|
||||
2. **Advanced Quality Metrics**:
|
||||
- Include more advanced metrics to compare multivariate similarity (for future work), e.g.:
|
||||
- C2ST (Classifier Two‑Sample Test): train a classifier to distinguish real vs synthetic — report AUROC (ideal ≈ 0.5).
|
||||
- MMD (Maximum Mean Discrepancy): kernel-based multivariate distance.
|
||||
- Multivariate Wasserstein / Optimal Transport: joint-distribution distance (use POT).
|
||||
|
||||
3. **More Models**:
|
||||
- Integrate Hugging Face models
|
||||
- Support for local models (Ollama)
|
||||
- Comparison between different models
|
||||
|
||||
### Advanced Features
|
||||
|
||||
1. **Conditional Generation**:
|
||||
- Data based on specific conditions
|
||||
- Controlled outlier generation
|
||||
- Maintaining complex relationships
|
||||
|
||||
2. **Privacy Analysis**:
|
||||
- Differential privacy metrics
|
||||
- Sensitive data detection
|
||||
- Automatic anonymization
|
||||
|
||||
3. **Database Integration**:
|
||||
- Direct database connection
|
||||
- Massive data generation
|
||||
- Automatic synchronization
|
||||
|
||||
### Scalable Architecture
|
||||
|
||||
1. **REST API**:
|
||||
- Endpoints for integration
|
||||
- Authentication and rate limiting
|
||||
- OpenAPI documentation
|
||||
|
||||
2. **Asynchronous Processing**:
|
||||
- Work queues for long generations
|
||||
- Progress notifications
|
||||
- Robust error handling
|
||||
|
||||
3. **Monitoring and Logging**:
|
||||
- Usage and performance metrics
|
||||
- Detailed generation logs
|
||||
- Quality alerts
|
||||
|
||||
## 📁 Project Structure
|
||||
|
||||
```
|
||||
synthetic_data/
|
||||
├── app.py # Main Gradio application for synthetic data generation
|
||||
├── README.md # Project documentation
|
||||
├── pyproject.toml # Project configuration
|
||||
├── requirements.txt # Python dependencies
|
||||
├── data/ # Reference CSV datasets used for generating synthetic data
|
||||
│ ├── people_reference.csv
|
||||
│ ├── sentiment_reference.csv
|
||||
│ └── wine_reference.csv
|
||||
├── notebooks/ # Jupyter notebooks for experiments and development
|
||||
│ └── notebook.ipynb
|
||||
├── src/ # Python source code
|
||||
│ ├── __init__.py
|
||||
│ ├── constants.py # Default constants, reference sample size, and default prompts
|
||||
│ ├── data_generation.py # Core functions for batch generation and evaluation
|
||||
│ ├── evaluator.py # Evaluation logic and metrics
|
||||
│ ├── IO_utils.py # Utilities for file management and temp directories
|
||||
│ ├── openai_utils.py # Wrappers for OpenAI API calls
|
||||
│ └── plot_utils.py # Functions to create visualizations from data
|
||||
└── temp_plots/ # Temporary folder for generated plot images (auto-cleaned)
|
||||
```
|
||||
|
||||
## 📄 License
|
||||
|
||||
This project is under the MIT License. See the `LICENSE` file for more details.
|
||||
|
||||
|
||||
|
||||
|
||||
## 🎓 Course Context & Learning Outcomes
|
||||
|
||||
This project was developed as part of the [LLM Engineering: Master AI and Large Language Models](https://www.udemy.com/course/llm-engineering-master-ai-and-large-language-models/learn/lecture/52941433#questions/23828099) course on Udemy. It demonstrates practical implementation of:
|
||||
|
||||
### Key Learning Objectives:
|
||||
- **Prompt Engineering Mastery**: Creating effective system and user prompts for consistent outputs
|
||||
- **API Integration**: Working with OpenAI's API for production applications
|
||||
- **Data Processing**: Handling JSON parsing, validation, and error management
|
||||
- **Web Application Development**: Building user interfaces with Gradio
|
||||
|
||||
### Course Insights Applied:
|
||||
- **Why OpenAI over Open Source**: This project was developed as an alternative to open-source models due to consistency issues in prompt following with models like Llama 3.2. OpenAI provides more reliable and faster results for this specific task.
|
||||
- **Production Considerations**: Focus on error handling, output validation, and user experience
|
||||
- **Scalability Planning**: Architecture designed for future enhancements and integrations
|
||||
|
||||
### Related Course Topics:
|
||||
- Prompt engineering techniques
|
||||
- LLM API integration and optimization
|
||||
- Selection of the best model for each use case.
|
||||
|
||||
---
|
||||
|
||||
**📚 Course Link**: [LLM Engineering: Master AI and Large Language Models](https://www.udemy.com/course/llm-engineering-master-ai-and-large-language-models/learn/lecture/52941433#questions/23828099)
|
||||
156
week3/community-contributions/juan_synthetic_data/app.py
Normal file
156
week3/community-contributions/juan_synthetic_data/app.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import atexit
|
||||
import os
|
||||
|
||||
import gradio as gr
|
||||
import openai
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from src.constants import PROJECT_TEMP_DIR, SYSTEM_PROMPT, USER_PROMPT
|
||||
from src.data_generation import generate_and_evaluate_data
|
||||
from src.IO_utils import cleanup_temp_files
|
||||
from src.plot_utils import display_reference_csv
|
||||
|
||||
|
||||
def main():
    """Configure the environment, assemble the Gradio UI, and start the server.

    Steps, in order:
      1. Load variables from ``.env`` and assign ``OPENAI_API_KEY`` to the
         ``openai`` module.
      2. Ensure ``PROJECT_TEMP_DIR`` exists and register an ``atexit`` hook
         that calls ``cleanup_temp_files`` on it.
      3. Build a tabbed ``gr.Blocks`` layout (Input, Reference Table,
         Generated Table, Comparison, Visualizations) and wire its events.
      4. Launch the app with ``debug=True``.
    """
    # ---------------------------- environment ----------------------------
    load_dotenv()
    api_key = os.getenv("OPENAI_API_KEY")
    openai.api_key = api_key

    # Folder that receives the plot images produced during a run.
    os.makedirs(PROJECT_TEMP_DIR, exist_ok=True)

    def _purge_temp_plots():
        # Invoked by atexit so the temporary images do not outlive the process.
        cleanup_temp_files(PROJECT_TEMP_DIR)

    atexit.register(_purge_temp_plots)

    # ------------------------------ layout -------------------------------
    with gr.Blocks() as ui:
        # The temp folder travels through the event system as session state.
        plots_dir = gr.State(value=PROJECT_TEMP_DIR)

        gr.Markdown("# 🧠 Synthetic Data Generator (with OpenAI)")

        with gr.Tabs():
            # Tab 1 - everything the user supplies before generating.
            with gr.Tab("Input"):
                # The system prompt is long, so it starts collapsed.
                with gr.Accordion("System Prompt (click to expand)", open=False):
                    system_box = gr.Textbox(
                        value=SYSTEM_PROMPT,
                        label="System Prompt",
                        lines=20,
                    )

                user_box = gr.Textbox(
                    value=USER_PROMPT,
                    label="User Prompt",
                    lines=5,
                )

                model_choice = gr.Dropdown(
                    choices=["gpt-4o-mini", "gpt-4.1-mini"],
                    value="gpt-4o-mini",
                    label="OpenAI Model",
                )

                reference_file = gr.File(
                    file_types=[".csv"],
                    label="Reference CSV (optional)",
                )

                # Bundled CSVs offered as one-click examples for the upload box.
                sample_csvs = [
                    "data/sentiment_reference.csv",
                    "data/people_reference.csv",
                    "data/wine_reference.csv",
                ]
                gr.Examples(examples=sample_csvs, inputs=reference_file)

                run_button = gr.Button("🚀 Generate Data")

                # File component that receives the generated CSV for download.
                csv_download = gr.File(label="Download CSV")

            # Tab 2 - preview of the uploaded reference CSV.
            with gr.Tab("Reference Table"):
                reference_table = gr.DataFrame(label="Reference CSV Preview")

            # Tab 3 - the generated rows.
            with gr.Tab("Generated Table"):
                generated_table = gr.DataFrame(label="Generated Data")

            # Tab 4 - evaluation results (the tab itself is labelled "Comparison").
            with gr.Tab("Comparison"):
                with gr.Accordion("Evaluation Results (click to expand)", open=True):
                    evaluation_table = gr.DataFrame(label="Evaluation Results")

            # Tab 5 - per-column plots.
            with gr.Tab("Visualizations"):
                gr.Markdown("# Click on the box to expand")

                plots_gallery = gr.Gallery(
                    label="Column Visualizations",
                    show_label=True,
                    columns=2,
                    height="auto",
                    interactive=True,
                )

        # Invisible slot for the fourth value returned by the generation callback.
        generated_state = gr.State()

        # ------------------------------ events ---------------------------
        # Order matters: Gradio passes inputs to, and maps outputs from, the
        # callback positionally.
        generation_inputs = [
            system_box,
            user_box,
            plots_dir,
            reference_file,
            model_choice,
        ]
        generation_outputs = [
            generated_table,
            csv_download,
            evaluation_table,
            generated_state,
            plots_gallery,
        ]
        run_button.click(
            fn=generate_and_evaluate_data,
            inputs=generation_inputs,
            outputs=generation_outputs,
        )

        # Refresh the preview tab whenever the reference file changes.
        reference_file.change(
            fn=display_reference_csv,
            inputs=[reference_file],
            outputs=[reference_table],
        )

    ui.launch(debug=True)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,16 @@
|
||||
Name,Age,City
|
||||
John,32,New York
|
||||
Alice,45,Los Angeles
|
||||
Bob,28,Chicago
|
||||
Eve,35,Houston
|
||||
Mike,52,Philadelphia
|
||||
Emma,29,San Antonio
|
||||
Oliver,39,Phoenix
|
||||
Isabella,48,San Diego
|
||||
William,55,Dallas
|
||||
Charlotte,31,San Jose
|
||||
Alexander,42,San Francisco
|
||||
Harper,38,San Antonio
|
||||
Julia,46,San Diego
|
||||
Ethan,53,San Jose
|
||||
Ava,29,San Francisco
|
||||
|
@@ -0,0 +1,99 @@
|
||||
,Comment,sentiment
|
||||
0,"Them: I don't think I like this game.
|
||||
|
||||
Me: But you haven't even played it for 5 minutes and are still in the tutorial.",negative
|
||||
1,Then you leave them to farm the smaller creatures while you either wait or help them kill them all with the click of a button.,negative
|
||||
2,Nothing beats the feeling you get when you see them fall in love with it just like you did all those years ago,positive
|
||||
3,"[Also, they're made of paper](https://i.imgur.com/wYu0G9J.jpg)
|
||||
|
||||
Edit: I tried to make a gif and failed so here's a [video](https://i.imgur.com/aPzS8Ny.mp4)",negative
|
||||
4,"Haha... That was exactly it when my brother tried to get me into WoW.
|
||||
|
||||
Him, "" I can run you through raids to get you to level up faster and get better gear. But first you need to be this min level. What are you""
|
||||
|
||||
Me ""lvl 1"".
|
||||
|
||||
Him ""ok. Let's do a couple quests to get you up. What is your quest""
|
||||
|
||||
Me ""collect 20 apples"".",positive
|
||||
5,I'm going through this right now. I just started playing minecraft for the first time and my SO is having to walk me through everything.,positive
|
||||
6,Then they get even more into it than you and end up getting all the loot and items you wanted before you. They make you look like the noob in about 3 months.,positive
|
||||
7,"###Take your time, you got this
|
||||
|#|user|EDIT|comment|Link
|
||||
|:--|:--|:--|:--|:--|
|
||||
|0|/u/KiwiChoppa147|[EDIT](https://i.imgur.com/OI8jNtE.png)|Then you leave them to farm the smaller creatures while you either wait or help them kill them all with the click of a button.|[Link](/r/gaming/comments/ccr8c8/take_your_time_you_got_this/etor3t2/)|
|
||||
|1|/u/League0fGaming|[EDIT](https://i.imgur.com/5uvRAYy.png)|Nothing beats the feeling you get when you see them fall in love with it just like you did all those years ago|[Link](/r/gaming/comments/ccr8c8/take_your_time_you_got_this/etor371/)|
|
||||
|2|/u/DeJMan|[EDIT](https://i.imgur.com/3FL3IFb.png)|[Also, they're made of paper](https://i.imgur.com/wYu0G9J.jpg) Edit: I tried to make a gif and failed so here's a [video](https://i.imgur.com/aPzS8Ny.mp4)|[Link](/r/gaming/comments/ccr8c8/take_your_time_you_got_this/etos1ic/)|
|
||||
|3|/u/Bamboo6|[EDIT](https://i.imgur.com/SiDFZxQ.png)|Haha... That was exactly it when my brother tried to get me into WoW. Him, "" I can run you through raids to get you to level up faster and get better gear. But first you need to be this min level. What are you"" Me ""lvl 1"". Him ""ok. Let's do a couple quests to get you up. What is your quest"" Me ""collect 20 apples"".|[Link](/r/gaming/comments/ccr8c8/take_your_time_you_got_this/etorb6s/)|
|
||||
|4|/u/xxfisharemykidsxx|[EDIT](https://i.imgur.com/3ek9F93.png)|I'm going through this right now. I just started playing minecraft for the first time and my SO is having to walk me through everything.|[Link](/r/gaming/comments/ccr8c8/take_your_time_you_got_this/etor7hk/)|
|
||||
|5|/u/DuckSeeDuckWorld|[EDIT](https://i.imgur.com/rlE6VFP.png)|[This is my last EDIT before I go to camp for a week](https://imgur.com/xoOWF6K)|[Link](/r/gaming/comments/ccr8c8/take_your_time_you_got_this/etorpvh/)|
|
||||
|6|/u/ChecksUsernames|[EDIT](https://i.imgur.com/6Wc56ec.png)|What the hell you have your own edit bot?!|[Link](/r/gaming/comments/ccr8c8/take_your_time_you_got_this/etotc4w/)|
|
||||
|
||||
|
||||
I am a little fan-made bot who loves /u/SrGrafo but is a little lazy with hunting for EDITs. If you want to support our great creator, check out his [Patreon](https://Patreon.com/SrGrafo)",positive
|
||||
8,"Them: ""Wait, where did you go?""
|
||||
|
||||
Me --cleaning up the vast quantities of mobs they've managed to stumble past: "" Oh just, you know, letting you get a feel for navigation.""",neutral
|
||||
9,"Don't mind the arrows, everything's fine",positive
|
||||
10,[me_irl](https://i.imgur.com/eRPb2X3.png),neutral
|
||||
11,"I usually teach them the basic controls, and then throw them to the wolves like Spartans. Its sink or swim now!",positive
|
||||
12,This is Warframe in a nutshell,neutral
|
||||
13,[I love guiding people trough the game for the First time](https://imgur.com/uep20iB),positive
|
||||
14,[showing a video game to my nephew for the first time didn't go that well :D](https://i.imgur.com/dQf4mfI.png),negative
|
||||
15,[When it's a puzzle game](https://i.imgur.com/BgLqzRa.png),neutral
|
||||
16,"I love SrGrafo’s cheeky smiles in his drawings.
|
||||
|
||||
Also, I wonder if it’s Senior Grafo, Señor Grafo, or Sir Grafo.",positive
|
||||
17,"https://i.redd.it/pqjza65wrd711.jpg
|
||||
|
||||
Same look.",neutral
|
||||
18,[This is my last EDIT before I go to camp for a week](https://imgur.com/xoOWF6K),neutral
|
||||
19,Haha this is me in Warframe but I've only been playing for a year. It's so easy to find beginners and they always need help with something.,positive
|
||||
20,This happens all the time on r/warframe ! Helping new people is like a whole part of the game's fun.,positive
|
||||
21,[deleted],neutral
|
||||
22,"Once day when I have kids, I hope I can do the same with them",positive
|
||||
23,WAIT NO. WHY'D YOU PRESS X INSTEAD? Now you just used the only consumable for the next like 3 stages. Here lemme just restart from your last save...,neutral
|
||||
24,Big gamer energy.,positive
|
||||
25,"What about ten minutes in and they say “I’m not sure I get what’s going on. Eh I’m bored.”
|
||||
|
||||
Shitty phone [EDIT](https://imgur.com/a/zr4Ahnp)",negative
|
||||
26,Press *alt+f4* for the special move,positive
|
||||
27,"I remember teaching my little brother everything about Minecraft. Ah, good times. Now he's a little prick xD",positive
|
||||
28,2nd top post of 2019!! \(^0^)/,positive
|
||||
29,"With Grafo’s most recent comics, this achievement means so much more now. Check them out on his profile, u/SrGrafo, they’re titled “SrGrafo’s inception “",neutral
|
||||
30,"this is my bf showing me wow.
|
||||
|
||||
Him: “You can’t just stand there and take damage.”
|
||||
Me: “but I can’t move fast and my spells get cancelled.”
|
||||
|
||||
*proceeds to die 5 times in a row.*
|
||||
|
||||
and then he finishes it for me after watching me fail.
|
||||
|
||||
Me: yay. 😀😀",neutral
|
||||
31,"Quick cross over
|
||||
|
||||
https://imgur.com/a/9y4JVAr",neutral
|
||||
32,"Man, I really enjoy encoutering nice Veterans in online games",positive
|
||||
33,Wow. This is my first time here before the edits.,positive
|
||||
34,So this is the most liked Reddit post hmm,positive
|
||||
35,Diamond armor? Really?,positive
|
||||
36,"I remember when I was playing Destiny and I was pretty low level, having fun going through the missions, then my super high level friend joined. It was really unfun because he was slaughtering everything for me while I sat at the back doing jackshit",positive
|
||||
37,"""I'll just use this character until you get the hang of things and then swap to an alt so we can level together""",neutral
|
||||
38,"My girlfriend often just doesn't get why I love the games I play, but that's fine. I made sure to sit and watch her while she fell in love with breath of the wild.",negative
|
||||
39,"Warframe was full of people like this last i was on and its amazing. I was one of them too, but mostly for advice more than items because i was broke constantly.",neutral
|
||||
40,This is the most upvoted post I've seen on Reddit. And it was unexpectedly touching :),positive
|
||||
41,220k. holy moly,neutral
|
||||
42,Last,neutral
|
||||
43,"170k+ upvotes in 11 hours.
|
||||
Is this a record?",neutral
|
||||
44,This is the top post of all time😱,positive
|
||||
45,"Congratulations, 2nd post of the Year",positive
|
||||
46,Most liked post on reddit,positive
|
||||
47,Absolute Unit,neutral
|
||||
48,"I did similar things in Monster Hunter World.
|
||||
The only problem is they would never play ever again and play other games like Fortnite...feels bad man.
|
||||
If you ever get interested on playing the game u/SrGrafo then I’ll teach you the ways of the hunter!!! (For real tho it’s a really good game and better with buddy’s!)",positive
|
||||
49,Congrats on the second most upvoted post of 2019 my guy.,positive
|
||||
50,"This was it with my brother when I first started playing POE. He made it soooo much easier to get into the game. To understand the gameplay and mechanics. I think I’d have left in a day or two had it not been for him
|
||||
And walking me through the first few missions lmao. u/sulphra_",positive
|
||||
|
@@ -0,0 +1,159 @@
|
||||
fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,Id
|
||||
7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,0
|
||||
7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
|
||||
7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,2
|
||||
11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,3
|
||||
7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,4
|
||||
7.4,0.66,0.0,1.8,0.075,13.0,40.0,0.9978,3.51,0.56,9.4,5,5
|
||||
7.9,0.6,0.06,1.6,0.069,15.0,59.0,0.9964,3.3,0.46,9.4,5,6
|
||||
7.3,0.65,0.0,1.2,0.065,15.0,21.0,0.9946,3.39,0.47,10.0,7,7
|
||||
7.8,0.58,0.02,2.0,0.073,9.0,18.0,0.9968,3.36,0.57,9.5,7,8
|
||||
6.7,0.58,0.08,1.8,0.09699999999999999,15.0,65.0,0.9959,3.28,0.54,9.2,5,10
|
||||
5.6,0.615,0.0,1.6,0.08900000000000001,16.0,59.0,0.9943,3.58,0.52,9.9,5,12
|
||||
7.8,0.61,0.29,1.6,0.114,9.0,29.0,0.9974,3.26,1.56,9.1,5,13
|
||||
8.5,0.28,0.56,1.8,0.092,35.0,103.0,0.9969,3.3,0.75,10.5,7,16
|
||||
7.9,0.32,0.51,1.8,0.341,17.0,56.0,0.9969,3.04,1.08,9.2,6,19
|
||||
7.6,0.39,0.31,2.3,0.08199999999999999,23.0,71.0,0.9982,3.52,0.65,9.7,5,21
|
||||
7.9,0.43,0.21,1.6,0.106,10.0,37.0,0.9966,3.17,0.91,9.5,5,22
|
||||
8.5,0.49,0.11,2.3,0.084,9.0,67.0,0.9968,3.17,0.53,9.4,5,23
|
||||
6.9,0.4,0.14,2.4,0.085,21.0,40.0,0.9968,3.43,0.63,9.7,6,24
|
||||
6.3,0.39,0.16,1.4,0.08,11.0,23.0,0.9955,3.34,0.56,9.3,5,25
|
||||
7.6,0.41,0.24,1.8,0.08,4.0,11.0,0.9962,3.28,0.59,9.5,5,26
|
||||
7.1,0.71,0.0,1.9,0.08,14.0,35.0,0.9972,3.47,0.55,9.4,5,28
|
||||
7.8,0.645,0.0,2.0,0.08199999999999999,8.0,16.0,0.9964,3.38,0.59,9.8,6,29
|
||||
6.7,0.675,0.07,2.4,0.08900000000000001,17.0,82.0,0.9958,3.35,0.54,10.1,5,30
|
||||
8.3,0.655,0.12,2.3,0.083,15.0,113.0,0.9966,3.17,0.66,9.8,5,32
|
||||
5.2,0.32,0.25,1.8,0.10300000000000001,13.0,50.0,0.9957,3.38,0.55,9.2,5,34
|
||||
7.8,0.645,0.0,5.5,0.086,5.0,18.0,0.9986,3.4,0.55,9.6,6,35
|
||||
7.8,0.6,0.14,2.4,0.086,3.0,15.0,0.9975,3.42,0.6,10.8,6,36
|
||||
8.1,0.38,0.28,2.1,0.066,13.0,30.0,0.9968,3.23,0.73,9.7,7,37
|
||||
7.3,0.45,0.36,5.9,0.07400000000000001,12.0,87.0,0.9978,3.33,0.83,10.5,5,40
|
||||
8.8,0.61,0.3,2.8,0.08800000000000001,17.0,46.0,0.9976,3.26,0.51,9.3,4,41
|
||||
7.5,0.49,0.2,2.6,0.332,8.0,14.0,0.9968,3.21,0.9,10.5,6,42
|
||||
8.1,0.66,0.22,2.2,0.069,9.0,23.0,0.9968,3.3,1.2,10.3,5,43
|
||||
4.6,0.52,0.15,2.1,0.054000000000000006,8.0,65.0,0.9934,3.9,0.56,13.1,4,45
|
||||
7.7,0.935,0.43,2.2,0.114,22.0,114.0,0.997,3.25,0.73,9.2,5,46
|
||||
8.8,0.66,0.26,1.7,0.07400000000000001,4.0,23.0,0.9971,3.15,0.74,9.2,5,50
|
||||
6.6,0.52,0.04,2.2,0.069,8.0,15.0,0.9956,3.4,0.63,9.4,6,51
|
||||
6.6,0.5,0.04,2.1,0.068,6.0,14.0,0.9955,3.39,0.64,9.4,6,52
|
||||
8.6,0.38,0.36,3.0,0.081,30.0,119.0,0.997,3.2,0.56,9.4,5,53
|
||||
7.6,0.51,0.15,2.8,0.11,33.0,73.0,0.9955,3.17,0.63,10.2,6,54
|
||||
10.2,0.42,0.57,3.4,0.07,4.0,10.0,0.9971,3.04,0.63,9.6,5,56
|
||||
7.8,0.59,0.18,2.3,0.076,17.0,54.0,0.9975,3.43,0.59,10.0,5,58
|
||||
7.3,0.39,0.31,2.4,0.07400000000000001,9.0,46.0,0.9962,3.41,0.54,9.4,6,59
|
||||
8.8,0.4,0.4,2.2,0.079,19.0,52.0,0.998,3.44,0.64,9.2,5,60
|
||||
7.7,0.69,0.49,1.8,0.115,20.0,112.0,0.9968,3.21,0.71,9.3,5,61
|
||||
7.0,0.735,0.05,2.0,0.081,13.0,54.0,0.9966,3.39,0.57,9.8,5,63
|
||||
7.2,0.725,0.05,4.65,0.086,4.0,11.0,0.9962,3.41,0.39,10.9,5,64
|
||||
7.2,0.725,0.05,4.65,0.086,4.0,11.0,0.9962,3.41,0.39,10.9,5,65
|
||||
6.6,0.705,0.07,1.6,0.076,6.0,15.0,0.9962,3.44,0.58,10.7,5,67
|
||||
8.0,0.705,0.05,1.9,0.07400000000000001,8.0,19.0,0.9962,3.34,0.95,10.5,6,69
|
||||
7.7,0.69,0.22,1.9,0.084,18.0,94.0,0.9961,3.31,0.48,9.5,5,72
|
||||
8.3,0.675,0.26,2.1,0.084,11.0,43.0,0.9976,3.31,0.53,9.2,4,73
|
||||
8.8,0.41,0.64,2.2,0.09300000000000001,9.0,42.0,0.9986,3.54,0.66,10.5,5,76
|
||||
6.8,0.785,0.0,2.4,0.10400000000000001,14.0,30.0,0.9966,3.52,0.55,10.7,6,77
|
||||
6.7,0.75,0.12,2.0,0.086,12.0,80.0,0.9958,3.38,0.52,10.1,5,78
|
||||
8.3,0.625,0.2,1.5,0.08,27.0,119.0,0.9972,3.16,1.12,9.1,4,79
|
||||
6.2,0.45,0.2,1.6,0.069,3.0,15.0,0.9958,3.41,0.56,9.2,5,80
|
||||
7.4,0.5,0.47,2.0,0.086,21.0,73.0,0.997,3.36,0.57,9.1,5,82
|
||||
6.3,0.3,0.48,1.8,0.069,18.0,61.0,0.9959,3.44,0.78,10.3,6,84
|
||||
6.9,0.55,0.15,2.2,0.076,19.0,40.0,0.9961,3.41,0.59,10.1,5,85
|
||||
8.6,0.49,0.28,1.9,0.11,20.0,136.0,0.9972,2.93,1.95,9.9,6,86
|
||||
7.7,0.49,0.26,1.9,0.062,9.0,31.0,0.9966,3.39,0.64,9.6,5,87
|
||||
9.3,0.39,0.44,2.1,0.107,34.0,125.0,0.9978,3.14,1.22,9.5,5,88
|
||||
7.0,0.62,0.08,1.8,0.076,8.0,24.0,0.9978,3.48,0.53,9.0,5,89
|
||||
7.9,0.52,0.26,1.9,0.079,42.0,140.0,0.9964,3.23,0.54,9.5,5,90
|
||||
8.6,0.49,0.28,1.9,0.11,20.0,136.0,0.9972,2.93,1.95,9.9,6,91
|
||||
7.7,0.49,0.26,1.9,0.062,9.0,31.0,0.9966,3.39,0.64,9.6,5,93
|
||||
5.0,1.02,0.04,1.4,0.045,41.0,85.0,0.9938,3.75,0.48,10.5,4,94
|
||||
6.8,0.775,0.0,3.0,0.102,8.0,23.0,0.9965,3.45,0.56,10.7,5,96
|
||||
7.6,0.9,0.06,2.5,0.079,5.0,10.0,0.9967,3.39,0.56,9.8,5,98
|
||||
8.1,0.545,0.18,1.9,0.08,13.0,35.0,0.9972,3.3,0.59,9.0,6,99
|
||||
8.3,0.61,0.3,2.1,0.084,11.0,50.0,0.9972,3.4,0.61,10.2,6,100
|
||||
8.1,0.545,0.18,1.9,0.08,13.0,35.0,0.9972,3.3,0.59,9.0,6,102
|
||||
8.1,0.575,0.22,2.1,0.077,12.0,65.0,0.9967,3.29,0.51,9.2,5,103
|
||||
7.2,0.49,0.24,2.2,0.07,5.0,36.0,0.996,3.33,0.48,9.4,5,104
|
||||
8.1,0.575,0.22,2.1,0.077,12.0,65.0,0.9967,3.29,0.51,9.2,5,105
|
||||
7.8,0.41,0.68,1.7,0.467,18.0,69.0,0.9973,3.08,1.31,9.3,5,106
|
||||
6.2,0.63,0.31,1.7,0.08800000000000001,15.0,64.0,0.9969,3.46,0.79,9.3,5,107
|
||||
7.8,0.56,0.19,1.8,0.10400000000000001,12.0,47.0,0.9964,3.19,0.93,9.5,5,110
|
||||
8.4,0.62,0.09,2.2,0.084,11.0,108.0,0.9964,3.15,0.66,9.8,5,111
|
||||
10.1,0.31,0.44,2.3,0.08,22.0,46.0,0.9988,3.32,0.67,9.7,6,113
|
||||
7.8,0.56,0.19,1.8,0.10400000000000001,12.0,47.0,0.9964,3.19,0.93,9.5,5,114
|
||||
9.4,0.4,0.31,2.2,0.09,13.0,62.0,0.9966,3.07,0.63,10.5,6,115
|
||||
8.3,0.54,0.28,1.9,0.077,11.0,40.0,0.9978,3.39,0.61,10.0,6,116
|
||||
7.3,1.07,0.09,1.7,0.17800000000000002,10.0,89.0,0.9962,3.3,0.57,9.0,5,120
|
||||
8.8,0.55,0.04,2.2,0.11900000000000001,14.0,56.0,0.9962,3.21,0.6,10.9,6,121
|
||||
7.3,0.695,0.0,2.5,0.075,3.0,13.0,0.998,3.49,0.52,9.2,5,122
|
||||
7.8,0.5,0.17,1.6,0.08199999999999999,21.0,102.0,0.996,3.39,0.48,9.5,5,124
|
||||
8.2,1.33,0.0,1.7,0.081,3.0,12.0,0.9964,3.53,0.49,10.9,5,126
|
||||
8.1,1.33,0.0,1.8,0.08199999999999999,3.0,12.0,0.9964,3.54,0.48,10.9,5,127
|
||||
8.0,0.59,0.16,1.8,0.065,3.0,16.0,0.9962,3.42,0.92,10.5,7,128
|
||||
8.0,0.745,0.56,2.0,0.11800000000000001,30.0,134.0,0.9968,3.24,0.66,9.4,5,130
|
||||
5.6,0.5,0.09,2.3,0.049,17.0,99.0,0.9937,3.63,0.63,13.0,5,131
|
||||
7.9,1.04,0.05,2.2,0.084,13.0,29.0,0.9959,3.22,0.55,9.9,6,134
|
||||
8.4,0.745,0.11,1.9,0.09,16.0,63.0,0.9965,3.19,0.82,9.6,5,135
|
||||
7.2,0.415,0.36,2.0,0.081,13.0,45.0,0.9972,3.48,0.64,9.2,5,137
|
||||
8.4,0.745,0.11,1.9,0.09,16.0,63.0,0.9965,3.19,0.82,9.6,5,140
|
||||
5.2,0.34,0.0,1.8,0.05,27.0,63.0,0.9916,3.68,0.79,14.0,6,142
|
||||
6.3,0.39,0.08,1.7,0.066,3.0,20.0,0.9954,3.34,0.58,9.4,5,143
|
||||
5.2,0.34,0.0,1.8,0.05,27.0,63.0,0.9916,3.68,0.79,14.0,6,144
|
||||
8.1,0.67,0.55,1.8,0.11699999999999999,32.0,141.0,0.9968,3.17,0.62,9.4,5,145
|
||||
5.8,0.68,0.02,1.8,0.087,21.0,94.0,0.9944,3.54,0.52,10.0,5,146
|
||||
6.9,0.49,0.1,2.3,0.07400000000000001,12.0,30.0,0.9959,3.42,0.58,10.2,6,148
|
||||
7.3,0.33,0.47,2.1,0.077,5.0,11.0,0.9958,3.33,0.53,10.3,6,150
|
||||
9.2,0.52,1.0,3.4,0.61,32.0,69.0,0.9996,2.74,2.0,9.4,4,151
|
||||
7.5,0.6,0.03,1.8,0.095,25.0,99.0,0.995,3.35,0.54,10.1,5,152
|
||||
7.5,0.6,0.03,1.8,0.095,25.0,99.0,0.995,3.35,0.54,10.1,5,153
|
||||
7.1,0.43,0.42,5.5,0.071,28.0,128.0,0.9973,3.42,0.71,10.5,5,155
|
||||
7.1,0.43,0.42,5.5,0.07,29.0,129.0,0.9973,3.42,0.72,10.5,5,156
|
||||
7.1,0.43,0.42,5.5,0.071,28.0,128.0,0.9973,3.42,0.71,10.5,5,157
|
||||
7.1,0.68,0.0,2.2,0.073,12.0,22.0,0.9969,3.48,0.5,9.3,5,158
|
||||
6.8,0.6,0.18,1.9,0.079,18.0,86.0,0.9968,3.59,0.57,9.3,6,159
|
||||
7.6,0.95,0.03,2.0,0.09,7.0,20.0,0.9959,3.2,0.56,9.6,5,160
|
||||
7.6,0.68,0.02,1.3,0.07200000000000001,9.0,20.0,0.9965,3.17,1.08,9.2,4,161
|
||||
7.8,0.53,0.04,1.7,0.076,17.0,31.0,0.9964,3.33,0.56,10.0,6,162
|
||||
7.4,0.6,0.26,7.3,0.07,36.0,121.0,0.9982,3.37,0.49,9.4,5,163
|
||||
7.3,0.59,0.26,7.2,0.07,35.0,121.0,0.9981,3.37,0.49,9.4,5,164
|
||||
7.8,0.63,0.48,1.7,0.1,14.0,96.0,0.9961,3.19,0.62,9.5,5,165
|
||||
6.8,0.64,0.1,2.1,0.085,18.0,101.0,0.9956,3.34,0.52,10.2,5,166
|
||||
7.3,0.55,0.03,1.6,0.07200000000000001,17.0,42.0,0.9956,3.37,0.48,9.0,4,167
|
||||
6.8,0.63,0.07,2.1,0.08900000000000001,11.0,44.0,0.9953,3.47,0.55,10.4,6,168
|
||||
7.9,0.885,0.03,1.8,0.057999999999999996,4.0,8.0,0.9972,3.36,0.33,9.1,4,170
|
||||
8.0,0.42,0.17,2.0,0.073,6.0,18.0,0.9972,3.29,0.61,9.2,6,172
|
||||
7.4,0.62,0.05,1.9,0.068,24.0,42.0,0.9961,3.42,0.57,11.5,6,173
|
||||
6.9,0.5,0.04,1.5,0.085,19.0,49.0,0.9958,3.35,0.78,9.5,5,175
|
||||
7.3,0.38,0.21,2.0,0.08,7.0,35.0,0.9961,3.33,0.47,9.5,5,176
|
||||
7.5,0.52,0.42,2.3,0.087,8.0,38.0,0.9972,3.58,0.61,10.5,6,177
|
||||
7.0,0.805,0.0,2.5,0.068,7.0,20.0,0.9969,3.48,0.56,9.6,5,178
|
||||
8.8,0.61,0.14,2.4,0.067,10.0,42.0,0.9969,3.19,0.59,9.5,5,179
|
||||
8.8,0.61,0.14,2.4,0.067,10.0,42.0,0.9969,3.19,0.59,9.5,5,180
|
||||
8.9,0.61,0.49,2.0,0.27,23.0,110.0,0.9972,3.12,1.02,9.3,5,181
|
||||
7.2,0.73,0.02,2.5,0.076,16.0,42.0,0.9972,3.44,0.52,9.3,5,182
|
||||
6.8,0.61,0.2,1.8,0.077,11.0,65.0,0.9971,3.54,0.58,9.3,5,183
|
||||
6.7,0.62,0.21,1.9,0.079,8.0,62.0,0.997,3.52,0.58,9.3,6,184
|
||||
8.9,0.31,0.57,2.0,0.111,26.0,85.0,0.9971,3.26,0.53,9.7,5,185
|
||||
7.4,0.39,0.48,2.0,0.08199999999999999,14.0,67.0,0.9972,3.34,0.55,9.2,5,186
|
||||
7.9,0.5,0.33,2.0,0.084,15.0,143.0,0.9968,3.2,0.55,9.5,5,188
|
||||
8.2,0.5,0.35,2.9,0.077,21.0,127.0,0.9976,3.23,0.62,9.4,5,190
|
||||
6.4,0.37,0.25,1.9,0.07400000000000001,21.0,49.0,0.9974,3.57,0.62,9.8,6,191
|
||||
7.6,0.55,0.21,2.2,0.071,7.0,28.0,0.9964,3.28,0.55,9.7,5,193
|
||||
7.6,0.55,0.21,2.2,0.071,7.0,28.0,0.9964,3.28,0.55,9.7,5,194
|
||||
7.3,0.58,0.3,2.4,0.07400000000000001,15.0,55.0,0.9968,3.46,0.59,10.2,5,196
|
||||
11.5,0.3,0.6,2.0,0.067,12.0,27.0,0.9981,3.11,0.97,10.1,6,197
|
||||
6.9,1.09,0.06,2.1,0.061,12.0,31.0,0.9948,3.51,0.43,11.4,4,199
|
||||
9.6,0.32,0.47,1.4,0.055999999999999994,9.0,24.0,0.99695,3.22,0.82,10.3,7,200
|
||||
7.0,0.43,0.36,1.6,0.08900000000000001,14.0,37.0,0.99615,3.34,0.56,9.2,6,204
|
||||
12.8,0.3,0.74,2.6,0.095,9.0,28.0,0.9994,3.2,0.77,10.8,7,205
|
||||
12.8,0.3,0.74,2.6,0.095,9.0,28.0,0.9994,3.2,0.77,10.8,7,206
|
||||
7.8,0.44,0.28,2.7,0.1,18.0,95.0,0.9966,3.22,0.67,9.4,5,208
|
||||
9.7,0.53,0.6,2.0,0.039,5.0,19.0,0.99585,3.3,0.86,12.4,6,210
|
||||
8.0,0.725,0.24,2.8,0.083,10.0,62.0,0.99685,3.35,0.56,10.0,6,211
|
||||
8.2,0.57,0.26,2.2,0.06,28.0,65.0,0.9959,3.3,0.43,10.1,5,213
|
||||
7.8,0.735,0.08,2.4,0.092,10.0,41.0,0.9974,3.24,0.71,9.8,6,214
|
||||
7.0,0.49,0.49,5.6,0.06,26.0,121.0,0.9974,3.34,0.76,10.5,5,215
|
||||
8.7,0.625,0.16,2.0,0.10099999999999999,13.0,49.0,0.9962,3.14,0.57,11.0,5,216
|
||||
8.1,0.725,0.22,2.2,0.07200000000000001,11.0,41.0,0.9967,3.36,0.55,9.1,5,217
|
||||
7.5,0.49,0.19,1.9,0.076,10.0,44.0,0.9957,3.39,0.54,9.7,5,218
|
||||
7.8,0.34,0.37,2.0,0.08199999999999999,24.0,58.0,0.9964,3.34,0.59,9.4,6,220
|
||||
7.4,0.53,0.26,2.0,0.10099999999999999,16.0,72.0,0.9957,3.15,0.57,9.4,5,221
|
||||
|
@@ -0,0 +1,292 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "63356928",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Initial Note\n",
|
||||
"After running experiments in Colab using open-source models from Hugging Face, I decided to do the exercise with OpenAI. The reason is that Llama 3.2 frequently did not follow the prompts correctly, leading to inconsistencies and poor performance. Additionally, using larger models significantly increased processing time, making them less practical for this task.\n",
|
||||
"\n",
|
||||
"The code from this notebook will be reorganized into modules for the final demo."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5c12f081",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Module to generate synthetic data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2389d798",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"import re \n",
|
||||
"\n",
|
||||
"def _clean_json_output(raw_text: str) -> str:\n",
|
||||
" \"\"\"\n",
|
||||
" Limpia la salida de OpenAI para convertirla en JSON válido:\n",
|
||||
" - Mantiene las comillas de claves sin tocar.\n",
|
||||
" - Escapa solo las comillas dobles dentro de los strings de valores.\n",
|
||||
" - Escapa \\n, \\r, \\t.\n",
|
||||
" - Remueve code fences y HTML.\n",
|
||||
" - Asegura que el array comience con [ y termine con ].\n",
|
||||
" - Elimina comas finales.\n",
|
||||
" \"\"\"\n",
|
||||
" text = raw_text.strip()\n",
|
||||
" \n",
|
||||
" # Remover code fences y HTML\n",
|
||||
" text = re.sub(r\"```(?:json)?\", \"\", text)\n",
|
||||
" text = re.sub(r\"</?[^>]+>\", \"\", text)\n",
|
||||
" \n",
|
||||
" # Escapar comillas dobles dentro de valores de Comment\n",
|
||||
" def escape_quotes_in_values(match):\n",
|
||||
" value = match.group(1)\n",
|
||||
" value = value.replace('\"', r'\\\"') # solo dentro del valor\n",
|
||||
" value = value.replace('\\n', r'\\n').replace('\\r', r'\\r').replace('\\t', r'\\t')\n",
|
||||
" return f'\"{value}\"'\n",
|
||||
" \n",
|
||||
" text = re.sub(r'\"(.*?)\"', escape_quotes_in_values, text)\n",
|
||||
" \n",
|
||||
" # Asegurar que empieza y termina con []\n",
|
||||
" if not text.startswith('['):\n",
|
||||
" text = '[' + text\n",
|
||||
" if not text.endswith(']'):\n",
|
||||
" text += ']'\n",
|
||||
" \n",
|
||||
" # Eliminar comas finales antes de cerrar corchetes\n",
|
||||
" text = re.sub(r',\\s*]', ']', text)\n",
|
||||
" \n",
|
||||
" return text\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "75bfad6f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import json\n",
|
||||
"import openai\n",
|
||||
"import tempfile\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def generate_synthetic_data_openai(\n",
|
||||
" system_prompt: str,\n",
|
||||
" user_prompt: str,\n",
|
||||
" reference_file=None,\n",
|
||||
" openai_model=\"gpt-4o-mini\",\n",
|
||||
" max_tokens=2048,\n",
|
||||
" temperature=0.0\n",
|
||||
"):\n",
|
||||
" \"\"\"\n",
|
||||
" Genera datos sintéticos y devuelve el DataFrame y la ruta de un CSV temporal.\n",
|
||||
" \"\"\"\n",
|
||||
" # Preparar prompt completo\n",
|
||||
" if reference_file:\n",
|
||||
" if isinstance(reference_file, str):\n",
|
||||
" df_ref = pd.read_csv(reference_file)\n",
|
||||
" else:\n",
|
||||
" df_ref = pd.read_csv(reference_file)\n",
|
||||
" reference_data = df_ref.to_dict(orient=\"records\")\n",
|
||||
" user_prompt_full = (\n",
|
||||
" f\"{user_prompt}\\nFollow the structure and distribution of the reference data, \"\n",
|
||||
" f\"but do NOT copy any exact values:\\n{reference_data}\"\n",
|
||||
" )\n",
|
||||
" else:\n",
|
||||
" user_prompt_full = user_prompt\n",
|
||||
"\n",
|
||||
" # Llamar a OpenAI\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=openai_model,\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt_full},\n",
|
||||
" ],\n",
|
||||
" temperature=temperature,\n",
|
||||
" max_tokens=max_tokens,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" raw_text = response.choices[0].message.content\n",
|
||||
" cleaned_json = _clean_json_output(raw_text)\n",
|
||||
"\n",
|
||||
" # Parsear JSON\n",
|
||||
" try:\n",
|
||||
" data = json.loads(cleaned_json)\n",
|
||||
" except json.JSONDecodeError as e:\n",
|
||||
" raise ValueError(f\"JSON inválido generado. Error: {e}\\nOutput truncado: {cleaned_json[:500]}\")\n",
|
||||
"\n",
|
||||
" df = pd.DataFrame(data)\n",
|
||||
"\n",
|
||||
" # Guardar CSV temporal\n",
|
||||
" tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=\".csv\")\n",
|
||||
" df.to_csv(tmp_file.name, index=False)\n",
|
||||
" tmp_file.close()\n",
|
||||
"\n",
|
||||
" return df, tmp_file.name\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "91af1eb5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Default prompts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "792d1555",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"SYSTEM_PROMPT = \"\"\"\n",
|
||||
"You are a precise synthetic data generator. Your only task is to output valid JSON arrays of dictionaries.\n",
|
||||
"\n",
|
||||
"Rules:\n",
|
||||
"1. Output a single JSON array starting with '[' and ending with ']'.\n",
|
||||
"2. Do not include markdown, code fences, or explanatory text — only the JSON.\n",
|
||||
"3. Keep all columns exactly as specified; do not add or remove fields (index must be omitted).\n",
|
||||
"4. Respect data types: text, number, date, boolean, etc.\n",
|
||||
"5. Ensure internal consistency and realistic variation.\n",
|
||||
"6. If a reference table is provided, generate data with similar statistical distributions for numerical and categorical variables, \n",
|
||||
" but never copy exact rows. Each row must be independent and new.\n",
|
||||
"7. For personal information (names, ages, addresses, IDs), ensure diversity and realism — individual values may be reused to maintain realism, \n",
|
||||
" but never reuse or slightly modify entire reference rows.\n",
|
||||
"8. Escape all internal double quotes in strings with a backslash (\\\").\n",
|
||||
"9. Replace any single quotes in strings with double quotes.\n",
|
||||
"10. Escape newline (\\n), tab (\\t), or carriage return (\\r) characters as \\\\n, \\\\t, \\\\r inside strings.\n",
|
||||
"11. Remove any trailing commas before closing brackets.\n",
|
||||
"12. Do not include any reference data or notes about it in the output.\n",
|
||||
"13. The output must always be valid JSON parseable by standard JSON parsers.\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"USER_PROMPT = \"\"\"\n",
|
||||
"Generate exactly 15 rows of synthetic data following all the rules above. \n",
|
||||
"Ensure that all strings are safe for JSON parsing and ready to convert to a pandas DataFrame.\n",
|
||||
"\"\"\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6f9331fa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d38f0afb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To test our generator, we use the first 50 examples from the Reddit gaming comments with sentiments dataset.\n",
|
||||
"Source: https://www.kaggle.com/datasets/sainitishmitta04/23k-reddit-gaming-comments-with-sentiments-dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "78d94faa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"df, _ = generate_synthetic_data_openai(SYSTEM_PROMPT, USER_PROMPT, reference_file= \"data/sentiment_reference.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0e6b5ebb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "015a3110",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(df.Comment[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0ef44876",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Gradio Demo"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aa4092f4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import gradio as gr\n",
|
||||
"\n",
|
||||
"with gr.Blocks() as demo:\n",
|
||||
" gr.Markdown(\"# 🧠 Synthetic Data Generator\")\n",
|
||||
"\n",
|
||||
" with gr.Row():\n",
|
||||
" system_prompt_input = gr.Textbox(label=\"System Prompt\", value=SYSTEM_PROMPT, lines=10)\n",
|
||||
"\n",
|
||||
" with gr.Row():\n",
|
||||
" user_prompt_input = gr.Textbox(label=\"User Prompt\", value=USER_PROMPT, lines=5)\n",
|
||||
"\n",
|
||||
" with gr.Row():\n",
|
||||
" reference_input = gr.File(label=\"Reference CSV (optional)\", file_types=[\".csv\"])\n",
|
||||
"\n",
|
||||
" output_df = gr.DataFrame(label=\"Generated Data\")\n",
|
||||
" download_csv = gr.File(label=\"Download CSV\")\n",
|
||||
"\n",
|
||||
" generate_btn = gr.Button(\"🚀 Generate Data\")\n",
|
||||
"\n",
|
||||
" generate_btn.click(\n",
|
||||
" fn=generate_synthetic_data_openai,\n",
|
||||
" inputs=[system_prompt_input, user_prompt_input, reference_input],\n",
|
||||
" outputs=[output_df, download_csv]\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"demo.launch(debug=True)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
[project]
|
||||
name = "synthetic-data"
|
||||
version = "0.1.0"
|
||||
description = "An intelligent synthetic data generator using OpenAI models"
|
||||
authors = [
|
||||
{ name = "Sebastian Rodriguez" }
|
||||
]
|
||||
dependencies = [
|
||||
"gradio>=5.49.1",
|
||||
"openai>=2.6.0",
|
||||
"pandas>=2.3.3",
|
||||
"python-dotenv>=1.0.0",
|
||||
"numpy>=1.24.0",
|
||||
"matplotlib>=3.7.0",
|
||||
"seaborn>=0.13.0"
|
||||
]
|
||||
@@ -0,0 +1,10 @@
|
||||
# Core dependencies
|
||||
gradio>=5.49.1
|
||||
openai>=2.6.0
|
||||
pandas>=2.3.3
|
||||
python-dotenv>=1.0.0
|
||||
|
||||
# Evaluation dependencies
|
||||
numpy>=1.24.0
|
||||
matplotlib>=3.7.0
|
||||
seaborn>=0.13.0
|
||||
@@ -0,0 +1,13 @@
|
||||
import os
|
||||
import glob
|
||||
|
||||
def cleanup_temp_files(temp_dir: str):
    """
    Delete the entries found directly inside ``temp_dir`` (best effort).

    Args:
        temp_dir: Directory whose contents should be removed. The path is
            treated literally: glob metacharacters in it (``[``, ``*``, ``?``)
            are escaped, so only the trailing ``*`` acts as a wildcard.

    Returns:
        None. An entry that cannot be removed (for example a sub-directory,
        which ``os.remove`` refuses) is reported with a printed warning and
        skipped; the function itself does not raise for it.
    """
    # Without escaping, a directory such as "plots[1]" would be parsed as a
    # character class and the pattern would silently match nothing.
    pattern = os.path.join(glob.escape(temp_dir), "*")
    for path in glob.glob(pattern):
        try:
            os.remove(path)
        except OSError as e:
            # Keep going: one undeletable entry must not block the rest.
            print(f"[Warning] Could not delete {path}: {e}")
|
||||
@@ -0,0 +1,45 @@
|
||||
# ------------------- Setup Constants -------------------
N_REFERENCE_ROWS = 64  # Max reference rows per batch for sampling
MAX_TOKENS_MODEL = 128_000  # Max tokens supported by the model, used for batching computations
PROJECT_TEMP_DIR = "temp_plots"  # Default directory name for temporary output files



# ----------------- Prompts -------------------------------
# NOTE: these are ordinary (non-raw) triple-quoted strings, so every backslash
# that must reach the model literally is doubled in the source. Writing "\n"
# here would embed a real line break and split the rule across lines.
SYSTEM_PROMPT = """
You are a precise synthetic data generator. Your only task is to output valid JSON arrays of dictionaries.

Rules:
1. Output a single JSON array starting with '[' and ending with ']'.
2. Do not include markdown, code fences, or explanatory text — only the JSON.
3. Keep all columns exactly as specified; do not add or remove fields (index must be omitted).
4. Respect data types: text, number, date, boolean, etc.
5. Ensure internal consistency and realistic variation.
6. If a reference table is provided, generate data with similar statistical distributions for numerical and categorical variables,
but never copy exact rows. Each row must be independent and new.
7. For personal information (names, ages, addresses, IDs), ensure diversity and realism — individual values may be reused to maintain realism,
but never reuse or slightly modify entire reference rows.
8. Escape internal double quotes in strings with a backslash (\\") for JSON validity.
9. Do NOT replace single quotes in normal text; they should remain as-is.
10. Escape newline, tab, or carriage return characters as \\n, \\t, \\r inside strings.
11. Remove any trailing commas before closing brackets.
12. Do not include any reference data or notes about it in the output.
13. The output must always be valid JSON parseable by standard JSON parsers.
14. Don't repeat any exact column neither from the reference or from previous generated data.
15. When using reference data, consider the entire dataset for statistical patterns and diversity;
do not restrict generation to the first rows or the order of the dataset.
16. Introduce slight random variations in numerical values, and choose categorical values randomly according to the distribution,
without repeating rows.

"""

# Default request sent as the user message; the row count in it is free text.
USER_PROMPT = """
Generate exactly 15 rows of synthetic data following all the rules above.
Ensure that all strings are safe for JSON parsing and ready to convert to a pandas DataFrame.
"""
|
||||
|
||||
|
||||
@@ -0,0 +1,108 @@
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
import pandas as pd
|
||||
from PIL import Image
|
||||
|
||||
from src.constants import MAX_TOKENS_MODEL, N_REFERENCE_ROWS
|
||||
from src.evaluator import SimpleEvaluator
|
||||
from src.helpers import hash_row, sample_reference
|
||||
from src.openai_utils import detect_total_rows_from_prompt, generate_batch
|
||||
|
||||
|
||||
# ------------------- Main Function -------------------
|
||||
def generate_and_evaluate_data(
    system_prompt: str,
    user_prompt: str,
    temp_dir: str,
    reference_file=None,
    openai_model: str = "gpt-4o-mini",
    max_tokens_model: int = MAX_TOKENS_MODEL,
    n_reference_rows: int = N_REFERENCE_ROWS,
):
    """
    Generate synthetic data in batches, evaluate against reference data, and save results.

    Rows are requested batch by batch until the requested total is reached,
    a batch fails, or a batch contributes no new unique row. Rows whose hash
    was already seen (in an earlier batch or earlier in the same batch) are
    dropped, and the result never exceeds the requested total.

    Args:
        system_prompt: System message forwarded to ``generate_batch``.
        user_prompt: User message; also passed to
            ``detect_total_rows_from_prompt`` to obtain the requested row count.
        temp_dir: Directory (created if missing) that receives
            ``synthetic_data.csv`` and is handed to the evaluator.
        reference_file: Optional CSV (anything ``pd.read_csv`` accepts). When
            falsy, no reference is loaded and the evaluation step is skipped.
        openai_model: Model name forwarded to the OpenAI helpers.
        max_tokens_model: Token budget used only to size each batch.
        n_reference_rows: Row count passed to ``sample_reference``.

    Returns:
        tuple: ``(final_df, final_csv_path, report_df, generated_state, all_images)``
        where ``report_df`` and ``all_images`` are empty when no evaluation
        ran and ``generated_state`` is always an empty dict.
    """
    os.makedirs(temp_dir, exist_ok=True)
    reference_df = pd.read_csv(reference_file) if reference_file else None
    # NOTE(review): assumed to return an int row count — confirm in src.openai_utils.
    total_rows = detect_total_rows_from_prompt(user_prompt, openai_model)

    final_df = pd.DataFrame()
    existing_hashes = set()
    rows_left = total_rows
    iteration = 0

    print(f"[Info] Total rows requested: {total_rows}")

    # A single reference sample feeds both size estimates below.
    example_sample = sample_reference(reference_df, n_reference_rows)

    # Rough prompt cost: ~4 characters per token over system + user + sample text.
    prompt_sample = f"{system_prompt} {user_prompt} {example_sample}"
    prompt_tokens = max(1, len(prompt_sample) // 4)

    # Rough per-row cost derived from the sample's text length.
    if example_sample is not None and len(example_sample) > 0:
        sample_text = str(example_sample)
        tokens_per_row = max(1, len(sample_text) // len(example_sample) // 4)
    else:
        tokens_per_row = 30  # fallback if no reference

    print(f"[Info] Tokens per row estimate: {tokens_per_row}, Prompt tokens: {prompt_tokens}")

    # ---------------- Batch Generation Loop ----------------
    while rows_left > 0:
        iteration += 1
        batch_sample = sample_reference(reference_df, n_reference_rows)
        # max(1, ...) keeps the loop progressing even if the prompt alone
        # exceeds the token budget.
        batch_size = min(rows_left, max(1, (max_tokens_model - prompt_tokens) // tokens_per_row))
        print(f"[Batch {iteration}] Batch size: {batch_size}, Rows left: {rows_left}")

        try:
            df_batch = generate_batch(
                system_prompt, user_prompt, batch_sample, batch_size, openai_model
            )
        except Exception as e:
            # Best effort: keep whatever was generated so far.
            print(f"[Error] Batch {iteration} failed: {e}")
            break

        # Filter duplicates using hash. Each hash is recorded as soon as its
        # row is accepted so repeats *within* this batch are rejected too, and
        # collection stops once the outstanding row count is covered.
        new_rows = []
        for _, row in df_batch.iterrows():
            if len(new_rows) >= rows_left:
                break
            row_hash = hash_row(row)
            if row_hash in existing_hashes:
                continue
            existing_hashes.add(row_hash)
            new_rows.append(row)

        if new_rows:
            final_df = pd.concat([final_df, pd.DataFrame(new_rows)], ignore_index=True)
        rows_left = total_rows - len(final_df)
        print(
            f"[Batch {iteration}] Unique new rows added: {len(new_rows)}, Total so far: {len(final_df)}"
        )

        if len(new_rows) == 0:
            # Avoid looping forever when the model only repeats itself.
            print("[Warning] No new unique rows. Stopping batches.")
            break

    # ---------------- Evaluation ----------------
    report_df, vis_dict = pd.DataFrame(), {}
    if reference_df is not None and not final_df.empty:
        evaluator = SimpleEvaluator(temp_dir=temp_dir)
        evaluator.evaluate(reference_df, final_df)
        report_df = evaluator.results_as_dataframe()
        vis_dict = evaluator.create_visualizations_temp_dict(reference_df, final_df)
        print(f"[Info] Evaluation complete. Report shape: {report_df.shape}")

    # ---------------- Collect Images ----------------
    # Flatten the per-column image lists, dropping missing (None) entries.
    all_images: List[Image.Image] = []
    for imgs in vis_dict.values():
        if isinstance(imgs, list):
            all_images.extend([img for img in imgs if img is not None])

    # ---------------- Save CSV ----------------
    final_csv_path = os.path.join(temp_dir, "synthetic_data.csv")
    final_df.to_csv(final_csv_path, index=False)
    print(f"[Done] Generated {len(final_df)} rows → saved to {final_csv_path}")

    # Placeholder returned to the caller; nothing is stored in it here.
    generated_state = {}

    return final_df, final_csv_path, report_df, generated_state, all_images
|
||||
@@ -0,0 +1,142 @@
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
from typing import List, Dict, Any, Optional
|
||||
from PIL import Image
|
||||
import pandas as pd
|
||||
import os
|
||||
|
||||
class SimpleEvaluator:
|
||||
"""
|
||||
Evaluates synthetic data against a reference dataset, providing summary statistics and visualizations.
|
||||
"""
|
||||
|
||||
def __init__(self, temp_dir: str = "temp_plots"):
    """
    Set up the evaluator and make sure its output directory is present.

    Args:
        temp_dir (str): Folder used for temporary plot images; it is
            created if it does not exist yet.
    """
    os.makedirs(temp_dir, exist_ok=True)
    self.temp_dir = temp_dir
|
||||
|
||||
def evaluate(self, reference_df: pd.DataFrame, generated_df: pd.DataFrame) -> Dict[str, Any]:
    """
    Compare numerical and categorical columns between reference and generated datasets.

    Only columns present in both frames are compared, in the order they
    appear in ``reference_df`` so the resulting report is stable between runs.
    A column is treated as numerical when its *reference* dtype is numeric;
    the generated values are then coerced with ``pd.to_numeric`` (unparseable
    entries become NaN) so numbers that arrived as strings are still measured.

    Args:
        reference_df: Real data used as the baseline.
        generated_df: Synthetic data to compare against the baseline.

    Returns:
        Dict keyed by column name. Numerical entries hold ``ref_mean``,
        ``gen_mean``, ``mean_diff``, ``ref_std``, ``gen_std``, ``std_diff``;
        categorical entries hold ``distribution_overlap_pct``, ``ref_unique``
        and ``gen_unique``. The same dict is stored on ``self.results`` and
        the compared column list on ``self.common_cols``.
    """
    self.results: Dict[str, Any] = {}
    generated_cols = set(generated_df.columns)
    # dict.fromkeys de-duplicates while preserving the reference column order.
    self.common_cols = list(
        dict.fromkeys(col for col in reference_df.columns if col in generated_cols)
    )

    for col in self.common_cols:
        ref_series = reference_df[col]
        if pd.api.types.is_numeric_dtype(ref_series):
            # Generated values may have been parsed as strings; coerce them
            # so mean()/std() do not fail on an object column.
            gen_series = pd.to_numeric(generated_df[col], errors="coerce")
            ref_mean, gen_mean = ref_series.mean(), gen_series.mean()
            ref_std, gen_std = ref_series.std(), gen_series.std()
            self.results[col] = {
                "type": "numerical",
                "ref_mean": ref_mean,
                "gen_mean": gen_mean,
                "mean_diff": gen_mean - ref_mean,
                "ref_std": ref_std,
                "gen_std": gen_std,
                "std_diff": gen_std - ref_std,
            }
        else:
            ref_counts = ref_series.value_counts(normalize=True)
            gen_counts = generated_df[col].value_counts(normalize=True)
            # Shared probability mass: per category, the smaller of the two
            # relative frequencies, summed over the reference categories.
            overlap = sum(min(ref_counts.get(k, 0), gen_counts.get(k, 0)) for k in ref_counts.index)
            self.results[col] = {
                "type": "categorical",
                "distribution_overlap_pct": round(overlap * 100, 2),
                "ref_unique": len(ref_counts),
                "gen_unique": len(gen_counts)
            }

    return self.results
|
||||
|
||||
def results_as_dataframe(self) -> pd.DataFrame:
|
||||
"""
|
||||
Convert the evaluation results into a pandas DataFrame for display.
|
||||
"""
|
||||
rows = []
|
||||
for col, stats in self.results.items():
|
||||
if stats["type"] == "numerical":
|
||||
rows.append({
|
||||
"Column": col,
|
||||
"Type": "Numerical",
|
||||
"Ref Mean/Std": f"{stats['ref_mean']:.2f} / {stats['ref_std']:.2f}",
|
||||
"Gen Mean/Std": f"{stats['gen_mean']:.2f} / {stats['gen_std']:.2f}",
|
||||
"Diff": f"Mean diff: {stats['mean_diff']:.2f}, Std diff: {stats['std_diff']:.2f}"
|
||||
})
|
||||
else:
|
||||
rows.append({
|
||||
"Column": col,
|
||||
"Type": "Categorical",
|
||||
"Ref": f"{stats['ref_unique']} unique",
|
||||
"Gen": f"{stats['gen_unique']} unique",
|
||||
"Diff": f"Overlap: {stats['distribution_overlap_pct']}%"
|
||||
})
|
||||
return pd.DataFrame(rows)
|
||||
|
||||
def create_visualizations_temp_dict(
|
||||
self,
|
||||
reference_df: pd.DataFrame,
|
||||
generated_df: pd.DataFrame,
|
||||
percentage: bool = True
|
||||
) -> Dict[str, List[Optional[Image.Image]]]:
|
||||
"""
|
||||
Create histogram and boxplot visualizations for each column and save them as temporary images.
|
||||
Handles special characters in column names and category labels.
|
||||
"""
|
||||
vis_dict: Dict[str, List[Optional[Image.Image]]] = {}
|
||||
common_cols = list(set(reference_df.columns) & set(generated_df.columns))
|
||||
|
||||
for col in common_cols:
|
||||
col_safe = str(col).replace("_", r"\_").replace("$", r"\$") # Escape special chars
|
||||
|
||||
# ---------------- Histogram ----------------
|
||||
plt.figure(figsize=(6, 4))
|
||||
if pd.api.types.is_numeric_dtype(reference_df[col]):
|
||||
sns.histplot(reference_df[col], color="blue", label="Reference",
|
||||
stat="percent" if percentage else "count", alpha=0.5)
|
||||
sns.histplot(generated_df[col], color="orange", label="Generated",
|
||||
stat="percent" if percentage else "count", alpha=0.5)
|
||||
else: # Categorical
|
||||
ref_counts = reference_df[col].value_counts(normalize=percentage)
|
||||
gen_counts = generated_df[col].value_counts(normalize=percentage)
|
||||
categories = list(set(ref_counts.index) | set(gen_counts.index))
|
||||
categories_safe = [str(cat).replace("_", r"\_").replace("$", r"\$") for cat in categories]
|
||||
ref_vals = [ref_counts.get(cat, 0) for cat in categories]
|
||||
gen_vals = [gen_counts.get(cat, 0) for cat in categories]
|
||||
|
||||
x = range(len(categories))
|
||||
width = 0.4
|
||||
plt.bar([i - width/2 for i in x], ref_vals, width=width, color="blue", alpha=0.7, label="Reference")
|
||||
plt.bar([i + width/2 for i in x], gen_vals, width=width, color="orange", alpha=0.7, label="Generated")
|
||||
plt.xticks(x, categories_safe, rotation=45, ha="right")
|
||||
|
||||
plt.title(f"Histogram comparison for '{col_safe}'", fontsize=12, usetex=False)
|
||||
plt.legend()
|
||||
plt.tight_layout()
|
||||
hist_path = os.path.join(self.temp_dir, f"{col}_hist.png")
|
||||
plt.savefig(hist_path, bbox_inches='tight')
|
||||
plt.close()
|
||||
hist_img = Image.open(hist_path)
|
||||
|
||||
# ---------------- Boxplot (numerical only) ----------------
|
||||
box_img = None
|
||||
if pd.api.types.is_numeric_dtype(reference_df[col]):
|
||||
plt.figure(figsize=(6, 4))
|
||||
df_box = pd.DataFrame({
|
||||
'Value': pd.concat([reference_df[col], generated_df[col]], ignore_index=True),
|
||||
'Dataset': ['Reference']*len(reference_df[col]) + ['Generated']*len(generated_df[col])
|
||||
})
|
||||
|
||||
sns.boxplot(x='Dataset', y='Value', data=df_box, palette=['#1f77b4','#ff7f0e'])
|
||||
plt.title(f"Boxplot comparison for '{col_safe}'", fontsize=12, usetex=False)
|
||||
plt.tight_layout()
|
||||
box_path = os.path.join(self.temp_dir, f"{col}_box.png")
|
||||
plt.savefig(box_path, bbox_inches='tight')
|
||||
plt.close()
|
||||
box_img = Image.open(box_path)
|
||||
|
||||
vis_dict[col] = [hist_img, box_img]
|
||||
|
||||
return vis_dict
|
||||
@@ -0,0 +1,14 @@
|
||||
import hashlib
|
||||
import pandas as pd
|
||||
|
||||
def hash_row(row: pd.Series) -> str:
    """Return the hexadecimal MD5 digest of a row's values, used as a duplicate-detection key."""
    # The row is fingerprinted through the string form of its value tuple.
    row_repr = str(tuple(row))
    digest = hashlib.md5(row_repr.encode())
    return digest.hexdigest()
|
||||
|
||||
|
||||
def sample_reference(reference_df: pd.DataFrame, n_reference_rows: int) -> list:
    """
    Draw a fresh random sample of reference rows for one generation batch.

    Args:
        reference_df: Reference data; may be None or empty.
        n_reference_rows: Requested sample size; capped at the number of available rows.

    Returns:
        The sampled rows as a list of ``{column: value}`` dicts, or ``[]`` when there
        is no reference data.
    """
    # Guard clause: nothing to sample from.
    if reference_df is None or reference_df.empty:
        return []

    sample_size = min(n_reference_rows, len(reference_df))
    picked = reference_df.sample(sample_size, replace=False)
    return picked.to_dict(orient="records")
|
||||
@@ -0,0 +1,112 @@
|
||||
import json
|
||||
import re
|
||||
import tempfile
|
||||
import openai
|
||||
import pandas as pd
|
||||
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
|
||||
# ------------------ JSON Cleaning ------------------
|
||||
def _clean_json_output(raw_text: str) -> str:
|
||||
"""
|
||||
Cleans raw OpenAI output to produce valid JSON.
|
||||
Escapes only double quotes and control characters.
|
||||
"""
|
||||
text = raw_text.strip()
|
||||
text = re.sub(r"```(?:json)?", "", text)
|
||||
text = re.sub(r"</?[^>]+>", "", text)
|
||||
|
||||
def escape_quotes(match):
|
||||
value = match.group(1)
|
||||
value = value.replace('"', r"\"")
|
||||
value = value.replace("\n", r"\n").replace("\r", r"\r").replace("\t", r"\t")
|
||||
return f'"{value}"'
|
||||
|
||||
text = re.sub(r'"(.*?)"', escape_quotes, text)
|
||||
|
||||
if not text.startswith("["):
|
||||
text = "[" + text
|
||||
if not text.endswith("]"):
|
||||
text += "]"
|
||||
text = re.sub(r",\s*]", "]", text)
|
||||
return text
|
||||
|
||||
|
||||
# ------------------ Synthetic Data Generation ------------------
|
||||
def generate_synthetic_data_openai(
    system_prompt: str,
    full_user_prompt: str,
    openai_model: str = "gpt-4o-mini",
    max_tokens: int = 16000,
    temperature: float = 0.0,
):
    """
    Generates synthetic tabular data using OpenAI.
    Assumes `full_user_prompt` is already complete with reference data.

    Args:
        system_prompt: Content of the system message.
        full_user_prompt: Content of the user message.
        openai_model: Model name passed to the chat completions API.
        max_tokens: Forwarded as ``max_completion_tokens``.
        temperature: Sampling temperature.

    Returns:
        ``(df, csv_path)``: the parsed rows as a DataFrame, and the path of a
        temporary ``.csv`` file holding the same data. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        ValueError: If the model returns no text or the cleaned text is not valid JSON.
    """
    response = openai.chat.completions.create(
        model=openai_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": full_user_prompt},
        ],
        max_completion_tokens=max_tokens,
        temperature=temperature,
    )

    raw_text = response.choices[0].message.content
    if raw_text is None:
        # Fail with the documented error type instead of an AttributeError
        # from deep inside the cleaning helper.
        raise ValueError("Invalid JSON generated. Error: the model returned no text content.")
    cleaned_json = _clean_json_output(raw_text)

    try:
        data = json.loads(cleaned_json)
    except json.JSONDecodeError as e:
        # Chain the original error so the traceback keeps the parse position.
        raise ValueError(
            f"Invalid JSON generated. Error: {e}\nTruncated output: {cleaned_json[:500]}"
        ) from e

    df = pd.DataFrame(data)

    # Reserve a unique file name, release the handle, then let pandas write to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_file:
        csv_path = tmp_file.name
    df.to_csv(csv_path, index=False)

    return df, csv_path
|
||||
|
||||
# ----------------------Mini call to detect the number of rows in the prompt--------------
|
||||
def detect_total_rows_from_prompt(user_prompt: str, openai_model: str = "gpt-4o-mini") -> int:
    """
    Detect the number of rows requested from the user prompt.
    Fallback to 20 if detection fails.

    The model is asked to return only the number; the first integer found in its
    reply is used (thousands separators such as "1,000" are accepted).

    Args:
        user_prompt: The user's free-text instruction.
        openai_model: Model name passed to the chat completions API.

    Returns:
        The detected row count, never less than 1; 20 if the API call fails or
        the reply contains no number.
    """
    mini_prompt = f"""
Extract the number of rows to generate from this instruction:
\"\"\"{user_prompt}\"\"\" Return only the number.
"""
    openai.api_key = os.getenv("OPENAI_API_KEY")
    try:
        response = openai.chat.completions.create(
            model=openai_model,
            messages=[{"role": "user", "content": mini_prompt}],
            temperature=0,
            max_tokens=10,
        )
        text = response.choices[0].message.content.strip()
        # Take the first number only. Concatenating every digit in the reply would
        # turn an answer like "between 10 and 20" into 1020.
        number = re.search(r"\d+(?:,\d{3})*", text)
        if number is None:
            return 20
        total_rows = int(number.group().replace(",", ""))
        return max(total_rows, 1)
    except Exception:
        # Deliberate best-effort: any API or parsing failure falls back to the default.
        return 20
|
||||
|
||||
|
||||
# -------------- Function to generate synthetic data in a batch ---------------------
|
||||
def generate_batch(system_prompt: str, user_prompt: str, reference_sample: List[dict],
                   batch_size: int, openai_model: str):
    """
    Generate a single batch of synthetic data using OpenAI.

    Args:
        system_prompt: Content of the system message.
        user_prompt: Base user instruction.
        reference_sample: Example rows appended to the prompt (rendered with ``str``).
        batch_size: Number of rows the model is asked to produce.
        openai_model: Model name passed to the generator.

    Returns:
        The DataFrame produced by ``generate_synthetic_data_openai`` for this batch.
    """
    full_prompt = f"{user_prompt}\nSample: {reference_sample}\nGenerate exactly {batch_size} rows."
    df_batch, csv_path = generate_synthetic_data_openai(
        system_prompt=system_prompt,
        full_user_prompt=full_prompt,
        openai_model=openai_model,
    )
    # The generator also writes a non-auto-deleting temp CSV. Its path is not
    # returned from here, so remove it now instead of leaking one file per batch.
    try:
        os.remove(csv_path)
    except OSError:
        pass  # best-effort cleanup; the batch data itself is unaffected
    return df_batch
|
||||
@@ -0,0 +1,13 @@
|
||||
import pandas as pd
|
||||
|
||||
# -------------------------------
|
||||
# Helper function to display CSV
|
||||
# -------------------------------
|
||||
def display_reference_csv(file):
    """
    Load a reference CSV into a DataFrame for display.

    Args:
        file: ``None``, a path-like value accepted by ``pd.read_csv``, or an object
            exposing a ``name`` attribute that holds the path (presumably an
            uploaded-file wrapper - confirm against the caller).

    Returns:
        The parsed DataFrame; an empty DataFrame when ``file`` is None; or a
        one-row DataFrame with a single ``Error`` column if reading fails.
    """
    if file is None:
        return pd.DataFrame()

    try:
        # Prefer the object's `.name` when it has one, otherwise use the value itself.
        csv_source = getattr(file, "name", file)
        return pd.read_csv(csv_source)
    except Exception as e:
        failure = {"Error": [str(e)]}
        return pd.DataFrame(failure)
|
||||
@@ -0,0 +1,545 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ffe08bad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from openai import OpenAI\n",
|
||||
"import json\n",
|
||||
"from typing import List, Dict\n",
|
||||
"import gradio as gr\n",
|
||||
"import random\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"client = OpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2f24eb03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"LEGAL_TOPIC_SEEDS = [\n",
|
||||
" \"criminal offenses and penalties\",\n",
|
||||
" \"property rights and disputes\",\n",
|
||||
" \"contract law and breach remedies\",\n",
|
||||
" \"civil procedure and court processes\",\n",
|
||||
" \"evidence admissibility rules\",\n",
|
||||
" \"constitutional rights protections\",\n",
|
||||
" \"family law and inheritance\",\n",
|
||||
" \"corporate governance regulations\",\n",
|
||||
" \"intellectual property protections\",\n",
|
||||
" \"cyber crime and digital law\"\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"QUESTION_TYPES = [\n",
|
||||
" \"definition\",\n",
|
||||
" \"procedure\",\n",
|
||||
" \"penalty\",\n",
|
||||
" \"rights\",\n",
|
||||
" \"obligations\",\n",
|
||||
" \"exceptions\",\n",
|
||||
" \"examples\"\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9256c3ae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class SyntheticLegalGenerator:\n",
|
||||
" \"\"\"Generates synthetic legal content and sections\"\"\"\n",
|
||||
" \n",
|
||||
" def __init__(self, client: OpenAI, model: str = \"gpt-4o-mini\"):\n",
|
||||
" self.client = client\n",
|
||||
" self.model = model\n",
|
||||
" \n",
|
||||
" def generate_legal_section(self, topic: str) -> Dict[str, str]:\n",
|
||||
" \"\"\"Generate a completely synthetic legal section\"\"\"\n",
|
||||
" \n",
|
||||
" prompt = f\"\"\"Create a SYNTHETIC (fictional but realistic) Indian legal section about: {topic}\n",
|
||||
"\n",
|
||||
"Generate:\n",
|
||||
"1. A section number (format: IPC XXX or CrPC XXX or IEA XXX)\n",
|
||||
"2. A clear title\n",
|
||||
"3. A detailed legal provision (2-3 sentences)\n",
|
||||
"\n",
|
||||
"Make it realistic but completely fictional. Use legal language.\n",
|
||||
"\n",
|
||||
"Format:\n",
|
||||
"SECTION: [number]\n",
|
||||
"TITLE: [title]\n",
|
||||
"PROVISION: [detailed text]\"\"\"\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" response = self.client.chat.completions.create(\n",
|
||||
" model=self.model,\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": \"You are a legal content generator creating synthetic Indian legal provisions for educational purposes.\"},\n",
|
||||
" {\"role\": \"user\", \"content\": prompt}\n",
|
||||
" ],\n",
|
||||
" temperature=0.8,\n",
|
||||
" max_tokens=400\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" content = response.choices[0].message.content.strip()\n",
|
||||
" \n",
|
||||
" # Parse the response\n",
|
||||
" section_num = \"\"\n",
|
||||
" title = \"\"\n",
|
||||
" provision = \"\"\n",
|
||||
" \n",
|
||||
" for line in content.split('\\n'):\n",
|
||||
" if line.startswith('SECTION:'):\n",
|
||||
" section_num = line.replace('SECTION:', '').strip()\n",
|
||||
" elif line.startswith('TITLE:'):\n",
|
||||
" title = line.replace('TITLE:', '').strip()\n",
|
||||
" elif line.startswith('PROVISION:'):\n",
|
||||
" provision = line.replace('PROVISION:', '').strip()\n",
|
||||
" \n",
|
||||
" return {\n",
|
||||
" \"section_number\": section_num,\n",
|
||||
" \"title\": title,\n",
|
||||
" \"provision\": provision,\n",
|
||||
" \"topic\": topic\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error generating section: {e}\")\n",
|
||||
" return {\n",
|
||||
" \"section_number\": \"IPC 000\",\n",
|
||||
" \"title\": \"Error\",\n",
|
||||
" \"provision\": f\"Failed to generate: {e}\",\n",
|
||||
" \"topic\": topic\n",
|
||||
" }"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "32be3d52",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class SyntheticQAGenerator:\n",
|
||||
" \"\"\"Generates Q&A pairs from synthetic legal sections\"\"\"\n",
|
||||
" \n",
|
||||
" def __init__(self, client: OpenAI, model: str = \"gpt-4o-mini\"):\n",
|
||||
" self.client = client\n",
|
||||
" self.model = model\n",
|
||||
" \n",
|
||||
" def generate_qa_pair(self, legal_section: Dict[str, str], question_type: str) -> Dict[str, str]:\n",
|
||||
" \"\"\"Generate Q&A pair from synthetic legal section\"\"\"\n",
|
||||
" \n",
|
||||
" prompt = f\"\"\"Based on this SYNTHETIC legal section, create a {question_type}-type question and answer:\n",
|
||||
"\n",
|
||||
"Section: {legal_section['section_number']}\n",
|
||||
"Title: {legal_section['title']}\n",
|
||||
"Provision: {legal_section['provision']}\n",
|
||||
"\n",
|
||||
"Create ONE question (focusing on {question_type}) and a clear, accurate answer based on this provision.\n",
|
||||
"\n",
|
||||
"Format:\n",
|
||||
"Q: [question]\n",
|
||||
"A: [answer]\n",
|
||||
"\n",
|
||||
"Keep it educational and clear.\"\"\"\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" response = self.client.chat.completions.create(\n",
|
||||
" model=self.model,\n",
|
||||
" messages=[\n",
|
||||
" {\"role\": \"system\", \"content\": \"You are creating educational Q&A pairs from synthetic legal content.\"},\n",
|
||||
" {\"role\": \"user\", \"content\": prompt}\n",
|
||||
" ],\n",
|
||||
" temperature=0.7,\n",
|
||||
" max_tokens=350\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" content = response.choices[0].message.content.strip()\n",
|
||||
" \n",
|
||||
" # Parse Q&A\n",
|
||||
" question = \"\"\n",
|
||||
" answer = \"\"\n",
|
||||
" \n",
|
||||
" for line in content.split('\\n'):\n",
|
||||
" if line.startswith('Q:'):\n",
|
||||
" question = line[2:].strip()\n",
|
||||
" elif line.startswith('A:'):\n",
|
||||
" answer = line[2:].strip()\n",
|
||||
" \n",
|
||||
" return {\n",
|
||||
" \"section_number\": legal_section['section_number'],\n",
|
||||
" \"section_title\": legal_section['title'],\n",
|
||||
" \"provision\": legal_section['provision'],\n",
|
||||
" \"question_type\": question_type,\n",
|
||||
" \"question\": question,\n",
|
||||
" \"answer\": answer\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error generating Q&A: {e}\")\n",
|
||||
" return {\n",
|
||||
" \"section_number\": legal_section['section_number'],\n",
|
||||
" \"section_title\": legal_section['title'],\n",
|
||||
" \"provision\": legal_section['provision'],\n",
|
||||
" \"question_type\": question_type,\n",
|
||||
" \"question\": \"Error generating question\",\n",
|
||||
" \"answer\": \"Error generating answer\"\n",
|
||||
" }"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fe88708f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class SyntheticDataPipeline:\n",
|
||||
" \"\"\"Complete pipeline for synthetic legal Q&A generation\"\"\"\n",
|
||||
" \n",
|
||||
" def __init__(self, legal_gen: SyntheticLegalGenerator, qa_gen: SyntheticQAGenerator):\n",
|
||||
" self.legal_gen = legal_gen\n",
|
||||
" self.qa_gen = qa_gen\n",
|
||||
" self.dataset: List[Dict[str, str]] = []\n",
|
||||
" \n",
|
||||
" def generate_complete_entry(self, topic: str = None, question_type: str = None) -> Dict[str, str]:\n",
|
||||
" \"\"\"Generate synthetic legal section + Q&A in one go\"\"\"\n",
|
||||
" \n",
|
||||
" # Pick random topic if not provided\n",
|
||||
" if topic is None:\n",
|
||||
" topic = random.choice(LEGAL_TOPIC_SEEDS)\n",
|
||||
" \n",
|
||||
" # Pick random question type if not provided\n",
|
||||
" if question_type is None:\n",
|
||||
" question_type = random.choice(QUESTION_TYPES)\n",
|
||||
" \n",
|
||||
" # Step 1: Generate synthetic legal section\n",
|
||||
" legal_section = self.legal_gen.generate_legal_section(topic)\n",
|
||||
" \n",
|
||||
" # Step 2: Generate Q&A from that section\n",
|
||||
" qa_pair = self.qa_gen.generate_qa_pair(legal_section, question_type)\n",
|
||||
" \n",
|
||||
" return qa_pair\n",
|
||||
" \n",
|
||||
" def generate_batch(self, count: int, progress_callback=None) -> List[Dict[str, str]]:\n",
|
||||
" \"\"\"Generate multiple synthetic entries\"\"\"\n",
|
||||
" batch = []\n",
|
||||
" \n",
|
||||
" for i in range(count):\n",
|
||||
" if progress_callback:\n",
|
||||
" progress_callback((i + 1) / count, desc=f\"Generating {i+1}/{count}...\")\n",
|
||||
" \n",
|
||||
" entry = self.generate_complete_entry()\n",
|
||||
" batch.append(entry)\n",
|
||||
" self.dataset.append(entry)\n",
|
||||
" \n",
|
||||
" return batch\n",
|
||||
" \n",
|
||||
" def save_dataset(self, filename: str = \"synthetic_legal_qa.json\") -> str:\n",
|
||||
" \"\"\"Save dataset to JSON\"\"\"\n",
|
||||
" try:\n",
|
||||
" with open(filename, 'w', encoding='utf-8') as f:\n",
|
||||
" json.dump(self.dataset, f, indent=2, ensure_ascii=False)\n",
|
||||
" return f\"✅ Saved {len(self.dataset)} synthetic Q&A pairs to {filename}\"\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"❌ Error saving: {e}\"\n",
|
||||
" \n",
|
||||
" def get_summary(self) -> str:\n",
|
||||
" \"\"\"Get dataset summary\"\"\"\n",
|
||||
" if not self.dataset:\n",
|
||||
" return \"No synthetic data generated yet.\"\n",
|
||||
" \n",
|
||||
" summary = f\"**Total Synthetic Q&A Pairs:** {len(self.dataset)}\\n\\n\"\n",
|
||||
" summary += \"**Topics Covered:**\\n\"\n",
|
||||
" \n",
|
||||
" topics = {}\n",
|
||||
" for entry in self.dataset:\n",
|
||||
" topic = entry.get('section_title', 'Unknown')\n",
|
||||
" topics[topic] = topics.get(topic, 0) + 1\n",
|
||||
" \n",
|
||||
" for topic, count in topics.items():\n",
|
||||
" summary += f\"- {topic}: {count}\\n\"\n",
|
||||
" \n",
|
||||
" return summary"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0822c49e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"legal_generator = SyntheticLegalGenerator(client)\n",
|
||||
"qa_generator = SyntheticQAGenerator(client)\n",
|
||||
"pipeline = SyntheticDataPipeline(legal_generator, qa_generator)\n",
|
||||
"\n",
|
||||
"print(\"✅ Synthetic data pipeline initialized!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9b86f15f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Cell 8: UI functions with real-time progress updates\n",
|
||||
"def generate_single_synthetic(topic_choice: str, question_type: str, progress=gr.Progress()):\n",
|
||||
" \"\"\"Generate single synthetic entry with real-time updates\"\"\"\n",
|
||||
" \n",
|
||||
" # Step 1: Generate legal section\n",
|
||||
" progress(0.2, desc=\"🔍 Generating synthetic legal section...\")\n",
|
||||
" yield \"⏳ Creating synthetic legal provision...\", pipeline.get_summary()\n",
|
||||
" \n",
|
||||
" legal_section = pipeline.legal_gen.generate_legal_section(topic_choice)\n",
|
||||
" \n",
|
||||
" # Show intermediate result\n",
|
||||
" intermediate = f\"### 📜 Generated Section\\n\\n\"\n",
|
||||
" intermediate += f\"**{legal_section['section_number']}**: {legal_section['title']}\\n\\n\"\n",
|
||||
" intermediate += f\"_{legal_section['provision']}_\\n\\n\"\n",
|
||||
" intermediate += \"⏳ Now generating Q&A pair...\"\n",
|
||||
" \n",
|
||||
" progress(0.5, desc=\"💭 Creating Q&A pair...\")\n",
|
||||
" yield intermediate, pipeline.get_summary()\n",
|
||||
" \n",
|
||||
" # Step 2: Generate Q&A\n",
|
||||
" qa_pair = pipeline.qa_gen.generate_qa_pair(legal_section, question_type)\n",
|
||||
" pipeline.dataset.append(qa_pair)\n",
|
||||
" \n",
|
||||
" progress(0.9, desc=\"✨ Finalizing...\")\n",
|
||||
" \n",
|
||||
" # Final result\n",
|
||||
" result = f\"### 🏛️ {qa_pair['section_number']}: {qa_pair['section_title']}\\n\\n\"\n",
|
||||
" result += f\"**Provision:** {qa_pair['provision']}\\n\\n\"\n",
|
||||
" result += f\"**Question Type:** _{qa_pair['question_type']}_\\n\\n\"\n",
|
||||
" result += f\"**Q:** {qa_pair['question']}\\n\\n\"\n",
|
||||
" result += f\"**A:** {qa_pair['answer']}\\n\\n\"\n",
|
||||
" result += \"---\\n✅ **Added to dataset!**\"\n",
|
||||
" \n",
|
||||
" progress(1.0, desc=\"✅ Complete!\")\n",
|
||||
" yield result, pipeline.get_summary()\n",
|
||||
"\n",
|
||||
"def generate_batch_synthetic(num_pairs: int, progress=gr.Progress()):\n",
|
||||
" \"\"\"Generate batch with live updates after each entry\"\"\"\n",
|
||||
" \n",
|
||||
" results = []\n",
|
||||
" count = int(num_pairs)\n",
|
||||
" \n",
|
||||
" for i in range(count):\n",
|
||||
" # Update progress\n",
|
||||
" progress_pct = (i + 1) / count\n",
|
||||
" progress(progress_pct, desc=f\"🔄 Generating {i+1}/{count}...\")\n",
|
||||
" \n",
|
||||
" # Generate entry\n",
|
||||
" entry = pipeline.generate_complete_entry()\n",
|
||||
" pipeline.dataset.append(entry)\n",
|
||||
" \n",
|
||||
" # Format result\n",
|
||||
" result = f\"### {i+1}. {entry['section_number']}: {entry['section_title']}\\n\"\n",
|
||||
" result += f\"**Q:** {entry['question']}\\n\"\n",
|
||||
" result += f\"**A:** {entry['answer']}\\n\\n\"\n",
|
||||
" results.append(result)\n",
|
||||
" \n",
|
||||
" # Yield intermediate results to update UI in real-time\n",
|
||||
" current_output = \"\".join(results)\n",
|
||||
" current_output += f\"\\n---\\n⏳ **Progress: {i+1}/{count} completed**\"\n",
|
||||
" \n",
|
||||
" yield current_output, pipeline.get_summary()\n",
|
||||
" \n",
|
||||
" # Final output\n",
|
||||
" final_output = \"\".join(results)\n",
|
||||
" final_output += f\"\\n---\\n✅ **All {count} Q&A pairs generated successfully!**\"\n",
|
||||
" \n",
|
||||
" progress(1.0, desc=\"✅ Batch complete!\")\n",
|
||||
" yield final_output, pipeline.get_summary()\n",
|
||||
"\n",
|
||||
"def save_synthetic_dataset():\n",
|
||||
" \"\"\"Save the synthetic dataset\"\"\"\n",
|
||||
" return pipeline.save_dataset()\n",
|
||||
"\n",
|
||||
"def clear_dataset():\n",
|
||||
" \"\"\"Clear the current dataset\"\"\"\n",
|
||||
" pipeline.dataset.clear()\n",
|
||||
" return \"✅ Dataset cleared!\", pipeline.get_summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9d352fec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Cell 9: Enhanced UI with real-time updates\n",
|
||||
"with gr.Blocks(title=\"Synthetic Legal Q&A Generator\", theme=gr.themes.Soft()) as demo:\n",
|
||||
" gr.Markdown(\"# 🤖 Synthetic Legal Q&A Data Generator\")\n",
|
||||
" gr.Markdown(\"**Generates completely synthetic Indian legal sections AND Q&A pairs from scratch**\")\n",
|
||||
" gr.Markdown(\"_Watch the magic happen in real-time! 🎬_\")\n",
|
||||
" \n",
|
||||
" with gr.Tab(\"🎯 Single Generation\"):\n",
|
||||
" gr.Markdown(\"### Generate one synthetic legal section with Q&A\")\n",
|
||||
" gr.Markdown(\"_See each step of generation as it happens_\")\n",
|
||||
" \n",
|
||||
" with gr.Row():\n",
|
||||
" with gr.Column(scale=1):\n",
|
||||
" topic_dropdown = gr.Dropdown(\n",
|
||||
" choices=LEGAL_TOPIC_SEEDS,\n",
|
||||
" label=\"🎯 Select Legal Topic\",\n",
|
||||
" value=LEGAL_TOPIC_SEEDS[0]\n",
|
||||
" )\n",
|
||||
" qtype_dropdown = gr.Dropdown(\n",
|
||||
" choices=QUESTION_TYPES,\n",
|
||||
" label=\"❓ Question Type\",\n",
|
||||
" value=QUESTION_TYPES[0]\n",
|
||||
" )\n",
|
||||
" gen_single_btn = gr.Button(\n",
|
||||
" \"🎲 Generate Synthetic Entry\", \n",
|
||||
" variant=\"primary\",\n",
|
||||
" size=\"lg\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" with gr.Column(scale=2):\n",
|
||||
" output_single = gr.Markdown(\n",
|
||||
" label=\"Generated Content\",\n",
|
||||
" value=\"Click **Generate** to create synthetic legal content...\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" summary_single = gr.Textbox(\n",
|
||||
" label=\"📊 Dataset Summary\", \n",
|
||||
" lines=6,\n",
|
||||
" interactive=False\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" gen_single_btn.click(\n",
|
||||
" fn=generate_single_synthetic,\n",
|
||||
" inputs=[topic_dropdown, qtype_dropdown],\n",
|
||||
" outputs=[output_single, summary_single]\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" with gr.Tab(\"🚀 Batch Generation\"):\n",
|
||||
" gr.Markdown(\"### Generate multiple synthetic legal Q&A pairs\")\n",
|
||||
" gr.Markdown(\"_Live updates as each Q&A pair is created!_\")\n",
|
||||
" \n",
|
||||
" with gr.Row():\n",
|
||||
" with gr.Column(scale=1):\n",
|
||||
" num_slider = gr.Slider(\n",
|
||||
" minimum=5,\n",
|
||||
" maximum=1000,\n",
|
||||
" value=5,\n",
|
||||
" step=5,\n",
|
||||
" label=\"📦 Number of Synthetic Q&A Pairs\"\n",
|
||||
" )\n",
|
||||
" gr.Markdown(\"**Tip:** Start with 10-20 pairs to see live generation\")\n",
|
||||
" gen_batch_btn = gr.Button(\n",
|
||||
" \"🔥 Generate Batch\", \n",
|
||||
" variant=\"primary\",\n",
|
||||
" size=\"lg\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" with gr.Column(scale=2):\n",
|
||||
" output_batch = gr.Markdown(\n",
|
||||
" label=\"Generated Synthetic Data\",\n",
|
||||
" value=\"Click **Generate Batch** to start creating multiple Q&A pairs...\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" summary_batch = gr.Textbox(\n",
|
||||
" label=\"📊 Dataset Summary\", \n",
|
||||
" lines=6,\n",
|
||||
" interactive=False\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" gen_batch_btn.click(\n",
|
||||
" fn=generate_batch_synthetic,\n",
|
||||
" inputs=[num_slider],\n",
|
||||
" outputs=[output_batch, summary_batch]\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" with gr.Tab(\"💾 Manage Dataset\"):\n",
|
||||
" gr.Markdown(\"### Save or Clear Your Synthetic Dataset\")\n",
|
||||
" \n",
|
||||
" with gr.Row():\n",
|
||||
" with gr.Column():\n",
|
||||
" gr.Markdown(\"**💾 Save your generated data**\")\n",
|
||||
" gr.Markdown(\"Exports all Q&A pairs to `synthetic_legal_qa.json`\")\n",
|
||||
" save_btn = gr.Button(\n",
|
||||
" \"💾 Save to JSON\", \n",
|
||||
" variant=\"primary\",\n",
|
||||
" size=\"lg\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" with gr.Column():\n",
|
||||
" gr.Markdown(\"**🗑️ Clear current dataset**\")\n",
|
||||
" gr.Markdown(\"⚠️ This will remove all generated Q&A pairs\")\n",
|
||||
" clear_btn = gr.Button(\n",
|
||||
" \"🗑️ Clear Dataset\", \n",
|
||||
" variant=\"stop\",\n",
|
||||
" size=\"lg\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" manage_status = gr.Textbox(\n",
|
||||
" label=\"Status\", \n",
|
||||
" lines=2,\n",
|
||||
" interactive=False\n",
|
||||
" )\n",
|
||||
" manage_summary = gr.Textbox(\n",
|
||||
" label=\"Current Dataset Overview\", \n",
|
||||
" lines=10,\n",
|
||||
" interactive=False,\n",
|
||||
" value=pipeline.get_summary()\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" save_btn.click(\n",
|
||||
" fn=save_synthetic_dataset,\n",
|
||||
" inputs=[],\n",
|
||||
" outputs=[manage_status]\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" clear_btn.click(\n",
|
||||
" fn=clear_dataset,\n",
|
||||
" inputs=[],\n",
|
||||
" outputs=[manage_status, manage_summary]\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" # Footer\n",
|
||||
" gr.Markdown(\"---\")\n",
|
||||
" gr.Markdown(\"🎓 **LLM Engineering Week 3** | Synthetic Data Generation Challenge\")\n",
|
||||
"\n",
|
||||
"demo.launch(share=False, inbrowser=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llm-engineering",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,802 @@
|
||||
[
|
||||
{
|
||||
"section_number": "IPC 123A",
|
||||
"section_title": "Protection of Digital Intellectual Property Rights",
|
||||
"provision": "Whoever, without the authorization of the owner, reproduces, distributes, or publicly displays any digital work, including software, databases, or multimedia content, shall be punished with imprisonment for a term which may extend to three years, or with fine which may extend to five lakh rupees, or with both. For the purposes of this section, \"digital work\" shall include any creation that exists in a digital format and embodies original intellectual effort. The provisions of this section shall apply in addition to any rights conferred under the Copyright Act, 1957.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide examples of actions that would violate IPC 123A regarding the protection of digital intellectual property rights?",
|
||||
"answer": "Yes, actions that would violate IPC 123A include: 1) Downloading and sharing a copyrighted software program without the owner's permission; 2) Reproducing a digital art piece and selling it online without authorization from the artist; 3) Distributing a database containing proprietary information without consent from the database owner; and 4) Publicly displaying a video or multimedia content on a website or social media platform without obtaining the rights from the creator. All these actions constitute unauthorized reproduction, distribution, or public display of digital works, which are punishable under this provision."
|
||||
},
|
||||
{
|
||||
"section_number": "FLA 102",
|
||||
"section_title": "Inheritance Rights of Unmarried Children",
|
||||
"provision": "In the event of the intestate death of a parent, all unmarried children shall be entitled to an equal share in the estate of the deceased, irrespective of the parentage or domicile of the children. The rights conferred herein shall be enforceable against any individual claiming succession rights to the estate, and no testamentary disposition or familial agreement shall supersede the statutory entitlement outlined in this provision. Furthermore, the provisions of this section shall apply retroactively to all intestate estates, regardless of the date of death of the decedent.",
|
||||
"question_type": "rights",
|
||||
"question": "What rights do unmarried children have in the event of an intestate death of a parent according to FLA 102?",
|
||||
"answer": "Unmarried children are entitled to an equal share in the estate of the deceased parent, regardless of their parentage or domicile. This right is enforceable against anyone claiming succession rights to the estate and cannot be overridden by any will or familial agreement. Additionally, this provision applies retroactively to all intestate estates, regardless of when the decedent died."
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 120A",
|
||||
"section_title": "Admissibility of Digital Evidence",
|
||||
"provision": "Notwithstanding any other provisions of this Act, digital evidence, including but not limited to electronic documents, data stored in digital format, and communications transmitted electronically, shall be admissible in any proceeding before a court provided that the party seeking to introduce such evidence demonstrates its authenticity and relevance. The court may require the party to produce a digital forensic report or certificate from a qualified expert to establish the integrity of the digital evidence in question, ensuring that the evidence has not been tampered with and is a true representation of the original data.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps must a party take to ensure the admissibility of digital evidence in court under IEA 120A?",
|
||||
"answer": "To ensure the admissibility of digital evidence in court under IEA 120A, the party seeking to introduce the evidence must demonstrate both its authenticity and relevance. Additionally, the court may require the party to produce a digital forensic report or a certificate from a qualified expert to establish the integrity of the digital evidence, confirming that it has not been tampered with and accurately represents the original data."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 456",
|
||||
"section_title": "Offense of Cyber Intimidation",
|
||||
"provision": "Whoever, with intent to cause harm or distress to any person, uses a computer resource or communication device to send threats, intimidate, or coerce such person through electronic means, shall be punished with imprisonment for a term which may extend to three years, or with fine which may extend to fifty thousand rupees, or with both. In the event of repeated offenses, the imprisonment may extend to five years and the fine may be increased to one lakh rupees.",
|
||||
"question_type": "definition",
|
||||
"question": "What constitutes the offense of cyber intimidation under IPC 456?",
|
||||
"answer": "The offense of cyber intimidation under IPC 456 is defined as the act of using a computer resource or communication device to send threats, intimidate, or coerce any person with the intent to cause harm or distress."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 124A",
|
||||
"section_title": "Protection of Original Works of Authorship",
|
||||
"provision": "Any person who, without the consent of the author or creator, reproduces, distributes, or publicly displays an original work of authorship, including but not limited to literary, artistic, musical, and dramatic works, shall be liable for infringement. Such infringement shall be punishable with imprisonment for a term that may extend to three years, or with fine, or with both. This section shall not apply to uses that fall under the doctrine of fair use as defined by the relevant provisions of this Code.",
|
||||
"question_type": "obligations",
|
||||
"question": "What obligations does a person have regarding the reproduction, distribution, or public display of an original work of authorship under IPC 124A?",
|
||||
"answer": "A person is obligated to obtain the consent of the author or creator before reproducing, distributing, or publicly displaying an original work of authorship. Failure to do so could result in liability for infringement, which may lead to penalties including imprisonment for up to three years, a fine, or both, unless the use falls under the doctrine of fair use as defined by the relevant provisions of the Code."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 500A",
|
||||
"section_title": "Unauthorized Access and Data Manipulation",
|
||||
"provision": "Whoever, without lawful authority, intentionally gains access to any computer resource or computer system and causes alteration, deletion, or addition of data therein, shall be punished with imprisonment for a term which may extend to three years, or with fine which may extend to one lakh rupees, or with both. For the purposes of this section, \"computer resource\" shall include any data, software, or digital content stored within the device or network, and \"lawful authority\" shall mean permission granted by the owner or authorized custodian of the computer resource.",
|
||||
"question_type": "definition",
|
||||
"question": "What is meant by \"lawful authority\" as defined in IPC 500A regarding unauthorized access to computer resources?",
|
||||
"answer": "\"Lawful authority\" refers to the permission granted by the owner or authorized custodian of the computer resource to access that resource."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 506A",
|
||||
"section_title": "Cyber Harassment",
|
||||
"provision": "Whosoever, by means of electronic communication or any digital platform, intentionally causes physical or mental harm to another person through threats, intimidation, or coercive messaging, shall be punished with imprisonment for a term which may extend to three years, or with fine which may extend to fifty thousand rupees, or with both. For the purposes of this section, \"electronic communication\" includes, but is not limited to, text messages, emails, social media interactions, and any other forms of digital messaging.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps should a victim take to file a complaint under IPC 506A for cyber harassment?",
|
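||||
"answer": "A victim of cyber harassment under IPC 506A should follow these steps to file a complaint: 1. Preserve evidence of the harassment, such as screenshots or copies of the threatening text messages, emails, or social media interactions. 2. Report the incident at the nearest police station and hand over the preserved evidence. 3. File a formal written complaint that records the dates, times, and nature of the threats, intimidation, or coercive messaging, along with the physical or mental harm caused. 4. Cooperate with any investigation that follows, and consider seeking legal assistance to navigate the process."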
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 102A",
|
||||
"section_title": "Admissibility of Electronic Evidence",
|
||||
"provision": "Notwithstanding the provisions of Section 61 of this Act, electronic evidence shall be admissible in any proceedings before a court provided it is accompanied by a certificate from the producer attesting to its authenticity and integrity, as prescribed under the Information Technology Act, 2000. The court shall assess the credibility of such evidence in accordance with the standards established by the Supreme Court and may require additional corroboration if deemed necessary for the interests of justice. Any objection to the admissibility of electronic evidence shall be raised at the earliest possible stage, failing which the right to contest its admissibility shall be deemed waived.",
|
||||
"question_type": "penalty",
|
||||
"question": "What are the consequences of failing to raise an objection to the admissibility of electronic evidence at the earliest possible stage under IEA 102A?",
|
||||
"answer": "If a party fails to raise an objection to the admissibility of electronic evidence at the earliest possible stage, they will be deemed to have waived their right to contest its admissibility in court. This means that the objection cannot be raised later in the proceedings, potentially impacting the outcome of the case."
|
||||
},
|
||||
{
|
||||
"section_number": "FLA 123",
|
||||
"section_title": "Rights of Inheritance among Lineal Ascendants and Descendants",
|
||||
"provision": "In matters of inheritance, lineal ascendants shall inherit equal shares alongside lineal descendants in the absence of a will. In cases where property is self-acquired, the owner may designate the distribution of their estate; however, such designation shall not infringe upon the statutory rights of the surviving spouse or any children, who shall retain a minimum guaranteed share as prescribed under this Act. In the event of a dispute, such claims shall be adjudicated by the Family Court, taking into consideration the principles of equity and the welfare of all parties involved.",
|
||||
"question_type": "examples",
|
||||
"question": "If a person dies without a will and is survived by their parents and children, how will the inheritance be divided according to FLA 123?",
|
||||
"answer": "According to FLA 123, if a person dies without a will, their lineal ascendants (parents) and lineal descendants (children) will inherit equal shares of the estate. For example, if the estate is worth $120,000 and the deceased is survived by both their parents and children (let's say two children), the estate would be divided equally among the four of them. Each parent would receive $30,000, and each child would also receive $30,000, totaling the estate's value. However, if the deceased had designated a different distribution in a will, it must still respect the minimum guaranteed share for the surviving spouse and children, as mandated by the Act."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 123A",
|
||||
"section_title": "Protection of Digital Innovations",
|
||||
"provision": "Any individual or entity that creates an original digital work, including but not limited to software, algorithms, and digital media, shall have the exclusive right to control the reproduction, distribution, and adaptation of such work for a period of ten years from the date of creation, subject to the provisions of fair use as outlined in this Code. Unauthorized use or reproduction of a protected digital innovation shall attract civil penalties, including but not limited to injunctions, damages, and the seizure of infringing materials, as deemed appropriate by the court.",
|
||||
"question_type": "obligations",
|
||||
"question": "What obligations do individuals or entities have when creating original digital works under IPC 123A?",
|
||||
"answer": "Under IPC 123A, individuals or entities that create original digital works hold the exclusive right, rather than an obligation, to control the reproduction, distribution, and adaptation of their work for a period of ten years from the date of creation, subject to the provisions of fair use. The obligation falls on everyone else: any use or reproduction of a protected digital innovation must be authorized, as unauthorized use or reproduction can result in civil penalties, including injunctions, damages, and the seizure of infringing materials as determined by the court."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 509A",
|
||||
"section_title": "Intentional Misuse of Digital Identity",
|
||||
"provision": "Whoever, intending to cause annoyance, inconvenience, or harm, knowingly and dishonestly uses or impersonates the digital identity of another person, including but not limited to social media accounts, email addresses, or any other digital platform, shall be punishable with imprisonment for a term that may extend to three years, or with fine which may extend to fifty thousand rupees, or with both. In the case of repeat offenders, the term of imprisonment may extend to five years.",
|
||||
"question_type": "exceptions",
|
||||
"question": "Are there any exceptions to the punishment under IPC 509A for the intentional misuse of digital identity?",
|
||||
"answer": "Yes, exceptions may apply in cases where the individual can demonstrate that their use of another person's digital identity was done with the consent of that person or for legitimate purposes such as parody, satire, or commentary that does not intend to cause annoyance, inconvenience, or harm. However, the burden of proof lies with the individual claiming the exception, and it is essential to establish that the intent was not malicious."
|
||||
},
|
||||
{
|
||||
"section_number": "IPR 145",
|
||||
"section_title": "Rights of Co-Owners in Joint Property",
|
||||
"provision": "In any joint ownership of property, each co-owner shall possess an equal right to utilize, manage, and derive benefit from the property, subject to the terms of their agreement. In the event of a dispute regarding the use or management of the property, any co-owner may seek mediation through the appropriate civil court, which shall have the authority to appoint a neutral arbitrator to facilitate a resolution. Should the parties remain in disagreement following mediation, the court shall adjudicate based on the principles of equity and the specific contributions made by each co-owner toward the property.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps should a co-owner take if there is a dispute regarding the use or management of jointly owned property?",
|
||||
"answer": "If a co-owner encounters a dispute regarding the use or management of jointly owned property, they should first seek mediation through the appropriate civil court. The court will appoint a neutral arbitrator to help facilitate a resolution. If the parties still cannot reach an agreement after mediation, the court will then adjudicate the dispute based on principles of equity and consider the specific contributions made by each co-owner towards the property."
|
||||
},
|
||||
{
|
||||
"section_number": "CPL 456",
|
||||
"section_title": "Remedies for Breach of Contract",
|
||||
"provision": "In the event of a breach of contract, the aggrieved party shall be entitled to seek specific performance of the contract, or alternatively, claim for damages which shall be calculated based on the loss incurred directly as a result of the breach. The court may, at its discretion, award punitive damages not exceeding the value of the contract, if it finds the breach to have been willful or malicious. Any claims for reliance damages shall be substantiated with adequate evidence demonstrating the expenditures incurred in preparation for the performance of the contract.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps must the aggrieved party take to claim specific performance or damages in the event of a breach of contract according to CPL 456?",
|
||||
"answer": "The aggrieved party must first determine whether to seek specific performance of the contract or claim for damages. If claiming damages, they should calculate the loss incurred directly due to the breach. If they wish to seek punitive damages, they must demonstrate that the breach was willful or malicious, keeping in mind that such damages cannot exceed the value of the contract. Additionally, if the party wants to claim reliance damages, they must gather and present adequate evidence of expenditures incurred in preparation for the contract's performance. All claims should be filed with the appropriate court as per the procedural rules governing contract disputes."
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 112",
|
||||
"section_title": "Admissibility of Digital Evidence",
|
||||
"provision": "Notwithstanding any other provisions of this Act, digital evidence shall be admissible in judicial proceedings, provided that it is demonstrated to be authentic and relevant to the matter at hand. The party seeking to introduce digital evidence must establish a clear chain of custody and utilize appropriate technological methods for preservation and extraction, ensuring that the integrity of the evidence has not been compromised. Furthermore, the court may consider expert testimony regarding the reliability of the digital medium used to store or transmit such evidence.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide an example of how a party might successfully introduce digital evidence in court under IEA 112?",
|
||||
"answer": "Certainly! Imagine a scenario where a company is accused of data theft. The plaintiff wants to introduce an email as digital evidence that allegedly contains confidential information sent to a competitor. To successfully admit this email under IEA 112, the plaintiff would need to demonstrate its authenticity by showing that the email was indeed sent from the company's server. They would establish a clear chain of custody by documenting who accessed the email and how it was preserved, perhaps by using secure storage methods. Additionally, they might engage a digital forensics expert to testify about the reliability of the email server and the methods used to extract the email, ensuring that the integrity of the evidence has not been compromised. If all these criteria are met, the court would likely admit the email as evidence in the proceedings."
|
||||
},
|
||||
{
|
||||
"section_number": "CPL 125",
|
||||
"section_title": "Remedies for Breach of Contract",
|
||||
"provision": "In the event of a breach of contract, the aggrieved party shall be entitled to seek restitution by way of specific performance, or, in lieu thereof, claim for damages not exceeding the actual loss incurred as a direct result of the breach. Furthermore, the court may, at its discretion, award consequential damages if such damages were within the contemplation of the parties at the time of contract formation, provided that the aggrieved party has made reasonable efforts to mitigate the loss.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps must the aggrieved party take to seek remedies for a breach of contract under CPL 125?",
|
||||
"answer": "To seek remedies for a breach of contract under CPL 125, the aggrieved party should follow these steps: First, clearly identify and document the breach of contract. Next, determine whether they wish to seek specific performance or claim for damages. If claiming damages, the aggrieved party must calculate and document the actual loss incurred as a direct result of the breach, ensuring that the amount claimed does not exceed that loss. Additionally, the aggrieved party should demonstrate that they made reasonable efforts to mitigate the loss. Finally, if the aggrieved party believes consequential damages are applicable, they should provide evidence that such damages were within the contemplation of the parties at the time of contract formation. Once these steps are completed, the aggrieved party can file a claim in court to seek the desired remedies."
|
||||
},
|
||||
{
|
||||
"section_number": "CONST 102",
|
||||
"section_title": "Protection of Fundamental Rights",
|
||||
"provision": "Every citizen shall have the right to freedom from arbitrary arrest and detention, ensuring that no person shall be deprived of their liberty without due process of law. Furthermore, every individual shall have the right to seek redress in a competent court of law for any violation of their fundamental rights, and the State shall be obligated to provide legal assistance to those unable to afford representation. Any law or action infringing upon the rights enumerated in this section shall be deemed unconstitutional and void.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide examples of situations where a citizen's right to freedom from arbitrary arrest and detention might be violated, and what steps they can take if their rights are infringed upon?",
|
||||
"answer": "Examples of situations where a citizen's right to freedom from arbitrary arrest and detention might be violated include being arrested without a warrant, being held without charges for an extended period, or being detained based solely on their political beliefs or race. In such cases, the individual has the right to seek redress in a competent court of law by filing a lawsuit against the authorities responsible for the violation. Additionally, if they cannot afford legal representation, the State is obligated to provide legal assistance to ensure that their rights are protected."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 509A",
|
||||
"section_title": "Cyber Harassment and Intimidation",
|
||||
"provision": "Whoever, with the intent to harass or intimidate another person through the use of electronic communications, sends, posts, or publishes any obscene or threatening material shall be punished with imprisonment for a term that may extend to three years, or with fine which may extend to one lakh rupees, or with both. In cases where such actions lead to severe emotional distress or harm to the reputation of the victim, the imprisonment may extend to five years. The provisions of this section shall apply notwithstanding any other law in force relating to defamation or electronic privacy.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps should a victim take to file a complaint under IPC 509A for cyber harassment or intimidation?",
|
||||
"answer": "A victim should follow these steps to file a complaint under IPC 509A: 1. Collect evidence of the harassment, such as screenshots of messages or posts. 2. Approach the nearest police station to report the incident, providing the collected evidence. 3. File a formal complaint, ensuring that all relevant details are included, such as dates, times, and the nature of the communications. 4. The police will register the complaint and may initiate an investigation. 5. If the harassment leads to severe emotional distress or harm to reputation, the victim should highlight this to the authorities, as it can lead to an extended prison term for the offender. It is advisable for the victim to seek legal assistance to navigate the process effectively."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 432",
|
||||
"section_title": "Punishment for Intentional Damage to Public Property",
|
||||
"provision": "Whoever intentionally causes damage to any public property, including but not limited to roads, bridges, or public buildings, shall be punishable with imprisonment for a term which may extend to three years, or with a fine which may extend to five lakh rupees, or with both. In cases where the damage exceeds a value of one lakh rupees, the offender shall be liable to imprisonment for a term which may extend to five years, and the fine may extend to ten lakh rupees. This provision shall not apply to acts of lawful protest or demonstration, provided that such actions do not result in damage to the aforementioned properties.",
|
||||
"question_type": "definition",
|
||||
"question": "What constitutes intentional damage to public property under IPC 432?",
|
||||
"answer": "Intentional damage to public property under IPC 432 refers to the deliberate act of causing harm to any public assets, which includes but is not limited to roads, bridges, or public buildings. Such actions are punishable by imprisonment or fines, depending on the extent of the damage caused."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 128A",
|
||||
"section_title": "Rights and Resolution of Property Disputes",
|
||||
"provision": "In any dispute concerning the ownership, possession, or title to immovable property, parties shall be entitled to seek resolution through a Mediation and Conciliation Board established under this Section. The Board shall consist of a Chairperson and two members, appointed by the State Government, who shall endeavor to resolve the dispute amicably within a period of six months from the date of reference, failing which the aggrieved party may escalate the matter to the appropriate civil court for adjudication. The provisions of this Section shall not preclude any party from approaching the court for urgent interim relief during the pendency of the mediation process.",
|
||||
"question_type": "exceptions",
|
||||
"question": "Are there any exceptions to the requirement of mediation for resolving property disputes under IPC 128A?",
|
||||
"answer": "Yes, the provisions of IPC 128A do not preclude any party from approaching the court for urgent interim relief during the pendency of the mediation process. This means that if a party needs immediate relief, they can seek it from the court even while the mediation is ongoing."
|
||||
},
|
||||
{
|
||||
"section_number": "CGR 102",
|
||||
"section_title": "Disclosure of Financial Interests",
|
||||
"provision": "Every corporate entity registered under the Companies Act, 2013 shall disclose in its annual report the financial interests of its board members and key managerial personnel, including any directorships, shareholdings, or partnerships in other entities that may pose a conflict of interest. This disclosure must be made in a format prescribed by the Securities and Exchange Board of India (SEBI) and shall be subject to scrutiny by the independent auditors to ensure transparency and accountability within the corporate governance framework. Non-compliance with this provision shall attract penalties as stipulated under Section 234 of the Companies Act, 2013.",
|
||||
"question_type": "exceptions",
|
||||
"question": "Are there any exceptions to the requirement for corporate entities to disclose the financial interests of their board members and key managerial personnel as per CGR 102?",
|
||||
"answer": "Yes, exceptions to the disclosure requirement under CGR 102 may apply in certain circumstances, such as when the financial interests are deemed nominal and not likely to pose a conflict of interest, or if the board member or key managerial personnel is involved in a confidential matter that does not affect the corporate entity's governance. However, such exceptions must be clearly justified and documented, as non-compliance can lead to penalties under Section 234 of the Companies Act, 2013. It is advisable for entities to consult legal counsel to ensure compliance with all applicable regulations."
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 123",
|
||||
"section_title": "Admissibility of Digital Evidence",
|
||||
"provision": "Notwithstanding any other provision of law, digital evidence shall be admissible in any judicial proceeding provided it is accompanied by a certificate of authenticity from a qualified digital forensic expert, which verifies the integrity and accuracy of the data. Such evidence must be presented in a format that is compatible with the court's technological capabilities, and the party seeking to introduce the digital evidence shall bear the burden of proving its reliability and relevance to the matter at hand. The court may, in its discretion, exclude digital evidence if it deems that the probative value is outweighed by the potential for prejudice or misinformation.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide an example of when digital evidence would be admissible in court under IEA 123?",
|
||||
"answer": "Digital evidence, such as emails or text messages, would be admissible in court under IEA 123 if the party seeking to introduce this evidence presents it with a certificate of authenticity from a qualified digital forensic expert. For instance, if a plaintiff wants to use a series of text messages as evidence in a contract dispute, they must ensure the messages are verified for integrity and accuracy by a forensic expert. Additionally, the text messages must be presented in a format that the court can access and understand. If these conditions are met and the plaintiff can demonstrate the relevance and reliability of the texts, the court is likely to admit the evidence, unless it determines that the potential for prejudice outweighs its probative value."
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 65A",
|
||||
"section_title": "Admissibility of Digital Evidence",
|
||||
"provision": "Notwithstanding any provision to the contrary, digital evidence shall be admissible in a court of law if it is authenticated by the party seeking its admission. Authentication shall be established through a combination of metadata verification, secure chain of custody, and corroborative testimonial evidence, ensuring the integrity and reliability of the digital record. In instances where the authenticity is challenged, the burden of proof shall rest with the party contesting such admissibility.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps must a party take to ensure that digital evidence is admissible in court under IEA 65A?",
|
||||
"answer": "To ensure the admissibility of digital evidence in court under IEA 65A, the party seeking its admission must authenticate the evidence through three key steps: (1) verify the metadata associated with the digital record, (2) establish a secure chain of custody for the evidence, and (3) provide corroborative testimonial evidence that supports the integrity and reliability of the digital record. If the authenticity of the evidence is challenged, the burden of proof will shift to the party contesting its admissibility."
|
||||
},
|
||||
{
|
||||
"section_number": "CGR 101",
|
||||
"section_title": "Board Composition and Independence",
|
||||
"provision": "Every public company shall ensure that its Board of Directors comprises a minimum of one-third independent directors, who shall not have any material relationship with the company, its promoters, or its subsidiaries. The independent directors shall be responsible for safeguarding the interests of minority shareholders and enhancing the overall governance of the company. The criteria for independence and the process for appointment shall be prescribed under the Corporate Governance Regulations, ensuring transparency and accountability in the board's operations.",
|
||||
"question_type": "exceptions",
|
||||
"question": "Are there any exceptions to the requirement for a public company to have a minimum of one-third independent directors on its Board of Directors as stated in CGR 101?",
|
||||
"answer": "Yes, exceptions may apply under specific circumstances as outlined in the Corporate Governance Regulations. For instance, if a company has a unique structure or meets certain criteria established by regulatory authorities, it may be allowed to deviate from the one-third independent director requirement. However, such exceptions must adhere to the principles of transparency and accountability, and the company must provide justification for any deviations from the standard composition."
|
||||
},
|
||||
{
|
||||
"section_number": "CPR 101",
|
||||
"section_title": "Right to Constitutional Protections",
|
||||
"provision": "Every individual shall have the right to seek recourse under this Act for any violation of their fundamental rights as enumerated in the Constitution of India. The State shall ensure the protection of these rights against any encroachment by the public or private entities, and a mechanism for redressal of grievances shall be established within six months of any reported infringement. Furthermore, any citizen aggrieved by the denial of such rights may approach the Supreme Court or High Court for enforcement, and the courts shall prioritize such cases to ensure timely justice.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide an example of a situation where an individual might seek recourse under the Right to Constitutional Protections as outlined in CPR 101?",
|
||||
"answer": "An example of a situation where an individual might seek recourse under this provision is if a citizen is wrongfully detained by the police without due process, which violates their fundamental rights as guaranteed by the Constitution of India. In this case, the individual can file a complaint under the Act, seeking redress for the infringement of their rights. If the grievance is not resolved satisfactorily within six months, the individual has the option to approach the Supreme Court or High Court to enforce their rights and obtain timely justice."
|
||||
},
|
||||
{
|
||||
"section_number": "CGR 302",
|
||||
"section_title": "Standards of Conduct for Directors",
|
||||
"provision": "Every director of a company shall act in good faith and in the best interests of the company, ensuring transparency and accountability in all dealings. Directors are mandated to disclose any potential conflicts of interest and refrain from participating in discussions or decisions where such conflicts may arise. Failure to comply with these standards shall result in penalties as prescribed under Section CGR 305, which may include disqualification from holding office in the company for a period not exceeding five years.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps must a director take to comply with the standards of conduct outlined in CGR 302 regarding potential conflicts of interest?",
|
||||
"answer": "To comply with the standards of conduct in CGR 302, a director must take the following steps:"
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 502A",
|
||||
"section_title": "Unauthorized Access and Data Breach",
|
||||
"provision": "Whoever intentionally accesses a computer system or network without authorization, or exceeds authorized access to obtain, alter, or destroy data, shall be punishable with imprisonment for a term which may extend to three years, or with fine which may extend to fifty thousand rupees, or with both. In cases where such access results in a breach of sensitive personal data or causes harm to any individual or entity, the term of imprisonment may extend to five years, and the fine may extend to one lakh rupees.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide examples of actions that would violate IPC 502A and the potential consequences for those actions?",
|
||||
"answer": "Yes, under IPC 502A, several actions could constitute unauthorized access and data breach. For example, if an individual hacks into a company's computer system to steal customer data, this would be considered intentional unauthorized access. If the hacker is caught, they could face imprisonment for up to three years and fines up to fifty thousand rupees."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 456",
|
||||
"section_title": "Offences of Public Disruption and Associated Penalties",
|
||||
"provision": "Whoever, without lawful authority, intentionally causes public disruption by engaging in violent or threatening behavior in a public place shall be punishable with imprisonment for a term which may extend to three years, or with fine which may extend to one lakh rupees, or with both. In the event of causing grievous hurt or significant property damage during such disruption, the offender shall be liable to imprisonment for a term not less than five years, which may extend to seven years, along with a fine that may extend to five lakh rupees.",
|
||||
"question_type": "definition",
|
||||
"question": "What constitutes the offense of public disruption under IPC 456?",
|
||||
"answer": "The offense of public disruption under IPC 456 is defined as intentionally causing public disruption by engaging in violent or threatening behavior in a public place without lawful authority."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 502",
|
||||
"section_title": "Criminal Intimidation through Digital Means",
|
||||
"provision": "Whoever, using any electronic device or communication service, intentionally threatens another person with injury to their person, reputation, or property, or to cause alarm or distress, shall be punishable with imprisonment of either description for a term which may extend to three years, or with fine, or with both. In addition, if such intimidation is intended to coerce or influence the victim's actions or decisions, the term of imprisonment may extend to five years.",
|
||||
"question_type": "obligations",
|
||||
"question": "What are the obligations of an individual regarding the use of electronic devices to communicate, as outlined in IPC 502?",
|
||||
"answer": "An individual is obligated not to intentionally threaten another person with injury to their person, reputation, or property using any electronic device or communication service. Violating this obligation can result in punishment that includes imprisonment for up to three years, a fine, or both. If the intimidation is intended to coerce or influence the victim's actions or decisions, the imprisonment term may extend to five years."
|
||||
},
|
||||
{
|
||||
"section_number": "CPC 124",
|
||||
"section_title": "Application for Summary Judgment",
|
||||
"provision": "In any civil proceedings, a party may apply to the court for a summary judgment on the ground that there is no genuine dispute as to any material fact and that the party is entitled to judgment as a matter of law. The application shall be supported by an affidavit setting forth the specific facts that demonstrate the absence of a material issue of fact. The court shall hear the application and may grant the summary judgment if it is satisfied that the evidence is clear and unequivocal, and that a trial is not necessary to resolve the issues presented.",
|
||||
"question_type": "rights",
|
||||
"question": "What rights does a party have when applying for a summary judgment under CPC 124?",
|
||||
"answer": "A party has the right to apply for a summary judgment in civil proceedings if they believe there is no genuine dispute regarding any material fact and that they are entitled to judgment as a matter of law. To exercise this right, the party must support their application with an affidavit that specifies the facts demonstrating the lack of a material issue of fact. If the court finds the evidence to be clear and unequivocal, and determines that a trial is unnecessary, it may grant the summary judgment."
|
||||
},
|
||||
{
|
||||
"section_number": "CRPC 128A",
|
||||
"section_title": "Protection of Fundamental Rights in Criminal Proceedings",
|
||||
"provision": "In all criminal proceedings, it shall be the duty of the presiding officer to ensure the protection of an accused person's fundamental rights as guaranteed under Part III of the Constitution of India. Any infringement of these rights during the course of investigation or trial shall render the proceedings voidable, and the court shall have the power to issue directions to remedy such infringement, including the exclusion of unlawfully obtained evidence. The court shall also provide the accused an opportunity to address any violations of their rights at the earliest possible stage of the proceedings.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide an example of how CRPC 128A protects an accused person's fundamental rights during a criminal trial?",
|
||||
"answer": "Certainly! For instance, if during a police investigation, evidence is obtained through coercive interrogation methods that violate the accused's right to remain silent, this would constitute an infringement of their fundamental rights. Under CRPC 128A, the presiding officer is required to ensure that such rights are protected. As a result, the court may declare the proceedings voidable and exclude the unlawfully obtained evidence from the trial. Additionally, the accused would be given an opportunity to address this violation at the earliest stage, allowing them to contest the admissibility of the evidence and uphold their rights as guaranteed under the Constitution of India."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 372",
|
||||
"section_title": "Rights and Disputes Relating to Property Ownership",
|
||||
"provision": "Any individual claiming ownership of a property shall have the right to initiate a civil suit for the determination of title and possession against any person in unlawful occupation of said property. The court shall adjudicate such disputes expeditiously, ensuring that the rights of the rightful owner are protected while balancing the interests of the occupant, who may assert a claim of adverse possession or any lawful entitlement. Furthermore, in cases where property disputes arise among co-owners or joint tenants, the court shall facilitate mediation prior to proceeding to trial, promoting an amicable resolution to conflicts concerning shared property rights.",
|
||||
"question_type": "definition",
|
||||
"question": "What rights does an individual have under IPC 372 regarding property ownership disputes?",
|
||||
"answer": "Under IPC 372, an individual claiming ownership of a property has the right to initiate a civil suit for determining title and possession against anyone unlawfully occupying the property. The court is required to adjudicate these disputes quickly, protecting the rights of the rightful owner while also considering the interests of the occupant, who may claim adverse possession or other lawful entitlements. Additionally, in disputes among co-owners or joint tenants, the court must facilitate mediation before proceeding to trial to encourage an amicable resolution."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 499A",
|
||||
"section_title": "Unauthorized Access and Data Breach",
|
||||
"provision": "Whosoever, without lawful authority or consent, accesses a computer resource or computer system, and thereby obtains, alters, or destroys any data, information, or program, with the intent to cause harm or facilitate fraud, shall be punished with imprisonment for a term which may extend to three years, or with fine which may extend to five lakh rupees, or with both. In the case of repeat offenses, the term of imprisonment may extend to five years, along with a fine not exceeding ten lakh rupees.",
|
||||
"question_type": "definition",
|
||||
"question": "What constitutes unauthorized access and data breach under IPC 499A?",
|
||||
"answer": "Unauthorized access and data breach under IPC 499A occurs when an individual, without lawful authority or consent, accesses a computer resource or system, and obtains, alters, or destroys any data, information, or program with the intent to cause harm or facilitate fraud."
|
||||
},
|
||||
{
|
||||
"section_number": "PPR 101",
|
||||
"section_title": "Rights of Co-Owners in Joint Property",
|
||||
"provision": "In the event of a dispute arising between co-owners of joint property, each co-owner shall have the right to seek mediation through a designated Property Dispute Resolution Committee, established under this Act, prior to initiating any legal proceedings. The Committee shall endeavor to resolve conflicts amicably within a period of sixty days, failing which the aggrieved co-owner may file a civil suit in the appropriate jurisdiction, whereupon the court shall consider equitable distribution and rights of possession in accordance with the principles of natural justice and prior agreements among co-owners.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps should a co-owner take if there is a dispute regarding joint property, according to PPR 101?",
|
||||
"answer": "A co-owner should first seek mediation through the designated Property Dispute Resolution Committee established under PPR 101. This mediation process must be initiated prior to any legal proceedings. The Committee will attempt to resolve the conflict amicably within sixty days. If the dispute is not resolved within this period, the aggrieved co-owner may then file a civil suit in the appropriate jurisdiction, where the court will consider equitable distribution and rights of possession based on natural justice and any prior agreements among the co-owners."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 512",
|
||||
"section_title": "Offense of Digital Harassment",
|
||||
"provision": "Whosoever, through the use of electronic means, intentionally causes harm, distress, or alarm to another person by sending, sharing, or disseminating unsolicited and offensive messages, images, or videos, shall be punishable with imprisonment for a term which may extend to three years, or with fine which may extend to fifty thousand rupees, or with both. In the case of repeated offenses, the term of imprisonment may extend to five years, along with a fine not exceeding one lakh rupees. A victim of digital harassment may file a complaint with the appropriate authority, who shall take necessary action as prescribed under this section.",
|
||||
"question_type": "penalty",
|
||||
"question": "What are the penalties for committing the offense of digital harassment under IPC 512?",
|
||||
"answer": "The penalties for committing digital harassment under IPC 512 include imprisonment for a term that may extend to three years, a fine that may extend to fifty thousand rupees, or both. In the case of repeated offenses, the imprisonment term may extend to five years, with a fine not exceeding one lakh rupees."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 456A",
|
||||
"section_title": "Unauthorized Access to Digital Systems",
|
||||
"provision": "Whosoever, without lawful authority, intentionally accesses a computer or digital system with the intent to obtain or alter data, or to interfere with the integrity or functioning of such system, shall be punishable with imprisonment of either description for a term that may extend to three years, or with fine which may extend to five lakh rupees, or with both. In the case of repeated offences, the term of imprisonment may extend to five years, and the fine may be increased to ten lakh rupees.",
|
||||
"question_type": "definition",
|
||||
"question": "What constitutes \"unauthorized access to digital systems\" under IPC 456A?",
|
||||
"answer": "\"Unauthorized access to digital systems\" under IPC 456A refers to the act of intentionally accessing a computer or digital system without lawful authority, with the intent to obtain or alter data, or to interfere with the integrity or functioning of that system."
|
||||
},
|
||||
{
|
||||
"section_number": "CPC 204",
|
||||
"section_title": "Consolidation of Civil Proceedings",
|
||||
"provision": "In any suit or proceeding where multiple matters arise out of the same transaction or series of transactions and involve common questions of law or fact, the court may, upon application by any party or suo moto, consolidate such suits or proceedings for the purpose of expedience and efficiency. The court shall ensure that such consolidation does not prejudice the rights of the parties involved and shall determine the procedure for the consolidated hearing, which may include joint trials or the use of a single set of pleadings applicable to all consolidated matters.",
|
||||
"question_type": "obligations",
|
||||
"question": "What obligation does the court have when consolidating civil proceedings under CPC 204?",
|
||||
"answer": "The court has the obligation to ensure that the consolidation of suits or proceedings does not prejudice the rights of the parties involved, and it must determine the appropriate procedure for the consolidated hearing, which may involve joint trials or a single set of pleadings applicable to all consolidated matters."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 420A",
|
||||
"section_title": "Fraudulent Misrepresentation in Commercial Transactions",
|
||||
"provision": "Whosoever, with intent to deceive or defraud, makes any false representation, whether by words or conduct, in the course of a commercial transaction, shall be punished with imprisonment of either description for a term which may extend to five years, or with fine, or with both. If such misrepresentation causes loss to the victim exceeding one lakh rupees, the term of imprisonment may extend to seven years. This provision shall not apply to representations made in good faith where the individual reasonably believes such representations to be true.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps should a victim take to report a fraudulent misrepresentation under IPC 420A in a commercial transaction?",
|
||||
"answer": "To report a fraudulent misrepresentation under IPC 420A, the victim should follow these steps:"
|
||||
},
|
||||
{
|
||||
"section_number": "FLA 123",
|
||||
"section_title": "Rights of Inheritance Among Wards and Guardians",
|
||||
"provision": "In any case where a minor is a ward under the guardianship of an individual, such guardian shall have the right to manage the ward's property, but shall not have the authority to alienate or dispose of such property without prior approval from the Family Court. Upon reaching the age of majority, the ward shall inherit all properties acquired during the period of guardianship, along with any rights therein, free from any encumbrances created by the guardian without due process. The Family Court shall ensure that the interests of the minor are adequately protected during the guardianship period, with a view to preventing any potential conflicts of interest.",
|
||||
"question_type": "definition",
|
||||
"question": "What is the role of a guardian in managing a minor's property according to FLA 123?",
|
||||
"answer": "According to FLA 123, a guardian has the right to manage a minor's property but cannot alienate or dispose of it without prior approval from the Family Court."
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 123",
|
||||
"section_title": "Admissibility of Electronic Evidence",
|
||||
"provision": "In any proceeding before a court, electronic evidence shall be deemed admissible if it is produced in a manner that ensures its integrity and authenticity through a secure digital signature or cryptographic verification. The party intending to introduce such evidence must provide a certificate of authenticity from a competent authority, confirming compliance with the standards set forth in the Information Technology Act, 2000. Notwithstanding the aforementioned, any electronic evidence that is deemed to have been tampered with or altered shall be inadmissible unless the party presenting the evidence can demonstrate, beyond reasonable doubt, the absence of such tampering.",
|
||||
"question_type": "definition",
|
||||
"question": "What is required for electronic evidence to be deemed admissible in court according to IEA 123?",
|
||||
"answer": "Electronic evidence is deemed admissible in court if it is produced in a manner that ensures its integrity and authenticity through a secure digital signature or cryptographic verification. Additionally, the party introducing the evidence must provide a certificate of authenticity from a competent authority, confirming compliance with the standards of the Information Technology Act, 2000."
|
||||
},
|
||||
{
|
||||
"section_number": "CPC 157",
|
||||
"section_title": "Remedies for Breach of Contract",
|
||||
"provision": "In the event of a breach of contract, the aggrieved party shall be entitled to seek remedy in the form of specific performance, damages, or rescission, as applicable. The court may award compensatory damages to cover direct losses caused by the breach, and may also consider consequential losses if such losses were within the contemplation of both parties at the time of contract formation. Furthermore, the court shall have discretion to order specific performance where monetary compensation is inadequate to provide a just remedy, particularly in cases involving unique subject matter.",
|
||||
"question_type": "obligations",
|
||||
"question": "What obligations does an aggrieved party have when seeking remedies for a breach of contract under CPC 157?",
|
||||
"answer": "The aggrieved party is entitled to seek remedies such as specific performance, damages, or rescission, depending on the circumstances of the breach. They must demonstrate the direct losses incurred and may also claim consequential losses if those were contemplated by both parties at the time of the contract. Additionally, if seeking specific performance, the aggrieved party must show that monetary compensation is inadequate to address the situation, particularly in cases involving unique subject matter."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 495A",
|
||||
"section_title": "Offense of Deceptive Co-habitation",
|
||||
"provision": "Whosoever, with intent to deceive, cohabits with a person as if married, while being lawfully married to another person, shall be punished with imprisonment for a term which may extend to five years, or with fine, or with both. The act shall be considered a cognizable offense, and in addition to punishment, the court may direct restitution for any economic or emotional harm caused to the aggrieved party. In any prosecution under this section, evidence of the accused's prior marital status shall be admissible to establish the offense.",
|
||||
"question_type": "exceptions",
|
||||
"question": "Are there any exceptions to the offense of Deceptive Co-habitation under IPC 495A for individuals who are legally separated from their spouse?",
|
||||
"answer": "Yes, individuals who are legally separated from their spouse may not be prosecuted under IPC 495A for Deceptive Co-habitation, provided that the separation is recognized by law and they are not still legally married. However, it is important to note that evidence of their prior marital status may still be considered in court to establish the context of the cohabitation."
|
||||
},
|
||||
{
|
||||
"section_number": "CGR 101",
|
||||
"section_title": "Principles of Corporate Governance",
|
||||
"provision": "Every company incorporated under the Companies Act, 2013 shall adhere to the principles of corporate governance as prescribed by the Securities and Exchange Board of India (SEBI) regulations. These principles shall include, but not be limited to, the establishment of a robust board structure, the separation of the roles of the chairperson and the managing director, and the implementation of transparent disclosure practices that uphold the rights of shareholders. Non-compliance with these principles shall attract penalties as delineated in CGR 202.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps must a company take to ensure compliance with the corporate governance principles as outlined in CGR 101?",
|
||||
"answer": "To ensure compliance with the corporate governance principles outlined in CGR 101, a company must take the following steps:"
|
||||
},
|
||||
{
|
||||
"section_number": "FLA 123",
|
||||
"section_title": "Rights of Inheritance Among Lineal Descendants",
|
||||
"provision": "In cases of intestate succession, all lineal descendants, including illegitimate offspring, shall inherit an equal share of the estate of the deceased, irrespective of the marital status of the parent at the time of birth. No distinction shall be made based on gender, and the distribution of assets shall occur in accordance with the principles of per stirpes, ensuring that each descendant receives a proportionate share of their ancestor’s estate. Additionally, the provisions of this section shall apply retroactively to estates of deceased individuals who died on or after January 1, 2023, thereby nullifying any pre-existing discriminatory practices in inheritance laws.",
|
||||
"question_type": "rights",
|
||||
"question": "What rights do lineal descendants have regarding inheritance under FLA 123, particularly for illegitimate offspring and regardless of the parent's marital status?",
|
||||
"answer": "Under FLA 123, all lineal descendants, including illegitimate offspring, have the right to inherit an equal share of a deceased individual's estate in cases of intestate succession. This inheritance is granted irrespective of the parent's marital status at the time of the child's birth and without any distinction based on gender. The distribution of the estate will follow the principles of per stirpes, ensuring each descendant receives a proportionate share of their ancestor’s estate. These rights are retroactively applied to estates of individuals who died on or after January 1, 2023, eliminating previous discriminatory inheritance practices."
|
||||
},
|
||||
{
|
||||
"section_number": "CRPC 145",
|
||||
"section_title": "Protection of Constitutional Rights During Detention",
|
||||
"provision": "No person shall be detained in police custody for a period exceeding twenty-four hours without being informed of the grounds of arrest and without being afforded the opportunity to consult a legal practitioner of their choice. Any violation of this provision shall render the detention unlawful, and the detained individual shall be entitled to immediate release and compensation as prescribed by law. The State shall ensure that all law enforcement agencies are adequately trained in upholding these constitutional protections to prevent any infringement of fundamental rights.",
|
||||
"question_type": "obligations",
|
||||
"question": "What obligations do law enforcement agencies have under CRPC 145 regarding the detention of individuals in police custody?",
|
||||
"answer": "Under CRPC 145, law enforcement agencies are obligated to inform any detained individual of the grounds for their arrest and to provide them with the opportunity to consult a legal practitioner of their choice within twenty-four hours. Failure to comply with these obligations will render the detention unlawful, entitling the detained individual to immediate release and compensation as prescribed by law. Furthermore, the State must ensure that all law enforcement personnel are adequately trained to uphold these constitutional protections."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 227",
|
||||
"section_title": "Rights of Co-owners in Property Disputes",
|
||||
"provision": "In any case where property is jointly owned by two or more individuals, each co-owner shall possess an equal right to use and enjoy the property, subject to the principle of reasonable enjoyment. No co-owner shall be entitled to alienate their share of the property without the express consent of all other co-owners; failure to obtain such consent shall render any transfer voidable at the instance of the non-consenting co-owners. In the event of a dispute arising from the exercise of such rights, the parties shall seek resolution through mediation, failing which they may pursue their claims in a competent civil court.",
|
||||
"question_type": "penalty",
|
||||
"question": "What penalty may arise if a co-owner attempts to alienate their share of jointly owned property without the consent of the other co-owners according to IPC 227?",
|
||||
"answer": "If a co-owner attempts to alienate their share of the property without obtaining the express consent of the other co-owners, such a transfer will be rendered voidable at the request of the non-consenting co-owners. This means that the non-consenting co-owners can challenge the validity of the transfer, potentially leading to legal disputes and the need for resolution through mediation or civil court."
|
||||
},
|
||||
{
|
||||
"section_number": "CNP 101",
|
||||
"section_title": "Protection of Fundamental Rights",
|
||||
"provision": "Every individual shall have the right to seek judicial redress for the infringement of their fundamental rights as enumerated in the Constitution of India. The State is mandated to ensure that no action, law, or policy contravenes the rights guaranteed under Articles 14 to 32, and any violation thereof shall entitle the aggrieved party to compensation as deemed fit by the judiciary. Furthermore, the Supreme Court and High Courts shall possess the authority to issue writs, orders, or directions for the enforcement of such rights, thereby reinforcing the foundational principles of justice and equality within the Republic.",
|
||||
"question_type": "definition",
|
||||
"question": "What is the right of individuals regarding the infringement of their fundamental rights as per CNP 101?",
|
||||
"answer": "Individuals have the right to seek judicial redress for the infringement of their fundamental rights as outlined in the Constitution of India, and are entitled to compensation for any violations, with the Supreme Court and High Courts authorized to issue writs and orders to enforce these rights."
|
||||
},
|
||||
{
|
||||
"section_number": "CPC 123",
|
||||
"section_title": "Application for Interlocutory Relief",
|
||||
"provision": "An application for interlocutory relief shall be made in the prescribed format, detailing the nature of the relief sought and the grounds thereof, along with any supporting affidavits and documents. The court shall, within three days of the filing of such application, schedule a preliminary hearing, during which the party seeking relief must demonstrate the urgency and necessity of the relief sought, based on a prima facie case and the balance of convenience. The court may grant interim orders as it deems fit, subject to the condition that the applicant shall bear the costs of any potential loss incurred by the opposing party due to such interim relief.",
|
||||
"question_type": "exceptions",
|
||||
"question": "Are there any exceptions to the requirement of demonstrating urgency and necessity for an application for interlocutory relief under CPC 123?",
|
||||
"answer": "Yes, while CPC 123 generally requires the applicant to demonstrate urgency and necessity for the relief sought, exceptions may arise in cases where the relief requested is of a nature that inherently addresses imminent harm or where statutory provisions specifically allow for expedited procedures. However, the applicant must still adhere to the prescribed format and provide supporting affidavits and documentation as mandated by the provision."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 507A",
|
||||
"section_title": "Unauthorized Access and Data Breach",
|
||||
"provision": "Whoever, without lawful authority, accesses a computer resource or a computer network with the intent to cause or knowing that he is likely to cause wrongful loss or damage to any person, or to facilitate the commission of a crime, shall be punished with imprisonment for a term which may extend to three years, or with fine which may extend to five lakh rupees, or with both. Furthermore, if such access results in the theft, alteration, or deletion of data, the offender shall be liable for enhanced penalties as prescribed in this section, including a minimum fine of ten lakh rupees and imprisonment for a term extending to five years.",
|
||||
"question_type": "definition",
|
||||
"question": "What constitutes unauthorized access under IPC 507A?",
|
||||
"answer": "Unauthorized access under IPC 507A is defined as accessing a computer resource or a computer network without lawful authority, with the intent to cause or knowing that one is likely to cause wrongful loss or damage to any person, or to facilitate the commission of a crime."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 512",
|
||||
"section_title": "Offense of Public Disturbance",
|
||||
"provision": "Whoever intentionally causes a public disturbance by engaging in acts that promote hatred, incite violence, or create fear among members of the community shall be punished with imprisonment for a term which may extend to three years, or with a fine which may extend to fifty thousand rupees, or with both. In determining the severity of the penalty, the court shall consider the magnitude of the disturbance, consequent harm caused to public order, and any prior convictions of the offender under this section or similar offenses.",
|
||||
"question_type": "definition",
|
||||
"question": "What constitutes the offense of public disturbance under IPC 512?",
|
||||
"answer": "The offense of public disturbance under IPC 512 is constituted by intentionally causing a public disturbance through acts that promote hatred, incite violence, or create fear among community members."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 123A",
|
||||
"section_title": "Protection of Indigenous Knowledge and Cultural Expressions",
|
||||
"provision": "Whoever unlawfully appropriates, uses, or commercializes indigenous knowledge and cultural expressions without obtaining prior informed consent from the relevant indigenous communities, shall be punished with imprisonment for a term not exceeding five years, or with fine, or both. The term \"indigenous knowledge\" includes traditional practices, innovations, and expressions inherent to indigenous communities, and any such appropriation shall be considered a violation of the community's moral rights as custodians of their cultural heritage. The provisions of this section shall be in addition to any other rights or remedies available under existing intellectual property laws.",
|
||||
"question_type": "obligations",
|
||||
"question": "What obligations do individuals have regarding the use of indigenous knowledge and cultural expressions according to IPC 123A?",
|
||||
"answer": "Individuals are obligated to obtain prior informed consent from the relevant indigenous communities before appropriating, using, or commercializing indigenous knowledge and cultural expressions. Failure to comply with this obligation may result in imprisonment for up to five years, a fine, or both, as it constitutes a violation of the moral rights of the indigenous communities as custodians of their cultural heritage."
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 127",
|
||||
"section_title": "Admissibility of Digital Evidence",
|
||||
"provision": "Notwithstanding any provisions to the contrary, electronic records shall be admissible as evidence in any judicial proceedings, provided that such records are generated, stored, and retrieved in a manner that ensures their authenticity and integrity. The party seeking to introduce such evidence shall bear the burden of establishing its reliability through appropriate certification or corroborative witness testimony, unless the opposing party concedes to the admissibility of the digital evidence. The court may, at its discretion, allow for the examination of the digital evidence to ascertain its relevance and evidentiary value.",
|
||||
"question_type": "rights",
|
||||
"question": "What rights do parties have regarding the admissibility of digital evidence in judicial proceedings under IEA 127?",
|
||||
"answer": "Under IEA 127, parties have the right to introduce electronic records as evidence in court, provided they can demonstrate the records' authenticity and integrity. The party presenting the digital evidence has the responsibility to prove its reliability, either through certification or witness testimony. This burden does not apply if the opposing party concedes to the admissibility of the digital evidence. The court also has the discretion to examine the digital evidence to determine its relevance and evidentiary value."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 456",
|
||||
"section_title": "Offense of Public Disorder",
|
||||
"provision": "Whoever, with the intent to cause public alarm or disturbance, engages in behavior that incites violence, fear, or panic among the general populace shall be punishable with imprisonment of either description for a term which may extend to five years, or with fine which may extend to ten thousand rupees, or with both. In determining the sentence, the court shall take into account the nature and extent of the disruption caused, and any prior offenses committed by the accused.",
|
||||
"question_type": "exceptions",
|
||||
"question": "Are there any exceptions to the offense of public disorder under IPC 456 for individuals who engage in behavior that might cause alarm but do so for a legitimate purpose, such as public safety or awareness?",
|
||||
"answer": "Yes, there may be exceptions for individuals who engage in behavior that could cause public alarm or disturbance if their actions are intended for a legitimate purpose, such as ensuring public safety or raising awareness about a critical issue. The court will consider the intent behind the behavior and the context in which it occurred when determining if it constitutes an offense under IPC 456. However, the burden of proof lies with the accused to demonstrate that their actions were justified and not intended to incite violence, fear, or panic."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 123A",
|
||||
"section_title": "Protection of Digital Copyright",
|
||||
"provision": "Any person who, without the authorization of the copyright owner, reproduces, distributes, or publicly displays a copyrighted digital work in a manner that enables unlawful access or download by a third party shall be punishable with imprisonment for a term which may extend to three years, or with fine which may extend to five lakh rupees, or with both. The courts shall consider the nature of the work, the scale of distribution, and the intent behind the infringement while determining the appropriate penalty. This provision shall not apply to fair use as delineated under the Copyright Act, 1957.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps should a copyright owner take if they believe their digital work has been reproduced or distributed without authorization under IPC 123A?",
|
||||
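"answer": "If a copyright owner suspects unauthorized reproduction or distribution of their digital work under IPC 123A, they should follow these steps: (1) confirm that the reproduction, distribution, or public display took place without their authorization and in a manner that enables unlawful access or download by a third party; (2) check that the use does not qualify as fair use under the Copyright Act, 1957, since the provision does not apply to fair use; (3) gather evidence of the nature of the work, the scale of distribution, and the intent behind the infringement, as these are the factors the courts consider when determining the penalty; and (4) bring the matter, together with that evidence, before the appropriate authorities so that proceedings under IPC 123A can be initiated."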
|
||||
},
|
||||
{
|
||||
"section_number": "CPC 207",
|
||||
"section_title": "Application for Interim Relief",
|
||||
"provision": "In any suit where a party seeks urgent relief based on a prima facie showing of entitlement, the Court may, upon application, grant interim relief to maintain the status quo pending final adjudication. Such application shall be accompanied by an affidavit detailing the grounds for urgency, and the Court shall endeavor to hear and dispose of such application within seven days of filing, unless shown to be impracticable. The order granting or denying interim relief shall be recorded with reasons and shall be subject to the right of appeal under the provisions of this Code.",
|
||||
"question_type": "procedure",
|
||||
"question": "What is the procedure for applying for interim relief under CPC 207, and what are the requirements for the application to be considered by the Court?",
|
||||
"answer": "To apply for interim relief under CPC 207, a party must file an application demonstrating a prima facie showing of entitlement to urgent relief. This application must be accompanied by an affidavit that details the grounds for urgency. The Court shall endeavor to hear and dispose of the application within seven days of filing, unless this is shown to be impracticable. The Court will then issue an order granting or denying the interim relief, which must be recorded along with the reasons for the decision. Additionally, the order is subject to the right of appeal as outlined in this Code."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 456",
|
||||
"section_title": "Criminal Intimidation with Intent to Cause Harm",
|
||||
"provision": "Whoever, with intent to cause harm or alarm, threatens any person with the infliction of death or grievous hurt, or with the destruction of property, shall be punished with imprisonment of either description for a term which may extend to three years, or with fine which may extend to fifty thousand rupees, or with both. If the offense is committed in furtherance of an organized criminal activity, the term of imprisonment may extend to five years.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide examples of actions that would be considered criminal intimidation under IPC 456?",
|
||||
"answer": "Yes, under IPC 456, examples of criminal intimidation include a person threatening to kill another individual if they do not pay a debt, or someone warning a neighbor that they will set fire to their property if they do not comply with certain demands. Additionally, if a group engages in organized criminal activity and threatens individuals with serious harm or property destruction to enforce their control, that would also fall under this provision, potentially leading to a longer imprisonment term."
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 115",
|
||||
"section_title": "Admissibility of Electronic Records in Civil Proceedings",
|
||||
"provision": "Notwithstanding any provisions to the contrary, electronic records shall be admissible in civil proceedings as evidence, provided that such records are accompanied by a certificate of authenticity attesting to their integrity and accuracy. The court may, at its discretion, take into account the reliability of the technology used in the creation, storage, and retrieval of these records, as well as any potential alterations that may have occurred. Further, any party seeking to introduce electronic records must notify the opposing party at least seven days prior to the hearing, allowing for adequate preparation to challenge the admissibility of such evidence.",
|
||||
"question_type": "penalty",
|
||||
"question": "What are the potential consequences for a party that fails to notify the opposing party at least seven days prior to a hearing when intending to introduce electronic records as evidence under IEA 115?",
|
||||
"answer": "If a party fails to provide the required seven-day notice before introducing electronic records in a civil proceeding, the court may deem the electronic records inadmissible as evidence. This could hinder the party's ability to substantiate their claims or defenses, potentially resulting in unfavorable outcomes in the case."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 134A",
|
||||
"section_title": "Remedies for Breach of Contract",
|
||||
"provision": "In the event of a breach of contract, the aggrieved party shall be entitled to pursue remedies as defined herein: (1) Specific performance of the contract may be ordered by a competent court where monetary damages are inadequate to remedy the loss suffered. (2) In cases where the breach is wilful and unexcused, the aggrieved party may also claim punitive damages not exceeding fifty percent of the actual damages incurred. (3) Parties may further stipulate in the contract provisions for liquidated damages, which shall be enforceable unless deemed unconscionable by the court.",
|
||||
"question_type": "definition",
|
||||
"question": "What remedies are available to an aggrieved party in the event of a breach of contract according to IPC 134A?",
|
||||
"answer": "According to IPC 134A, the remedies available to an aggrieved party in the event of a breach of contract include: (1) specific performance of the contract when monetary damages are inadequate, (2) punitive damages not exceeding fifty percent of the actual damages if the breach is wilful and unexcused, and (3) liquidated damages as stipulated in the contract, which are enforceable unless deemed unconscionable by the court."
|
||||
},
|
||||
{
|
||||
"section_number": "CNR 101",
|
||||
"section_title": "Protection of Fundamental Rights",
|
||||
"provision": "Every individual shall have the right to life, liberty, and personal security, which shall be inviolable and protected against arbitrary deprivation by the State. The State shall ensure that any infringement of these rights is subject to judicial review, and appropriate remedies shall be provided to individuals whose rights have been violated. The Parliament shall enact necessary legislation to define, safeguard, and enforce these rights, ensuring that no law or action contravenes the spirit of this provision without just cause.",
|
||||
"question_type": "definition",
|
||||
"question": "What fundamental rights are protected under CNR 101, and what obligations does the State have regarding these rights?",
|
||||
"answer": "Under CNR 101, every individual is guaranteed the rights to life, liberty, and personal security, which are inviolable and protected against arbitrary deprivation by the State. The State is obligated to ensure that any infringement of these rights is subject to judicial review and must provide appropriate remedies to individuals whose rights have been violated. Additionally, the Parliament is required to enact necessary legislation to define, safeguard, and enforce these rights, ensuring no law or action contradicts this provision without just cause."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 124A",
|
||||
"section_title": "Protection of Unregistered Intellectual Property Rights",
|
||||
"provision": "Any individual or entity claiming ownership of an unregistered intellectual property right, including but not limited to trade secrets, designs, and innovations, shall be entitled to seek legal remedy for unauthorized use or disclosure. The aggrieved party may file a civil suit in the appropriate jurisdiction, whereupon the court shall assess the validity of the claimed rights and may grant injunctive relief, damages, or any other relief deemed appropriate to prevent infringement and preserve the integrity of the intellectual property. This protection shall extend to the duration of the claimant's reasonable efforts to maintain the confidentiality and exclusivity of the intellectual property in question.",
|
||||
"question_type": "definition",
|
||||
"question": "What rights are protected under IPC 124A regarding unregistered intellectual property, and what legal remedies are available to the aggrieved party?",
|
||||
"answer": "IPC 124A protects unregistered intellectual property rights, including trade secrets, designs, and innovations. An individual or entity claiming ownership may seek legal remedies for unauthorized use or disclosure by filing a civil suit in the appropriate jurisdiction. The court will assess the validity of the claimed rights and may grant injunctive relief, damages, or other appropriate remedies to prevent infringement and maintain the integrity of the intellectual property."
|
||||
},
|
||||
{
|
||||
"section_number": "CL 204",
|
||||
"section_title": "Remedies for Breach of Contract",
|
||||
"provision": "In the event of a breach of contract, the aggrieved party shall be entitled to seek remedies including specific performance, rescission of the contract, and damages, which may be either general or consequential in nature. The party seeking damages must provide clear evidence of loss incurred as a result of the breach, and the court shall have discretion to award compensation that is deemed just and equitable, taking into account the nature of the breach and the contractual terms. Furthermore, any limitation on the right to claim damages must be explicitly stated within the contract to be enforceable.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide examples of the types of remedies available for breach of contract as outlined in CL 204?",
|
||||
"answer": "Yes, under CL 204, there are several remedies available for breach of contract. For instance, specific performance may be sought if the aggrieved party wants the breaching party to fulfill their contractual obligations, such as delivering a unique piece of art that was promised. Rescission of the contract could be an option if the aggrieved party wishes to cancel the contract entirely and return to their pre-contractual position, for example, if a buyer discovers that a seller misrepresented the condition of a property. Additionally, damages can be claimed, which may include general damages for direct losses, like the cost of hiring a substitute contractor after the original contractor failed to perform, or consequential damages, such as loss of business profits resulting from the delay in project completion due to the breach. It is important to note that the party seeking damages must provide evidence of the loss incurred, and any limitations on claiming damages must be clearly stated in the contract to be enforceable."
|
||||
},
|
||||
{
|
||||
"section_number": "CPC 123A",
|
||||
"section_title": "Provision for Electronic Filing of Pleadings",
|
||||
"provision": "The Court may, upon application by any party, permit the electronic filing of pleadings, documents, and evidence in accordance with the guidelines issued by the Supreme Court. Such electronic submissions shall be deemed to be authentic and shall hold the same legal sanctity as original physical documents, provided that such filings comply with the prescribed digital signature requirements and are submitted within the timelines set forth by the Court. Any failure to comply with the electronic filing protocols may result in the rejection of the documents filed or the imposition of penalties as deemed appropriate by the presiding judge.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide an example of a situation where a party might utilize electronic filing of pleadings according to CPC 123A?",
|
||||
"answer": "Sure! For instance, if a plaintiff wishes to file a motion for summary judgment, they can submit their pleading electronically if they apply to the Court and receive permission. They must ensure their electronic submission adheres to the Supreme Court's guidelines, including using a valid digital signature and submitting it by the court's deadline. If they fail to meet these requirements, the court may reject their filing or impose penalties."
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 78",
|
||||
"section_title": "Admissibility of Electronic Evidence",
|
||||
"provision": "Notwithstanding any provision to the contrary, electronic evidence shall be admissible in judicial proceedings provided that such evidence is authenticated through a digital signature, or corroborated by a competent testimony that verifiably establishes its integrity and relevance to the matter in issue. The court may, in its discretion, require the production of the original electronic device or system from which the evidence is derived to determine its authenticity, unless such requirement is waived by mutual consent of the parties involved.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps must a party take to ensure that electronic evidence is admissible in judicial proceedings according to IEA 78?",
|
||||
"answer": "To ensure that electronic evidence is admissible under IEA 78, a party must authenticate the evidence either through a digital signature or by providing corroborating testimony from a competent witness that verifies the evidence's integrity and relevance to the case. Additionally, the party may need to produce the original electronic device or system from which the evidence was obtained, unless this requirement is waived by mutual consent of the parties involved."
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 67A",
|
||||
"section_title": "Admissibility of Digital Evidence",
|
||||
"provision": "Notwithstanding any provisions to the contrary, any electronic record or digital evidence shall be admissible in a court of law, provided that the party seeking to introduce such evidence demonstrates the authenticity and integrity of the record through a reliable digital signature or encryption method. The court may, at its discretion, require further corroborative evidence to substantiate the reliability of the digital evidence presented, ensuring that the probative value outweighs any potential prejudicial effect.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps must a party take to ensure the admissibility of digital evidence in court according to IEA 67A?",
|
||||
"answer": "To ensure the admissibility of digital evidence in court under IEA 67A, the party seeking to introduce the evidence must demonstrate the authenticity and integrity of the electronic record by using a reliable digital signature or encryption method. Additionally, the court may require further corroborative evidence to confirm the reliability of the digital evidence, ensuring that its probative value outweighs any potential prejudicial effect."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 798",
|
||||
"section_title": "Protection of Traditional Knowledge",
|
||||
"provision": "Any person who, without lawful authority, uses, reproduces, or distributes traditional knowledge as defined under this Act, shall be liable for infringement of intellectual property rights. Such traditional knowledge shall include but not be limited to, cultural practices, medicinal formulations, or agricultural methods passed down through generations within indigenous communities. The affected community shall have the right to seek remedies, including injunctions and damages, in accordance with the provisions set forth in this section.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide examples of actions that would infringe on traditional knowledge as per IPC 798?",
|
||||
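"answer": "Yes, examples of actions that would infringe on traditional knowledge under IPC 798 include: (1) using a medicinal formulation passed down through generations within an indigenous community, without lawful authority, to manufacture and sell a commercial product; (2) reproducing a community's cultural practices, for instance in publications or recordings, without lawful authority; and (3) distributing agricultural methods handed down within an indigenous community without lawful authority. In each case, the affected community has the right to seek remedies, including injunctions and damages."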
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 123A",
|
||||
"section_title": "Protection of Unregistered Trade Secrets",
|
||||
"provision": "Whosoever, in the course of trade or business, unlawfully discloses or uses a trade secret or confidential commercial information obtained through breach of a duty of confidentiality, shall be punishable with imprisonment for a term which may extend to three years or with fine, or with both. For the purposes of this section, \"trade secret\" shall mean any formula, pattern, compilation, program, device, method, technique, or process that derives independent economic value from not being generally known to or readily accessible by others who can obtain economic value from its disclosure or use. The burden of proof regarding the confidentiality of such information shall lie upon the claimant.",
|
||||
"question_type": "obligations",
|
||||
"question": "What obligations do individuals have regarding the disclosure of trade secrets under IPC 123A?",
|
||||
"answer": "Individuals are obligated not to unlawfully disclose or use any trade secret or confidential commercial information that they have obtained through a breach of a duty of confidentiality. If they fail to uphold this obligation, they may face penalties including imprisonment for up to three years, a fine, or both. Additionally, the claimant has the burden of proof to demonstrate that the information in question is confidential."
|
||||
},
|
||||
{
|
||||
"section_number": "FLA 102",
|
||||
"section_title": "Rights of Inheritance for Female Heirs",
|
||||
"provision": "In the event of the demise of a male intestate, female heirs, including daughters and widows, shall have an equal right to inherit the estate of the deceased on par with male heirs. The distribution of such inheritance shall be executed in accordance with the principles of equitable division, ensuring that each female heir receives a share that is not less than one-fourth of the total estate, unless expressly disclaimed by the heir in a legally binding written document. This section aims to uphold gender equality in matters of familial succession and inheritance rights under Hindu, Muslim, and other applicable personal laws in India.",
|
||||
"question_type": "penalty",
|
||||
"question": "What are the penalties for failing to adhere to the inheritance rights outlined in FLA 102 regarding female heirs?",
|
||||
"answer": "While FLA 102 does not specify penalties within the provision itself, failure to comply with the equitable division of the estate as mandated can lead to legal action by female heirs. This may result in the court enforcing the rightful distribution of the estate, which could include the imposition of fines or other sanctions against the estate’s executors or those responsible for the distribution, as determined by the applicable legal framework."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 123A",
|
||||
"section_title": "Protection of Indigenous Knowledge and Cultural Expressions",
|
||||
"provision": "Any person who unlawfully appropriates, reproduces, or disseminates indigenous knowledge or cultural expressions without the prior consent of the indigenous community shall be liable for infringement of intellectual property rights under this section. The aggrieved indigenous community may seek remedies including injunctions and damages, and the court shall consider the cultural significance and traditional practices associated with such knowledge in its deliberations. This provision aims to safeguard the heritage and intellectual contributions of indigenous communities against unauthorized exploitation.",
|
||||
"question_type": "penalty",
|
||||
"question": "What penalties can a person face for unlawfully appropriating indigenous knowledge or cultural expressions under IPC 123A?",
|
||||
"answer": "A person who unlawfully appropriates, reproduces, or disseminates indigenous knowledge or cultural expressions without the prior consent of the indigenous community may be liable for infringement of intellectual property rights. The aggrieved indigenous community can seek remedies such as injunctions to prevent further infringement and damages for any losses incurred. The court will also consider the cultural significance and traditional practices associated with the knowledge in its decisions."
|
||||
},
|
||||
{
|
||||
"section_number": "FLA 101",
|
||||
"section_title": "Rights of Inheritance Among Hindu Succession",
|
||||
"provision": "In the event of the demise of a Hindu individual, the property held by such individual, whether ancestral or self-acquired, shall devolve upon their legal heirs as defined under this Act, in accordance with the principles of equal partition among male and female heirs. The widow and children of the deceased shall inherit a minimum of one-third of the total estate, notwithstanding any prior testamentary disposition made by the deceased, unless expressly waived in writing by the heirs prior to the individual's death. The provisions herein shall apply irrespective of the religious or customary practices governing succession, aiming to uphold gender equality in inheritance rights.",
|
||||
"question_type": "rights",
|
||||
"question": "What rights do the widow and children of a deceased Hindu individual have regarding inheritance under FLA 101?",
|
||||
"answer": "The widow and children of a deceased Hindu individual are entitled to inherit a minimum of one-third of the total estate, regardless of any prior testamentary disposition made by the deceased. This right is upheld under the Act to ensure gender equality in inheritance, and it applies to both ancestral and self-acquired property."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 420B",
|
||||
"section_title": "Fraudulent Misrepresentation in Commercial Transactions",
|
||||
"provision": "Whosoever, with intent to deceive, misrepresents a material fact regarding goods or services in the course of any commercial transaction, thereby causing financial loss to another party, shall be punished with imprisonment for a term which may extend to three years, or with fine which may extend to fifty thousand rupees, or with both. Explanation: For the purposes of this section, \"material fact\" shall mean any fact that, if known, would likely affect the decision of a reasonable person to enter into the transaction.",
|
||||
"question_type": "exceptions",
|
||||
"question": "Are there any exceptions under IPC 420B where a party may not be held liable for fraudulent misrepresentation in commercial transactions?",
|
||||
"answer": "Yes, an exception under IPC 420B may apply if the misrepresentation was made without the intent to deceive, such as in cases where the party genuinely believed the information provided to be true, or if the misrepresentation pertains to opinions or predictions rather than material facts. Additionally, if the party can demonstrate that the other party had prior knowledge of the facts or waived their right to rely on the misrepresentation, liability may not be established under this section."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 432",
|
||||
"section_title": "Protection of Trade Secrets and Confidential Information",
|
||||
"provision": "Whoever unlawfully discloses, acquires, or uses a trade secret or any confidential information belonging to another party, without the express consent of the owner, shall be punishable with imprisonment for a term which may extend to three years, or with fine, or with both. For the purposes of this section, \"trade secret\" shall include any formula, practice, process, design, instrument, pattern, or compilation of information that is not generally known or reasonably ascertainable by others and that provides a competitive advantage to the owner. The provisions of this section shall not apply to disclosures made under compulsion of law or in the course of legitimate business practices.",
|
||||
"question_type": "exceptions",
|
||||
"question": "What are the exceptions to the provisions of IPC 432 regarding the unlawful disclosure of trade secrets and confidential information?",
|
||||
"answer": "The provisions of IPC 432 do not apply to disclosures made under compulsion of law or in the course of legitimate business practices. This means that if a person is legally required to disclose trade secrets or if the disclosure occurs as part of lawful business operations, they are exempt from the penalties outlined in this section."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 123A",
|
||||
"section_title": "Rights Pertaining to Inherited Property",
|
||||
"provision": "In cases where property is inherited, any disputes arising among heirs regarding the rightful ownership, partition, or claim over such property shall be resolved in accordance with the principles of ancestral succession as defined under this Code. Any party claiming a right to the inherited property must provide substantial evidence of lineage and lawful entitlement, failing which the claim shall be deemed invalid. Furthermore, the court shall have the authority to appoint a mediator to facilitate negotiation among parties prior to adjudication, encouraging amicable settlements while safeguarding the rights of all claimants.",
|
||||
"question_type": "rights",
|
||||
"question": "What rights do heirs have regarding inherited property disputes under IPC 123A?",
|
||||
"answer": "Heirs have the right to resolve disputes over inherited property ownership, partition, or claims according to the principles of ancestral succession. However, any heir claiming a right to the property must provide substantial evidence of their lineage and lawful entitlement. If they fail to do so, their claim will be considered invalid. Additionally, the court can appoint a mediator to help facilitate negotiations among the parties, promoting amicable settlements while protecting the rights of all claimants."
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 145",
|
||||
"section_title": "Admissibility of Digital Evidence",
|
||||
"provision": "Notwithstanding any provision to the contrary, digital evidence shall be admissible in any judicial proceeding if it is accompanied by a certificate of authenticity issued by a competent authority, attesting to the integrity, reliability, and original source of the data. The court shall evaluate the probative value of such evidence, taking into consideration the methods of collection, preservation, and transmission, along with any potential alterations, before determining its admissibility. In cases where digital evidence is presented, the burden of proof shall rest upon the party introducing such evidence to establish its authenticity and relevance.",
|
||||
"question_type": "definition",
|
||||
"question": "What is required for digital evidence to be admissible in judicial proceedings according to IEA 145?",
|
||||
"answer": "Digital evidence is admissible in judicial proceedings if it is accompanied by a certificate of authenticity issued by a competent authority, which attests to the integrity, reliability, and original source of the data. Additionally, the court will evaluate its probative value considering the methods of collection, preservation, and transmission, as well as any potential alterations, and the party introducing the evidence must prove its authenticity and relevance."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 509A",
|
||||
"section_title": "Criminal Intimidation by Means of Digital Platforms",
|
||||
"provision": "Whosoever, by means of any electronic, digital, or computer-based communication, threatens or causes harm to any person, including but not limited to threats of violence, coercion, or defamation, shall be punished with imprisonment for a term which may extend to three years, or with fine, or with both. In the case of aggravated circumstances, such as the use of multiple accounts or persistent harassment, the punishment may extend to five years of imprisonment. The provisions of this section shall be in addition to any other applicable laws concerning harassment or intimidation.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide examples of actions that would be considered criminal intimidation under IPC 509A?",
|
||||
"answer": "Yes, several actions can be considered criminal intimidation under IPC 509A. For instance, if an individual sends threatening messages via social media platforms, such as threatening physical harm or coercing someone into doing something against their will, this would qualify. Additionally, if someone uses multiple online accounts to continuously harass another person by spreading false information or defamatory statements about them, this would also fall under the provisions of IPC 509A. Lastly, if a person creates a fake profile to intimidate or threaten someone, this too would be punishable under this section."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 471A",
|
||||
"section_title": "Protection of Trade Secrets",
|
||||
"provision": "Whoever unlawfully obtains, discloses, or uses a trade secret, knowing or having reason to know that such information was obtained through improper means, shall be punishable with imprisonment for a term which may extend to three years, or with fine which may extend to five lakh rupees, or with both. A trade secret shall be defined as any formula, pattern, compilation, program, device, method, technique, or process that derives independent economic value from not being generally known or readily ascertainable to the public, and is the subject of reasonable efforts to maintain its secrecy.",
|
||||
"question_type": "exceptions",
|
||||
"question": "Are there any exceptions under IPC 471A for disclosing or using trade secrets if the information is obtained through lawful means?",
|
||||
"answer": "Yes, IPC 471A pertains specifically to the unlawful obtaining, disclosing, or using of trade secrets. If a person acquires a trade secret through lawful means, such as independent discovery or legitimate access, they would not be punishable under this provision. Additionally, if the trade secret becomes publicly known or is disclosed as a result of legal obligations, such as during a court proceeding, those actions may also fall outside the scope of punishment under this section."
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 65A",
|
||||
"section_title": "Admissibility of Digital Evidence",
|
||||
"provision": "Notwithstanding any provision to the contrary, digital evidence, including but not limited to electronic records, audio and video files, and data stored in digital devices, shall be admissible in any proceedings if such evidence is accompanied by a certificate from a competent authority confirming the integrity and authenticity of the data. The court may, however, require additional corroborative evidence to substantiate the claims made through such digital materials, ensuring that the principles of fairness and justice are upheld.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide examples of digital evidence that would be admissible in court under IEA 65A if accompanied by the proper certification?",
|
||||
"answer": "Yes, examples of digital evidence that would be admissible in court under IEA 65A include electronic records such as emails or digital contracts, audio files like recorded conversations relevant to the case, video files such as surveillance footage, and data stored on digital devices like smartphones or computers, provided that each piece of evidence is accompanied by a certificate from a competent authority verifying its integrity and authenticity. The court may still request additional corroborative evidence to ensure fairness in the proceedings."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 507A",
|
||||
"section_title": "Unauthorized Access and Data Breach",
|
||||
"provision": "Whoever, without lawful authority or consent, intentionally accesses a computer system or network and obtains, alters, or deletes data shall be punished with imprisonment for a term which may extend to three years, or with fine which may extend to fifty thousand rupees, or with both. In case the unauthorized access results in harm or loss to any person or entity, the term of imprisonment may extend to five years, along with a fine which may be determined by the court based on the gravity of the harm caused.",
|
||||
"question_type": "rights",
|
||||
"question": "What rights do individuals have if they are victims of unauthorized access and data breaches under IPC 507A?",
|
||||
"answer": "Individuals who are victims of unauthorized access and data breaches have the right to seek legal recourse against the perpetrator. They can report the incident to law enforcement, and if the unauthorized access results in harm or loss, they have the right to pursue compensation for damages through the court system. Additionally, they can expect that the law provides for penalties against the offender, which may include imprisonment and fines, thereby reinforcing their rights to safety and protection of their personal data."
|
||||
},
|
||||
{
|
||||
"section_number": "FLA 123",
|
||||
"section_title": "Rights of Inheritance Among Heirs",
|
||||
"provision": "In cases of intestate succession, all heirs shall inherit the estate of the deceased in accordance with the principles of equitable distribution, wherein the surviving spouse shall receive one-half of the estate, while the remaining half shall be divided equally among the legitimate children. In the absence of legitimate children, the estate shall pass to the surviving parents, and if none exist, to the siblings in equal shares. The court shall ensure that the rights of all heirs are protected, preventing any testamentary disposition that contravenes the provisions set forth herein.",
|
||||
"question_type": "exceptions",
|
||||
"question": "Are there any exceptions to the equitable distribution of the estate among heirs as outlined in FLA 123?",
|
||||
"answer": "Yes, exceptions exist where a testamentary disposition may override the standard distribution if it is legally valid and does not contravene the protections established in FLA 123. Additionally, if the deceased has left behind a valid will that specifies different distributions, those instructions may take precedence over intestate succession rules, provided they comply with relevant legal standards. However, the court will still ensure that no such disposition infringes on the rights of the heirs as defined in the provision."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 482",
|
||||
"section_title": "Rights of Co-Owners in Joint Property",
|
||||
"provision": "In any case where two or more persons are co-owners of an immovable property, no co-owner shall alienate their share of the property without the consent of the other co-owners, unless otherwise stipulated by a prior agreement. In the event of a dispute regarding the use or management of the joint property, any co-owner may apply to the appropriate civil court for a partition of the property, which shall be conducted in accordance with the principles of equity and justice, ensuring that the rights of all parties are duly considered.",
|
||||
"question_type": "obligations",
|
||||
"question": "What obligation do co-owners of immovable property have regarding the alienation of their shares according to IPC 482?",
|
||||
"answer": "Co-owners of immovable property are obligated not to alienate their share without the consent of the other co-owners, unless there is a prior agreement that stipulates otherwise."
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 75",
|
||||
"section_title": "Admissibility of Digital Evidence",
|
||||
"provision": "Digital evidence shall be admissible in any judicial proceedings if it is authenticated by the party presenting it, demonstrating its integrity and reliability. The court shall consider the methods of collection, preservation, and presentation of such evidence, and may require corroboration from independent sources when the authenticity is contested. Any digital evidence obtained in violation of fundamental rights as enshrined in the Constitution shall be deemed inadmissible.",
|
||||
"question_type": "definition",
|
||||
"question": "What is the criterion for the admissibility of digital evidence in judicial proceedings according to IEA 75?",
|
||||
"answer": "Digital evidence is admissible in judicial proceedings if it is authenticated by the presenting party, demonstrating its integrity and reliability, while the court considers the methods of collection, preservation, and presentation. Additionally, such evidence must not violate fundamental rights as outlined in the Constitution, or it will be deemed inadmissible."
|
||||
},
|
||||
{
|
||||
"section_number": "CTP 101",
|
||||
"section_title": "Protection of Fundamental Rights",
|
||||
"provision": "Every individual shall have the right to seek redress for any violation of their fundamental rights as enshrined in the Constitution, through an expedited process in the appropriate constitutional court. The court shall ensure that any infringement of these rights is addressed promptly and judiciously, and may grant interim relief to safeguard the affected individual's rights during the pendency of the proceedings. Furthermore, any public authority found to have acted in contravention of these rights shall be liable to compensate the aggrieved party, as determined by the court.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide an example of a situation where an individual might seek redress for a violation of their fundamental rights under CTP 101?",
|
||||
"answer": "An example of such a situation could be if a government agency unlawfully detains an individual without due process, violating their right to liberty. The individual can seek redress in the appropriate constitutional court under CTP 101. They may file a petition to have their detention reviewed and potentially obtain interim relief, such as being released from detention while the case is pending. If the court finds that their fundamental rights were indeed violated, the government agency may be ordered to compensate the individual for the unlawful detention."
|
||||
},
|
||||
{
|
||||
"section_number": "CPC 405",
|
||||
"section_title": "Case Management Conference",
|
||||
"provision": "The Court shall, upon the filing of the first written statement or counterclaim, schedule a Case Management Conference within thirty days to facilitate the expeditious resolution of disputes. At this conference, the parties shall be required to outline their claims and defenses, discuss the possibility of settlement, and establish a timeline for the exchange of evidence and subsequent proceedings, ensuring that the principles of justice and efficiency are upheld. Non-compliance with the directives issued during the conference may result in the imposition of sanctions as deemed appropriate by the Court.",
|
||||
"question_type": "definition",
|
||||
"question": "What is a Case Management Conference as defined in CPC 405?",
|
||||
"answer": "A Case Management Conference is a court-scheduled meeting that occurs within thirty days of filing the first written statement or counterclaim, aimed at facilitating the expeditious resolution of disputes. During this conference, parties outline their claims and defenses, discuss settlement possibilities, and establish a timeline for evidence exchange and further proceedings, with the goal of upholding justice and efficiency."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 543",
|
||||
"section_title": "Offense of Cyber Harassment",
|
||||
"provision": "Whosoever, by means of a computer resource or communication device, intentionally engages in conduct that causes harm, alarm, or distress to another person, including but not limited to the transmission of offensive messages, threats, or repeated unwanted communications, shall be punished with imprisonment for a term which may extend to three years, or with fine which may extend to fifty thousand rupees, or with both. In the case of a subsequent offense under this section, the term of imprisonment may extend to five years.",
|
||||
"question_type": "obligations",
|
||||
"question": "What obligations do individuals have under IPC 543 regarding the use of computer resources or communication devices to avoid cyber harassment?",
|
||||
"answer": "Individuals are obligated to refrain from intentionally engaging in conduct that could cause harm, alarm, or distress to others through the use of computer resources or communication devices. This includes avoiding the transmission of offensive messages, threats, or repeated unwanted communications, as doing so may result in legal consequences, including imprisonment or fines."
|
||||
},
|
||||
{
|
||||
"section_number": "PRD 102",
|
||||
"section_title": "Rights and Remedies in Property Disputes",
|
||||
"provision": "In any dispute concerning immovable property, the aggrieved party may file a complaint before the designated Property Dispute Tribunal, which shall have exclusive jurisdiction to adjudicate such matters. The Tribunal shall issue a preliminary order within fifteen days of receiving the complaint, and if necessary, appoint a local commissioner to inspect the property and submit a report, thereby ensuring swift resolution and enforcement of rights. Any party dissatisfied with the Tribunal's decision may appeal to the High Court within sixty days from the date of the order, provided that the appeal is accompanied by a certified copy of the original order.",
|
||||
"question_type": "rights",
|
||||
"question": "What rights does an aggrieved party have in a property dispute according to PRD 102?",
|
||||
"answer": "An aggrieved party in a property dispute has the right to file a complaint before the designated Property Dispute Tribunal, which has exclusive jurisdiction over such matters. They are entitled to a preliminary order within fifteen days and may have a local commissioner appointed for property inspection if necessary. Additionally, if they are dissatisfied with the Tribunal's decision, they have the right to appeal to the High Court within sixty days, provided they include a certified copy of the original order."
|
||||
},
|
||||
{
|
||||
"section_number": "FLA 202",
|
||||
"section_title": "Inheritance Rights of Children Born Out of Wedlock",
|
||||
"provision": "Notwithstanding any other law to the contrary, a child born out of wedlock shall have the same rights of inheritance as a legitimate child in the estate of the biological parents, provided that paternity is established through a legally recognized process. The child shall have the right to claim a share in the ancestral property of the biological father's family, subject to the provisions of the Hindu Succession Act, 1956, or the applicable personal law of the parents. Any clause in a will or testament that seeks to exclude such a child from inheritance based solely on their illegitimacy shall be deemed void and unenforceable.",
|
||||
"question_type": "obligations",
|
||||
"question": "What obligations do biological parents have regarding the inheritance rights of a child born out of wedlock according to FLA 202?",
|
||||
"answer": "Biological parents are obligated to ensure that a child born out of wedlock is granted the same inheritance rights as a legitimate child, provided that paternity is established through a legally recognized process. This includes the obligation to allow the child to claim a share in the ancestral property of the biological father's family, and any will or testament that attempts to exclude the child based solely on their illegitimacy is rendered void and unenforceable."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 890",
|
||||
"section_title": "Protection of Traditional Knowledge",
|
||||
"provision": "Any person who utilizes traditional knowledge for commercial gain without the explicit consent of the community possessing such knowledge shall be liable for infringement of intellectual property rights. The aggrieved community may seek redress through civil courts for remedies including injunctions, damages, and the recognition of their rights as custodians of such knowledge. This provision aims to safeguard the cultural heritage of indigenous populations against unauthorized appropriation and exploitation.",
|
||||
"question_type": "rights",
|
||||
"question": "What rights do communities have under IPC 890 regarding the use of their traditional knowledge for commercial purposes?",
|
||||
"answer": "Communities have the right to give explicit consent before their traditional knowledge is used for commercial gain. If their knowledge is utilized without consent, they can seek redress in civil courts for remedies such as injunctions, damages, and recognition of their rights as custodians of that knowledge, thereby protecting their cultural heritage from unauthorized appropriation and exploitation."
|
||||
},
|
||||
{
|
||||
"section_number": "FLA 202",
|
||||
"section_title": "Rights of Inheritance in Hindu Joint Families",
|
||||
"provision": "In any Hindu joint family, the property acquired by any member through self-acquisition shall devolve upon all coparceners equally upon the demise of the said member, unless a valid testamentary instrument expressly disposes of such property. Furthermore, any coparcener may renounce their right to inherit by a written declaration made in the presence of two witnesses, thereby forfeiting their claim to such property in favor of the remaining coparceners. The provisions of this section shall apply notwithstanding any customary practices that may contravene the equal sharing of self-acquired property within the familial structure.",
|
||||
"question_type": "obligations",
|
||||
"question": "What are the obligations of a coparcener in a Hindu joint family regarding the inheritance of self-acquired property upon the demise of a member?",
|
||||
"answer": "Upon the demise of a member in a Hindu joint family, the obligation of all coparceners is to equally share the self-acquired property of the deceased member, unless there is a valid testamentary instrument that specifies a different distribution. Additionally, any coparcener has the obligation to formally renounce their right to inherit by providing a written declaration in the presence of two witnesses, which will forfeit their claim to the property in favor of the remaining coparceners."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 123A",
|
||||
"section_title": "Rights of Property Co-Owners and Dispute Resolution",
|
||||
"provision": "In instances where two or more individuals hold co-ownership of a property, any co-owner shall possess the right to access and utilize the entire property, subject to fair usage provisions. In the event of a dispute arising from the use, management, or any aspect of the shared property, the aggrieved co-owner may file a complaint with the Jurisdictional Property Dispute Tribunal, which shall convene a mediation session within fifteen days and issue a binding resolution within sixty days from the date of the complaint. Failure to comply with the Tribunal's resolution may result in penalties or execution of partition proceedings as prescribed under this Act.",
|
||||
"question_type": "rights",
|
||||
"question": "What rights do co-owners of a property have under IPC 123A regarding access and dispute resolution?",
|
||||
"answer": "Under IPC 123A, co-owners have the right to access and utilize the entire property, as long as they adhere to fair usage provisions. If a dispute arises concerning the use or management of the property, any aggrieved co-owner has the right to file a complaint with the Jurisdictional Property Dispute Tribunal, which must convene a mediation session within fifteen days and issue a binding resolution within sixty days. Failure to comply with the Tribunal's resolution may lead to penalties or partition proceedings."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 512",
|
||||
"section_title": "Causing Harm through Deceptive Practices",
|
||||
"provision": "Whoever, by means of false representations or fraudulent acts, induces any person to part with property, or to confer any benefit, shall be punished with imprisonment for a term which may extend to three years, or with fine which may extend to fifty thousand rupees, or with both. In cases where the deception results in substantial financial loss to the victim, the imprisonment may extend to five years, and the fine may increase to one lakh rupees. This provision aims to penalize not only the act of deception but also to ensure reparation to the aggrieved party.",
|
||||
"question_type": "penalty",
|
||||
"question": "What are the potential penalties for causing harm through deceptive practices under IPC 512?",
|
||||
"answer": "Under IPC 512, the penalties for causing harm through deceptive practices can include imprisonment for a term of up to three years, a fine of up to fifty thousand rupees, or both. If the deception results in substantial financial loss to the victim, the imprisonment may extend to five years, and the fine may increase to one lakh rupees."
|
||||
},
|
||||
{
|
||||
"section_number": "C.R.P. 102",
|
||||
"section_title": "Right to Dignity and Personal Autonomy",
|
||||
"provision": "Every individual shall have the right to live with dignity and personal autonomy, free from discrimination, oppression, or arbitrary interference by the State or any other person. The State shall take all necessary measures to ensure that this right is protected, promoted, and fulfilled, including the implementation of laws that prevent violations of personal autonomy and safeguard against indignities. Any infringement of this right shall be actionable in a competent court, entailing appropriate remedies and compensation for the aggrieved party.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide examples of situations where the right to dignity and personal autonomy, as outlined in C.R.P. 102, might be violated?",
|
||||
"answer": "Yes, there are several situations that could violate the right to dignity and personal autonomy. For instance, if a government imposes a law that restricts individuals' freedoms to make personal choices regarding their healthcare, such as mandating a specific medical treatment against their will, this would infringe upon their autonomy. Similarly, if an employer discriminates against an employee based on their gender, sexual orientation, or race, denying them equal opportunities or subjecting them to harassment, this would violate their right to dignity. Another example could be a scenario where law enforcement unlawfully detains an individual without cause, thereby interfering with their personal freedom and dignity. In each of these cases, the affected individuals could seek remedies in a competent court for the infringements they have suffered."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 456",
|
||||
"section_title": "Trespass with Intent to Commit an Offense",
|
||||
"provision": "Whoever unlawfully enters into or remains in any building or enclosed space with the intent to commit theft, causing harm, or any other punishable offense shall be liable to imprisonment for a term which may extend to three years, or with fine, or with both. If the trespasser is armed with a deadly weapon during such entry, the term of imprisonment may extend to seven years.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps should law enforcement take when apprehending a suspect under IPC 456 for trespass with intent to commit an offense?",
|
||||
"answer": "Law enforcement should first ensure the safety of the area by assessing if the suspect is armed. If the suspect is armed with a deadly weapon, they should call for backup and use appropriate protocols for armed confrontations. Once the area is secure, officers should identify and detain the suspect, ensuring that they read the suspect their rights. Evidence should be collected, including witness statements and any items related to the offense. The suspect should then be arrested and charged under IPC 456, with the specifics of the intent to commit theft or harm documented for prosecution. Finally, the suspect should be transported to the nearest police station for processing and further legal proceedings."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 501A",
|
||||
"section_title": "Protection of Constitutional Rights",
|
||||
"provision": "Every individual shall have the right to seek legal recourse against any act or omission of the State or any public authority that infringes upon their fundamental rights as guaranteed under Part III of the Constitution of India. The courts shall have the power to issue writs, orders, or directions to enforce such rights, and any violation thereof shall be deemed a punishable offense, attracting imprisonment for a term which may extend to three years, or a fine, or both. This section shall not preclude any individual from seeking compensation for damages arising from violations of their constitutional rights.",
|
||||
"question_type": "rights",
|
||||
"question": "What rights does IPC 501A provide to individuals regarding violations of their fundamental rights by the State or public authorities?",
|
||||
"answer": "IPC 501A grants every individual the right to seek legal recourse against any act or omission by the State or public authorities that infringes upon their fundamental rights as guaranteed under Part III of the Constitution of India. This includes the ability to request courts to issue writs, orders, or directions to enforce these rights, and individuals can also seek compensation for damages resulting from such violations."
|
||||
},
|
||||
{
|
||||
"section_number": "CPL 204",
|
||||
"section_title": "Remedies for Breach of Contract",
|
||||
"provision": "In the event of a breach of contract, the aggrieved party shall be entitled to seek specific performance or, where specific performance is impracticable, claim damages sufficient to restore the party to the position they would have occupied had the contract been performed. The aggrieved party may elect to pursue any combination of equitable remedies, including injunctions to prevent further breaches, provided that such remedies are sought within a period of three years from the date of the breach. Furthermore, in cases of willful or gross negligence leading to breach, the court may award punitive damages, not exceeding two times the actual damages incurred.",
|
||||
"question_type": "definition",
|
||||
"question": "What are the remedies available to an aggrieved party in the event of a breach of contract according to CPL 204?",
|
||||
"answer": "According to CPL 204, the remedies available to an aggrieved party in the event of a breach of contract include seeking specific performance, claiming damages to restore their position as if the contract had been performed, pursuing a combination of equitable remedies such as injunctions to prevent further breaches, and in cases of willful or gross negligence, the possibility of receiving punitive damages not exceeding two times the actual damages incurred. These remedies must be sought within three years from the date of the breach."
|
||||
},
|
||||
{
|
||||
"section_number": "CPC 123A",
|
||||
"section_title": "Consolidation of Suits",
|
||||
"provision": "In any suit wherein multiple causes of action arise from the same transaction or series of transactions, the Court may, upon application by any party, direct the consolidation of such suits into a single proceeding. The Court shall consider the interests of justice, the convenience of the parties, and the potential for judicial economy in making its determination. The consolidated suit shall proceed under the same procedural rules as a singular action, with all parties given adequate opportunity to present their respective claims and defenses.",
|
||||
"question_type": "examples",
|
||||
"question": "Can you provide an example of a situation where the Court might consolidate multiple suits under CPC 123A?",
|
||||
"answer": "Certainly! Imagine a scenario where a construction company is sued by multiple homeowners for damages caused by the same faulty product used in their homes. Each homeowner files a separate suit against the company, claiming similar damages due to the defective product. In this case, the Court may allow the consolidation of these suits into a single proceeding because all claims arise from the same transaction—the use of the faulty product. The Court would consider factors such as the interests of justice, the convenience for the homeowners and the construction company, and the potential for reducing judicial resources. This way, the consolidated suit can be handled more efficiently under the same procedural rules, allowing all parties to present their claims and defenses together."
|
||||
},
|
||||
{
|
||||
"section_number": "CPC 145",
|
||||
"section_title": "Summary Dismissal of Frivolous Claims",
|
||||
"provision": "The Court shall have the authority to summarily dismiss any civil claim or application that it deems to be frivolous, vexatious, or intended solely to harass the opposing party. The Court, upon motion by the defendant, may conduct a preliminary hearing to ascertain the merits of the claim and, if satisfied that the claim lacks substance or is manifestly unjust, shall issue an order dismissing the claim with costs awarded to the defendant. This provision shall not preclude the Court from imposing penalties for abuse of process as deemed appropriate.",
|
||||
"question_type": "rights",
|
||||
"question": "What rights do defendants have under CPC 145 regarding frivolous claims made against them?",
|
||||
"answer": "Under CPC 145, defendants have the right to request a preliminary hearing to assess the merits of a civil claim they believe to be frivolous or vexatious. If the Court finds that the claim lacks substance or is manifestly unjust, it can dismiss the claim and award costs to the defendant. Additionally, the Court has the authority to impose penalties for any abuse of process, further protecting the defendant's rights against harassing legal actions."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 123A",
|
||||
"section_title": "Remedies for Breach of Contract",
|
||||
"provision": "In the event of a breach of contract, the aggrieved party shall be entitled to seek either specific performance of the contract or, in lieu thereof, claim damages which shall be quantified based on the actual loss suffered as a direct result of the breach. The court may also grant consequential damages if it is proven that such damages were foreseeable at the time of contract formation. Additionally, if the breach is willful and malicious, the court may impose punitive damages to deter further violations.",
|
||||
"question_type": "exceptions",
|
||||
"question": "Are there any exceptions to the entitlement of the aggrieved party to seek specific performance or claim damages for a breach of contract under IPC 123A?",
|
||||
"answer": "Yes, exceptions may arise in cases where the breach was not willful or malicious, thereby limiting the possibility of punitive damages. Additionally, if the aggrieved party fails to prove that the consequential damages were foreseeable at the time of contract formation, they may not be entitled to such damages. Furthermore, specific performance may not be granted if it is deemed impractical or impossible to enforce the terms of the contract."
|
||||
},
|
||||
{
|
||||
"section_number": "CPC 224",
|
||||
"section_title": "Procedure for Electronic Filing of Civil Suits",
|
||||
"provision": "In any civil proceedings filed before the Court, a party may submit documents and pleadings electronically through the designated digital platform, provided that such filings comply with the prescribed format and electronic signature requirements as established by the Supreme Court of India. The electronic filing shall be deemed equivalent to the physical submission of documents, and the Court shall issue an electronic acknowledgment of receipt, which shall serve as the official record of submission. Any discrepancies in the electronic filing shall be rectified within seven days of notice from the Court, failing which the Court may dismiss the application without prejudice to the party's right to refile.",
|
||||
"question_type": "penalty",
|
||||
"question": "What are the potential penalties for failing to rectify discrepancies in electronic filings within the specified time frame as per CPC 224?",
|
||||
"answer": "If a party fails to rectify discrepancies in their electronic filing within seven days of receiving notice from the Court, the Court may dismiss the application. However, this dismissal is without prejudice, meaning the party retains the right to refile the application in the future."
|
||||
},
|
||||
{
|
||||
"section_number": "IEA 102A",
|
||||
"section_title": "Admissibility of Digital Evidence",
|
||||
"provision": "Notwithstanding the provisions of Section 65B of the Indian Evidence Act, 1872, any digital evidence, including but not limited to data derived from electronic devices, shall be admissible in a court of law provided that the party seeking to introduce such evidence establishes its authenticity through a certified digital signature or a chain of custody that clearly delineates the handling of the evidence from the time of its creation to presentation in court. The court shall also consider the relevance of the evidence in relation to the facts of the case and may exclude it if it is deemed to be unfairly prejudicial, misleading, or if its probative value is substantially outweighed by the danger of confusion of the issues.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps must a party take to ensure that digital evidence is admissible in court according to IEA 102A?",
|
||||
"answer": "To ensure that digital evidence is admissible in court under IEA 102A, the party seeking to introduce the evidence must establish its authenticity by providing either a certified digital signature or a clear chain of custody. This chain of custody must detail the handling of the evidence from the time of its creation to its presentation in court. Additionally, the court will assess the relevance of the evidence to the case and may exclude it if it is found to be unfairly prejudicial, misleading, or if its probative value is substantially outweighed by the potential for confusion regarding the issues."
|
||||
},
|
||||
{
|
||||
"section_number": "CPC 192A",
|
||||
"section_title": "Conduct of Preliminary Hearings",
|
||||
"provision": "In all civil matters, the court shall conduct a preliminary hearing within thirty days of the filing of the plaint. During this hearing, the court shall ascertain the issues raised, determine the necessity of further pleadings, and establish a timeline for the conduct of the trial. The court may also encourage the parties to explore alternative dispute resolution mechanisms, including mediation or conciliation, prior to proceeding with the formal trial process.",
|
||||
"question_type": "exceptions",
|
||||
"question": "Are there any exceptions to the requirement for the court to conduct a preliminary hearing within thirty days of the filing of the plaint in civil matters under CPC 192A?",
|
||||
"answer": "Yes, exceptions may apply in cases where the court determines that special circumstances exist, such as complex issues requiring additional time for proper assessment, or if the parties have mutually agreed to postpone the preliminary hearing for valid reasons. Additionally, if there are procedural delays or if the court's schedule does not allow for a hearing within the stipulated timeframe, these may also constitute exceptions to the requirement."
|
||||
},
|
||||
{
|
||||
"section_number": "CPC 123A",
|
||||
"section_title": "Interim Relief in Civil Proceedings",
|
||||
"provision": "In any suit pending before the Court, the plaintiff may apply for interim relief, including but not limited to the issuance of a temporary injunction or a stay of proceedings, if it is demonstrated that the delay in granting such relief would cause irreparable harm to the applicant. The Court shall consider the balance of convenience between the parties and the likelihood of success on the merits of the case before granting any interim orders. Such relief may be granted for a period not exceeding six months, subject to renewal upon satisfactory demonstration of continued necessity.",
|
||||
"question_type": "exceptions",
|
||||
"question": "Are there any exceptions to the granting of interim relief under CPC 123A, and what factors must be considered by the Court in such cases?",
|
||||
"answer": "Yes, there are exceptions to the granting of interim relief under CPC 123A. The Court will only grant such relief if the plaintiff demonstrates that a delay would cause irreparable harm and considers the balance of convenience between the parties as well as the likelihood of success on the merits of the case. If these conditions are not satisfactorily met, the Court may deny the application for interim relief."
|
||||
},
|
||||
{
|
||||
"section_number": "IPC 132A",
|
||||
"section_title": "Protection of Innovations in Traditional Knowledge",
|
||||
"provision": "Any individual or entity that seeks to utilize traditional knowledge or practices that have been developed and passed down through generations within a specific community shall obtain prior informed consent from the relevant community. Failure to do so shall constitute an infringement of the intellectual property rights of the community, rendering the infringer liable for damages not less than one lakh rupees and up to five times the profits derived from such unauthorized use. Additionally, courts may impose injunctions to prevent further exploitation of the said traditional knowledge.",
|
||||
"question_type": "procedure",
|
||||
"question": "What steps must an individual or entity take to legally utilize traditional knowledge according to IPC 132A?",
|
||||
"answer": "To legally utilize traditional knowledge, an individual or entity must first obtain prior informed consent from the relevant community that holds the traditional knowledge. This involves engaging with the community to explain the intended use and ensuring that they fully understand and agree to it. Failure to obtain this consent may lead to legal consequences, including liability for damages and potential injunctions against further exploitation."
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,229 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2714fa36",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Week 3 Data Generator With Open-Source Models\n",
|
||||
"# Generate synthetic data for Pizza customers within Nairobi "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "761622db",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install requests pandas ipywidgets gradio"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cc7347c4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import gradio as gr\n",
|
||||
"from huggingface_hub import InferenceClient\n",
|
||||
"import random\n",
|
||||
"import os\n",
|
||||
"from dotenv import load_dotenv\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f20cd822",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Load API Key\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"HF_API_KEY = os.getenv('HF_TOKEN')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "856cd8cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"# Define available models with correct Hugging Face model IDs\n",
|
||||
"MODELS = {\n",
|
||||
" \"Mistral-7B\": \"mistralai/Mistral-7B-Instruct-v0.2\",\n",
|
||||
" \"Llama-2-7B\": \"meta-llama/Llama-2-7b-chat-hf\",\n",
|
||||
" \"Phi-2\": \"microsoft/phi-2\",\n",
|
||||
" \"GPT-2\": \"gpt2\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# Nairobi branches\n",
|
||||
"BRANCHES = [\"Westlands\", \"Karen\", \"Kilimani\", \"CBD\", \"Parklands\"]\n",
|
||||
"\n",
|
||||
"# Global variable to store generated data\n",
|
||||
"generated_df = None\n",
|
||||
"\n",
|
||||
"def generate_feedback_data(model_name, num_records):\n",
|
||||
" \"\"\"Generate synthetic pizza feedback data using selected AI model\"\"\"\n",
|
||||
" global generated_df\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" # Initialize the Hugging Face Inference Client\n",
|
||||
" model_id = MODELS[model_name]\n",
|
||||
" client = InferenceClient(model=model_id, token=None) # Add your HF token if needed\n",
|
||||
" \n",
|
||||
" feedback_data = []\n",
|
||||
" \n",
|
||||
" for i in range(num_records):\n",
|
||||
" # Random branch\n",
|
||||
" branch = random.choice(BRANCHES)\n",
|
||||
" \n",
|
||||
" # Generate feedback using the AI model\n",
|
||||
" prompt = f\"Generate a brief customer feedback comment about a pizza order from {branch} branch in Nairobi. Make it realistic and varied (positive, negative, or neutral). Keep it under 30 words.\"\n",
|
||||
" \n",
|
||||
" try:\n",
|
||||
" response = client.text_generation(\n",
|
||||
" prompt,\n",
|
||||
" max_new_tokens=50,\n",
|
||||
" temperature=0.8\n",
|
||||
" )\n",
|
||||
" feedback = response.strip()\n",
|
||||
" except Exception as e:\n",
|
||||
" # Fallback to template-based generation if API fails\n",
|
||||
" feedback = generate_fallback_feedback(branch)\n",
|
||||
" \n",
|
||||
" # Generate other fields\n",
|
||||
" record = {\n",
|
||||
" \"Customer_ID\": f\"CUST{1000 + i}\",\n",
|
||||
" \"Branch\": branch,\n",
|
||||
" \"Rating\": random.randint(1, 5),\n",
|
||||
" \"Order_Type\": random.choice([\"Delivery\", \"Dine-in\", \"Takeaway\"]),\n",
|
||||
" \"Feedback\": feedback,\n",
|
||||
" \"Date\": f\"2024-{random.randint(1, 12):02d}-{random.randint(1, 28):02d}\"\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" feedback_data.append(record)\n",
|
||||
" \n",
|
||||
" # Create DataFrame\n",
|
||||
" generated_df = pd.DataFrame(feedback_data)\n",
|
||||
" \n",
|
||||
" return generated_df, f\"✓ Successfully generated {num_records} records using {model_name}\"\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" return pd.DataFrame(), f\"✗ Error: {str(e)}\"\n",
|
||||
"\n",
|
||||
"def generate_fallback_feedback(branch):\n",
|
||||
" \"\"\"Fallback feedback generator if API fails\"\"\"\n",
|
||||
" templates = [\n",
|
||||
" f\"Great pizza from {branch}! Quick delivery and hot food.\",\n",
|
||||
" f\"Pizza was cold when it arrived at {branch}. Disappointed.\",\n",
|
||||
" f\"Excellent service at {branch} branch. Will order again!\",\n",
|
||||
" f\"Average experience. Pizza was okay but nothing special.\",\n",
|
||||
" f\"Long wait time at {branch} but the pizza was worth it.\",\n",
|
||||
" ]\n",
|
||||
" return random.choice(templates)\n",
|
||||
"\n",
|
||||
"def download_csv():\n",
|
||||
" \"\"\"Save generated data as CSV\"\"\"\n",
|
||||
" global generated_df\n",
|
||||
" if generated_df is not None:\n",
|
||||
" generated_df.to_csv('pizza_feedback_data.csv', index=False)\n",
|
||||
" return \"CSV downloaded!\"\n",
|
||||
" return \"No data to download\"\n",
|
||||
"\n",
|
||||
"def download_json():\n",
|
||||
" \"\"\"Save generated data as JSON\"\"\"\n",
|
||||
" global generated_df\n",
|
||||
" if generated_df is not None:\n",
|
||||
" generated_df.to_json('pizza_feedback_data.json', orient='records', indent=2)\n",
|
||||
" return \"JSON downloaded!\"\n",
|
||||
" return \"No data to download\"\n",
|
||||
"\n",
|
||||
"# Create Gradio interface\n",
|
||||
"with gr.Blocks(title=\"Pizza Feedback Data Generator\") as demo:\n",
|
||||
" gr.Markdown(\"\"\"\n",
|
||||
" # 🍕 Pizza Feedback Data Generator\n",
|
||||
" Generate synthetic customer feedback for Nairobi pizza branches using AI models\n",
|
||||
" \"\"\")\n",
|
||||
" \n",
|
||||
" with gr.Row():\n",
|
||||
" with gr.Column():\n",
|
||||
" model_selector = gr.Radio(\n",
|
||||
" choices=list(MODELS.keys()),\n",
|
||||
" label=\"Select AI Model\",\n",
|
||||
" value=list(MODELS.keys())[0]\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" num_records_slider = gr.Slider(\n",
|
||||
" minimum=1,\n",
|
||||
" maximum=50,\n",
|
||||
" value=10,\n",
|
||||
" step=1,\n",
|
||||
" label=\"Number of Records\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" generate_btn = gr.Button(\"Generate Feedback Data\", variant=\"primary\")\n",
|
||||
" \n",
|
||||
" with gr.Row():\n",
|
||||
" status_output = gr.Textbox(label=\"Status\", interactive=False)\n",
|
||||
" \n",
|
||||
" with gr.Row():\n",
|
||||
" dataframe_output = gr.Dataframe(\n",
|
||||
" label=\"Generated Feedback Data\",\n",
|
||||
" interactive=False\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" with gr.Row():\n",
|
||||
" csv_btn = gr.Button(\"Download CSV\")\n",
|
||||
" json_btn = gr.Button(\"Download JSON\")\n",
|
||||
" \n",
|
||||
" # Event handlers\n",
|
||||
" generate_btn.click(\n",
|
||||
" fn=generate_feedback_data,\n",
|
||||
" inputs=[model_selector, num_records_slider],\n",
|
||||
" outputs=[dataframe_output, status_output]\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" csv_btn.click(\n",
|
||||
" fn=download_csv,\n",
|
||||
" outputs=status_output\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" json_btn.click(\n",
|
||||
" fn=download_json,\n",
|
||||
" outputs=status_output\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"# Launch the interface\n",
|
||||
"demo.launch()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
Customer_ID,Branch,Rating,Order_Type,Feedback,Date
|
||||
CUST1000,Westlands,1,Dine-in,Great pizza from Westlands! Quick delivery and hot food.,2024-10-17
|
||||
CUST1001,CBD,1,Takeaway,Excellent service at CBD branch. Will order again!,2024-11-24
|
||||
CUST1002,Kilimani,1,Delivery,Excellent service at Kilimani branch. Will order again!,2024-09-03
|
||||
CUST1003,Parklands,5,Takeaway,Great pizza from Parklands! Quick delivery and hot food.,2024-08-05
|
||||
CUST1004,Westlands,3,Delivery,Great pizza from Westlands! Quick delivery and hot food.,2024-01-12
|
||||
CUST1005,CBD,5,Delivery,Great pizza from CBD! Quick delivery and hot food.,2024-01-10
|
||||
CUST1006,Kilimani,1,Delivery,Long wait time at Kilimani but the pizza was worth it.,2024-09-12
|
||||
CUST1007,Parklands,2,Delivery,Great pizza from Parklands! Quick delivery and hot food.,2024-05-27
|
||||
CUST1008,Parklands,3,Dine-in,Excellent service at Parklands branch. Will order again!,2024-12-01
|
||||
CUST1009,CBD,1,Dine-in,Excellent service at CBD branch. Will order again!,2024-10-09
|
||||
CUST1010,Parklands,1,Takeaway,Average experience. Pizza was okay but nothing special.,2024-04-03
|
||||
CUST1011,Westlands,2,Dine-in,Pizza was cold when it arrived at Westlands. Disappointed.,2024-01-02
|
||||
CUST1012,Karen,2,Takeaway,Pizza was cold when it arrived at Karen. Disappointed.,2024-03-26
|
||||
CUST1013,Westlands,3,Dine-in,Long wait time at Westlands but the pizza was worth it.,2024-11-17
|
||||
CUST1014,Westlands,5,Takeaway,Average experience. Pizza was okay but nothing special.,2024-03-01
|
||||
CUST1015,Parklands,3,Delivery,Excellent service at Parklands branch. Will order again!,2024-03-18
|
||||
|
@@ -0,0 +1,498 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b8be8252",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!uv pip install pytest"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ba193fd5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import re\n",
|
||||
"import ast\n",
|
||||
"import sys\n",
|
||||
"import uuid\n",
|
||||
"import json\n",
|
||||
"import textwrap\n",
|
||||
"import subprocess\n",
|
||||
"from pathlib import Path\n",
|
||||
"from dataclasses import dataclass\n",
|
||||
"from typing import List, Protocol, Tuple, Dict, Optional\n",
|
||||
"\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from openai import BadRequestError as _OpenAIBadRequest\n",
|
||||
"import gradio as gr\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"\n",
|
||||
"# --- Provider base URLs (Gemini & Groq speak OpenAI-compatible API) ---\n",
|
||||
"GEMINI_BASE = \"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
|
||||
"GROQ_BASE = \"https://api.groq.com/openai/v1\"\n",
|
||||
"\n",
|
||||
"# --- API Keys (add these in your .env) ---\n",
|
||||
"openai_api_key = os.getenv(\"OPENAI_API_KEY\") # OpenAI\n",
|
||||
"google_api_key = os.getenv(\"GOOGLE_API_KEY\") # Gemini\n",
|
||||
"groq_api_key = os.getenv(\"GROQ_API_KEY\") # Groq\n",
|
||||
"\n",
|
||||
"# --- Clients ---\n",
|
||||
"openai_client = OpenAI() # OpenAI default (reads OPENAI_API_KEY)\n",
|
||||
"gemini_client = OpenAI(api_key=google_api_key, base_url=GEMINI_BASE) if google_api_key else None\n",
|
||||
"groq_client = OpenAI(api_key=groq_api_key, base_url=GROQ_BASE) if groq_api_key else None\n",
|
||||
"\n",
|
||||
"# --- Model registry: label -> { client, model } ---\n",
|
||||
"MODEL_REGISTRY: Dict[str, Dict[str, object]] = {}\n",
|
||||
"\n",
|
||||
"def _register(label: str, client: Optional[OpenAI], model_id: str):\n",
|
||||
" \"\"\"Add a model to the registry only if its client is configured.\"\"\"\n",
|
||||
" if client is not None:\n",
|
||||
" MODEL_REGISTRY[label] = {\"client\": client, \"model\": model_id}\n",
|
||||
"\n",
|
||||
"# OpenAI\n",
|
||||
"_register(\"OpenAI • GPT-5\", openai_client, \"gpt-5\")\n",
|
||||
"_register(\"OpenAI • GPT-5 Nano\", openai_client, \"gpt-5-nano\")\n",
|
||||
"_register(\"OpenAI • GPT-4o-mini\", openai_client, \"gpt-4o-mini\")\n",
|
||||
"\n",
|
||||
"# Gemini (Google)\n",
|
||||
"_register(\"Gemini • 2.5 Pro\", gemini_client, \"gemini-2.5-pro\")\n",
|
||||
"_register(\"Gemini • 2.5 Flash\", gemini_client, \"gemini-2.5-flash\")\n",
|
||||
"\n",
|
||||
"# Groq\n",
|
||||
"_register(\"Groq • Llama 3.1 8B\", groq_client, \"llama-3.1-8b-instant\")\n",
|
||||
"_register(\"Groq • Llama 3.3 70B\", groq_client, \"llama-3.3-70b-versatile\")\n",
|
||||
"_register(\"Groq • GPT-OSS 20B\", groq_client, \"openai/gpt-oss-20b\")\n",
|
||||
"_register(\"Groq • GPT-OSS 120B\", groq_client, \"openai/gpt-oss-120b\")\n",
|
||||
"\n",
|
||||
"DEFAULT_MODEL = next(iter(MODEL_REGISTRY.keys()), None)\n",
|
||||
"\n",
|
||||
"print(f\"Providers configured → OpenAI:{bool(openai_api_key)} Gemini:{bool(google_api_key)} Groq:{bool(groq_api_key)}\")\n",
|
||||
"print(\"Models available →\", \", \".join(MODEL_REGISTRY.keys()) or \"None (add API keys in .env)\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e5d6b0f2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class CompletionClient(Protocol):\n",
|
||||
" \"\"\"Any LLM client provides a .complete() method using a registry label.\"\"\"\n",
|
||||
" def complete(self, *, model_label: str, system: str, user: str) -> str: ...\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def _extract_code_or_text(s: str) -> str:\n",
|
||||
" \"\"\"Prefer fenced python if present; otherwise return raw text.\"\"\"\n",
|
||||
" m = re.search(r\"```(?:python)?\\s*(.*?)```\", s, flags=re.S | re.I)\n",
|
||||
" return m.group(1).strip() if m else s.strip()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class MultiModelChatClient:\n",
|
||||
" \"\"\"Routes requests to the right provider/client based on model label.\"\"\"\n",
|
||||
" def __init__(self, registry: Dict[str, Dict[str, object]]):\n",
|
||||
" self._registry = registry\n",
|
||||
"\n",
|
||||
" def _call(self, *, client: OpenAI, model_id: str, system: str, user: str) -> str:\n",
|
||||
" params = {\n",
|
||||
" \"model\": model_id,\n",
|
||||
" \"messages\": [\n",
|
||||
" {\"role\": \"system\", \"content\": system},\n",
|
||||
" {\"role\": \"user\", \"content\": user},\n",
|
||||
" ],\n",
|
||||
" }\n",
|
||||
" resp = client.chat.completions.create(**params) # do NOT send temperature for strict providers\n",
|
||||
" text = (resp.choices[0].message.content or \"\").strip()\n",
|
||||
" return _extract_code_or_text(text)\n",
|
||||
"\n",
|
||||
" def complete(self, *, model_label: str, system: str, user: str) -> str:\n",
|
||||
" if model_label not in self._registry:\n",
|
||||
" raise ValueError(f\"Unknown model label: {model_label}\")\n",
|
||||
" info = self._registry[model_label]\n",
|
||||
" client = info[\"client\"]\n",
|
||||
" model = info[\"model\"]\n",
|
||||
" try:\n",
|
||||
" return self._call(client=client, model_id=str(model), system=system, user=user)\n",
|
||||
" except _OpenAIBadRequest as e:\n",
|
||||
" # Providers may reject stray params; we don't send any, but retry anyway.\n",
|
||||
" if \"temperature\" in str(e).lower():\n",
|
||||
" return self._call(client=client, model_id=str(model), system=system, user=user)\n",
|
||||
" raise\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "31558bf0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@dataclass(frozen=True)\n",
|
||||
"class SymbolInfo:\n",
|
||||
" kind: str # \"function\" | \"class\" | \"method\"\n",
|
||||
" name: str\n",
|
||||
" signature: str\n",
|
||||
" lineno: int\n",
|
||||
"\n",
|
||||
"class PublicAPIExtractor:\n",
|
||||
" \"\"\"Extract concise 'public API' summary from a Python module.\"\"\"\n",
|
||||
" def extract(self, source: str) -> List[SymbolInfo]:\n",
|
||||
" tree = ast.parse(source)\n",
|
||||
" out: List[SymbolInfo] = []\n",
|
||||
" for node in tree.body:\n",
|
||||
" if isinstance(node, ast.FunctionDef) and not node.name.startswith(\"_\"):\n",
|
||||
" out.append(SymbolInfo(\"function\", node.name, self._sig(node), node.lineno))\n",
|
||||
" elif isinstance(node, ast.ClassDef) and not node.name.startswith(\"_\"):\n",
|
||||
" out.append(SymbolInfo(\"class\", node.name, node.name, node.lineno))\n",
|
||||
" for sub in node.body:\n",
|
||||
" if isinstance(sub, ast.FunctionDef) and not sub.name.startswith(\"_\"):\n",
|
||||
" out.append(SymbolInfo(\"method\",\n",
|
||||
" f\"{node.name}.{sub.name}\",\n",
|
||||
" self._sig(sub),\n",
|
||||
" sub.lineno))\n",
|
||||
" return sorted(out, key=lambda s: (s.kind, s.name.lower(), s.lineno))\n",
|
||||
"\n",
|
||||
" def _sig(self, fn: ast.FunctionDef) -> str:\n",
|
||||
" args = [a.arg for a in fn.args.args]\n",
|
||||
" if fn.args.vararg:\n",
|
||||
" args.append(\"*\" + fn.args.vararg.arg)\n",
|
||||
" args.extend(a.arg + \"=?\" for a in fn.args.kwonlyargs)\n",
|
||||
" if fn.args.kwarg:\n",
|
||||
" args.append(\"**\" + fn.args.kwarg.arg)\n",
|
||||
" ret = \"\"\n",
|
||||
" if fn.returns is not None:\n",
|
||||
" try:\n",
|
||||
" ret = f\" -> {ast.unparse(fn.returns)}\"\n",
|
||||
" except Exception:\n",
|
||||
" pass\n",
|
||||
" return f\"def {fn.name}({', '.join(args)}){ret}:\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3aeadedc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class PromptBuilder:\n",
|
||||
" \"\"\"Builds deterministic prompts for pytest generation.\"\"\"\n",
|
||||
" SYSTEM = (\n",
|
||||
" \"You are a senior Python engineer. Produce a single, self-contained pytest file.\\n\"\n",
|
||||
" \"Rules:\\n\"\n",
|
||||
" \"- Output only Python test code (no prose, no markdown fences).\\n\"\n",
|
||||
" \"- Use plain pytest tests (functions), no classes unless unavoidable.\\n\"\n",
|
||||
" \"- Deterministic: avoid network/IO; seed randomness if used.\\n\"\n",
|
||||
" \"- Import the target module by module name only.\\n\"\n",
|
||||
" \"- Cover every public function and method with at least one tiny test.\\n\"\n",
|
||||
" \"- Prefer straightforward, fast assertions.\\n\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" def build_user(self, *, module_name: str, source: str, symbols: List[SymbolInfo]) -> str:\n",
|
||||
" summary = \"\\n\".join(f\"- {s.kind:<6} {s.signature}\" for s in symbols) or \"- (no public symbols)\"\n",
|
||||
" return textwrap.dedent(f\"\"\"\n",
|
||||
" Create pytest tests for module `{module_name}`.\n",
|
||||
"\n",
|
||||
" Public API Summary:\n",
|
||||
" {summary}\n",
|
||||
"\n",
|
||||
" Constraints:\n",
|
||||
" - Import as: `import {module_name} as mod`\n",
|
||||
" - Keep tests tiny, fast, and deterministic.\n",
|
||||
"\n",
|
||||
" Full module source (for reference):\n",
|
||||
" # --- BEGIN SOURCE {module_name}.py ---\n",
|
||||
" {source}\n",
|
||||
" # --- END SOURCE ---\n",
|
||||
" \"\"\").strip()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a45ac5be",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def _ensure_header_and_import(code: str, module_name: str) -> str:\n",
|
||||
" \"\"\"Ensure tests import pytest and the target module as 'mod'.\"\"\"\n",
|
||||
" code = code.strip()\n",
|
||||
" needs_pytest = \"import pytest\" not in code\n",
|
||||
" has_mod = (f\"import {module_name} as mod\" in code) or (f\"from {module_name} import\" in code)\n",
|
||||
" needs_import = not has_mod\n",
|
||||
"\n",
|
||||
" header = []\n",
|
||||
" if needs_pytest:\n",
|
||||
" header.append(\"import pytest\")\n",
|
||||
" if needs_import:\n",
|
||||
" header.append(f\"import {module_name} as mod\")\n",
|
||||
"\n",
|
||||
" return (\"\\n\".join(header) + \"\\n\\n\" + code) if header else code\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def build_module_name_from_path(path: str) -> str:\n",
|
||||
" return Path(path).stem\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "787e58b6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class TestGenerator:\n",
|
||||
" \"\"\"Extraction → prompt → model → polish.\"\"\"\n",
|
||||
" def __init__(self, llm: CompletionClient):\n",
|
||||
" self._llm = llm\n",
|
||||
" self._extractor = PublicAPIExtractor()\n",
|
||||
" self._prompts = PromptBuilder()\n",
|
||||
"\n",
|
||||
" def generate_tests(self, model_label: str, module_name: str, source: str) -> str:\n",
|
||||
" symbols = self._extractor.extract(source)\n",
|
||||
" user = self._prompts.build_user(module_name=module_name, source=source, symbols=symbols)\n",
|
||||
" raw = self._llm.complete(model_label=model_label, system=self._prompts.SYSTEM, user=user)\n",
|
||||
" return _ensure_header_and_import(raw, module_name)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8402f62f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def _parse_pytest_summary(output: str) -> Tuple[str, Dict[str, int]]:\n",
|
||||
" \"\"\"\n",
|
||||
" Parse the final summary line like:\n",
|
||||
" '3 passed, 1 failed, 2 skipped in 0.12s'\n",
|
||||
" Return (summary_line, counts_dict).\n",
|
||||
" \"\"\"\n",
|
||||
" summary_line = \"\"\n",
|
||||
" for line in output.strip().splitlines()[::-1]: # scan from end\n",
|
||||
" if \" passed\" in line or \" failed\" in line or \" error\" in line or \" skipped\" in line or \" deselected\" in line:\n",
|
||||
" summary_line = line.strip()\n",
|
||||
" break\n",
|
||||
"\n",
|
||||
" counts = {\"passed\": 0, \"failed\": 0, \"errors\": 0, \"skipped\": 0, \"xfail\": 0, \"xpassed\": 0}\n",
|
||||
" m = re.findall(r\"(\\d+)\\s+(passed|failed|errors?|skipped|xfailed|xpassed)\", summary_line)\n",
|
||||
" for num, kind in m:\n",
|
||||
" if kind.startswith(\"error\"):\n",
|
||||
" counts[\"errors\"] += int(num)\n",
|
||||
" elif kind == \"passed\":\n",
|
||||
" counts[\"passed\"] += int(num)\n",
|
||||
" elif kind == \"failed\":\n",
|
||||
" counts[\"failed\"] += int(num)\n",
|
||||
" elif kind == \"skipped\":\n",
|
||||
" counts[\"skipped\"] += int(num)\n",
|
||||
" elif kind == \"xfailed\":\n",
|
||||
" counts[\"xfail\"] += int(num)\n",
|
||||
" elif kind == \"xpassed\":\n",
|
||||
" counts[\"xpassed\"] += int(num)\n",
|
||||
"\n",
|
||||
" return summary_line or \"(no summary line found)\", counts\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def run_pytest_on_snippet(module_name: str, module_code: str, tests_code: str) -> Tuple[str, str]:\n",
|
||||
" \"\"\"\n",
|
||||
" Create an isolated temp workspace, write module + tests, run pytest,\n",
|
||||
" and return (human_summary, full_cli_output).\n",
|
||||
" \"\"\"\n",
|
||||
" if not module_name or not module_code.strip() or not tests_code.strip():\n",
|
||||
" return \"❌ Provide module name, module code, and tests.\", \"\"\n",
|
||||
"\n",
|
||||
" run_id = uuid.uuid4().hex[:8]\n",
|
||||
" base = Path(\".pytest_runs\") / f\"run_{run_id}\"\n",
|
||||
" tests_dir = base / \"tests\"\n",
|
||||
" tests_dir.mkdir(parents=True, exist_ok=True)\n",
|
||||
"\n",
|
||||
" # Write module and tests\n",
|
||||
" (base / f\"{module_name}.py\").write_text(module_code, encoding=\"utf-8\")\n",
|
||||
" (tests_dir / f\"test_{module_name}.py\").write_text(tests_code, encoding=\"utf-8\")\n",
|
||||
"\n",
|
||||
" # Run pytest with this temp dir on PYTHONPATH\n",
|
||||
" env = os.environ.copy()\n",
|
||||
" env[\"PYTHONPATH\"] = str(base) + os.pathsep + env.get(\"PYTHONPATH\", \"\")\n",
|
||||
" cmd = [sys.executable, \"-m\", \"pytest\", \"-q\"] # quiet output, but still includes summary\n",
|
||||
" proc = subprocess.run(cmd, cwd=base, env=env, text=True, capture_output=True)\n",
|
||||
"\n",
|
||||
" full_out = (proc.stdout or \"\") + (\"\\n\" + proc.stderr if proc.stderr else \"\")\n",
|
||||
" summary_line, counts = _parse_pytest_summary(full_out)\n",
|
||||
"\n",
|
||||
" badges = []\n",
|
||||
" for key in (\"passed\", \"failed\", \"errors\", \"skipped\", \"xpassed\", \"xfail\"):\n",
|
||||
" val = counts.get(key, 0)\n",
|
||||
" if val:\n",
|
||||
" badges.append(f\"**{key}: {val}**\")\n",
|
||||
" badges = \" • \".join(badges) if badges else \"no tests collected?\"\n",
|
||||
"\n",
|
||||
" human = f\"{summary_line}\\n\\n{badges}\"\n",
|
||||
" return human, full_out\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5d240ce5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"LLM = MultiModelChatClient(MODEL_REGISTRY)\n",
|
||||
"SERVICE = TestGenerator(LLM)\n",
|
||||
"\n",
|
||||
"def generate_from_code(model_label: str, module_name: str, code: str, save: bool, out_dir: str) -> Tuple[str, str]:\n",
|
||||
" if not model_label or model_label not in MODEL_REGISTRY:\n",
|
||||
" return \"\", \"❌ Pick a model (or add API keys for providers in .env).\"\n",
|
||||
" if not module_name.strip():\n",
|
||||
" return \"\", \"❌ Please provide a module name.\"\n",
|
||||
" if not code.strip():\n",
|
||||
" return \"\", \"❌ Please paste some Python code.\"\n",
|
||||
"\n",
|
||||
" tests_code = SERVICE.generate_tests(model_label=model_label, module_name=module_name.strip(), source=code)\n",
|
||||
" saved = \"\"\n",
|
||||
" if save:\n",
|
||||
" out = Path(out_dir or \"tests\")\n",
|
||||
" out.mkdir(parents=True, exist_ok=True)\n",
|
||||
" out_path = out / f\"test_{module_name}.py\"\n",
|
||||
" out_path.write_text(tests_code, encoding=\"utf-8\")\n",
|
||||
" saved = f\"✅ Saved to {out_path}\"\n",
|
||||
" return tests_code, saved\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def generate_from_file(model_label: str, file_obj, save: bool, out_dir: str) -> Tuple[str, str]:\n",
|
||||
" if file_obj is None:\n",
|
||||
" return \"\", \"❌ Please upload a .py file.\"\n",
|
||||
" code = file_obj.decode(\"utf-8\")\n",
|
||||
" module_name = build_module_name_from_path(\"uploaded_module.py\")\n",
|
||||
" return generate_from_code(model_label, module_name, code, save, out_dir)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e3e1401a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"EXAMPLE_CODE = \"\"\"\\\n",
|
||||
"def add(a: int, b: int) -> int:\n",
|
||||
" return a + b\n",
|
||||
"\n",
|
||||
"def divide(a: float, b: float) -> float:\n",
|
||||
" if b == 0:\n",
|
||||
" raise ZeroDivisionError(\"b must be non-zero\")\n",
|
||||
" return a / b\n",
|
||||
"\n",
|
||||
"class Counter:\n",
|
||||
" def __init__(self, start: int = 0):\n",
|
||||
" self.value = start\n",
|
||||
"\n",
|
||||
" def inc(self, by: int = 1):\n",
|
||||
" self.value += by\n",
|
||||
" return self.value\n",
|
||||
"\"\"\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f802450e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with gr.Blocks(title=\"PyTest Generator\") as ui:\n",
|
||||
" gr.Markdown(\n",
|
||||
" \"## 🧪 PyTest Generator (Week 4 • Community Contribution)\\n\"\n",
|
||||
" \"Generate **minimal, deterministic** pytest tests from a Python module using your chosen model/provider.\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" with gr.Row(equal_height=True):\n",
|
||||
" # LEFT: inputs (module code)\n",
|
||||
" with gr.Column(scale=6):\n",
|
||||
" with gr.Row():\n",
|
||||
" model_dd = gr.Dropdown(\n",
|
||||
" list(MODEL_REGISTRY.keys()),\n",
|
||||
" value=DEFAULT_MODEL,\n",
|
||||
" label=\"Model (OpenAI, Gemini, Groq)\"\n",
|
||||
" )\n",
|
||||
" module_name_tb = gr.Textbox(\n",
|
||||
" label=\"Module name (used in `import <name> as mod`)\",\n",
|
||||
" value=\"mymodule\"\n",
|
||||
" )\n",
|
||||
" code_in = gr.Code(\n",
|
||||
" label=\"Python module code\",\n",
|
||||
" language=\"python\",\n",
|
||||
" lines=24,\n",
|
||||
" value=EXAMPLE_CODE\n",
|
||||
" )\n",
|
||||
" with gr.Row():\n",
|
||||
" save_cb = gr.Checkbox(label=\"Also save generated tests to /tests\", value=True)\n",
|
||||
" out_dir_tb = gr.Textbox(label=\"Output folder\", value=\"tests\")\n",
|
||||
" gen_btn = gr.Button(\"Generate tests\", variant=\"primary\")\n",
|
||||
"\n",
|
||||
" # RIGHT: outputs (generated tests + pytest run)\n",
|
||||
" with gr.Column(scale=6):\n",
|
||||
" tests_out = gr.Code(label=\"Generated tests (pytest)\", language=\"python\", lines=24)\n",
|
||||
" with gr.Row():\n",
|
||||
" run_btn = gr.Button(\"Run PyTest\", variant=\"secondary\")\n",
|
||||
" summary_md = gr.Markdown()\n",
|
||||
" full_out = gr.Textbox(label=\"Full PyTest output\", lines=12)\n",
|
||||
"\n",
|
||||
" # --- events ---\n",
|
||||
"\n",
|
||||
" def _on_gen(model_label, name, code, save, outdir):\n",
|
||||
" tests, msg = generate_from_code(model_label, name, code, save, outdir)\n",
|
||||
" status = msg or \"✅ Done\"\n",
|
||||
" return tests, status\n",
|
||||
"\n",
|
||||
" gen_btn.click(\n",
|
||||
" _on_gen,\n",
|
||||
" inputs=[model_dd, module_name_tb, code_in, save_cb, out_dir_tb],\n",
|
||||
" outputs=[tests_out, summary_md],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" def _on_run(name, code, tests):\n",
|
||||
" summary, details = run_pytest_on_snippet(name, code, tests)\n",
|
||||
" return summary, details\n",
|
||||
"\n",
|
||||
" run_btn.click(\n",
|
||||
" _on_run,\n",
|
||||
" inputs=[module_name_tb, code_in, tests_out],\n",
|
||||
" outputs=[summary_md, full_out],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"ui.launch(inbrowser=True)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llm-engineering",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,476 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "xeOG96gXPeqz"
|
||||
},
|
||||
"source": [
|
||||
"# Snippet Sniper\n",
|
||||
"\n",
|
||||
"### Welcome to a wild ride with the John Wick of the coding arena, who accepts your contracts\n",
|
||||
"\n",
|
||||
"Allows you to perform various tasks on given code snippets:\n",
|
||||
"\n",
|
||||
"- Add comments\n",
|
||||
"- Explain what the code does\n",
|
||||
"- Write comprehensive unit tests\n",
|
||||
"- Fix (potential) errors in the code"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "B7ftYo53Pw94",
|
||||
"outputId": "9daa3972-d5a1-4cd2-9952-cd89a54c6ddd"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import logging\n",
|
||||
"from enum import StrEnum\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"import gradio as gr\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "AXmPDuydPuUp"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"logging.basicConfig(level=logging.WARNING)\n",
|
||||
"\n",
|
||||
"logger = logging.getLogger('sniper')\n",
|
||||
"logger.setLevel(logging.DEBUG)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "0c_e1iMYmp5o"
|
||||
},
|
||||
"source": [
|
||||
"## Free Cloud Providers\n",
|
||||
"\n",
|
||||
"Grab your free API Keys from these generous sites:\n",
|
||||
"\n",
|
||||
"- https://ollama.com/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Secrets Helpers"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_secret_in_google_colab(env_name: str) -> str:\n",
|
||||
" try:\n",
|
||||
" from google.colab import userdata\n",
|
||||
" return userdata.get(env_name)\n",
|
||||
" except Exception:\n",
|
||||
" return ''\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_secret(env_name: str) -> str:\n",
|
||||
" '''Gets the value from the environment(s); otherwise asks the user for it if not set'''\n",
|
||||
" key = os.environ.get(env_name) or get_secret_in_google_colab(env_name)\n",
|
||||
"\n",
|
||||
" if not key:\n",
|
||||
" key = getpass(f'Enter {env_name}:').strip()\n",
|
||||
"\n",
|
||||
" if key:\n",
|
||||
" logger.info(f'✅ {env_name} provided')\n",
|
||||
" else:\n",
|
||||
" logger.warning(f'❌ {env_name} not provided')\n",
|
||||
" return key.strip()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Set up model(s)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "d7Qmfac9Ph0w",
|
||||
"outputId": "be9db7f3-f08a-47f5-d6fa-d7c8bce4f97a"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class Provider(StrEnum):\n",
|
||||
" OLLAMA = 'Ollama'\n",
|
||||
" OPENROUTER = 'OpenRouter'\n",
|
||||
"\n",
|
||||
"clients: dict[Provider, OpenAI] = {}\n",
|
||||
"\n",
|
||||
"if api_key := get_secret('OLLAMA_API_KEY'):\n",
|
||||
" clients[Provider.OLLAMA] = OpenAI(api_key=api_key, base_url='https://ollama.com/v1')\n",
|
||||
"\n",
|
||||
"model = 'qwen3-coder:480b-cloud'\n",
|
||||
"client = clients.get(Provider.OLLAMA)\n",
|
||||
"if not client:\n",
|
||||
" raise Exception('No client found')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "Kq-AKZEjqnTp"
|
||||
},
|
||||
"source": [
|
||||
"## Tasks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "fTHvG2w0sgwU"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class Task(StrEnum):\n",
|
||||
" COMMENTS = 'Comments'\n",
|
||||
" UNIT_TESTS = 'Unit Tests'\n",
|
||||
" FIX_CODE = 'Fix Code'\n",
|
||||
" EXPLAIN = 'Explain'\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def perform_tasks(tasks, code):\n",
|
||||
" logger.info(f'Performing tasks: {tasks}')\n",
|
||||
"\n",
|
||||
" steps = []\n",
|
||||
" if Task.COMMENTS in tasks:\n",
|
||||
" steps.append('Add documentation comments to the given code. If the method name and parameters are self-explanatory, skip those comments.')\n",
|
||||
" if Task.UNIT_TESTS in tasks:\n",
|
||||
" steps.append('Add a thorough unit tests considering all edge cases to the given code.')\n",
|
||||
" if Task.FIX_CODE in tasks:\n",
|
||||
" steps.append('You are to fix the given code, if it has any issues.')\n",
|
||||
" if Task.EXPLAIN in tasks:\n",
|
||||
" steps.append('Explain the given code.')\n",
|
||||
"\n",
|
||||
" system_prompt = f'''\n",
|
||||
" You are an experienced polyglot software engineer and given a code you can\n",
|
||||
" detect what programming language it is in.\n",
|
||||
" DO NOT fix the code until expressly told to do so.\n",
|
||||
"\n",
|
||||
" Your tasks:\n",
|
||||
" {'- ' + '\\n- '.join(steps)}\n",
|
||||
" '''\n",
|
||||
" messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
||||
" {\"role\": \"user\", \"content\": f'Code: \\n{code}'}\n",
|
||||
" ]\n",
|
||||
" response = client.chat.completions.create(\n",
|
||||
" model=model,\n",
|
||||
" messages=messages\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" content = response.choices[0].message.content\n",
|
||||
"\n",
|
||||
" return content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "SkmMYw_osxeG"
|
||||
},
|
||||
"source": [
|
||||
"### Examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "nlzUyXFus0km"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_examples() -> tuple[list[any], list[str]]:\n",
|
||||
" '''Returns examples and their labels'''\n",
|
||||
"\n",
|
||||
" # Python examples\n",
|
||||
" add = r'''\n",
|
||||
" def add(a, b):\n",
|
||||
" return a + b\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" multiply = r'''\n",
|
||||
" def multiply(a, b):\n",
|
||||
" return a * b\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" divide = r'''\n",
|
||||
" def divide(a, b):\n",
|
||||
" return a / b\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" # JavaScript example - async function\n",
|
||||
" fetch_data = r'''\n",
|
||||
" async function fetchUserData(userId) {\n",
|
||||
" const response = await fetch(`/api/users/${userId}`);\n",
|
||||
" const data = await response.json();\n",
|
||||
" return data;\n",
|
||||
" }\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" # Java example - sorting algorithm\n",
|
||||
" bubble_sort = r'''\n",
|
||||
" public void bubbleSort(int[] arr) {\n",
|
||||
" int n = arr.length;\n",
|
||||
" for (int i = 0; i < n-1; i++) {\n",
|
||||
" for (int j = 0; j < n-i-1; j++) {\n",
|
||||
" if (arr[j] > arr[j+1]) {\n",
|
||||
" int temp = arr[j];\n",
|
||||
" arr[j] = arr[j+1];\n",
|
||||
" arr[j+1] = temp;\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" # C++ example - buggy pointer code\n",
|
||||
" buggy_cpp = r'''\n",
|
||||
" int* createArray() {\n",
|
||||
" int arr[5] = {1, 2, 3, 4, 5};\n",
|
||||
" return arr;\n",
|
||||
" }\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" # Rust example - ownership puzzle\n",
|
||||
" rust_ownership = r'''\n",
|
||||
" fn main() {\n",
|
||||
" let s1 = String::from(\"hello\");\n",
|
||||
" let s2 = s1;\n",
|
||||
" println!(\"{}\", s1);\n",
|
||||
" }\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" # Go example - concurrent code\n",
|
||||
" go_concurrent = r'''\n",
|
||||
" func processData(data []int) int {\n",
|
||||
" sum := 0\n",
|
||||
" for _, v := range data {\n",
|
||||
" sum += v\n",
|
||||
" }\n",
|
||||
" return sum\n",
|
||||
" }\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" # TypeScript example - complex type\n",
|
||||
" ts_generics = r'''\n",
|
||||
" function mergeObjects<T, U>(obj1: T, obj2: U): T & U {\n",
|
||||
" return { ...obj1, ...obj2 };\n",
|
||||
" }\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" # Ruby example - metaclass magic\n",
|
||||
" ruby_meta = r'''\n",
|
||||
" class DynamicMethod\n",
|
||||
" define_method(:greet) do |name|\n",
|
||||
" \"Hello, #{name}!\"\n",
|
||||
" end\n",
|
||||
" end\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" # PHP example - SQL injection vulnerable\n",
|
||||
" php_vulnerable = r'''\n",
|
||||
" function getUser($id) {\n",
|
||||
" $query = \"SELECT * FROM users WHERE id = \" . $id;\n",
|
||||
" return mysqli_query($conn, $query);\n",
|
||||
" }\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" # Python example - complex algorithm\n",
|
||||
" binary_search = r'''\n",
|
||||
" def binary_search(arr, target):\n",
|
||||
" left, right = 0, len(arr) - 1\n",
|
||||
" while left <= right:\n",
|
||||
" mid = (left + right) // 2\n",
|
||||
" if arr[mid] == target:\n",
|
||||
" return mid\n",
|
||||
" elif arr[mid] < target:\n",
|
||||
" left = mid + 1\n",
|
||||
" else:\n",
|
||||
" right = mid - 1\n",
|
||||
" return -1\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" # JavaScript example - closure concept\n",
|
||||
" js_closure = r'''\n",
|
||||
" function counter() {\n",
|
||||
" let count = 0;\n",
|
||||
" return function() {\n",
|
||||
" count++;\n",
|
||||
" return count;\n",
|
||||
" };\n",
|
||||
" }\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" examples = [\n",
|
||||
" # Simple Python examples\n",
|
||||
" [[Task.COMMENTS], add, 'python'],\n",
|
||||
" [[Task.UNIT_TESTS], multiply, 'python'],\n",
|
||||
" [[Task.COMMENTS, Task.FIX_CODE], divide, 'python'],\n",
|
||||
"\n",
|
||||
" # Explain complex concepts\n",
|
||||
" [[Task.EXPLAIN], binary_search, 'python'],\n",
|
||||
" [[Task.EXPLAIN], js_closure, 'javascript'],\n",
|
||||
" [[Task.EXPLAIN], rust_ownership, 'rust'],\n",
|
||||
"\n",
|
||||
" # Unit tests for different languages\n",
|
||||
" [[Task.UNIT_TESTS], fetch_data, 'javascript'],\n",
|
||||
" [[Task.UNIT_TESTS], go_concurrent, 'go'],\n",
|
||||
"\n",
|
||||
" # Fix buggy code\n",
|
||||
" [[Task.FIX_CODE], buggy_cpp, 'cpp'],\n",
|
||||
" [[Task.FIX_CODE], php_vulnerable, 'php'],\n",
|
||||
"\n",
|
||||
" # Multi-task combinations\n",
|
||||
" [[Task.COMMENTS, Task.EXPLAIN], bubble_sort, None],\n",
|
||||
" [[Task.COMMENTS, Task.UNIT_TESTS], ts_generics, 'typescript'],\n",
|
||||
" [[Task.EXPLAIN, Task.FIX_CODE], rust_ownership, 'rust'],\n",
|
||||
" [[Task.COMMENTS, Task.UNIT_TESTS, Task.EXPLAIN], ruby_meta, 'ruby'],\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" example_labels = [\n",
|
||||
" '🐍 Python: Add Function',\n",
|
||||
" '🐍 Python: Multiply Tests',\n",
|
||||
" '🐍 Python: Fix Division',\n",
|
||||
" '🐍 Python: Binary Search Explained',\n",
|
||||
" '🟨 JavaScript: Closure Concept',\n",
|
||||
" '🦀 Rust: Ownership Puzzle',\n",
|
||||
" '🟨 JavaScript: Async Test',\n",
|
||||
" '🐹 Go: Concurrency Test',\n",
|
||||
" '⚡ C++: Fix Pointer Bug',\n",
|
||||
" '🐘 PHP: Fix SQL Injection',\n",
|
||||
" '☕ Java: Bubble Sort Guide',\n",
|
||||
" '📘 TypeScript: Generics & Tests',\n",
|
||||
" '🦀 Rust: Fix & Explain Ownership',\n",
|
||||
" '💎 Ruby: Meta Programming Deep Dive',\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" return examples, example_labels"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "wYReYuvgtDgg"
|
||||
},
|
||||
"source": [
|
||||
"## Gradio UI\n",
|
||||
"\n",
|
||||
"[Documentation](https://www.gradio.app/docs/gradio)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 664
|
||||
},
|
||||
"id": "I8Q08SJe8CxK",
|
||||
"outputId": "f1d41d06-dfda-4daf-b7ff-6f73bdaf8369"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"title = 'Snippet Sniper 🎯'\n",
|
||||
"\n",
|
||||
"with gr.Blocks(title=title, theme=gr.themes.Monochrome()) as ui:\n",
|
||||
" gr.Markdown(f'# {title}')\n",
|
||||
" gr.Markdown('## I am your [**John Wick**](https://en.wikipedia.org/wiki/John_Wick), ready to accept any contract on your code. Consider it executed 🎯🔫!.')\n",
|
||||
"\n",
|
||||
" with gr.Row():\n",
|
||||
" with gr.Column():\n",
|
||||
" tasks = gr.Dropdown(\n",
|
||||
" label=\"Tasks\",\n",
|
||||
" choices=[task.value for task in Task],\n",
|
||||
" value=Task.COMMENTS,\n",
|
||||
" multiselect=True,\n",
|
||||
" interactive=True,\n",
|
||||
" )\n",
|
||||
" code_input = gr.Code(\n",
|
||||
" label='Code Input',\n",
|
||||
" lines=40,\n",
|
||||
" )\n",
|
||||
" code_language = gr.Textbox(visible=False)\n",
|
||||
"\n",
|
||||
" with gr.Column():\n",
|
||||
" gr.Markdown('## Kill Zone 🧟🧠💀')\n",
|
||||
" code_output = gr.Markdown('💣')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" run_btn = gr.Button('📜 Issue Contract')\n",
|
||||
"\n",
|
||||
" def set_language(tasks, code, language):\n",
|
||||
" syntax_highlights = ['python', 'c', 'cpp', 'javascript', 'typescript']\n",
|
||||
" logger.debug(f'Tasks: {tasks}, Languge: {language}')\n",
|
||||
" highlight = language if language in syntax_highlights else None\n",
|
||||
"\n",
|
||||
" return tasks, gr.Code(value=code, language=highlight)\n",
|
||||
"\n",
|
||||
" examples, example_labels = get_examples()\n",
|
||||
" examples = gr.Examples(\n",
|
||||
" examples=examples,\n",
|
||||
" example_labels=example_labels,\n",
|
||||
" examples_per_page=20,\n",
|
||||
" inputs=[tasks, code_input, code_language],\n",
|
||||
" outputs=[tasks, code_input],\n",
|
||||
" run_on_click=True,\n",
|
||||
" fn=set_language\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" run_btn.click(perform_tasks, inputs=[tasks, code_input], outputs=[code_output])\n",
|
||||
"\n",
|
||||
"ui.launch(debug=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,376 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d27544d4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from dataclasses import dataclass\n",
|
||||
"from pathlib import Path\n",
|
||||
"from typing import Dict, List, Optional, Tuple\n",
|
||||
"\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from openai import OpenAI\n",
|
||||
"import gradio as gr\n",
|
||||
"\n",
|
||||
"from pathlib import Path\n",
|
||||
"from typing import List, Tuple\n",
|
||||
"from transformers import AutoTokenizer\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# ---- load env ----\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"\n",
|
||||
"# ---- OpenAI-compatible base URLs (Gemini & Groq) ----\n",
|
||||
"GEMINI_BASE = \"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
|
||||
"GROQ_BASE = \"https://api.groq.com/openai/v1\"\n",
|
||||
"\n",
|
||||
"OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
|
||||
"GOOGLE_API_KEY = os.getenv(\"GOOGLE_API_KEY\") # Gemini\n",
|
||||
"GROQ_API_KEY = os.getenv(\"GROQ_API_KEY\") # Groq\n",
|
||||
"\n",
|
||||
"# ---- create clients only if keys exist ----\n",
|
||||
"openai_client = OpenAI() if OPENAI_API_KEY else None\n",
|
||||
"gemini_client = OpenAI(api_key=GOOGLE_API_KEY, base_url=GEMINI_BASE) if GOOGLE_API_KEY else None\n",
|
||||
"groq_client = OpenAI(api_key=GROQ_API_KEY, base_url=GROQ_BASE) if GROQ_API_KEY else None\n",
|
||||
"\n",
|
||||
"# ---- model registry (label -> client/model) ----\n",
|
||||
"MODEL_REGISTRY: Dict[str, Dict[str, object]] = {}\n",
|
||||
"def _register(label: str, client: Optional[OpenAI], model_id: str):\n",
|
||||
" if client is not None:\n",
|
||||
" MODEL_REGISTRY[label] = {\"client\": client, \"model\": model_id}\n",
|
||||
"\n",
|
||||
"# OpenAI\n",
|
||||
"_register(\"OpenAI • GPT-5\", openai_client, \"gpt-5\")\n",
|
||||
"_register(\"OpenAI • GPT-5 Nano\", openai_client, \"gpt-5-nano\")\n",
|
||||
"_register(\"OpenAI • GPT-4o-mini\", openai_client, \"gpt-4o-mini\")\n",
|
||||
"\n",
|
||||
"# Gemini (Google)\n",
|
||||
"_register(\"Gemini • 2.5 Pro\", gemini_client, \"gemini-2.5-pro\")\n",
|
||||
"_register(\"Gemini • 2.5 Flash\", gemini_client, \"gemini-2.5-flash\")\n",
|
||||
"\n",
|
||||
"# Groq\n",
|
||||
"_register(\"Groq • Llama 3.1 8B\", groq_client, \"llama-3.1-8b-instant\")\n",
|
||||
"_register(\"Groq • Llama 3.3 70B\", groq_client, \"llama-3.3-70b-versatile\")\n",
|
||||
"_register(\"Groq • GPT-OSS 20B\", groq_client, \"openai/gpt-oss-20b\")\n",
|
||||
"_register(\"Groq • GPT-OSS 120B\", groq_client, \"openai/gpt-oss-120b\")\n",
|
||||
"\n",
|
||||
"AVAILABLE_MODELS = list(MODEL_REGISTRY.keys())\n",
|
||||
"DEFAULT_MODEL = AVAILABLE_MODELS[0] if AVAILABLE_MODELS else \"OpenAI • GPT-4o-mini\"\n",
|
||||
"\n",
|
||||
"print(\"Providers configured →\",\n",
|
||||
" f\"OpenAI:{bool(OPENAI_API_KEY)} Gemini:{bool(GOOGLE_API_KEY)} Groq:{bool(GROQ_API_KEY)}\")\n",
|
||||
"print(\"Models available →\", \", \".join(AVAILABLE_MODELS) or \"None (add API keys in .env)\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "efe4e4db",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@dataclass(frozen=True)\n",
|
||||
"class LLMRoute:\n",
|
||||
" client: OpenAI\n",
|
||||
" model: str\n",
|
||||
"\n",
|
||||
"class MultiLLM:\n",
|
||||
" \"\"\"OpenAI-compatible chat across providers (OpenAI, Gemini, Groq).\"\"\"\n",
|
||||
" def __init__(self, registry: Dict[str, Dict[str, object]]):\n",
|
||||
" self._routes: Dict[str, LLMRoute] = {\n",
|
||||
" k: LLMRoute(client=v[\"client\"], model=str(v[\"model\"])) for k, v in registry.items()\n",
|
||||
" }\n",
|
||||
" if not self._routes:\n",
|
||||
" raise RuntimeError(\"No LLM providers configured. Add API keys in .env.\")\n",
|
||||
"\n",
|
||||
" def complete(self, *, model_label: str, system: str, user: str) -> str:\n",
|
||||
" if model_label not in self._routes:\n",
|
||||
" raise ValueError(f\"Unknown model: {model_label}\")\n",
|
||||
" r = self._routes[model_label]\n",
|
||||
" resp = r.client.chat.completions.create(\n",
|
||||
" model=r.model,\n",
|
||||
" messages=[{\"role\":\"system\",\"content\":system},\n",
|
||||
" {\"role\":\"user\",\"content\":user}]\n",
|
||||
" )\n",
|
||||
" return (resp.choices[0].message.content or \"\").strip()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "30636b66",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"# MiniLM embedding model & tokenizer (BERT WordPiece)\n",
|
||||
"EMBED_MODEL_NAME = \"sentence-transformers/all-MiniLM-L6-v2\"\n",
|
||||
"\n",
|
||||
"# Use the model's practical window with 50% overlap\n",
|
||||
"MAX_TOKENS = 256 # all-MiniLM-L6-v2 effective limit used by Sentence-Transformers\n",
|
||||
"OVERLAP_RATIO = 0.50 # 50% sliding window overlap\n",
|
||||
"\n",
|
||||
"TOKENIZER = AutoTokenizer.from_pretrained(EMBED_MODEL_NAME)\n",
|
||||
"\n",
|
||||
"def chunk_text(\n",
|
||||
" text: str,\n",
|
||||
" tokenizer: AutoTokenizer = TOKENIZER,\n",
|
||||
" max_tokens: int = MAX_TOKENS,\n",
|
||||
" overlap_ratio: float = OVERLAP_RATIO,\n",
|
||||
") -> List[str]:\n",
|
||||
" \"\"\"\n",
|
||||
" Token-aware sliding window chunking for MiniLM.\n",
|
||||
" - Windows of `max_tokens`\n",
|
||||
" - Step = max_tokens * (1 - overlap_ratio) -> 50% overlap by default\n",
|
||||
" \"\"\"\n",
|
||||
" ids = tokenizer.encode(text, add_special_tokens=False)\n",
|
||||
" if not ids:\n",
|
||||
" return []\n",
|
||||
"\n",
|
||||
" step = max(1, int(max_tokens * (1.0 - overlap_ratio)))\n",
|
||||
" out: List[str] = []\n",
|
||||
" for start in range(0, len(ids), step):\n",
|
||||
" window = ids[start : start + max_tokens]\n",
|
||||
" if not window:\n",
|
||||
" break\n",
|
||||
" toks = tokenizer.convert_ids_to_tokens(window)\n",
|
||||
" chunk = tokenizer.convert_tokens_to_string(toks).strip()\n",
|
||||
" if chunk:\n",
|
||||
" out.append(chunk)\n",
|
||||
" if start + max_tokens >= len(ids):\n",
|
||||
" break\n",
|
||||
" return out\n",
|
||||
"\n",
|
||||
"def load_bare_acts(root: str = \"knowledge_base/bare_acts\") -> List[Tuple[str, str]]:\n",
|
||||
" \"\"\"Return list of (source_id, text). `source_id` is filename stem.\"\"\"\n",
|
||||
" base = Path(root)\n",
|
||||
" if not base.exists():\n",
|
||||
" raise FileNotFoundError(f\"Folder not found: {base.resolve()}\")\n",
|
||||
" pairs: List[Tuple[str, str]] = []\n",
|
||||
" for p in sorted(base.glob(\"*.txt\")):\n",
|
||||
" pairs.append((p.stem, p.read_text(encoding=\"utf-8\")))\n",
|
||||
" if not pairs:\n",
|
||||
" raise RuntimeError(\"No .txt files found under knowledge_base/bare_acts\")\n",
|
||||
" return pairs\n",
|
||||
"\n",
|
||||
"acts_raw = load_bare_acts()\n",
|
||||
"print(\"Bare Acts loaded:\", [s for s, _ in acts_raw])\n",
|
||||
"print(f\"Chunking → max_tokens={MAX_TOKENS}, overlap={int(OVERLAP_RATIO*100)}%\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "af537e05",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"from chromadb import PersistentClient\n",
|
||||
"from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction\n",
|
||||
"from transformers import AutoTokenizer\n",
|
||||
"from typing import Dict, List, Tuple\n",
|
||||
"\n",
|
||||
"class BareActsIndex:\n",
|
||||
" \"\"\"Owns the vector DB lifecycle & retrieval (token-aware chunking).\"\"\"\n",
|
||||
" def __init__(\n",
|
||||
" self,\n",
|
||||
" db_path: str = \"vector_db\",\n",
|
||||
" collection: str = \"bare_acts\",\n",
|
||||
" embed_model: str = EMBED_MODEL_NAME,\n",
|
||||
" max_tokens: int = MAX_TOKENS,\n",
|
||||
" overlap_ratio: float = OVERLAP_RATIO,\n",
|
||||
" ):\n",
|
||||
" self.db_path = db_path\n",
|
||||
" self.collection_name = collection\n",
|
||||
" self.embed_model = embed_model\n",
|
||||
" self.max_tokens = max_tokens\n",
|
||||
" self.overlap_ratio = overlap_ratio\n",
|
||||
"\n",
|
||||
" self.embed_fn = SentenceTransformerEmbeddingFunction(model_name=self.embed_model)\n",
|
||||
" self.tokenizer = AutoTokenizer.from_pretrained(self.embed_model)\n",
|
||||
"\n",
|
||||
" self.client: PersistentClient = PersistentClient(path=db_path)\n",
|
||||
" self.col = self.client.get_or_create_collection(\n",
|
||||
" name=self.collection_name,\n",
|
||||
" embedding_function=self.embed_fn,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" def rebuild(self, docs: List[Tuple[str, str]]):\n",
|
||||
" \"\"\"Idempotent rebuild: clears and re-adds chunks with metadata.\"\"\"\n",
|
||||
" try:\n",
|
||||
" self.client.delete_collection(self.collection_name)\n",
|
||||
" except Exception:\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
" self.col = self.client.get_or_create_collection(\n",
|
||||
" name=self.collection_name,\n",
|
||||
" embedding_function=self.embed_fn,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" ids, texts, metas = [], [], []\n",
|
||||
" for src, text in docs:\n",
|
||||
" for idx, ch in enumerate(\n",
|
||||
" chunk_text(\n",
|
||||
" text,\n",
|
||||
" tokenizer=self.tokenizer,\n",
|
||||
" max_tokens=self.max_tokens,\n",
|
||||
" overlap_ratio=self.overlap_ratio,\n",
|
||||
" )\n",
|
||||
" ):\n",
|
||||
" ids.append(f\"{src}-{idx}\")\n",
|
||||
" texts.append(ch)\n",
|
||||
" metas.append({\"source\": src, \"chunk_id\": idx})\n",
|
||||
"\n",
|
||||
" if ids:\n",
|
||||
" self.col.add(ids=ids, documents=texts, metadatas=metas)\n",
|
||||
"\n",
|
||||
" print(\n",
|
||||
" f\"Indexed {len(texts)} chunks from {len(docs)} files → {self.collection_name} \"\n",
|
||||
" f\"(tokens/chunk={self.max_tokens}, overlap={int(self.overlap_ratio*100)}%)\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" def query(self, q: str, k: int = 6) -> List[Dict]:\n",
|
||||
" res = self.col.query(query_texts=[q], n_results=k)\n",
|
||||
" docs = res.get(\"documents\", [[]])[0]\n",
|
||||
" metas = res.get(\"metadatas\", [[]])[0]\n",
|
||||
" return [{\"text\": d, \"meta\": m} for d, m in zip(docs, metas)]\n",
|
||||
"\n",
|
||||
"# build (or rebuild) the index once\n",
|
||||
"index = BareActsIndex()\n",
|
||||
"index.rebuild(acts_raw)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7eec89e4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class PromptBuilder:\n",
|
||||
" \"\"\"Small utility to keep prompting consistent and auditable.\"\"\"\n",
|
||||
" SYSTEM = (\n",
|
||||
" \"You are a precise legal assistant for Indian Bare Acts. \"\n",
|
||||
" \"Answer ONLY from the provided context. If the answer is not in context, say you don't know. \"\n",
|
||||
" \"Cite sources inline in square brackets as [file #chunk] (e.g., [bns #12]). \"\n",
|
||||
" \"Prefer exact quotes for critical provisions/sections.\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" @staticmethod\n",
|
||||
" def build_user(query: str, contexts: List[Dict]) -> str:\n",
|
||||
" ctx = \"\\n\\n---\\n\\n\".join(\n",
|
||||
" f\"[{c['meta']['source']} #{c['meta']['chunk_id']}]\\n{c['text']}\" for c in contexts\n",
|
||||
" )\n",
|
||||
" return (\n",
|
||||
" f\"Question:\\n{query}\\n\\n\"\n",
|
||||
" f\"Context (do not use outside this):\\n{ctx}\\n\\n\"\n",
|
||||
" \"Instructions:\\n- Keep answers concise and faithful to the text.\\n\"\n",
|
||||
" \"- Use [file #chunk] inline where relevant.\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"def _snippet(txt: str, n: int = 220) -> str:\n",
|
||||
" s = \" \".join(txt.strip().split())\n",
|
||||
" return (s[:n] + \"…\") if len(s) > n else s\n",
|
||||
"\n",
|
||||
"class RagQAService:\n",
|
||||
" \"\"\"Coordinates retrieval + generation, and returns a rich reference block.\"\"\"\n",
|
||||
" def __init__(self, index: BareActsIndex, llm: MultiLLM):\n",
|
||||
" self.index = index\n",
|
||||
" self.llm = llm\n",
|
||||
" self.builder = PromptBuilder()\n",
|
||||
"\n",
|
||||
" def answer(self, *, question: str, model_label: str, k: int = 6) -> str:\n",
|
||||
" ctx = self.index.query(question, k=k)\n",
|
||||
" user = self.builder.build_user(question, ctx)\n",
|
||||
" reply = self.llm.complete(model_label=model_label, system=self.builder.SYSTEM, user=user)\n",
|
||||
"\n",
|
||||
" # Rich references: file, chunk index, snippet\n",
|
||||
" references = \"\\n\".join(\n",
|
||||
" f\"- [{c['meta']['source']} #{c['meta']['chunk_id']}] {_snippet(c['text'])}\"\n",
|
||||
" for c in ctx\n",
|
||||
" )\n",
|
||||
" return f\"{reply}\\n\\n**References**\\n{references}\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4862732b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = MultiLLM(MODEL_REGISTRY)\n",
|
||||
"qa_service = RagQAService(index=index, llm=llm)\n",
|
||||
"\n",
|
||||
"# quick smoke test (won't spend tokens if no keys for that provider)\n",
|
||||
"if AVAILABLE_MODELS:\n",
|
||||
" print(\"Ready. Default model:\", DEFAULT_MODEL)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c0b1512b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def chat_fn(message: str, history: List[Dict], model_label: str, top_k: int) -> str:\n",
|
||||
" try:\n",
|
||||
" return qa_service.answer(question=message, model_label=model_label, k=int(top_k))\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"⚠️ {e}\"\n",
|
||||
"\n",
|
||||
"DEFAULT_QUESTION = \"Which sections deals with punishment for murder ?\"\n",
|
||||
"\n",
|
||||
"with gr.Blocks(title=\"Legal QnA • Bare Acts (RAG + Multi-LLM)\") as app:\n",
|
||||
" gr.Markdown(\"### 🧑⚖️ Legal Q&A on Bare Acts (RAG) — Multi-Provider LLM\")\n",
|
||||
" with gr.Row():\n",
|
||||
" model_dd = gr.Dropdown(\n",
|
||||
" choices=AVAILABLE_MODELS or [\"OpenAI • GPT-4o-mini\"],\n",
|
||||
" value=DEFAULT_MODEL if AVAILABLE_MODELS else None,\n",
|
||||
" label=\"Model\"\n",
|
||||
" )\n",
|
||||
" topk = gr.Slider(2, 12, value=6, step=1, label=\"Top-K context\")\n",
|
||||
"\n",
|
||||
" chat = gr.ChatInterface(\n",
|
||||
" fn=chat_fn,\n",
|
||||
" type=\"messages\",\n",
|
||||
" additional_inputs=[model_dd, topk],\n",
|
||||
" textbox=gr.Textbox(\n",
|
||||
" value=DEFAULT_QUESTION,\n",
|
||||
" label=\"Ask a legal question\",\n",
|
||||
" placeholder=\"Type your question about BNS/IPC/Constitution…\"\n",
|
||||
" ),\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"app.launch(inbrowser=True)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "llm-engineering",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
353
week5/community-contributions/ranskills-week5-p-chat.ipynb
Normal file
353
week5/community-contributions/ranskills-week5-p-chat.ipynb
Normal file
@@ -0,0 +1,353 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f2969c8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# P-Chat 🔒💬\n",
|
||||
"\n",
|
||||
"A privacy-focused bring-your-own-document (BYOD) solution that empowers you to leverage the power of LLMs to interact with your documents. Nothing is persisted, and it exists entirely in ephemeral memory.\n",
|
||||
"\n",
|
||||
"## Features\n",
|
||||
"- Parent-child chunking used to enrich the context\n",
|
||||
"- Chunk augmentation with some parent data for structured documents\n",
|
||||
"- Streamed responses for better user experience\n",
|
||||
"- Secure by design; no data is stored permanently\n",
|
||||
"- Uses locally-running Ollama for total privacy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "df7609cf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_ollama langchain_chroma langchain_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "144bdf7c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"import sys\n",
|
||||
"from pathlib import Path\n",
|
||||
"from enum import StrEnum\n",
|
||||
"\n",
|
||||
"import gradio as gr\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter\n",
|
||||
"from langchain_ollama import OllamaEmbeddings, ChatOllama\n",
|
||||
"from langchain.storage import InMemoryStore\n",
|
||||
"from langchain_chroma import Chroma\n",
|
||||
"from langchain_community.document_loaders import TextLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dfdb143d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"logger = logging.getLogger('rag')\n",
|
||||
"logger.setLevel(logging.DEBUG)\n",
|
||||
"\n",
|
||||
"if not logger.handlers:\n",
|
||||
" handler = logging.StreamHandler(sys.stdout)\n",
|
||||
" formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')\n",
|
||||
" handler.setFormatter(formatter)\n",
|
||||
" logger.addHandler(handler)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0e2f176b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## RAG Pipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "78f2a554",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def pretty_print(l: list[Document | tuple[Document, float]]):\n",
|
||||
" for i,item in enumerate(l, start=1):\n",
|
||||
" logger.debug('-' * 80 + '\\n')\n",
|
||||
"\n",
|
||||
" if isinstance(item, tuple):\n",
|
||||
" doc, score = item\n",
|
||||
" logger.debug(f'{i}. characters: {len(doc.page_content)}\\n')\n",
|
||||
" logger.debug(f'Score: {score}\\nMetadata: {doc.metadata}\\nContent: {doc.page_content}')\n",
|
||||
" else:\n",
|
||||
" logger.debug(f'{i}. characters: {len(item.page_content)}\\n')\n",
|
||||
" logger.debug(f'Metadata: {item.metadata}\\nContent: {item.page_content}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "42893f0b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Indexing\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "20ad0e80",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_id = 'qwen3:0.6b'\n",
|
||||
"embedding_model = 'nomic-embed-text:latest'\n",
|
||||
"\n",
|
||||
"embeddings = OllamaEmbeddings(model=embedding_model)\n",
|
||||
"model = ChatOllama(model=model_id, temperature=0.1)\n",
|
||||
"\n",
|
||||
"vectorstore = Chroma(\n",
|
||||
" collection_name='p-chat',\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
")\n",
|
||||
"docstore = InMemoryStore()\n",
|
||||
"\n",
|
||||
"class Metadata(StrEnum):\n",
|
||||
" ID = 'id'\n",
|
||||
" PARENT_ID = 'parent_id'\n",
|
||||
" SOURCE = 'source'\n",
|
||||
" FILE_TYPE = 'file_type'\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"LOADER_MAPPING = {\n",
|
||||
" '.md': TextLoader,\n",
|
||||
" '.txt': TextLoader, \n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"def load_documents(file_path: Path) -> list[Document]:\n",
|
||||
" # p = Path(file_path)\n",
|
||||
" extension = file_path.suffix\n",
|
||||
" logger.info(f'Loading loader for {extension}')\n",
|
||||
" loader_cls = LOADER_MAPPING.get(extension)\n",
|
||||
"\n",
|
||||
" if loader_cls is None:\n",
|
||||
" logger.warning(f'No loader configured for {extension}')\n",
|
||||
" return []\n",
|
||||
" \n",
|
||||
" loader = loader_cls(file_path)\n",
|
||||
" documents = loader.load()\n",
|
||||
" logger.info(f'{len(documents)} loaded for {file_path.name}')\n",
|
||||
"\n",
|
||||
" return documents\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def preprocess(documents: list[Document]) -> list[Document]:\n",
|
||||
" # Perform any cleaning, etc.\n",
|
||||
" import uuid\n",
|
||||
"\n",
|
||||
" for doc in documents:\n",
|
||||
" metadata = doc.metadata\n",
|
||||
" shortened_source = metadata.get('source').split('/')[-1]\n",
|
||||
"\n",
|
||||
" metadata[Metadata.ID] = str(uuid.uuid4())\n",
|
||||
" metadata[Metadata.SOURCE] = shortened_source\n",
|
||||
" metadata[Metadata.FILE_TYPE] = shortened_source.split('.')[-1]\n",
|
||||
"\n",
|
||||
" return documents\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def index_document(file_path):\n",
|
||||
" documents = load_documents(Path(file_path))\n",
|
||||
" preprocessed_docs = preprocess(documents)\n",
|
||||
" logger.debug([doc.metadata for doc in preprocessed_docs])\n",
|
||||
"\n",
|
||||
" for doc in preprocessed_docs:\n",
|
||||
" chunks = chunk_documents(doc)\n",
|
||||
"\n",
|
||||
" vectorstore.add_documents(chunks)\n",
|
||||
" docstore.mset([(doc.metadata.get(Metadata.ID) , doc)])\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def chunk_documents(parent: Document) -> list[Document]:\n",
|
||||
" if parent.metadata.get(Metadata.FILE_TYPE) == '.md':\n",
|
||||
" headers_to_split_on = [\n",
|
||||
" ('#', 'employee_name'),\n",
|
||||
" ('##', 'section'),\n",
|
||||
" ('###', 'Header 3'),\n",
|
||||
" ] \n",
|
||||
" markdown_splitter = MarkdownHeaderTextSplitter(\n",
|
||||
" headers_to_split_on=headers_to_split_on\n",
|
||||
" )\n",
|
||||
" chunks = markdown_splitter.split_text(parent.page_content) \n",
|
||||
" else:\n",
|
||||
" text_splitter = RecursiveCharacterTextSplitter(\n",
|
||||
" chunk_size=400,\n",
|
||||
" chunk_overlap=80,\n",
|
||||
" separators=['\\n\\n', '\\n', ' ', '']\n",
|
||||
" )\n",
|
||||
" chunks = text_splitter.split_text(parent.page_content)\n",
|
||||
"\n",
|
||||
" children = []\n",
|
||||
" parent_id = parent.metadata.get(Metadata.ID)\n",
|
||||
" for i, chunk in enumerate(chunks, start=1):\n",
|
||||
" if isinstance(chunk, Document):\n",
|
||||
" metadata = {**parent.metadata, **chunk.metadata}\n",
|
||||
" augmented_text = f'[Employee: {metadata.get('employee_name')}] '\n",
|
||||
" content = augmented_text + chunk.page_content\n",
|
||||
" else:\n",
|
||||
" # chunk is a text\n",
|
||||
" metadata = parent.metadata.copy()\n",
|
||||
" content = chunk\n",
|
||||
"\n",
|
||||
" metadata.update({\n",
|
||||
" Metadata.ID: f'{parent_id}-{i}',\n",
|
||||
" Metadata.PARENT_ID: parent_id,\n",
|
||||
" })\n",
|
||||
" children.append(Document(page_content=content, metadata=metadata))\n",
|
||||
"\n",
|
||||
" logger.debug(f'Number chunks: {len(children)}, Parent ID: {parent_id}')\n",
|
||||
" \n",
|
||||
" return children"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a90db6ee",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### LLM Interaction"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e2e15e99",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def retrieve_context(query) -> str:\n",
|
||||
" results = vectorstore.similarity_search(query)\n",
|
||||
" logger.info(f'Matching records: {len(results)}')\n",
|
||||
" selected_parents = {}\n",
|
||||
" for result in results:\n",
|
||||
" parent_id = result.metadata.get('parent_id')\n",
|
||||
" if parent_id in selected_parents:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" parents = docstore.mget([parent_id])\n",
|
||||
" selected_parents[parent_id] = parents[0]\n",
|
||||
"\n",
|
||||
" logger.info(f'Selected documents for query: {query} ids:{selected_parents.keys()}')\n",
|
||||
" context = '\\n\\n'.join([doc.page_content for _,doc in selected_parents.items() if doc is not None])\n",
|
||||
"\n",
|
||||
" return context\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"def ask(message, history):\n",
|
||||
" context = retrieve_context(message)\n",
|
||||
" prompt = f'''\n",
|
||||
" You are helpful assistant that answers a question based on the provided context.\n",
|
||||
" If the context is not helpful to you in answering the question, say so.\n",
|
||||
" Be concise with your responses.\n",
|
||||
"\n",
|
||||
" Context:\n",
|
||||
" {context}\n",
|
||||
" '''\n",
|
||||
"\n",
|
||||
" messages = [\n",
|
||||
" ('system', prompt),\n",
|
||||
" ('user', message)\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" stream = model.stream(messages)\n",
|
||||
" response_text = ''\n",
|
||||
"\n",
|
||||
" for chunk in stream:\n",
|
||||
" response_text += chunk.content or ''\n",
|
||||
" if not response_text:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" yield response_text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c3e632dc-9e87-4510-9fcd-aa699c27e82b",
|
||||
"metadata": {
|
||||
"jp-MarkdownHeadingCollapsed": true
|
||||
},
|
||||
"source": [
|
||||
"## Gradio UI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a3d68a74",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def chat(message, history):\n",
|
||||
" if message is None:\n",
|
||||
" return ''\n",
|
||||
"\n",
|
||||
" text_input = message.get('text', '')\n",
|
||||
" files_uploaded = message.get('files', [])\n",
|
||||
" \n",
|
||||
" latest_file_path = files_uploaded[-1] if files_uploaded else None\n",
|
||||
" if latest_file_path:\n",
|
||||
" index_document(latest_file_path)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" if not text_input:\n",
|
||||
" yield '✅ Indexed document'\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
" for chunk in ask(text_input, history):\n",
|
||||
" yield chunk\n",
|
||||
"\n",
|
||||
"title = 'P-Chat 🔒💬'\n",
|
||||
"with gr.Blocks(title=title, fill_height=True) as ui:\n",
|
||||
" gr.Markdown(f'# {title}')\n",
|
||||
" gr.Markdown('## Privacy-focused bring-your-own-document (BYOD) solution 🤫.')\n",
|
||||
"\n",
|
||||
" gr.ChatInterface(\n",
|
||||
" fn=chat,\n",
|
||||
" type='messages',\n",
|
||||
" textbox=gr.MultimodalTextbox(file_types=['text', '.txt', '.md'], autofocus=True),\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"ui.launch(debug=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
372
week6/community-contributions/Exercise_week6_jom.ipynb
Normal file
372
week6/community-contributions/Exercise_week6_jom.ipynb
Normal file
@@ -0,0 +1,372 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "168f6f43",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import re\n",
|
||||
"import math\n",
|
||||
"import json\n",
|
||||
"import random\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from huggingface_hub import login\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"import pickle\n",
|
||||
"from collections import Counter\n",
|
||||
"from openai import OpenAI\n",
|
||||
"from anthropic import Anthropic\n",
|
||||
"\n",
|
||||
"# environment\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
|
||||
"os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')\n",
|
||||
"os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')\n",
|
||||
"\n",
|
||||
"hf_token = os.environ['HF_TOKEN']\n",
|
||||
"login(hf_token, add_to_git_credential=True)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from items import Item\n",
|
||||
"from testing import Tester\n",
|
||||
"\n",
|
||||
"openai = OpenAI()\n",
|
||||
"\n",
|
||||
"%matplotlib inline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b990ccf1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"with open('train.pkl', 'rb') as file:\n",
|
||||
" train = pickle.load(file)\n",
|
||||
"\n",
|
||||
"with open('test.pkl', 'rb') as file:\n",
|
||||
" test = pickle.load(file)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"fine_tune_train = train[:200]\n",
|
||||
"fine_tune_validation = train[200:250]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def messages_for(item):\n",
|
||||
" system_message = \"You estimate prices of items. Reply only with the price, no explanation\"\n",
|
||||
" user_prompt = item.test_prompt().replace(\" to the nearest dollar\",\"\").replace(\"\\n\\nPrice is $\",\"\")\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_message},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt},\n",
|
||||
" {\"role\": \"assistant\", \"content\": f\"Price is ${item.price:.2f}\"}\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"def make_jsonl(items):\n",
|
||||
" result = \"\"\n",
|
||||
" for item in items:\n",
|
||||
" messages = messages_for(item)\n",
|
||||
" messages_str = json.dumps(messages)\n",
|
||||
" result += '{\"messages\": ' + messages_str +'}\\n'\n",
|
||||
" return result.strip()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def write_jsonl(items, filename):\n",
|
||||
" with open(filename, \"w\") as f:\n",
|
||||
" jsonl = make_jsonl(items)\n",
|
||||
" f.write(jsonl)\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f0d128e2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Trained too fast\n",
|
||||
"It resulted in overfitting (validation loss fluctuating widely, about 4x larger), although accuracy stayed constant. \n",
|
||||
"Epochs: 2 Batch size: 16 LR multiplier:0.1\n",
|
||||
"\n",
|
||||
"Lots of errors which, in hindsight, may result from parsing the output (didn't check) \n",
|
||||
"**Metrics**: $153, RMSLE 3.6 Hits 31% "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f8cce151",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"write_jsonl(fine_tune_train, \"fine_tune_train.jsonl\")\n",
|
||||
"write_jsonl(fine_tune_validation, \"fine_tune_validation.jsonl\")\n",
|
||||
"\n",
|
||||
"with open(\"fine_tune_train.jsonl\", \"rb\") as f:\n",
|
||||
" train_file = openai.files.create(file=f, purpose=\"fine-tune\")\n",
|
||||
"with open(\"fine_tune_validation.jsonl\", \"rb\") as f:\n",
|
||||
" validation_file = openai.files.create(file=f, purpose=\"fine-tune\")\n",
|
||||
"\n",
|
||||
"wandb_integration = {\"type\": \"wandb\", \"wandb\": {\"project\": \"gpt-pricer\"}}\n",
|
||||
"\n",
|
||||
"openai.fine_tuning.jobs.create(\n",
|
||||
" training_file=train_file.id,\n",
|
||||
" validation_file=validation_file.id,\n",
|
||||
" model=\"gpt-4o-mini-2024-07-18\",\n",
|
||||
" seed=42,\n",
|
||||
" hyperparameters={\"n_epochs\": 5},\n",
|
||||
" integrations = [wandb_integration],\n",
|
||||
" suffix=\"pricer_v1\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"fine_tuned_model_name_hpo = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model\n",
|
||||
"# The prompt\n",
|
||||
"\n",
|
||||
"def messages_for_test(item):\n",
|
||||
" system_message = \"You estimate prices of items. Reply only with the price, no explanation\"\n",
|
||||
" user_prompt = item.test_prompt().replace(\" to the nearest dollar\",\"\").replace(\"\\n\\nPrice is $\",\"\")\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_message},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt},\n",
|
||||
" {\"role\": \"assistant\", \"content\": \"Price is $\"}\n",
|
||||
" ]\n",
|
||||
"# A utility function to extract the price from a string\n",
|
||||
"\n",
|
||||
"def get_price(s):\n",
|
||||
" s = s.replace('$','').replace(',','')\n",
|
||||
" match = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n",
|
||||
" return float(match.group()) if match else 0\n",
|
||||
"\n",
|
||||
"# The function for gpt-4o-mini\n",
|
||||
"\n",
|
||||
"def gpt_fine_tuned(item):\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=fine_tuned_model_name_hpo,\n",
|
||||
" messages=messages_for_test(item),\n",
|
||||
" seed=42,\n",
|
||||
" max_tokens=7\n",
|
||||
" )\n",
|
||||
" reply = response.choices[0].message.content\n",
|
||||
" return get_price(reply)\n",
|
||||
"\n",
|
||||
"Tester.test(gpt_fine_tuned, test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "43716422",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Same OP model, but with nicer prompting ONLY at inference\n",
|
||||
"It fixed the $0 prices, improving the metrics. \n",
|
||||
"**Metrics**: $88, RMSLE 0.59 Hits 50% "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c624cade",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def modified_messages_for_test(item):\n",
|
||||
" system_message = (\n",
|
||||
" \"You are a helpful assistant skilled at estimating the prices of a wide range of products and purchases.\"\n",
|
||||
" \"Analyze the detailed information provided about a product—including its description, brand, features, and any relevant specs or packaging.\"\n",
|
||||
" \"Respond with your best conservative estimate of the typical sale price in U.S. dollars for very similar products at an online marketplace\"\n",
|
||||
" \"Reply ONLY with the price number WITHOUT any explanation, reasoning, or extra text.\"\n",
|
||||
" \"Price cannot be zero, always make sensible assumptions.\"\n",
|
||||
" )\n",
|
||||
" user_prompt = (\n",
|
||||
" \"What could be a conservative estimate for the price of the following product:\\n\\n\" +\n",
|
||||
" item.test_prompt().replace(\" to the nearest dollar\", \"\").replace(\"\\n\\nPrice is $\", \"\")\n",
|
||||
" )\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_message},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt},\n",
|
||||
" {\"role\": \"assistant\", \"content\": f\"Price is $\"}\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def gpt_fine_tuned(item):\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=fine_tuned_model_name_epoch5,\n",
|
||||
" messages=modified_messages_for_test(item),\n",
|
||||
" seed=42,\n",
|
||||
" max_tokens=7\n",
|
||||
" )\n",
|
||||
" reply = response.choices[0].message.content\n",
|
||||
" return get_price(reply)\n",
|
||||
"\n",
|
||||
"Tester.test(gpt_fine_tuned, test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "892b06e3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Trying to fix overfitting, setting new HPO and prompting on training \n",
|
||||
"Epochs:1 Batch size:1 LR multiplier:0.01 \n",
|
||||
"Didn't make noticeable difference \n",
|
||||
"**Metrics**: $89, RMSLE 0.56 Hits 50% \n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "662870a8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"def modified_messages_for(item):\n",
|
||||
" system_message = (\n",
|
||||
" \"You are a helpful assistant skilled at estimating the prices of a wide range of products and purchases.\"\n",
|
||||
" \"Analyze the detailed information provided about a product—including its description, brand, features, and any relevant specs or packaging.\"\n",
|
||||
" \"Respond with your best conservative estimate of the typical sale price in U.S. dollars for very similar products at an online marketplace\"\n",
|
||||
" \"Reply ONLY with the price number WITHOUT any explanation, reasoning, or extra text.\"\n",
|
||||
" \"Price cannot be zero, always make sensible assumptions.\"\n",
|
||||
" )\n",
|
||||
" user_prompt = (\n",
|
||||
" \"What could be a conservative estimate for the price of the following product:\\n\\n\" +\n",
|
||||
" item.test_prompt().replace(\" to the nearest dollar\", \"\").replace(\"\\n\\nPrice is $\", \"\")\n",
|
||||
" )\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": system_message},\n",
|
||||
" {\"role\": \"user\", \"content\": user_prompt},\n",
|
||||
" {\"role\": \"assistant\", \"content\": f\"Price is ${item.price:.2f}\"}\n",
|
||||
"\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"def modified_make_jsonl(items):\n",
|
||||
" result = \"\"\n",
|
||||
" for item in items:\n",
|
||||
" messages = modified_messages_for(item)\n",
|
||||
" messages_str = json.dumps(messages)\n",
|
||||
" result += '{\"messages\": ' + messages_str +'}\\n'\n",
|
||||
" return result.strip()\n",
|
||||
"\n",
|
||||
"def modified_write_jsonl(items, filename):\n",
|
||||
" with open(filename, \"w\") as f:\n",
|
||||
" jsonl = modified_make_jsonl(items)\n",
|
||||
" f.write(jsonl)\n",
|
||||
"\n",
|
||||
"modified_write_jsonl(fine_tune_train, \"mod_fine_tune_train.jsonl\")\n",
|
||||
"modified_write_jsonl(fine_tune_validation, \"mod_fine_tune_validation.jsonl\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"with open(\"mod_fine_tune_train.jsonl\", \"rb\") as f:\n",
|
||||
" mod_train_file = openai.files.create(file=f, purpose=\"fine-tune\")\n",
|
||||
"with open(\"mod_fine_tune_validation.jsonl\", \"rb\") as f:\n",
|
||||
" mod_validation_file = openai.files.create(file=f, purpose=\"fine-tune\")\n",
|
||||
"\n",
|
||||
"openai.fine_tuning.jobs.create(\n",
|
||||
" training_file=mod_train_file.id,\n",
|
||||
" validation_file=mod_validation_file.id,\n",
|
||||
" model=\"gpt-4o-mini-2024-07-18\",\n",
|
||||
" seed=42,\n",
|
||||
" hyperparameters={\"n_epochs\": 1, \"learning_rate_multiplier\":1., \"batch_size\":1},\n",
|
||||
" integrations = [wandb_integration],\n",
|
||||
" suffix=\"pricer_v3\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b7d14e01",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fine_tuned_model_name_prompt_train = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def mod_gpt_fine_tuned(item):\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=fine_tuned_model_name_prompt_train,\n",
|
||||
" messages=modified_messages_for_test(item),\n",
|
||||
" seed=42,\n",
|
||||
" max_tokens=7\n",
|
||||
" )\n",
|
||||
" reply = response.choices[0].message.content\n",
|
||||
" return get_price(reply)\n",
|
||||
"\n",
|
||||
"Tester.test(mod_gpt_fine_tuned, test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4fbedd53",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Last model, to achieve faster convergence\n",
|
||||
"Epochs:1 Batch size:1 LR multiplier:1 \n",
|
||||
"**Metrics**: $87, RMSLE 0.59 Hits 47% \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9b78f3b4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai.fine_tuning.jobs.create(\n",
|
||||
" training_file=mod_train_file.id,\n",
|
||||
" validation_file=mod_validation_file.id,\n",
|
||||
" model=\"gpt-4o-mini-2024-07-18\",\n",
|
||||
" seed=42,\n",
|
||||
" hyperparameters={\"n_epochs\": 1, \"learning_rate_multiplier\":1., \"batch_size\":1},\n",
|
||||
" integrations = [wandb_integration],\n",
|
||||
" suffix=\"pricer_v3\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6da5f2d5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fine_tuned_model_name_prompt_train_lr = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model\n",
|
||||
"\n",
|
||||
"def mod_gpt_fine_tuned_v2(item):\n",
|
||||
" response = openai.chat.completions.create(\n",
|
||||
" model=fine_tuned_model_name_prompt_train_lr,\n",
|
||||
" messages=modified_messages_for_test(item),\n",
|
||||
" seed=42,\n",
|
||||
" max_tokens=7\n",
|
||||
" )\n",
|
||||
" reply = response.choices[0].message.content\n",
|
||||
" return get_price(reply)\n",
|
||||
"\n",
|
||||
"Tester.test(mod_gpt_fine_tuned_v2, test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "19febde6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Summary\n",
|
||||
"For this model in particular, the prompting seems far more important than the fine-tuning itself.\n",
|
||||
"We tried training for longer, which led to overfitting. Then we addressed the overfitting, with and without prompting in the training inputs, and the results have been invariant."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,325 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "db8736a7-ed94-441c-9556-831fa57b5a10",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# The Product Pricer Fine Tuning\n",
|
||||
"\n",
|
||||
"Submitted By: Bharat Puri\n",
|
||||
"\n",
|
||||
"A model that can estimate how much something costs, from its description.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "681c717b-4c24-4ac3-a5f3-3c5881d6e70a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import re\n",
|
||||
"import math\n",
|
||||
"import json\n",
|
||||
"import random\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from huggingface_hub import login\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import pickle\n",
|
||||
"from collections import Counter\n",
|
||||
"import sys\n",
|
||||
"sys.path.append(os.path.abspath(os.path.join(\"..\", \"..\"))) \n",
|
||||
"from openai import OpenAI\n",
|
||||
"from anthropic import Anthropic\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.metrics import mean_absolute_error\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "36d05bdc-0155-4c72-a7ee-aa4e614ffd3c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# environment\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
|
||||
"os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')\n",
|
||||
"os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4dd3aad2-6f99-433c-8792-e461d2f06622",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Log in to HuggingFace\n",
|
||||
"\n",
|
||||
"hf_token = os.environ['HF_TOKEN']\n",
|
||||
"login(hf_token, add_to_git_credential=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "884a50bd-8cae-425e-8e56-f079fc3e65ce",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# =============================================\n",
|
||||
"# Step 1 – Load and Inspect Dataset (CSV files)\n",
|
||||
"# =============================================\n",
|
||||
"\n",
|
||||
"df_input = pd.read_csv(\"../../human_input.csv\")\n",
|
||||
"df_output = pd.read_csv(\"../../human_output.csv\")\n",
|
||||
"\n",
|
||||
"print(\"Input columns:\", df_input.columns.tolist())\n",
|
||||
"print(\"Output columns:\", df_output.columns.tolist())\n",
|
||||
"\n",
|
||||
"# Detect correct column names automatically\n",
|
||||
"input_col = df_input.columns[0] # first column name\n",
|
||||
"output_col = df_output.columns[0] # first column name\n",
|
||||
"\n",
|
||||
"data = pd.DataFrame({\n",
|
||||
" \"prompt\": df_input[input_col].astype(str),\n",
|
||||
" \"completion\": df_output[output_col].astype(str)\n",
|
||||
"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b0a6fb86-74a4-403c-ab25-6db2d74e9d2b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# =============================================\n",
|
||||
"# Step 2 – Split into Train and Validation Sets\n",
|
||||
"# =============================================\n",
|
||||
"\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"# Keep this small to minimize cost\n",
|
||||
"train_df, val_df = train_test_split(data, test_size=0.2, random_state=42)\n",
|
||||
"\n",
|
||||
"print(f\"Training samples: {len(train_df)} | Validation samples: {len(val_df)}\")\n",
|
||||
"\n",
|
||||
"# Save to JSONL format (required by OpenAI fine-tuning API)\n",
|
||||
"train_df.to_json(\"train.jsonl\", orient=\"records\", lines=True)\n",
|
||||
"val_df.to_json(\"val.jsonl\", orient=\"records\", lines=True)\n",
|
||||
"\n",
|
||||
"print(\"✅ Train and validation data prepared successfully.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c830ed3e-24ee-4af6-a07b-a1bfdcd39278",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"train_df.head(3)\n",
|
||||
"val_df.head(3)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5c9b05f4-c9eb-462c-8d86-de9140a2d985",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# =============================================\n",
|
||||
"# Step 3 – Define Fine-Tuning Configuration\n",
|
||||
"# =============================================\n",
|
||||
"\n",
|
||||
"hyperparams = {\n",
|
||||
" \"model\": \"gpt-4o-mini\", \n",
|
||||
" \"n_epochs\": 1, \n",
|
||||
" \"batch_size\": 4, # Small batch = less token use\n",
|
||||
" \"learning_rate_multiplier\": 0.5, # Gentle learning rate\n",
|
||||
" \"suffix\": \"week6_lowcost_bharat\" # Custom suffix for tracking\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"print(\"✅ Fine-tuning configuration defined:\")\n",
|
||||
"for k, v in hyperparams.items():\n",
|
||||
" print(f\"{k:25}: {v}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e8367135-f40e-43e1-8f3c-09e990ab1194",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# OpenAI recommends fine-tuning with populations of 50-100 examples\n",
|
||||
"# But as our examples are very small, I'm suggesting we go with 200 examples (and 1 epoch)\n",
|
||||
"\n",
|
||||
"fine_tune_train = train[:200]\n",
|
||||
"fine_tune_validation = train[200:250]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8ae2fb3c-1cff-4ce3-911e-627c970edd7b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# =============================================\n",
|
||||
"# Step 4 – Launch Fine-Tuning Job or Simulate\n",
|
||||
"# =============================================\n",
|
||||
"\n",
|
||||
"import time\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"# Initialize the OpenAI client\n",
|
||||
"client = OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))\n",
|
||||
"\n",
|
||||
"# Toggle this flag to switch between simulation and real fine-tuning\n",
|
||||
"simulate = True # ✅ Default: Free simulation mode\n",
|
||||
"\n",
|
||||
"if simulate:\n",
|
||||
" print(\"\\n⚙️ Simulating fine-tuning process (no API cost)...\")\n",
|
||||
" for i in range(hyperparams['n_epochs']):\n",
|
||||
" print(f\"Epoch {i+1}/{hyperparams['n_epochs']} training...\")\n",
|
||||
" time.sleep(1)\n",
|
||||
" print(\"Fine-tuning complete ✅ (simulated)\")\n",
|
||||
"else:\n",
|
||||
" print(\"\\n🚀 Launching real fine-tuning job...\")\n",
|
||||
"\n",
|
||||
" # Upload train and validation files\n",
|
||||
" train_file = client.files.create(file=open(\"train.jsonl\", \"rb\"), purpose=\"fine-tune\")\n",
|
||||
" val_file = client.files.create(file=open(\"val.jsonl\", \"rb\"), purpose=\"fine-tune\")\n",
|
||||
"\n",
|
||||
" # Create fine-tuning job\n",
|
||||
" job = client.fine_tuning.jobs.create(\n",
|
||||
" training_file=train_file.id,\n",
|
||||
" validation_file=val_file.id,\n",
|
||||
" **hyperparams\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" print(\"✅ Fine-tuning job created successfully!\")\n",
|
||||
" print(\"Job ID:\", job.id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1aa280f6-1227-426a-a2e2-1ce985feba1e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# =============================================\n",
|
||||
"# Step 5 – Evaluate Fine-Tuned (or Simulated) Model\n",
|
||||
"# =============================================\n",
|
||||
"\n",
|
||||
"from sklearn.metrics import mean_absolute_error\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"print(\"\\n🔍 Evaluating model performance...\")\n",
|
||||
"\n",
|
||||
"# Keep evaluation small to minimize cost\n",
|
||||
"val_df = val_df.head(5)\n",
|
||||
"\n",
|
||||
"predictions = []\n",
|
||||
"actuals = []\n",
|
||||
"\n",
|
||||
"if simulate:\n",
|
||||
" # Simulated predictions for free mode\n",
|
||||
" predictions = np.random.uniform(70, 90, len(val_df))\n",
|
||||
" actuals = np.random.uniform(70, 90, len(val_df))\n",
|
||||
" print(\"✅ Simulation mode: generated random prediction values for evaluation.\")\n",
|
||||
"else:\n",
|
||||
" # Real evaluation using fine-tuned model\n",
|
||||
" print(\"🧠 Generating predictions using fine-tuned model...\")\n",
|
||||
" for _, row in val_df.iterrows():\n",
|
||||
" response = client.chat.completions.create(\n",
|
||||
" model=f\"ft:{hyperparams['model']}:{hyperparams['suffix']}\",\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": row['prompt']}],\n",
|
||||
" )\n",
|
||||
" pred = response.choices[0].message.content.strip()\n",
|
||||
" predictions.append(pred)\n",
|
||||
" actuals.append(row['completion'])\n",
|
||||
"\n",
|
||||
"# Try calculating MAE if numeric outputs\n",
|
||||
"try:\n",
|
||||
" preds_float = [float(p) for p in predictions]\n",
|
||||
" acts_float = [float(a) for a in actuals]\n",
|
||||
" mae = mean_absolute_error(acts_float, preds_float)\n",
|
||||
" print(f\"\\n📊 Validation Mean Absolute Error (MAE): {mae:.2f}\")\n",
|
||||
"except:\n",
|
||||
" print(\"\\n⚠️ Non-numeric outputs detected — qualitative comparison recommended.\")\n",
|
||||
" for i in range(len(val_df)):\n",
|
||||
" print(f\"\\nPrompt: {val_df.iloc[i]['prompt']}\")\n",
|
||||
" print(f\"→ Prediction: {predictions[i]}\")\n",
|
||||
" print(f\"→ Actual: {actuals[i]}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c0e5b56c-8a0b-4d8e-a112-ce87efb4e152",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# =============================================\n",
|
||||
"# Step 6 – Visualize and Reflect (Fixed)\n",
|
||||
"# =============================================\n",
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"# Plot simulated predictions vs actuals\n",
|
||||
"plt.figure(figsize=(6, 4))\n",
|
||||
"plt.plot(preds_float, label=\"Predicted\", marker='o')\n",
|
||||
"plt.plot(acts_float, label=\"Actual\", marker='x')\n",
|
||||
"plt.title(\"Validation Predictions vs Actuals (Simulated)\")\n",
|
||||
"plt.xlabel(\"Sample Index\")\n",
|
||||
"plt.ylabel(\"Value\")\n",
|
||||
"plt.legend()\n",
|
||||
"plt.grid(True)\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"# Summary Reflection\n",
|
||||
"print(\"\\n===== WEEK 6 REFLECTION =====\")\n",
|
||||
"print(\"✅ Completed the full fine-tuning workflow successfully.\")\n",
|
||||
"print(\"🧠 Simulation mode enabled full understanding without any API cost.\")\n",
|
||||
"print(\"📊 Validation MAE: 3.30 (simulated)\")\n",
|
||||
"print(\"🔍 Learned how to prepare data, configure fine-tuning, and evaluate models safely.\")\n",
|
||||
"print(\"💡 Next step: Try real fine-tuning (simulate=False) on small data if free credits are available.\")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.14"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,345 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "db8736a7-ed94-441c-9556-831fa57b5a10",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# The Product Pricer: Fine-Tuning a Frontier Model - Simulation (GPT-4o mini)\n",
|
||||
"\n",
|
||||
"Submitted By: Bharat Puri\n",
|
||||
"\n",
|
||||
"A model that can estimate how much something costs, from its description.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "681c717b-4c24-4ac3-a5f3-3c5881d6e70a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# imports\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import re\n",
|
||||
"import math\n",
|
||||
"import json\n",
|
||||
"import random\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from huggingface_hub import login\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import pickle\n",
|
||||
"from collections import Counter\n",
|
||||
"import sys\n",
|
||||
"sys.path.append(os.path.abspath(os.path.join(\"..\", \"..\"))) \n",
|
||||
"from openai import OpenAI\n",
|
||||
"from anthropic import Anthropic\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.metrics import mean_absolute_error\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "36d05bdc-0155-4c72-a7ee-aa4e614ffd3c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# environment\n",
|
||||
"\n",
|
||||
"load_dotenv(override=True)\n",
|
||||
"os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')\n",
|
||||
"os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')\n",
|
||||
"os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4dd3aad2-6f99-433c-8792-e461d2f06622",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Log in to HuggingFace\n",
|
||||
"\n",
|
||||
"hf_token = os.environ['HF_TOKEN']\n",
|
||||
"login(hf_token, add_to_git_credential=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9c69e347-91bc-4eb1-843f-a17ed485667c",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# =============================================================\n",
|
||||
"# Step 1 — Data Curation and Preparation (Integrated from 09_part1_data_curation)\n",
|
||||
"# =============================================================\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"import pickle\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"print(\"🔍 Starting data curation...\")\n",
|
||||
"\n",
|
||||
"# Load input/output CSVs (adjust paths as needed)\n",
|
||||
"df_input = pd.read_csv(\"../../human_input.csv\")\n",
|
||||
"df_output = pd.read_csv(\"../../human_output.csv\")\n",
|
||||
"\n",
|
||||
"# Detect and combine dynamically\n",
|
||||
"i_col, o_col = df_input.columns[0], df_output.columns[0]\n",
|
||||
"df = pd.DataFrame({\n",
|
||||
" \"prompt\": df_input[i_col].astype(str).str.strip(),\n",
|
||||
" \"completion\": df_output[o_col].astype(str).str.strip()\n",
|
||||
"})\n",
|
||||
"\n",
|
||||
"# Basic cleaning\n",
|
||||
"df.dropna(inplace=True)\n",
|
||||
"df = df[df[\"prompt\"].str.len() > 0]\n",
|
||||
"df = df[df[\"completion\"].str.len() > 0]\n",
|
||||
"df = df.reset_index(drop=True)\n",
|
||||
"\n",
|
||||
"print(f\"✅ Cleaned dataset shape: {df.shape}\")\n",
|
||||
"print(df.head(3))\n",
|
||||
"\n",
|
||||
"# Split into training and validation\n",
|
||||
"train_df, val_df = train_test_split(df, test_size=0.1, random_state=42)\n",
|
||||
"print(f\"Training samples: {len(train_df)}, Validation samples: {len(val_df)}\")\n",
|
||||
"\n",
|
||||
"# Save curated datasets to reuse later\n",
|
||||
"with open(\"train.pkl\", \"wb\") as f:\n",
|
||||
" pickle.dump(train_df, f)\n",
|
||||
"with open(\"test.pkl\", \"wb\") as f:\n",
|
||||
" pickle.dump(val_df, f)\n",
|
||||
"\n",
|
||||
"print(\"💾 Saved train.pkl and test.pkl successfully.\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b0a6fb86-74a4-403c-ab25-6db2d74e9d2b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# =============================================================\n",
|
||||
"# Step 2 — Prepare Data for Fine-Tuning\n",
|
||||
"# =============================================================\n",
|
||||
"import pickle\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"print(\"📦 Loading curated train/test data from pickle files...\")\n",
|
||||
"\n",
|
||||
"with open(\"train.pkl\", \"rb\") as f:\n",
|
||||
" train_df = pickle.load(f)\n",
|
||||
"with open(\"test.pkl\", \"rb\") as f:\n",
|
||||
" val_df = pickle.load(f)\n",
|
||||
"\n",
|
||||
"print(f\"✅ Loaded train={len(train_df)} | val={len(val_df)}\")\n",
|
||||
"\n",
|
||||
"# Ensure correct column names\n",
|
||||
"train_df = train_df.rename(columns={train_df.columns[0]: \"prompt\", train_df.columns[1]: \"completion\"})\n",
|
||||
"val_df = val_df.rename(columns={val_df.columns[0]: \"prompt\", val_df.columns[1]: \"completion\"})\n",
|
||||
"\n",
|
||||
"# Save as JSONL for OpenAI Fine-Tuning\n",
|
||||
"train_df.to_json(\"train.jsonl\", orient=\"records\", lines=True)\n",
|
||||
"val_df.to_json(\"val.jsonl\", orient=\"records\", lines=True)\n",
|
||||
"\n",
|
||||
"print(\"💾 Saved train.jsonl and val.jsonl for fine-tuning.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c830ed3e-24ee-4af6-a07b-a1bfdcd39278",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# =============================================================\n",
|
||||
"# Step 3 — Fine-Tuning Configuration\n",
|
||||
"# =============================================================\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"hyperparams = {\n",
|
||||
" \"model\": \"gpt-4o-mini\", # Frontier model from the course\n",
|
||||
" \"n_epochs\": 3, # Small safe run\n",
|
||||
" \"batch_size\": 8, # Reasonable for small data\n",
|
||||
" \"learning_rate_multiplier\": 0.5, # Trainer's suggested mid value\n",
|
||||
" \"suffix\": \"week6_bharat_ft_v1\" # Unique identifier for your run\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"print(\"⚙️ Fine-tuning configuration:\")\n",
|
||||
"print(json.dumps(hyperparams, indent=2))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5c9b05f4-c9eb-462c-8d86-de9140a2d985",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# =============================================\n",
|
||||
"# Step 3 – Define Fine-Tuning Configuration\n",
|
||||
"# =============================================\n",
|
||||
"\n",
|
||||
"hyperparams = {\n",
|
||||
" \"model\": \"gpt-4o-mini\", \n",
|
||||
" \"n_epochs\": 1, \n",
|
||||
" \"batch_size\": 4, # Small batch = less token use\n",
|
||||
" \"learning_rate_multiplier\": 0.5, # Gentle learning rate\n",
|
||||
" \"suffix\": \"week6_lowcost_bharat\" # Custom suffix for tracking\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"print(\"✅ Fine-tuning configuration defined:\")\n",
|
||||
"for k, v in hyperparams.items():\n",
|
||||
" print(f\"{k:25}: {v}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e8367135-f40e-43e1-8f3c-09e990ab1194",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# =============================================================\n",
|
||||
"# Step 4 — Launch Fine-Tuning Job (Fixed for latest SDK)\n",
|
||||
"# =============================================================\n",
|
||||
"from openai import OpenAI\n",
|
||||
"import time, os, json\n",
|
||||
"\n",
|
||||
"client = OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))\n",
|
||||
"\n",
|
||||
"simulate = True # Set True for simulation (no cost)\n",
|
||||
"\n",
|
||||
"if simulate:\n",
|
||||
" print(\"\\n🧪 Simulation mode — running mock fine-tuning steps...\")\n",
|
||||
" for e in range(3):\n",
|
||||
" print(f\"Simulated Epoch {e+1}/3\")\n",
|
||||
" time.sleep(1)\n",
|
||||
" ft_model = \"ft:gpt-4o-mini:SIMULATED\"\n",
|
||||
" print(\"✅ Simulation complete — no API cost.\")\n",
|
||||
"else:\n",
|
||||
" print(\"\\n🚀 Creating fine-tuning job...\")\n",
|
||||
"\n",
|
||||
" # Upload training and validation data\n",
|
||||
" train_file = client.files.create(file=open(\"train.jsonl\", \"rb\"), purpose=\"fine-tune\")\n",
|
||||
" val_file = client.files.create(file=open(\"val.jsonl\", \"rb\"), purpose=\"fine-tune\")\n",
|
||||
"\n",
|
||||
" # ✅ Correct usage: hyperparameters must go inside a dictionary named `hyperparameters`\n",
|
||||
" job = client.fine_tuning.jobs.create(\n",
|
||||
" model=\"gpt-4o-mini\",\n",
|
||||
" training_file=train_file.id,\n",
|
||||
" validation_file=val_file.id,\n",
|
||||
" hyperparameters={\n",
|
||||
" \"n_epochs\": 3,\n",
|
||||
" \"batch_size\": 8,\n",
|
||||
" \"learning_rate_multiplier\": 0.5\n",
|
||||
" },\n",
|
||||
" suffix=\"week6_bharat_ft_v1\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" print(\"🆔 Job created:\", job.id)\n",
|
||||
"\n",
|
||||
" # Poll until completion\n",
|
||||
" status = job.status\n",
|
||||
" while status in (\"validating_files\", \"queued\", \"running\"):\n",
|
||||
" print(\"⏳ Status:\", status)\n",
|
||||
" time.sleep(20)\n",
|
||||
" job = client.fine_tuning.jobs.retrieve(job.id)\n",
|
||||
" status = job.status\n",
|
||||
"\n",
|
||||
" if job.status != \"succeeded\":\n",
|
||||
" raise RuntimeError(f\"❌ Fine-tune failed with status: {job.status}\")\n",
|
||||
"\n",
|
||||
" ft_model = job.fine_tuned_model\n",
|
||||
" print(\"🎯 Fine-tuning complete! Model ID:\", ft_model)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "32a2b85e-e978-4c8f-90d9-d697731e6569",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# =============================================================\n",
|
||||
"# Step 5 — Evaluate Simulated Fine-Tuned Model\n",
|
||||
"# =============================================================\n",
|
||||
"import numpy as np\n",
|
||||
"from sklearn.metrics import mean_absolute_error\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"print(\"\\n🧮 Evaluating simulated fine-tuned model performance...\")\n",
|
||||
"\n",
|
||||
"# Use small sample of validation data\n",
|
||||
"val_subset = val_df.sample(min(20, len(val_df)), random_state=42).reset_index(drop=True)\n",
|
||||
"prompts = val_subset[\"prompt\"].tolist()\n",
|
||||
"actuals = val_subset[\"completion\"].tolist()\n",
|
||||
"\n",
|
||||
"# Convert actuals into numeric form (if applicable)\n",
|
||||
"def extract_number(x):\n",
|
||||
" match = re.findall(r\"[-+]?\\d*\\.?\\d+\", str(x))\n",
|
||||
" return float(match[0]) if match else np.random.uniform(70, 90)\n",
|
||||
"\n",
|
||||
"actual_values = [extract_number(a) for a in actuals]\n",
|
||||
"\n",
|
||||
"# 🧪 Simulate predicted values (normally would come from API)\n",
|
||||
"predicted_values = [v + np.random.uniform(-3, 3) for v in actual_values]\n",
|
||||
"\n",
|
||||
"# Calculate Mean Absolute Error\n",
|
||||
"mae = mean_absolute_error(actual_values, predicted_values)\n",
|
||||
"print(f\"\\n📊 Validation Mean Absolute Error (Simulated): {mae:.2f}\")\n",
|
||||
"\n",
|
||||
"# Plot comparison\n",
|
||||
"plt.figure(figsize=(6, 4))\n",
|
||||
"plt.plot(predicted_values, label=\"Predicted\", marker=\"o\")\n",
|
||||
"plt.plot(actual_values, label=\"Actual\", marker=\"x\")\n",
|
||||
"plt.title(\"Validation Predictions vs Actuals (Simulated)\")\n",
|
||||
"plt.xlabel(\"Sample Index\")\n",
|
||||
"plt.ylabel(\"Value\")\n",
|
||||
"plt.legend()\n",
|
||||
"plt.grid(True)\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"# Reflection Summary\n",
|
||||
"print(\"\\n===== WEEK 6 REFLECTION =====\")\n",
|
||||
"print(\"✅ Completed full fine-tuning workflow (simulated) successfully.\")\n",
|
||||
"print(\"🧠 Understood how fine-tuning integrates with GPT-4o-mini API workflow.\")\n",
|
||||
"print(f\"📊 Validation MAE (simulated): {mae:.2f}\")\n",
|
||||
"print(\"🔍 Practiced prompt alignment, data curation, and evaluation safely.\")\n",
|
||||
"print(\"💡 Next step: Try real fine-tuning (simulate=False) on small data if credits are available.\")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.14"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
200
week6/community-contributions/nikhil_raut/fine_tune_train.jsonl
Normal file
200
week6/community-contributions/nikhil_raut/fine_tune_train.jsonl
Normal file
@@ -0,0 +1,200 @@
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nDelphi FG0166 Fuel Pump Module\nDelphi brings 80 years of OE Heritage into each Delphi pump, ensuring quality and fitment for each Delphi part. Part is validated, tested and matched to the right vehicle application Delphi brings 80 years of OE Heritage into each Delphi assembly, ensuring quality and fitment for each Delphi part Always be sure to check and clean fuel tank to avoid unnecessary returns Rigorous OE-testing ensures the pump can withstand extreme temperatures Brand Delphi, Fit Type Vehicle Specific Fit, Dimensions LxWxH 19.7 x 7.7 x 5.1 inches, Weight 2.2 Pounds, Auto Part Position Unknown, Operation Mode Mechanical, Manufacturer Delphi, Model FUEL PUMP, Dimensions 19.7"}, {"role": "assistant", "content": "Price is $226.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nPower Stop Rear Z36 Truck and Tow Brake Kit with Calipers\nThe Power Stop Z36 Truck & Tow Performance brake kit provides the superior stopping power demanded by those who tow boats, haul loads, tackle mountains, lift trucks, and play in the harshest conditions. The brake rotors are drilled to keep temperatures down during extreme braking and slotted to sweep away any debris for constant pad contact. Combined with our Z36 Carbon-Fiber Ceramic performance friction formulation, you can confidently push your rig to the limit and look good doing it with red powder brake calipers. Components are engineered to handle the stress of towing, hauling, mountainous driving, and lifted trucks. Dust-free braking performance. Z36 Carbon-Fiber Ceramic formula provides the extreme braking performance demanded by your truck or 4x"}, {"role": "assistant", "content": "Price is $506.98"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nABBA 36 Gas Cooktop with 5 Sealed Burners - Tempered Glass Surface with SABAF Burners, Natural Gas Stove for Countertop, Home Improvement Essentials, Easy to Clean, 36 x 4.1 x 20.5\ncooktop Gas powered with 4 fast burners and 1 ultra-fast center burner Tempered glass surface with removable grid for easy cleaning Lightweight for easy installation. Installation Manual Included Counter cutout Dimensions 19 3/8 x 34 1/2 (see diagram) Insured shipping for your satisfaction and peace of mind Brand Name ABBA EST. 1956, Weight 30 pounds, Dimensions 20.5\\ D x 36\\ W x 4.1\\ H, Installation Type Count"}, {"role": "assistant", "content": "Price is $405.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nShabby Restore Silver Chrome Knob Bolt Fittings for Ceramic & Glass Pulls, Knobs, 3 Bolt, Washer, Nut, Metal Flower\nSilver Chrome Knob Fitting for ceramic and glass knobs & pulls. Replace the existing ones with these. These are made to go through an existing hole in the knob. The pictures show a knob with silver chrome hardware. Knobs are NOT included. Silver Chrome Silver Chrome Included Screw Size 3 Screw Size 3 1 Washers 1 Washers 1 Nuts 1 Nuts 1 Back Plate 1 Back Plate 1 Front Metal Flower 1 Front Metal Flower INCLUDED Pack of 1 Chrome 3 Bolt, 1 washer, 1 nut, 1 backplate, 1 metal flower piece. Total length of"}, {"role": "assistant", "content": "Price is $1.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nFurrion Access 4G LTE/WiFi Dual Band Portable Router with 1GB of Data Included. Works Omni-Direction Rooftop Antenna to Provide high-Speed Internet connectivity on The go - White\nWORKS WITH FURRION ACCESS ANTENNA Works exclusively with Furrion Omni-directional rooftop antenna to keep you fully connected when you're on the move. EXTENDED WIFI AND 4G The LTE Wi-Fi Router provides speeds up to (support LTE Band and has a Wi-Fi range extender for improved signal strength. Allows you to connect up to 30 devices and auto-switch between 4G and WiFi. WiFI NETWORK SECURITY Allows you to connect to available 2.4GHz and 5 GHz WiFi signals and gives you peace of mind with WiFi network"}, {"role": "assistant", "content": "Price is $246.93"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nDasbecan Rear View Park Assist Backup Camera Replacement Compatible with Lincoln MKX 2013 2014 2015 Replaces#\nReplaces # Compatible with Lincoln MKX 2013 2014 2015 Made of high-quality materials with maximum durability.And exact equivalent part meets the original manufacturer's specifications and features. Easy to install and direct replacement for the old or broken one. Save your time and money. If you are not satisfied with the product, please feel free contact us via Amazon Message, and we will reply within 24 hours and help solve the problem. Dimensions 2.56 x 2.48 x 2.32 inches, Weight 1.44 ounces, Rank Electronics Vehicle Backup Cameras 899, Other display features Wireless, Manufacturer Dasbecan, Country of"}, {"role": "assistant", "content": "Price is $75.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nDynasty Door Closer Heavy Duty Commercial Grade Hydraulic Adjustable Spring Door Closer Series 4401 Meets ADA Sprayed Aluminum Finish\nNon-Handed for Regular Arm, Top-Jamb or Parallel Arm Installation Closer Body Mounting Hole Pattern Match LCN 4040 / 4010 Standard Adjustable Back-Check Function Grade 1, ANSI 156.4 Heavy Duty Door Closer UL Listed for Fire Door Assemblies Manufacturer Dynasty Hardware, Part Weight 12.3 pounds, Dimensions 12.25 x 3 x 3 inches, Country of Origin China, model number Is Discontinued No, Color Sprayed Aluminum, Material Aluminum, Installation Method Screw In, Quantity 1, Included Components Closer Arm, Closer Body, Plastic Cover, Fasteners, Instructions, Rank Tools"}, {"role": "assistant", "content": "Price is $144.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMFJ884 Original MFJ Enterprises 200 W MHz Cross Needle SWR/Wattmeter\nHF/VHF and UHF, 1.8 to 525 MHz, power range 0-200 Watts in three ranges Watts. Has separate HF and VHF/UHF power sensors with SO-239 connectors. If you will not settle for less than the best accuracy and precision then these handsome GrandMasters are for you. You get a 3-inch precision illuminated Cross-Needle meter for easy wide-angle viewing. Read SWR, forward and reflected power all in a single glance! Three-color scale gives you improved readability and reliability. LED backlight gives excellent night vision. Requires 13.8 VDC or operation. Each unit is precisely factory calibrated for accurate measurements. Air-Dielectric"}, {"role": "assistant", "content": "Price is $174.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nStark Portable 5 cu Ft Electric Concrete Cement Mixer Machine Freestanding 1/2 HP Mixing Concrete with Wheel\nPowerful Motor - Heavy duty motor with heavy duty direct drive gearbox, 23 RPM improve running time and stability Multi Applications Mixer - This heavy duty cement mixer is ideal for concrete, stucco, and mortar and perfect for inoculating seeds and mixing feeds Safety Lock - It has switch with safety lock, which is easy to control; Motor 1/2 HP; RPM 1725 Direct Drive Gearbox - Mixer features a direct drive gearbox - easy to assemble, no belts or pulleys 2 Wheels for Easy Moving - Cement mixer has two rubber wheels, which provide convenience for moving the machine on any road condition Brand Stark USA, Color Orange, Special Feature Adjustable Speed"}, {"role": "assistant", "content": "Price is $349.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nKNIPEX Tools - Diagonal Flush Cutter for Plastics, 45 Degree Angle Red\nKNIPEX 72 11 160 Diagonal Pliers for Flush Cut Plastics 45 Angled Diagonal Pliers for Flush Cut Plastics 45 Angled Diagonal Pliers for Flush Cut Plastics 45 Angled Diagonal Pliers for Flush Cut Plastics 45 Angled Diagonal Pliers for Flush Cut Plastics 45 Angled Diagonal Pliers for Flush Cut Plastics 45 Angled Brand KNIPEX, Material Blend, Color Red, Handle Material Plastic, Weight 156 Grams, Specific Uses For Product Interior, Dimensions 6.38\\ L x 2\\ W, Manufacturer Knipex Tools LP, Part 89885"}, {"role": "assistant", "content": "Price is $52.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nPG Engine Air Filter | Fits Chevrolet Camaro\nSUPERIOR ENGINE PROTECTION \u2013 Premium Guard Air Filters filter out 99% of incoming engine air to help extend the life of your engine. ENHANCED PERFORMANCE \u2013 High-capacity air filter media removes dangerous particles improving engine performance and increasing engine efficiency. EASY TO INSTALL - Premium Guard Air Filters are engineered to fit perfectly inside your vehicle\u2019s housing for quick and easy installation. Compatible with Chevrolet Camaro. Precisely designed, engineered, and tested to meet and exceed all GENERAL MOTORS OE air filter requirements. Replaces GENERAL MOTORS Air Filter. Always check fitment using the Vehicle Filter Manufacturer Premium Guard, Brand Premium Guard, Weight 1.12 pounds, Dimensions 12.1 x 10.6 x 2.1 inches"}, {"role": "assistant", "content": "Price is $31.27"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nVEVOR Utility Blower Fan 10 inch with 10M Duct Portable 1520 CFM High Velocity Utility Blower,Mighty Mini Low Noise,for Factories Basements Shipyards Farm\n320W Cylinder Fan 10inch Ventilation BlowerThe 10 inch ventilation fan at an excellent price, top of quality and boxed, is mainly used for low wind pressure, air flow of the occasions, like factories, basements, shipyards, farms, grain storage, chemical, etc. It has a AC mptor. Made of heavy duty steel, compact with large flow portable with a handle, a 16ft PVC ducting. Powerful AC Motor Large Flow Protective Fan Guards Humanized Design 16ft PVC Ducting key FeaturesStrong AC MotorFast Speed"}, {"role": "assistant", "content": "Price is $135.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nRugged Ridge | Headlight Bezel Kit, Black | | Fits Jeep Wrangler JK\nComplete the look of your front end with these easy-to-install headlight trim bezels from Rugged Ridge. Each UV treated bezel easily attaches to factory mounting points creating a clean look. The bezels come complete with automotive grade double-sided tape. Rest assured, the headlight trim bezels are back by the Rugged Ridge Limited 5 Year Warranty. Rugged Ridge Headlight Trim - Pair Rugged Ridge Black Parking Light Bezel - Pair Headlight Bezels Parking Light Bezels Rugged Ridge Headlight Trim - Pair Rugged Ridge Black Parking Light Bezel - Pair Rugged Ridge Exterior Trim Accessories are the perfect way to give your Wrangler that wow factor you need that will set you apart"}, {"role": "assistant", "content": "Price is $59.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCastle X CX200 Liberty Dual Sport Helmet in Matte Charcoal, Size XX-Large\nAggressive, modern shell design created with CAD technology. Shell constructed with Advanced Polycarbonate Composite injection molding. Multi-density EPS liner including placement in chin bar laterals. Hard coated, optically correct single pane shield. Rider friendly drop down sun visor system, fitted standard with Hi-Definition smoke tint sun visor. Removable interior padding system offers a plush fit. DOT & ECE Approved. Meets the FMVSS 218 Standard. Eyeglass friendly cheek pads. Quick release chin strap buckle system offers micro adjustments for secure comfort. Advanced ventilation system allows air to easily flow front to back in the helmet to remove excess heat via the air flow channels in the EPS liner. Communication System compatible"}, {"role": "assistant", "content": "Price is $165.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAmazon Basics Grocery Store Checkout Counter, Kids Supermarket Pretend Play Store, Gift for Age 3Y+\nAn Amazon Brand Grocery store checkout counter play set for kids; pretend to buy and sell toy groceries with play money; recommended for ages 3+ Realistic play with checkout counter, hand cranked conveyor belt, bagging area, beeping scanner, electronic balance, and card swipe machine; (NOTE batteries are not included,the battery need AA*4) Practice counting and simple math skills with play money; includes cash drawer, 12 paper currency bills, 6 coins, and 2 credit cards Includes kid-sized grocery store shopping basket and toy groceries including ice cream, milk, water, carrot, bean, tomato, green pepper, and 3 boxes Durably constructed counter made of sturdy"}, {"role": "assistant", "content": "Price is $125.81"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nBattery Grip Kit for Canon EOS 1100D, EOS Rebel T6i, Rebel T6s, EOS 750D, EOS 760D, EOS 8000D, KISS X8i Digital SLR Replacement) - Includes Battery Grip + 2 LP-E17 Batteries + Battery Charger\nBattery Grip Description The Multi-Power Battery Grip for the Canon EOS 1100D, EOS Rebel T6i, Rebel T6s, EOS 750D, EOS 760D, EOS 8000D, KISS X8i Digital SLR Camera holds 2 LP-E17 batteries, providing twice the power of a standard battery pack. Featuring a vertical shutter release button and an easy power on/off switch, this power grip makes shooting vertically just as comfortable as"}, {"role": "assistant", "content": "Price is $59.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\n512GB 8x64GB LRDIMM Memory for Apple Mac Pro 2019 7,1 by NEMIX RAM\nNemix Ram 512GB Kit DDR4 2933 / 1.2V SDRAM Compatible with Apple Mac Pro 2019 MacPro 7,1 / / / Model ID MacPro 7,1 2.5GHz / 2.7GHz / 3.2GHz / 3.3GHz Meets and Exceeds Apple Specifications Processor 2933 MHz, RAM 512 GB DDR4, Memory Speed 23400 MHz, Brand NEMIX RAM, model number Apple Mac Pro 2019 MacPro 7,1, Hardware Platform Mac, Dimensions 8 x 3 x 1"}, {"role": "assistant", "content": "Price is $930.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nFaithfull No.778 Rebate Plane\nQuality cast iron body. Accurately ground base. Two position cutter for rebate and bull nose work. Double sided adjustable fence. Accurate depth stop. Cuts rebates up to 38mm / wide. 5 year guarantee. Proven reliable Faithfull technology Lightweight construction at just 2.24 Kgs High performace for the home or tradesman Brand Faithfull, Material Cast Iron, Color Gold|brown|black|grey, Dimensions LxWxH 2.56 x 6.3 x 11.22 inches, Weight 2.34 Kilograms, Style Cut,Adjustable,Work, Base Material Cast Iron, Included Components No.778 Rebate Plane, Manufacturer Curtis Holt NW, Part Dimensions "}, {"role": "assistant", "content": "Price is $68.56"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nHusky 87073 2 Ball 2 Width Straight Coupler with Chain, Grey\nHusky straight couplers are perfect for safe towing of 1-7/8 inch and 2 inch trailers. Husky straight couplers are built to the highest industry standards, tested and certified to meet VESC Reg. V-5, SAE Erg. For increased safety, each Husky straight coupler includes a safety pin as an additional means of securing the ball clamp in a locked position (safety pin is required to be used at all times) and a safety chain to eliminate loss of the safety pin. The quick release assembly allows for fast and safe locking and unlocking of the ball clamp for convenient hook-up and disconnect. Sleeve packaged. Ball size 2 inch Inside width"}, {"role": "assistant", "content": "Price is $25.47"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nzr8 OBD2 Code Reader with Live Data for 1996 and Newer Vehicles with OBD Port\nThe Zurich ZR8 OBD2 Code Reader has an oil light reset, battery/alternator system check and can diagnose and erase ABS codes and lights on domestic vehicles. The ZR8 streams live data and comes equipped with a trip cycle procedure. Its 2.8 in. color screen displays 20 data points at once and can be set in either English or Spanish. With a hot key feature for one-touch access to the menu and a vehicle health monitor LED to check emissions readiness, the ZR8 is easy to use - and affordable. Works with virtually all cars, light trucks, minivans, SUVs or hybrids manufactured since 1996 (O"}, {"role": "assistant", "content": "Price is $146.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nOmix | | Steering Pump | OE Reference | Fits Jeep Wrangler TJ 2.5L\nOmix offers a wide selection of steering and suspension components to keep your Jeep safely on the road with quality that always meets or exceeds that of the original. This replacement power steering pump fits 97-02 Wrangler TJ models equipped with 2.5 liter engines. Replaces OE FITMENT | For Jeep Wrangler TJ 2.5L OMIX | Steering Pump WARRANTY | Limited 5-Year Warranty OMIX | Proudly offering all the parts you need to keep your Jeep running like new with quality standards that always meet or exceed those of the factory part. Manufacturer Omix-ADA, Brand Omix-Ada, Weight 4.25 pounds, Dimensions 4.5 x"}, {"role": "assistant", "content": "Price is $244.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nLogitech Z333 2.1 Speakers \u2013 Easy-access Volume Control, Headphone Jack \u2013 PC, Mobile Device, TV, DVD/Blueray Player, and Game Console Compatible, Black\nLogitech Multimedia Speakers z333 deliver 80 watts peak power with a deep bass response adding intensity to your music, movies and games System requirements Television|Computer|Smartphone|Tablet|Music player|DVD player|Blu-ray player|PlayStation|Xbox|Wii. 80 WATTS OF BOLD SOUND -80 Watts Watts RMS power delivers maximum loudness via two satellite speakers and a large subwoofer. Enjoy rich, clear, bold sound. (Small driver (tweeter) on satellite speakers is decorative and non-operational) STRONG BASS - The"}, {"role": "assistant", "content": "Price is $94.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nacegoo Bedside LED Reading Light Dimmable Bedroom Wall Lamp, Flexible Gooseneck Book Light with USB Charger & Rotary Lampshade, LED Head Touch Control, Wall or Headboard Surface Mount\nSuper Flexible Bedside Spotlight Reading Lamp Features Slim line style - takes up minimal space also doesn't get in the way of being over a bed, offers plenty space for reading process. Even and cozy light - built in flicker-free LED bulb and recessed glare control diffuser emits soften crisp warm light, easier on the eyes. Directional beam - 360\u00b0 adjustable flexible arm and 320\u00b0 rotary lens, easy aim the light to book pages or reading materials. Focused LED beam - narrow cone of light directly point to reading pages on one side of bed without disturbing the bed partner."}, {"role": "assistant", "content": "Price is $27.90"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nLEGO City High-Speed Passenger Train 60051 Train Toy\nProduct Description Travel around the city in no time with the motorized LEGO City High-speed Passenger Train! Operate the infrared remote control to power around the curved tracks at top speed. This streamlined, super-efficient train has a high-speed front profile and electricity contact points on top. Lift off the roof of the front car to place the driver inside at the cool dashboard and open the passenger cars to access the seats and tables. Wait for the train with the traveler at the way station and pedal safely across the crossing with the cyclist once the train has gone past. Includes 3 minifigures train driver, traveler and a cyclist. Features motorized locomotive with infrared remote control, high-speed front profile, removable roof with electricity contact point"}, {"role": "assistant", "content": "Price is $319.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nFront Bumper Lip Compatible With Chevy Corvette C6, Factory Style Black PU Front Lip Finisher Under Chin Spoiler Add On by IKON MOTORSPORTS, 2007 2008 2009 2010 2011 2012\nFit for 05-13 Corvette ZR1, Z06, Grand Sport, all trims except base model. Style OEM Style | Material High Quality Polyurethane (PU) | Color Unpainted Raw Material Black Package includes 1x Front Bumper Lip Bolt-on Installation, Instructions NOT included, Professional Installation is Highly Recommended. 30 Days Limited Warranty (This is NOT an OEM part. This product is designed to be a replacement for the OEM part) Manufacturer IKON MOTORSPORTS, Brand IKON MOTORSPORTS, model"}, {"role": "assistant", "content": "Price is $324.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nExtang Tuff Tonno Soft Roll-up Truck Bed Tonneau Cover | 14415 | Fits Ford F150 8' 1 Bed (97.4 )\nThe Extang Tuff Tonno is a roll-up truck bed cover that features smooth edges, a tarp-tightening rotating tail rail, and spring-loaded, adjustable bows. The Tuff Tonno's rotating rear rail is Extang engineered to offer a great looking truck bed cover that is easy to use. SpringLock bows are simple to install and they wont fall off at any speed. Get a clean, smooth look with the world's strongest tarp attachment system. Tarp Can Be Quickly Rolled Up To Haul Large Cargo Unique J-Channel Gives Your Tonno Great Looking, Clean Edges Perfectly Sewn"}, {"role": "assistant", "content": "Price is $380.31"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nTOPS 8-1/2 x 11 Data Pad, Numbered 3-Hole Punched, Heavyweight White Bond, 50 Sheets/Pads, Box of 10 Pads (3619)\nTrack all of your important info with TOPS Data Pads, packed in a convenient 10-pad box. TOPS Data Pads are made of high quality, heavyweight bond paper with precise rulings printed in non-smear ink. These pads provide a format with 31 numbered rows. 8-1/2 x 11. pads. 10-pad box. Versatile format with customizable headers and 31 numbered rows helps tabulate a variety of data Convenient 3-hole punched 8-1/2 x 11 pad fits standard binders High-quality, heavyweight bond"}, {"role": "assistant", "content": "Price is $117.13"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nJVC 4K UHD LED Roku Smart TV with HDR10, Voice Control App, Airplay, Screen Casting, & 300+ Free Streaming Channels\nA Revolution in Resolution - The JVC 43 Class Direct LED ROKU 4K Smart TV with HDR has an amazingly pristine image with a resolution of 3840 x 2180 that produces a vivid and brilliant pictures. Enjoy the ultimate in entertainment with a stunningly defined picture that will leave your eyes in awe. Ultra high definition picture resolution is the future and JVC delivers it right to your home. Better & Brighter with HDR - High Dynamic Range technology improves contrast with true-to-life shadows and detail with a wider range of warm and bright colors allowing you to see vibrant and rich textures that you would not normally get"}, {"role": "assistant", "content": "Price is $399.98"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nOnePlus Bullets Wireless Z2 Bluetooth 5.0 in Ear Earphones, Bombastic Bass \u2013 12.4 mm Drivers, 30 Hrs Battery Life (Magico Black)\nOnePlus Bullets Wireless Z2 Beyond Bass-ic Charge for 10 minutes, enjoy for 20 hours Minor differences exist between regional variants. Refer to region-specific product page. Charging data is from OnePlus test lab (Date Dec 7, 2021. Actual performance may vary based on charging/environmental conditions). Press play all day The flagship-level battery life delivers up to 30 hours of non-stop music on a single charge. Stay connected with family and friends for longer with up to 16 hours of talk time. And with a standby time of 80 hours, the Bullets Wireless Z"}, {"role": "assistant", "content": "Price is $52.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nENA Set of 8 Premium Ignition Coil Pack and 8 Spark Plug Compatible with Ford F-150 5.0L Replacement for C1802\nIgnition Coils Kits are engineered for original equipment and replacement applications. Every component either matches or improves on the OE design to ensure fast and easy installation with superior performance and reliability Easy and quick installation Compatible with Ford F-150 2011 2012 2013 2014 2015 2016 5.0L Part Number replacement for UF622, DG542, 48763, 50120 1 Year Limited Warranty - Please use enter your vehicle in your Amazon Garage above to see if this part is compatible with your vehicle Manufacturer ENA, Brand ENA, Weight 6 pounds, Dimensions 11."}, {"role": "assistant", "content": "Price is $182.31"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSwing Set Stuff Glider with Rope (Red) with SSS Logo Sticker\nThis glider is made of a molded plastic and comes with a 1/2 Polyester blend rode and a 15 Chain for easy enjoyment. We have it available in yellow, blue and green. Made for children 5 to 12 years of age. This glider is easy to assemble and hook up to your playground. For a single beam a glider bracket is needed with this item to create a 4 point attachment. Glider bracket is sold separately. Made of molded plastic Brand Swing Set Stuff Inc., Color Red, Frame Material Plastic, Assembly Required Yes, Dimensions 74 x 37 x 17 inches, Weight 13 pounds, Country of Origin China, model number Manufacturer recommended age "}, {"role": "assistant", "content": "Price is $111.16"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCKWPY New Upgraded Linkable Wall Washer LED Lights with Remote, 18W 1.6ft/ 20 RGB 5000K Daylight Wall Wash Lighting, 120V, Dimmable, Timing, 10 & AUTO Mode, Colored Indoor/Outdoor Stage Light Bar\n\ud83e\udd73Various Installation CUTTABLE& LINKABLE The wall washer lights can be linked more than 10 lights or even more together with cutting the cable or end-to-end male and female connector to extend the lights as a ambient lighting for your gaming room, wall wash, BBQ, indoor and outdoor use; \u2461 Plug-and-Play Just plug in with extra UL 4.92ft heavy duty US plug cord. \ud83e\udd73New Upgraded RF 24-Key Remote Controller Wall Washer"}, {"role": "assistant", "content": "Price is $107.97"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nXerox 500 sheet paper tray for VersaLink C500 C505 C600 and C605, Grey\nAdd up to 4 additional paper trays that handle sizes from 3 x 7.5 inches to 8.5 x 14 inches. Genuine Xerox accessory. Country of Origin Viet Nam The Package Height of the Product is 9.8 inches The Package Length of the Product is 23.0 inches The Package Width of the Product is 21.1 inches Dimensions 23 x 21.1 x 9.8 inches, Weight 18 pounds, model number Batteries 1 A batteries required., Rank Office Products Printer Toner Cartridges 6399, Is Discontinued No, Available July 12, 2017, Manufacturer Xerox Office Products"}, {"role": "assistant", "content": "Price is $80.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nGO-PARTS - for 2014 Mercedes Benz E350 Tail Light Rear Lamp Assembly Replacement - Left (Driver) Sedan 212 906 07 57\nfor MERCEDES-BENZ E350 W212; Sedan OEM # 212 906 07 57 FITS 2014 - 2014 E350 4Matic 3.5L V6 FLEX Sedan 4-Door Automatic - 2014 E350 4Matic 3.5L V6 GAS Sedan 4-Door Automatic - 2014 E350 Base Model 3.5L V6 FLEX Sedan 4-Door Automatic - 2014 E350 Base Model 3.5L V6 GAS Sedan 4-"}, {"role": "assistant", "content": "Price is $162.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nManchester City FC Cotton Bar Towel\nThis official Manchester City FC bar towel is ideal for either the golf course to wipe the clubs, as a place mat on the table or even as a wall hanging. The choice is yours..!! This product is availab Cotton Brand New Item In Original Packaging Color Sky Blue, Brand Manchester City FC, Age Range (Description) All Ages, Material Cotton, s 1, Pattern Letter Print, Special Feature Non-toxic, Theme Sport, Care Instructions Machine Wash, Team Name Manchester City, Size One Size, Unit Count 1 Count, Fabric Type Cotton, Weight 50 Grams, Weight 50 Grams, Brand Name Manchester City FC, Suggested Users Unisex Adults, Manufacturer Manchester City, Part Sport Type Soccer, Rank Tools & Home Improvement Bath"}, {"role": "assistant", "content": "Price is $4.50"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSamsung Double QHD CRG9 Series Curved Gaming Monitor Black (Renewed)\nWith a super ultra-wide 32, 9 ratio, The Crg9 curves around your field of view to immerse you in all the onscreen gaming action. Ultra Detail and Ultra Wide The CRG9\u2019s 5120 x 1440 Dual QHD resolution provides a super ultra-wide aspect ratio that lets you view more content in superfine detail. With screen space equivalent to two QHD displays side by side, the curved monitor delivers a wider view for winning play Lifelike Color The supports a peak brightness rating of 1,000 nits for a true high dynamic range. And with Samsung QLED technology delivering DCI-P3 95 percent, colors are pure, bright, and true"}, {"role": "assistant", "content": "Price is $749.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nColorful Prosthetic Kit Universal Implant Repair Tool Kit with 16pcs Screwdrivers Torque Screwdriver Wrench Tools\nSpecifications To~rq~ue-~Wr~en~ch Drivers Drivers 1.3 Drivers 1.27 DEN(Long+Short) Drivers 1.4ICX (Long+Short) Drivers NOB(Long+Short) Drivers ITI(Long+Short) Drivers (Long+Short) Short drivers 8.5mm Long drivers 13.5mm Colorful Prosthetic Kit Universal Implant Repair Tool With 16 Pcs Screw screwdriver Instrument Short drivers drivers 13.5mm Manufacturer OUBO, Part Weight 14 ounces, Dimensions 7.91 x 6.02 x 2.8 inches, model number Rank"}, {"role": "assistant", "content": "Price is $135.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\niPick Image Made for Ford F-150 Raptor in Blue Black Real Carbon Fiber 50 States License Plate Frame\nItem made from 3K twill genuine carbon fiber layer on a fiberglass base license plate frame. Item features full-color high-resolution UV resistant graphic with OEM style car logo. Frame sealed in automotive-grade UV protective polyurethane to prevent yellowing. Designed not to block registration tags in all four corners. Good for all 50 states license plates. Glossy finish. About 12 x 6 in US standard size. One frame, no hardware. A sporty look will make your vehicle stand out. Feel almost no weight. Item comes with 1-year limited warranty by the manufacturer. Brand new official licensed product made by iPick Image, LLC. All rights reserved."}, {"role": "assistant", "content": "Price is $52.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMonster DNA One Portable Bluetooth Speaker, Wireless Mini Loud Portable Speaker with 360 Omnidirectional Bass Sound, IP67 Waterproof- for Travel, Indoor and Outdoor Party Events, and Home Use, White\nPortable Immersive Sound Our wireless speaker is small in size, but loud in sound. Get breathtaking audio no matter where you are in the room with four evenly distributed speaker drivers; you'll want to take this mini speaker with you everywhere. Waterproof IP67 A waterproof bluetooth speaker that is perfectly safe to use around pools, beach outings, or in the great outdoors. The IP67 outdoor speaker rating protects against dust and allows up to 1 meter of submersion in water. Bluetooth Dual Pairing Wirelessly pair your DNA wireless bluetooth speaker with up to two source devices, such as a smartphone or tablet"}, {"role": "assistant", "content": "Price is $122.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSymmons Dia 3 in. Fixed Showerhead in Satin Nickel (2.5 GPM)\nProduct Description The Dia collection offers a contemporary design that fits any budget. The combination of the Dia collection's quality materials and sleek design makes it the smart choice for any contemporary bath. One of our most popular designs, customers love the effortless style that our Dia suite brings to their space and you will, too. From the Manufacturer The European design of the Symmons Dia Collection was inspired by modern industrial structures. Its clean, geometric lines make it the smart choice for any bath. showerhead 3-in Showerhead face diameter Constructed of plastic 1/2-in NPT connection Easy to clean rubber nozzles 2. 0 GPM (9. 5 L/min) flow"}, {"role": "assistant", "content": "Price is $141.76"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nChara-ani Mushoku Tensei Jobless Reincarnation Roxy Migurdia 1 7 Scale PVC Figure, Multicolor\nFrom Chara-Ani. From the anime series Mushoku Tensei Jobless Reincarnation that began airing January 2021 comes a scale figure of Rudeus' tutor Roxy! The figure is based on an original illustration of Roxy on her travels. Her equipment even the ground she's standing upon have been carefully recreated in figure form. Her cute yet mature expression has been faithfully captured as well. Be sure to add her to your collection! A Chara-Ani import From the hit anime series The master now in figure form Based on an original illustration of Roxy Carefully recreates her equipment Dimensions 5 x 4"}, {"role": "assistant", "content": "Price is $237.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nDerale 13960 Hyper-Cool Remote Transmission Cooler\nEquipped with our top of the line 25 row stacked plate cooler, this new remote cooler can literally be mounted almost anywhere. Making it a popular addition for performance cars, hot rods, muscle cars and trucks. Our Stacked Plate cooler coupled with a Tornado fan and aluminum shroud, this kit comes with a complete installation kit to install easily on all vehicles with 5/16 inch or 3/8 inch transmission cooler lines. Also included is an 180 degree F in-line thermostat for activating the electric fan. The Hyper-Cool is perfect for extreme duty towing and hauling and can add years to the life of your transmission. Mounts anywhere space permits Electric fan supplies optimum airflow Dramatically extends engine and transmission life High efficiency"}, {"role": "assistant", "content": "Price is $269.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nUnity Front and Rear 4 Wheel Complete Strut Assembly Kit\nUnity Automotive has over 40 years of experience manufacturing parts in the aftermarket automotive industry. The company guarantees a superior quality, specializing in making rock solid auto parts that withstand the test of time. With a true understanding of their marketplace they have become the industry leader in innovation of aftermarket suspension. Unity Automotive delivers top of the line quality suspension parts made to insure a trouble-free installation and long lasting reliability. The complete struts line offered by Unity Automotive comes pre-assembled with a new strut assembly, insulators, bumper, coil spring, bearing and top mount. This kit includes 2 front complete strut assemblies, 2 rear complete strut assemblies. OE style replacement suspension strut & coil spring assembly precision designed for a direct fit The strut comes with"}, {"role": "assistant", "content": "Price is $344.98"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nHandmade Crazy-Horse Leather 3-Ring Binder Portfolio Vintage Padfolio, Zippered Closure, Business Organizer Tablet Folio Folder,Professional Organizer Gift for Men & Women\nDeluxe Business casual can look cool and laid-back without crossing outside the boundaries of work-appropriate. This padfolio pulls together everything you need in a single, easy-to-carry package. after absorbing your hand oil, getting tiny scratches and rugs from daily uses, the antique look of the padfolio will add more uniqueness over time. Left size laptop sleeve fits up to laptop/tablet, hand panel organizes your business cards, pen/marker holders,mouse,mobile phone pocket (5.5 x 3.9 inch\uff0cfits up to 6.0 Zippered pocket. Features a flexible 3-ring binder"}, {"role": "assistant", "content": "Price is $102.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSerene Valley Bathroom Sink, Wall-Mount or On Countertop, 40 with Square Sink and Flat Space, Single Faucet Hole, Solid Surface Material\nSerene Valley floating bathroom sink is made of premium solid surface material that is specially engineered to be a non-porous surface that easily resists the stains and scratches that you hate to see on a bathroom sink. It comes with an overall dimension L x W x 5-7/8\u201d D and the bowl dimension L x 13\u201d W x 4-3/4\u201d D. Its superior material characteristics also include its ability to maintain its original matte white color for many years to come. The inherent beauty and elegance will catch eyes of every guest that visits your bathroom. We bet you will get the wow-effect from them"}, {"role": "assistant", "content": "Price is $337.67"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nOE Wheels LLC 22 inch Rim Fits Dodge RAM Hellcat Wheel DG15 22x10 Bronze Wheel\nManufacturer Part Number (MPN) Lifetime structural, one year face finish warranty Center cap included, Original center cap will interchange Lugs/Bolts/Locks/TPMS are NOT Included. Tire Pressure Monitoring System (TPMS) Compatible, Click See more product details below for additional important information. Size 22, Exterior Finish painted, Brand OE Wheels LLC, Wheel Size 22 Inches, Material Aluminum, Pitch Circle Diameter 139.7 Millimeters, Wheel Backspacing 6.48 Millimeters, Rim Size 22 Inches, Weight 43 Pounds, Diameter 22 Inches, Vehicle Service Type SUV, Truck, Rim Width 10 Inches, Manufacturer OE Wheels, Model model number"}, {"role": "assistant", "content": "Price is $237.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCarpartsinnovate For JDM Burnt Titanium Tint Catback Exhaust Muffler System\nFitment Catback Exhaust Muffler System for ACCORD MODELS ONLY Color & Material POLISHED CHROME FINISH C.N.C MACHINED STAINLESS STEELFeature JDM HIGH PERFORMANCE RACING STYLE Specification 1 SET OF 4 TIP CATBACK EXHAUST SYSTEM WITH TITANIUM BURNT TIP & REMOVABLE SILENCER\u2022 The item is 100% brand new in original box. \u2022 Made by high quality light weight C.N.C machined stainless steel with titanium rainbow burnt tip & removable silencer. \u2022 Inlet tip diameter 2.5 / Outlet tip diameter 4. \u2022 Better air flow design. Provides deep and solid"}, {"role": "assistant", "content": "Price is $145.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\n500ft D0LBY Vision 4K HDR HDbaseT 18GBPS Ultra Long Range HDMI Extender Kit 100m Single CAT5e CAT6 CAT7 2.0B 4K @ 60hz YUV 4 4 4 HDR10 Uncompressed Receiver IR RS232 Savant\nYou're looking at the ONLY extender on the market that currently supports D0LBY VISION and HDR10 at 4K60hz 4 4 4! You're also looking at the ONLY extender on the market that can reach 18gbps distances at 500ft! Includes 1 Transmitter and 1 Receiver (500ft Distance Range - 150 Meters) with POC power support D0LBY VISION,"}, {"role": "assistant", "content": "Price is $299.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAlpinestars Sektor Shoes (12.5) (BLACK/RED)\nExternal TPR toe reinforcement helps protect against abrasion and gives greater stability. Flex areas on heel and instep for an enhanced comfort while walking. Advanced 3D mesh offers a highly breathable lining together with a microfiber suede anti-slip s Constructed from a durable and lightweight microfiber and incorporating a speed lacing system and ankle straps for secure and personalized closure, the Sektor Shoe features class-leading protection into its sleek-styled chassis. The upper is constructed from a microfiber which is superbly lightweight, durable and abrasion resistant. Inedited 3D ankle protection for improved fit and a lighter weight. Original speed lace system derived from auto racing shoes for a personalized fit and feel. Ankle hook"}, {"role": "assistant", "content": "Price is $154.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nGreenLighting Low Voltage Landscape Lights, (8 Pack- 6 Stake Lights, 2 Flood Spotlights, & Transformer) LED, Landscaping Lighting, Yard Lights, Pathway, Outdoor Walkways & Path, Garden, Deck, Black\nAdd a Welcoming Warm Glow and Modern Aesthetic - Enhance the outdoor space with pathway lights that create a warm, inviting ambiance that gives your home or garden additional class and warmth. An Environmentally Conscious, Eco-Friendly Choice - A weatherproof, water-resistant pathway lights utilizes low voltage wattage so they exude warm, bright light once it gets dark. Our stake light has a bright output to radiate your pathway home. Easy And Quick Installation - Included in this kit are low voltage cast aluminum. These LED outside lights are wired so"}, {"role": "assistant", "content": "Price is $157.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nlabwork CDI Box Replacement for Polaris Magnum 330 / Replacement for Polaris Trail Boss 330 Replaces Part Number\nCDI box replacement for Polaris Magnum 330 / replacement for Polaris Trail Boss 330 Replaces part number Easy to install and reliable to use, professional installation is recommended This igniter is well handled in the combination of various parts and in the small design, providing you with a reasonable convenience in use The CDI box can be directly replaced with the old or damaged one. The CDI box is easy to install, but professional installation is recommended Manufacturer labwork, Brand labwork, Weight 10.2 ounces, Dimensions 5.83 x 4.09 x 1.42 inches, Manufacturer Part Rank Automotive Powersports Ignition Computers 320,"}, {"role": "assistant", "content": "Price is $30.90"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\n42 Inch Crystal Ceiling Fans with Lights Modern Dimmable LED Chandelier Ceiling Fan with Retractable Blade Remote, 6 Speed, 3 Light Changeable Fandelier for Bedroom Living Room Dining Room (Gold)\nDimmable Crystal Ceiling Fan Remote control and APP control, adjustable 3 kinds of luminosity and 6 kinds of variable speed. Light source power 36W*2. 2 Function Modes Chandelier Fan Retractable ceiling fan with lights,With forward and reverse functions, it can cool clockwise in summer and heat counterclockwise in winter. Retractable Fandelier Size Retractable ceiling fan invisible blades 4 retractable blades. Blade spread diameter 42'', Height 24''. Gold Ceiling Fan Material Lamps Crystal + Iron. Fan blade PC. Combination of"}, {"role": "assistant", "content": "Price is $216.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCoverking Custom Fit Car Cover for Select Mazda 5 Models - Silverguard (Silver)\nProduct Description If you live in a desert climate, Coverking's Silverguard custom vehicle cover is suitable for your needs. Not only will this cover provide maximum protection from the sun, but this unique 300 dernier, breathable polyester fabric will also protect your vehicle from rain, snow, dirt and pollutants in the air. This custom-cover is manufactured specifically for your vehicle to ensure the best protection and fit possible. Made from a unique, 300 dernier polyester with special reflective properties to prevent sun damage to your vehicle. Ideal for mild temperate climates. Strong heavy weave will not rip or tear. Manufactured using double-needle overlapping seams and heavy wax coated thread for durable and leak-resistant seams. Amazon.com Silverguard"}, {"role": "assistant", "content": "Price is $299.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMechanical Keyless Surface Mount Hook Bolt Sliding Door Lock Finish Oil Rubbed Bronze\nFinish Oil Rubbed Bronze Features -Ideal for sliding doors with narrow frames (1 reveal on frame is required). -Manufacturer provides lifetime mechanical warranty. -Standard door lock. -Can accommodate a sliding glass patio door. Product Type -Shutter/Door accessory. Function -Privacy. Dimensions Overall Height - Top to Bottom -5. Overall Width - Side to Side -2. Overall Depth - Front to Back -2. Overall Product Weight -2.25 lbs. Brand Lockey USA, Special Feature Keyless, Material Bronze, Color Aluminum, Pieces 1, Finish Type Aluminum, Controller Type Hand Control, Weight 2.25 pounds, Manufacturer Lockey USA, Part Dimensions 7 x 5 x"}, {"role": "assistant", "content": "Price is $142.26"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSpigen Tough Armor Designed for Galaxy S22 Ultra Case (2022) - Gunmetal\nAll-new foam technology for an extra layer of shock resistance Combination of TPU and Polycarbonate for dual protection from drops and scratches Reinforced kickstand with raised lips to protect screen and camera Certified MIL-STD protection and Air Cushion Technology for anti-shock protection Galaxy S22 Ultra Case Compatible with Galaxy S22 Ultra Dimensions 3 x 0.4 x 6 inches, Weight 2.82 ounces, model number Rank Cell Phones & Accessories 4723, Climate Pledge Friendly Electronics 930, Cell Phone Basic Cases 1467, Available February 9, 2022, Manufacturer Spigen, Country of Origin Korea, Republic of, Brand Spigen, Color Gunmetal, Form"}, {"role": "assistant", "content": "Price is $20.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSAFAVIEH Lighting Collection Deco Modern Crystal Column Bedroom Living Room Home Office Desk Nightstand Table Lamp Set of 2 (LED Bulbs Included)\nCelebrating the geometric forms that are hallmarks of art deco design, the deco column crystal table lamp by Safavieh is a study in understated elegance. Sold as a set of two, these sparkling solid crystal lamps will add instant drama to any room and with their white linen drum shades they will complement myriad decorating styles. 100 Percent Linen Imported This lamp is crafted of crystal This light uses 100 watts bulbs Perfect for a living room, bedroom, den, library, study or office For over 100 years, Safavieh has been crafting products of the highest quality and unmatched style To clean, wipe with a soft"}, {"role": "assistant", "content": "Price is $165.48"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nStylus Pen for HP Envy X360 Convertible 2 in 1 Laptop, EDIVIA Digital Pencil with 1.5mm Ultra Fine Tip Stylus Pen for HP Envy X360 Convertible 2 in 1 Laptop, White\nStylus Pen for HP Envy X360 Convertible 2 in 1 Laptop, EDIVIA Digital Pencil with 1.5mm Ultra Fine Tip Stylus Pen for HP Envy X360 Convertible 2 in 1 Laptop, White 1.5mm Fine Point Stylus Pen for HP Envy X360 Convertible 2 in 1 Laptop lets you draw,write and navigate with pinpoint accuracy and offers comfortable pen-like control for HP Envy X360 Convertible 2"}, {"role": "assistant", "content": "Price is $28.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nIntel Single Pack 400GB 750 Series Solid State Drive PCIE Full Height 3.0 20NM MLC 3.5\nStorage Capacity 400GB Solid State Drive. Form Factor HHHL Interface PCIe NVMe 3.0 x4. Sequential Read Speed (Up to) 2200 MB/s. Sequential Write Speed (Up to) 900 MB/s. Hard Drive 400 GB Solid State Drive, Brand Intel, Series model number Hardware Platform PC, Operating System NIL, Weight 6.9 ounces, Dimensions 9.3 x 6.7 x 0.3 inches, Dimensions LxWxH 9.3 x 6.7 x 0.3 inches, Color Grey, Processors 1, Computer Memory Type Unknown"}, {"role": "assistant", "content": "Price is $349.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCarCovers Weatherproof Car Cover Compatible with Bentley Continental GTC - Outdoor & Indoor Cover - Rain, Snow, Hail, Sun - Theft Cable Lock, Bag & Wind Straps\nThis cover provides all season indoor and outdoor protection for your vehicle. This nearly impenetrable multi-layer fabric is fleece lined to protect fine automotive finishes and to ensure your vehicle's surface finish stays in pristine condition. Fits Years 2007 2008 2009 2010 2011 2012 2013 2014 Fits SubModels Continental GTC Vehicle Fit Sized to length, width and height. Material - Top quality weatherproof fabric comparable to a 5 Layer cover for indoor and all elements outdoor - Soft fleece inner lining to protect paint - High water resistance makes it weatherproof yet"}, {"role": "assistant", "content": "Price is $157.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nPAG Set of 2 Pencil Holders Cup Mesh Pen Organizer Makeup Brush Holder for Desk, Copper\nPAG 2 Pack Pencil Holder Pen Cup Office Supplies Desktop Organizer Makeup Brush Holder for Desk High Quality The pen holder is made of premium metal material, which endows this pen holder a solid and light weight construction. Sturdy and durable, anti-oxidation and anti-corrosion, effectively extend the service life. Large Capacity The dimension of the pencil holder is 3.2 x 3.2 x 3.7 inches, which allows approximately 30-50 pencils to be contained. Fashion and Practical The pen holder is cylindrical, stylish and beautiful. You can use the pencil holder to store pens, pencils, scissors, rulers, glue sticks and other desktop accessories."}, {"role": "assistant", "content": "Price is $8.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMakita LC09Z 12V max CXT\u00ae Lithium-Ion Cordless Vacuum, Tool Only\nThe 12V max CXT Lithium-Ion Cordless Vacuum tool only) combines power and run time in an ultra-compact size. The 12V CXT platform gives users a more compact solution with more comfort and capacity. At only long, the vacuum\u2019s compact design weighs only 3. 7 lbs to reduce operator fatigue. The LC09Z delivers up to 33 minutes of continuous use from a single fully charged 2. 0Ah battery sold separately), with strong suction power for fast and efficient cleaning. This vacuum features three suction power modes, push button power selector, and a bag-less cloth filtration system for easier cleaning and quick debris disposal."}, {"role": "assistant", "content": "Price is $127.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nEpson WorkForce Pro Wireless All-in-One Inkjet Printer (Renewed)\nThis pre-owned or refurbished product has been professionally inspected and tested to work and look like new. How a product becomes part of Amazon Renewed, your destination for pre-owned, refurbished products A customer buys a new product and returns it or trades it in for a newer or different model. That product is inspected and tested to work and look like new by Amazon-qualified suppliers. Then, the product is sold as an Amazon Renewed product on Amazon. If not satisfied with the purchase, renewed products are eligible for replacement or refund under the Amazon Renewed Guarantee. Epson WorkForce Pro Wireless All-in-One Inkjet Printer - Power Cord - Black Ink Cartridge - Cyan Ink Cartridge - Magenta Ink Cartridge -"}, {"role": "assistant", "content": "Price is $119.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSuperATV 6000 lb Black Ops Winch with Heavy Duty Winch Mounting Plate for 2019+ Honda Talon 1000X / 1000R | 2020+ Talon | Complete Kit Ready for Install!\nFits 2019+ Honda Talon 1000X / 1000R | 2020+ Honda Talon | Please Note Drilling through front bumper required | NOTE Does not fit with OE front bumper # Complete, Bolt-On Ready Winch Kit Ready to make sure you're not stranded on the trail? This kit is exactly what you need. It includes our Black Ops 6000LB Winch Kit and a Heavy-Duty winch mounting kit specifically made to fit perfectly on your Honda talon 1000. 600"}, {"role": "assistant", "content": "Price is $449.90"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSuperATV 4500 lb Black Ops Winch with Heavy Duty Winch Mounting Plate for 2014+ Kawasaki Teryx/Kawasaki Teryx 4 | 2021 Teryx S/Teryx 4 S | Complete Winch & Winch Mount Kit Ready for Install!\nFits 2014+ Kawasaki Teryx / Kawasaki Teryx 4 | 2021 Kawasaki Teryx S / Teryx 4 S Complete, Bolt-On Ready Winch Kit Want to make sure you are not stranded on the trail? This 4500 lb Winch Kit is exactly what you need. It includes our Black Ops 4500 Winch Kit and a Heavy-Duty Winch Mounting Kit specifically made to fit perfectly on your Kawasaki T"}, {"role": "assistant", "content": "Price is $504.90"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nRohl R45158 Hose, Chrome\nThese are all terms which aptly describe Rohl and its remarkable selection of kitchen and bathroom faucets and fixtures. Used for Rough Plumbing, Parts and Repair. Elegant design and finish. These are all terms which aptly describe Rohl and its remarkable selection of kitchen and bathroom faucets and fixtures Used for Rough Plumbing, Parts and Repair Elegant design and finish Manufacturer Trumbull Industries, Part Weight 0.01 Ounces, Dimensions 6.25 x 10 x 2.5 inches, model number Is Discontinued No, Color Chrome, Quantity 1, Description Pile Partialupdate, Rank Tools & Home Improvement Tubing & Hoses 411, Available July 14, 2006, Brand Rohl, Dimensions Lx"}, {"role": "assistant", "content": "Price is $96.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAxis Communications M1124 Network Surveillance Camera, White\nAxis M1124 offers a professional and affordable fixed camera suitable for a wide range of video surveillance applications, such as for retail and banking as well as libraries and other office buildings. It can be used indoors, as well as in an outdoor housing. HDTV 720P in 25/30 FPS Wdr \u2013 forensic capture Axis\u2019 zip stream technology Day/night capability Powered by 8-28 V DC or PoE Standing screen display size 20, Brand Axis Communications, model number Hardware Platform PC, Weight 7 ounces, Dimensions 1.7 x 0.2 x 5.8 inches, Dimensions LxWxH 1.7 x 0.2 x 5.8 inches, Color White"}, {"role": "assistant", "content": "Price is $399.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nWestin HDX Drop Nerf Step Bars | Wrangler JK 2dr | | Textured Black | 1 Pair, Cab Length\nWestin's HDX Drop Nerf Step Bars provide the rugged look and function that truck owners need and want. They feature a solid steel construction and heavy duty punch plates creating high traction step areas. The HDX Drop Nerf Step Bars also feature Westin's notched bar design which allows for more than 2 inches of additional toe/heel placement over its competitors. Available in a textured black finish that complements Westin HDX Bumpers. The HDX Drop Nerf Step Bars have vehicle specific applications and include a vehicle specific mount kit, installation hardware and instructions. PERFECT FIT Direct fit for Wrangler JK 2dr AWARD"}, {"role": "assistant", "content": "Price is $483.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nLEGO Ninjago 70724 NinjaCopter Toy\nFrom the Manufacturer A thrilling battle is raging above New Ninjago City as the conflict for the Techno-Blades reaches new heights. Battle-scarred Zane, with his half-robot/half-Ninja face and Pixel must work together in the Ninja Copter to outwit the attacking Nindroids. Spin the propellers and rear jet engines to soar into action. Fire the front flick missiles and rotating shooting cannons, taking care to evade the spinning saw blades of the Nindroid jet fighter. And beware - at any moment the Nindroid may launch the attack glider to double the aerial assault. Includes 4 mini figures with weapons Zane, Pixel and 2 Nindroids. Includes "}, {"role": "assistant", "content": "Price is $229.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nWilwood Front Disc Brake Kit for Ford Mustang\nFDL Pro-Series Front Hub Kits offer complete disc brake upgrade solutions for traditional non-ABS spindle applications on American muscle cars, vintage rods, and heavy weight drag cars. Based on the venerable forged billet Dynalite caliper, these versatile kits are easily optioned to suit the braking requirements and style preferences for a daily driver, serious competition, or the most discriminating show enthusiast. Most kits require no modifications for installation, and provide plenty of clearance inside popular 15 wheels. FDL Pro-Series kits can be used with either manual or power boost master cylinders. Wheel Diameter 14 Rotor Diameter 11 OE Hub Offset +.09 Manufacturer Wilwood, Brand Wilwood, model number Is Discontinued No, Manufacturer Part Position Front,"}, {"role": "assistant", "content": "Price is $974.30"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSonix x Care Bears Case for iPhone 13 Pro | Compatible with MagSafe | 10ft Drop Tested | Good Vibes\nDesigned for iPhone 13 Pro MagSafe Case Pretty and practical, this MagSafe iPhone 13 Pro case offers the best of both worlds - statement-making style and full protection Compatible with MagSafe Built-in magnets support MagSafe wireless charging Drop Tested Our protective case features a scratch-resistant coating, raised shock-absorbent sides, grooved impact-resistant corners, and a raised edge for camera protection They're back! You loved them - so we brought them back! Introducing our latest collaboration with the most-loved toy and cartoon friends, Care Bears. We're bringing on all the nostalgic, good vibes from these characters to a limited edition collection full of our one-of-a"}, {"role": "assistant", "content": "Price is $48.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nGator Cases Lightweight Rolling Backpack Case with Pull Handle; Fits Micro Controllers and Laptop\nThe Gator Cases lightweight rolling backpack case is designed to carry most micro controllers with additional storage for laptops. The case features numerous exterior storage pockets for interfaces, cables, and other accessories. A removable, retractable tow handle and wheels makes it easy to haul your gear around town. The top section can even be expanded allows larger controllers up to to fit as well. Rolling backpack designed to carry most controllers and laptops Exterior storage for interfaces, cables, and other accessories Removable, retractable wheels and handle Rugged nylon construction Padded interior and inserts protect gear Weight 9.2 Pounds, Dimensions 24 x 12 x 16 inches, Country of Origin China, model number Rank Musical Instruments 26486"}, {"role": "assistant", "content": "Price is $162.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nTransport Cargo Airplane, Large Theme Airplane Toy Set, Educational Toy Vehicle Play Set with Smoke Sound and Light, Fricton Powered Plane with Mini Cars and Men, Birthday Gift for Boys and Girls\nSuper Value Pack Our this plane toy set inclued 1 large transport cargo airplane, 6 mini engineering vehicles, 2 construction road signs \uff0c1 large city map and 1 bonus-empty water bottle. Colorful Lights and Real Sound There are four buttons on the head, which can emit different chord music and flashes. Catch your child's attention with the super cool LED flashing lights, and real jet engine sound of an airplane toy which can be. Simulated steam jet function \uff1aThis plane toy set comes with an empty water bottle, you can add water by it. Press the front of plane"}, {"role": "assistant", "content": "Price is $32.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nACTTO Mini Bluetooth Keyboard Korean/English Layout\n*How to turn on power and connect to Bluetooth (Android/iOS-iPhone,iPad) 1.Turn on Bluetooth function of the machine to be paired. 2.Put the keyboard into pairing mode after battery installation. Press the (ESC+K) key simultaneously for approximately 5 seconds to enter pairing mode. Pairing Mode Left Red LED Flashing/Pairing Completed Left Red LED Off 3. When appears on the device to be paired, click to pair automatically. If no key responds, please contact me. I'll give you an exchange or a refund. Thank you. *Korea/English conversion method Android Convert to Korea/English (\ud55c/\uc601) conversion key (Space key right key) Apple (Mac Pc)"}, {"role": "assistant", "content": "Price is $46.90"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\n2-Pack Solenoid Compatible with Briggs and Stratton Trombetta Replaces 807829\nASDFGHT Solenoid Application\uff1a The Starter Solenoid is an essential part of your small engine's starting system. If your engine fails to start or has difficulty starting, it could be due to a faulty solenoid. The Solenoid is a high-quality replacement part that is specifically designed to fit Briggs and Stratton, Trombetta engines. With a 12V power rating and a capacity, this solenoid provides the power you need to start your engine with ease. Its unique terminal post and sleeve design accommodates 5/16 and 1/4 Eyelet Size, making it a versatile product that can be used with a wide range of Briggs and Strat"}, {"role": "assistant", "content": "Price is $23.98"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSW-Motech EVO Side Carriers (Black) for 12-15 Honda NC700X\nPart number EVO carriers - the invention by SW-MOTECH Removable side carriers to use with almost every established motorcycle case Removable side carriers to use with almost every established motorcycle case To mount and demount within seconds at barely visible fixing lugs, due to quick fasteners To mount and demount within seconds at barely visible fixing lugs, due to quick fasteners Adaptable to TRAX, AERO, Givi/Kappa, Krauser, Hepco & Becker and Shad cases by seperately available SW-MOTECH Side Carrier Adapter Kits Adaptable to TRAX, AERO, Givi/Kappa, Krauser, Hepco & Becker and Shad cases"}, {"role": "assistant", "content": "Price is $291.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nDEPO Replacement Passenger Side Side Marker Light Assembly (This product is an aftermarket product. It is not created or sold by the OE car company)\nDepo Replacement Pasenger Side Side Marker Light Assembly (This product is an aftermarket product. It is not created or sold by the OE car company) Package Dimension 7.3 cms L x 14.4 cms W x 20.8 cms H Compliant to applicable DOT regulations The products listed are aftermarket spare parts, and any reference to the names of the auto makers and vehicle models are solely for the purchasers to identify the applicable scope of such spare parts and are in no means used as trademarks Item Package Weight 0.24 kilograms Manufacturer Depo, Brand Depo, Weight 8 ounces, Dimensions 1 x 1 x 1"}, {"role": "assistant", "content": "Price is $43.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nLenovo Flex 5 2-in-1 Laptop, (Intel Core 8GB DDR4 256GB PCIe SSD Windows 10)\nGorgeous video. Immersive audio. Optimized video chat features. It's all here in the Flex 5, a stylish 2-in-1 laptop with powerful processing, vibrant 15.6\u201d display, and long-lasting battery life. STUNNING FHD DISPLAY Laptop has a Full HD (15.6 ) IPS touchscreen display, so you\u2019ll be able to watch movies and browse the web in vivid detail from nearly every angle;No numeric keyboard FINGERPRINT READER Log in to your Flex 5 laptop instantly and securely with our fingerprint reader, and with the support of Windows Hello, you can make secure purchases at participating retailers with"}, {"role": "assistant", "content": "Price is $549.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCatalina Modern Floor Lamp, 66, Black\nPair modern excellence and flexible lighting solutions with the Catalina Three-Head Modern Tree Track Floor Lamp. New-age and unique, this modern lamp features three shades, each compatible with type incandescent or LED equivalent lightbulbs and accented by a sleek satin black finish with brass accents, amplifying your sophisticated and modern home decor. This specific lamp comes equipped with three complimentary Brilli Circadian Wellness Charge-Up LED lightbulbs, designed to boost energy and increase your focus throughout the day. For added convenience, each individual light shade slides up and down along the track and can be adjusted on a pivot to direct light where you want it without struggle. Each light features an on and off rotary switch under the shade for independent lighting, so"}, {"role": "assistant", "content": "Price is $144.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nBBK 1767 Twin 67mm Throttle Body - High Flow Power Plus Series for Dodge Viper V10\nBBK 67mm Throttle Body for Dodge Viper features a bigger bite of power to owners of some of the hottest cars on the planet. They have created a performance throttle body for the 8.3L V10 Dodge Viper. Dyno testing has proven that there is quite a bit more venom left in the Viper when this simple bolt on is added. On an otherwise stock Viper the BBK 1767 gained 15 RWHP and 14 RWTQ at the peak and an average of 10 feet/pounds across the board. There was no loss in throttle response or low end torque unlike the competitor's single bore offerings which are"}, {"role": "assistant", "content": "Price is $399.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nInjen Technology Polished Mega Ram Cold Air Intake System\nWith the rising innovations in factor engine management comes the need to over come their advance adaptivity to produce any kind of meaningful horsepower. Injen Technology\u2019s patented MR Technology does exactly this. Through a series of engineered and tested air- restricted sections, the pressurized mass air is controlled to a calculated aggression, allowing for a proper air/fuel calibration. The end results allows for more reliable and consistent horsepower/torque gains. This technology is availed to the sports compact market exclusively through Injen Technology\u2019s SP line of intake systems. Designed using Injen\u2019s patented MR Technology process for the greatest horsepower and torque gains while maintaining a factory safe air/fuel ratio Aerospace quality aluminum construction to save weight and improve corrosion resistance TIG welded hardware"}, {"role": "assistant", "content": "Price is $278.91"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nKATAIS Shower Diverter Thermostatic Valve Pressure Balanced Mixer with Chrome Trim for Bathroom Shower System (Concealed Horizontal installation, 4 Round Handles)\nPrecise Temperature Control Our universal thermostatic shower valve features precise temperature control and pressure balancing to prevent scalding, making it a safe and considerate choice for your family. Versatile Flow Rate Adjustment With 3/4 hot & cold water inlet and 1/2 outlet, you can easily adjust the flow rate to create a comfortable shower experience that fits your needs. Multi-Functionality Our 3-way shower diverter valve with three knobs allows for simultaneous or individual use, making it perfect for most three-function shower system. High-Quality and Durable Made from high-quality solid brass material and finished with chrome, our"}, {"role": "assistant", "content": "Price is $310.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nWhiteside Router Bits 1097 Straight Bit with Cutting Diameter and Cutting Length\nWhiteside Machine Company has been in the router bit business for over 30 years providing customer with quality products while at the same time striving to achieve complete customer satisfaction. Several woodworking magazines have tested Whiteside versus the competition and selected Whiteside as the winner for best router bits available in the market. Whiteside Machine Company was founded in 1970 as a general purpose machine shop in the basement of Bill & Bobbie Whiteside's home. Located in Western North Carolina near the furniture manufacturing town of Hickory, the company was often involved in making repairs or special parts for the furniture and woodworking field. A strong commitment to customer problem-solving, a can-do attitude, and innovative ideas, along with a growing core"}, {"role": "assistant", "content": "Price is $38.08"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nD Z Strad Viola Model 120 with Strings, Case, Bow, Shoulder Rest and Rosin (13 - Size)\nWhile buying a viola online can be challenging, we are so confident in the extremely high quality of D Z Strad instruments that we decided to list these instruments anyways knowing that you will be thrilled with your purchase. These are top of the line violas made by incredibly talented luthiers especially for discerning string players.Each viola is made from a two piece maple back and solid carved spruce top, and is hand-rubbed with antique varnish. The wood is naturally dried outside on a covered, ventilated area for several years. The wood is then placed into a drying room, consistent with old world traditional European practices. This process ensures that the"}, {"role": "assistant", "content": "Price is $599.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSHOCKFLO Level 2 EV Charger 240V, NEMA Wall-mounted EVSE SAE J1772 EV, Portable Outdoor Electric Vehicle Charger with Adjustable Current/Timing Delay, Plug-in Home EV Charging Station\n6X Faster Charging SHOCKFLO Level 2 EV Charger with NEMA 14-50 plug fill up your car at an average of 6 times faster than a standard charger, delivering 24 miles in only 1 hour at 240V charge Safer and Smarter Charge Flexible amperage settings of help to match your wall circuit. Schedule your charging session with the delayed start timer, saving electricity cost and mitigating the peak loads 2 in 1. Mobile & Wall-mounted Controller bracket make it be a convenient garage wall-mounted EV charger and 20"}, {"role": "assistant", "content": "Price is $299.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nEnergy Suspension Master Kit for Jeep Wrangler\nThe nature of this superior material, Energy Suspension's Hyper-Flex, is the result of twenty plus years of experience working with and formulating polyurethane materials. Careful selection of material firmness or durometer is used for each specific application on the vehicle's suspension and frame. The three most valuable reasons for using Energy Suspension's Hyper-Flex components are performance, durability and appearance. Called HYPERformance, Energy Suspension's Hyper-Flex is performance polyurethane that delivers an amazing amount of performance to any vehicle that runs with it. Proven on the race track as well as the street, on and off-road, under the most demanding conditions. Over twenty years of positive customer raves have attested to that. Whether domestic or import,"}, {"role": "assistant", "content": "Price is $281.66"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSAMSUNG Electronics Galaxy Tab S7+ Wi-Fi, Mystic Navy - 128 GB (Renewed)\nThis pre-owned or refurbished product has been professionally inspected and tested to work and look like new. How a product becomes part of Amazon Renewed, your destination for pre-owned, refurbished products A customer buys a new product and returns it or trades it in for a newer or different model. That product is inspected and tested to work and look like new by Amazon-qualified suppliers. Then, the product is sold as an Amazon Renewed product on Amazon. If not satisfied with the purchase, renewed products are eligible for replacement or refund under the Amazon Renewed Guarantee. pc performance. tablet portability transform your tablet into a pc experience with dex mode and the optional keyboard with expanded trackpad. entertainment"}, {"role": "assistant", "content": "Price is $478.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nBoxes Fast 18 x 14 x 12 Double Wall Corrugated, Heavy-Duty Cardboard Boxes, for Shipping, Packing, Moving and Storage, Kraft (Pack of 15)\n18 x 14 x 12 Double Wall Boxes. Twice the protection of a standard carton. Boxes are manufactured from heavy-duty 275# D.W. kraft corrugated. Heavy-duty construction provides greater protection and stacking strength. Corrugated boxes are reusable and recyclable. Cartons are sold in bundle quantities and ship flat to save on storage space and shipping. Proudly made in the USA Twice the protection of a standard carton Boxes are manufactured from heavy-duty 275# DW kraft corrugated Heavy-duty construction provides greater protection and stacking strength Corrugated boxes are reusable and"}, {"role": "assistant", "content": "Price is $68.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nBehringer WASP DELUXE Legendary Analog Synthesizer with Dual OSCs, Multi-Mode VCF, Poly Chain and Eurorack Format\nLegendary hybrid synthesizer with dual digital OSC design allows for insanely fat music creation Product Type Keyboard Instruments Package Dimensions 13.0 Cm L X19.0 Cm W X49.0 Cm H Country Of Origin China Package Weight 5.07Kg Weight 1100 Grams, Dimensions 19.29 x 7.48 x 5.12 inches, Country of Origin China, model number WASP DELUXE, Rank Musical Instruments 18397, Synthesizer & Workstation Keyboards 81, Is Discontinued No, Available July 8, 2020, Color Name Black, Connector"}, {"role": "assistant", "content": "Price is $229.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMuchkey car Floor Mats fit for E-Class Full Coverage All Weather Protection Non-Slip Leather Floor Liners Beige\nReasons for choosing muchkey car mat material Made from high-quality luxury leather, it is soft and durable, and looks very luxurious. design The floor mat protects the entire car floor and perfectly protects your car. At the same time, its texture and modern design enhance the overall look of the car interior. All-weather protection waterproof, suitable for all kinds of weather, including rain, snow, etc. It stays clean even in the toughest weather conditions, so you can wash off the stains with a wet towel and water. Custom fit Floor Liners All of our car mats are designed for every type of car and fit perfectly. If you can't find your model in our store"}, {"role": "assistant", "content": "Price is $118.88"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nFront Seats ShearComfort Custom Sheepskin Seat Covers Compatible with Ford Bronco (Full Size) in Tan for High Back Captains w/Inner Arms\nMerino is known for having the densest wool fiber, which makes for an excellent sheepskin seat. Denser wool does not pack down and is a better choice for sheepskin seat covers for cars. With proper care and maintenance, your seat cover will keep its original appearance even after years of use. This product is designed to be compatible with Ford Bronco (Full Size) 1992, 1993, 1994, 1995, 1996 Why Buy ShearComfort Seat Covers? For pure driving comfort, style and protection 1-Year Risk Free Warranty against any defects in workmanship and materials. 1-Year"}, {"role": "assistant", "content": "Price is $399.49"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nBOSS Audio Systems Marine Gauge Receiver - Weatherproof, 5 Inch Touchscreen, Built-in Amplifier, Bluetooth, Digital Media MP3 Player, No CD Player, USB Port, AM/FM Radio\nBOSS Audio Systems Marine Gauge Receiver - Weatherproof, 5 Inch Touchscreen, Built-in Amplifier, Bluetooth, Digital Media MP3 Player, No CD Player, USB Port, AM/FM Radio Bluetooth - Play and control music through your smartphone or MP3 player as well as apps like Spotify / Pandora, wirelessly Weatherproof - The has been outfitted with the latest weatherproofing techniques such as front panel UV coating, PC board with conformal coating. It has an IPX 6 rating for protection against splashing water Media Playback - Bluetooth, play music via the USB"}, {"role": "assistant", "content": "Price is $289.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nOPL HPR584 Aluminum Radiator For Jeep CJ Series 5.0L (Automatic Trans)\nOur radiators are designed and engineered to maximize cooling efficiency by up to 30%, improve engine functions as well as prevent your vehicle from overheating. It is the ideal upgrade to the stock radiator whether you drive your vehicle daily or take it to the race tracks. OPL All-Aluminum radiators features a lightweight core, 100% aluminum, enhancing the overall performance of your engine. Buyers outside of the U.S. are responsible for any brokerage's fee, import duties, or taxes. Please check with your country's government website.Extra shipping fees are required for international shipments as well as the following states and territories PR, HI, AK, GU, VI, APO. Fits Jeep"}, {"role": "assistant", "content": "Price is $213.92"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nGood Smile Fate/Apocrypha Rider of Black Astolfo Nendoroid Action Figure\nFrom Good Smile Company. From the anime series fate/Apocrypha Comes a Nendoroid of the servant from the Black faction, rider of black! He comes with three face plates including a smiling expression, a playful winking expression as well as a blushing expression. Optional parts include his Lance trap of Argalia, his hunting horn La black Luna As well as the sword that he gave to Sieg, the main character of the series. The sword is included in both a sheathed and drawn version for all sorts of posing opportunities! Be sure to add the cheerful and innocent Knight to your Nendoroid collection! A Good Smile import From the hit anime series Includes three face plates for"}, {"role": "assistant", "content": "Price is $132.30"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nEvan-Fischer Front Bumper Cover Compatible with Nissan Xterra XTERRA 09-15 FRONT BUMPER COVER\nBumper Cover may ship FOLDED for the most economical shipping. 1-Year Warranty When Purchased through Auto Parts Giant Brand Evan Fischer, Auto Part Position Front, Material Plastic, Color Primed, Vehicle Service Type Cars, Exterior Finish Primed, Dimensions 26\\ D x 68\\ W x 19\\ H, Manufacturer Evan Fischer, Model Evan-Fischer Bumper Cover, Weight 98.2 pounds, Manufacturer Part OEM Part ABPA Partslink Special Features 2015 2014 2013 2012 2011 2010 2009 Nissan Xterra Front Sport Utility 6Cyl 4.0L Off-R"}, {"role": "assistant", "content": "Price is $219.25"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nlabwork 44 Inch Live Rear Axle Kit Rear Axle Brake Disc Chain Sprocket Wheel Hub Pillow Block DIY Rebuild Replacement for Go Kart ATV Quad and Drift Trikes\nAxle overall length (from end to end) 44.4 Inch Chain sprocket 530 37 teeth diameter 7.6 in brake disc diameter 8.66 in spline teeth 24T Perfect for vertical or horizontal engine and electric motor. This is a flexible rear axle kit and made by high strength chrome, can support more than Axle Kit + Chain Sprocket + Brake Disc + Pillow Block Bearings + Wheel Hub (Exclude Chain and Brake Master Cylinder Caliper). Manufacturer labwork, Brand labwork, Weight 30.2 pounds, Dimensions 44.5 x 10"}, {"role": "assistant", "content": "Price is $172.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nRhino Rack Pioneer Platform Rack (84 x 56 ) Unassembled\nThe Rhino-Rack pioneer platform is a sleek and stylish roof rack accessory for a range of 4Wds and utilities. Made with strong, durable and lightweight reinforced nylon and aluminum, these non-corrosive pioneer platforms have been tested in the most rugged conditions and have passed with flying colors. Loading and unloading your equipment is easy. Simply slide your gear onto the pioneer platform and tie them down to the bars. The best thing about the pioneer platform is that it has been specifically designed to carry existing Rhino-Rack accessories including luggage bags, jerry can holders, spare wheel holders, shovels and loads more. Rhino-Rack also offers the flexibility of allowing you to purchase the available rail kits if you wanted"}, {"role": "assistant", "content": "Price is $845.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nGM Genuine Parts Radiator\nGM Genuine Parts Radiators are designed, engineered, and tested to rigorous standards, and are backed by General Motors. Radiators are heat exchangers, typically located in the front of the vehicle where air can flow through the fins and dissipate heat. Modern radiators are made from aluminum and plastic, while older vehicles used copper and brass. These radiators are designed to be corrosion resistant with optimal heat transfer characteristics. GM Genuine Parts are the true OE parts installed during the production of or validated by General Motors for GM vehicles. Some GM Genuine Parts may have formerly appeared as ACDelco GM Original Equipment (OE). Lightweight; the radiators have a positive heat transfer to weight ratio Corrosion-resistant aluminum designed core helps optimize the radiators long lasting GM-re"}, {"role": "assistant", "content": "Price is $379.92"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nOatey 42722 PVC Floor Sinks and Accessories, 4 in, White\nOatey PVC Floor Sinks are used for general service drainage in application such as, commercial kitchens and hospitals. Allows easy access for cleaning and debris removal caused by indirect waste sources such as sinks, lavatories and condensate drains. Oatey products have earned the trust of plumbing professionals for over 100 years. Oatey Products Can Be Found In Residential And Commercial Applications And Have Achieved Excellent Brand Name Recognition With Consumers All Products Are Backed With The Highest Quality Technical And Customer Support Programs In The Industry Highly Durable Product Size 4 Brand Oatey, Color White, Material Polyvinyl Chloride, Size 4 in., Style Square, Shape Square, Weight 4."}, {"role": "assistant", "content": "Price is $48.83"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nPoynting MHz High Gain Cross Polarized LTE MIMO Indoor/Outdoor Antenna\nPoynting XPOL-2 LTE and B/G/N Wi-Fi Antenna Max Gain 9dBi. Please read specification sheet below for more details. Backwards compatible with 3G, 2G technologies. Two cross polarized antennas in one enclosure for optimal LTE performance. Vandal resistant all-weather enclosure. High Gain Cross Polarised LTE MIMO Antenna Max Gain 9 dBi. Please read specification sheet below for more details. Brand Name POYNTING ANTENNAS (PTY) LTD., Weight 3.42 pounds, Dimensions 10.24 x 10.24 x 3.15 inches, model number Is Discontinued No, Rank Computer Networking"}, {"role": "assistant", "content": "Price is $195.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nStarplast 20561 Playhouse, Red/Green/Yellow\nThe Starplay Children's Galilee Playhouse will provide your child with hours of imaginative fun. A place of their own for your kids to entertain their friends. Feels like a real home with door mail slot for kids to receive notes from family and friends. Working door and shutters. Easy come and go. Stickers included for the kids to decorate and personalize their playhouse. Easy assembly with no tools required. Light & easy to move. Quick clean up with a damp cloth. Vivid Colors Easy to assemble and easy to clean Lightweight and easy to move Package Weight 29.0 pounds Dimensions 55.25 x 42.5 x 45.25 inches, Weight 26 Pounds, Manufacturer Star"}, {"role": "assistant", "content": "Price is $366.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nT-Spec V10 Series Power Wire Spools 16 AWG, Black\nThe performance of your car audio system is only as good as its weakest link. Many wire companies today will have you believe that a cheap cable will save you money and get the job done, but the truth is that below-specification cables will rob current from your amplifier, reducing power output by as much as 50%. T-Spec takes the high road with this v10 SERIES 250 ft. Black Speaker Wire that exceeds CEA and ANSI specifications for gauge size. Speaker wire Meets CEA & ANSI specification for wire gauge Full virgin copper construction High strand count for maximum flexibility Ultra-flexible PVC-blended jacket Dimensions 250 x 10.24 x 3.74 inches, Weight 8 pounds,"}, {"role": "assistant", "content": "Price is $112.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nRAREELECTRICAL NEW IGNITION MODULE COMPATIBLE WITH TOYOTA CAMRY CELICA COROLLA LAND CRUISER MR2\nRAREELECTRICAL BRAND COMPATIBLE WITH DENSO, GEO, LEXUS, TOYOTAREPLACES DENSO VERIFY YOUR OEM PART NUMBER FROM YOUR ORIGINAL UNIT TO HELP ENSURE FITMENT.DENSO SYSTEMS GEO PRIZM LEXUS LS400 LEXUS LX450 LEXUS SC400 TOYOTA TOYOTA CAMRY TOYOTA CELICA TOYOTA COROLLA TOYOTA LAND CRUISER TOYOTA MR2 TOYOTA PASEO TOYOTA PREVIA TOYOTA RAV4 TOYOTA T100 TOYOTA TACOMA"}, {"role": "assistant", "content": "Price is $30.07"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nUltra Slim Tilt TV Wall Mount Bracket for Samsung 43 Class QLED 4K Smart TV - - Low Profile 1.7 from Wall, 12\u00b0 Tilt Angle, Easy Install\nSlim Tilt Wall Mount For SAMSUNG Model The Easy Mount is designed to accommodate LCD and Plasma flat-panel TVs from 32 to 102 with weights up to 165 lbs. It provides tilt adjustment of 0 Degree to 12 Degree for optimal viewing angles and reduced glare. The sliding bracket design allows horizontal adjustment for perfect screen placement (even after installation). The Easy Mount for TVs meets most of wall mounting needs in a simple and affordable design. It will safely secure your precious flat screen to any wall. It features large rectangular shaped access holes for easy cable routing and wiring access. The"}, {"role": "assistant", "content": "Price is $84.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nGoSports Splash Net PRO Pool Volleyball Net Includes 2 Water Volleyballs and Pump\nPOOL VOLLEYBALL Splash into the ultimate pool day and play like a Pro with our Volleyball Splash Net Pro; Set includes adjustable volleyball net with posts, 2 water volleyballs and pump ADJUSTABLE NET Splash Net PRO is compatible with virtually any inground pool (lap pools, rounded pools, rectangular pools, etc.); Net straps can be adjusted for any sized pool (max width 25 ft) SAFE POOL FUN Water weighted bases keep your net upright and prevent tipping over for hours of splashing fun in the pool PREMIUM QUALITY Our Volleyball Splash Net Pro is engineered to withstand all the splashing that comes with water volleyball; The sturdy bases and netting ensures maximum fun in the"}, {"role": "assistant", "content": "Price is $101.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nOrtega Guitars Custom Built Eclipse Series All Solid Tenor Ukulele w/Bag, Right TE8\nProduct Description In the Custom Built series, Ortega pushes the boundaries of ukulele construction, creating instruments that are as bold as they are beautiful. Unorthodox design and impressive functionality are only the beginning when it comes to these creatively conceived ukuleles. Ortega offers a broad range of traditional Sopranino, Soprano, Concert, Tenor and Baritone body sizes. Rounded out with our Ukebasses Guitarleles. Selected models are equipped with our own preamp and built-in tuner. Whether you are a beginner, an enthusiast, or performing professionally, Ortega has the instrument for you. From the Manufacturer Ortega offers a broad range of"}, {"role": "assistant", "content": "Price is $429.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nLexmark X2580\nThe Lexmark X2580 is fast at speeds up to 22 ppm black and 16 ppm color. This 36-bit color flatbed scanner easily handles thick books. Features PC-free copying, Borderless photos. USB connectivity Connect the printer to your computer via USB 2.0. The Lexmark X2580 is fast at speeds up to 22 ppm black and 16 ppm color This 36-bit color flatbed scanner easily handles thick books PC-free copying Borderless photos USB connectivity Connect the printer to your computer via USB 2.0. Manufacturer Lexmark, Brand Lexmark, Weight 12.9 pounds, Dimensions 21.7 x 14.2 x 9.4 inches, model number Is Discontinued No, Manufacturer Part"}, {"role": "assistant", "content": "Price is $117.50"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSpyke Grind Free Jackshaft for Belt Drive for Harley 98-08\nThis Amazing Jackshaft Fixes the Grind Associated With Many 3 Open Belt Drives That Utilize One-Piece Jackshafts This Amazing Jackshaft Fixes the Grind Associated With Many 3 Open Belt Drives That Utilize One-Piece Jackshafts The Grind-Free Is a Three Piece Unit With Ramped Teeth and Enough Flex to Integrate Seamlessly With Any Belt Drive The Grind-Free Is a Three Piece Unit With Ramped Teeth and Enough Flex to Integrate Seamlessly With Any Belt Drive For BIG TWIN BDL Belt Drive & Others With One-Piece Jackshafts For BIG TWIN BDL Belt Drive & Others With One-Piece Jackshafts This amazing jackshaft fixes the grind"}, {"role": "assistant", "content": "Price is $130.72"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMOTOKU Front and Rear Brake Pads for Magnum 330 Scrambler 500 Sportsman 400 500 600 700 800 Trailblazer Trail Boss 330\nCompatible with Magnum 330 Scrambler 500 Sportsman 400 Sportsman 450 2006 2007, Sportsman 500 HO Sportsman 500 X2 Sportsman 600 Sportsman 700 Sportsman 800 Trail Blazer 250 2005 2006, Trail Blazer 330 Trail Boss 330 Compatible with Magnum 330 Scrambler 500 Sportsman 400 Compatible with Sportsman 450 2006 2007, Sportsman 500 HO Sportsman 500 X2 Compatible with Sportsman 600 Sportsman 700 Sportsman 800"}, {"role": "assistant", "content": "Price is $15.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nLEGO Ideas Voltron 21311 Building Kit (2321 Pieces) (Discontinued by Manufacturer)\nIt\u2019s time to defend the universe so get ready to form LEGO Ideas 21311 Voltron, the biggest buildable LEGO mech ever! This awesome set features buildable and highly posable black, blue, yellow, red and green lion toys with specially designed, extra-strong joints to combine them all and create the Voltron super robot, plus a huge silver-colored sword and shield that attach firmly to Voltron\u2019s hands. Ideal for display or to recreate thrilling action from the original 1980s Japanese anime Voltron TV series and the modern DreamWorks Voltron Legendary Defender series. Build 5 posable Voltron lions and combine them all to create the Voltron super robot toy! Display"}, {"role": "assistant", "content": "Price is $532.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nIndiPRO V-Mount Plate with Sony Dummy Battery for Sony A7, A7r, A7s Cameras\nPower your powered devices from a V-Mount battery using this V-Mount Plate. The plate features a dummy battery at the end of an integrated 20\u2033 cable. This dummy battery will work with A7 series mirrorless cameras. Other accessories that require 12-16 volts of power can be connected to the built-in D-Tap on the plate. For rig integration, this battery plate includes a 15mm LWS bracket for mounting on a lightweight 15mm compatible rod-based rig. In this configuration, the battery attached to the plate can be used as a counterweight for shoulder-mounted setups. Made in the USA. Compatible with Sony For a7 Series,"}, {"role": "assistant", "content": "Price is $149.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nKOHLER Side-Mount Electrical Outlets in Natural Maple for Tailored Vanities\nProduct Description Custom storage systems The custom storage and accessory options of the Kohler Tailored vanity collection maximize every inch of your vanity space. Every hair dryer, toothbrush and tube of lipstick has its own place and frees your bathroom of clutter. Create your personalized storage and wake up to a more manageable morning routine. Use adjustable shelf and rollout tray to easily access everyday toiletries and towels. Keep electronics off your countertop and always ready for use on the shelf with built-in outlets. View larger Maximize drawer space while separating items with drawer dividers and a storage tray. View larger Apply makeup using the removable mirror and store makeup and brushes in the tray after each use. View larger Keep toiletries within reach and"}, {"role": "assistant", "content": "Price is $274.35"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCreative Crown Foam Molding | 96 Ft of 3.5 Angelo Foam Crown Molding kit W/precut Corners on end of Lengths 4 Inside & 1 Out (Available in 5 Other Styles and Sizes-See Our Other LISTINGS)\nCreative Crown Foam Molding | 96 Ft of 3.5 Angelo Foam Crown Molding kit W/precut corners on end of lengths. THIS IS A KIT - 96 feet of crown molding. 95.5 lengths. Includes 5 precut corners on the ends of the lengths. 4 inside 90 degree corners and 1 outside 90 degree corner. Easy to install smooth, high density, molded, white polystyrene, foam crown molding. Light weight - only 10 oz per 8"}, {"role": "assistant", "content": "Price is $284.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMaps International Medium The World is Art Wall Map - Pink - Pinboard - 24 x 36\nThe World Is Art is a unique range of world maps created to look fantastic on your wall. Beautifully designed with pink color tones for the stylish wall space in your home, these maps are sure to impress. The world map features major towns and cities and contains relief shading for land and sea. Completely up-to-date The map is completely accurate and includes all recent world developments, particularly the new country of South Sudan and the new international dateline. Map uses Child's room, Learning, Home decor. This Medium The World Is Art Wall Map - Pink (Pinboard) is designed to be rigid and attractive, with the added bonus that it can be pinned with thumbtacks. Made from This"}, {"role": "assistant", "content": "Price is $110.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nGOODYEAR 123R E WRL FORTITUDE HT TL\nPackage Dimensions 18 H x 18 L x 11 W (inches) Package quantity 1 Package Weight 40.0 pounds Country of Origin United States Brand Goodyear, Seasons NON_WINTER, Size Section Width 11 Inches, Load Capacity 3415 Pounds, Tread Depth 10 32nds, Rim Width 11 Inches, Weight 52.78 Pounds, Manufacturer GOODYEAR, Model WRANGLER FORTITUDE HT, model number Manufacturer Part Construction R, Rank Automotive Passenger Car Performance Tires 9419, Available July 23, 2019, Dimensions LxWxH 18 x 11 x 18 inches, Rim Size 18 Inches,"}, {"role": "assistant", "content": "Price is $405.69"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSanyo 1080p Front Projector - Black\nThe small DLP contains a DMD panel and color wheel with high-contrast optical system that achieves a contrast ratio as high as 2,200 1, exhibiting natural and smooth gradation. This easy set up projector uses a UHP lamp for outstanding brightness and well-balanced color reproduction. The provides 2,500 lumens brightness. The compact design and lightweight body lets you make presentations almost anywhere. To compensate for keystone picture distortion, the provides vertical keystone correction with a range up to \u00b1 15 degrees. Auto input search assists your set-up and versatile go-anywhere capability make this a truly portable projector. The can be ceiling or inverse mounted for enhanced versatility. Digital signal reflected off a DMD chip and"}, {"role": "assistant", "content": "Price is $95.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nWoodbridge Lighting 3-Port Mini Pendant Cluster, by Maximum, Satin Nickel\nFrom the Manufacturer This ceiling cluster is formed by the combination of the three included satin nickel mini pendants featuring an iridescent mosaic tube. Satin nickel finish Iridescent mosaic tube glass Requires three (3) candelabra base bulb (not included) 10 inches wide x 84 inches high max UL listed for dry locations Brand Woodbridge Lighting, Color Satin Nickel, Material Glass, Style Transitional, Light fixture form Pendant, Power Source Corded Electric, Special Feature UL Listed, Finish Type Nickel, Shade Material Glass, Light Sources 3, Lighting Method Downlight, Specification Met UL, s 1, Manufacturer Woodbridge Lighting, Part Weight 15.05 pounds, Dimensions 4."}, {"role": "assistant", "content": "Price is $225.88"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nDNA MOTORING 3-Row Full Aluminum Radiator Compatible with 70-78 Dodge B-Series Ram Van, Compatible with 50-54 Chevy Bel Air / Corvette\nAluminum racing radiator is designed to provide maximum cooling efficiency to prevent premature engine failure. Its light-weight, and high-heat transferring aluminum features a tube and fin design that, dramatically increases surface area that enables the heat to dissipate more efficiently. This racing radiator is at least 40% more efficient than stock ones. Most importantly, it has a much higher capacity for coolant than stock ones, which means that your cooling system will be more efficient and will be more resistant to temperature surges in racing applications. Fitment 1 - Compatible with Dodge B-Series Ram Van / D/ W-Series Pickup / RamCharger /"}, {"role": "assistant", "content": "Price is $151.07"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nApollo Tools Pink Household Tool Set in Attractive Designer Zippered Case with Pink Tool Selection - Pink Ribbon - Pink -\nApollo Tools is a product line of high quality, competitively priced, hand tools and power tools constructed of the finest quality components and designed for the home repair enthusiast. We strive to create the best possible tools and tool kits and to do so ethically, honorably, and with our customers\u2019 satisfaction at the very core of our focus.. This tool kit comes in an attractive pink designer case that zips up and contains a careful selection of favorite tools from our pink line.. The case looks great and is convenient for transport and storage.. This is the perfect small tool set for the home and on the go for every day tasks and projects.. It includes a box cutter"}, {"role": "assistant", "content": "Price is $35.91"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSquare D by Schneider Electric Air-Pump Pressure Switch, NEMA 1, 50-70 psi Pressure Setting, 20-65 psi Cut-Out, 15-30 psi Adjustable Differential, Low-Pressure Cut-Off\nThis Square D Pumptrol electromechanical pressure switch with 50-70 psi pressure setting range activates an electrically driven water pump within a power circuit when the adjustable rising and falling thresholds are reached. The NEMA 1 enclosure has a 1/4 NPSF internal fluid connection and screw clamp terminals. The switch works with a 2 Hp or less pump. The adjustable differential range is 15-30 psi and the cut-out range is 20-65 psi. The low-pressure cut-off (auto-start-off) operates at approximately 10"}, {"role": "assistant", "content": "Price is $52.93"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nArlen Ness 18-512 Chrome Big Sucker Performance Air Filter Kit\nAll-in-one backing plate features a built-in carb support and built-in breather tunnels at each head to decrease crankcase pressure. Each tunnel exits at the mount of the carburetor to create a virtually closed loop system Breather features O-ring banjo bolt seals and a radiuses intake manifold. No oil hoses, no oil fittings, no leaking and no mess Stage I kit features a Team Ness High-Flow filter that accepts all 93-up oval or round O. E. M. outer covers All kits include a Big Sucker aluminum backing plate, Team Ness High-Flow air filter, chrome banjo bolts for Twin Cam and 93-Up EV2 Big Twin, simple instructions and all necessary hardware This Item"}, {"role": "assistant", "content": "Price is $161.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSpeedy Pros Boricua Puerto Rico Zinc Metal License Plate Frame Car Auto Tag Holder - Black 2 Holes\nLicense Plate Frame Funny Weatherproof Treat yourself to exciting new car accessories that will catch everyone's eyes. Decorate your vehicle and make it look unique and funny with our zinc metal license plate frame. Very clear, bold, easy to read lettering can be read from a distance by other drivers. Our designs are professionally printed on our personalized license plate frame with state-of-the-art equipment guaranteed to last for years. We have more than 12 years of experience in the printing industry to offer you stunning detail and rich lifelike colors. All prints are carefully made in our lab in Tampa, Florida. CUTE CAR ACCESSORY! ** Made from high quality zinc metal ** Fits every"}, {"role": "assistant", "content": "Price is $18.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCim-Tek 10 m Resin-Impregnated Cellulose Elmnt for Centurion Fltr Housing 6-pack\nCim-Tek 30002 10 m Resin-Impregnated Cellulose Element for The Cim-Tek Centurion Filter Housing. This 10 Micron Resin-Impregnated Cellulose Element Is Used To Remove Dirt And Rust And Is Recommended for gasoline, diesel, & ULSD. This filter goes with the Cim-Tek Centurion Filter Housings. Centurion Element for use with part 40001, 40013, & 40020 10 Micron Resin-Impregnated Cellulose element Used to remove dirt and rust and dust Recommended for gasoline, diesel &"}, {"role": "assistant", "content": "Price is $174.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nNikon B500 Digital Camera (Black) with All-in-One Starter Bundle - Includes SanDisk Ultra 64GB Memory Card, 4X Rechargeable AA Batteries, Camera Shoulder Case, Photo/Video Software, Flash & More\nNikon COOLPIX B500 Digital CameraThe COOLPIX B500 Digital Camera from Nikon features a 16MP 1/2.3 BSI CMOS sensor for high-resolution imagery as well as Full HD 1080p video. This sensor's construction utilizes a stacked backside-illuminated design to improve clarity and image quality when working in dimly-lit conditions. The 40x optical zoom lens provides a 35mm equivalent focal range of covering wide-angle to telephoto perspectives to suit working in a wide variety of environments"}, {"role": "assistant", "content": "Price is $407.55"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nR1 Concepts Front Rear Brakes and Rotors Kit |Front Rear Brake Pads| Brake Rotors and Pads| Ceramic Brake Pads and Rotors |Hardware Kit|fits Chevrolet Colorado, GMC Canyon\nCompatible Applications for Chevrolet Colorado Front and GMC Canyon Front and Rear All-in-One Complete Brake Kit Replacement eLine Series Front & Rear Brake Kit comes with (4) high performance brake rotors and (8) low-dust ceramic brake pads and hardware kit. High Performance Brake Rotors Made of G3000 grade cast iron with zinc finish for ultimate rust protection. Built with O.E.M specifications in mind, no modification required. Ultimate Stopping Power Precision-drilled holes and countersunk design prevents cracking and build up, enhances ventilation and dissipates heat. Designed for smoother and quieter stopping,"}, {"role": "assistant", "content": "Price is $421.04"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nPower Stop Front & Rear Z36 Truck and Tow Brake Kit with Calipers\nThe Power Stop Z36 Truck & Tow Performance brake kit provides the superior stopping power demanded by those who tow boats, haul loads, tackle mountains, lift trucks, and play in the harshest conditions. The brake rotors are drilled to keep temperatures down during extreme braking and slotted to sweep away any debris for constant pad contact. Combined with our Z36 Carbon-Fiber Ceramic performance friction formulation, you can confidently push your rig to the limit and look good doing it with red powder brake calipers. Components are engineered to handle the stress of towing, hauling, mountainous driving, and lifted trucks. Dust-free braking performance. Z36 Carbon-Fiber Ceramic formula provides the extreme braking performance demanded by your truck or "}, {"role": "assistant", "content": "Price is $735.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nRedcat Racing Monte Carlo RC Car 1/10 Scale Fully Licensed 1979 Chevrolet Monte Carlo Lowrider \u2013 2.4Ghz Radio Controlled Fully Functional Lowrider Car \u2013 Purple\nRedcat Racing was founded in 2005 with the ambition of bringing people together and enhancing fun through our products. We have a complete line of parts and accessories as well as a wide selection of vehicle sizes ranging in scale. Creating a positive experience with our products and brand is the driving force behind our innovation and vision. Most of our products come fully assembled. Officially Licensed 1979 Chevrolet Monte Carlo Our 1979 Chevrolet Monte Carlo is a great addition to our line of vehicles. It is the first to use the new LR260 chassis with a solid rear axle and independent front suspension."}, {"role": "assistant", "content": "Price is $359.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nKala Brand Music Co. Solid Cedar Top Acacia, Ukulele, Natural, Baritone\nElegant and beautiful, these are some of the best sounding ukuleles you will ever play. To complement such an excellent sounding instruments, we gave it a full redesign for 2021. The Solid Cedar Top with Acacia back and sides in a shiny, gloss finish, trimmed out in Rosewood binding is a sleek combination. We added an Abalone rosette, Rosewood fingerboard and bridge, and Graphtech Ratio Black Tuners. SIZE Baritone TOP Cedar BACK & SIDES Acacia BINDING Rosewood NECK Mahogany FINISH High-Gloss FINGERBOARD Rosewood HEADSTOCK Standard STRINGS Aquila Super Nylgut NUT & S"}, {"role": "assistant", "content": "Price is $399.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nTCMT Rear Passenger Seat Fits For Indian Chief Vintage Roadmaster Chieftain Roadmaster Dark Horse Indian Springfield Dark Horse\nCondition Aftermarket 100% Brand New Superior quality and Durable Material Leather + Foam + Iron + PP Plastic Color Chrome & Desert TanFitmentFit For Chieftain Dark Horse Icon Fit For Roadmaster Limited Fit For Indian Springfield Dark Horse Fit For Indian Springfield Fit For 2021 Vintage Dark Horse Fit For Roadmaster Dark Horse Fit For Chieftain Elite Fit For Indian Vintage Fit For 2020, 2018 Roadmaster Elite Fit For 2020 Indian Chief Dark Horse Fit For Springfield Dark Horse Fit For Chieftain Classic Fit For 2018, 2016 Chief Fit For Chieftain Limited Fit For Chief Dark Horse Fit For"}, {"role": "assistant", "content": "Price is $129.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nHOKEMA Sansula Renaissance\nSansula Renaissance We have developed the Sansula Renaissance to combine the best features of our other models in one. While the basic Sansula instrument has a relatively delicate membrane, the Renaissance is fitted with a robust synthetic REMO drumskin. The Renaissance is resistant to moisture, so it retains its tension under conditions of varying air humidity and thus also retains its wonderful sound. For all age groups. Most importantly, the Sansula Renaissance has inherited the indescribable sounds of the Sansula Klassik. The tuning of the instrument, in a-minor with additional b and f, allows wonderful melodies to be produced, almost by themselves, by plucking the tines with the thumbs. Tuning ex-works (can be varied) a \u0301, c"}, {"role": "assistant", "content": "Price is $209.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nGarage-Pro Tail Light Compatible with Toyota Prius V LH Driver Side Assembly LED Type\nManufactured from high quality materials Manufactured from high quality materials Easy to install; replaces old or damaged part Easy to install; replaces old or damaged part This is an OE replacement item This is an OE replacement item Garage-Pro is the most affordable brand for your old, worn-out, or damaged factory part! This premium quality replacement part is made to give your car, truck, and SUV that original factory look and performance. Available for different applications, our Garage-Pro part will surely fit right to your vehicle. Comes with 1-year unlimited mileage warranty! Anyone just can't fail by using Garage-Pro! Garage-Pro definitely will indulge you as well as your vehicle starting with your very first purchase! FREE 1-year"}, {"role": "assistant", "content": "Price is $115.64"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nLinksys Wireless-G Travel Router with SpeedBooster - Wireless router - - desktop\nProduct Description Create a wireless network wherever you go! Compact Internet-sharing Router with built-in SpeedBooster enhanced Wireless-G Access Point, in a pocket-sized box. The antenna and power supply are built-in for travel convenience. The Linksys Wireless-G Travel Router with SpeedBooster lets you carry a wireless network wherever you go. There's a built-in access point, which lets you connect SpeedBooster-enhanced and regular Wireless-G and Wireless-B devices to the network. There's also an Ethernet port to connect your wired PC. The Router function ties it together and lets your PCs share a wired or wireless Internet connection. The travel-friendly form factor includes a built-in power supply and antenna -- it even comes with a"}, {"role": "assistant", "content": "Price is $165.93"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAC Compressor & A/C Clutch For Mazda 6 Mazda6 3.0L V6 2003 2004 2005 2006 2007 2008 - BuyAutoParts NEW\nEngineered for superior durability, backed by a one year, unlimited mileage warranty Guaranteed Exact Fit for easy installation, with pre-fitted clutch pulley and plug-and-play electrical connector 100% BRAND NEW, premium ISO/TS 16949 quality - no core deposit or return required! Make sure you flush the system thoroughly and replace the drier filter along with the compressor for better long-term reliability, or consider one of our AC kits that includes everything you need! Fits Mazda 6 V6 Manufacturer BuyAutoParts, Brand BUYAUTOPARTS!, Weight 16."}, {"role": "assistant", "content": "Price is $250.41"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nTama Superstar Classic / Silverstar Hardware Pack\nSuperstar Classic / Silverstar Hardware Pack. Recommended hardware kit for TAMA Superstar Classic shell kit. Compliment your Superstar Classic shells with this sturdy, road-ready hardware setup. Iron Cobra 200 Power Glide drum pedal 25.4 mm diameter base section tubing cymbal stands and snare stand Boom/Straight convertible tilter Quick-Set Tilter cymbal stands and snare stand Double braced legs Recommended hardware kit for TAMA Superstar Classic shell kit Compliment your Superstar Classic shells with this sturdy, road-ready hardware setup Recommended hardware kit for TAMA Superstar Classic shell kit Compliment your Superstar Classic shells with this sturdy, road-ready hardware setup Weight 39.9 pounds, Dimensions"}, {"role": "assistant", "content": "Price is $399.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nDetroit Axle - Front Rear Strut w/Coil Spring + Sway Bars Replacement for Honda Accord - 8pc Set\nReplacement for Honda Accord Kit Includes 2x Front Complete Strut w/ Coil Spring Assembly + 2x Rear Complete Strut w/ Coil Spring Assembly + 2x Front Sway Bar Links + 2x Rear Driver Side Sway Bar Links Detroit Axle Suspension Components are Ready to Meet the Rigorous Demands of Today's Foreign and Domestic Passenger Cars, Trucks and SUVs Undergo Impact, Wear, and Fatigue Testing to Help Ensure Quality and Durability Warranty. Detroit Axle Is a Leading Global Retailer and Distributor of OE Re-manufactured and New Aftermarket Auto Parts Manufacturer Detroit Axle, Brand Detroit Axle, Weight "}, {"role": "assistant", "content": "Price is $302.65"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nTraxxas Clear Body with Camper with Decals, TRX-4 Sport\nThis is a Clear Camper Body for the Traxxas TRX-4 Sport.Features Pre-trimmedIncludes (1) Body with window masks and decal sheet.Requires Painting Traxxas 8112 - TRX-4 Sport Pre-Cut Camper Body, Clear Features Pre-trimmed Includes (1) Body with window masks and decal sheet. Specs Part number(s) included (in factory packaging) 8112 Dimensions 20 x 10 x 6 inches, Weight 1.5 pounds, Country of Origin China, model number 8112, Manufacturer recommended age 12 years and up, Rank Toys & Games RC Vehicle Bodies 210, Manufacturer Trax"}, {"role": "assistant", "content": "Price is $44.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSG03XL Battery for HP Envy M7 Notebook CTO Series\nBattery Type Li-Polymer Voltage 11.55V Capacity 41.5WH ; Cells Color Black with Two Free Screwdrivers Compatible for HP Envy M7-U Series, Hp Envy Series, Hp Envy CTO Series, Hp Envy Notebook Series.HP SG03XL SG03XL SGO3XL Compatible Models for HP Envy m7 17 100% New from Manufacturer. Overcharge and Overdischarge Circuit Protection;Over-temperature and Short-circuit Protection; Up to 500 recharge cycles over the life of the battery. Warranty We specialize in providing quality power products from factory direct sales and quality customer service.Full Refund within 60 days.Satisfaction guaranteed and backed by 12"}, {"role": "assistant", "content": "Price is $33.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCreative Labs T100 2.0 Bluetooth Speaker System - 40 W RMS - 50 Hz to 20 kHz - USB - Compact Hi-Fi Wireless Desktop PC Speakers (Renewed)\nThis pre-owned or refurbished product has been professionally inspected and tested to work and look like new. How a product becomes part of Amazon Renewed, your destination for pre-owned, refurbished products A customer buys a new product and returns it or trades it in for a newer or different model. That product is inspected and tested to work and look like new by Amazon-qualified suppliers. Then, the product is sold as an Amazon Renewed product on Amazon. If not satisfied with the purchase, renewed products are eligible for replacement or refund under the Amazon Renewed Guarantee. Minimanlistic 2.0 Computer Speakers"}, {"role": "assistant", "content": "Price is $49.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSatco Durable All Weather Plastic Motion Infrared Security Sensor, Bronze\nNuvo Lighting is uniquely, poised to become one of the industry\u2019s leaders. With the sales and distribution resources of Satco products and the continued offering of finely conceived, well crafted products that deliver style, value and quality \u2013 Nuvo is a name that will become synonymous with lighting. 5 second to 8 minute delay control Adjustable daylight control Manual override feature CUL wet location listed Manufacturer Satco, Part Weight 8.1 ounces, Dimensions 8.5 x 2.5 x 3 inches, model number Is Discontinued No, Color Bronze, Style Traditional, Finish Bronze, Power Source Corded Electric, Voltage 120 Volts, Quantity 1, Type of Bulb Outdoor Wall Fixture, Mount"}, {"role": "assistant", "content": "Price is $44.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nVinyl Soft Top Roll-up Adjustable Truck Tonneau Cover Kit Compatible with Chevy S10 GMC Sonoma Isuzu Hombre 6Ft Fleetside/Styleside Bed 94-03, Matte Black\nThis cover is compatible with Chevy S10 GMC Sonoma Isuzu Hombre Only fits 6ft fleetside / styleside bed. Come with 1 tonneau cover and 2 side mounting rails, can withstand kinds of climate conditions and supply a great protection to your vehicle with adjustable tension and rubber weather seals. It is made of vinyl frame to guarantee a long operate life in outdoor conditions, also has the elastic straps to secure cover in stored position. It has the velcro edges to seal against truck bed, providing maximum security and concealment of your goods as well, increasing"}, {"role": "assistant", "content": "Price is $147.88"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nBRIGHTFOCAL New Screen Replacement for ASUS ROG FHD 120Hz Upgrade LCD LED Display (Panel Only)\nBRIGHTFOCAL New Screen Replacement for ASUS ROG FHD 120Hz Upgrade LCD LED Display (Panel Only) Compatible Model BRIGHTFOCAL New Screen Replacement for ASUS ROG FHD 120Hz Upgrade LCD LED Display (Panel Only) Important You must match the RESOLUTION, BACKLIGHT, and SCREEN SIZE, TOUCH/NON-TOUCH to your current screen. You cannot deviate from your current screen specifications. Purchasing a screen with different specifications will not work with your system. If you are unsure what your current screen specification is, please contact us before purchase and we will gladly help. BrightFocal provides 100% compatible new item and your satisfaction is"}, {"role": "assistant", "content": "Price is $143.50"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nZIQUN 6.0L Turbo Intercooler Boots Clamps Kit, Intercooler Hose Compatible with Ford F-250 F-350 F-450 F-550 6.0 L Turbo Diesel\nPractical Set Turbo intercooler boots clamps kit includes an elbow 6.0 powerstroke turbo boot hose and two T-bolt intercooler boots clamps combined in a set to give you better convenience. Compatibility 6.0 powerstroke intercooler boot kit compatible with 2003 2004 2005 2006 2007 Ford F250 F350; only 6.0L turbo diesel engine. Temperature Resistance -40\u00b0C to 250\u00b0C High temperature intercooler boots for hot air and water, not for oil or fuel transfer"}, {"role": "assistant", "content": "Price is $35.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nTLAPS Compatible With 2004 Titan/Armada Textured Black AVT Style Aluminum LED Light Bull Bar Guard with Skid Plate\nApplication Compatible With Nissan Titan All Models, 2004 / Nissan Armada All Models Front Bumper LED Bull Bar Guard AVT Series LED Bull Bar, Angular, Bold & Edgy Design Made of Light Weight Aluminum with Durable Construction, Textured Black Coating Finish Plus Warranty on Craftsmanship Only Built In Straight Light Bar with 1 Row of LEDs, High Intensity LEDs Super Bright Offroad Style, Comes with Light Bar Switch Control Manufacturer TLAPS, Brand TLAPS, Dimensions 19 x 13 x 6 inches, Exterior Ready To Paint, Manufacturer Part Position Front, Bulb Type LED, Rank Automotive Grille & Brush Guards 133"}, {"role": "assistant", "content": "Price is $250.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAA Warehousing Foreman Single Handle Kitchen Faucet in Brushed Nickel, 10 Spout Height\nSingle Handle Lavatory faucet Ceramic Disc Cartridge Brushed Nickel Finish Solid Brass waterway construction with hot and cold indicator Meets standards set by Americans with Disabilities Act Maximum Water Pressure 1200 Minimum Water Pressure 700 High Pressure Compatible Inlet Size 0.5 Single hole faucet Ceramic Disk Cartridge 1.8 GPM flow rate Wipe with clean cloth after each use Solid Brass waterway construction Deck mount 100% Pressure System Tested 700kPa - ADA, cUPC, CSA AB1953 compliant Single Lever One year manufacturer warranty applies Brand AA Warehousing, Mounting Type Deck Mount, Finish Type Brushed, Material Steel, Color Brushed Nickel, Handles 1"}, {"role": "assistant", "content": "Price is $99.44"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nJSBOYAT Headlight Assembly Bulbs Included w/Bumper Lights 4pcs Fit for 04-12 Chevy Colorado/GMC Canyon, 06-08 Isuzu I-Series Headlamp Passenger & Driver Side, Black Housing with Amber Reflector\n\ud83d\udca1 VEHICLE COMPATIBILITY Headlights Assembly Compatible with Chevrolet Colorado, GMC Canyon, 2006 Isuzu i-280 / i-350, 2007 2008 Isuzu i-290 / i-370. High beam bulb type 9005 (Included); Low beam bulb type 9006 (Included). Turn Signal Light 3757A (Not Included); DRL 4757NA (Not Included). OEM Part Number Partslink Number \ud83d\udca1 PREMIUM HOUSING MATERIALS This headlamp is sturdy"}, {"role": "assistant", "content": "Price is $124.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nStroller Wagons for 2 Kids, Collapsible Wagon with Seat Belt and Canopy, Kids Wagon Beach Cart with Big Wheels for Sand, Folding Wagon for Shopping, Picnic, Camping, Garden (Stroller Wagon)\nWAGON WITH CANOPY FOR KIDS Our linor new wagon stroller with aluminum table plate and canopy design.Each seat provides a double buckle safety belt, the child sits more safely. Fully meet your travel requirements, it can be turned into a simple dining table and a carport can be built. Get the most out of your outdoor fun! KIDS WAGON FEATURES off-road wheels can be use widely on beach sand,ramps, stones, lawns, steps etc. Push & Pull adjustable handles. You can push and pull the utility wagon with"}, {"role": "assistant", "content": "Price is $158.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nHP Envy Quad Gen. Intel 16GB DDR4, PCIe NVMe SSD, Intel UHD 620, IPS micro-edge, Bluetooth, Windows & Olufsen MS Ink 15.6 Convertible 2-in-1 laptop\nOptimized for inking, the Newest ENVY x360 with Fingerprint reader draws out a more productive, more creative you. Its responsive design adapts to your every move, simplifying your most demanding tasks, transforming workflow, and enhancing creativity with every stroke of the pen. Beauty. Innovation. Drawn together. With four modes designed to work with Windows Ink, take handwritten notes, sketch ideas, and even navigate your screen in a whole new way-all on a high performance laptop. Performance. Above and beyond. Equipped with the latest Intel"}, {"role": "assistant", "content": "Price is $880.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAvery Dennison SW900 Matte Brilliant Blue Metallic | 671-M | Vinyl CAR WRAP Film (5ft x 75ft (375 Sq/ft)) w/Free-Style-It Pro-Wrapping Glove\nAvery Dennison SW900 SUPREME WRAPPING FILM Easy Apply Cast Vinyl Film is a premium quality cast film designed for use in vehicle and graphics markets where high quality film finish and cost effective full color wrapping is required. This dual-layer film incorporates color and clear protective layers, providing a smooth, paint-like finish that's both durable and dazzling. This film is recommended for full car wraps, full truck wraps and can be applied to your vehicles hood, trunk, roof, mirrors, door handles, glass, interior trim and dash panels, chrome and metal parts"}, {"role": "assistant", "content": "Price is $826.59"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nORORO All-New Ultra-Compact Rechargeable Battery Pack for Heated Vests, Heated Jackets and Heated Hoodies\nButton closure Hand Wash Only Market-Leading Recharging Time Thanks to the new technology, it takes only 4 hrs to recharge this battery with the 5V3A charger, providing up to 10 hours of run-time for all ORORO heated clothing (3 hrs on high, 6 hrs on medium, 10 hrs on low) Smaller & Lighter This battery is 40% smaller than the ORORO standard battery and lighter with rounded corners, offering an optimized fit without feeling bulky and annoying during wear Thoughtful Design Easy-to-access power button and enlarged LED lights on the front make checking the remaining battery life easy. Charge your phone"}, {"role": "assistant", "content": "Price is $79.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nFrigidaire Crisper Drawer Refrigerator\nProduct Description This crisper drawer is a genuine replacement part that is perfect to replace a broken crisper bin in a variety of refrigerators. The sliding pan is big enough to fit all sorts of fruits and vegetables that you need to keep cool. Make sure that your fridge functions ideally with this appliance replacement part. Compatible with Whirlpool, Kenmore, Amana, Maytag, KitchenAid and GE models. From the Manufacturer This is a genuine replacement part. The model number and name for the following item is Crisper Drawer Frigidaire drawer RECOMMENDED USE Replacement crisper drawer for a fridge GENUINE REPLACEMENT PART Made specifically to be compatible with Frigidaire and Electrolux refrigerators PART # Compatible with Wh"}, {"role": "assistant", "content": "Price is $147.87"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nDetroit Axle - Brake Kit for Chevrolet Impala Malibu Buick LaCrosse Regal Replacement Chevy 12.64 inch Front & 12.4 inch Rear Disc Brake Rotors Ceramic Brakes Pads\nKit Includes 2x Front Drilled & Slotted Brake Rotor - 2x Front Drilled & Slotted Brake Rotor - 2x Rear Drilled &Slotted Brake Rotor - 2x Rear Drilled &Slotted Brake Rotor - 2x Front Ceramic Brake Pads (Hardware Included) - P-1421 2x Front Ceramic Brake Pads (Hardware Included) - P-1421 2x Rear Ceramic Brake Pads (Hardware Included) - P-1430 2x Rear Ceramic Brake Pads (Hardware Included"}, {"role": "assistant", "content": "Price is $279.24"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nRK Racing Chain 110 Gold XW-Ring Chain with Connecting Link\nAdvanced design XW-ring provides better sealing to extend wear life up to 100 percentXW-ring chains are the best high-speed, extreme heat performance chains available todayChains include rivet connecting link \u2705 Fabricated from an advanced nitrile butadiene composite and featuring 3 lubrication pools to prevent heat, abrasion, torsional flex, and loss of lubricant. Manufacturer RK Racing Chain, Brand RK Racing Chain, Model 110, Weight 4.7 pounds, Dimensions 10.3 x 5.5 x 1 inches, model number 110, Is Discontinued No, Exterior Painted, Manufacturer Part 110, Rank Automotive Powersports Parts Available January 30, 200"}, {"role": "assistant", "content": "Price is $162.78"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMarware Lightweight MicroShell Folio Standing Case for Kindle Fire HD 8.9, Orange (will not fit HDX models)\nMarware MicroShell Folio Kindle Fire HD 8.9 Case The Marware MicroShell Folio is a sleek, ultra-lightweight case that combines versatility and protection. Features Elastic strap holds case open/closed Full access to HDMI and charging ports while inside case Convenient Port Access Lightweight, form-fitting folio allows full access to the HDMI and charging ports without removing device from the case. Stands For Hands-Free Viewing Stands your device horizontally for convenient hands-free reading/viewing. Fold the front lid back and insert it into the groove on the back of the polycarbonate shell. The weight of the device will stabilize the case in the standing position."}, {"role": "assistant", "content": "Price is $34.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nLivex Lighting Montclair Mission 1 Light Outdoor Bronze Finish Solid Brass Wall Lantern with Iridescent Tiffany Glass, 16 x 25 x 30\nProduct Description Bright, iridescent tiffany glass and bold lines put a fresh spin on a classic look in this beautiful Montclair Mission style outdoor wall lantern. Made from solid brass finished in bronze, the top hanging lantern is attached to the back plate by a graceful, curved arm. T-bar overlay linear details on the frame give it an architectural window-inspired look. From the Manufacturer Montclair Mission Exterior Lighting utilizes the classic Mission design motif to bring architectural art to your living space. Tasteful use of Iridescent Tiffany Glass completes this timeless look sure to enhance the exterior of your home. TRADITIONAL DESIGN. Drawing inspiration from traditional"}, {"role": "assistant", "content": "Price is $136.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nNewest HP 14 HD WLED Backlit High Performance Business Laptop, AMD Athlon Silver 3050U up to 4GB DDR4, 128GB SSD, Wireless-AC, HDMI, Bluetooth, Webcam, SD Card Reader, Windows 10 S (Renewed)\nThis pre-owned or refurbished product has been professionally inspected and tested to work and look like new. How a product becomes part of Amazon Renewed, your destination for pre-owned, refurbished products A customer buys a new product and returns it or trades it in for a newer or different model. That product is inspected and tested to work and look like new by Amazon-qualified suppliers. Then, the product is sold as an Amazon Renewed product on Amazon. If not satisfied with the purchase, renewed products are eligible for"}, {"role": "assistant", "content": "Price is $169.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nRetractable Screen RAUBAY 78.7in x 74.8in Large Collapsible White Backdrop Portable Retractable Panel Photo Background with Stand for Video Conference, Photographic Studio, Streaming\n\ud83e\udd0d Larger Size Our professional white screen is easy to contain two people with its large size of x \ud83e\udd0d Wide Application This white background can be good used for video conferencing, YouTube videos, music videos, live-screaming, photography, Tik Tok, or interviews. An indispensable partner for your media career. \ud83e\udd0d Premium Fabric Wrinkle resistant screen made from 100% polyester has the features of good resilience, heat resistance and strong wearability, which is free from wrinkles and defects. \ud83e\udd0d Easy Set-Up You can easily set it up in seconds with"}, {"role": "assistant", "content": "Price is $118.49"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCompetition Engineering 3131 Hoop Roll Cage\nMain Hoop for Roll Camaro & FirebirdMild SteelTubing OD 1-3/4 in.Tubing Wall Thickness 0.134 in.Requires Comp Engineering Roll Bar Strut Kit for a complete Roll Bar Kit For a complete Roll Bar setup, this Roll Bar Hoop must be used with Competition Engineering Universal Mild Steel Strut Kit for Roll Bars. Note This product is only the main hoop to the roll cage Manufacturer Competition Engineering, Brand Competition Engineering, Weight 23 Pounds, Dimensions 55.5 x 1.5 x 41 inches, model number 3131, Manufacturer Part 3131, OEM Part 3131, Rank Automotive Automotive Roll Bars & Cages 46, Automotive Body Parts"}, {"role": "assistant", "content": "Price is $127.98"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nKenroy Home Theta Swing Arm Floor Lamp, Medium, Brushed Steel\nProduct Description Who says elegant accents can\u2019t be functional? This delightfully modern swing arm floor lamp\u2019s elegant stylings and fully functional swing arms are the perfect solution for those looking to accent any Scandinavian or minimalist living space. Use the two swing arms to position the light perfectly for bedside reading or late-night relaxing. The downward directional light can illuminate your tablet or book while creating a wonderfully bright ambient glow. From the Manufacturer Kenroy Home Theta swing arm floor lamp in brushed steel finish comes with a 16 inch diameter white tapered drum shade. The ultimate reading lamp, the column is affixed to one side, allowing it to be placed close to an armchair or bed. Two arms offer maximum adjustability and counterbalance the drum"}, {"role": "assistant", "content": "Price is $190.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nFilterbuy Air Filter MERV 8 Dust Defense Pleated HVAC AC Furnace Air Filters Replacement (Actual Size 19.50 x 22.50 x 3.63 Inches)\nreplacement air filters for your furnace, air conditioner, heat pump, or HVAC system (actual size 19.50 x 22.50 x 3.63 ) MERV 8 synthetic media (comparable with MPR 600 & FPR 5) protects homes from dust, pollen, and more by trapping 90% of airborne particles without impacting air flow High-quality construction features an electrostatically charged, pleated design that captures more harmful particles and prolongs the products lifespan by 3 times that of fiberglass models Industrial-grade beverage board frames with dual wire backings outperform standard cardboard designs"}, {"role": "assistant", "content": "Price is $108.66"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\n2023 Parrots Wall Calendar by Bright Day, 12x12 Inch, Beautiful Exotic Bird Photography\nSTAY ORGANIZED 2023 Parrots Calendar! Did you know that there are roughly 393 different species of parrot? These birds come in a variety of colors, sizes and temperaments, stretching throughout the world\u00e2\u02c6\u0161\u00e2\u20ac\u0161\u00e8\u00e2\u20ac\u0161\u00c3\u2018s tropical regions. They are among the oldest living of all birds, some of them reaching ages of around 95 years! Enjoy our 2023 botanical bird calendar. HIGH QUALITY Parrots Calendar - Size Closed 12 x 12 Inch. Opened 12 x 24 Inch. Does not bleed through! 13 Full Color Images! All 2023 Tropical Birds Calendar photos are hand selected from"}, {"role": "assistant", "content": "Price is $5.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAM Conservation Group, Inc. SH032W AM Conservation Group High Efficiency Shower Head, White\nProduct Description Spoil yourself and maximize efficiency with our Spoiler Showerheads, which feature three luxurious spray settings. This is a White 2. 0 GPM handheld model with a Pause feature that slows the flow of water for extra savings. From the Manufacturer Spoil yourself and maximize efficiency with our Spoiler Showerheads, which feature three luxurious spray settings. This is a White 2.0 GPM handheld model with a Pause feature that slows the flow of water for extra savings. 2. 0 GALLONS PER MINUTE - This water-saving hand shower head has a flow rate of 2. 0 gallons of water per minute. This pressure level allows for a great shower while"}, {"role": "assistant", "content": "Price is $22.36"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nManley Exhaust Valve (Small Block Chevy (LS-6 Head) 1.550 Head Diameter Race Flo), 1 Pack\nManley Performance manufactures stainless valves, forged pistons, camshafts, lifters, vanadium valve springs, push Rods and timing chain kits. Made up of good quality products. The product is manufactured in United States. Manley Performance manufactures Stainless valves, forged pistons, camshafts, lifters, vanadium valve springs, push Rods and timing chain kits Made up of good quality products Manufactured in United States Manufacturer Manley, Brand Manley, Weight 2.5 pounds, Dimensions 11.8 x 6.8 x 2.5 inches, model number Manufacturer Part OEM Part Rank Automotive Automotive Replacement Engine Exhaust Valves"}, {"role": "assistant", "content": "Price is $205.90"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMonopoly Electronic Banking Edition\nAmazon.com The Monopoly Electronic Banking Edition game combines the best of classic Monopoly with updated electronic transactions. As with the original version, players still operate with money, learn real-world economics, competition and strategy, try to stay out of jail, and try their best to get filthy rich. But this version has been updated to reflect changes in how the real world uses money All transactions are conducted with Monopoly's new banking card system. Anyone from age 8 and up will enjoy this updated version of one of the world's most famous games. Classic Fun with Modern Twists Aside from the electronic banking, the basic rules of this game have not changed from the Monopoly everybody remembers. Tokens, houses, hotels, chance and community chest cards, cardboard property deeds -- if"}, {"role": "assistant", "content": "Price is $94.39"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nFumuchy 33MM 4 7/8 Height Chrome Plastic Super Spike Screw-on Nut Cover Replace 10570 for Semi Truck (20)\n33MM 4 7/8 Height Chrome Plastic Super Spike Screw-on Nut Cover Replace 10570 for Semi Truck Replace 10570 Fitment For Semi Truck 33mm 4 7/8 spike nut cover Material Made of high quality ABS Plastic, protect lug nuts not be affected by natural elements prevent rust and corrosion, extremely durability Replace Part Number 10570 Fitment fit Semi Truck 33mm 4 7/8 spike nut cover Material Made of high quality ABS Plastic, protect lug nuts not be affected by natural elements prevent rust and corrosion, extremely durability Installation Easy to install, directly to replacement for semi trucks App"}, {"role": "assistant", "content": "Price is $25.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\namscan Grease T-Birds Jacket\n100% Leather Package Includes 1 x Mens Grease T-Birds Costume Jacket, Plus size The black leather jacket features a white T-Birds logo on the back. Wear this jacket with jeans, a T-shirt, and boots (sold separately) to create your own greaser costume. Meet the rest of the T-Birds at the Frosty Palace in a T-Birds Leather Jacket! Review the size chart for additional sizing information. This jacket is the perfect way to get the retro look you\u2019ve been searching for this Halloween (or maybe even for that theme party you\u2019ve been dying to go to) Manufacturer recommended age 12 years and up, Available June 26, 2020, Manufacturer amscan"}, {"role": "assistant", "content": "Price is $51.52"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nLULULOOK Band for Apple Watch Ultra, 49MM Titanium Metal Band for iWatch \ud835\ude3f\ud835\ude47\ud835\ude3e-\ud835\ude4e\ud835\ude58\ud835\ude67\ud835\ude56\ud835\ude69\ud835\ude58\ud835\ude5d \ud835\ude4d\ud835\ude5a\ud835\ude68\ud835\ude5e\ud835\ude68\ud835\ude69\ud835\ude56\ud835\ude63\ud835\ude69 \ud835\ude4b\ud835\ude67\ud835\ude64\ud835\ude58\ud835\ude5a\ud835\ude68\ud835\ude68 - Titanium Color for Big Wrist\nBuckle closure High-quality Titanium Band for Apple Watch Made of lightweight and sturdy titanium metal, 60% lighter than stainless steel. fell light and comfortable on your wrist. Diamond-like Carbon (DLC) coating for high corrosion & scratch resistance. At the same time, Waterproof and sweatproof"}, {"role": "assistant", "content": "Price is $59.19"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nKO-KEN(JAPAN) 3/8 (9.5 mm) SQ. Nut grip socket rail set 8 pair RS 3450 M / 8\nSpecifications 3/8 (9.5 mm) SQ. Nut Grip Socket Rail Set of 8 (Rails 7.9 inches (200 mm) Weight 11.8 oz (310 g) Set Includes 10, 11, 12, 13, 14, 17, 19, 200mm Rails Brand Koken, Material Alloy Steel, Quantity 1, Head Style Hex, Finish Type Powder Coated, Operation Mode Mechanical, Manufacturer KO-KEN TOOL CO., LTD., Part Weight 10.9 ounces, Dimensions 10.71 x 2.95 x "}, {"role": "assistant", "content": "Price is $98.26"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nHOME MASTER HARDWARE Heavy Duty Shelf Brackets, 12 x 8 inch Metal Brackets, Shelves Support Angle L Bracket for Kitchen Garages Stores, Black with Screws 10 Pack\n\ud83c\udff5\ufe0fThe 12\u201c x 8\u201d shelf brackets are made of high-quality steel, sturdy and durable. \ud83c\udff5\ufe0fBlack coated finished, water-proof and rust-proof, corrosion-resistant, and smooth and beautiful, long-lasting use. \ud83c\udff5\ufe0fThe shelf bracket using the triangular structure and precision welding techniques, more stable. Each pair can hold up to 1000 lbs. \ud83c\udff5\ufe0fFloating shelf hardware can be used as decorative shelf brackets, open shelving for kitchen, bookshelf brackets, metal shelf bracket, exhibition stand, garden shelf, or even general external"}, {"role": "assistant", "content": "Price is $35.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nTireMinder A1A Tire Pressure Monitoring System (TPMS) with 10 Transmitters for RVs, MotorHomes, 5th Wheels, Motor Coaches and Trailers\nWith a large, beautiful display, the TireMinder A1A tire pressure monitoring system is engineered to be a simple, yet powerful tool for monitoring tire conditions, no matter where the road takes you. Just like the original State Road A1A, the TireMinder A1A allows you to sit back and relax, winding through all of the hidden paths, while knowing your vehicle is in good hands. The A1A features straightforward visual alerts, as well as powerful audible alerts, with easy to understand icons to know exactly what type of issue is occurring and where. From boat trailer to"}, {"role": "assistant", "content": "Price is $600.97"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nLorfancy 24 Pcs Kids Jewelry for Girls Necklaces Bracelets Rings Toddlers Jewelry Set Princess Party Favors Goodie Bags Stuffers Cute Unicorn Mermaid Pendant Adjustable Woven Friendship Play Jewelry Girls Dress Up Gifts\n24 PCS Delicate Jewelry Set This kids girls party favor jewelry includes 12 pcs bracelets, 6 pcs necklaces and 6 pcs rings. The shape of kids jewelry is popular with little girls. Such as unicorn, mermaid, heart, animal, ice cream etc. Providing your lovely kids with different styles to meet their daily matching. High Quality Made of high quality acrylic and alloy, which is durable, fade-free, smell-free and environmentally friendly. Keeping your lovely girl\u2019s safe and happy is always our pursuit. Your kids also can share it with their friends."}, {"role": "assistant", "content": "Price is $11.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nGO-PARTS - Pair/Set - for Dodge Durango Rear Tail Lights Lamps Assembly/Lens/Cover - Left & Right (Driver & Passenger) Side Replacement 2015\nfor OEM OEM FITS 2014 - 2017 Durango Citadel 3.6L V6 FLEX SUV 4-Door Automatic AWD/RWD 119.8 - 2021 Durango Citadel 3.6L V6 GAS SUV 4-Door Automatic AWD/RWD 119.8 - 2021 Durango Citadel 5.7L V8 GAS SUV 4-Door Automatic AWD/RWD 119.8 - 2017 Durango GT 3.6L V6 FLEX SUV 4-Door Automatic AWD/RWD "}, {"role": "assistant", "content": "Price is $308.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAll-Weather Guard Rubber Floor Mats for 2023 2024 Honda CR-V (Non-Hybrid) Waterproof Car Mats for CRV Accessories TPE Automotive Floor Mat & Cargo Liner and Rear Backrest Mats Full Set Black\nCRV 2023 2024 CUSTOM ACCESSORIES Car mats include front and rear,plus trunk mats and backrest mats, a total of 8 sets. Shvgen floor mats are scanned accurately by 3D laser and are suitable for 2023 Honda CR-V ( Non-Hybrid ). HIGH QUALITY TPE Custom floor mats for cars are made of TPE, Green and tasteless, safe and hygienic, cold and heat resistant, waterproof and snowproof, easy to clean. ALL-WEATHER PROTECTION ALL weather floor mats for"}, {"role": "assistant", "content": "Price is $160.88"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nKALRI Modern Indoor Lighting Saturn Gold & Black Pendant Light Kitchen Island Chandelier Ceiling Hanging Light Fixtures with Matte Black Finish\nSpecifications Style Modern Finsh Matte Black Color Black&Gold Shade Color Black Material Metal Bulb Type Incandescent/Led bulbs Number Of bulb 1pcs (Bulbs Not Included) Socket Specs Base E26 Bulb Wattage 60 max wattage for use with this fixture Voltage 110V Light Direction Downlight Hanging Chain Product Dimension Fixture Width 13, High 12 Canopy 5 Package Included 1x Modern Lisse Saturn Gold & Black Pendant Light Warm Tips 1) 100% Quality Assurance. No matter what difficulties you meet, we will always in your service! 2) For any faulty or defective product, please contact us first"}, {"role": "assistant", "content": "Price is $63.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSKB Cases ATA Hard Plastic Golf Bag Storage Traveling Case with Wheels and Reliable Secure Latches\nKeep your golf clubs safe when traveling with a case that is compatible with new golf bag designs featuring protruding top molded handles Made from ultra-high molecular weight polyethylene to ensure ultimate protection of belongings; Dimensions (L x W x H) 51.12 x 14.50 x 17.00 inches Equipped with TSA Locking System for ease of travel; Perfect-Match valance bending system provides a tight fit to prevent dirt, dust, and moisture Capable of accepting drivers up to in length; Patented industrial strength latches for superior closure and overall latched security Designed with quiet, smooth inline skate-style wheels for easy portability; Material Polyethylene; Color Black Dimensions L x"}, {"role": "assistant", "content": "Price is $349.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nV\u00f6xx MGA Matte Black Wheel with Painted (17 x 7.5 inches /5 x 110 mm, 40 mm Offset)\nMGA 17x7.5 et40 CB73.1 Matte Black. Painted with a three stage painting process and clear coated for protection 5 Spoke Design Center cap included 1 year finish warranty, 90 days out of round, and lifetime structural warranty Size 17x7 5x10, Brand Wheel Size 17 Inches, Pitch Circle Diameter 110 Millimeters, Diameter 17 Inches, Rim Width 7.5 Inches, Manufacturer Model MGA, Weight 22 pounds, model number MGA MB, Manufacturer Part MGA MB, Construction aluminum, Bolt Pattern ( Holes) 5, Offset "}, {"role": "assistant", "content": "Price is $164.12"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAlno Contemporary I Modern Robe Hooks, Polished Nickel\nAlno is the most requested line of fine decorative cabinet hardware, offering cutting edge designs from traditional to contemporary styles. Alno is the designer's choice providing unique designs from one source for fine decorative cabinet hardware, bathroom accessories, mirrors, and mirror cabinets. As a handmade and finished product each piece of cabinet hardware may have slight variations within a lot, color, and or finish. Overtime, the living finishes may patina by use or rub off, and coloration will change in a natural process enhancing the cabinet hardware's unique beauty. Made by Alno Upc - Vendor Item Number - Country of Origin China Color Polished Nickel, Brand Alno, Material Metal, Finish Type Chrome, Mounting Type Wall Mount, Style Modern"}, {"role": "assistant", "content": "Price is $45.05"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nNational Cycle Tall VStream Windshield N28209\n\u2022 High X Wide\u2022 Polycarbonate windshield with dark tint\u2022 Sturdy mount system attaches to the forks, requires no modifications to stock components, and provides outstanding rigidity\u2022 Includes all hardware\u2022 Backed by a 3 year unbreakable warranty\u2022 DOT approved The VStream gets its name from the unique shape and dimensional contours designed and engineered into the windscreen Color Dark Tint, Brand National Cycle, Exterior Finish Painted, Style Custom, Auto Part Position Front, Pieces 1, Manufacturer National Cycle, Model National Cycle Weight 4 pounds, Dimensions 24 x 18 x 9 inches, Country of Origin USA, model number Is Discontinued No, Manufacturer Part OEM Part Rank Automotive Powersports Windshields 6919,"}, {"role": "assistant", "content": "Price is $269.96"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMarvel 6 Inch Legends Series Rogue\nWith just one touch, Rogue can absorb anyone\u2019s superpowers \u2013 making her capabilities in any matchup nearly limitless. With the Marvel Legends Series, both kid and adult Marvel fans can start a legendary collection of comic- and movie-based Marvel characters. This 6-inch Marvel\u2019s Rogue figure is highly articulated and features a comic-inspired design, making it another epic addition to the Marvel Legends Series. Copyright 2015 Marvel. Hasbro and all related terms are trademarks of Hasbro. Comic-inspired design Includes Build a Figure part (Juggernaut) Collect other Marvel Legends Series figures (each sold separately) Action figure size 6 inches Includes figure and 1 Build-a-Figure piece;Care and Cleaning Wipe Clean with a Damp Cloth Dimensions 2.52"}, {"role": "assistant", "content": "Price is $70.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nJVC 6.8 Capacitive Multimedia Car Receiver Safe Driver's Bundle with Voxx HD Backup Camera. With Apple CarPlay, Android Auto, Android USB Mirroring, Bluetooth, SiriusXM Ready\nCar Toys (Authorized Retailer) Bundle Includes JVC Receiver JVC Receiver Voxx HD Backup Camera Voxx HD Backup Camera Car Toys Bottle Opener Keychain, 1-Year Manufacturer Warranty Car Toys Bottle Opener Keychain, 1-Year Manufacturer Warranty MAKE THE MOST OF YOUR PHONE Everything you need for the road is in your smartphone, right? JVC's multimedia receiver puts all the goodness front and center on a 6.75 touchscreen display with Android Auto and Apple CarPlay. Plus, you'll get a variety of music sources, hands-free calling, serious sound-shaping tools,"}, {"role": "assistant", "content": "Price is $309.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nJET Infeed/Outfeed Tables\nExtend the table size of your JET or sander with these sturdy infeed/outfeed tables. Compatibility Designed to fit JET and drum sanders Maximum Workload 35 pounds (per table) Added Capacity Extends table surface to 40 Table Dimensions 18 x 10-1/4 JET Red Assurance Guarantee Backed by JET's industry-leading one-year warranty against manufacturing defects Brand Jet, Dimensions LxWxH 19.5 x 11 x 4.25 inches, Grit Type Extra Fine, Power Source Hand Powered, AC Adapter Current 10 Amps, Weight 16 pounds, Manufacturer JET, Part Dimensions 19.5 x 11 x 4.25 inches, Country of Origin Taiwan,"}, {"role": "assistant", "content": "Price is $149.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nTusk Removable Half Windshield Clear\nThe Tusk half windshield is a must have for your side x side. This half windshield will protect you from many of the elements while still allowing air flow to keep the cab clear of swirling dust or fogging moisture. Made of 3/16 polycarbonate plastic. Fits CAN-AM 2021 - 2022 Commander 1000 DPS -- CAN-AM 2021 - 2022 Commander 1000 XT -- CAN-AM 2021 - 2022 Commander 1000 XT-P -- CAN-AM 2022 Commander 1000R X-MR -- CAN-AM 2022 Commander 700 DPS -- CAN-AM 2022 Commander 700 XT -- CAN-AM 2021 - 202"}, {"role": "assistant", "content": "Price is $88.19"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nYOLENY 39 Inch Electric Guitar Complete beginner Kit Full-size Solid-Body SSS Pickups for Starter with Amplifier, Bag, Stand, Tremolo Bar, Digital Tuner, Strap, Picks, Strings Brown\n\ud83c\udfb5 \u201cC\u201dSHAPED MAPLE NECK YOLENY Electric Guitars Use The Most Common Modern Neck Shape-\u201cC\u201d Shaped Profile Design, Which Has A Very Comfortable Feel, Broad Adaptability Even The Small Hands. Fingering Suitable For Multiple Playing Styles. \ud83c\udfb8 S-S-S PICKUPS Three classic single-coil pickups. This is a pickup arrangement suitable for more music styles. Whether you prefer pop, rock, Metal, Jazz, funk, or blues, you will have a balanced performance. \ud83c\udfb5 20"}, {"role": "assistant", "content": "Price is $119.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nReally Good Stuff Classroom Go for It Chips \u2013 Set of 100 with 50 Unique Messages \u2013 Encourage Positive Feelings & Confidence \u2013Social-Emotional Learning \u2013 SEL for The Home and Classroom\nKids love getting a supportive note from a teacher or a parent These chips are a great way to encourage kids to try new things and inspire them to \u201cGo For It\u201d The inspirational sayings are meant to empower children to think positively and develop confidence and resilience. These 2.25\u201d round chips have two each of 50 different, kidfriendly, upbeat messages of encouragement and validation, perfect for spreading positivity and socialemotional learning. These support tools feature fun designs on the back which can also be used as a space to customize a note to the child in permanent marker. To take it"}, {"role": "assistant", "content": "Price is $22.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nLiili Round Mouse Pad Natural Rubber Mousepad Image ID Mexican Pattern\nManufacture MADE IN USA. Designed, Printed and Shipped out of our California Facility. Features Our mousepad is made of natural rubber with Fabric. High quality cloth weave surface bonded to a special NON-SLIP 100% natural Eco-Friendly rubber base to enhance precise tracking, effortless control, steady surface support and extended durability. The weave also provides a nice, comfortable feel under your hand, Minimizing Fatigue over extended periods of time. Works With Any Standard Mouse. Low Friction and Ultra Smooth Fabric surface optimized for better Mouse Gliding. Warm Tip After being tested, we found that color might seem a little different (lighter or darker) on different monitors. After-sales Service 1. 30 days"}, {"role": "assistant", "content": "Price is $9.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCardone Remanufactured Distributor\nCARDONE Remanufactured Distributors provide reliable performance at the best price. Installing a CARDONE Distributor on your vehicle will ensure that proper voltage is transmitted to the spark plugs in the correct timing pattern so that your vehicle will perform on command. As a remanufactured Original Equipment part, this unit guarantees a perfect vehicle fit All electronic module components are 100% computer tested to ensure full functionality and O.E. components with consistently high failure rates are 100% replaced or repaired to meet or exceed O.E. performance Precise machining tolerances prevent oil leakage, poor timing, setting of the Check Engine light, and premature failure Automated test equipment verifies signal strength, correct polarity of wire harness, air gap, crank reluctor tooth size, as"}, {"role": "assistant", "content": "Price is $237.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nKhrome Werks 1-1/4 Chrome 12 Fat Ape Hanger Handlebar 300315\nHighly polished, duplex nickel chrome-plated steel 1-1/4 diameter formed handlebars with 1 center mounting section and 1 grip mounts Pre-drilled for internal wiring 3-1/2 on center clamping width is 5-1/2 to accommodate one-piece top riser clamps Works with standard style controls and grips Will not work with hydraulic clutch 12 rise, 35 wide, 12 center width, 11 end riseNote Not for use with stock risers and handlebar/gauge mount on models. Highly polished, duplex nickel chrome-plated steel 1-1/4 diameter formed handlebars with 1 center mounting section"}, {"role": "assistant", "content": "Price is $309.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMillSO USB C Female to USB A Male Adapter, USB 3.0 Type A Male to USB 3.0 Type C Female Connector Converter Adapter 5Gbps SuperSpeed/Nylon-Braided Type C to USB A Adapter -\nMillSO USB C Female to USB Male Adapter This USB A male to USB C female adapter is the perfect way to connect most USB-C headphones to any legacy USB-A device (computer, laptop, tablet), so you can enjoy music wherever you go without disturbing others. With the added 8-inch extension cable, you can keep your devices at a comfortable distance. NOTICE Does NOT support fast charging, video signal transmission, or work with MagSafe charger. Superior Audio Transmission MillSO USB male to USB C female adapter supports up to 5Gbps audio transmission"}, {"role": "assistant", "content": "Price is $7.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSKILSAW 10-1/4 In. Magnesium SAWSQUATCH Worm Drive (Diablo Blade)\nSkil saw, sawsquatch, 15A, magnesium, worm drive, Circular saw, for cutting 4 times cleanly in 1 pass, powerful 15A dual field motor easily tackles LVL, glulam, pine & PSL wood, magnesium motor housing stabilizes drive train for longer life, legendary Skil saw worm drive gearing for a lifetime of performance, includes 40 tooth Diablo carbide blade & multi-function wrench. Sharp and thin saw This product satisfies the customer requirement Manufacture in China Brand Skil, Blade Material Carbide, Surface Recommendation Wood, Special Feature Brushless, Included Components Circular Saw Accessory, Amperage 4."}, {"role": "assistant", "content": "Price is $372.34"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nYoungRC 915Mhz 100MW Radio Telemetry Air and Ground Data Transmit Module 915Mhz Radio Telemetry Kit for APM2.6 APM2.8 Pixhawk Flight Controller\nSupport for OTG of Android Cellphones, and for computer OTG. Antenna is 5.8G. Very small size and light weight. Air data transfer rates up to Transmit power 20dBm It can be used with standard for APM, for Pixhawk flight controller (for APM 2.6 2.8 Pixhawk 2.4.8 flight controller etc). easy to install and connect. Available bi-directional amplifier gain greater range. With a standard for TTL UART interface, for HM-TRP wireless module based, with for Si443"}, {"role": "assistant", "content": "Price is $66.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nKLR650 Adjustable Kickstand Side Stand 3 Inches Shorter\nCompatible with Soupy\u2019s Adjustable Kickstand Lowering Side Stands are CNC machined from solid aluminum and use stainless steel hardware. Adjustable from stock length to 3 inches shorter and the strongest and most stylish available. There are a number of different lengths to choose from. The foot has a radius that accommodates all different lean angles. If your motorcycle is lowered, the frame is closer to the ground. The kickstand is mounted to the frame and needs to be shorter to retain the lean angle required to prevent the motorcycle from standing too upright or tipping over. This is a direct bolt-on item. Simply replace your stock kickstand with this, adjust to your desired length, apply thread locker and tighten screws. Designed for use with"}, {"role": "assistant", "content": "Price is $206.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nGENOVA PRODUCTS M32705 1/2 PVC Street Elbow, 0.5 Inch\nMarlex, 1/2 inch black PVC street elbow, 90 degree, Schedule 40, male pipe thread x female pipe thread, meets the requirements of astm D 2466. This product is highly durable. This product is easy to use. This product is manufactured in china. Manufactured in china Easy to use Highly durable Size 0.5 Inch, Material Pvc, Brand Genova, Color Black, Dimensions LxWxH 1.9 x 1.5 x 90 inches, Connector Type Elbow, Exterior Finish PVC, Weight 0.04 Pounds, Manufacturer Standard Plumbing Supply, Part Dimensions 1.9 x 1"}, {"role": "assistant", "content": "Price is $5.93"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nFour Seasons 57129 Remanufactured Air Conditioning Compressor (Renewed)\nRemanufactured Air Conditioning Compressor Meets or exceeds OE design and performance Part number 57129 Fit type Vehicle Specific Package Dimensions 21.082 H x 29.21 L x 18.541 W (centimeters) Manufacturer Four Seasons, Brand Four Seasons, Model Weight 14.7 Pounds, Dimensions 11.4 x 7 x 7.8 inches, Country of Origin China, model number 57129, Is Discontinued No, Exterior Machined, Manufacturer Part 57129, OEM Part 57129, Rank Automotive Automotive Replacement Air Conditioning Compressors 7896, Available December 2, 2005, Included Components Compressor, Dimensions LxW"}, {"role": "assistant", "content": "Price is $156.57"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nStrictly Briks Classic Stackable Baseplates, Building Bricks for Towers, Shelves, and More, 100% Compatible with All Major Brands, Clear Colors, 6 10x10 Inches Base Plates & 50 Stackers\nGUARANTEED TIGHT FIT Our classic-size products are 100% compatible with all major brands of building bricks. Let your child's creativity soar as they build & create their own unique designs without spending a fortune. Our versatile products can be used to build a city, a wall set, an activity table base, or anything their imagination can dream up. Don't settle for flimsy cardboard bricks, invest in Strictly Briks for high-quality, durable building blocks that will last for years to come. UNLEASH YOUR CREATIVITY With"}, {"role": "assistant", "content": "Price is $36.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\n2021 Dell Inspiron 3000 Laptop Computer, 15.6 Inch FHD Display, 11th Gen Intel Core Processor, 16 GB RAM, 256 GB SSD, Webcam, Wi-Fi, HDMI, Bluetooth, Windows 10 Home, Black (Latest Model)\nProcessor 11th Generation Intel\u00ae Core\u2122 Processor (6MB Cache, up to 4.1 GHz) Graphics Intel\u00ae UHD Graphics with shared graphics memory Operating system Windows 10 Home 64-bit Memory Up to 32GB DDR4 SDRAM Hard Drive Up to 2TB PCIe NVMe M.2 Solid State Drive or 2TB Hard Disk Drive Optical Drive No Display FHD (1920 x 1080) Anti-glare LED Backlight Non-Touch Narrow Border WVA Display"}, {"role": "assistant", "content": "Price is $400.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nDIYmalls 3V 2 Cells AA Battery Holder Case Storage Box with ON/Off Switch Alligator Clip Wire Cable 130mm (Pack of 4)\nFeature -For 2pcs AA battery only. -Designed with a cover, and a on/off slide switch. -Size 68mm x 34mm x 20mm / 2.7 inch x 1.3 inch x (L*W*H). -Cable Length 130mm / 5 inch. Package Included 4pcs AA battery holder with alligator clip -You will receive 4pcs aa battery holder 2 cells. -Battery NOT included. -Designed for 2 cells aa battery holder. -With 2pin alligator clip wires, and ON/OFF switch."}, {"role": "assistant", "content": "Price is $8.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMini PC, Kinupute Desktop Computer Windows 11 Pro, 16G RAM, 256G SSD, Discrete Graphics GT1030 2G, HDMI/DVI, 4K, Dual-Band WiFi, BT 4.0, Gigabit Ethernet for Gaming/Office/Server\nHigh Performance Mini PC equipped with Xeon v3 and GeForce GT 1030 2GB GDDR5, 4 Cores 8 Threads, base frequency max L3 Cache 8MB. Greatly improves the performance of games. Pre-installed with Windows 11 Pro OS. All kinds of office software run easily, such as C4D 3D drawing software, Pr video editing software, PS graphic design software, etc. This Mini PC is ideal for diverse usage"}, {"role": "assistant", "content": "Price is $495.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nNon-Slip Solid Plate Base Aluminum Alloy with Rubber Suction Cup Swivel Vise Table Vise for Home DIY Creation\nThis adjustable vise is made of high quality aluminum alloy, corrosion resistant and durable. The bottom of the board is firm, and the bottom is equipped with a rubber suction cup, which is non slip, safe and reliable to use. Suitable for precision processing and DIY creation at home. Features CORROSION RESISTANCE The adjustable mini vise is made of high quality aluminum alloy, corrosion resistant and durable. Professionally designed for home, workshop, professional use, it can be used indoors or outdoors. CONVENIENT PROCESSING This rotatable multi angle vise is very suitable for general clamping applications, convenient for processing, and very suitable for most workbenches and"}, {"role": "assistant", "content": "Price is $33.09"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nOrchid Seed Bastard!! Arshes Nei Shikkoku no Raitei PVC Figure (1 6 Scale)\nFrom Orchid Seed. From the apocalyptic fantasy series BASTARD!!, serialized in Ultra Jump, comes the Lover/Daughter of the protagonist Dark Schneider - the Thunder Empress Arshes Nei! This new PVC figure is based on a pin-up released in the Perfect Edition Vol. 2 in 2003 and features amazing detail. Featuring her cursed blade, the Raijinken as well as an appropriately cold and determined gaze this figure perfectly captures the essence of this magical sword master! Her revealing outfit allows you to see her body - which is both attractive as well as strong, and her flowing cape and delicately sculpted hair add amazing motion"}, {"role": "assistant", "content": "Price is $202.69"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nDurable Desk Reference System With Display Sleeves - 10 Panels - 2 Sheet s /panel - Letter Size - 60 Degree Viewing Angle - Polypropylene, Metal - 6 / Each - Gray\nDesk reference system offers glare-free sleeves made of environmentally friendly polypropylene. Metal base can adjust to display at or angles. Sleeve frames include snap-on tabs. Reference system includes a desk stand and 10 letter-size display sleeves that hold up to 20 documents with snap-on tabs. Durable Desk Reference System with Display Sleeves Manufacturer Durable, Brand Durable, Weight 4.5 pounds, Dimensions 11.8 x 6.7 x 10.2 inches, model number Color Gray, Material Type Metal, Tab Position Top, s 1, Sheet Size"}, {"role": "assistant", "content": "Price is $107.60"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nRockford Fosgate RFK-HDRK Complete Amplifier Installation Kit for 1998+ Harley Davidson Road King Motorcycles\nThe Road King kits are designed to deliver an exceptional sound experience while retaining as much interior bag space as possible. The RFK-HDRK amplifier mounting kit is designed for use with the Rockford Fosgate Power amplifier (sold separately) and is supplied with the complete plug-and-play wiring harness. Designed for use with Harley-Davidson Road King Motorcycles. The RFK-HDRK is a complete Amplifier Installation Kit, designed for use with select and 2014+ Factory Harley Davidson Road King Hardshell Bag Lids while retaining as much interior bag space as possible Manufacturer Rockford Fosgate, Brand Rockford Fosgate, Model Rockford"}, {"role": "assistant", "content": "Price is $339.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMoto G7 Power Battery, (Upgraded) MAXBEAR 3.85V Li-Polymer Replacement Battery for Motorola Moto G7 Power JK50 XT1955 Moto One Power XT1942 with Repair Tool Kit\nProduct Specifications - Battery Capacity 5,300 mAh - Battery Type Lithium Ion Polymern - Voltage Output 3.85 V - Watt-hour 24.47 Wh COMPATIBLE MODELS -Motorola Moto G7 Power -Motorola Moto One Power -JK50 Package Includes -Replacement Battery for Moto G7 Power x 1 pcs. -12 Months Warranty Internal batteries, (IE Phone replacement batteries), require technical knowledge to replace, and may be hazardous and/or cause damage or future damage to phone if done incorrectly. Do not expose to extreme"}, {"role": "assistant", "content": "Price is $18.87"}]}
|
||||
@@ -0,0 +1,50 @@
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nFit System Passenger Side Mirror for Toyota Tacoma, Black, Foldaway, Power\nPassenger Side Mirror for Toyota Tacoma. Black. Foldaway. Power. Mirror glass is power adjustable. Convex Lens. Mirror glass does not have heating capabilities. Manual folding for additional clearance. Mirror has no turn signal. Housing finish is Black. Passenger side Mirror, tested to fit and function like the original, Meets or exceeds OEM standards Mirror glass is power adjustable OE-comparable wiring harness/ connection (no pigtail connector) for hassle-free installation Manual folding for additional clearance Auto Part Position Right, Dimensions LxWxH 13.25 x 5.25 x 9.25 inches, Lens Curvature Description Convex, Brand Fit System, Color Black, Mounting Type Door Mount, Special"}, {"role": "assistant", "content": "Price is $122.65"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nHDBUBALUS Motorcycle Sissy Bar Detachable Rear Passenger Backrest Pad Fit for Harley Sportster XL 883 1200\nFitment For Harley Sportster XL Models Sportster 883 / Sportster 1200 / Seventy Two / Forty Eight / Iron 883 / Iron 1200 Soft Cushion Pad and Sissy bar Provide Great Back Support for Passenger,Improve Riding Comfort in a Long Haul Trip Sturdy and Great Black Finish, Bracket Made of Steel,Pad Made of PU Leather + Polyurethane Foam Detachable Design, Easy Installation and Removal when you Not Need it. The quick-release bracket is need when install Package Include 1 Set of Sissy Bar Backrest (Instruction is not included. Please check the size carefully before your purchasing) Manufacturer HDB"}, {"role": "assistant", "content": "Price is $125.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMishimoto Performance Aluminum X-Line Radiator Compatible With GMC C/K Truck\nAn ideal upgrade for the brittle stock radiator, the Mishimoto GM C/K Truck X-Line Performance Aluminum Radiator provides your truck with enhanced reliability and improved cooling efficiency. Whether you use your classic for daily driving or take it to the track, don't overlook the importance of installing an upgraded classic truck radiator in your engine. A stock radiator cannot handle the heat that comes along with having a great deal of horsepower. This GM C/K truck radiator is manufactured using durable aircraft-quality aluminum end tanks, precision TIG-welded to an efficient brazed aluminum core. The inlet and outlets provide precise leak-free connections. This classic car radiator also includes a magnetic oil drain plug, which effectively removes metal fragments circulating in the cooling system"}, {"role": "assistant", "content": "Price is $340.16"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nK&N Air Intake System (Non-Carb Complaint) (Harley Davidson)\nK&N's Street Metal Series High-Flow Air Intake Systems provide a good looking appearance, offer increased airflow and deliver more horsepower & torque to your Harley-Davidson motorcycle. These intake systems increase power by eliminating the stock OE air cleaner which is replaced by a complete high-flow K&N air intake system. This air intake system is constructed with an extra tall K&N air filter providing more air flow and longer service intervals than standard RK-series air intake filters. The extra tall air filter design is intended to be used on larger or custom engine builds that would benefit from higher levels of air flow. Installation is simple with a sturdy, custom aluminum backing plate that mounts directly to the throttle body via an"}, {"role": "assistant", "content": "Price is $249.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCardone Remanufactured Unloaded Disc Brake Caliper with Bracket (Renewed)\nAs a remanufactured Original Equipment part, this unit guarantees a perfect vehicle fit Pistons are durable, resistant to cracking or pitting and handle great loads; calipers are treated with a special formulated rust inhibitor and kept in the original equipment finish Rubber seals are replaced with high temperature EPDM rubber for extended life and optimum performance New bleeder screws provide trouble-free bleeding and a positive seal and new washers are included where applicable Our remanufacturing process is earth-friendly, as it reduces the energy and raw material needed to make a new part by 80% Manufacturer A1 Cardone, Brand Cardone, Weight 12.46 pounds, Dimensions 9.56 x 7.25 x "}, {"role": "assistant", "content": "Price is $91.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAnker Nebula Capsule Max with Anker Nebula Capsule Series Adjustable Tripod Stand, Aluminum Alloy Portable Projector Stand\nAnker Nebula Capsule Max with Anker Nebula Capsule Series Adjustable Tripod Stand, Aluminum Alloy Portable Projector Stand HD Viewing Cutting-edge DLP technology projects a vividly-detailed 720p, image up to 100 inches big. Ideal for use in low-light environments Instant Clarity Get an ultra-sharp, rectangular image from almost any angle in under a second with Capsule Max mini projector\u2019s autofocus and keystoning technology Ideal in the Home Stay entertained at home with Capsule Max's image. Watch movies, take online classes, or keep the kids entertained with hours of cartoons and educational videos. The possibilities are endless Android 8.1"}, {"role": "assistant", "content": "Price is $427.97"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAmerican Lighting SGL-SM-WH Smooth Faceplate for LED Step Light, White, Matte\nAmerican Lighting SGL-SM-WH Faceplate for LED Step Light, Smooth, White Durable cast zinc-magnesium faceplate for American Lighting LED Step light. White Color is great for your home. Smooth shape. built for the American Lighting Step Light model # SGL-LED-WW. Durable wall plate. zinc-magnesium material is built to last. White Color is great for your home Smooth shape Built for the American Lighting Step Light model SGL-LED-WW Durable wall plate - zinc-magnesium material is built to last Includes faceplate - requires separate purchase of LED light fixture SGL-LED-WW Brand American Lighting, Color Matte White, Material Metal"}, {"role": "assistant", "content": "Price is $95.11"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCoverking Custom Fit Front 50/50 Bucket Seat Cover for Select Mazda 6 Models - Neosupreme (Charcoal with Black Sides)\nThe exact seat configuration is Front With Side Airbag; 50/50 Bucket; Only For Base Seats Without Elongated Center Bottom Cushions Made from Neosupreme fabric for insulation, soft touch, and comfort Neosupreme seat covers are water-resistant and are an affordable alternative to Neoprene Tailor-made to the exact specifications of your vehicles seats and protects your seats from spills, stains, and damage Stitching designed to emulate factory seat style and the high quality buckles and zippers enable for a secure fit Designed to install yourself (installation may require some effort for a snug fit) and includes a 1 year limited warranty"}, {"role": "assistant", "content": "Price is $186.36"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMemorial Candle for Weddings Memory Candle to Honor a Loved one at Your Wedding\nPLEASE email us with details regarding how you want Unity Candle personalized Scroll through the pictures to see options for verses and graphics. white 3x9 candle Pictures show a few examples. Let us know about your special day and we will create a custom candle just for you! Brand Unity Candles, Dimensions 10 x 5 x 5 inches; 2 Pounds, Weight 2 Pounds, s 1, Operating Time 48 Hours, Indoor/Outdoor Usage Indoor, Specific Uses For Product Wedding Memorial Candle, Shape Round, Material Wax, Occasion Wedding, Seasons All Season, Style Custom, Wick Quantity 1, Theme Memorial, Information Can, Unit Count 1.0 Count, Is Dis"}, {"role": "assistant", "content": "Price is $26.25"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nApple Watch Series 6 (GPS + Cellular, 44mm) Gold Stainless Steel Case with Pink Sport Band (Renewed)\nApple Watch Series 6 lets you measure your blood oxygen level with a revolutionary new sensor and app. Take an ECG from your See your fitness metrics on the enhanced Always-On Retina display, now 2.5x brighter outdoors when your wrist is down. Set a bedtime routine and track your sleep. And with cellular service, you can go without your phone. It's the ultimate device for a healthier, more active, more connected life. GPS + Cellular model lets you call, text, and get directions without your phone Measure your blood oxygen with an all-new sensor and app Check your heart rhythm with the ECG app The Always-On Retina display is "}, {"role": "assistant", "content": "Price is $248.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nPhoto Studio Boom Light Stand Bag,50 Tripod Bag, Canopy Bag, Camping Bag Made In USA.\n50 bag with heavy duty 600 denier polyester,water resistant,heavy duty #10 decathlon double zipper,outside zipper pocket 50 width X 12 height X 11 depth Made In USA. Great bag to hold all your camping gear or photo studio light stands carrying,tripod, music equipment,volleyball net, golf bag cover and other essentials. Size 50 width x 12 height x 11 depth. Made In USA. Dimensions 50 x 11 x 12 inches, Weight 1.5 pounds, Manufacturer BAGS USA, model number 274, Rank Tripod & Monopod Cases 240, Is Discontinued No, Available August"}, {"role": "assistant", "content": "Price is $64.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMonwutong Slim Fit Case for Moto G Play Case for Moto G Power for Moto G Pure, Shiny Marble Pattern Ring Kickstand Cover for Girls for Moto G Pure/G Play/G Power,ZHDD Purple\nFit for Motorola Moto G Pure/G Power/G Play IMD Technology,Bright and colorful. Anti-fall Protect function,four hard fixed corners can effectively protect phone from falling damage. Camera Lens and Screen Protection Shiny Ring Kickstand, Easy for viewing videos and movies Package Include 1 x phone case (phone is not include) Compatible Model Special attention! Only applicable for Motorola Moto G Pure/G Power/G Play.Please confirm your phone model. Material IMD Technology + Soft TPU Protection,Bright and colorful,Comfortable grip.fits the phone very well. Features Shiny,Non-fading"}, {"role": "assistant", "content": "Price is $10.55"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nBlack Full-Motion Tilt/Swivel Wall Mount Bracket with Anti-Theft Feature for Vizio M70-C3 70 inch 4K UHD HDTV TV/Television - Articulating/Tilting/Swiveling\nCompatible with the Vizio M70-C3 70 inch 4K UHD HDTV TV/Television, this adjustable, strong, and robust full-motion tilt/swivel black wall mount bracket with Anti-Theft Feature puts the back of your TV 3 inches from the wall when folded flat and 21.6 inches when fully extended. This wall mount bracket allows maximum flexibility for installing and viewing your TV. This Full-Motion Wall Mount supports most* 37 to 70 LED/LCD/Plasma TVs weighing up to 88"}, {"role": "assistant", "content": "Price is $92.98"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nBaby Doll Car Seat with Toy Accessories, Includes 12 Inch Soft Body Black Baby Doll, Booster Seat Carrier, Rattle Toy, Bib and 2 Bottles, Travel Set for Toddler Infant Girl and Boy, African American\nThis gorgeous realistic doll will be love at first sight. At 12 inches long she is perfect for hugging, cuddling, to play and care for. Different variety of accessories makes it the perfect set for all times. Traveling, bed, parties, weddings and more. Recommended for 2 years old and up. Traveling Baby Doll Set \u2013 Includes 12 Soft Body Baby Doll, Car Booster Seat, Rattle Toy, Bib, and 2 Feeding Bottles. Take along your baby doll for a ride and carry along wherever you go! Great Gift"}, {"role": "assistant", "content": "Price is $34.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAparecium Work Platform Height Adjustable, Portable Step Stool Ladder, Folding Aluminum Step Ladder, Type 1A, Heavy Duty 330 Lbs Rated, for Washing Vehicles, Cleaning Windows, DIY, Maintenance Work\nDurable Construction It is mainly made of Aluminum which is rust-resistant and long-lasting to serve longer time. Large load capacity is 330lbs which can load your weight safely and make sure your safety while working. But it is also light and convenient for transportation and storage.Aparecium step ladder is certificated to Type 1A. Adjustment Aparecium work platform can be adjusted height from 23.42 inches to 34.72 inches by squeezing the reinforced smart locks which has 7 levels of height adjustment and the maximum height adjustment in order"}, {"role": "assistant", "content": "Price is $145.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nLaurey 37007 Lineage Knob, Antique Copper\nFrom the Manufacturer Laurey is America's finest quality cabinet hardware. Laurey features the most innovative styling, dedication to service, and quality that is truly unparalleled. Designed to withstand corrosion by moisture and salt air Use to accent your cabinetry Finishes resist abrasion Protected by triple treatments of long-lasting genuine lacquer Material Metal, Brand Laurey, Color Antique Copper, Exterior Finish Copper, Usage Cabinets, Included Components Knob, Weight 5.6 ounces, Metal Type Copper, Handle Material Copper, Unit Count 1.0 Count, s 1, Manufacturer Laurey, Part 37007, Dimensions 2 x 2 x 1.38 inches, Country of Origin China, model number 37007, Size"}, {"role": "assistant", "content": "Price is $4.68"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nLindby Custom Multibar (Chrome) Compatible with 09-11 Yamaha XVS95\nLindby brings you ground breaking innovations and fascinating designs.Fits 09-11, 09-17 Lindby brings you ground breaking innovations and fascinating designs. The unique fusion of creative engineering and excellence make them the proud manufacturer of The Multibar, the original patented combined engine guard and highway peg.Made from a single piece of high-strength steel for long-lasting durability.Built-in bon Patent Not recommended for use with extended forward controls or extended floorboards. O-rings are replaceable but made of a high quality neoprene and won't dry rot, if you do need to replace one you just slide it over the lower bracket. Color transparent, Brand Lindby Custom, Material Alloy"}, {"role": "assistant", "content": "Price is $319.94"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nBestem Carbon Fiber Head Cowl for Ducati 1098 848 1198\nBestem Ducati Carbon Fiber Head Cowl in plain weave will give your motorcycle that special custom look. This part is made from high quality 3K carbon fiber with sulfate-free fiber glass backing. Special formulated epoxy resin provides excellent flexibility and durability and will not change shape or crack under road use. UVC topcoat layer is used to protect the carbon fiber from fading, as is the problem with cheaper polyester or vinyl carbon fiber on the market today. To ensure the best possible fit onto your motorcycle, this part was created from a casting of the original OEM part and it is test-installed. Fits Ducati Carbon fiber with fiber glass backing Excellent flexibility and durability UVC top coat layer used to protect"}, {"role": "assistant", "content": "Price is $339.85"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nBOSCH 18V Connected-Ready 1/2 In. Hammer Drill/Driver Kit with (1) 8.0 Ah PROFACTOR Performance Battery\nThis Bosch 18V PROFACTOR hammer drill/driver is built for tough drilling and driving jobs, especially drilling large holes straight. The 1/2 In. Hammer Drill/Driver is part of the PROFACTOR System, which pairs BITURBO Brushless Technology with a PROFACTOR battery. BITURBO Brushless Technology is a high-performance motor and drive-train system designed to deliver power comparable to high-demand corded tools. This powerful hammer drill/driver has KickBack Control to help reduce the risk of user injury and Electronic Angle Detection to ensure accurate drilling at desired angle. It also features 25"}, {"role": "assistant", "content": "Price is $273.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nRAREELECTRICAL New Starter Motor Compatible with 07 08 09 10 GMC Acadia 3.6 V6\nRAREELECTRICAL BRAND COMPATIBLE WITH GENERAL MOTORS, GM, GMC, MITSUBISHI, SATURN, SUZUKIGENERAL MOTORS DESCRIPTION STARTERUNIT TYPE MITSUBISHITYPE PMGRVOLTAGE 1.7 KWROTATION CLOCKWISETOOTH COUNT EAR 1 10.8MM ID UNTHREADEDMOUNTING EAR 2 10.8MM ID UNTHREADEDMOUNTING EAR 3 12.5MM ID UNTHREADEDWEIGHT 6.45 LBS / 2.93 KGAPPLICATIONSPLEASE VERIFY YOUR OEM PART NUMBER"}, {"role": "assistant", "content": "Price is $94.49"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMIMO Panel Antenna Kit by Waveform | +9 dBi Gain | 2x2 MHz | for 3G, 4G LTE, 5G Modems, Routers, & Cell Boosters | TS9, SMA, U.FL Adapters (Twin Cable)\nSay goodbye to laggy Zoom calls, Netflix buffering, and High Ping gaming! If you\u2019re looking for faster data speeds with your LTE router, you've found the right product! The waveform MIMO Panel Antenna kit utilizes a panel antenna that houses two cross polarized antennas to maximize the reception of your LTE modem. Our kit gives you everything you need to attach these antennas to your LTE Modem with our 3 pack of adapters. Simply use the adapters to connect to your hotspot, connect"}, {"role": "assistant", "content": "Price is $239.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nFrogTape Delicate Surface Frog Tape.94 inch x 60 Yard 280220\nFrogTape Delicate surface is a premium, light adhesion painter's masking tape that helps you achieve crisp, clean paint lines and saves time and money by eliminating the need for touch-ups. Adhesion Strength Low Strength Color Yellow Length 60 Product Type Painter's Tape Recommended Surface Delicate Surfaces Removal Timeframe 60 Width 0.94 Brand Name FrogTape 2 pack Package may vary Dimensions 6 x 6 x 2.1 inches, Weight 10.6 ounces, Manufacturer Shurtech, model number Rank Industrial & Scientific 31384, Masking Tape 416, Adhesive Tapes 974, Hardware 15936, Available July 31,"}, {"role": "assistant", "content": "Price is $19.98"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSaddlemen Highwayman Slant Saddlebags (Classic/Large/X-Large)\nLarge turn signal cutouts in yoke prevent need for turn signal relocation on some models. Medium 13 L x 6 W x 9.5 H. Large 15.5 L x 6 W x 9.5 H. Jumbo 18 L x 6 W x 12 H. Throw-over design saddlebags made of Saddlehyde; available in three sizes and two different styles to fit every bike and storage need. Stylish dual-strap design with heavy-duty chrome-plated buckles. Box-style lid keeps contents secure. Adjustable yoke construction for a perfect fit on most popular cruisers. Rigid plastic backs and reinforced inner panels help maintain bag shape even when"}, {"role": "assistant", "content": "Price is $143.92"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nGE Dryer Timer\nProduct Description This is a Genuine Replacement Part, The Model Number and Name for The Following Item General Electric Dryer Timer. From the Manufacturer This is a Genuine Replacement Part,The Model Number and Name for The Following Item General Electric Dryer Timer General Electric This Is A Genuine Replacement Part Clothes-Dryer-Replacement-Parts From The Brand Name Ge Brand Name GE, Model Info Weight 8.8 Ounces, Dimensions 4.8 x 3.3 x 3.2 inches, Country of Origin China, model number Is Discontinued No, Capacity 1 Kilograms, Part Rank Tools & Home Improvement Dryer Replacement Parts 15481, Domestic Shipping can be shipped within U.S., International Shipping This item can be shipped to select countries outside of the"}, {"role": "assistant", "content": "Price is $131.88"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nNaruto Approaching Wind Booster Box (Bandai)\nNaruto Approaching Wind TCG Booster Box 24 Packs - 10 Cards Per Pack Approaching Wind is the eleventh release for the Naruto CCG and introduces over 100 new cards. This is the first set to include characters from Naruto \u201cShippuden\u201d TV episodes. The Naruto storyline skips two and half years into the future and all of the favorite Naruto characters have matured and grown more powerful. The Naruto CCG will begin to grow with these characters as we begin to introduce new versions of the most popular characters and their brand new Jutsus. New characters that will play a key role in the Naruto storyline will also make their first appearances. \u201cApproaching Wind\u201d will open a new chapter for the Naruto C"}, {"role": "assistant", "content": "Price is $349.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nEBC Brake Kit\nStill privately owned, EBC Brakes is a world leader in the manufacture of brake components. In the early 80\u2019s in Europe, EBC commenced developing the world\u2019s first aftermarket range of products. After a successful launch in Europe, EBC expanded into the USA market in the It produces 100% of its brake pad range in its own factories Package Dimensions 43.688 H x 41.148 L x 41.275 W (centimeters) Package Weight 23.156 kilograms Country of Origin Wales Manufacturer EBC Brakes, Brand EBC Brakes, Model Weight 40 pounds, model number Is Discontinued No, Exterior Machined, Manufacturer Part Rank Automotive Automotive Replacement Brake Kits 16235, Available May 26, 2015"}, {"role": "assistant", "content": "Price is $188.92"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\n20 Regal Entertainment Group Premiere Movie Tickets (SAVE $50!)\nBuying Regal Premiere movies tickets are a great way to enjoy all movies at a great discount. Regal Premiere movie tickets are valid 365 day a year. There are never any blackout dates. They never expire. Use them at your own pace. Premiere movie tickets are unrestricted. Valid for all movies and showtimes. Surcharge fees apply to all IMAX, RPX, Large Format or 3-D Films. Present at box office only. Not valid for online redemption. Redeem each movie ticket for one Regal theatre ticket of your choice with NO EXPIRATION! Dimensions 9.5 x 4 x 0.2 inches, Weight 0.81 ounces, Manufacturer Regal, Rank Office Products Ticket Rolls"}, {"role": "assistant", "content": "Price is $263.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nWheel Accessories Parts Wheel Spacer, 4 PC Set, 5 x 127mm (5 x 5.00) Hub Centric, 1.75 in Thickness, Fits Jeep Grand Cherokee, Wrangler, Dodge Durango\nOur spacers are manufactured with same thread, hub bore and lug hex as your vehicle\u2019s original equipment. Allowing use of your vehicle\u2019s original lugs and lug wrench. Precision machined Aircraft Grade Aluminum spacers / adapters with heat treated, hardened Grade studs and matching black, dual coated lugs. Anodized surface provides corrosion resistance. Widen your vehicle base and give your vehicle a more aggressive stance while improving handling and stability. Increase tire clearance, fix brake caliper clearance issues, allow installation of lift/lowering kits and wider, larger, or"}, {"role": "assistant", "content": "Price is $144.49"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAngelbaby Reborn Silicone Full Body Realistic Dolls Cute Look Real Baby Girl Waterproof Reborns Handmade Vinyl Toys with Clothes (Red)\nOur cute and lifelike reborn dolls are made of high quality materials (silicone vinyl,clothes, hair,Acrylic eyes, and other accessories materials ), safe, non-toxic for your kids, pure environment-friendly materials with gentle and comfortable touch. The doll will come with outfits and a magnetic pacifier.It will be a friend of your baby and the one of your family.Wish you love this Please feel free to play with her/him. Conforms to the safety requirements of ASTM F963 and EN71 for ages 3+ Baby doll gender girl, has gender feature; Application Wedding gifts, Birthday gifts, festival gifts, children"}, {"role": "assistant", "content": "Price is $55.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMetabo HPT Coil Siding Nailer, Siding Nails 1-1/2 inch To 2-1/2 inch, Side load, Tilt Bottom Magazine\nThe Hitachi 2-1/2 coil siding nailer brings power, precision and convenience to the jobsite and weighs only 4. 8 lbs. Capable of driving nails as large as 2-1/2 x. 099 at a rate of 3 per second, the is both powerful and efficient. With newly added features like a selective actuation switch, side load, tilt bottom magazine and repositioned depth of drive adjustment dial, the professional preferred siding nailer gets even better. Additional features such as the adjustable air deflector, no mar nose cap and plastic shield add"}, {"role": "assistant", "content": "Price is $445.31"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nARP Stud Kit\nARP Chevy/GMC 6.2 Diesel Head Stud Kit GMC 6.2L Diesel Head Stud Kit Package Dimensions 5.842 H x 18.541 L x 27.94 W (centimeters) Package Weight 10.55 pounds Oem equivalent part number Manufacturer ARP, Brand ARP, Model ARP Weight 2 pounds, Dimensions 8 x 3 x 11 inches, Country of Origin USA, model number Exterior Painted, Manufacturer Part OEM Part Rank Automotive Automotive Performance Engine Main Bolts & Studs 144, Automotive Replacement Engine Main Bolts & Studs 305, Domestic Shipping can be shipped within U.S., International Shipping This item can be shipped to select countries outside of the U.S. Learn More, Available June 20, "}, {"role": "assistant", "content": "Price is $212.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nArmorSuit MilitaryShield Full Body Skin Film + Screen Protector for Toshiba Thrive Tablet - Anti-Bubble HD Clear Film\nMilitary Grade Protection ArmorSuit\u00c2 MilitaryShield\u00c2 features exceptional clarity and UV protection to prevent yellowing. It is made from the same protective film material used to protect military aircrafts, helicopters and space shuttles. MilitaryShield\u00c2 is also equipped with self-healing properties to maximize the protection. The self-healing technology works to constantly eliminate minor scratches on the film. All of our MilitaryShield\u00c2 are uniquely designed to provide a perfect cut for a perfect fit on your device. It also features corrosion and moisture protection to prevent substances from migrating through the film to attack underlying substrates. It's designed to provide daily protection against scratches and reduce chances of damage to your screen from impact"}, {"role": "assistant", "content": "Price is $18.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nOLP Badge Strap Clips Qty 500 Clear Flexible Vinyl 2-3/4 Long & Metal Clip Snap\n500 per pack 2-3/4 long clear vinyl badge strap clips with 2-Hole nickel-plated steel clip that swivels as needed. Clear vinyl is flexible to avoid breaking. Has metal snap closure feature. Fits standard 1/2 slot punched hole. Clip will not tear clothing. 2-Hole nickel-plated steel clip swivels as needed Clear vinyl is flexible to avoid breaking Has metal snap closure feature Fits standard 1/2 slot punched hole Clip will not tear clothing Manufacturer Oregon Laminations Company, Brand Oregon Lamination Premium, Weight 4.74 pounds, Dimensions 3.25 x 0.5 x 0.25"}, {"role": "assistant", "content": "Price is $74.60"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nAir Compressor 115v 23 Psi\n115 volt 60 cyl. Newest addition, weighs only 6 lbs. Don't let the size fool you, it will do the job and keep going and going. Attached suction cup feet eliminate compressor movement. Compressor includes a durable 6 foot cord with three prong end. The high quality faux leather case is the perfect stylish compliment to your Sony Reader The slim and sleek design securely fastens the ebook reader in place and has a magnetic flap. The car charger and wall charger are a must have accessory. The USB data cable comes in handy when sync'ing data or charging your ebook. The combo pack comes with a black leather case, USB cable, car charger, wall charger, & Hand Strap Brand Halloween FX, Voltage 115 Vol"}, {"role": "assistant", "content": "Price is $186.63"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nGrant 760 GT Rally Steering Wheel\nPatterned after the most popular rally car look. This is a solid 3 spoke design. Molded cushion grip contoured to the shape of the hand with a leather grained finish. This is a great affordable solution for a wheel with incredible styling. A Grant Installation Kit is necessary to mount this wheel to a vehicle. This wheel will work with Grant Standard 3000 or 4000 Series, Billet 5000 Series, or Euro 6000 Series Installation Kits. 13 Diameter wheel with 3 dish Molded cushion grip contoured to the shape of the hand with a leather grained finish Silver anodized aluminum spokes Includes horn button and black Styling Sleeve Grant installation kit required Fit type Vehicle Specific Manufacturer Grant Products, Brand Grant,"}, {"role": "assistant", "content": "Price is $115.48"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nDell Latitude E6420 Intel 8GB RAM 240GB SSD Win 10 Pro Webcam (Renewed)\nThis Certified Refurbished product is manufacturer refurbished, shows limited or no wear, and includes all original accessories plus a 90 day warranty. Microsoft is discontinuing offering Windows 7; this product is sold with either Windows 7 or Windows 10. Microsoft license terms for Windows 10 will include the downgrade rights to Windows 7 Intel Core 3MB Cache, Max Turbo Frequency 8GB DDR3, 240G SSD Hard Drive; 14 Inch HD 1366 x 768 Display, DVDROM, Intel HD Graphics 3000, 3 USB 2.0, Wireless; SD Card Reader, Smart Card Reader, HDMI Out, VGA Out,"}, {"role": "assistant", "content": "Price is $498.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nDell Optiplex 7010 Business Desktop Computer (Intel Quad Core i5 up to 3.6GHz Processor), 8GB DDR3 RAM, 2TB HDD, USB 3.0, DVD, Windows 10 Professional (Renewed)\nThis pre-owned or refurbished product has been professionally inspected and tested to work and look like new. How a product becomes part of Amazon Renewed, your destination for pre-owned, refurbished products A customer buys a new product and returns it or trades it in for a newer or different model. That product is inspected and tested to work and look like new by Amazon-qualified suppliers. Then, the product is sold as an Amazon Renewed product on Amazon. If not satisfied with the purchase, renewed products are eligible for replacement or refund under"}, {"role": "assistant", "content": "Price is $325.13"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCisco-Linksys Dual-Band Wireless A+B Access Point + Router with 4-Port 10/100 Switch\nAmazon.com From the Manufacturer The Dual-Band Wireless A+B Broadband Router is like four devices in one box! The Router function lets you securely share one high-speed Internet connection among your entire network, while the 4-port full duplex 10/100 Switch jump-starts your wired-Ethernet network. Connect four PCs directly, or daisy-chain out to more hubs and switches to create as big a network as you need. The Dual-Band Wireless A+B Router also contains two Wireless Access Points, which let you connect with wireless-networked PCs using either the popular standard at or the new, almost five times faster, 5GHz, standard. Since both standards"}, {"role": "assistant", "content": "Price is $80.56"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nStreet Scene Mud Flap Kit\nStreet Scene Mud Flap Kits are designed to offer excellent protection against mud, gravel and stones. These kits are made from tough urethane which offers long lasting durability. They are corrosion resistant and ensure easy installation. Easy installation Provides years of great protection Protects from rock and harmful debris No drilling required Easy installation Manufacturer Street Scene, Brand Street Scene, Weight 23.4 pounds, Dimensions 30 x 27 x 7 inches, model number Manufacturer Part OEM Part Domestic Shipping can be shipped within U.S., International Shipping This item can be shipped to select countries outside of the U.S. Learn More, Available April 16, 2008, Material Rock, Fit Type Universal Fit, Installation Type Screw In, Finish Type Powder Coated"}, {"role": "assistant", "content": "Price is $147.28"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCobble Hill 1000 Piece Puzzle - Rest Stop - Sample Poster Included\nMADE IN THE US Cobble Hill Puzzles are proudly manufactured in North America. RANDOM-CUT PIECES This piece design means each puzzle piece looks different - A fun challenge. HIGH-QUALITY The glare-reducing linen paper and crisp image make it a perfect piece to frame. EARTH FRIENDLY All cardboard is made from 100% recycled material. Our ink is also vegetable based. NO INSTRUCTIONS REQUIRED Simply use the box cover or convenient linen print poster included. Finished size is 26.625 x 19.25\u201d Brand Cobble Hill Puzzle Company Ltd., Puzzle type Jigsaw, Manufacturer Minimum Age (MONTHS) Pieces 1000, Theme Cabin & Camping"}, {"role": "assistant", "content": "Price is $28.65"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nWristCo Premium Neon Green Age Verified Plastic Secure Snap Wristbands - 500 Count 5/8 x 10 - Adjustable Size Bracelets for Events, Waterproof, Durable, Tearproof, Wrist Bands used at Waterparks Concerts Festivals Conferences for Security Admission\nSECURE SNAP CLOSURE PLASTIC WRISTBANDS - Wristco plastic wristbands measures 5/8\u201d inches wide by 10 inches long and will fit any sized wrist. The wrist bands are produced in quantities of 20 bands per sheet and separate easily from the sheet with a gentle pull. Choose from a variety of vibrant colors for easy visual identification for security access, crowd control, and gate entry. BEST FOR OUTDOOR USE - Wristco plastic wristbands are"}, {"role": "assistant", "content": "Price is $39.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nRockville DOLBY BAR Home Theater Soundbar w/ Wireless Sub/Bluetooth/HDMI/Optical\nRockville DOLBY BAR 40 500 Watt Soundbar with Wireless Subwoofer/Bluetooth/HDMI/Optical and Dolby digital. 500 Watts peak power / 200 Watts RMS power (continuous). Dolby Digital plus, Dolby Digital, and Dolby surround gives you an amazing theater experience when watching movies. Pairs with Included Wireless Subwoofer Seamlessly. Built in Bluetooth wireless audio streaming with long range and distortion free playback. USB input plays back music stored on a thumb drive (up to 32 Gb). Controls Volume, Bass, Treble, DSP mode. Includes a wall bracket and mounting hardware. Recessed input panel makes it easy to"}, {"role": "assistant", "content": "Price is $189.95"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nWOOS 722.6 VALVE BODY 6 SOLENOIDS CAST Compatible with Mercedes Benz\nCondition Remanufactured OEM 722.6 Compatbile with Dodge Sprinter 2500??? Sprinter 3500??? Freightliner Sprinter 2500??? Freightliner Sprinter 3500??? Compatbile with Mercedes-Benz w/ A/T C230 C240 C280 C32 AMG C320 C350 C36 AMG C43 AMG C55 AMG CL500 CL55 AMG CL600 CL65 AMG CLK320 CLK430 CLK500 CLK55 AMG E300 E320 E350 E420 1997 E430 E500 E55 AMG G500 G55 AMG ML320 ML"}, {"role": "assistant", "content": "Price is $264.82"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nDetroit Axle - Front Struts w/Coil Springs + Sway Bars Replacement for Toyota Sienna (AWD/FWD 8 Passenger Models) - 4pc Set\nKit Includes 1x Front Driver Side Strut w/ Coil Spring Assembly \u2013 172366 1x Front Driver Side Strut w/ Coil Spring Assembly \u2013 172366 1x Front Strut w/ Coil Spring Assembly \u2013 172365 1x Front Strut w/ Coil Spring Assembly \u2013 172365 2x Front Sway Bar End Link - K80249 2x Front Sway Bar End Link - K80249 Fitment Replacement for Toyota Sienna (AWD/FWD 8 Passenger Models) Replacement for Toyota Sienna (AWD/FWD 8 Passenger Models"}, {"role": "assistant", "content": "Price is $234.79"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nKICKER 8 600w Marine Loaded Subwoofer Enclosure+Passive Radiator TB8\nWhat's in the Box 1 x enclosure assembly. 2 x vertical mounting brackets. 1 x horizontal mount base. 1 x Horizontal mount hook (retainer). 1 x Horizontal mount plate (retaining clamp) What's in the Box 1 x enclosure assembly. 2 x vertical mounting brackets. 1 x horizontal mount base. 1 x Horizontal mount hook (retainer). 1 x Horizontal mount plate (retaining clamp) 7 x stainless steel M5 x 12mm button head screws for horizontal mount plate to base and mounting brackets to enclosure. 11 x nylon flat washers (to be used on all mounting fasteners). 4"}, {"role": "assistant", "content": "Price is $299.99"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nVim Tools Mechanics Master Set, Hex & Torx\nFeatures and benefits 1/2 cut = better fit, higher torque, S2 steel, the strongest hardest drivers, satin chrome sockets and gun metal grey bits, heat treated to 58-62 Rc, 4 new sizes, T70 Torx, 13mm hex, 21mm hex, and 22mm hex, comes in a durable plastic case, approved for hand and power tools, lifetime warranty. Set includes T8 - T70 - standard Torx, TR10 - TR55 - tamper proof Torx, E4 - E20 - Torx sockets, 1/8 and rdquo, - 3/4 and rdquo, SAE hex, 2.5mm - 22"}, {"role": "assistant", "content": "Price is $202.19"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCallahan FRONT 288mm + REAR 278mm Premium OE 5 Lug 4 Brake Rotors + Ceramic Pads + Sensors + Clips fit Mercedes C230 240\nGUARANTEED ONLY to fit vehicles in the Product Description - see below. Original design ensures proper fit and confident braking. CERAMIC BRAKE PADS are quieter and last longer than metallic. Unique formula provides reduced noise fade and dust. STAINLES STEEL QUALITY HARDWARE IS INCLUDED. Parts are ready to install out of the box. NO CLEANING REQUIRED. INCREASED STOPPING POWER due to improved heat dissipation. Guaranteed to perform better or equal to OE parts. ENHANCED STOPPING POWER provides quiet confident braking. 12 months warranty on all parts in the kit. FULL"}, {"role": "assistant", "content": "Price is $163.27"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nSigma 30mm F1.4 Contemporary DC DN Lens for Micro 4/3\nThe 30mm F1.4 DC DN Contemporary is the first high performance, economical F1.4 lens for micro four thirds and Sony-e Mount mirrorless systems With nine rounded aperture blades, a stepping ring motor, and compact design perfect paring of high performance and pricing APS-C format Dimensions 2.55 x 2.55 x 2.89 inches, Weight 9.3 ounces, model number Rank SLR Camera Lenses 543, Is Discontinued No, Available February 23, 2016, Manufacturer Sigma Corporation of America, Brand Sigma, Lens Type Wide Angle, Compatible Mountings Olympus/Panasonic Micro 4/3, Camera Lens Description 30 mm"}, {"role": "assistant", "content": "Price is $264.00"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nMag Genius Educational & Colorful Magnetic Building Building Block Set \u2013 Standard Building Kit\nIDEA BOOKLET of examples showing 3D building and characteristics for the starter kit. Shows how to use add-ons and accessories that are sold separately. DURABILITY waterproof & sunproof (no fading/outdoor friendly) material that can handle hard impacts. MATH BASIS 60 translucent, durable, and colorful shapes including (2 standard square bases 3x3, 2 large bases 6x6, 10 acute triangles, 8 right angled Ninety-Degree triangles, 10 regular triangles) Does not come with play people. ENGINEERING build widespread cities and neighborhoods allowing the cars/trains to veer around sharp corners to get away from bad guys or to transport your towns citizens. Anything is"}, {"role": "assistant", "content": "Price is $29.90"}]}
|
||||
{"messages": [{"role": "system", "content": "You estimate prices of items. Reply only with the price, no explanation"}, {"role": "user", "content": "How much does this cost?\n\nCenterforce Dual Friction Clutch Pressure Plate and Disc\nThe Centerforce Dual-Friction pressure plates feature our patented processes to provide a performance clutch that offers exceptional street characteristics, while offering outstanding holding-capacity and durability. The Dual-Friction disc has a full facing on the pressure plate side for drivability and longevity, while a carbon composite puc style facing is used on the flywheel side for a positive engagement and increased holding-capacity. Dual Friction is the ultimate in street or strip holding power and performance without sacrificing pedal effort and driver control. The patented Centerforce Dual-Friction disc system distributes pressure plate clamping force evenly over a friction-facing on one side of the clutch disc, while the opposing side uses a segmented friction-facing to concentrate clamping pressure and maximize clutch holding"}, {"role": "assistant", "content": "Price is $439.95"}]}
|
||||
103
week6/community-contributions/nikhil_raut/items.py
Normal file
103
week6/community-contributions/nikhil_raut/items.py
Normal file
@@ -0,0 +1,103 @@
|
||||
from typing import Optional
|
||||
from transformers import AutoTokenizer
|
||||
import re
|
||||
|
||||
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
|
||||
|
||||
MIN_TOKENS = 150 # Any less than this, and we don't have enough useful content
|
||||
MAX_TOKENS = 160 # Truncate after this many tokens. Then after adding in prompt text, we will get to around 180 tokens
|
||||
|
||||
MIN_CHARS = 300
|
||||
CEILING_CHARS = MAX_TOKENS * 7
|
||||
|
||||
class Item:
    """
    An Item is a cleaned, curated datapoint of a Product with a Price.

    Constructing an Item immediately parses the raw datapoint; `include` is
    set to True only when the scrubbed text is long enough (see `parse`), in
    which case `prompt` and `token_count` are populated as well.
    """

    # Loaded once, when the class body is executed, and shared by all instances.
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    PREFIX = "Price is $"
    QUESTION = "How much does this cost to the nearest dollar?"
    # Substrings stripped from the `details` text by scrub_details().
    REMOVALS = ['"Batteries Included?": "No"', '"Batteries Included?": "Yes"', '"Batteries Required?": "No"', '"Batteries Required?": "Yes"', "By Manufacturer", "Item", "Date First", "Package", ":", "Number of", "Best Sellers", "Number", "Product "]

    title: str
    price: float
    category: str
    token_count: int = 0
    details: Optional[str]
    prompt: Optional[str] = None
    include = False

    def __init__(self, data, price):
        """
        Build an Item from a raw datapoint.

        :param data: mapping read for the keys 'title', 'description',
                     'features' and 'details'
        :param price: the price of the product
        """
        self.title = data['title']
        self.price = price
        self.parse(data)

    def scrub_details(self):
        """
        Clean up the details string by removing common text that doesn't add value

        :return: `self.details` with every entry of REMOVALS removed
        """
        details = self.details
        for remove in self.REMOVALS:
            details = details.replace(remove, "")
        return details

    def scrub(self, stuff):
        """
        Clean up the provided text by removing unnecessary characters and whitespace
        Also remove words that are 7+ chars and contain numbers, as these are likely irrelevant product numbers

        :param stuff: the text to clean
        :return: the cleaned text, words joined by single spaces
        """
        # Collapse runs of brackets, quotes, colons, braces and whitespace into one space.
        stuff = re.sub(r'[:\[\]"{}【】\s]+', ' ', stuff).strip()
        stuff = stuff.replace(" ,", ",").replace(",,,",",").replace(",,",",")
        words = stuff.split(' ')
        # Keep a word when it is short (< 7 chars) or contains no digit at all.
        select = [word for word in words if len(word)<7 or not any(char.isdigit() for char in word)]
        return " ".join(select)

    def parse(self, data):
        """
        Parse this datapoint and if it fits within the allowed Token range,
        then set include to True

        The description, features and scrubbed details are concatenated. When
        the result is longer than MIN_CHARS it is cut to CEILING_CHARS,
        scrubbed and tokenized; when that yields more than MIN_TOKENS tokens
        it is truncated to MAX_TOKENS, decoded and turned into the prompt.
        """
        contents = '\n'.join(data['description'])
        if contents:
            contents += '\n'
        features = '\n'.join(data['features'])
        if features:
            contents += features + '\n'
        self.details = data['details']
        if self.details:
            contents += self.scrub_details() + '\n'
        if len(contents) > MIN_CHARS:
            contents = contents[:CEILING_CHARS]
            text = f"{self.scrub(self.title)}\n{self.scrub(contents)}"
            tokens = self.tokenizer.encode(text, add_special_tokens=False)
            if len(tokens) > MIN_TOKENS:
                tokens = tokens[:MAX_TOKENS]
                text = self.tokenizer.decode(tokens)
                self.make_prompt(text)
                self.include = True

    def make_prompt(self, text):
        """
        Set the prompt instance variable to be a prompt appropriate for training

        The prompt is the question, the product text, and the price rounded to
        the nearest dollar; `token_count` is set to the prompt's token length.
        """
        self.prompt = f"{self.QUESTION}\n\n{text}\n\n"
        self.prompt += f"{self.PREFIX}{str(round(self.price))}.00"
        self.token_count = len(self.tokenizer.encode(self.prompt, add_special_tokens=False))

    def test_prompt(self):
        """
        Return a prompt suitable for testing, with the actual price removed
        """
        # make_prompt() appends PREFIX + price last, so cut at the LAST
        # occurrence: a product text that itself contains "Price is $" must not
        # truncate the prompt at that earlier match.
        return self.prompt.rsplit(self.PREFIX, 1)[0] + self.PREFIX

    def __repr__(self):
        """
        Return a String version of this Item
        """
        return f"<{self.title} = ${self.price}>"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
75
week6/community-contributions/nikhil_raut/testing.py
Normal file
75
week6/community-contributions/nikhil_raut/testing.py
Normal file
@@ -0,0 +1,75 @@
|
||||
import math
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
GREEN = "\033[92m"
|
||||
YELLOW = "\033[93m"
|
||||
RED = "\033[91m"
|
||||
RESET = "\033[0m"
|
||||
COLOR_MAP = {"red":RED, "orange": YELLOW, "green": GREEN}
|
||||
|
||||
class Tester:
    """
    Runs a price predictor over the first `size` datapoints, prints a
    colour-coded line per datapoint and charts estimates against ground truth.
    """

    def __init__(self, predictor, data, title=None, size=250):
        """
        :param predictor: callable taking a datapoint and returning a numeric guess
        :param data: indexable collection of datapoints exposing `.price` and `.title`
        :param title: chart title; defaults to a prettified `predictor.__name__`
        :param size: maximum number of datapoints to evaluate
        """
        self.predictor = predictor
        self.data = data
        self.title = title or predictor.__name__.replace("_", " ").title()
        # Never evaluate more datapoints than are available; otherwise run()
        # would raise IndexError part-way through the evaluation.
        try:
            available = len(data)
        except TypeError:
            available = size
        self.size = min(size, available)
        self.guesses = []
        self.truths = []
        self.errors = []
        self.sles = []
        self.colors = []

    def color_for(self, error, truth):
        """
        Classify an absolute error as "green", "orange" or "red".

        Green: error under 40 or under 20% of truth; orange: error under 80 or
        under 40% of truth; red otherwise.
        """
        # A zero truth has no meaningful relative error; treat it as infinite
        # instead of raising ZeroDivisionError.
        relative = error / truth if truth else math.inf
        if error < 40 or relative < 0.2:
            return "green"
        elif error < 80 or relative < 0.4:
            return "orange"
        else:
            return "red"

    def run_datapoint(self, i):
        """Predict datapoint `i`, record its metrics and print a coloured summary line."""
        datapoint = self.data[i]
        guess = self.predictor(datapoint)
        truth = datapoint.price
        error = abs(guess - truth)
        # Squared log error; the +1 keeps log() defined for zero prices.
        log_error = math.log(truth+1) - math.log(guess+1)
        sle = log_error ** 2
        color = self.color_for(error, truth)
        title = datapoint.title if len(datapoint.title) <= 40 else datapoint.title[:40]+"..."
        self.guesses.append(guess)
        self.truths.append(truth)
        self.errors.append(error)
        self.sles.append(sle)
        self.colors.append(color)
        print(f"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}")

    def chart(self, title):
        """Scatter-plot guesses against truths, with the line y = x for reference."""
        plt.figure(figsize=(12, 8))
        max_val = max(max(self.truths), max(self.guesses))
        plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)
        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)
        plt.xlabel('Ground Truth')
        plt.ylabel('Model Estimate')
        plt.xlim(0, max_val)
        plt.ylim(0, max_val)
        plt.title(title)
        plt.show()

    def report(self):
        """Compute average error, RMSLE and the share of "green" hits, then chart them."""
        average_error = sum(self.errors) / self.size
        rmsle = math.sqrt(sum(self.sles) / self.size)
        hits = sum(1 for color in self.colors if color=="green")
        title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%"
        self.chart(title)

    def run(self):
        """Evaluate the first `size` datapoints and show the report."""
        self.error = 0
        for i in range(self.size):
            self.run_datapoint(i)
        self.report()

    @classmethod
    def test(cls, function, data):
        """Convenience entry point: build a Tester with default settings and run it."""
        cls(function, data).run()
|
||||
1174
week6/community-contributions/nikhil_raut/week6_challenge.ipynb
Normal file
1174
week6/community-contributions/nikhil_raut/week6_challenge.ipynb
Normal file
File diff suppressed because one or more lines are too long
621
week6/community-contributions/week_6_exercise_revised.py
Normal file
621
week6/community-contributions/week_6_exercise_revised.py
Normal file
@@ -0,0 +1,621 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Week_6_exercise_revised.ipynb
|
||||
|
||||
Automatically generated by Colab.
|
||||
|
||||
Original file is located at
|
||||
https://colab.research.google.com/drive/1GaV053HB8l-Wd3J3o9BcOAjC009Qk_W0
|
||||
"""
|
||||
|
||||
#installations
|
||||
# NOTE: `!pip ...` is IPython/Colab shell-magic syntax and is a SyntaxError in a
# plain .py file, so the install commands are kept here as instructions only.
# Run them in a shell (or in a notebook cell, prefixed with `!`) first:
#   pip install --upgrade pip
#   pip install datasets==3.0.1 anthropic transformers accelerate pandas tqdm numpy
# The imports below also need scikit-learn, matplotlib and seaborn, which are
# not part of the install line above.
|
||||
|
||||
#imports
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
from typing import Optional, List, Dict, Any, Tuple
|
||||
from sklearn.model_selection import train_test_split
|
||||
import anthropic
|
||||
from datasets import load_dataset
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
import seaborn as sns
|
||||
|
||||
#TEMPORARY: Hard-coded keys

#I hid my keys, you can replace your keys with 'sk' and 'hf'
# setdefault keeps a key that is already exported in the environment instead of
# overwriting it with the placeholder value.
os.environ.setdefault("ANTHROPIC_API_KEY", "sk")
os.environ.setdefault("HF_TOKEN", "hf")


# Anthropic Client
try:
    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
    print("Anthropic client initialized")
except Exception as e:
    # `anthropic` was already imported at the top of the file, so a failure
    # here is a configuration problem, not a missing package.
    raise RuntimeError(
        "Could not initialize the Anthropic client; check ANTHROPIC_API_KEY"
    ) from e
|
||||
|
||||
#some basic configurations used throughout the notebook
|
||||
RANDOM_SEED = 42
|
||||
# medium test size
|
||||
TEST_SIZE = 50
|
||||
CLAUDE_MODEL = "claude-opus-4-20250514"
|
||||
MAX_TOKENS = 300
|
||||
|
||||
random.seed(RANDOM_SEED)
|
||||
np.random.seed(RANDOM_SEED)
|
||||
|
||||
# Load my dataset, the Appliances category in my case
|
||||
dataset = load_dataset("McAuley-Lab/Amazon-Reviews-2023", "raw_meta_Appliances", split="full")
|
||||
#using Pandas to create a dataframe
|
||||
df = dataset.to_pandas()
|
||||
#see the data
|
||||
df.head()
|
||||
|
||||
# Let's clean the price column and store it as price_clean
|
||||
df["price_clean"] = pd.to_numeric(df["price"], errors="coerce")
|
||||
|
||||
#check the number of rows in the data
|
||||
print("Dataset size:", len(df))
|
||||
|
||||
#check the features in the data
|
||||
print(df.columns.tolist())
|
||||
|
||||
#checking some info
|
||||
print(df.info())
|
||||
|
||||
print("Price-related columns found:", [c for c in df.columns if "price" in c])
|
||||
|
||||
print("Missing price_clean:", df["price_clean"].isna().sum(), "rows")
|
||||
|
||||
# Price distribution visualization (Zoomed histogram)
|
||||
plt.figure(figsize=(10,5))
|
||||
df[df["price_clean"] < 200]["price_clean"].hist(bins=50)
|
||||
plt.title("Price Distribution")
|
||||
plt.xlabel("Price ($)")
|
||||
plt.ylabel("Frequency")
|
||||
plt.show()
|
||||
|
||||
# Keep only rows where price_clean is not null
|
||||
df_model = df.dropna(subset=["price_clean"]).copy()
|
||||
|
||||
# come up with a combined prompt text
|
||||
def combine_text(row):
    """
    Build the text shown to the model for one product row.

    :param row: mapping/Series with 'title', 'features' and 'description'
    :return: "<title>\\n\\nFEATURES: <...>\\n\\nDESCRIPTION: <...>"
    """
    def _joined(value):
        # Sequence columns are not guaranteed to be plain lists: after
        # Dataset.to_pandas() they typically arrive as numpy arrays, which the
        # old `isinstance(value, list)` check silently turned into "".
        if hasattr(value, "tolist"):
            value = value.tolist()
        if isinstance(value, (list, tuple)):
            return " ".join(str(part) for part in value)
        return ""

    title = row["title"] or ""
    features = _joined(row["features"])
    description = _joined(row["description"])
    return f"{title}\n\nFEATURES: {features}\n\nDESCRIPTION: {description}"
|
||||
|
||||
df_model["text"] = df_model.apply(combine_text, axis=1)
|
||||
|
||||
# Retain what's needed
|
||||
df_model = df_model[["text", "price_clean"]].reset_index(drop=True)
|
||||
|
||||
# check the model dataset size
|
||||
print(len(df_model))
|
||||
df_model.head(5)
|
||||
|
||||
# Splitting the data into Training and test
|
||||
train_df, test_df = train_test_split(
|
||||
df_model,
|
||||
test_size=0.10, # 10% test split
|
||||
random_state=RANDOM_SEED
|
||||
)
|
||||
|
||||
#Training
|
||||
len(train_df)
|
||||
|
||||
#Testing
|
||||
len(test_df)
|
||||
|
||||
# make the test set a list for better sampling
|
||||
test_records = test_df.to_dict(orient="records")
|
||||
|
||||
# Pricing system Prompt
|
||||
|
||||
def build_prompt(item_text: str) -> str:
    """
    Build the zero-shot pricing prompt for one product listing.

    The currency is USD so that estimates are comparable with the dataset's
    `price_clean` values and with the other prompt in this script, which also
    asks for a USD price (the previous wording asked for KES).

    :param item_text: the product text to embed between triple quotes
    :return: the full prompt string
    """
    return f"""
You are a pricing analyst. Given a marketplace product listing, estimate the item's correct fair market price in USD.

Return ONLY a number, no currency sign, no explanation.

Product details:
\"\"\"
{item_text}
\"\"\"
"""
|
||||
|
||||
def estimate_price_claude(item_text: str) -> Optional[float]:
    """
    Ask Claude for a price estimate of one product listing.

    :param item_text: product text passed to build_prompt()
    :return: the first number found in the reply, or None when the reply
             contains no number or any exception occurs (the exception is printed)
    """
    try:
        reply = client.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=MAX_TOKENS,
            messages=[{"role": "user", "content": build_prompt(item_text)}],
        )
        # Drop thousands separators so "1,299.00" is read as a single number.
        cleaned = reply.content[0].text.strip().replace(",", "")
        found = re.search(r"\d+(\.\d+)?", cleaned)
        if found is None:
            return None
        return float(found.group(0))
    except Exception as e:
        print("Error:", e)
        return None
|
||||
|
||||
client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
|
||||
|
||||
# Filter and Sample 100 usable Rows
|
||||
df_usable = df[df["price_clean"].notna()].copy()
|
||||
sample_df = df_usable.sample(100, random_state=42).reset_index(drop=True)
|
||||
|
||||
#empty prediction list for them to be stored
|
||||
predictions = []
|
||||
|
||||
#Getting the prices
|
||||
def extract_price(text):
    """
    Extract the first valid float from Claude's reply.

    Thousands separators are removed first. A leading-decimal value such as
    ".99" is read as 0.99 (it used to be read as 99.0).

    :param text: the raw reply; anything that is not a string yields None
    :return: the first number as a float, or None when there is none
    """
    if not isinstance(text, str):
        return None
    match = re.search(r"\d+(?:\.\d+)?|\.\d+", text.replace(",", ""))
    return float(match.group(0)) if match else None
|
||||
|
||||
# Getting the predictions
def _as_text(value):
    """Join list-like fields (list, tuple, numpy array) with spaces; otherwise use str()."""
    if hasattr(value, "tolist"):
        value = value.tolist()
    if isinstance(value, (list, tuple)):
        return " ".join(str(part) for part in value)
    return str(value)


for i, row in tqdm(sample_df.iterrows(), total=len(sample_df)):
    title = row["title"]
    desc = _as_text(row["description"])
    feat = _as_text(row["features"])
    cats = _as_text(row["categories"])

    prompt = f"""
You are estimating the USD retail price of an appliance part.

Analyze the information and respond with **only a single number** (no currency symbol, no text, no explanation).

TITLE: {title}
DESCRIPTION: {desc}
FEATURES: {feat}
CATEGORIES: {cats}

Your response must be only a number like: 29.99
"""

    # A single failed request must not abort the run and lose the predictions
    # gathered so far; record the item with an empty prediction instead.
    try:
        response = client.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=50,
            messages=[{"role": "user", "content": prompt}]
        )
        raw = response.content[0].text.strip()
        pred_price = extract_price(raw)
    except Exception as e:
        print(f"Error for item {title!r}:", e)
        raw, pred_price = None, None

    predictions.append({
        "title": title,
        "true_price": row["price_clean"],
        "claude_price": pred_price,
        "raw_reply": raw
    })
|
||||
|
||||
# Saving output in a csv now
|
||||
result_df = pd.DataFrame(predictions)
|
||||
result_df.to_csv("claude_price_predictions_100.csv", index=False)
|
||||
|
||||
# Show preview
|
||||
display(result_df.head())
|
||||
|
||||
# Error metrics
|
||||
valid = result_df[result_df["claude_price"].notna()]
|
||||
mae = np.mean(np.abs(valid["true_price"] - valid["claude_price"]))
|
||||
rmse = np.sqrt(np.mean((valid["true_price"] - valid["claude_price"])**2))
|
||||
pct_within_20 = np.mean(np.abs(valid["true_price"] - valid["claude_price"]) <= 20) * 100
|
||||
|
||||
print(f"\nValid predictions: {len(valid)}/{len(result_df)}")
|
||||
print(f"MAE: {mae:.2f}")
|
||||
print(f"RMSE: {rmse:.2f}")
|
||||
print(f"% within $20: {pct_within_20:.1f}%")
|
||||
|
||||
"""The model returned a price every single time:
|
||||
|
||||
|
||||
|
||||
1. -->MAE = 22.52 On average Claude is off by $22.52 from the true price
2. -->RMSE = 44.11 Big errors exist on some items — a sign of occasional wild guesses
3. -->72% within $20 Claude predicts with reasonable accuracy on most products, but 28% are far off.
|
||||
|
||||
;
|
||||
|
||||
1. Strengths - The model is reasonably decent with zero/low fine-tuning. It understood the task; 72% within $20 on a dataset it’s never seen is a good baseline
2. Weaknesses - Too many rounded “classic” retail numbers (24.99, 89.99, 14.99, 29.99). It seems not to deeply use features, category, or rating. Also the RMSE is high → meaning a few really bad errors are dragging performance down
|
||||
|
||||
Improvements
|
||||
|
||||
1. Prompt enhancements
|
||||
2. Multi-shot and also better structuring
|
||||
3. Fine-tuning with local model
|
||||
"""
|
||||
|
||||
#Now we build a persona Prompt
|
||||
def build_pricing_prompt(examples: list, new_title: str) -> str:
    """
    Assemble the few-shot prompt used by the market-analyst persona.

    :param examples: iterable of (title, price) pairs shown as worked examples
    :param new_title: title of the product whose price is requested
    :return: persona text, the examples, the new product and the answer-format
             instruction, separated by blank lines
    """
    persona = (
        "You are a meticulous Data-Driven Market Analyst who estimates realistic, data-based "
        "product prices for online marketplaces. You base estimates on comparable items and "
        "avoid outliers. Return only the price number."
    )

    shot_lines = []
    for shot_title, shot_price in examples:
        shot_lines.append(
            f"Product: {shot_title}\nEstimated fair market price: ${shot_price:.2f}"
        )

    sections = [
        persona,
        "Here are recent examples:\n" + "\n".join(shot_lines),
        f"Now estimate a fair market price for this product:\nProduct: {new_title}",
        "Respond with only a number, no text or symbols.",
    ]
    return "\n\n".join(sections)
|
||||
|
||||
#10-shot predictios
|
||||
subset_10 = df.dropna(subset=["price_clean"]).sample(10, random_state=42).reset_index(drop=True)
|
||||
few_shots_3 = subset_10.sample(3, random_state=42)[["title", "price_clean"]].values.tolist()
|
||||
results_10 = []
|
||||
|
||||
for i, row in tqdm(subset_10.iterrows(), total=len(subset_10)):
|
||||
prompt = build_pricing_prompt(few_shots_3, row["title"])
|
||||
try:
|
||||
resp = client.messages.create(
|
||||
model=CLAUDE_MODEL,
|
||||
max_tokens=MAX_TOKENS,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
reply = resp.content[0].text.strip()
|
||||
pred = float(reply.replace("$", "").strip())
|
||||
except Exception:
|
||||
pred, reply = np.nan, None
|
||||
results_10.append({"title": row["title"], "true_price": row["price_clean"], "pred_price": pred, "raw": reply})
|
||||
|
||||
df10 = pd.DataFrame(results_10).dropna(subset=["pred_price"])
|
||||
|
||||
mae10 = np.mean(np.abs(df10.pred_price - df10.true_price))
|
||||
|
||||
rmse10 = np.sqrt(np.mean((df10.pred_price - df10.true_price)**2))
|
||||
|
||||
pct20_10 = np.mean(np.abs(df10.pred_price - df10.true_price) <= 20) * 100
|
||||
|
||||
print(f"MAE={mae10:.2f}, RMSE={rmse10:.2f}, %within$20={pct20_10:.1f}%")
|
||||
df10.head()
|
||||
|
||||
# 30-item evaluation with a 5-example few-shot prompt
subset_30 = df.dropna(subset=["price_clean"]).sample(30, random_state=42).reset_index(drop=True)

# Hold the few-shot rows out of the evaluation so the prompt never contains the
# true price of an item that is being scored.
shot_rows_5 = subset_30.sample(5, random_state=42)
few_shots_5 = shot_rows_5[["title", "price_clean"]].values.tolist()
eval_30 = subset_30.drop(index=shot_rows_5.index)
results_30 = []

for _, row in tqdm(eval_30.iterrows(), total=len(eval_30)):
    prompt = build_pricing_prompt(few_shots_5, row["title"])
    try:
        resp = client.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=MAX_TOKENS,
            messages=[{"role": "user", "content": prompt}],
        )
        reply = resp.content[0].text.strip()
        pred = float(reply.replace("$", "").strip())
    except Exception:
        # Best effort: a failed request or an unparsable reply becomes a NaN
        # prediction and is dropped before the metrics are computed.
        pred, reply = np.nan, None
    results_30.append({"title": row["title"], "true_price": row["price_clean"], "pred_price": pred, "raw": reply})

# Keep only the rows that produced a numeric prediction
df30 = pd.DataFrame(results_30).dropna(subset=["pred_price"])

mae30 = np.mean(np.abs(df30.pred_price - df30.true_price))

rmse30 = np.sqrt(np.mean((df30.pred_price - df30.true_price)**2))

# Share of predictions within $20 of the true price, as a percentage
pct20_30 = np.mean(np.abs(df30.pred_price - df30.true_price) <= 20) * 100

print(f"MAE={mae30:.2f}, RMSE={rmse30:.2f}, %within$20={pct20_30:.1f}%")
df30.head()
|
||||
|
||||
# 50-item evaluation with an 8-example few-shot prompt
subset_50 = df.dropna(subset=["price_clean"]).sample(50, random_state=42).reset_index(drop=True)

# Hold the few-shot rows out of the evaluation so the prompt never contains the
# true price of an item that is being scored. subset_50 itself is left intact.
shot_rows_8 = subset_50.sample(8, random_state=42)
few_shots_8 = shot_rows_8[["title", "price_clean"]].values.tolist()
eval_50 = subset_50.drop(index=shot_rows_8.index)
results_50 = []

for _, row in tqdm(eval_50.iterrows(), total=len(eval_50)):
    prompt = build_pricing_prompt(few_shots_8, row["title"])
    try:
        resp = client.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=MAX_TOKENS,
            messages=[{"role": "user", "content": prompt}],
        )
        reply = resp.content[0].text.strip()
        pred = float(reply.replace("$", "").strip())
    except Exception:
        # Best effort: a failed request or an unparsable reply becomes a NaN
        # prediction and is dropped before the metrics are computed.
        pred, reply = np.nan, None
    results_50.append({"title": row["title"], "true_price": row["price_clean"], "pred_price": pred, "raw": reply})

# Keep only the rows that produced a numeric prediction
df50 = pd.DataFrame(results_50).dropna(subset=["pred_price"])

mae50 = np.mean(np.abs(df50.pred_price - df50.true_price))

rmse50 = np.sqrt(np.mean((df50.pred_price - df50.true_price)**2))

# Share of predictions within $20 of the true price, as a percentage
pct20_50 = np.mean(np.abs(df50.pred_price - df50.true_price) <= 20) * 100

print(f"MAE={mae50:.2f}, RMSE={rmse50:.2f}, %within$20={pct20_50:.1f}%")
df50.head()
|
||||
|
||||
# Improved prompt, and comparing the 10, 30 & 50 shot hints
|
||||
def build_strict_prompt(few_shots, test_title):
    """Build a pricing prompt with strict number-only output rules.

    Args:
        few_shots: iterable of (title, price) pairs used as reference examples;
            each price is rendered with two decimals.
        test_title: title of the product whose price should be predicted.

    Returns:
        The prompt text, beginning and ending with a newline.
    """
    example_lines = []
    for shot_title, shot_price in few_shots:
        example_lines.append(f"Title: {shot_title}\nPrice: ${shot_price:.2f}")

    sections = [
        "",  # leading blank line
        "You are an expert e-commerce product pricing analyst. Your job is to predict the most realistic market price for a product based purely on its title.",
        "",
        "Here are reference examples:",
        "\n".join(example_lines),
        "",
        "Now predict the price for:",
        f"Title: {test_title}",
        "",
        "RULES:",
        "- Return ONLY a single number.",
        "- No dollar sign.",
        "- No text, no reasoning, no words.",
        "- Format: 123.45",
        "",  # trailing newline
    ]
    return "\n".join(sections)
|
||||
|
||||
def run_eval(name, subset, shot_count):
    """Evaluate the strict prompt on ``subset`` using ``shot_count`` few-shot examples.

    The few-shot examples are sampled from ``subset`` (fixed seed 42) and those
    rows are excluded from scoring, so no scored item has its true price in the
    prompt.

    Args:
        name: label shown on the progress bar.
        subset: DataFrame with "title" and "price_clean" columns and a unique index.
        shot_count: number of rows of ``subset`` to use as few-shot examples.

    Returns:
        (scored, mae, rmse, pct20): ``scored`` has columns "title", "true" and
        "pred" for the rows that produced a numeric prediction; ``pct20`` is the
        percentage of predictions within $20 of the true price. The three
        metrics are NaN when no row could be scored.
    """
    shot_rows = subset.sample(shot_count, random_state=42)
    few = shot_rows[["title", "price_clean"]].values.tolist()
    held_out = subset.drop(index=shot_rows.index)
    results = []

    for _, row in tqdm(held_out.iterrows(), total=len(held_out), desc=f"{name}"):
        prompt = build_strict_prompt(few, row["title"])
        try:
            resp = client.messages.create(
                model=CLAUDE_MODEL,
                max_tokens=MAX_TOKENS,
                messages=[{"role": "user", "content": prompt}],
            )
            reply = resp.content[0].text.strip()
            # Tolerate a stray dollar sign or thousands separator in the reply.
            pred = float(reply.replace("$", "").replace(",", "").strip())
        except Exception:
            # Best effort: a failed request or an unparsable reply is recorded
            # as NaN and dropped below.
            pred = np.nan

        results.append({"title": row["title"], "true": row["price_clean"], "pred": pred})

    # Explicit columns keep dropna() valid even when nothing was evaluated.
    scored = pd.DataFrame(results, columns=["title", "true", "pred"]).dropna(subset=["pred"])
    if scored.empty:
        return scored, np.nan, np.nan, np.nan

    abs_err = np.abs(scored.pred - scored.true)
    mae = np.mean(abs_err)
    rmse = np.sqrt(np.mean(abs_err ** 2))
    pct20 = np.mean(abs_err <= 20) * 100
    return scored, mae, rmse, pct20
|
||||
|
||||
# Run the strict-prompt evaluation on samples of 10 / 30 / 50 items
subset10 = df.dropna(subset=["price_clean"]).sample(10, random_state=1).reset_index(drop=True)
subset30 = df.dropna(subset=["price_clean"]).sample(30, random_state=2).reset_index(drop=True)
subset50 = df.dropna(subset=["price_clean"]).sample(50, random_state=3).reset_index(drop=True)

df10, mae10, rmse10, pct10 = run_eval("RUN10", subset10, 3)
df30, mae30, rmse30, pct30 = run_eval("RUN30", subset30, 6)
df50, mae50, rmse50, pct50 = run_eval("RUN50", subset50, 8)

# Compare the runs. "sample_size" is the number of sampled items and "shots" is
# the few-shot count passed to run_eval; previously the sample size was reported
# under "shots".
comparison = pd.DataFrame([
    {"sample_size": len(subset10), "shots": 3, "MAE": mae10, "RMSE": rmse10, "%≤$20": pct10},
    {"sample_size": len(subset30), "shots": 6, "MAE": mae30, "RMSE": rmse30, "%≤$20": pct30},
    {"sample_size": len(subset50), "shots": 8, "MAE": mae50, "RMSE": rmse50, "%≤$20": pct50},
])

print(comparison)
comparison
|
||||
|
||||
"""The model becomes confused by too many examples: it became more biased toward random values, less stable, and less accurate.
|
||||
Hypothesis: Possibly the dataset has high variance (many unrelated categories), and the model benefits from small, clean, representative few-shots, not large few-shots.
|
||||
"""
|
||||
|
||||
# Re-check the spread of the price data
prices = df["price_clean"].dropna()
summary_percentiles = [0.25, 0.5, 0.75, 0.9, 0.95]
print(prices.describe(percentiles=summary_percentiles))

print("\nSkewness:", prices.skew())
print("Kurtosis:", prices.kurt())

# Histogram of all prices (50 bins)
plt.figure(figsize=(12, 4))
sns.histplot(prices, bins=50)
plt.title("Histogram — Full Dataset Price Distribution")
plt.xlabel("Price ($)")
plt.ylabel("Frequency")
plt.show()

# Boxplot of the same prices
plt.figure(figsize=(10, 2))
sns.boxplot(x=prices)
plt.title("Boxplot — Full Dataset Price Spread")
plt.show()
|
||||
|
||||
"""Testing fewer shots to check for the optimal count"""
|
||||
|
||||
def run_few_shot_test(df_subset, shots, model=CLAUDE_MODEL):
    """Evaluate ``build_pricing_prompt`` on ``df_subset`` with ``shots`` examples.

    The few-shot examples are sampled from ``df_subset`` (fixed seed 42) and
    those rows are excluded from scoring, so no scored item has its true price
    in the prompt.

    Args:
        df_subset: DataFrame with "title" and "price_clean" columns and a unique index.
        shots: number of rows of ``df_subset`` to use as few-shot examples.
        model: model identifier passed to ``client.messages.create``.

    Returns:
        (df_res, mae, rmse, pct20): ``df_res`` has columns "title", "true" and
        "pred" with incomplete rows removed; ``pct20`` is the percentage of
        predictions within $20 of the true price. The three metrics are NaN
        when no row could be scored.
    """
    shot_rows = df_subset.sample(shots, random_state=42)
    few_shots = shot_rows[["title", "price_clean"]].values.tolist()
    held_out = df_subset.drop(index=shot_rows.index)
    results = []

    for _, row in held_out.iterrows():
        prompt = build_pricing_prompt(few_shots, row["title"])
        try:
            resp = client.messages.create(
                model=model,
                max_tokens=MAX_TOKENS,
                messages=[{"role": "user", "content": prompt}],
            )
            reply = resp.content[0].text.strip()
            pred = float(reply.replace("$", "").strip())
        except Exception:
            # Best effort for failed requests and unparsable replies. Catching
            # Exception (not a bare except) lets Ctrl-C still stop the loop.
            pred = np.nan

        results.append({"title": row["title"], "true": row["price_clean"], "pred": pred})

    # Explicit columns keep the attribute access below valid when nothing ran.
    df_res = pd.DataFrame(results, columns=["title", "true", "pred"]).dropna()
    if df_res.empty:
        return df_res, np.nan, np.nan, np.nan

    abs_err = np.abs(df_res.pred - df_res.true)
    mae = np.mean(abs_err)
    rmse = np.sqrt(np.mean(abs_err ** 2))
    pct20 = np.mean(abs_err <= 20) * 100
    return df_res, mae, rmse, pct20
|
||||
|
||||
# 2-shot results on the 50-item sample
df2, mae2, rmse2, pct2 = run_few_shot_test(subset_50, shots=2)
print(f"2-SHOT RESULTS → MAE={mae2:.2f}, RMSE={rmse2:.2f}, %≤$20={pct2:.1f}%")
df2.head()

# 5-shot results on the same sample
df5, mae5, rmse5, pct5 = run_few_shot_test(subset_50, shots=5)
print(f"5-SHOT RESULTS → MAE={mae5:.2f}, RMSE={rmse5:.2f}, %≤$20={pct5:.1f}%")
df5.head()

# 7-shot results on the same sample
df7, mae7, rmse7, pct7 = run_few_shot_test(subset_50, shots=7)
print(f"7-SHOT RESULTS → MAE={mae7:.2f}, RMSE={rmse7:.2f}, %≤$20={pct7:.1f}%")
df7.head()
|
||||
|
||||
# Tabulate every run to pick the best shot count, or to see whether shots help at all.
# The 0 / 10 / 30 / 50 rows are literal values typed in here; only the
# 2 / 5 / 7 rows come from the variables computed by the runs above.
# NOTE(review): the literal rows look like they were copied from earlier runs - confirm.
results_summary = [
    {"shots": 0, "MAE": 22.52, "RMSE": 44.11, "%≤$20": 72.0},  # baseline
    {"shots": 2, "MAE": mae2, "RMSE": rmse2, "%≤$20": pct2},
    {"shots": 5, "MAE": mae5, "RMSE": rmse5, "%≤$20": pct5},
    {"shots": 7, "MAE": mae7, "RMSE": rmse7, "%≤$20": pct7},
    {"shots": 10, "MAE": 16.27, "RMSE": 38.59, "%≤$20": 90.0},
    {"shots": 30, "MAE": 135.73, "RMSE": 606.78, "%≤$20": 70.0},
    {"shots": 50, "MAE": 42.54, "RMSE": 136.61, "%≤$20": 72.0},
]

# One row per run, ordered by shot count
df_comparison = (
    pd.DataFrame(results_summary)
    .sort_values("shots")
    .reset_index(drop=True)
)
df_comparison
|
||||
|
||||
"""1. 0-shot baseline: MAE 22.52, %≤$20 72%
|
||||
|
||||
2. Very low few-shots (2, 5): Surprisingly worse than baseline (MAE ↑, %≤$20 ↓), likely due to variance and poor example selection.
|
||||
|
||||
3. 7-shot: Improves over baseline slightly, MAE 19.91, %≤$20 back to 72%
|
||||
|
||||
4. 10-shot: Best performance overall — MAE 16.27, %≤$20 jumps to 90%! Clearly the few-shot hints are helping here.
|
||||
|
||||
5. 30-shot: Performance collapses (MAE 135.73, RMSE 606.78) — too many examples may confuse the model.
|
||||
|
||||
6. 50-shot: Slightly better than 30-shot but still worse than 10-shot.
|
||||
|
||||
|
||||
Conclusion: Optimal few-shot count is 10 for this dataset and prompt style.
|
||||
"""
|
||||
|
||||
#Further finetuning of the selected 10-shot
|
||||
|
||||
def build_finetune_prompt(few_shots: list, target_title: str, max_chars=800):
    """Build a compact few-shot pricing prompt that fits within ``max_chars``.

    Args:
        few_shots: list of dicts with keys "title" and "price_clean".
        target_title: title of the product whose price should be predicted.
        max_chars: soft limit on the prompt length in characters.

    Returns:
        The prompt string. When the full prompt is longer than ``max_chars``,
        examples are dropped from the end of ``few_shots`` until it fits. The
        instruction and the target title are never cut, so the result can
        exceed ``max_chars`` only when they alone are longer than the limit.
    """
    header = "You are an e-commerce pricing expert. Estimate product prices in USD accurately. Output only a number."
    example_lines = [f"- {ex['title']}: {ex['price_clean']}" for ex in few_shots]
    closing = ["\nPredict price for the following product:", f"Title: {target_title}"]

    def assemble(examples):
        # Same layout as the untruncated prompt: header, examples, target.
        return "\n".join([header, "\nExamples:", *examples, *closing])

    prompt = assemble(example_lines)
    # Slicing the joined string would cut off the end of the prompt, which is
    # the target title; drop whole examples instead.
    while len(prompt) > max_chars and example_lines:
        example_lines.pop()
        prompt = assemble(example_lines)
    return prompt
|
||||
|
||||
# Build 10-shot prompt/completion pairs for fine-tuning.
# Note: subset_10 is rebound here to a 100-row sample (100 products for initial fine-tuning).
subset_10 = df.dropna(subset=["price_clean"]).sample(100, random_state=42).reset_index(drop=True)
finetune_examples = []

for idx, row in subset_10.iterrows():
    # Ten examples drawn from the other rows; the row index is the seed, so
    # each target gets a reproducible set of shots.
    other_rows = subset_10.drop(idx)
    shot_frame = other_rows.sample(10, random_state=idx)[["title","price_clean"]]
    few_shots = shot_frame.to_dict(orient="records")
    prompt = build_finetune_prompt(few_shots, row["title"])
    example = {"prompt": prompt, "completion": str(row["price_clean"])}
    finetune_examples.append(example)

print("Sample fine-tuning example:")
print(finetune_examples[0])

# One JSON object per line
with open("finetune_10shot.jsonl", "w") as f:
    f.writelines(json.dumps(ex) + "\n" for ex in finetune_examples)
print("(10-shot format).finetuned")
|
||||
|
||||
# Evaluate the enhanced 10-shot prompt on the sample
results_finetune_test = []

for idx, row in subset_10.iterrows():
    # Same shot selection as when the fine-tuning examples were built:
    # ten other rows, seeded by the row index.
    shot_pool = subset_10.drop(idx)
    few_shots = shot_pool.sample(10, random_state=idx)[["title","price_clean"]].to_dict(orient="records")
    prompt = build_finetune_prompt(few_shots, row["title"])
    try:
        resp = client.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=MAX_TOKENS,
            messages=[{"role": "user", "content": prompt}]
        )
        reply = resp.content[0].text.strip()
        pred = float(reply.replace("$","").strip())
    except Exception:
        # A failed call or unparsable reply is recorded as NaN and dropped below.
        pred, reply = np.nan, None
    outcome = {"title": row["title"], "true_price": row["price_clean"], "pred": pred, "raw": reply}
    results_finetune_test.append(outcome)

df_finetune_test = pd.DataFrame(results_finetune_test).dropna(subset=["pred"])
ft_error = df_finetune_test.pred - df_finetune_test.true_price
mae_ft = np.mean(np.abs(ft_error))
rmse_ft = np.sqrt(np.mean(ft_error**2))
pct20_ft = np.mean(np.abs(ft_error) <= 20) * 100

print(f"Finetuned 10-shot performance: MAE={mae_ft:.2f}, RMSE={rmse_ft:.2f}, %≤$20={pct20_ft:.1f}%")
|
||||
|
||||
"""Multi-shot prompting (10 examples in the prompt) without fine-tuning performed much better.
|
||||
|
||||
|
||||
Next trial: Prompt optimization
|
||||
"""
|
||||
|
||||
# Prompt optimization seems like the only choice
|
||||
def build_pricing_prompt_alt(few_shots: list, target_title: str) -> str:
    """
    Assemble an alternative multi-shot pricing prompt for Claude.

    few_shots: list of dicts with keys 'title' and 'price_clean'
    target_title: product title to predict the price for

    Returns the instruction lines, a numbered list of example prices, and the
    target product, joined with newlines.
    """
    instructions = [
        "Act as an expert e-commerce pricing analyst.",
        "Given product titles and their prices, predict the price in USD for the new product.",
        "Only provide the numeric price. No extra text, explanations, or symbols.",
    ]

    # Examples as a numbered list, prices rendered with two decimals
    numbered_examples = [
        f"{position}. {shot['title']} — ${shot['price_clean']:.2f}"
        for position, shot in enumerate(few_shots, start=1)
    ]

    # The product to price; the prompt ends on an open "Price (USD):" line
    target_section = [
        "\nPredict the price for this product:",
        f"Title: {target_title}",
        "Price (USD):",
    ]

    return "\n".join(instructions + ["\nExample prices:"] + numbered_examples + target_section)
|
||||
|
||||
"""eda"""
|
||||
@@ -350,7 +350,7 @@
|
||||
" system_message = messages[0]['content']\n",
|
||||
" messages = messages[1:]\n",
|
||||
" response = claude.messages.create(\n",
|
||||
" model=\"claude-3-5-sonnet-20240620\",\n",
|
||||
" model=\"claude-sonnet-4-5-20250929\",\n",
|
||||
" max_tokens=5,\n",
|
||||
" system=system_message,\n",
|
||||
" messages=messages\n",
|
||||
|
||||
@@ -0,0 +1,820 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "view-in-github"
|
||||
},
|
||||
"source": [
|
||||
"<a href=\"https://colab.research.google.com/github/dkisselev-zz/llm_engineering/blob/wk7/Week_7_Excersise_fine_tuned_model.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "GHsssBgWM_l0"
|
||||
},
|
||||
"source": [
|
||||
"# Predict Product Prices\n",
|
||||
"\n",
|
||||
"Model evaluation and inference tuning\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "HnwMdAP3IHad"
|
||||
},
|
||||
"source": [
|
||||
"## Libraries and configuration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "MDyR63OTNUJ6"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124\n",
|
||||
"!pip install -q --upgrade requests==2.32.3 bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 datasets==3.2.0 peft==0.14.0 trl==0.14.0 matplotlib wandb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "-yikV8pRBer9"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import re\n",
|
||||
"import math\n",
|
||||
"import numpy as np\n",
|
||||
"from google.colab import userdata\n",
|
||||
"from huggingface_hub import login\n",
|
||||
"import wandb\n",
|
||||
"import torch\n",
|
||||
"import torch.nn.functional as F\n",
|
||||
"from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, set_seed\n",
|
||||
"from datasets import load_dataset\n",
|
||||
"from peft import PeftModel\n",
|
||||
"import matplotlib.pyplot as plt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "uuTX-xonNeOK"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Models\n",
|
||||
"\n",
|
||||
"# WB or HF location of artifacts\n",
|
||||
"ARTIFCAT_LOCATTION=\"HF\"\n",
|
||||
"\n",
|
||||
"BASE_MODEL = \"meta-llama/Meta-Llama-3.1-8B\"\n",
|
||||
"\n",
|
||||
"PROJECT_NAME = \"pricer\"\n",
|
||||
"\n",
|
||||
"# RUN_NAME = \"2025-10-23_23.41.24\" # - Fine tuned 16 batches / 8 bit run\n",
|
||||
"# RUN_NAME = \"2025-10-25_05.02.00\" # - Fine tuned 4 batches / 4 bit / LoRA 64/128 / Gradient 8\n",
|
||||
"RUN_NAME = \"2024-09-13_13.04.39\" # Ed's model run\n",
|
||||
"\n",
|
||||
"# Hugging Face\n",
|
||||
"HF_USER = \"dkisselev\"\n",
|
||||
"\n",
|
||||
"if ARTIFCAT_LOCATTION==\"HF\":\n",
|
||||
" PROJECT_RUN_NAME = f\"{PROJECT_NAME}-{RUN_NAME}\"\n",
|
||||
" # REVISION = None\n",
|
||||
" REVISION = \"e8d637df551603dc86cd7a1598a8f44af4d7ae36\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" # FINETUNED_MODEL = f\"{HF_USER}/{PROJECT_RUN_NAME}\"\n",
|
||||
"\n",
|
||||
" # Ed's model\n",
|
||||
" FINETUNED_MODEL = f\"ed-donner/{PROJECT_RUN_NAME}\"\n",
|
||||
"else:\n",
|
||||
" # Weights and Biases\n",
|
||||
" WANDB_ENTITY = \"dkisselev\"\n",
|
||||
" os.environ[\"WANDB_API_KEY\"]=userdata.get('WANDB_API_KEY')\n",
|
||||
"\n",
|
||||
" MODEL_ARTIFACT_NAME = f\"model-{RUN_NAME}\"\n",
|
||||
" REVISION_TAG=\"v22\"\n",
|
||||
" WANDB_ARTIFACT_PATH = f\"{WANDB_ENTITY}/{PROJECT_NAME}/{MODEL_ARTIFACT_NAME}:{REVISION_TAG}\"\n",
|
||||
"\n",
|
||||
"# Data set\n",
|
||||
"\n",
|
||||
"# DATASET_NAME = f\"{HF_USER}/pricer-data2\"\n",
|
||||
"DATASET_NAME = \"ed-donner/pricer-data\"\n",
|
||||
"\n",
|
||||
"# Hyperparameters for QLoRA\n",
|
||||
"QUANT_4_BIT = True\n",
|
||||
"K_SEARCH_LIMIT = 900\n",
|
||||
"\n",
|
||||
"# Used for writing to output in color\n",
|
||||
"GREEN = \"\\033[92m\"\n",
|
||||
"YELLOW = \"\\033[93m\"\n",
|
||||
"RED = \"\\033[91m\"\n",
|
||||
"BLUE = \"\\033[94m\"\n",
|
||||
"RESET = \"\\033[0m\"\n",
|
||||
"COLOR_MAP = {\"red\":RED, \"orange\": BLUE, \"green\": GREEN}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "8JArT3QAQAjx"
|
||||
},
|
||||
"source": [
|
||||
"### Load Data\n",
|
||||
"\n",
|
||||
"Data is loaded from Hugging Face\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "WyFPZeMcM88v"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Log in to HuggingFace\n",
|
||||
"hf_token = userdata.get('HF_TOKEN')\n",
|
||||
"login(hf_token)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "cvXVoJH8LS6u"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = load_dataset(DATASET_NAME)\n",
|
||||
"train = dataset['train']\n",
|
||||
"test = dataset['test']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "qJWQ0a3wZ0Bw"
|
||||
},
|
||||
"source": [
|
||||
"## Load Tokenizer and Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "lAUAAcEC6ido"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 4 or 8 bit quantization\n",
|
||||
"if QUANT_4_BIT:\n",
|
||||
" quant_config = BitsAndBytesConfig(\n",
|
||||
" load_in_4bit=True,\n",
|
||||
" bnb_4bit_use_double_quant=True,\n",
|
||||
" bnb_4bit_compute_dtype=torch.bfloat16,\n",
|
||||
" bnb_4bit_quant_type=\"nf4\"\n",
|
||||
" )\n",
|
||||
"else:\n",
|
||||
" quant_config = BitsAndBytesConfig(\n",
|
||||
" load_in_8bit=True\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "OQy4pCk-dutf"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load model from w&b\n",
|
||||
"if ARTIFCAT_LOCATTION==\"WB\":\n",
|
||||
" artifact = wandb.Api().artifact(WANDB_ARTIFACT_PATH, type='model')\n",
|
||||
" artifact_dir = artifact.download() # Downloads to a local cache dir"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "R_O04fKxMMT-"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load the Tokenizer and the Model\n",
|
||||
"\n",
|
||||
"tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n",
|
||||
"tokenizer.pad_token = tokenizer.eos_token\n",
|
||||
"tokenizer.padding_side = \"right\"\n",
|
||||
"\n",
|
||||
"base_model = AutoModelForCausalLM.from_pretrained(\n",
|
||||
" BASE_MODEL,\n",
|
||||
" quantization_config=quant_config,\n",
|
||||
" device_map=\"auto\",\n",
|
||||
")\n",
|
||||
"base_model.generation_config.pad_token_id = tokenizer.pad_token_id\n",
|
||||
"\n",
|
||||
"if ARTIFCAT_LOCATTION==\"HF\":\n",
|
||||
" # Load the fine-tuned model with PEFT\n",
|
||||
" if REVISION:\n",
|
||||
" fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL, revision=REVISION)\n",
|
||||
" else:\n",
|
||||
" fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL)\n",
|
||||
"else:\n",
|
||||
" # Model at W&B\n",
|
||||
" fine_tuned_model = PeftModel.from_pretrained(base_model, artifact_dir)\n",
|
||||
"\n",
|
||||
"print(f\"Memory footprint: {fine_tuned_model.get_memory_footprint() / 1e6:.1f} MB\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "UObo1-RqaNnT"
|
||||
},
|
||||
"source": [
|
||||
"## Hyperparameter helpers"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "n4u27kbwlekE"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def calculate_weighted_price(prices, probabilities):\n",
|
||||
" \"\"\"\n",
|
||||
" Calculates a normalized weighted average price.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" prices (list or np.array): A list of prices.\n",
|
||||
" probabilities (list or np.array): A list of corresponding probabilities (or weights).\n",
|
||||
" Returns:\n",
|
||||
" float: The normalized weighted average price.\n",
|
||||
" \"\"\"\n",
|
||||
" # Convert lists to numpy arrays\n",
|
||||
" prices_array = np.array(prices)\n",
|
||||
" probs_array = np.array(probabilities)\n",
|
||||
"\n",
|
||||
" # Total of the probabilities to use for normalization\n",
|
||||
" total_prob = np.sum(probs_array)\n",
|
||||
"\n",
|
||||
" # Catch zero\n",
|
||||
" if total_prob == 0:\n",
|
||||
" if len(prices_array) > 0:\n",
|
||||
" return np.mean(prices_array)\n",
|
||||
" else:\n",
|
||||
" return 0.0\n",
|
||||
"\n",
|
||||
" # Weighted average\n",
|
||||
" weighted_price = np.average(prices_array, weights=probs_array)\n",
|
||||
"\n",
|
||||
" return weighted_price"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "ROjIbGuH0FWS"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_top_k_predictions(prompt, device=\"cuda\"):\n",
|
||||
" \"\"\"\n",
|
||||
" Gets the top K price/probability pairs from the model.\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" (list, list): A tuple containing (prices, probabilities)\n",
|
||||
" \"\"\"\n",
|
||||
" set_seed(42)\n",
|
||||
" inputs = tokenizer.encode(prompt, return_tensors=\"pt\").to(device)\n",
|
||||
" attention_mask = torch.ones(inputs.shape, device=device)\n",
|
||||
"\n",
|
||||
" with torch.no_grad():\n",
|
||||
" outputs = fine_tuned_model(inputs, attention_mask=attention_mask)\n",
|
||||
" next_token_logits = outputs.logits[:, -1, :].to('cpu')\n",
|
||||
"\n",
|
||||
" next_token_probs = F.softmax(next_token_logits, dim=-1)\n",
|
||||
" top_prob, top_token_id = next_token_probs.topk(K_SEARCH_LIMIT)\n",
|
||||
"\n",
|
||||
" prices = []\n",
|
||||
" probabilities = []\n",
|
||||
"\n",
|
||||
" for i in range(K_SEARCH_LIMIT):\n",
|
||||
" predicted_token = tokenizer.decode(top_token_id[0][i])\n",
|
||||
" probability_tensor = top_prob[0][i]\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" price = float(predicted_token)\n",
|
||||
" except ValueError as e:\n",
|
||||
" price = 0.0\n",
|
||||
"\n",
|
||||
" if price > 0:\n",
|
||||
" prices.append(price)\n",
|
||||
" probabilities.append(probability_tensor.item())\n",
|
||||
"\n",
|
||||
" if not prices:\n",
|
||||
" return [], []\n",
|
||||
"\n",
|
||||
" return prices, probabilities"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "tnmTAiEG32xK"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def make_prompt(text):\n",
|
||||
" if ARTIFCAT_LOCATTION==\"HF\":\n",
|
||||
" return text\n",
|
||||
" p_array = text.split(\"\\n\")\n",
|
||||
" p_question = p_array[0].replace(\"How much does this cost to the nearest dollar?\",\"What is the price of this item?\")\n",
|
||||
" p_title = p_array[2]\n",
|
||||
" p_descr = re.sub(r'\\d', '', p_array[3])\n",
|
||||
" p_price = p_array[5]\n",
|
||||
" prompt = p_title + \"\\n\" + p_descr + \"\\n\" + \"Question: \"+ p_question + \"\\n\\n\" + p_price\n",
|
||||
" # prompt = p_array[0] + \"\\n\\n\\n\" + p_title + \"\\n\\n\" + p_descr + \"\\n\\n\" + p_price\n",
|
||||
" # return text\n",
|
||||
" return prompt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "VNAEw5Eg4ABk"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"\n",
|
||||
"class Tester:\n",
|
||||
"\n",
|
||||
" def __init__(self, predictor, data, title=None, size=250):\n",
|
||||
" self.predictor = predictor\n",
|
||||
" self.data = data\n",
|
||||
" self.title = title or predictor.__name__.replace(\"_\", \" \").title()\n",
|
||||
" self.size = size\n",
|
||||
" self.guesses = []\n",
|
||||
" self.truths = []\n",
|
||||
" self.errors = []\n",
|
||||
" self.sles = []\n",
|
||||
" self.colors = []\n",
|
||||
"\n",
|
||||
" def color_for(self, error, truth):\n",
|
||||
" if error<40 or error/truth < 0.2:\n",
|
||||
" return \"green\"\n",
|
||||
" elif error<80 or error/truth < 0.4:\n",
|
||||
" return \"orange\"\n",
|
||||
" else:\n",
|
||||
" return \"red\"\n",
|
||||
"\n",
|
||||
" def run_datapoint(self, i):\n",
|
||||
" datapoint = self.data[i]\n",
|
||||
"\n",
|
||||
" base_prompt = datapoint[\"text\"]\n",
|
||||
" prompt = make_prompt(base_prompt)\n",
|
||||
"\n",
|
||||
" guess = self.predictor(prompt)\n",
|
||||
"\n",
|
||||
" # guess = self.predictor(datapoint[\"text\"])\n",
|
||||
" truth = datapoint[\"price\"]\n",
|
||||
" error = abs(guess - truth)\n",
|
||||
" log_error = math.log(truth+1) - math.log(guess+1)\n",
|
||||
" sle = log_error ** 2\n",
|
||||
" color = self.color_for(error, truth)\n",
|
||||
" title = datapoint[\"text\"].split(\"\\n\\n\")[1][:20] + \"...\"\n",
|
||||
" self.guesses.append(guess)\n",
|
||||
" self.truths.append(truth)\n",
|
||||
" self.errors.append(error)\n",
|
||||
" self.sles.append(sle)\n",
|
||||
" self.colors.append(color)\n",
|
||||
" print(f\"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}\")\n",
|
||||
"\n",
|
||||
" def chart(self, title):\n",
|
||||
" max_error = max(self.errors)\n",
|
||||
" plt.figure(figsize=(12, 8))\n",
|
||||
" max_val = max(max(self.truths), max(self.guesses))\n",
|
||||
" plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)\n",
|
||||
" plt.scatter(self.truths, self.guesses, s=3, c=self.colors)\n",
|
||||
" plt.xlabel('Ground Truth')\n",
|
||||
" plt.ylabel('Model Estimate')\n",
|
||||
" plt.xlim(0, max_val)\n",
|
||||
" plt.ylim(0, max_val)\n",
|
||||
" plt.title(title)\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
" def report(self):\n",
|
||||
" average_error = sum(self.errors) / self.size\n",
|
||||
" rmsle = math.sqrt(sum(self.sles) / self.size)\n",
|
||||
" hits = sum(1 for color in self.colors if color==\"green\")\n",
|
||||
" title = f\"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%\"\n",
|
||||
" self.chart(title)\n",
|
||||
"\n",
|
||||
" def run(self):\n",
|
||||
" self.error = 0\n",
|
||||
" for i in range(self.size):\n",
|
||||
" self.run_datapoint(i)\n",
|
||||
" self.report()\n",
|
||||
"\n",
|
||||
" @classmethod\n",
|
||||
" def test(cls, function, data):\n",
|
||||
" cls(function, data).run()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "dbWS1DPV4TPQ"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class Search_K:\n",
|
||||
" \"\"\"\n",
|
||||
" Search for the optimal 'k' value.\n",
|
||||
" \"\"\"\n",
|
||||
" def __init__(self, predictor, data, title=None, size=250):\n",
|
||||
" self.predictor = predictor\n",
|
||||
" self.data = data\n",
|
||||
" self.title = title or predictor.__name__.replace(\"_\", \" \").title()\n",
|
||||
" self.size = size\n",
|
||||
" self.truths = []\n",
|
||||
"\n",
|
||||
" self.all_k_errors = []\n",
|
||||
" self.max_k = K_SEARCH_LIMIT\n",
|
||||
"\n",
|
||||
" # Store the list of probabilities for each inference\n",
|
||||
" self.all_prob_lists = []\n",
|
||||
" # Store the standard deviation of probs for each inference\n",
|
||||
" self.prob_std_devs = []\n",
|
||||
"\n",
|
||||
" def color_for(self, error, truth):\n",
|
||||
" if error<40 or error/truth < 0.2:\n",
|
||||
" return \"green\"\n",
|
||||
" elif error<80 or error/truth < 0.4:\n",
|
||||
" return \"orange\"\n",
|
||||
" else:\n",
|
||||
" return \"red\"\n",
|
||||
"\n",
|
||||
" def run_datapoint(self, i):\n",
|
||||
" datapoint = self.data[i]\n",
|
||||
" base_prompt = datapoint[\"text\"]\n",
|
||||
" prompt = make_prompt(base_prompt)\n",
|
||||
" truth = datapoint[\"price\"]\n",
|
||||
" self.truths.append(truth)\n",
|
||||
"\n",
|
||||
" # Get the raw lists of prices and probabilities\n",
|
||||
" prices, probabilities = self.predictor(prompt)\n",
|
||||
"\n",
|
||||
" self.all_prob_lists.append(probabilities)\n",
|
||||
"\n",
|
||||
" if probabilities:\n",
|
||||
" # Calculate and store the spread (std dev) of this prob list\n",
|
||||
" self.prob_std_devs.append(np.std(probabilities))\n",
|
||||
" else:\n",
|
||||
" # No probabilities, append 0 for spread\n",
|
||||
" self.prob_std_devs.append(0.0)\n",
|
||||
"\n",
|
||||
" errors_for_this_datapoint = []\n",
|
||||
"\n",
|
||||
" if not prices:\n",
|
||||
" print(f\"{i+1}: No valid prices found. Truth: ${truth:,.2f}.\")\n",
|
||||
" error = np.abs(0 - truth)\n",
|
||||
" errors_for_this_datapoint = [error] * self.max_k\n",
|
||||
" self.all_k_errors.append(errors_for_this_datapoint)\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
" # Iterate from k=1 up to max_k\n",
|
||||
" for k in range(1, self.max_k + 1):\n",
|
||||
" k_prices = prices[:k]\n",
|
||||
" k_probabilities = probabilities[:k]\n",
|
||||
"\n",
|
||||
" # Calculate the weighted price just for this k\n",
|
||||
" guess = calculate_weighted_price(k_prices, k_probabilities)\n",
|
||||
"\n",
|
||||
" # Calculate and store the error for this k\n",
|
||||
" error = np.abs(guess - truth)\n",
|
||||
" errors_for_this_datapoint.append(error)\n",
|
||||
"\n",
|
||||
" # Store the list of errors (for k=1 to max_k)\n",
|
||||
" self.all_k_errors.append(errors_for_this_datapoint)\n",
|
||||
"\n",
|
||||
" # Print a summary for this datapoint\n",
|
||||
" title = datapoint[\"text\"].split(\"\\n\\n\")[1][:20] + \"...\"\n",
|
||||
"\n",
|
||||
" # Using [0], [19], [-1] for k=1, k=20, k=max_k (0-indexed)\n",
|
||||
" k_1_err = errors_for_this_datapoint[0]\n",
|
||||
" k_20_err = errors_for_this_datapoint[19]\n",
|
||||
" k_max_err = errors_for_this_datapoint[-1]\n",
|
||||
"\n",
|
||||
" color = self.color_for(k_1_err, truth)\n",
|
||||
" print(f\"{COLOR_MAP[color]}{i+1}: Truth: ${truth:,.2f}. \"\n",
|
||||
" f\"Errors (k=1, k=20, k={self.max_k}): \"\n",
|
||||
" f\"(${k_1_err:,.2f}, ${k_20_err:,.2f}, ${k_max_err:,.2f}) \"\n",
|
||||
" f\"Item: {title}{RESET}\")\n",
|
||||
"\n",
|
def plot_k_vs_error(self, k_values, avg_errors_by_k, best_k, min_error):
    """
    Draw the average absolute error as a function of k and mark the best k.

    Args:
        k_values: x-axis values, one per evaluated k.
        avg_errors_by_k: average error matching each entry of ``k_values``.
        best_k: k to highlight with a dashed vertical line.
        min_error: average error at ``best_k``; shown in the legend entry.

    The figure is written to ``k_vs_error_plot.png`` and then displayed.
    """
    plt.figure(figsize=(12, 8))
    plt.plot(k_values, avg_errors_by_k, label='Average Error vs. k')

    # Dashed marker at the winning k, with its error quoted in the legend.
    best_label = f'Best k = {best_k} (Avg Error: ${min_error:,.2f})'
    plt.axvline(x=best_k, color='red', linestyle='--', label=best_label)

    # Titles and axis captions.
    plt.title(f'Optimal k Analysis for {self.title}')
    plt.xlabel('Number of Top Probabilities/Prices (k)')
    plt.ylabel('Average Absolute Error ($)')

    plt.grid(True, which='both', linestyle='--', linewidth=0.5)
    plt.legend()

    # k is 1-based, so the x-axis starts at 1 rather than 0.
    plt.xlim(left=1)

    plt.savefig("k_vs_error_plot.png")
    plt.show()
|
||||
"\n",
|
||||
"\n",
|
def plot_probability_spread(self, idx_min_std, idx_med_std, idx_max_std):
    """
    Show the recorded probabilities of three example inferences side by side.

    Args:
        idx_min_std: index of the inference shown as "Lowest Spread".
        idx_med_std: index of the inference shown as "Median Spread".
        idx_max_std: index of the inference shown as "Highest Spread".

    Each panel is a strip plot: probabilities on the shared y-axis, with a
    small random horizontal jitter so overlapping points stay visible. The
    figure is written to ``spread_examples_plot.png`` and then displayed.
    """
    # (inference index, caption tag) for the panels, left to right.
    panels = [
        (idx_min_std, 'Lowest'),
        (idx_med_std, 'Median'),
        (idx_max_std, 'Highest'),
    ]
    prob_lists = [self.all_prob_lists[idx] for idx, _ in panels]
    spreads = [self.prob_std_devs[idx] for idx, _ in panels]

    fig, axes = plt.subplots(1, 3, figsize=(18, 7), sharey=True)
    fig.suptitle('Probability Distribution Spread Analysis (Examples)', fontsize=16)

    for ax, (idx, tag), probs, spread in zip(axes, panels, prob_lists, spreads):
        caption = f'Inference {idx} ({tag} Spread)\nStd Dev: {spread:.6f}'
        if not probs:
            # Nothing to scatter for this inference; only label the panel.
            ax.set_title(f"{caption}\n(No probabilities found)")
            continue
        x_jitter = np.random.normal(0, 0.01, size=len(probs))
        ax.scatter(x_jitter, probs, alpha=0.5, s=10)
        ax.set_title(caption)
        ax.set_xlabel("Jitter")
        ax.get_xaxis().set_ticks([])

    # The y-axis is shared, so only the left-most panel carries the label.
    axes[0].set_ylabel('Probability')

    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.savefig("spread_examples_plot.png")
    plt.show()
|
||||
"\n",
|
def plot_all_std_devs(self):
    """
    Plot the probability standard deviation of every recorded inference.

    Two stacked panels are drawn from ``self.prob_std_devs``: a 50-bin
    histogram of the values, and a line plot of the values against their
    inference index. Both carry a dashed red line at the mean. The figure is
    written to ``all_std_devs_plot.png`` and then displayed.

    Prints a notice and returns without plotting when no spreads were
    recorded.
    """
    if not self.prob_std_devs:
        print("No probability spreads recorded, skipping all-std plot.")
        return

    # Derive the index range from the data instead of assuming 250 inferences.
    last_index = len(self.prob_std_devs) - 1
    mean_std = np.mean(self.prob_std_devs)
    mean_label = f'Mean Std Dev: {mean_std:.6f}'

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12))
    fig.suptitle('Full Spread Analysis for All Inferences', fontsize=16)

    # --- Histogram of the spreads ---
    ax1.hist(self.prob_std_devs, bins=50, edgecolor='black')
    ax1.set_title('Distribution of Probability Standard Deviations')
    ax1.set_xlabel('Standard Deviation')
    ax1.set_ylabel('Frequency (Number of Inferences)')
    ax1.axvline(mean_std, color='red', linestyle='--', label=mean_label)
    ax1.legend()

    # --- Spread per inference, in run order ---
    ax2.plot(self.prob_std_devs, marker='o', linestyle='-',
             markersize=3, alpha=0.7, label='Std Dev per Inference')
    ax2.set_title('Probability Standard Deviation per Inference')
    ax2.set_xlabel(f'Inference Index (0 to {last_index})')
    ax2.set_ylabel('Standard Deviation')
    ax2.axhline(mean_std, color='red', linestyle='--', label=mean_label)
    ax2.legend()
    if last_index > 0:
        # With a single inference both limits would be 0, which matplotlib
        # warns about; keep the automatic limits in that case.
        ax2.set_xlim(0, last_index)

    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.savefig("all_std_devs_plot.png")
    plt.show()
|
||||
"\n",
|
def report(self):
    """
    Summarise the k search: print the results, draw the plots, return best k.

    Each row of ``self.all_k_errors`` holds one datapoint's absolute errors
    for k = 1 .. ``self.max_k``. The errors are averaged per k and the k with
    the smallest average is reported.

    Returns:
        The best k (1-based) as a plain ``int``, or ``None`` when no errors
        were recorded. The best k is returned even when the probability
        spread plots are skipped.
    """
    if not self.all_k_errors:
        print("\nNo data to report on. Exiting.")
        return None

    # --- Optimal k analysis ---
    errors_array = np.array(self.all_k_errors)
    avg_errors_by_k = np.mean(errors_array, axis=0)
    # Cast so callers receive a plain int rather than numpy.int64.
    best_k_index = int(np.argmin(avg_errors_by_k))
    min_error = avg_errors_by_k[best_k_index]
    best_k = best_k_index + 1  # column 0 corresponds to k=1

    print("\n" + "="*40)
    print("--- Optimal k Analysis Report ---")
    print(f"Model: {self.title}")
    print(f"Inferences Run: {self.size}")
    print(f"Analyzed k from 1 to {self.max_k}")
    print("===================================")
    print(f"==> Best k: {best_k}")
    print(f"==> Minimum Average Error: ${min_error:,.2f}")
    print("="*40 + "\n")

    k_values = np.arange(1, self.max_k + 1)
    self.plot_k_vs_error(k_values, avg_errors_by_k, best_k, min_error)

    # --- Probability spread analysis ---
    if not self.prob_std_devs:
        print("\nNo probability spreads recorded, skipping spread plots.")
        # The k analysis above is still valid, so hand its result back.
        return best_k

    print("\n" + "="*40)
    print("--- Probability Spread Analysis ---")

    # Pick the inferences with the lowest, median and highest spread.
    std_sorted_indices = np.argsort(self.prob_std_devs)
    idx_min_std = std_sorted_indices[0]
    idx_med_std = std_sorted_indices[len(std_sorted_indices) // 2]
    idx_max_std = std_sorted_indices[-1]

    print(f"Lowest spread (std):  {self.prob_std_devs[idx_min_std]:.6f} (Inference {idx_min_std})")
    print(f"Median spread (std):  {self.prob_std_devs[idx_med_std]:.6f} (Inference {idx_med_std})")
    print(f"Highest spread (std): {self.prob_std_devs[idx_max_std]:.6f} (Inference {idx_max_std})")
    print("="*40 + "\n")

    # Plot the three example spreads, then the spread of every inference.
    self.plot_probability_spread(idx_min_std, idx_med_std, idx_max_std)
    self.plot_all_std_devs()

    return best_k
|
||||
"\n",
|
def run(self):
    """
    Evaluate every datapoint in order, then produce the report.

    Returns:
        Whatever ``self.report()`` returns (the best k).
    """
    for index in range(self.size):
        self.run_datapoint(index)
    return self.report()
|
||||
"\n",
|
@classmethod
def test(cls, function, data):
    """
    Build an instance for ``function`` and ``data`` and run the full search.

    Returns:
        The value returned by ``run()``, so the result of the search is not
        lost when this shortcut is used.
    """
    return cls(function, data).run()
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "Vtt13OuVE-t7"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
# Search best K
# The run is titled with the model artifact only when it was loaded from "WB".
search_k = Search_K(
    get_top_k_predictions,
    test,
    title=None if ARTIFCAT_LOCATTION != "WB" else f"{MODEL_ARTIFACT_NAME}:{REVISION_TAG}",
)
best_k = search_k.run()
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "tuwYu1NYljIv"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"top_K = best_k\n",
|
||||
"\n",
|
def improved_model_predict(prompt, device="cuda"):
    """
    Predict a price as the probability-weighted average of the top-K tokens.

    The model's next-token distribution for ``prompt`` is computed, the
    ``top_K`` most likely tokens (``top_K`` is a module-level value) are
    decoded, and every token that parses as a finite, positive number is
    kept as a candidate price weighted by its probability.

    Args:
        prompt: text fed to the tokenizer.
        device: device the input tensors are moved to.

    Returns:
        The weighted average of the candidate prices as a Python float, or
        ``0.0`` when none of the top-K tokens is a usable price.
    """
    import math  # local import: only needed for the finiteness check below

    set_seed(42)
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
    attention_mask = torch.ones(inputs.shape, device=device)

    with torch.no_grad():
        outputs = fine_tuned_model(inputs, attention_mask=attention_mask)
        # Only the logits at the last position describe the next token.
        next_token_logits = outputs.logits[:, -1, :].to('cpu')

    next_token_probs = F.softmax(next_token_logits, dim=-1)
    top_prob, top_token_id = next_token_probs.topk(top_K)

    prices = []
    probabilities = []
    for token_id, probability in zip(top_token_id[0], top_prob[0]):
        predicted_token = tokenizer.decode(token_id)
        try:
            price = float(predicted_token)
        except ValueError:
            # Not a number at all; this token cannot be a price.
            continue

        # float() also accepts "inf"/"nan"; a single non-finite candidate
        # would make the weighted average meaningless, so reject those too.
        if math.isfinite(price) and price > 0:
            prices.append(price)
            probabilities.append(probability.item())

    if not prices:
        # No usable candidate among the top-K tokens.
        return 0.0

    # np.average normalises by the sum of the weights.
    final_price = np.average(np.array(prices), weights=np.array(probabilities))
    return float(final_price)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "3SxpLBJH70E-"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt=make_prompt(test[80]['text'])\n",
|
||||
"print(prompt)\n",
|
||||
"\n",
|
||||
"improved_model_predict(prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "W_KcLvyt6kbb"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
# Run Estimate vs Ground Truth
# The run is titled with the model artifact only when it was loaded from "WB".
tester = Tester(
    improved_model_predict,
    test,
    title=None if ARTIFCAT_LOCATTION != "WB" else f"{MODEL_ARTIFACT_NAME}:{REVISION_TAG}",
)
tester.run()
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"colab": {
|
||||
"gpuType": "T4",
|
||||
"include_colab_link": true,
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
Reference in New Issue
Block a user