This commit introduces the foundational structure for the Deal Intel project, including: - Environment configuration file (.env.example) for managing secrets and API keys. - Scripts for building a ChromaDB vector store (build_vector_store.py) and training machine learning models (train_rf.py, train_ensemble.py). - Health check functionality (health_check.py) to ensure system readiness. - A launcher script (launcher.py) for executing various commands, including UI launch and health checks. - Logging utilities (logging_utils.py) for consistent logging across the application. - A README file providing an overview and setup instructions for the project. These additions establish a comprehensive framework for an agentic deal-hunting AI system, integrating various components for data processing, model training, and user interaction.
34 lines
969 B
Python
34 lines
969 B
Python
#!/usr/bin/env python3
"""
Centralized configuration for Deal Intel.

Overridable settings are resolved once, at import time, from ``DEAL_INTEL_*``
environment variables; each falls back to the default written next to it.
"""

import os
from typing import List


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an integer.

    An unset, empty, or whitespace-only variable yields *default*, so a blank
    entry in an env file does not abort the import.

    Raises:
        ValueError: if the variable holds text that is not an integer. The
            message names the offending variable.
    """
    raw = os.getenv(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        # Re-raise with the variable name; the bare int() message omits it.
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from None


# Vector store
DB_PATH = os.getenv("DEAL_INTEL_DB_PATH", "products_vectorstore")
COLLECTION_NAME = os.getenv("DEAL_INTEL_COLLECTION", "products")

# Embedding model
MODEL_NAME = os.getenv("DEAL_INTEL_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")

# Categories (kept consistent with framework plot colors)
CATEGORIES: List[str] = [
    "Appliances",
    "Automotive",
    "Cell_Phones_and_Accessories",
    "Electronics",
    "Musical_Instruments",
    "Office_Products",
    "Tools_and_Home_Improvement",
    "Toys_and_Games",
]

# Data limits
MAX_ITEMS_PER_CATEGORY = _env_int("DEAL_INTEL_MAX_ITEMS", 2500)
BATCH_SIZE = _env_int("DEAL_INTEL_BATCH_SIZE", 500)

# Training limits
RF_MAX_DATAPOINTS = _env_int("DEAL_INTEL_RF_MAX_DATAPOINTS", 10000)
ENSEMBLE_SAMPLE_SIZE = _env_int("DEAL_INTEL_ENSEMBLE_SAMPLE_SIZE", 200)