updates
@@ -61,9 +61,9 @@
 "\n",
 "# LLM APIs\n",
 "from openai import OpenAI\n",
-"import anthropic\n",
-"import google.generativeai as genai\n",
-"from deepseek import DeepSeek\n",
+"# import anthropic\n",
+"# import google.generativeai as genai\n",
+"# from deepseek import DeepSeek\n",
 "\n",
 "# HuggingFace\n",
 "from huggingface_hub import login\n",
@@ -72,7 +72,7 @@
 "# Data processing\n",
 "import nltk\n",
 "from nltk.corpus import wordnet\n",
-"import pyarrow as pa\n",
+"# import pyarrow as pa\n",
 "\n",
 "# UI\n",
 "import gradio as gr\n",
@@ -105,6 +105,10 @@
 " 'anthropic': userdata.get('ANTHROPIC_API_KEY'),\n",
 " 'google': userdata.get('GOOGLE_API_KEY'),\n",
 " 'deepseek': userdata.get('DEEPSEEK_API_KEY'),\n",
+" # 'groq': userdata.get('GROQ_API_KEY'),\n",
+" 'grok': userdata.get('GROK_API_KEY'),\n",
+" # 'openrouter': userdata.get('OPENROUTER_API_KEY'),\n",
+" # 'ollama': userdata.get('OLLAMA_API_KEY'),\n",
 " 'hf_token': userdata.get('HF_TOKEN')\n",
 " }\n",
 " print(\"✅ Using Colab secrets\")\n",
@@ -117,27 +121,44 @@
 " 'anthropic': os.getenv('ANTHROPIC_API_KEY'),\n",
 " 'google': os.getenv('GOOGLE_API_KEY'),\n",
 " 'deepseek': os.getenv('DEEPSEEK_API_KEY'),\n",
+" # 'groq': os.getenv('GROQ_API_KEY'),\n",
+" 'grok': os.getenv('GROK_API_KEY'),\n",
+" # 'openrouter': os.getenv('OPENROUTER_API_KEY'),\n",
+" # 'ollama': os.getenv('OLLAMA_API_KEY'),\n",
 " 'hf_token': os.getenv('HF_TOKEN')\n",
 " }\n",
 " print(\"✅ Using local .env file\")\n",
 " \n",
 " # Initialize API clients\n",
+" anthropic_url = \"https://api.anthropic.com/v1/\"\n",
+" gemini_url = \"https://generativelanguage.googleapis.com/v1beta/openai/\"\n",
+" deepseek_url = \"https://api.deepseek.com\"\n",
+" # groq_url = \"https://api.groq.com/openai/v1\"\n",
+" grok_url = \"https://api.x.ai/v1\"\n",
+" # openrouter_url = \"https://openrouter.ai/api/v1\"\n",
+" # ollama_url = \"http://localhost:11434/v1\"\n",
+"\n",
 " clients = {}\n",
 " if api_keys['openai']:\n",
 " clients['openai'] = OpenAI(api_key=api_keys['openai'])\n",
 " if api_keys['anthropic']:\n",
-" clients['anthropic'] = anthropic.Anthropic(api_key=api_keys['anthropic'])\n",
+" clients['anthropic'] = OpenAI(api_key=api_keys['anthropic'], base_url=anthropic_url)\n",
+" # clients['anthropic'] = anthropic.Anthropic(api_key=api_keys['anthropic'])\n",
 " if api_keys['google']:\n",
-" genai.configure(api_key=api_keys['google'])\n",
+" # genai.configure(api_key=api_keys['google'])\n",
+" clients['gemini'] = OpenAI(api_key=api_keys['google'], base_url=gemini_url)\n",
 " if api_keys['deepseek']:\n",
-" clients['deepseek'] = DeepSeek(api_key=api_keys['deepseek'])\n",
+" clients['deepseek'] = OpenAI(api_key=api_keys['deepseek'], base_url=deepseek_url)\n",
+" # clients['deepseek'] = DeepSeek(api_key=api_keys['deepseek'])\n",
+" if api_keys['grok']:\n",
+" clients['grok'] = OpenAI(api_key=api_keys['grok'], base_url=grok_url)\n",
 " if api_keys['hf_token']:\n",
 " login(api_keys['hf_token'], add_to_git_credential=True)\n",
 " \n",
 " return api_keys, clients\n",
 "\n",
 "# Initialize API keys and clients\n",
-"api_keys, clients = setup_api_keys()\n"
+"api_keys, clients = setup_api_keys()"
 ]
 },
 {
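The change above drops each provider's native SDK in favor of the OpenAI client pointed at that provider's OpenAI-compatible endpoint, so every commercial model is reached through one interface. A minimal sketch of the pattern, assuming a `DEEPSEEK_API_KEY` environment variable (any of the base URLs from the hunk works the same way):

```python
import os
from openai import OpenAI

# One client class for every provider; only the key and base_url differ.
client = OpenAI(
    api_key=os.getenv("DEEPSEEK_API_KEY"),
    base_url="https://api.deepseek.com",  # e.g. https://api.x.ai/v1 for Grok
)

response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one sentence."},
    ],
    temperature=0.7,
)
print(response.choices[0].message.content)
```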
@@ -152,43 +173,43 @@
 "HUGGINGFACE_MODELS = {\n",
 " \"Llama 3.1 8B\": {\n",
 " \"model_id\": \"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
-" \"description\": \"Versatile 8B model, excellent for structured data generation\",\n",
+" \"description\": \"8B model - good for structured data generation\",\n",
 " \"size\": \"8B\",\n",
 " \"type\": \"huggingface\"\n",
 " },\n",
 " \"Llama 3.2 3B\": {\n",
 " \"model_id\": \"meta-llama/Llama-3.2-3B-Instruct\", \n",
-" \"description\": \"Smaller, faster model, good for simple schemas\",\n",
+" \"description\": \"3B model - smaller and faster, good for simple schemas\",\n",
 " \"size\": \"3B\",\n",
 " \"type\": \"huggingface\"\n",
 " },\n",
 " \"Phi-3.5 Mini\": {\n",
 " \"model_id\": \"microsoft/Phi-3.5-mini-instruct\",\n",
-" \"description\": \"Efficient 3.8B model with strong reasoning capabilities\",\n",
+" \"description\": \"3.8B model - strong reasoning capabilities\",\n",
 " \"size\": \"3.8B\", \n",
 " \"type\": \"huggingface\"\n",
 " },\n",
 " \"Gemma 2 9B\": {\n",
 " \"model_id\": \"google/gemma-2-9b-it\",\n",
-" \"description\": \"Google's 9B instruction-tuned model\",\n",
+" \"description\": \"9B model - instruction-tuned\",\n",
 " \"size\": \"9B\",\n",
 " \"type\": \"huggingface\"\n",
 " },\n",
 " \"Qwen 2.5 7B\": {\n",
 " \"model_id\": \"Qwen/Qwen2.5-7B-Instruct\",\n",
-" \"description\": \"Strong multilingual support, good for diverse data\",\n",
+" \"description\": \"7B model - multilingual, good for diverse data\",\n",
 " \"size\": \"7B\",\n",
 " \"type\": \"huggingface\"\n",
 " },\n",
 " \"Mistral 7B\": {\n",
 " \"model_id\": \"mistralai/Mistral-7B-Instruct-v0.3\",\n",
-" \"description\": \"Fast inference with reliable outputs\",\n",
+" \"description\": \"7B model - fast inference\",\n",
 " \"size\": \"7B\",\n",
 " \"type\": \"huggingface\"\n",
 " },\n",
 " \"Zephyr 7B\": {\n",
 " \"model_id\": \"HuggingFaceH4/zephyr-7b-beta\",\n",
-" \"description\": \"Fine-tuned for helpfulness and instruction following\",\n",
+" \"description\": \"7B model - fine-tuned for instruction following\",\n",
 " \"size\": \"7B\",\n",
 " \"type\": \"huggingface\"\n",
 " }\n",
@@ -196,29 +217,35 @@
 "\n",
 "# Commercial Models (Additional Options)\n",
 "COMMERCIAL_MODELS = {\n",
-" \"GPT-4o Mini\": {\n",
-" \"model_id\": \"gpt-4o-mini\",\n",
+" \"GPT-5 Mini\": {\n",
+" \"model_id\": \"gpt-5-mini\",\n",
 " \"description\": \"Fast, cost-effective OpenAI model\",\n",
 " \"provider\": \"openai\",\n",
 " \"type\": \"commercial\"\n",
 " },\n",
-" \"Claude 3 Haiku\": {\n",
-" \"model_id\": \"claude-3-haiku-20240307\",\n",
-" \"description\": \"Good balance of speed and quality\",\n",
+" \"Claude 4.5 Haiku\": {\n",
+" \"model_id\": \"claude-haiku-4-5-20251001\",\n",
+" \"description\": \"Balance of speed and quality\",\n",
 " \"provider\": \"anthropic\", \n",
 " \"type\": \"commercial\"\n",
 " },\n",
-" \"Gemini 2.0 Flash\": {\n",
-" \"model_id\": \"gemini-2.0-flash-exp\",\n",
-" \"description\": \"Fast, multimodal capable Google model\",\n",
+" \"Gemini 2.5 Flash Lite\": {\n",
+" \"model_id\": \"gemini-2.5-flash-lite\",\n",
+" \"description\": \"Fast Google model\",\n",
 " \"provider\": \"google\",\n",
 " \"type\": \"commercial\"\n",
 " },\n",
 " \"DeepSeek Chat\": {\n",
 " \"model_id\": \"deepseek-chat\",\n",
-" \"description\": \"Cost-effective alternative with good performance\",\n",
+" \"description\": \"Cost-effective with good performance\",\n",
 " \"provider\": \"deepseek\",\n",
 " \"type\": \"commercial\"\n",
 " },\n",
+" \"Grok 4\": {\n",
+" \"model_id\": \"grok-4\",\n",
+" \"description\": \"xAI's flagship model\",\n",
+" \"provider\": \"grok\",\n",
+" \"type\": \"commercial\"\n",
+" }\n",
 "}\n",
 "\n",
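Both registries share the same entry shape, so downstream cells can treat them as one catalog and branch on the `type` field. A hypothetical illustration (the merged dict and `describe` helper are assumptions for this sketch, not code from the commit):

```python
# Assumes HUGGINGFACE_MODELS and COMMERCIAL_MODELS from the cells above.
ALL_MODELS = {**HUGGINGFACE_MODELS, **COMMERCIAL_MODELS}

def describe(name: str) -> str:
    """Summarize a registry entry for display in a model-picker UI."""
    info = ALL_MODELS[name]
    backend = "local HF pipeline" if info["type"] == "huggingface" else info["provider"] + " API"
    return f"{name}: {info['description']} [{backend}]"

print(describe("GPT-5 Mini"))    # GPT-5 Mini: Fast, cost-effective OpenAI model [openai API]
print(describe("Llama 3.1 8B"))  # Llama 3.1 8B: 8B model - good for structured data generation [local HF pipeline]
```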
@@ -370,48 +397,15 @@
 " model_id = model_info[\"model_id\"]\n",
 " \n",
 " try:\n",
-" if provider == \"openai\" and \"openai\" in clients:\n",
-" response = clients[\"openai\"].chat.completions.create(\n",
-" model=model_id,\n",
-" messages=[\n",
-" {\"role\": \"system\", \"content\": system_prompt},\n",
-" {\"role\": \"user\", \"content\": user_prompt}\n",
-" ],\n",
-" temperature=temperature\n",
-" )\n",
-" return response.choices[0].message.content\n",
+" response = clients[provider].chat.completions.create(\n",
+" model=model_id,\n",
+" messages=[\n",
+" {\"role\": \"system\", \"content\": system_prompt},\n",
+" {\"role\": \"user\", \"content\": user_prompt}\n",
+" ],\n",
+" temperature=temperature\n",
+" )\n",
+" return response.choices[0].message.content\n",
-" \n",
-" elif provider == \"anthropic\" and \"anthropic\" in clients:\n",
-" response = clients[\"anthropic\"].messages.create(\n",
-" model=model_id,\n",
-" messages=[{\"role\": \"user\", \"content\": user_prompt}],\n",
-" system=system_prompt,\n",
-" temperature=temperature,\n",
-" max_tokens=2000\n",
-" )\n",
-" return response.content[0].text\n",
-" \n",
-" elif provider == \"google\" and api_keys[\"google\"]:\n",
-" model = genai.GenerativeModel(model_id)\n",
-" response = model.generate_content(\n",
-" f\"{system_prompt}\\n\\n{user_prompt}\",\n",
-" generation_config=genai.types.GenerationConfig(temperature=temperature)\n",
-" )\n",
-" return response.text\n",
-" \n",
-" elif provider == \"deepseek\" and \"deepseek\" in clients:\n",
-" response = clients[\"deepseek\"].chat.completions.create(\n",
-" model=model_id,\n",
-" messages=[\n",
-" {\"role\": \"system\", \"content\": system_prompt},\n",
-" {\"role\": \"user\", \"content\": user_prompt}\n",
-" ],\n",
-" temperature=temperature\n",
-" )\n",
-" return response.choices[0].message.content\n",
-" \n",
-" else:\n",
-" return f\"API client not available for {provider}\"\n",
 " \n",
 " except Exception as e:\n",
 " return f\"Error querying {model_name}: {str(e)}\"\n",
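With every client now constructed through the OpenAI SDK, the four per-provider branches collapse into a single code path. Note that the new version indexes `clients[provider]` directly, so a missing key surfaces through the `except` clause rather than the old "client not available" branch. A standalone sketch of the resulting dispatch, with the guard reinstated explicitly (function name, signature, and the `clients` parameter are assumed for illustration):

```python
def query_commercial_model(clients: dict, provider: str, model_id: str,
                           system_prompt: str, user_prompt: str,
                           temperature: float = 0.7) -> str:
    # clients maps provider name -> OpenAI-compatible client, as built above.
    if provider not in clients:
        return f"API client not available for {provider}"
    try:
        response = clients[provider].chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=temperature,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error querying {model_id}: {e}"
```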
@@ -580,49 +574,16 @@
 " model_id = model_info[\"model_id\"]\n",
 " \n",
 " try:\n",
-" if provider == \"openai\" and \"openai\" in clients:\n",
-" response = clients[\"openai\"].chat.completions.create(\n",
-" model=model_id,\n",
-" messages=[\n",
-" {\"role\": \"system\", \"content\": \"You are a helpful assistant that generates realistic datasets.\"},\n",
-" {\"role\": \"user\", \"content\": prompt}\n",
-" ],\n",
-" temperature=temperature\n",
-" )\n",
-" return response.choices[0].message.content\n",
+" response = clients[provider].chat.completions.create(\n",
+" model=model_id,\n",
+" messages=[\n",
+" {\"role\": \"system\", \"content\": \"You are a helpful assistant that generates realistic datasets.\"},\n",
+" {\"role\": \"user\", \"content\": prompt}\n",
+" ],\n",
+" temperature=temperature\n",
+" )\n",
+" return response.choices[0].message.content\n",
-" \n",
-" elif provider == \"anthropic\" and \"anthropic\" in clients:\n",
-" response = clients[\"anthropic\"].messages.create(\n",
-" model=model_id,\n",
-" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
-" system=\"You are a helpful assistant that generates realistic datasets.\",\n",
-" temperature=temperature,\n",
-" max_tokens=4000\n",
-" )\n",
-" return response.content[0].text\n",
-" \n",
-" elif provider == \"google\" and api_keys[\"google\"]:\n",
-" model = genai.GenerativeModel(model_id)\n",
-" response = model.generate_content(\n",
-" prompt,\n",
-" generation_config=genai.types.GenerationConfig(temperature=temperature)\n",
-" )\n",
-" return response.text\n",
-" \n",
-" elif provider == \"deepseek\" and \"deepseek\" in clients:\n",
-" response = clients[\"deepseek\"].chat.completions.create(\n",
-" model=model_id,\n",
-" messages=[\n",
-" {\"role\": \"system\", \"content\": \"You are a helpful assistant that generates realistic datasets.\"},\n",
-" {\"role\": \"user\", \"content\": prompt}\n",
-" ],\n",
-" temperature=temperature\n",
-" )\n",
-" return response.choices[0].message.content\n",
-" \n",
-" else:\n",
-" raise Exception(f\"API client not available for {provider}\")\n",
 " \n",
 " except Exception as e:\n",
 " raise Exception(f\"Commercial API error: {str(e)}\")\n",
 " \n",
@@ -1671,7 +1632,7 @@
 " print(\"🔄 Testing OpenAI schema generation...\")\n",
 " result = schema_manager.generate_schema_with_llm(\n",
 " \"Generate a dataset for e-commerce customer analysis\",\n",
-" \"GPT-4o Mini\",\n",
+" \"GPT-5 Mini\",\n",
 " 0.7\n",
 " )\n",
 " print(f\"✅ OpenAI schema generation: {len(result)} characters\")\n",
@@ -1845,8 +1806,22 @@
 }
 ],
 "metadata": {
+"kernelspec": {
+"display_name": ".venv",
+"language": "python",
+"name": "python3"
+},
 "language_info": {
-"name": "python"
+"codemirror_mode": {
+"name": "ipython",
+"version": 3
+},
+"file_extension": ".py",
+"mimetype": "text/x-python",
+"name": "python",
+"nbconvert_exporter": "python",
+"pygments_lexer": "ipython3",
+"version": "3.11.10"
 }
 },
 "nbformat": 4,