Week 2: Technical Assistant - Salah (Bootcamp)
This commit is contained in:
@@ -0,0 +1,35 @@
|
||||
from models.message import Message
|
||||
|
||||
class ConversationManager:
    """Keeps an ordered chat transcript and renders it in API format.

    The system prompt is stored separately and prepended on every call to
    get_api_messages(), so it is never part of self.messages.
    """

    def __init__(self, system_prompt):
        self.system_prompt = system_prompt
        self.messages = []

    def add_user_message(self, content):
        """Record a user turn, logging a preview and running counts."""
        print(f"[Conversation] Adding user message: {content[:100]}...")
        print(f"[Conversation] Message length: {len(content)} chars")
        self.messages.append(Message("user", content))
        print(f"[Conversation] Total messages: {len(self.messages)}")

    def add_assistant_message(self, content):
        """Record an assistant turn, logging a preview and running counts."""
        print(f"[Conversation] Adding assistant message: {content[:100]}...")
        print(f"[Conversation] Message length: {len(content)} chars")
        self.messages.append(Message("assistant", content))
        print(f"[Conversation] Total messages: {len(self.messages)}")

    def get_api_messages(self):
        """Return the transcript as [{"role", "content"}, ...] dicts.

        The system prompt always comes first; the stored turns follow in
        insertion order.
        """
        # Convert to format expected by APIs
        api_messages = [{"role": "system", "content": self.system_prompt}]
        api_messages += [
            {"role": turn.role, "content": turn.content} for turn in self.messages
        ]

        # Calculate total context size
        total_chars = sum(len(entry["content"]) for entry in api_messages)
        estimated_tokens = total_chars // 4  # Rough estimate

        print("[Conversation] API messages prepared:")
        print(f"  - Total messages: {len(api_messages)} (including system)")
        print(f"  - Total characters: {total_chars}")
        print(f"  - Estimated tokens: {estimated_tokens}")

        return api_messages
|
||||
@@ -0,0 +1,124 @@
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
import tempfile
|
||||
import wave
|
||||
from interfaces.ai_client import AudioService
|
||||
|
||||
class GeminiAudioService(AudioService):
    """Gemini-backed implementation of the AudioService interface.

    speech_to_text uploads an audio file and asks a Gemini model for a
    transcript; text_to_speech synthesizes speech into a temporary WAV
    file. Both methods log progress and token usage to stdout and return
    None on failure instead of raising.
    """

    def __init__(self, api_key, stt_model, tts_model, voice_name):
        # One client instance is shared by both the STT and TTS calls.
        self.client = genai.Client(api_key=api_key)
        self.stt_model = stt_model
        self.tts_model = tts_model
        self.voice_name = voice_name

    def _print_usage(self, response, tag, in_label, out_label, in_rate, out_rate):
        """Log token counts and an estimated cost for one generate_content call.

        Rates are dollars per 1M tokens. Does nothing when the response
        carries no usage_metadata.
        """
        if not hasattr(response, 'usage_metadata'):
            return
        usage = response.usage_metadata
        input_tokens = usage.prompt_token_count
        output_tokens = usage.candidates_token_count
        cost = (input_tokens * in_rate + output_tokens * out_rate) / 1_000_000

        print(f"{tag} Token usage:")
        print(f"  - Input tokens ({in_label}): {input_tokens}")
        print(f"  - Output tokens ({out_label}): {output_tokens}")
        print(f"  - Total tokens: {usage.total_token_count}")
        print(f"  - Estimated cost: ${cost:.6f}")

    def speech_to_text(self, audio_file):
        """Transcribe the speech in ``audio_file`` and return the text.

        Returns None if the upload or transcription fails.
        """
        print(f"[Gemini STT] Processing audio file: {audio_file}")
        print(f"[Gemini STT] Model: {self.stt_model}")

        try:
            # Get file size for logging
            import os
            file_size = os.path.getsize(audio_file)
            print(f"[Gemini STT] Audio file size: {file_size} bytes")

            print("[Gemini STT] Uploading to Gemini...")
            uploaded_file = self.client.files.upload(file=audio_file)
            print(f"[Gemini STT] File uploaded: {uploaded_file.name}")
            # NOTE(review): the uploaded file is never deleted afterwards;
            # consider cleaning it up via the files API once transcribed.

            print("[Gemini STT] Transcribing...")
            response = self.client.models.generate_content(
                model=self.stt_model,
                contents=["Transcribe the speech in this audio file. Return only the spoken words, nothing else.", uploaded_file]
            )

            # response.text can be None when no text part is returned;
            # guard so .strip() cannot raise AttributeError.
            text = (response.text or "").strip()
            print(f"[Gemini STT] Transcription length: {len(text)} chars")
            print(f"[Gemini STT] Transcription: {text[:100]}...")

            # Audio input cost: $3.00/1M tokens, text output: $2.50/1M tokens
            self._print_usage(response, "[Gemini STT]", "audio", "text", 3.00, 2.50)

            print("[Gemini STT] Success")
            return text

        except Exception as e:
            # Best-effort contract: callers receive None rather than an exception.
            print(f"[Gemini STT] Error: {e}")
            return None

    def text_to_speech(self, text):
        """Synthesize ``text`` (truncated to 500 chars) into a temp WAV file.

        Returns the WAV file path, or None on failure. The caller owns the
        file and is responsible for deleting it.
        """
        print("[Gemini TTS] Converting text to speech")
        print(f"[Gemini TTS] Model: {self.tts_model}, Voice: {self.voice_name}")
        print(f"[Gemini TTS] Input text length: {len(text)} chars")

        try:
            # Keep it short for TTS (slicing already handles short inputs).
            text_to_speak = text[:500]
            if len(text) > 500:
                print(f"[Gemini TTS] Text truncated to 500 chars")

            print(f"[Gemini TTS] Text preview: {text_to_speak[:100]}...")
            print("[Gemini TTS] Generating audio...")

            response = self.client.models.generate_content(
                model=self.tts_model,
                contents=f"Say: {text_to_speak}",
                config=types.GenerateContentConfig(
                    response_modalities=["AUDIO"],
                    speech_config=types.SpeechConfig(
                        voice_config=types.VoiceConfig(
                            prebuilt_voice_config=types.PrebuiltVoiceConfig(
                                voice_name=self.voice_name,
                            )
                        )
                    ),
                )
            )

            pcm_data = response.candidates[0].content.parts[0].inline_data.data
            print(f"[Gemini TTS] Raw PCM data size: {len(pcm_data)} bytes")

            # Text input: $0.30/1M tokens, audio output: $10.00/1M tokens
            self._print_usage(response, "[Gemini TTS]", "text", "audio", 0.30, 10.00)

            # Create WAV file. Close our NamedTemporaryFile handle *before*
            # wave.open reopens the path: keeping both handles open fails on
            # Windows, and nothing was ever written through the original handle.
            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
            wav_path = temp_file.name
            temp_file.close()

            with wave.open(wav_path, 'wb') as wav_file:
                wav_file.setnchannels(1)      # mono
                wav_file.setsampwidth(2)      # 16-bit samples
                wav_file.setframerate(24000)  # presumably Gemini emits 24 kHz PCM — TODO confirm
                wav_file.writeframes(pcm_data)

            print(f"[Gemini TTS] WAV file created: {wav_path}")
            print("[Gemini TTS] Success")
            return wav_path

        except Exception as e:
            # Best-effort contract: callers receive None rather than an exception.
            print(f"[Gemini TTS] Error: {e}")
            return None
|
||||
@@ -0,0 +1,91 @@
|
||||
from openai import OpenAI
|
||||
from interfaces.ai_client import AIClient
|
||||
|
||||
class OpenRouterClient(AIClient):
    """AIClient implementation that talks to OpenRouter's OpenAI-compatible API."""

    def __init__(self, api_key, model):
        self.client = OpenAI(
            api_key=api_key,
            base_url="https://openrouter.ai/api/v1"
        )
        self.model = model

    def chat(self, messages):
        """Send ``messages`` to the configured model and return its reply text.

        On any API failure, returns the string "Error: <details>" instead of
        raising.
        """
        print(f"[OpenRouter] Calling {self.model}")
        print(f"[OpenRouter] Messages count: {len(messages)}")

        # Calculate input tokens estimate (rough)
        char_count = sum(len(m.get('content', '')) for m in messages)
        approx_tokens = char_count // 4  # Rough estimate
        print(f"[OpenRouter] Estimated input tokens: {approx_tokens}")

        try:
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                extra_body={"usage": {"include": True}},
            )

            reply = completion.choices[0].message.content
            print(f"[OpenRouter] Response length: {len(reply)} chars")
            print(f"[OpenRouter] Response preview: {reply[:100]}...")

            # Print usage information if available
            usage = getattr(completion, 'usage', None)
            if usage:
                print("[OpenRouter] Token usage:")
                print(f"  - Prompt tokens: {usage.prompt_tokens}")
                print(f"  - Completion tokens: {usage.completion_tokens}")
                print(f"  - Total tokens: {usage.total_tokens}")

                # Try to get cost information
                if getattr(usage, 'cost', None):
                    print(f"  - Cost: ${usage.cost:.6f}")
                else:
                    # Rough cost estimate for GPT-4o-mini ($0.15/1M input, $0.60/1M output)
                    estimated_cost = (usage.prompt_tokens * 0.15 + usage.completion_tokens * 0.60) / 1_000_000
                    print(f"  - Estimated cost: ${estimated_cost:.6f}")

            print("[OpenRouter] Success")
            return reply

        except Exception as e:
            print(f"[OpenRouter] Error: {str(e)}")
            return f"Error: {str(e)}"

    def analyze_code(self, code, language):
        """Ask the model to review ``code`` written in ``language``."""
        print(f"[OpenRouter] Code analysis request - Language: {language}")
        print(f"[OpenRouter] Code length: {len(code)} chars, {len(code.splitlines())} lines")

        request = f"Analyze this {language} code for bugs and improvements:\n\n```{language}\n{code}\n```"
        return self.chat([{"role": "user", "content": request}])

    def generate_linkedin_post(self, topic, tone="professional"):
        """Draft a LinkedIn post about ``topic`` in the requested ``tone``.

        Unknown tones fall back to a generic professional style.
        """
        print(f"[OpenRouter] LinkedIn post request - Topic: {topic[:50]}...")
        print(f"[OpenRouter] Tone: {tone}")

        tone_styles = {
            "professional": "formal, informative, and industry-focused",
            "casual": "friendly, approachable, and conversational",
            "inspirational": "motivating, uplifting, and thought-provoking",
            "educational": "informative, teaching-focused, and valuable"
        }
        style = tone_styles.get(tone, "professional and engaging")

        prompt = f"""Create a LinkedIn post about: {topic}

Make it {style}. Include:
- Hook that grabs attention
- 2-3 key insights or takeaways
- Call to action or question for engagement
- Relevant hashtags (3-5)

Keep it under 300 words and format for LinkedIn readability."""

        return self.chat([{"role": "user", "content": prompt}])
|
||||
Reference in New Issue
Block a user