import os import json import pandas as pd import numpy as np from pathlib import Path import re from datetime import datetime, timedelta from bs4 import BeautifulSoup import html2text from collections import Counter, defaultdict, deque import warnings import time import hashlib import socket import random import zipfile import tempfile import shutil warnings.filterwarnings('ignore') import gradio as gr import chromadb from sentence_transformers import SentenceTransformer from transformers import AutoModelForSequenceClassification, AutoTokenizer from openai import OpenAI import torch # ================================ # USAGE PROTECTION SYSTEM # ================================ class UsageTracker: def __init__(self): self.hourly_limits = defaultdict(lambda: deque()) self.daily_limits = defaultdict(int) self.total_requests = 0 self.total_cost = 0.0 # STRICTER LIMITS for cost control self.max_hourly = 5 # Reduced from 15 self.max_daily = 20 # Reduced from 100 self.max_total = 200 # Reduced from 1000 self.max_daily_cost = 3.0 # $3 daily limit # GPT-4o-mini pricing (approximate cost per request) self.cost_per_request = 0.01 # ~1 cent per request (conservative estimate) def can_make_request(self, user_id): now = datetime.now() hour_ago = now - timedelta(hours=1) # Clean old hourly requests while self.hourly_limits[user_id] and self.hourly_limits[user_id][0] < hour_ago: self.hourly_limits[user_id].popleft() # Check limits if len(self.hourly_limits[user_id]) >= self.max_hourly: return False, f"ā° Hourly limit reached ({self.max_hourly} requests/hour). Please try again in a few minutes." if self.daily_limits[user_id] >= self.max_daily: return False, f"šŸ“… Daily limit reached ({self.max_daily} requests/day). Come back tomorrow!" if self.total_requests >= self.max_total: return False, "🚫 Service temporarily unavailable due to high usage. Please try again later." # Check estimated daily cost if self.total_cost >= self.max_daily_cost: return False, f"šŸ’° Daily cost limit (${self.max_daily_cost}) reached. Service will reset tomorrow." return True, "OK" def record_request(self, user_id): now = datetime.now() self.hourly_limits[user_id].append(now) self.daily_limits[user_id] += 1 self.total_requests += 1 self.total_cost += self.cost_per_request # Track estimated cost def get_usage_info(self): """Get current usage info for display""" return f""" **šŸ“Š Current Usage:** - Total requests today: {self.total_requests}/{self.max_total} - Estimated cost today: ${self.total_cost:.2f}/${self.max_daily_cost} - Service status: {'🟢 Active' if self.total_requests < self.max_total and self.total_cost < self.max_daily_cost else 'šŸ”“ Limited'} """ # Initialize tracker - ADD THIS LINE! usage_tracker = UsageTracker() def protected_function(func): def wrapper(*args, **kwargs): user_id = hashlib.md5(str(time.time()).encode()).hexdigest()[:8] allowed, message = usage_tracker.can_make_request(user_id) if not allowed: return f"āš ļø {message}. Please try again later." usage_tracker.record_request(user_id) return func(*args, **kwargs) return wrapper # ================================ # LINKEDIN DATA PROCESSOR # ================================ class LinkedInDataProcessor: def __init__(self, data_path): self.data_path = Path(data_path) self.profile_data = {} self.processed_data = {} self.articles_content = [] self.rag_documents = [] def load_all_data(self): """Load all LinkedIn JSON and CSV files including HTML articles""" print("šŸ”„ Loading LinkedIn data...") file_mappings = { 'Profile.csv': 'basic_info', 'Connections.csv': 'connections', 'Experience.csv': 'experience', 'Education.csv': 'education', 'Skills.csv': 'skills', 'Certifications.csv': 'certifications', 'Articles.csv': 'articles_metadata', 'Comments.csv': 'comments', 'Shares.csv': 'shares', 'Positions.csv': 'positions', 'Languages.csv': 'languages', 'Projects.csv': 'projects', 'Publications.csv': 'publications', 'Recommendations.csv': 'recommendations', 'Endorsement_Given_Info.csv': 'endorsements_given', 'Endorsement_Received_Info.csv': 'endorsements_received', 'Courses.csv': 'courses', 'Learning.csv': 'learning_paths', 'Interests.csv': 'interests', 'Company Follow.csv': 'companies_followed', 'Reactions.csv': 'reactions', 'Views.csv': 'views', 'Saved_Items.csv': 'saved_items', } loaded_count = 0 for file_name, data_type in file_mappings.items(): file_path = self.data_path / file_name if file_path.exists(): try: df = pd.read_csv(file_path, encoding='utf-8') self.profile_data[data_type] = df print(f"āœ… Loaded {file_name}: {len(df)} records") loaded_count += 1 except Exception as e: print(f"āš ļø Could not load {file_name}: {str(e)}") else: print(f"šŸ“ {file_name} not found") self.load_html_articles() print(f"šŸŽ‰ Successfully loaded {loaded_count} data files") return loaded_count > 0 def load_html_articles(self): """Load and parse HTML articles""" print("\nšŸ“° Loading HTML articles...") articles_paths = [ self.data_path / "Articles" / "Articles", self.data_path / "Articles", self.data_path / "articles" / "articles", self.data_path / "articles", ] found_path = None for path in articles_paths: if path.exists(): found_path = path break if not found_path: print("šŸ“ Articles folder not found") return html_files = list(found_path.glob("*.html")) if not html_files: print("šŸ“„ No HTML files found") return print(f"šŸ“„ Found {len(html_files)} HTML articles") articles_data = [] for html_file in html_files: try: article_data = self.parse_html_article(html_file) if article_data: articles_data.append(article_data) except Exception as e: print(f"āš ļø Error parsing {html_file.name}: {str(e)}") self.articles_content = articles_data self.profile_data['articles_html'] = articles_data print(f"šŸŽ‰ Successfully loaded {len(articles_data)} articles") def extract_linkedin_url_from_html(self, html_content, filename): """Extract LinkedIn URL from HTML article content""" try: soup = BeautifulSoup(html_content, 'html.parser') # Look for canonical URL canonical = soup.find('link', {'rel': 'canonical'}) if canonical and canonical.get('href'): url = canonical.get('href') if 'linkedin.com' in url: return url # Look for meta property og:url og_url = soup.find('meta', {'property': 'og:url'}) if og_url and og_url.get('content'): url = og_url.get('content') if 'linkedin.com' in url: return url # Look for any LinkedIn URLs in the content linkedin_pattern = r'https?://(?:www\.)?linkedin\.com/pulse/[^"\s<>]+' matches = re.findall(linkedin_pattern, html_content) if matches: return matches[0] # Fallback: construct URL from filename if filename: clean_name = re.sub(r'^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\.\d+-', '', filename) clean_name = clean_name.replace('.html', '') if len(clean_name) > 10 and '-' in clean_name: return f"https://www.linkedin.com/pulse/{clean_name}/" return None except Exception as e: print(f"Error extracting LinkedIn URL: {e}") return None def parse_html_article(self, file_path): """Parse individual HTML article with LinkedIn URL extraction""" with open(file_path, 'r', encoding='utf-8') as f: content = f.read() soup = BeautifulSoup(content, 'html.parser') # Extract title title_elem = soup.find('h1') or soup.find('title') title = title_elem.get_text().strip() if title_elem else self.extract_title_from_filename(file_path.name) # Extract LinkedIn URL linkedin_url = self.extract_linkedin_url_from_html(content, file_path.name) # Extract content content_selectors = ['article', '.article-content', '.post-content', 'main', '.content', 'body'] article_content = None for selector in content_selectors: article_content = soup.select_one(selector) if article_content: break if not article_content: article_content = soup.find('body') or soup # Convert to plain text h = html2text.HTML2Text() h.ignore_links = True h.ignore_images = True plain_text = h.handle(str(article_content)).strip() # Extract metadata words = re.findall(r'\b\w+\b', plain_text.lower()) return { 'filename': file_path.name, 'title': title, 'content': str(article_content), 'plain_text': plain_text, 'date_published': self.extract_date_from_filename(file_path.name), 'word_count': len(words), 'topics': self.extract_topics(plain_text), 'writing_style': self.analyze_writing_style(plain_text), 'linkedin_url': linkedin_url } def extract_title_from_filename(self, filename): """Extract readable title from filename""" title = filename.replace('.html', '') title = re.sub(r'^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\.\d+-', '', title) title = title.replace('-', ' ').replace('_', ' ') return ' '.join(word.capitalize() for word in title.split()) def extract_date_from_filename(self, filename): """Extract publication date from filename""" date_match = re.search(r'(\d{4}-\d{2}-\d{2})', filename) return date_match.group(1) if date_match else '' def analyze_writing_style(self, text): """Analyze writing style indicators""" text_lower = text.lower() sentences = re.split(r'[.!?]+', text) words = re.findall(r'\b\w+\b', text_lower) return { 'word_count': len(words), 'sentence_count': len(sentences), 'avg_sentence_length': len(words) / max(len(sentences), 1), 'question_count': text.count('?'), 'first_person_usage': len(re.findall(r'\b(i|me|my|myself|we|us|our)\b', text_lower)), 'technical_terms': sum(text_lower.count(term) for term in ['algorithm', 'framework', 'methodology', 'data', 'analysis', 'technology']), } def extract_topics(self, text, max_topics=10): """Extract main topics from text""" words = re.findall(r'\b[a-zA-Z]{3,}\b', text.lower()) stop_words = {'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'been', 'have', 'has', 'had'} word_freq = Counter(word for word in words if word not in stop_words and len(word) > 3) return [word for word, count in word_freq.most_common(max_topics)] def create_rag_documents(self): """Create documents for RAG system with LinkedIn URLs""" self.rag_documents = [] # Process profile data for data_type, data_content in self.profile_data.items(): if isinstance(data_content, pd.DataFrame) and not data_content.empty: self.process_dataframe_to_documents(data_content, data_type) elif isinstance(data_content, list) and data_content: self.process_list_to_documents(data_content, data_type) # Process articles with LinkedIn URLs if self.articles_content: for article in self.articles_content: if article['plain_text'].strip(): self.rag_documents.append({ 'text': article['plain_text'], 'title': article['title'], 'source_type': 'article', 'date_published': article['date_published'], 'word_count': article['word_count'], 'topics': article['topics'], 'linkedin_url': article.get('linkedin_url', ''), 'filename': article['filename'] }) print(f"šŸ“š Created {len(self.rag_documents)} RAG documents with LinkedIn URLs") return self.rag_documents def process_dataframe_to_documents(self, df, data_type): """Convert DataFrame to RAG documents""" if data_type == 'experience': for _, row in df.iterrows(): text = f"Experience: {row.get('Title', '')} at {row.get('Company', '')}\n" text += f"Duration: {row.get('Started On', '')} - {row.get('Finished On', 'Present')}\n" text += f"Description: {row.get('Description', '')}" self.rag_documents.append({ 'text': text, 'title': f"{row.get('Title', '')} at {row.get('Company', '')}", 'source_type': 'experience', 'linkedin_url': '' }) elif data_type == 'education': for _, row in df.iterrows(): text = f"Education: {row.get('Degree', '')} in {row.get('Field Of Study', '')} from {row.get('School', '')}\n" text += f"Duration: {row.get('Start Date', '')} - {row.get('End Date', '')}" self.rag_documents.append({ 'text': text, 'title': f"{row.get('Degree', '')} - {row.get('School', '')}", 'source_type': 'education', 'linkedin_url': '' }) elif data_type == 'skills': if 'Skill' in df.columns: skills_text = "Professional Skills: " + ", ".join(df['Skill'].dropna().tolist()) self.rag_documents.append({ 'text': skills_text, 'title': 'Professional Skills', 'source_type': 'skills', 'linkedin_url': '' }) elif data_type == 'certifications': if 'Name' in df.columns: certs_text = "Certifications: " + ", ".join(df['Name'].dropna().tolist()) self.rag_documents.append({ 'text': certs_text, 'title': 'Certifications', 'source_type': 'certifications', 'linkedin_url': '' }) elif data_type == 'projects': for _, row in df.iterrows(): text = f"Project: {row.get('Title', '')}\n" text += f"Description: {row.get('Description', '')}\n" text += f"URL: {row.get('Url', '')}" project_url = row.get('Url', '') linkedin_url = project_url if 'linkedin.com' in str(project_url) else '' self.rag_documents.append({ 'text': text, 'title': row.get('Title', 'Project'), 'source_type': 'projects', 'linkedin_url': linkedin_url }) def process_list_to_documents(self, data_list, data_type): """Convert list data to RAG documents""" if data_type == 'articles_html': return def get_profile_summary(self): """Get comprehensive profile summary""" summary = { 'total_documents': len(self.rag_documents), 'articles_count': len(self.articles_content), 'data_types': list(self.profile_data.keys()), 'skills_count': len(self.profile_data.get('skills', [])), 'experience_count': len(self.profile_data.get('experience', [])), 'education_count': len(self.profile_data.get('education', [])), } if self.articles_content: total_words = sum(article['word_count'] for article in self.articles_content) summary['total_article_words'] = total_words summary['avg_article_length'] = total_words // len(self.articles_content) return summary # ================================ # RAG SYSTEM # ================================ class LinkedInRAGSystem: def __init__(self, chroma_db_path): self.chroma_db_path = chroma_db_path self.embedding_model = None self.cross_encoder_model = None self.cross_encoder_tokenizer = None self.chroma_client = None self.collection = None self.openai_client = None def initialize_models(self): """Initialize all required models""" print("šŸ”„ Initializing RAG models...") # Initialize OpenAI client try: api_key = os.getenv('OPENAI_API_KEY') if not api_key: print("āŒ OpenAI API key not found in environment variables") return False self.openai_client = OpenAI(api_key=api_key) print("āœ… OpenAI client initialized") except Exception as e: print(f"āŒ Failed to initialize OpenAI client: {e}") return False # Load embedding model try: self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2') print("āœ… Embedding model loaded") except Exception as e: print(f"āŒ Failed to load embedding model: {e}") return False # Load cross-encoder for reranking try: cross_encoder_name = "cross-encoder/ms-marco-MiniLM-L-6-v2" self.cross_encoder_tokenizer = AutoTokenizer.from_pretrained(cross_encoder_name) self.cross_encoder_model = AutoModelForSequenceClassification.from_pretrained(cross_encoder_name) print("āœ… Cross-encoder model loaded") except Exception as e: print(f"āŒ Failed to load cross-encoder: {e}") return False # Initialize ChromaDB try: self.chroma_client = chromadb.PersistentClient(path=self.chroma_db_path) print("āœ… ChromaDB initialized") except Exception as e: print(f"āŒ Failed to initialize ChromaDB: {e}") return False return True def create_vector_store(self, documents): """Create vector store from documents with enhanced metadata""" print("šŸ”„ Creating vector store with LinkedIn URLs...") # Delete existing collection if it exists try: self.chroma_client.delete_collection("linkedin_profile") except: pass # Create new collection self.collection = self.chroma_client.create_collection("linkedin_profile") # Generate embeddings texts = [doc['text'] for doc in documents] embeddings = self.embedding_model.encode(texts, show_progress_bar=True) # Prepare data for ChromaDB with enhanced metadata ids = [f"doc_{i}" for i in range(len(documents))] metadatas = [] for doc in documents: metadata = {} for k, v in doc.items(): if k != 'text': if k == 'linkedin_url' and v: metadata[k] = str(v) elif k == 'date_published' and v: metadata[k] = str(v) elif k == 'topics' and isinstance(v, list): metadata[k] = ', '.join(v) if v else '' elif v is not None: metadata[k] = str(v) else: metadata[k] = '' metadatas.append(metadata) # Add to collection batch_size = 100 for i in range(0, len(texts), batch_size): end_idx = min(i + batch_size, len(texts)) self.collection.add( embeddings=embeddings[i:end_idx].tolist(), documents=texts[i:end_idx], metadatas=metadatas[i:end_idx], ids=ids[i:end_idx] ) print(f"āœ… Vector store created with {self.collection.count()} documents") return True def retrieve_and_rerank(self, query, initial_k=20, final_n=5): """Retrieve and rerank documents""" if not self.collection: return [] try: # Initial retrieval query_embedding = self.embedding_model.encode(query).tolist() results = self.collection.query( query_embeddings=[query_embedding], n_results=initial_k, include=['documents', 'metadatas'] ) if not results['documents'][0]: return [] # Prepare for reranking documents = results['documents'][0] metadatas = results['metadatas'][0] # Rerank with cross-encoder pairs = [[query, doc] for doc in documents] inputs = self.cross_encoder_tokenizer( pairs, padding=True, truncation=True, return_tensors='pt', max_length=512 ) with torch.no_grad(): scores = self.cross_encoder_model(**inputs).logits.squeeze() if scores.dim() == 0: scores = [scores.item()] else: scores = scores.tolist() # Sort by score scored_docs = list(zip(documents, metadatas, scores)) scored_docs.sort(key=lambda x: x[2], reverse=True) # Return top documents return [{'text': doc, 'metadata': meta, 'score': score} for doc, meta, score in scored_docs[:final_n]] except Exception as e: print(f"Error in retrieve_and_rerank: {e}") return [] def generate_response(self, query, retrieved_docs): """Generate response using OpenAI""" if not retrieved_docs: return "I couldn't find relevant information to answer your question." context = "\n\n".join([doc['text'] for doc in retrieved_docs]) messages = [ { "role": "system", "content": """You are an AI assistant representing a LinkedIn profile. Answer questions based ONLY on the provided context from the LinkedIn profile data and articles. Guidelines: - Be professional and personable - Provide specific details when available - If information isn't in the context, politely say so - Use first person when appropriate (since you're representing the profile owner) - Keep responses concise but informative - Do not mention or reference the sources in your response - that will be handled separately""" }, { "role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}\n\nPlease answer based on the LinkedIn profile information provided:" } ] try: response = self.openai_client.chat.completions.create( model="gpt-4o-mini", messages=messages, max_tokens=400, temperature=0.3, top_p=0.9 ) return response.choices[0].message.content.strip() except Exception as e: return f"Sorry, I encountered an error generating a response: {str(e)}" def format_sources_with_links(self, retrieved_docs): """Format sources with clickable LinkedIn links""" if not retrieved_docs: return "" sources_html = "

**šŸ“š Sources:**
" for i, doc in enumerate(retrieved_docs, 1): metadata = doc['metadata'] source_type = metadata.get('source_type', 'Unknown') title = metadata.get('title', 'Untitled') linkedin_url = metadata.get('linkedin_url', '') date_published = metadata.get('date_published', '') # Create source entry if linkedin_url: # Clickable LinkedIn link source_entry = f"šŸ”— {title}" if date_published: source_entry += f" ({date_published})" else: # No link available source_entry = f"šŸ“„ **{title}**" if date_published: source_entry += f" ({date_published})" # Add source type badge type_color = { 'article': '#0077B5', 'experience': '#2D7D32', 'education': '#7B1FA2', 'skills': '#F57C00', 'projects': '#D32F2F', 'certifications': '#1976D2' }.get(source_type, '#666') source_type_badge = f"{source_type.title()}" sources_html += f"{i}. {source_entry}{source_type_badge}
" return sources_html def chat(self, query): """Main chat function with enhanced source linking""" retrieved_docs = self.retrieve_and_rerank(query) response = self.generate_response(query, retrieved_docs) # Add formatted sources with links sources_info = self.format_sources_with_links(retrieved_docs) return response + sources_info # ================================ # UTILITY FUNCTIONS # ================================ def extract_uploaded_data(zip_file_path, extract_to): """Extract uploaded LinkedIn data zip file""" try: with zipfile.ZipFile(zip_file_path, 'r') as zip_ref: zip_ref.extractall(extract_to) print(f"āœ… Extracted data to {extract_to}") return True except Exception as e: print(f"āŒ Failed to extract zip file: {e}") return False def initialize_linkedin_chatbot(data_path): """Initialize the complete LinkedIn chatbot system with clickable sources""" print("šŸš€ Initializing LinkedIn Profile Chatbot with clickable sources...") # Step 1: Load and process data processor = LinkedInDataProcessor(data_path) if not processor.load_all_data(): return None, "Failed to load LinkedIn data. Please check the uploaded data." # Step 2: Create RAG documents with LinkedIn URLs documents = processor.create_rag_documents() if not documents: return None, "No documents created from LinkedIn data." # Count articles with LinkedIn URLs articles_with_urls = sum(1 for doc in documents if doc.get('linkedin_url') and doc.get('source_type') == 'article') # Step 3: Initialize RAG system temp_db_path = tempfile.mkdtemp() rag_system = LinkedInRAGSystem(temp_db_path) if not rag_system.initialize_models(): return None, "Failed to initialize RAG models." # Step 4: Create vector store if not rag_system.create_vector_store(documents): return None, "Failed to create vector store." # Step 5: Get profile summary summary = processor.get_profile_summary() # Create a clean status message summary_text = f""" ### āœ… **AI Assistant Ready with Clickable Sources!** I have successfully analyzed the LinkedIn profile data including **{summary['total_documents']} documents** and **{summary['articles_count']} published articles** ({articles_with_urls} with direct LinkedIn links). **šŸ’¼ What I can help you discover:** - šŸŽÆ **Professional Journey** - Career progression and experience - šŸ› ļø **Skills & Expertise** - Technical and professional capabilities - šŸŽ“ **Educational Background** - Academic achievements and learning - šŸ“ **Published Content** - Articles with direct LinkedIn links - šŸš€ **Projects & Achievements** - Notable work and accomplishments - 🌐 **Professional Network** - Industry connections and activities **šŸ”— Enhanced Features:** - **Clickable Sources** - Direct links to LinkedIn articles and content - **Smart Source Attribution** - See exactly where information comes from - **Professional Context** - Answers based on real LinkedIn profile data **Ready to explore this professional profile!** Ask me anything you'd like to know. """ return rag_system, summary_text # ================================ # GRADIO INTERFACE # ================================ # Global variables current_rag_system = None current_status = "Upload your LinkedIn data to get started!" # Add this anywhere in your Gradio interface after the status_display usage_info = gr.Markdown(value=usage_tracker.get_usage_info()) def process_upload(zip_file): """Process uploaded LinkedIn data""" global current_rag_system, current_status if zip_file is None: return "Please upload a LinkedIn data ZIP file first.", "" try: # Create temporary directory for extraction temp_dir = tempfile.mkdtemp() # Extract the uploaded file if extract_uploaded_data(zip_file.name, temp_dir): # Initialize the RAG system rag_system, status_message = initialize_linkedin_chatbot(temp_dir) if rag_system: current_rag_system = rag_system current_status = status_message return status_message, "āœ… **Ready to chat!** Ask me anything about the LinkedIn profile." else: return f"āŒ Failed to initialize: {status_message}", "" else: return "āŒ Failed to extract uploaded file.", "" except Exception as e: return f"āŒ Error processing upload: {str(e)}", "" @protected_function def chat_with_profile(message, history): """Chat function with protection""" global current_rag_system if current_rag_system is None: bot_response = "āŒ **Please upload your LinkedIn data first using the file upload above.**" history.append((message, bot_response)) return history, "" if not message.strip(): bot_response = "šŸ‘‹ Please enter a question about the LinkedIn profile!" history.append((message, bot_response)) return history, "" try: bot_response = current_rag_system.chat(message) history.append((message, bot_response)) except Exception as e: bot_response = f"āŒ **Error**: {str(e)}" history.append((message, bot_response)) return history, "" # Premium CSS premium_css = """ /* Import Google Fonts */ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap'); /* Main container styling */ .gradio-container { font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); min-height: 100vh; } /* Header styling */ .main-header { background: linear-gradient(135deg, #0077B5 0%, #00A0DC 50%, #40E0D0 100%); color: white; padding: 2rem; border-radius: 20px; margin-bottom: 2rem; text-align: center; box-shadow: 0 10px 30px rgba(0,119,181,0.3); border: 1px solid rgba(255,255,255,0.2); backdrop-filter: blur(10px); } .main-header h1 { font-size: 2.5rem; font-weight: 700; margin-bottom: 0.5rem; text-shadow: 2px 2px 4px rgba(0,0,0,0.3); } .main-header p { font-size: 1.2rem; opacity: 0.95; font-weight: 400; } /* Status card styling */ .status-card { background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%); border-radius: 16px; padding: 1.5rem; margin-bottom: 2rem; box-shadow: 0 8px 25px rgba(0,0,0,0.1); border: 1px solid rgba(0,119,181,0.1); } /* Chat container */ .chat-container { background: white; border-radius: 20px; padding: 1.5rem; box-shadow: 0 10px 40px rgba(0,0,0,0.1); border: 1px solid rgba(0,119,181,0.1); max-width: 900px; margin: 0 auto; } /* Upload container */ .upload-container { background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%); border-radius: 16px; padding: 1.5rem; margin-bottom: 2rem; border: 2px dashed #0077B5; } /* Button styling */ .primary-btn { background: linear-gradient(135deg, #0077B5 0%, #00A0DC 100%); color: white; border: none; border-radius: 12px; padding: 0.75rem 1.5rem; font-weight: 600; transition: all 0.3s ease; box-shadow: 0 4px 15px rgba(0,119,181,0.3); } .primary-btn:hover { transform: translateY(-2px); box-shadow: 0 6px 20px rgba(0,119,181,0.4); } /* Example buttons */ .example-btn { background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%); color: #0077B5; border: 1px solid #0077B5; border-radius: 25px; padding: 0.6rem 1.2rem; font-weight: 500; margin: 0.3rem; transition: all 0.3s ease; font-size: 0.9rem; } .example-btn:hover { background: linear-gradient(135deg, #0077B5 0%, #00A0DC 100%); color: white; transform: translateY(-1px); box-shadow: 0 4px 12px rgba(0,119,181,0.3); } /* Input styling */ .input-text { border: 2px solid #e1e8ed; border-radius: 12px; padding: 1rem; font-size: 1rem; transition: all 0.3s ease; background: #f8fafc; } .input-text:focus { border-color: #0077B5; box-shadow: 0 0 0 3px rgba(0,119,181,0.1); background: white; } /* Chatbot styling */ .chatbot { border: none; border-radius: 16px; box-shadow: inset 0 2px 10px rgba(0,0,0,0.05); } /* Accordion styling */ .accordion { background: linear-gradient(135deg, #f8fafc 0%, #e1e8ed 100%); border-radius: 12px; border: 1px solid #e1e8ed; } """ # Create Gradio interface with gr.Blocks(css=premium_css, title="LinkedIn Profile AI Assistant", theme=gr.themes.Soft()) as interface: # Main Header gr.HTML("""

šŸ¤– LinkedIn Profile AI Assistant

Intelligent insights with clickable sources to original LinkedIn content

""") # Upload Section with gr.Column(elem_classes=["upload-container"]): gr.Markdown("### šŸ“ **Upload Your LinkedIn Data**") gr.Markdown("Upload your LinkedIn data export ZIP file to get started. [Learn how to export your LinkedIn data](https://www.linkedin.com/help/linkedin/answer/a1339364)") with gr.Row(): upload_file = gr.File( label="LinkedIn Data ZIP File", file_types=[".zip"], type="filepath" ) upload_btn = gr.Button( "šŸš€ Process Data", variant="primary", elem_classes=["primary-btn"] ) # Status Display status_display = gr.Markdown( value="šŸ“ **Upload your LinkedIn data ZIP file above to get started!**", elem_classes=["status-card"] ) chat_status = gr.Markdown( value="", elem_classes=["status-card"] ) # Main Chat Interface with gr.Column(elem_classes=["chat-container"]): # Chat Display chatbot = gr.Chatbot( label="šŸ’¬ Professional Profile Assistant", height=550, show_copy_button=True, avatar_images=("šŸ‘¤", "šŸ¤–"), bubble_full_width=False, elem_classes=["chatbot"] ) # Input Section with gr.Row(): with gr.Column(scale=5): msg = gr.Textbox( placeholder="Ask about experience, skills, education, articles, or any aspect of the professional profile...", label="Your Question", lines=2, max_lines=4, elem_classes=["input-text"] ) with gr.Column(scale=1, min_width=100): submit_btn = gr.Button( "Send šŸ’¬", variant="primary", size="lg", elem_classes=["primary-btn"] ) # Quick Action Buttons with gr.Row(): clear_btn = gr.Button("šŸ—‘ļø Clear Chat", variant="secondary", size="sm") # Enhanced Examples Section with gr.Accordion("šŸ’” Example Questions - Click to Try", open=False, elem_classes=["accordion"]) as examples_accordion: gr.Markdown("### šŸŽÆ **Professional Experience & Career**") with gr.Row(): exp_q1 = gr.Button("What is the professional background?", elem_classes=["example-btn"], size="sm") exp_q2 = gr.Button("Describe the career progression", elem_classes=["example-btn"], size="sm") exp_q3 = gr.Button("What are the key achievements?", elem_classes=["example-btn"], size="sm") gr.Markdown("### šŸ› ļø **Skills & Expertise**") with gr.Row(): skill_q1 = gr.Button("What skills and expertise are highlighted?", elem_classes=["example-btn"], size="sm") skill_q2 = gr.Button("What technologies are mentioned?", elem_classes=["example-btn"], size="sm") skill_q3 = gr.Button("What are the main areas of expertise?", elem_classes=["example-btn"], size="sm") gr.Markdown("### šŸ“š **Education & Learning**") with gr.Row(): edu_q1 = gr.Button("Tell me about the educational background", elem_classes=["example-btn"], size="sm") edu_q2 = gr.Button("What certifications are mentioned?", elem_classes=["example-btn"], size="sm") edu_q3 = gr.Button("What courses or learning paths are included?", elem_classes=["example-btn"], size="sm") gr.Markdown("### šŸ“ **Articles & Content**") with gr.Row(): content_q1 = gr.Button("What articles have been published?", elem_classes=["example-btn"], size="sm") content_q2 = gr.Button("What topics are covered in the writing?", elem_classes=["example-btn"], size="sm") content_q3 = gr.Button("What is the writing style like?", elem_classes=["example-btn"], size="sm") # Connect example buttons to input example_questions = [ (exp_q1, "What is the professional background and experience?"), (exp_q2, "Describe the career progression and professional journey"), (exp_q3, "What are the key achievements and accomplishments?"), (skill_q1, "What skills and expertise are highlighted in the profile?"), (skill_q2, "What technologies, tools, and platforms are mentioned?"), (skill_q3, "What are the main areas of expertise and specialization?"), (edu_q1, "Tell me about the educational background and qualifications"), (edu_q2, "What certifications and professional credentials are mentioned?"), (edu_q3, "What courses, training, or learning paths are included?"), (content_q1, "What articles and content have been published?"), (content_q2, "What topics and themes are covered in the published writing?"), (content_q3, "What is the writing style and approach in the articles?") ] for btn, question in example_questions: btn.click(lambda q=question: q, outputs=msg) # About Section with gr.Accordion("ā„¹ļø About This AI Assistant", open=False, elem_classes=["accordion"]): gr.Markdown(""" ### šŸš€ **Advanced AI-Powered Profile Analysis with Clickable Sources** This intelligent assistant uses cutting-edge **Retrieval-Augmented Generation (RAG)** technology to provide accurate, contextual answers about LinkedIn profiles with direct links to original content. **šŸ”§ Technical Capabilities:** - **Vector Search**: Semantic similarity matching for relevant information retrieval - **Cross-Encoder Reranking**: Advanced relevance scoring for precision - **GPT-4 Generation**: Natural, human-like response generation - **Multi-Source Integration**: Combines structured data and article content - **Clickable Sources**: Direct links to original LinkedIn articles and content **šŸ“Š Data Sources Analyzed:** - Professional experience and job history - Educational background and certifications - Skills, endorsements, and expertise areas - Published articles and thought leadership content (with clickable links) - Projects, achievements, and recommendations - Professional network activities and engagement **šŸ”’ Privacy & Security:** - Only uses uploaded LinkedIn profile data - No external data access or web browsing - Responses based solely on uploaded content - Secure processing with no data retention **⚔ Built with:** - Gradio for the interface - OpenAI GPT-4 for generation - ChromaDB for vector storage - Sentence Transformers for embeddings - Custom LinkedIn URL extraction """) # Event Handlers upload_btn.click( process_upload, inputs=[upload_file], outputs=[status_display, chat_status] ) msg.submit(chat_with_profile, inputs=[msg, chatbot], outputs=[chatbot, msg]) submit_btn.click(chat_with_profile, inputs=[msg, chatbot], outputs=[chatbot, msg]) clear_btn.click(lambda: [], outputs=chatbot) # Add this to your existing event handlers submit_btn.click( lambda: usage_tracker.get_usage_info(), outputs=usage_info, queue=False ) # Footer gr.HTML("""

šŸ¤– LinkedIn Profile AI Assistant | Powered by Advanced RAG Technology with Clickable Sources

Built with ā¤ļø using Gradio, OpenAI GPT-4, ChromaDB, and Custom LinkedIn URL extraction

""") # Launch the interface # Launch the interface if __name__ == "__main__": interface.launch()