Akshay Chame
committed on
Commit
Β·
035c4af
1
Parent(s):
af9b2d5
π Add LinkedIn Profile Enhancer Streamlit app with all agents and dependencies
Browse files- .env.example +14 -0
- requirements.txt +12 -1
- src/agents/__init__.py +1 -0
- src/agents/__pycache__/__init__.cpython-311.pyc +0 -0
- src/agents/__pycache__/analyzer_agent.cpython-311.pyc +0 -0
- src/agents/__pycache__/content_agent.cpython-311.pyc +0 -0
- src/agents/__pycache__/orchestrator.cpython-311.pyc +0 -0
- src/agents/__pycache__/scraper_agent.cpython-311.pyc +0 -0
- src/agents/analyzer_agent.py +265 -0
- src/agents/content_agent.py +347 -0
- src/agents/orchestrator.py +186 -0
- src/agents/scraper_agent.py +284 -0
- src/memory/__init__.py +1 -0
- src/memory/__pycache__/__init__.cpython-311.pyc +0 -0
- src/memory/__pycache__/memory_manager.cpython-311.pyc +0 -0
- src/memory/memory_manager.py +241 -0
- src/prompts/__init__.py +1 -0
- src/prompts/__pycache__/agent_prompts.cpython-311.pyc +0 -0
- src/prompts/agent_prompts.py +243 -0
- src/streamlit_app.py +716 -33
- src/utils/__init__.py +1 -0
- src/utils/job_matcher.py +353 -0
- src/utils/linkedin_parser.py +288 -0
.env.example
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Environment Variables for LinkedIn Profile Enhancer
|
2 |
+
|
3 |
+
# OpenAI API Configuration
|
4 |
+
OPENAI_API_KEY=your_openai_api_key_here
|
5 |
+
|
6 |
+
# Apify API Configuration
|
7 |
+
APIFY_API_TOKEN=your_apify_api_token_here
|
8 |
+
|
9 |
+
# Optional: Custom model settings
|
10 |
+
OPENAI_MODEL=gpt-4o-mini
|
11 |
+
TEMPERATURE=0.7
|
12 |
+
|
13 |
+
# Optional: Debugging
|
14 |
+
DEBUG=False
|
requirements.txt
CHANGED
@@ -1,3 +1,14 @@
|
|
1 |
altair
|
|
|
2 |
pandas
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
altair
|
2 |
+
streamlit
|
3 |
pandas
|
4 |
+
requests
|
5 |
+
beautifulsoup4
|
6 |
+
selenium
|
7 |
+
numpy
|
8 |
+
python-dotenv
|
9 |
+
pydantic
|
10 |
+
openai
|
11 |
+
anthropic
|
12 |
+
apify-client
|
13 |
+
plotly
|
14 |
+
Pillow
|
src/agents/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Agents package initialization
|
src/agents/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (195 Bytes). View file
|
|
src/agents/__pycache__/analyzer_agent.cpython-311.pyc
ADDED
Binary file (13.8 kB). View file
|
|
src/agents/__pycache__/content_agent.cpython-311.pyc
ADDED
Binary file (18.4 kB). View file
|
|
src/agents/__pycache__/orchestrator.cpython-311.pyc
ADDED
Binary file (11.3 kB). View file
|
|
src/agents/__pycache__/scraper_agent.cpython-311.pyc
ADDED
Binary file (16 kB). View file
|
|
src/agents/analyzer_agent.py
ADDED
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Profile Analysis Agent
|
2 |
+
import re
|
3 |
+
from typing import Dict, Any, List
|
4 |
+
from collections import Counter
|
5 |
+
|
6 |
+
class AnalyzerAgent:
    """Agent responsible for analyzing LinkedIn profiles and providing insights.

    Produces a completeness score, keyword analysis, content-quality flags,
    strengths/weaknesses, an optional job-match score, recommendations and an
    overall rating. Analysis is best-effort: any internal error yields the
    empty-analysis structure instead of raising.
    """

    # Common English filler words that must never be surfaced as "missing
    # keywords". Without this filter the job-description frequency count is
    # dominated by words like 'with', 'will', 'team' rather than real skills.
    _STOPWORDS = {
        'with', 'will', 'team', 'work', 'this', 'that', 'have', 'from',
        'your', 'their', 'about', 'other', 'which', 'would', 'should',
        'were', 'been', 'being', 'more', 'than', 'them', 'they', 'what',
        'when', 'where', 'while', 'also', 'into', 'over', 'such', 'must',
    }

    def __init__(self):
        # Action verbs used to detect achievement-oriented writing.
        self.action_words = [
            'led', 'managed', 'developed', 'created', 'implemented', 'designed',
            'built', 'improved', 'increased', 'reduced', 'optimized', 'delivered',
            'achieved', 'launched', 'established', 'coordinated', 'executed'
        ]

    def analyze_profile(self, profile_data: Dict[str, Any], job_description: str = "") -> Dict[str, Any]:
        """
        Analyze a LinkedIn profile and provide comprehensive insights.

        Args:
            profile_data (Dict[str, Any]): Extracted profile data
            job_description (str): Optional job description for matching analysis

        Returns:
            Dict[str, Any]: Analysis results with scores and recommendations
        """
        if not profile_data:
            return self._empty_analysis()

        try:
            completeness_score = self._calculate_completeness(profile_data)
            keyword_analysis = self._analyze_keywords(profile_data, job_description)
            content_quality = self._assess_content_quality(profile_data)
            strengths = self._identify_strengths(profile_data)
            weaknesses = self._identify_weaknesses(profile_data)

            # Job match is only meaningful when a description is supplied.
            job_match_score = 0.0
            if job_description:
                job_match_score = self._calculate_job_match(profile_data, job_description)

            return {
                'completeness_score': completeness_score,
                'keyword_analysis': keyword_analysis,
                'content_quality': content_quality,
                'strengths': strengths,
                'weaknesses': weaknesses,
                'job_match_score': job_match_score,
                'recommendations': self._generate_recommendations(profile_data, weaknesses),
                'overall_rating': self._calculate_overall_rating(completeness_score, content_quality, job_match_score)
            }

        except Exception as e:
            # Best-effort: analysis must never crash the enhancement pipeline.
            print(f"Error in profile analysis: {str(e)}")
            return self._empty_analysis()

    def _calculate_completeness(self, profile_data: Dict[str, Any]) -> float:
        """Calculate profile completeness as a percentage (0-100).

        Scoring rubric (10 points total): name, headline, short and long
        about section, 1 and 2+ experience entries, education, 5 and 10+
        skills, location.
        """
        score = 0
        total_points = 10

        # Basic information (2 points)
        if profile_data.get('name'): score += 1
        if profile_data.get('headline'): score += 1

        # About section (2 points)
        about = profile_data.get('about', '')
        if about and len(about) > 50: score += 1
        if about and len(about) > 200: score += 1

        # Experience (2 points)
        experience = profile_data.get('experience', [])
        if len(experience) >= 1: score += 1
        if len(experience) >= 2: score += 1

        # Education (1 point)
        if profile_data.get('education'): score += 1

        # Skills (2 points)
        skills = profile_data.get('skills', [])
        if len(skills) >= 5: score += 1
        if len(skills) >= 10: score += 1

        # Location (1 point)
        if profile_data.get('location'): score += 1

        return (score / total_points) * 100

    def _analyze_keywords(self, profile_data: Dict[str, Any], job_description: str) -> Dict[str, Any]:
        """Analyze keywords present in the profile vs the job description."""
        profile_text = self._extract_all_text(profile_data).lower()

        # Common tech keywords to look for in the profile text.
        tech_keywords = [
            'python', 'javascript', 'react', 'node.js', 'sql', 'mongodb',
            'aws', 'docker', 'kubernetes', 'git', 'agile', 'scrum'
        ]
        found_keywords = [kw for kw in tech_keywords if kw.lower() in profile_text]

        # Suggest frequent job-description words absent from the profile.
        # Stopwords are excluded so suggestions are actual skills/terms, not
        # filler words ('with', 'will', ...).
        missing_keywords = []
        if job_description:
            # \b[a-zA-Z]{4,}\b: words of 4+ letters (replaces the previous
            # redundant {3,} regex + len(keyword) > 3 double filter).
            job_words = re.findall(r'\b[a-zA-Z]{4,}\b', job_description.lower())
            for keyword, _freq in Counter(job_words).most_common(10):
                if keyword not in self._STOPWORDS and keyword not in profile_text:
                    missing_keywords.append(keyword)

        return {
            'found_keywords': found_keywords,
            'missing_keywords': missing_keywords[:5],  # Top 5 missing
            'keyword_density': len(found_keywords)
        }

    def _assess_content_quality(self, profile_data: Dict[str, Any]) -> Dict[str, Any]:
        """Assess the quality of the headline and about-section content."""
        about_section = profile_data.get('about', '')
        headline = profile_data.get('headline', '')

        return {
            'headline_length': len(headline),
            'about_length': len(about_section),
            'has_quantified_achievements': self._has_numbers(about_section),
            'uses_action_words': self._has_action_words(about_section)
        }

    def _identify_strengths(self, profile_data: Dict[str, Any]) -> List[str]:
        """Identify profile strengths (experience depth, skills, about length)."""
        strengths = []

        if len(profile_data.get('experience', [])) >= 3:
            strengths.append("Good work experience history")

        if len(profile_data.get('skills', [])) >= 10:
            strengths.append("Comprehensive skills list")

        if len(profile_data.get('about', '')) > 200:
            strengths.append("Detailed about section")

        return strengths

    def _identify_weaknesses(self, profile_data: Dict[str, Any]) -> List[str]:
        """Identify areas for improvement (thin about section, few skills, no metrics)."""
        weaknesses = []

        if not profile_data.get('about') or len(profile_data.get('about', '')) < 100:
            weaknesses.append("About section needs improvement")

        if len(profile_data.get('skills', [])) < 5:
            weaknesses.append("Limited skills listed")

        if not self._has_numbers(profile_data.get('about', '')):
            weaknesses.append("Lacks quantified achievements")

        return weaknesses

    def _calculate_job_match(self, profile_data: Dict[str, Any], job_description: str) -> float:
        """Calculate how well the profile matches the job description (0-100).

        Scores the fraction of distinct 4+ letter job-description words that
        appear (as substrings) in the profile text.
        """
        if not job_description:
            return 0.0

        profile_text = self._extract_all_text(profile_data).lower()
        job_keywords = set(re.findall(r'\b[a-zA-Z]{4,}\b', job_description.lower()))
        if not job_keywords:
            return 0.0

        matches = sum(1 for keyword in job_keywords if keyword in profile_text)
        return min((matches / len(job_keywords)) * 100, 100)

    def _extract_all_text(self, profile_data: Dict[str, Any]) -> str:
        """Concatenate all free text from the profile for keyword analysis."""
        text_parts = [
            profile_data.get('headline', ''),
            profile_data.get('about', ''),
        ]

        # Experience titles and descriptions
        for exp in profile_data.get('experience', []):
            text_parts.append(exp.get('description', ''))
            text_parts.append(exp.get('title', ''))

        # Skills list
        text_parts.extend(profile_data.get('skills', []))

        return ' '.join(text_parts)

    def _has_numbers(self, text: str) -> bool:
        """Check if text contains numbers/metrics."""
        return bool(re.search(r'\d+', text))

    def _has_action_words(self, text: str) -> bool:
        """Check if text contains any of the configured action verbs."""
        text_lower = text.lower()
        return any(word in text_lower for word in self.action_words)

    def _generate_recommendations(self, profile_data: Dict[str, Any], weaknesses: List[str]) -> List[str]:
        """Translate identified weaknesses into specific, actionable advice."""
        recommendations = []

        for weakness in weaknesses:
            if "about section" in weakness.lower():
                recommendations.append("Add a compelling about section with 150-300 words describing your expertise")
            elif "skills" in weakness.lower():
                recommendations.append("Add more relevant skills to reach at least 10 skills")
            elif "quantified" in weakness.lower():
                recommendations.append("Include specific numbers and metrics in your descriptions")

        return recommendations

    def _calculate_overall_rating(self, completeness: float, content_quality: Dict[str, Any], job_match: float) -> str:
        """Combine sub-scores into a coarse rating label.

        Weighting: completeness 40%, up to 30 points for content-quality
        flags, job match 30% (only when available). Max possible score: 100.
        """
        score = completeness * 0.4

        # Content quality bonuses (10 points each)
        if content_quality.get('has_quantified_achievements'):
            score += 10
        if content_quality.get('uses_action_words'):
            score += 10
        if content_quality.get('about_length', 0) > 150:
            score += 10

        # Job match contribution, if available
        if job_match > 0:
            score += job_match * 0.3

        if score >= 80:
            return "Excellent"
        elif score >= 60:
            return "Good"
        elif score >= 40:
            return "Fair"
        else:
            return "Needs Improvement"

    def _empty_analysis(self) -> Dict[str, Any]:
        """Return the zeroed analysis structure used for missing/failed data."""
        return {
            'completeness_score': 0,
            'keyword_analysis': {'found_keywords': [], 'missing_keywords': [], 'keyword_density': 0},
            'content_quality': {'headline_length': 0, 'about_length': 0, 'has_quantified_achievements': False, 'uses_action_words': False},
            'strengths': [],
            'weaknesses': ['Profile data not available'],
            'job_match_score': 0,
            'recommendations': ['Please provide valid profile data'],
            'overall_rating': 'Unknown'
        }
|
src/agents/content_agent.py
ADDED
@@ -0,0 +1,347 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Content Generation Agent
|
2 |
+
import os
|
3 |
+
from typing import Dict, Any, List
|
4 |
+
from prompts.agent_prompts import ContentPrompts
|
5 |
+
from openai import OpenAI
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
# Load environment variables
|
9 |
+
load_dotenv()
|
10 |
+
|
11 |
+
class ContentAgent:
    """Agent responsible for generating content suggestions and improvements using OpenAI.

    Rule-based suggestions always work; AI-generated content is added only
    when an OpenAI client could be constructed from the environment.
    """

    def __init__(self):
        self.prompts = ContentPrompts()

        # Model settings are configurable via the environment (.env.example
        # declares OPENAI_MODEL and TEMPERATURE); previously "gpt-4o-mini"
        # and 0.7 were hard-coded at every call site.
        self.model = os.getenv('OPENAI_MODEL', 'gpt-4o-mini')
        try:
            self.temperature = float(os.getenv('TEMPERATURE', '0.7'))
        except ValueError:
            self.temperature = 0.7

        # Initialize OpenAI client; fall back to rule-based only if no key.
        api_key = os.getenv('OPENAI_API_KEY')
        if not api_key:
            print("Warning: OPENAI_API_KEY not found. Using fallback content generation.")
            self.openai_client = None
        else:
            self.openai_client = OpenAI(api_key=api_key)

    def _chat(self, prompt: str, max_tokens: int) -> str:
        """Send a single user-message chat completion and return the reply text.

        Shared helper for all AI-content methods (replaces four copies of the
        same boilerplate). Raises whatever the OpenAI client raises; callers
        are responsible for fallback handling.
        """
        response = self.openai_client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=self.temperature
        )
        return response.choices[0].message.content.strip()

    def generate_suggestions(self, analysis: Dict[str, Any], job_description: str = "") -> Dict[str, Any]:
        """
        Generate enhancement suggestions based on analysis.

        Args:
            analysis (Dict[str, Any]): Profile analysis results
            job_description (str): Optional job description for tailored suggestions

        Returns:
            Dict[str, Any]: Enhancement suggestions

        Raises:
            Exception: if suggestion generation fails (original cause chained).
        """
        try:
            suggestions = {
                'headline_improvements': self._suggest_headline_improvements(analysis, job_description),
                'about_section': self._suggest_about_improvements(analysis, job_description),
                'experience_optimization': self._suggest_experience_improvements(analysis),
                'skills_enhancement': self._suggest_skills_improvements(analysis, job_description),
                'keyword_optimization': self._suggest_keyword_improvements(analysis),
                'content_quality': self._suggest_content_quality_improvements(analysis)
            }

            # Add AI-generated content if OpenAI is available
            if self.openai_client:
                suggestions['ai_generated_content'] = self._generate_ai_content(analysis, job_description)

            return suggestions

        except Exception as e:
            # Chain the original exception so the root cause is not lost.
            raise Exception(f"Failed to generate suggestions: {str(e)}") from e

    def _generate_ai_content(self, analysis: Dict[str, Any], job_description: str) -> Dict[str, Any]:
        """Generate AI-powered content using OpenAI (best-effort)."""
        ai_content = {}

        try:
            ai_content['ai_headlines'] = self._generate_ai_headlines(analysis, job_description)
            ai_content['ai_about_section'] = self._generate_ai_about_section(analysis, job_description)
            ai_content['ai_experience_descriptions'] = self._generate_ai_experience_descriptions(analysis)
        except Exception as e:
            print(f"Error generating AI content: {str(e)}")
            ai_content['error'] = "AI content generation temporarily unavailable"

        return ai_content

    def _generate_ai_headlines(self, analysis: Dict[str, Any], job_description: str) -> List[str]:
        """Generate up to 5 AI-powered headline suggestions (empty list on failure)."""
        if not self.openai_client:
            return []

        prompt = f"""
        Generate 5 compelling LinkedIn headlines for this professional profile:

        Current analysis: {analysis.get('summary', 'No analysis available')}
        Target job (if any): {job_description[:200] if job_description else 'General optimization'}

        Requirements:
        - Maximum 120 characters each
        - Include relevant keywords
        - Professional and engaging tone - Show value proposition
        - Vary the style (some formal, some creative)

        Return only the headlines, numbered 1-5:
        """

        try:
            headlines = self._chat(prompt, max_tokens=300).split('\n')
            return [h.strip() for h in headlines if h.strip()][:5]
        except Exception as e:
            print(f"Error generating AI headlines: {str(e)}")
            return []

    def _generate_ai_about_section(self, analysis: Dict[str, Any], job_description: str) -> str:
        """Generate an AI-powered about section (empty string on failure)."""
        if not self.openai_client:
            return ""

        prompt = f"""
        Write a compelling LinkedIn About section for this professional:

        Profile Analysis: {analysis.get('summary', 'No analysis available')}
        Strengths: {', '.join(analysis.get('strengths', []))}
        Target Role: {job_description[:300] if job_description else 'Career advancement'}

        Requirements:
        - 150-300 words
        - Professional yet personable tone
        - Include quantified achievements
        - Strong opening hook
        - Clear value proposition
        - Call to action at the end
        - Use bullet points for key skills/achievements
        Write the complete About section:
        """

        try:
            return self._chat(prompt, max_tokens=500)
        except Exception as e:
            print(f"Error generating AI about section: {str(e)}")
            return ""

    def _generate_ai_experience_descriptions(self, analysis: Dict[str, Any]) -> List[str]:
        """Generate example AI-powered experience bullet points (empty list on failure)."""
        if not self.openai_client:
            return []

        # This would ideally take specific experience entries;
        # for now, return general improvement suggestions.
        prompt = """
        Generate 3 example bullet points for LinkedIn experience descriptions that:
        - Start with strong action verbs
        - Include quantified achievements
        - Show business impact - Are relevant for tech professionals

        Format: Return only the bullet points, one per line with β’ prefix
        """

        try:
            descriptions = self._chat(prompt, max_tokens=200).split('\n')
            return [d.strip() for d in descriptions if d.strip()]
        except Exception as e:
            print(f"Error generating AI experience descriptions: {str(e)}")
            return []

    def _suggest_headline_improvements(self, analysis: Dict[str, Any], job_description: str = "") -> List[str]:
        """Generate rule-based headline improvement suggestions."""
        suggestions = []

        content_quality = analysis.get('content_quality', {})
        headline_length = content_quality.get('headline_length', 0)

        # Length guidance: LinkedIn headlines work best between ~50 and 120 chars.
        if headline_length < 50:
            suggestions.append("Expand your headline to include more keywords and value proposition")
        elif headline_length > 120:
            suggestions.append("Shorten your headline to be more concise and impactful")

        suggestions.extend([
            "Include specific technologies or skills you specialize in",
            "Mention your years of experience or seniority level",
            "Add a unique value proposition that sets you apart",
            "Use action-oriented language to show what you do"
        ])

        return suggestions

    def _suggest_about_improvements(self, analysis: Dict[str, Any], job_description: str = "") -> List[str]:
        """Generate rule-based about-section improvement suggestions."""
        suggestions = []

        content_quality = analysis.get('content_quality', {})
        about_length = content_quality.get('about_length', 0)
        has_numbers = content_quality.get('has_quantified_achievements', False)
        has_action_words = content_quality.get('uses_action_words', False)

        if about_length < 100:
            suggestions.append("Expand your about section to at least 2-3 paragraphs")

        if not has_numbers:
            suggestions.append("Add quantified achievements (e.g., 'Increased sales by 30%')")

        if not has_action_words:
            suggestions.append("Use more action verbs to describe your accomplishments")

        suggestions.extend([
            "Start with a compelling hook that grabs attention",
            "Include your professional mission or passion",
            "Mention specific technologies, tools, or methodologies you use",
            "End with a call-to-action for potential connections"
        ])

        return suggestions

    def _suggest_experience_improvements(self, analysis: Dict[str, Any]) -> List[str]:
        """Generate experience section improvement suggestions (static checklist)."""
        suggestions = [
            "Use bullet points to highlight key achievements in each role",
            "Start each bullet point with an action verb",
            "Include metrics and numbers to quantify your impact",
            "Focus on results rather than just responsibilities",
            "Tailor descriptions to align with your target role"
        ]

        return suggestions

    def _suggest_skills_improvements(self, analysis: Dict[str, Any], job_description: str) -> List[str]:
        """Generate skills section improvement suggestions."""
        suggestions = []

        keyword_analysis = analysis.get('keyword_analysis', {})
        missing_keywords = keyword_analysis.get('missing_keywords', [])

        # Only surface missing keywords when a target job was provided.
        if missing_keywords and job_description:
            suggestions.append(f"Consider adding these relevant skills: {', '.join(missing_keywords[:5])}")

        suggestions.extend([
            "Prioritize your most relevant skills at the top",
            "Include both technical and soft skills",
            "Get endorsements from colleagues for your key skills",
            "Add skills that are trending in your industry"
        ])

        return suggestions

    def _suggest_keyword_improvements(self, analysis: Dict[str, Any]) -> List[str]:
        """Generate keyword optimization suggestions."""
        suggestions = []

        keyword_analysis = analysis.get('keyword_analysis', {})
        keyword_density = keyword_analysis.get('keyword_density', 0)
        missing_keywords = keyword_analysis.get('missing_keywords', [])

        if keyword_density < 50:
            suggestions.append("Increase keyword density by incorporating more relevant terms")

        if missing_keywords:
            suggestions.append(f"Consider adding these keywords: {', '.join(missing_keywords[:3])}")

        suggestions.extend([
            "Use industry-specific terminology naturally throughout your profile",
            "Include location-based keywords if relevant",
            "Add keywords related to your target roles"
        ])

        return suggestions

    def _suggest_content_quality_improvements(self, analysis: Dict[str, Any]) -> List[str]:
        """Generate general content quality improvement suggestions."""
        completeness_score = analysis.get('completeness_score', 0)

        suggestions = []

        if completeness_score < 80:
            suggestions.append("Complete all sections of your profile for better visibility")

        suggestions.extend([
            "Use a professional headshot as your profile photo",
            "Add a background image that reflects your industry",
            "Keep your profile updated with recent achievements",
            "Engage regularly by posting and commenting on relevant content",
            "Ask for recommendations from colleagues and clients"
        ])

        return suggestions

    def generate_headline_examples(self, current_headline: str, job_description: str = "") -> List[str]:
        """Return static example headlines for inspiration."""
        examples = [
            "Senior Software Engineer | Full-Stack Developer | React & Node.js Expert",
            "Data Scientist | Machine Learning Engineer | Python & AI Specialist",
            "Digital Marketing Manager | SEO Expert | Growth Hacker",
            "Product Manager | Agile Expert | B2B SaaS Specialist"
        ]

        return examples

    def generate_about_template(self, analysis: Dict[str, Any]) -> str:
        """Return a fill-in-the-blanks about-section template."""
        template = """
π [Opening Hook - What makes you unique]

πΌ [Years] years of experience in [Industry/Field], specializing in [Key Skills/Technologies]. I'm passionate about [What drives you professionally].

π― **What I do:**
β’ [Key responsibility/achievement 1]
β’ [Key responsibility/achievement 2]
β’ [Key responsibility/achievement 3]

π **Recent achievements:**
β’ [Quantified achievement 1]
β’ [Quantified achievement 2]
β’ [Quantified achievement 3]

π οΈ **Technical expertise:** [List 5-8 key skills/technologies]

π€ **Let's connect** if you're interested in [collaboration opportunity/your goals] """

        return template.strip()

    def test_openai_connection(self) -> bool:
        """Test if the OpenAI connection is working (False when no client)."""
        if not self.openai_client:
            return False

        try:
            self._chat("Test connection", max_tokens=10)
            return True
        except Exception as e:
            print(f"OpenAI connection test failed: {str(e)}")
            return False
|
src/agents/orchestrator.py
ADDED
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Main Agent Coordinator
|
2 |
+
import time
|
3 |
+
from .scraper_agent import ScraperAgent
|
4 |
+
from .analyzer_agent import AnalyzerAgent
|
5 |
+
from .content_agent import ContentAgent
|
6 |
+
from memory.memory_manager import MemoryManager
|
7 |
+
|
8 |
+
class ProfileOrchestrator:
    """Main coordinator for all LinkedIn profile enhancement agents.

    Wires together the scraper, analyzer and content-generation agents and
    records every completed run in the memory manager so results can be
    revisited later.
    """

    def __init__(self):
        self.scraper = ScraperAgent()
        self.analyzer = AnalyzerAgent()
        self.content_generator = ContentAgent()
        self.memory = MemoryManager()

    def enhance_profile(self, linkedin_url, job_description="", force_refresh=True):
        """
        Main workflow for enhancing a LinkedIn profile.

        Args:
            linkedin_url (str): LinkedIn profile URL
            job_description (str): Optional job description for tailored suggestions
            force_refresh (bool): Force fresh scraping instead of using cache

        Returns:
            str: Markdown-formatted analysis and suggestions, or an error
                message string (this method never raises; callers render the
                return value directly in the UI).
        """
        try:
            print(f"π― Starting profile enhancement for: {linkedin_url}")

            # Always clear cache for fresh data extraction.
            # NOTE(review): force_refresh_session already clears this URL's
            # cache; the extra calls below are redundant but kept because
            # their console output is part of the observable behavior.
            if force_refresh:
                print("ποΈ Clearing all cached data...")
                self.memory.force_refresh_session(linkedin_url)
                # Clear any session data for this URL
                self.memory.clear_session_cache(linkedin_url)
                # Also clear any general cache
                self.memory.clear_session_cache()  # Clear all sessions

            # Step 1: Scrape LinkedIn profile data
            print("π‘ Step 1: Scraping profile data...")
            print(f"π Target URL: {linkedin_url}")
            profile_data = self.scraper.extract_profile_data(linkedin_url)

            # Sanity check: warn (but continue) if the scraper returned data
            # for a different URL than the one requested.
            if profile_data.get('url') != linkedin_url:
                print(f"β οΈ URL mismatch detected!")
                print(f"   Expected: {linkedin_url}")
                print(f"   Got: {profile_data.get('url', 'Unknown')}")

            # Step 2: Analyze the profile
            print("π Step 2: Analyzing profile...")
            analysis = self.analyzer.analyze_profile(profile_data, job_description)

            # Step 3: Generate enhancement suggestions
            print("π‘ Step 3: Generating suggestions...")
            suggestions = self.content_generator.generate_suggestions(analysis, job_description)

            # Step 4: Store the whole run in memory for future reference
            session_data = {
                'profile_data': profile_data,
                'analysis': analysis,
                'suggestions': suggestions,
                'job_description': job_description,
                'timestamp': time.strftime('%Y-%m-%d %H:%M:%S')
            }
            self.memory.store_session(linkedin_url, session_data)

            print("β Profile enhancement completed!")
            return self._format_output(analysis, suggestions)

        except Exception as e:
            # Surface the failure as a user-visible string rather than raising.
            return f"Error in orchestration: {str(e)}"

    def _format_output(self, analysis, suggestions):
        """Format the analysis + suggestions dicts into a Markdown report.

        Args:
            analysis (dict): Output of AnalyzerAgent.analyze_profile.
            suggestions (dict): Output of ContentAgent.generate_suggestions;
                the special key 'ai_generated_content' gets custom formatting.

        Returns:
            str: The full Markdown report.
        """
        output = []

        # Profile Analysis Section
        output.append("## π Profile Analysis")
        output.append("")
        output.append(f"**π Completeness Score:** {analysis.get('completeness_score', 0):.1f}%")
        output.append(f"**β Overall Rating:** {analysis.get('overall_rating', 'Unknown')}")
        output.append(f"**π― Job Match Score:** {analysis.get('job_match_score', 0):.1f}%")
        output.append("")

        # Strengths
        strengths = analysis.get('strengths', [])
        if strengths:
            output.append("### π Profile Strengths")
            for strength in strengths:
                output.append(f"β {strength}")
            output.append("")

        # Areas for Improvement
        weaknesses = analysis.get('weaknesses', [])
        if weaknesses:
            output.append("### π§ Areas for Improvement")
            for weakness in weaknesses:
                output.append(f"πΈ {weakness}")
            output.append("")

        # Keyword Analysis
        keyword_analysis = analysis.get('keyword_analysis', {})
        if keyword_analysis:
            found_keywords = keyword_analysis.get('found_keywords', [])
            missing_keywords = keyword_analysis.get('missing_keywords', [])

            # FIX: heading previously contained a U+FFFD replacement character
            # (a corrupted emoji); restored to match the sibling headings.
            output.append("### π Keyword Analysis")
            output.append(f"**Keywords Found ({len(found_keywords)}):** {', '.join(found_keywords[:10])}")
            if missing_keywords:
                output.append(f"**Missing Keywords:** {', '.join(missing_keywords[:5])}")
            output.append("")

        # Enhancement Suggestions Section
        output.append("## π― Enhancement Suggestions")
        output.append("")

        for category, items in suggestions.items():
            if category == 'ai_generated_content':
                # Special formatting for AI content
                output.append("### π€ AI-Generated Content Suggestions")
                ai_content = items if isinstance(items, dict) else {}

                if 'ai_headlines' in ai_content and ai_content['ai_headlines']:
                    output.append("")
                    output.append("#### β¨ Professional Headlines")
                    for i, headline in enumerate(ai_content['ai_headlines'], 1):
                        # Strip surrounding quotes and any "N." numbering the
                        # LLM may have included, so our own numbering is clean.
                        cleaned_headline = headline.strip('"').replace('\\"', '"')
                        if cleaned_headline.startswith(('1.', '2.', '3.', '4.', '5.')):
                            cleaned_headline = cleaned_headline[2:].strip()
                        output.append(f"{i}. {cleaned_headline}")
                    output.append("")

                if 'ai_about_section' in ai_content and ai_content['ai_about_section']:
                    output.append("#### π Enhanced About Section")
                    output.append("```")
                    about_content = ai_content['ai_about_section']
                    # Drop blank lines so the code block stays compact.
                    about_lines = about_content.split('\n')
                    for line in about_lines:
                        if line.strip():
                            output.append(line.strip())
                    output.append("```")
                    output.append("")

                if 'ai_experience_descriptions' in ai_content and ai_content['ai_experience_descriptions']:
                    output.append("#### πΌ Experience Description Ideas")
                    for desc in ai_content['ai_experience_descriptions']:
                        output.append(f"β’ {desc}")
                    output.append("")
            else:
                # Standard formatting for other categories
                category_name = category.replace('_', ' ').title()
                output.append(f"### {category_name}")
                if isinstance(items, list):
                    for item in items:
                        output.append(f"β’ {item}")
                else:
                    output.append(f"β’ {items}")
                output.append("")

        # Next Steps Section
        output.append("## π Implementation Roadmap")
        output.append("")
        recommendations = analysis.get('recommendations', [])
        if recommendations:
            output.append("### π― Priority Actions")
            for i, rec in enumerate(recommendations[:5], 1):
                output.append(f"{i}. {rec}")
            output.append("")

        output.append("### π General Best Practices")
        output.append("πΈ Update your profile regularly with new achievements")
        output.append("πΈ Use professional keywords relevant to your industry")
        output.append("πΈ Engage with your network by sharing valuable content")
        output.append("πΈ Ask for recommendations from colleagues and clients")
        output.append("πΈ Monitor profile views and connection requests")
        output.append("")

        output.append("---")
        output.append("*Analysis powered by AI β’ Data scraped with respect to LinkedIn's ToS*")

        return "\n".join(output)
|
src/agents/scraper_agent.py
ADDED
@@ -0,0 +1,284 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import time
|
3 |
+
import json
|
4 |
+
import requests
|
5 |
+
from typing import Dict, Any
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
# Load environment variables
|
9 |
+
load_dotenv()
|
10 |
+
|
11 |
+
class ScraperAgent:
    """Agent responsible for extracting data from LinkedIn profiles using the Apify REST API.

    Requires APIFY_API_TOKEN in the environment; raises ValueError at
    construction time when it is missing.
    """

    def __init__(self):
        self.apify_token = os.getenv('APIFY_API_TOKEN')
        if not self.apify_token:
            raise ValueError("APIFY_API_TOKEN not found in environment variables")

        # Validate token format (Apify tokens conventionally start with 'apify_api_').
        if not self.apify_token.startswith('apify_api_'):
            print(f"β οΈ Warning: Token doesn't start with 'apify_api_'. Current token starts with: {self.apify_token[:10]}...")

        # Synchronous run endpoint: starts the actor and returns dataset items in one call.
        self.api_url = f"https://api.apify.com/v2/acts/dev_fusion~linkedin-profile-scraper/run-sync-get-dataset-items?token={self.apify_token}"

        print(f"π Using Apify token: {self.apify_token[:15]}...")  # Show first 15 chars for debugging

    def extract_profile_data(self, linkedin_url: str) -> Dict[str, Any]:
        """
        Extract profile data from a LinkedIn URL using the Apify REST API.

        Args:
            linkedin_url (str): LinkedIn profile URL

        Returns:
            Dict[str, Any]: Extracted profile data (see _process_apify_data)

        Raises:
            requests.Timeout: if the synchronous run exceeds the 3-minute timeout.
            Exception: wrapping any other failure (bad status, empty dataset, ...).
        """
        try:
            print(f"π Starting scraping for: {linkedin_url}")
            print(f"π URL being processed: {linkedin_url}")
            print(f"β° Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}")

            # Clean and validate URL
            original_url = linkedin_url
            linkedin_url = linkedin_url.strip()
            if not linkedin_url.startswith('http'):
                linkedin_url = 'https://' + linkedin_url

            print(f"π§Ή Cleaned URL: {linkedin_url}")

            if original_url != linkedin_url:
                print(f"π URL normalized: {original_url} β {linkedin_url}")

            # Actor input; this actor expects 'profileUrls' (not 'startUrls').
            run_input = {
                "profileUrls": [linkedin_url],
                "slowDown": True,  # throttle to avoid being blocked
                "includeSkills": True,
                "includeExperience": True,
                "includeEducation": True,
                "includeRecommendations": False,  # optional, can be slow
                "saveHtml": False,
                "saveMarkdown": False
            }

            print(f"π Apify input: {json.dumps(run_input, indent=2)}")

            print("π Running Apify scraper via REST API...")
            response = requests.post(
                self.api_url,
                json=run_input,
                headers={'Content-Type': 'application/json'},
                timeout=180  # 3 minutes
            )

            if response.status_code in [200, 201]:  # 201 is also success for Apify
                results = response.json()
                print(f"β API Response received: {len(results)} items")

                if results and len(results) > 0:
                    # One profile requested -> process the first dataset item.
                    raw_data = results[0]
                    processed_data = self._process_apify_data(raw_data, linkedin_url)
                    print("β Successfully extracted and processed profile data")
                    return processed_data
                else:
                    error_msg = "No data returned from Apify API. The profile may be private or the scraper encountered an issue."
                    print(f"β {error_msg}")
                    raise ValueError(error_msg)
            else:
                # Try to pull a structured error message out of the response body.
                error_details = ""
                try:
                    error_response = response.json()
                    error_details = f" - {error_response.get('error', {}).get('message', response.text)}"
                except ValueError:  # FIX: was a bare `except:`; json() raises ValueError
                    error_details = f" - {response.text}"

                if response.status_code == 401:
                    error_msg = f"Authentication failed (401): Invalid or expired API token{error_details}"
                    print(f"β {error_msg}")
                    print(f"π Token being used: {self.apify_token[:15]}...")
                    print(f"π‘ Please check your APIFY_API_TOKEN in your .env file")
                elif response.status_code == 404:
                    error_msg = f"Actor not found (404): The actor 'dev_fusion~linkedin-profile-scraper' may not exist{error_details}"
                    print(f"β {error_msg}")
                elif response.status_code == 429:
                    error_msg = f"Rate limit exceeded (429): Too many requests{error_details}"
                    print(f"β {error_msg}")
                else:
                    error_msg = f"API request failed with status {response.status_code}{error_details}"
                    print(f"β {error_msg}")

                raise requests.RequestException(error_msg)

        except requests.Timeout:
            error_msg = "Request timed out. The scraping operation took too long to complete."
            print(f"β° {error_msg}")
            raise requests.Timeout(error_msg)
        except Exception as e:
            # Wrap everything else (including the raises above) in a uniform message.
            error_msg = f"Error extracting profile data: {str(e)}"
            print(f"β {error_msg}")
            raise Exception(error_msg) from e  # FIX: keep the original cause chained

    def test_apify_connection(self) -> bool:
        """Return True if the Apify actor endpoint is reachable with this token."""
        try:
            test_url = f"https://api.apify.com/v2/acts/dev_fusion~linkedin-profile-scraper?token={self.apify_token}"
            print(f"π Testing connection to: {test_url[:50]}...")

            response = requests.get(test_url, timeout=10)

            if response.status_code == 200:
                actor_info = response.json()
                print(f"β Successfully connected to Apify actor: {actor_info.get('name', 'LinkedIn Profile Scraper')}")
                return True
            elif response.status_code == 401:
                print(f"β Authentication failed (401): Invalid or expired API token")
                print(f"π Token being used: {self.apify_token[:15]}...")
                print(f"π‘ Please check your APIFY_API_TOKEN in your .env file")
                return False
            elif response.status_code == 404:
                print(f"β Actor not found (404): The actor 'dev_fusion~linkedin-profile-scraper' may not exist or be accessible")
                return False
            else:
                print(f"β Failed to connect to Apify: {response.status_code} - {response.text}")
                return False
        except Exception as e:
            print(f"β Failed to connect to Apify: {str(e)}")
            return False

    def _process_apify_data(self, raw_data: Dict[str, Any], url: str) -> Dict[str, Any]:
        """Normalize a raw Apify dataset item into this app's profile schema.

        Args:
            raw_data: One dataset item as returned by the actor
                (keys like 'fullName', 'about', 'experiences', 'educations', ...).
            url: The URL that was actually requested; stored verbatim so callers
                can detect mismatches.

        Returns:
            Dict[str, Any]: Flattened profile dict with 'experience',
            'education', 'skills', 'certifications', etc.
        """
        print(f"π Processing data for URL: {url}")
        print(f"π Raw data keys: {list(raw_data.keys())}")

        # Basic information — field names follow the actor's output schema.
        profile_data = {
            'name': raw_data.get('fullName', ''),
            'headline': raw_data.get('headline', ''),
            'location': raw_data.get('addressWithCountry', raw_data.get('addressWithoutCountry', '')),
            'about': raw_data.get('about', ''),  # actor uses 'about', not 'summary'
            'connections': raw_data.get('connections', 0),
            'followers': raw_data.get('followers', 0),
            'email': raw_data.get('email', ''),
            'url': url,  # the URL that was actually requested
            'profile_image': raw_data.get('profilePic', ''),
            'profile_image_hq': raw_data.get('profilePicHighQuality', ''),
            'scraped_at': time.strftime('%Y-%m-%d %H:%M:%S'),
            'job_title': raw_data.get('jobTitle', ''),
            'company_name': raw_data.get('companyName', ''),
            'company_industry': raw_data.get('companyIndustry', ''),
            'company_website': raw_data.get('companyWebsite', ''),
            'company_size': raw_data.get('companySize', ''),
            'current_job_duration': raw_data.get('currentJobDuration', ''),
            'top_skills': raw_data.get('topSkillsByEndorsements', '')
        }

        print(f"β Extracted profile for: {profile_data.get('name', 'Unknown')}")
        print(f"π Profile URL stored: {profile_data['url']}")

        # Experience — actor provides an 'experiences' array.
        experience_list = []
        for exp in raw_data.get('experiences', []):
            experience_item = {
                'title': exp.get('title', ''),
                'company': exp.get('subtitle', '').replace(' Β· Full-time', '').replace(' Β· Part-time', ''),
                'duration': exp.get('caption', ''),
                'description': '',  # filled in from subComponents below
                'location': exp.get('metadata', ''),
                'company_logo': exp.get('logo', ''),
                # Heuristic: current role if caption says "Present" or has no separator.
                'is_current': 'Present' in exp.get('caption', '') or 'Β·' not in exp.get('caption', '')
            }

            # Description text is nested inside subComponents[].description[].text.
            if 'subComponents' in exp and exp['subComponents']:
                for sub in exp['subComponents']:
                    if 'description' in sub and sub['description']:
                        descriptions = []
                        for desc in sub['description']:
                            if isinstance(desc, dict) and desc.get('text'):
                                descriptions.append(desc['text'])
                        experience_item['description'] = ' '.join(descriptions)

            experience_list.append(experience_item)
        profile_data['experience'] = experience_list

        # Education — actor provides an 'educations' array.
        education_list = []
        for edu in raw_data.get('educations', []):
            education_item = {
                'degree': edu.get('subtitle', ''),
                'school': edu.get('title', ''),
                'field': '',  # split out of subtitle below
                'year': edu.get('caption', ''),
                'logo': edu.get('logo', ''),
                'grade': ''  # extracted from subComponents below
            }

            # Subtitle usually looks like "Degree - Field" or "Degree, Field".
            subtitle = edu.get('subtitle', '')
            if ' - ' in subtitle:
                parts = subtitle.split(' - ', 1)
                education_item['degree'] = parts[0]
                education_item['field'] = parts[1] if len(parts) > 1 else ''
            elif ', ' in subtitle:
                parts = subtitle.split(', ', 1)
                education_item['degree'] = parts[0]
                education_item['field'] = parts[1] if len(parts) > 1 else ''

            # Grade appears as a "Grade: ..." description entry.
            if 'subComponents' in edu and edu['subComponents']:
                for sub in edu['subComponents']:
                    if 'description' in sub and sub['description']:
                        for desc in sub['description']:
                            if isinstance(desc, dict) and desc.get('text', '').startswith('Grade:'):
                                education_item['grade'] = desc['text']

            education_list.append(education_item)
        profile_data['education'] = education_list

        # Skills — entries may be {'title': ...} dicts or plain strings.
        skills_list = []
        for skill in raw_data.get('skills', []):
            if isinstance(skill, dict) and 'title' in skill:
                skills_list.append(skill['title'])
            elif isinstance(skill, str):
                skills_list.append(skill)
        profile_data['skills'] = skills_list

        # Certifications — actor uses 'licenseAndCertificates'.
        certifications_list = []
        for cert in raw_data.get('licenseAndCertificates', []):
            cert_item = {
                'title': cert.get('title', ''),
                'issuer': cert.get('subtitle', ''),
                'date': cert.get('caption', ''),
                'credential_id': cert.get('metadata', ''),
                'logo': cert.get('logo', '')
            }
            certifications_list.append(cert_item)
        profile_data['certifications'] = certifications_list

        # Languages (if available)
        profile_data['languages'] = raw_data.get('languages', [])

        # Volunteer experience (if available) — keep only dict entries.
        volunteer_list = []
        for vol in raw_data.get('volunteerAndAwards', []):
            if isinstance(vol, dict):
                volunteer_list.append(vol)
        profile_data['volunteer_experience'] = volunteer_list

        # Additional rich data, passed through as-is.
        profile_data['honors_awards'] = raw_data.get('honorsAndAwards', [])
        profile_data['projects'] = raw_data.get('projects', [])
        profile_data['publications'] = raw_data.get('publications', [])
        profile_data['recommendations'] = raw_data.get('recommendations', [])
        profile_data['interests'] = raw_data.get('interests', [])

        return profile_data
|
src/memory/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Memory package initialization
|
src/memory/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (195 Bytes). View file
|
|
src/memory/__pycache__/memory_manager.cpython-311.pyc
ADDED
Binary file (12.4 kB). View file
|
|
src/memory/memory_manager.py
ADDED
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Session & Persistent Memory Manager
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
from datetime import datetime
|
5 |
+
from typing import Dict, Any, Optional
|
6 |
+
|
7 |
+
class MemoryManager:
    """Manages session data and persistent storage for the LinkedIn enhancer.

    Session data lives only in memory for the lifetime of this object;
    persistent data is mirrored to a JSON file under ``storage_dir``.
    """

    def __init__(self, storage_dir: str = "data"):
        self.storage_dir = storage_dir
        # In-memory cache keyed by a hash of the profile URL.
        self.session_data = {}
        self.persistent_file = os.path.join(storage_dir, "persistent_data.json")

        # Create storage directory if it doesn't exist
        os.makedirs(storage_dir, exist_ok=True)

        # Load existing persistent data
        self.persistent_data = self._load_persistent_data()

    def store_session(self, profile_url: str, data: Dict[str, Any]) -> None:
        """
        Store session data for a specific profile (in memory only).

        Args:
            profile_url (str): LinkedIn profile URL as key
            data (Dict[str, Any]): Session data to store
        """
        session_key = self._create_session_key(profile_url)

        self.session_data[session_key] = {
            'timestamp': datetime.now().isoformat(),
            'profile_url': profile_url,
            'data': data
        }

    def get_session(self, profile_url: str) -> Optional[Dict[str, Any]]:
        """
        Retrieve session data for a specific profile.

        Args:
            profile_url (str): LinkedIn profile URL

        Returns:
            Optional[Dict[str, Any]]: Session record if it exists
                (keys: 'timestamp', 'profile_url', 'data'), else None.
        """
        session_key = self._create_session_key(profile_url)
        return self.session_data.get(session_key)

    def store_persistent(self, key: str, data: Any) -> None:
        """
        Store data persistently to disk (written immediately).

        Args:
            key (str): Storage key
            data (Any): Data to store (must be JSON-serializable)
        """
        self.persistent_data[key] = {
            'timestamp': datetime.now().isoformat(),
            'data': data
        }

        self._save_persistent_data()

    def get_persistent(self, key: str) -> Optional[Any]:
        """
        Retrieve persistent data.

        Args:
            key (str): Storage key

        Returns:
            Optional[Any]: Stored payload if it exists, else None.
        """
        stored_item = self.persistent_data.get(key)
        return stored_item['data'] if stored_item else None

    def store_user_preferences(self, user_id: str, preferences: Dict[str, Any]) -> None:
        """
        Store user preferences persistently.

        Args:
            user_id (str): User identifier
            preferences (Dict[str, Any]): User preferences
        """
        pref_key = f"user_preferences_{user_id}"
        self.store_persistent(pref_key, preferences)

    def get_user_preferences(self, user_id: str) -> Dict[str, Any]:
        """
        Retrieve user preferences.

        Args:
            user_id (str): User identifier

        Returns:
            Dict[str, Any]: User preferences ({} when none stored).
        """
        pref_key = f"user_preferences_{user_id}"
        preferences = self.get_persistent(pref_key)
        return preferences if preferences else {}

    def store_analysis_history(self, profile_url: str, analysis: Dict[str, Any]) -> None:
        """
        Store analysis history for tracking improvements over time.

        Args:
            profile_url (str): LinkedIn profile URL
            analysis (Dict[str, Any]): Analysis results
        """
        history_key = f"analysis_history_{self._create_session_key(profile_url)}"

        # Get existing history
        history = self.get_persistent(history_key) or []

        # Add new analysis with timestamp
        history.append({
            'timestamp': datetime.now().isoformat(),
            'analysis': analysis
        })

        # Keep only the last 10 analyses to bound file growth.
        history = history[-10:]

        self.store_persistent(history_key, history)

    def get_analysis_history(self, profile_url: str) -> list:
        """
        Retrieve analysis history for a profile.

        Args:
            profile_url (str): LinkedIn profile URL

        Returns:
            list: Analysis history ([] when none stored).
        """
        history_key = f"analysis_history_{self._create_session_key(profile_url)}"
        return self.get_persistent(history_key) or []

    def clear_session(self, profile_url: Optional[str] = None) -> None:
        """
        Clear session data silently (no console output).

        Args:
            profile_url (Optional[str]): Specific profile to clear, or all if None
        """
        # FIX: annotation was `str = None`; None is an accepted value.
        if profile_url:
            session_key = self._create_session_key(profile_url)
            self.session_data.pop(session_key, None)
        else:
            self.session_data.clear()

    def clear_session_cache(self, profile_url: Optional[str] = None) -> None:
        """
        Clear session cache for a specific profile or all profiles,
        logging what was cleared (unlike clear_session, which is silent).

        Args:
            profile_url (Optional[str]): URL to clear cache for. If None, clears all.
        """
        if profile_url:
            session_key = self._create_session_key(profile_url)
            if session_key in self.session_data:
                del self.session_data[session_key]
                print(f"ποΈ Cleared session cache for: {profile_url}")
        else:
            self.session_data.clear()
            print("ποΈ Cleared all session cache")

    def force_refresh_session(self, profile_url: str) -> None:
        """
        Force refresh by clearing cache for a specific profile.

        Args:
            profile_url (str): LinkedIn profile URL
        """
        self.clear_session_cache(profile_url)
        print(f"π Forced refresh for: {profile_url}")

    def get_session_summary(self) -> Dict[str, Any]:
        """
        Get summary of current session data.

        Returns:
            Dict[str, Any]: Session summary (count, keys, storage dir).
        """
        return {
            'active_sessions': len(self.session_data),
            'sessions': list(self.session_data.keys()),
            'storage_location': self.storage_dir
        }

    def _create_session_key(self, profile_url: str) -> str:
        """Create a short, filesystem-safe session key from a profile URL."""
        # MD5 used only as a stable non-cryptographic hash of the URL.
        import hashlib
        return hashlib.md5(profile_url.encode()).hexdigest()[:16]

    def _load_persistent_data(self) -> Dict[str, Any]:
        """Load persistent data from disk; returns {} when missing/corrupt."""
        if os.path.exists(self.persistent_file):
            try:
                with open(self.persistent_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except (json.JSONDecodeError, IOError):
                return {}
        return {}

    def _save_persistent_data(self) -> None:
        """Save persistent data to disk (best effort; warns on failure)."""
        try:
            with open(self.persistent_file, 'w', encoding='utf-8') as f:
                json.dump(self.persistent_data, f, indent=2, ensure_ascii=False)
        except IOError as e:
            print(f"Warning: Could not save persistent data: {e}")

    def export_data(self, filename: Optional[str] = None) -> str:
        """
        Export all data (session + persistent) to a JSON file.

        Args:
            filename (Optional[str]): Custom filename; timestamped default if None.

        Returns:
            str: Path to exported file
        """
        if not filename:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"linkedin_enhancer_export_{timestamp}.json"

        export_path = os.path.join(self.storage_dir, filename)

        export_data = {
            'session_data': self.session_data,
            'persistent_data': self.persistent_data,
            'export_timestamp': datetime.now().isoformat()
        }

        with open(export_path, 'w', encoding='utf-8') as f:
            json.dump(export_data, f, indent=2, ensure_ascii=False)

        return export_path
|
src/prompts/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Prompts package initialization
|
src/prompts/__pycache__/agent_prompts.cpython-311.pyc
ADDED
Binary file (9.63 kB). View file
|
|
src/prompts/agent_prompts.py
ADDED
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Agent Prompts for LinkedIn Profile Enhancer

class ContentPrompts:
    """Collection of prompts for content generation agents.

    Aggregates the per-section prompt collections (headline, about,
    experience, general) behind one object so a content agent needs a
    single dependency to reach every template.
    """

    def __init__(self):
        # One instance per prompt category. The referenced classes are
        # defined later in this module, which is fine because __init__
        # only runs after the whole module has been loaded.
        self.headline_prompts = HeadlinePrompts()
        self.about_prompts = AboutPrompts()
        self.experience_prompts = ExperiencePrompts()
        self.general_prompts = GeneralPrompts()
+
class HeadlinePrompts:
|
13 |
+
"""Prompts for headline optimization"""
|
14 |
+
|
15 |
+
HEADLINE_ANALYSIS = """
|
16 |
+
Analyze this LinkedIn headline and provide improvement suggestions:
|
17 |
+
|
18 |
+
Current headline: "{headline}"
|
19 |
+
Target role: "{target_role}"
|
20 |
+
Key skills: {skills}
|
21 |
+
|
22 |
+
Consider:
|
23 |
+
1. Keyword optimization for the target role
|
24 |
+
2. Value proposition clarity
|
25 |
+
3. Professional branding
|
26 |
+
4. Character limit (120 chars max)
|
27 |
+
5. Industry-specific terms
|
28 |
+
|
29 |
+
Provide 3-5 alternative headline suggestions.
|
30 |
+
"""
|
31 |
+
|
32 |
+
HEADLINE_TEMPLATES = [
|
33 |
+
"{title} | {specialization} | {key_skills}",
|
34 |
+
"{seniority} {title} specializing in {domain} | {achievement}",
|
35 |
+
"{title} | Helping {target_audience} with {solution} | {technologies}",
|
36 |
+
"{role} with {years}+ years in {industry} | {unique_value_prop}"
|
37 |
+
]
|
38 |
+
|
39 |
+
class AboutPrompts:
    """Prompts for about section optimization"""

    # LLM prompt: generates a complete About section from profile facts.
    # Placeholders: {name}, {current_role}, {experience_years},
    # {key_skills}, {achievements}, {target_audience}.
    ABOUT_STRUCTURE = """
    Create an engaging LinkedIn About section with this structure:

    Profile info:
    - Name: {name}
    - Current role: {current_role}
    - Years of experience: {experience_years}
    - Key skills: {key_skills}
    - Notable achievements: {achievements}
    - Target audience: {target_audience}

    Structure:
    1. Hook (compelling opening line)
    2. Professional summary (2-3 sentences)
    3. Key expertise and skills
    4. Notable achievements with metrics
    5. Call to action

    Keep it conversational, professional, and under 2000 characters.
    """

    # Opening-line templates for the About section.
    # NOTE(review): leading characters are emoji that arrived garbled in
    # this copy of the file — confirm against the original encoding.
    ABOUT_HOOKS = [
        "π Passionate about transforming {industry} through {technology}",
        "π‘ {Years} years of turning complex {domain} challenges into simple solutions",
        "π― Helping {target_audience} achieve {outcome} through {approach}",
        "β‘ {Achievement} specialist with a track record of {impact}"
    ]
70 |
+
class ExperiencePrompts:
    """Prompts for experience section optimization"""

    # LLM prompt: rewrites a single work-experience entry. Placeholders:
    # {description}, {title}, {company}, {duration}, {target_role}.
    EXPERIENCE_ENHANCEMENT = """
    Enhance this work experience entry:

    Current description: "{description}"
    Role: {title}
    Company: {company}
    Duration: {duration}

    Improve by:
    1. Starting with strong action verbs
    2. Adding quantified achievements
    3. Highlighting relevant skills used
    4. Showing business impact
    5. Using bullet points for readability

    Target the experience for: {target_role}
    """
    # Curated action verbs grouped by the kind of accomplishment they
    # introduce; intended as building blocks for bullet points.
    ACTION_VERBS = {
        "Leadership": ["led", "managed", "directed", "coordinated", "supervised"],
        "Achievement": ["achieved", "delivered", "exceeded", "accomplished", "attained"],
        "Development": ["developed", "created", "built", "designed", "implemented"],
        "Improvement": ["optimized", "enhanced", "streamlined", "upgraded", "modernized"],
        "Problem-solving": ["resolved", "troubleshot", "analyzed", "diagnosed", "solved"]
    }
98 |
+
class GeneralPrompts:
    """General prompts for profile enhancement"""

    # LLM prompt: reorders/extends a skills list for a target role.
    # Placeholders: {current_skills}, {target_role}, {job_keywords}.
    SKILLS_OPTIMIZATION = """
    Optimize this skills list for the target role:

    Current skills: {current_skills}
    Target role: {target_role}
    Job description keywords: {job_keywords}

    Provide:
    1. Priority ranking of current skills
    2. Missing skills to add
    3. Skills to remove or deprioritize
    4. Skill categories organization
    """

    # LLM prompt: keyword-density review of profile text against a job
    # posting. Placeholders: {profile_content}, {job_description}.
    KEYWORD_OPTIMIZATION = """
    Analyze keyword optimization for this profile:

    Profile content: {profile_content}
    Target job description: {job_description}

    Identify:
    1. Current keyword density
    2. Missing important keywords
    3. Over-optimized keywords
    4. Natural integration suggestions
    5. Industry-specific terminology gaps
    """

    # LLM prompt: full profile audit with prioritized recommendations.
    # Placeholders: {profile_data}, {target_role}, {industry}.
    PROFILE_AUDIT = """
    Conduct a comprehensive LinkedIn profile audit:

    Profile data: {profile_data}
    Target role: {target_role}
    Industry: {industry}

    Audit areas:
    1. Profile completeness (%)
    2. Keyword optimization
    3. Content quality and engagement potential
    4. Professional branding consistency
    5. Call-to-action effectiveness
    6. Visual elements (photo, banner) recommendations

    Provide actionable improvement suggestions with priority levels.
    """
147 |
+
class AnalysisPrompts:
    """Prompts for profile analysis"""

    # LLM prompt: benchmarks a profile against industry peers.
    # Placeholders: {profile_data}, {industry}, {seniority}.
    COMPETITIVE_ANALYSIS = """
    Compare this profile against industry standards:

    Profile: {profile_data}
    Industry: {industry}
    Seniority level: {seniority}

    Analyze:
    1. Profile completeness vs industry average
    2. Keyword usage vs competitors
    3. Content quality benchmarks
    4. Engagement potential indicators
    5. Areas of competitive advantage
    6. Improvement opportunities
    """

    # LLM prompt: per-section quality scoring (1-10).
    # Placeholder: {profile_sections}.
    CONTENT_QUALITY = """
    Assess content quality across this LinkedIn profile:

    Profile sections: {profile_sections}

    Evaluate:
    1. Clarity and readability
    2. Professional tone consistency
    3. Value proposition strength
    4. Quantified achievements presence
    5. Industry relevance
    6. Call-to-action effectiveness

    Rate each section 1-10 and provide specific improvement suggestions.
    """
182 |
+
class JobMatchingPrompts:
    """Prompts for job matching analysis"""

    # LLM prompt: scores profile/job fit.
    # Placeholders: {profile_data}, {job_description}.
    JOB_MATCH_ANALYSIS = """
    Analyze how well this profile matches the job requirements:

    Profile: {profile_data}
    Job description: {job_description}

    Match analysis:
    1. Skills alignment (%)
    2. Experience relevance
    3. Keyword overlap
    4. Education/certification fit
    5. Overall match score

    Provide specific recommendations to improve match score.
    """

    # LLM prompt: prioritized edits to raise an existing match score.
    # Placeholders: {profile_data}, {job_description}, {current_match_score}.
    TAILORING_SUGGESTIONS = """
    Suggest profile modifications to better match this opportunity:

    Current profile: {profile_data}
    Target job: {job_description}
    Match score: {current_match_score}

    Prioritized suggestions:
    1. High-impact changes (immediate wins)
    2. Medium-impact improvements
    3. Long-term development areas
    4. Skills to highlight/add
    5. Content restructuring recommendations
    """
216 |
+
# Utility functions for prompt formatting
def format_prompt(template: str, **kwargs) -> str:
    """Substitute *kwargs* into a prompt template.

    On success returns the formatted template; if the template references
    a placeholder that was not supplied, returns a human-readable error
    string instead of raising (callers treat the result as display text).
    """
    try:
        rendered = template.format(**kwargs)
    except KeyError as missing:
        return f"Error formatting prompt: Missing variable {missing}"
    return rendered
+
def get_prompt_by_category(category: str, prompt_name: str) -> str:
    """Look up a prompt constant by category and name.

    Args:
        category: One of 'headline', 'about', 'experience', 'general',
            'analysis', 'job_matching' (case-insensitive).
        prompt_name: Attribute name of the prompt, case-insensitive
            (e.g. 'headline_analysis' -> HEADLINE_ANALYSIS).

    Returns:
        The prompt value (usually a str; some constants are lists/dicts),
        or an error message string when the category or prompt is unknown.
    """
    # All prompt attributes are class-level constants, so look them up on
    # the classes directly instead of instantiating every prompt class on
    # each call (the original built six throwaway instances per lookup).
    prompt_classes = {
        'headline': HeadlinePrompts,
        'about': AboutPrompts,
        'experience': ExperiencePrompts,
        'general': GeneralPrompts,
        'analysis': AnalysisPrompts,
        'job_matching': JobMatchingPrompts,
    }

    prompt_class = prompt_classes.get(category.lower())
    if not prompt_class:
        return f"Category '{category}' not found"

    # `is None` (not truthiness) so a present-but-empty prompt is still
    # returned rather than misreported as missing.
    prompt = getattr(prompt_class, prompt_name.upper(), None)
    if prompt is None:
        return f"Prompt '{prompt_name}' not found in category '{category}'"

    return prompt
src/streamlit_app.py
CHANGED
@@ -1,40 +1,723 @@
|
|
1 |
-
import altair as alt
|
2 |
-
import numpy as np
|
3 |
-
import pandas as pd
|
4 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
"""
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
indices = np.linspace(0, 1, num_points)
|
20 |
-
theta = 2 * np.pi * num_turns * indices
|
21 |
-
radius = indices
|
22 |
-
|
23 |
-
x = radius * np.cos(theta)
|
24 |
-
y = radius * np.sin(theta)
|
25 |
-
|
26 |
-
df = pd.DataFrame({
|
27 |
-
"x": x,
|
28 |
-
"y": y,
|
29 |
-
"idx": indices,
|
30 |
-
"rand": np.random.randn(num_points),
|
31 |
-
})
|
32 |
-
|
33 |
-
st.altair_chart(alt.Chart(df, height=700, width=700)
|
34 |
-
.mark_point(filled=True)
|
35 |
-
.encode(
|
36 |
-
x=alt.X("x", axis=None),
|
37 |
-
y=alt.Y("y", axis=None),
|
38 |
-
color=alt.Color("idx", legend=None, scale=alt.Scale()),
|
39 |
-
size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
|
40 |
-
))
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
import json
|
3 |
+
import pandas as pd
|
4 |
+
from agents.orchestrator import ProfileOrchestrator
|
5 |
+
from agents.scraper_agent import ScraperAgent
|
6 |
+
from agents.content_agent import ContentAgent
|
7 |
+
import plotly.express as px
|
8 |
+
import plotly.graph_objects as go
|
9 |
+
from datetime import datetime
|
10 |
+
|
11 |
+
# Configure Streamlit page (must run before any other st.* call)
st.set_page_config(
    page_title="π LinkedIn Profile Enhancer",
    page_icon="π",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for better styling: card classes referenced by the display
# helpers below (info-card, success-card, warning-card, profile-section).
st.markdown("""
<style>
    .main-header {
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        padding: 2rem;
        border-radius: 10px;
        color: white;
        text-align: center;
        margin-bottom: 2rem;
    }

    .metric-card {
        background: #f8f9fa;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #667eea;
        margin: 0.5rem 0;
    }

    .success-card {
        background: #d4edda;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #28a745;
        margin: 0.5rem 0;
    }

    .warning-card {
        background: #fff3cd;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #ffc107;
        margin: 0.5rem 0;
    }

    .info-card {
        background: #e7f3ff;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #17a2b8;
        margin: 0.5rem 0;
    }

    .stTabs > div > div > div > div {
        padding: 1rem;
    }

    .profile-section {
        background: white;
        padding: 1.5rem;
        border-radius: 10px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        margin: 1rem 0;
    }
</style>
""", unsafe_allow_html=True)
77 |
+
def initialize_session_state():
    """Ensure every session-state key the app relies on exists.

    The orchestrator is only constructed when absent so Streamlit reruns
    keep reusing the same instance; the remaining keys default to None.
    """
    if 'orchestrator' not in st.session_state:
        st.session_state.orchestrator = ProfileOrchestrator()
    for key in ('analysis_results', 'profile_data', 'suggestions', 'current_url'):
        if key not in st.session_state:
            st.session_state[key] = None
+
def clear_results_if_url_changed(linkedin_url):
    """Invalidate cached analysis artifacts when a new profile URL arrives."""
    if st.session_state.current_url == linkedin_url:
        return  # same profile as last run -> keep cached results

    for key in ('analysis_results', 'profile_data', 'suggestions'):
        st.session_state[key] = None
    st.session_state.current_url = linkedin_url
    st.cache_data.clear()  # Clear any Streamlit cache as well
    print(f"π URL changed to: {linkedin_url} - Clearing cached data")
+
def create_header():
    """Render the gradient page header with the four feature badges.

    Pure presentation: emits one HTML block styled by the .main-header
    CSS class defined at module load.
    """
    # NOTE(review): the "π…" characters below are emoji that arrived
    # garbled in this copy of the file — confirm against original encoding.
    st.markdown("""
    <div class="main-header">
        <h1>π LinkedIn Profile Enhancer</h1>
        <p style="font-size: 1.2em; margin: 1rem 0;">AI-powered LinkedIn profile analysis and enhancement suggestions</p>
        <div style="display: flex; justify-content: center; gap: 2rem; margin-top: 1rem;">
            <div style="text-align: center;">
                <div style="font-size: 2em;">π</div>
                <div>Real Scraping</div>
            </div>
            <div style="text-align: center;">
                <div style="font-size: 2em;">π€</div>
                <div>AI Analysis</div>
            </div>
            <div style="text-align: center;">
                <div style="font-size: 2em;">π―</div>
                <div>Smart Suggestions</div>
            </div>
            <div style="text-align: center;">
                <div style="font-size: 2em;">π</div>
                <div>Data Insights</div>
            </div>
        </div>
    </div>
    """, unsafe_allow_html=True)
+
def create_sidebar():
    """Render the sidebar controls and return the user's inputs.

    Returns:
        tuple[str, str]: (linkedin_url, job_description) as entered by
        the user; either may be an empty string.
    """
    with st.sidebar:
        st.header("π Configuration")

        # LinkedIn URL input — the profile to scrape and analyze.
        linkedin_url = st.text_input(
            "π LinkedIn Profile URL",
            placeholder="https://linkedin.com/in/your-profile",
            help="Enter the full LinkedIn profile URL to analyze"
        )

        # Job description input — optional; enables tailored suggestions.
        job_description = st.text_area(
            "π― Target Job Description (Optional)",
            placeholder="Paste the job description here for tailored suggestions...",
            height=150,
            help="Include job description for personalized optimization"
        )

        # API Status
        st.subheader("π API Status")

        # Test API connections on demand; failures are shown inline and
        # never abort the app.
        if st.button("π Test Connections"):
            with st.spinner("Testing API connections..."):
                # Test Apify (profile scraping backend)
                try:
                    scraper = ScraperAgent()
                    apify_status = scraper.test_apify_connection()
                    if apify_status:
                        st.success("✅ Apify: Connected")
                    else:
                        st.error("β Apify: Failed")
                except Exception as e:
                    st.error(f"β Apify: Error - {str(e)}")

                # Test OpenAI (content generation backend)
                try:
                    content_agent = ContentAgent()
                    openai_status = content_agent.test_openai_connection()
                    if openai_status:
                        st.success("✅ OpenAI: Connected")
                    else:
                        st.error("β OpenAI: Failed")
                except Exception as e:
                    st.error(f"β OpenAI: Error - {str(e)}")

        # Examples
        st.subheader("π‘ Example URLs")
        example_urls = [
            "https://linkedin.com/in/example-profile",
            "https://www.linkedin.com/in/sample-user"
        ]

        for url in example_urls:
            # NOTE(review): example_url is stored in session state but no
            # visible code reads it back — confirm it is consumed elsewhere.
            if st.button(f"π {url.split('/')[-1]}", key=url):
                st.session_state.example_url = url

    return linkedin_url, job_description
+
def create_metrics_display(analysis):
    """Render the four headline metrics (completeness, rating, job match,
    keyword count) in a single row of equal-width columns."""
    keywords_found = len(analysis.get('keyword_analysis', {}).get('found_keywords', []))

    # (label, value) pairs drive the row; order matches the column order.
    metric_specs = [
        ("π Completeness Score", f"{analysis.get('completeness_score', 0):.1f}%"),
        ("β Overall Rating", analysis.get('overall_rating', 'Unknown')),
        ("π― Job Match Score", f"{analysis.get('job_match_score', 0):.1f}%"),
        ("π Keywords Found", keywords_found),
    ]

    for column, (label, value) in zip(st.columns(4), metric_specs):
        with column:
            st.metric(label, value, delta=None)
+
def create_analysis_charts(analysis):
    """Render the two analysis charts side by side.

    Left: a static pie chart of the fixed section weights used by the
    completeness score. Right: a gauge of the profile's current
    completeness score against a 90% target.
    """
    col1, col2 = st.columns(2)

    with col1:
        # Completeness breakdown — fixed weights, not derived from the
        # analysis result (they document the scoring rubric).
        scores = {
            'Profile Info': 20,
            'About Section': 25,
            'Experience': 25,
            'Skills': 15,
            'Education': 15
        }

        fig_pie = px.pie(
            values=list(scores.values()),
            names=list(scores.keys()),
            title="Profile Section Weights",
            color_discrete_sequence=px.colors.qualitative.Set3
        )
        fig_pie.update_layout(height=400)
        st.plotly_chart(fig_pie, use_container_width=True)

    with col2:
        # Score comparison: gauge with delta relative to the 90% target.
        current_score = analysis.get('completeness_score', 0)
        target_score = 90

        fig_gauge = go.Figure(go.Indicator(
            mode = "gauge+number+delta",
            value = current_score,
            domain = {'x': [0, 1], 'y': [0, 1]},
            title = {'text': "Profile Completeness"},
            delta = {'reference': target_score, 'increasing': {'color': "green"}},
            gauge = {
                'axis': {'range': [None, 100]},
                'bar': {'color': "darkblue"},
                # Color bands: <50 weak, 50-80 average, >80 strong.
                'steps': [
                    {'range': [0, 50], 'color': "lightgray"},
                    {'range': [50, 80], 'color': "gray"},
                    {'range': [80, 100], 'color': "lightgreen"}
                ],
                # Red needle marks the 90% target.
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': 90
                }
            }
        ))
        fig_gauge.update_layout(height=400)
        st.plotly_chart(fig_gauge, use_container_width=True)
+
def display_profile_data(profile_data):
    """Display scraped profile data in a structured format.

    Renders, in order: header (photo + two info cards), about section,
    experience expanders, skills table, education cards, and a collapsed
    raw-JSON dump. Every field is read defensively with .get() because
    the scraper may return partial data.
    """
    if not profile_data:
        st.warning("No profile data available")
        return

    # Profile Header with Image
    st.subheader("π€ Profile Overview")

    # Create columns for profile image and basic info (1:2:2 widths)
    col1, col2, col3 = st.columns([1, 2, 2])

    with col1:
        # Display profile image — prefer the high-quality variant,
        # fall back to the standard one, else show a placeholder circle.
        profile_image = profile_data.get('profile_image_hq') or profile_data.get('profile_image')
        if profile_image:
            st.image(profile_image, width=150, caption="Profile Picture")
        else:
            st.markdown("""
            <div style="width: 150px; height: 150px; background-color: #f0f0f0; border-radius: 50%;
                        display: flex; align-items: center; justify-content: center; font-size: 48px;">
                π€
            </div>
            """, unsafe_allow_html=True)

    with col2:
        # Identity / reach card
        st.markdown(f"""
        <div class="info-card">
            <strong>Name:</strong> {profile_data.get('name', 'N/A')}<br>
            <strong>Headline:</strong> {profile_data.get('headline', 'N/A')}<br>
            <strong>Location:</strong> {profile_data.get('location', 'N/A')}<br>
            <strong>Connections:</strong> {profile_data.get('connections', 'N/A')}<br>
            <strong>Followers:</strong> {profile_data.get('followers', 'N/A')}
        </div>
        """, unsafe_allow_html=True)

    with col3:
        # Current-position / contact card
        st.markdown(f"""
        <div class="info-card">
            <strong>Current Job:</strong> {profile_data.get('job_title', 'N/A')}<br>
            <strong>Company:</strong> {profile_data.get('company_name', 'N/A')}<br>
            <strong>Industry:</strong> {profile_data.get('company_industry', 'N/A')}<br>
            <strong>Email:</strong> {profile_data.get('email', 'N/A')}<br>
            <strong>Profile URL:</strong> <a href="{profile_data.get('url', '#')}" target="_blank">View Profile</a>
        </div>
        """, unsafe_allow_html=True)

    # About Section (only rendered when present)
    if profile_data.get('about'):
        st.subheader("π About Section")
        st.markdown(f"""
        <div class="profile-section">
            {profile_data.get('about', 'No about section available')}
        </div>
        """, unsafe_allow_html=True)

    # Experience — one expander per entry; first one opens by default.
    if profile_data.get('experience'):
        st.subheader("πΌ Experience")
        for i, exp in enumerate(profile_data.get('experience', [])):
            with st.expander(f"{exp.get('title', 'Position')} at {exp.get('company', 'Company')}", expanded=i==0):
                col1, col2 = st.columns([2, 1])
                with col1:
                    st.write(f"**Duration:** {exp.get('duration', 'N/A')}")
                    st.write(f"**Location:** {exp.get('location', 'N/A')}")
                    if exp.get('description'):
                        st.write("**Description:**")
                        st.write(exp.get('description'))
                with col2:
                    st.write(f"**Current Role:** {'Yes' if exp.get('is_current') else 'No'}")

    # Skills — shown as a one-column DataFrame for sortable display.
    if profile_data.get('skills'):
        st.subheader("π οΈ Skills")
        skills = profile_data.get('skills', [])
        if skills:
            # Create a DataFrame for better display
            skills_df = pd.DataFrame({'Skills': skills})
            st.dataframe(skills_df, use_container_width=True)

    # Education — one info card per entry.
    if profile_data.get('education'):
        st.subheader("π Education")
        for edu in profile_data.get('education', []):
            st.markdown(f"""
            <div class="info-card">
                <strong>{edu.get('degree', 'Degree')}</strong><br>
                {edu.get('school', 'School')} | {edu.get('field', 'Field')}<br>
                <em>{edu.get('year', 'Year')}</em>
            </div>
            """, unsafe_allow_html=True)

    # Raw Data (collapsible) — full scraper payload for debugging.
    with st.expander("π Raw JSON Data"):
        st.json(profile_data)
+
def display_analysis_results(analysis):
    """Display analysis results: metrics row, charts, strengths vs.
    weaknesses cards, and the keyword-analysis breakdown.

    Expects the dict produced by the analyzer agent; all keys are read
    defensively with .get().
    """
    if not analysis:
        st.warning("No analysis results available")
        return

    # Metrics (top row of four st.metric widgets)
    create_metrics_display(analysis)

    # Charts (pie + gauge)
    st.subheader("π Analysis Visualization")
    create_analysis_charts(analysis)

    # Strengths and Weaknesses, side by side
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("π Profile Strengths")
        strengths = analysis.get('strengths', [])
        if strengths:
            for strength in strengths:
                st.markdown(f"""
                <div class="success-card">
                    ✅ {strength}
                </div>
                """, unsafe_allow_html=True)
        else:
            st.info("No specific strengths identified")

    with col2:
        st.subheader("π§ Areas for Improvement")
        weaknesses = analysis.get('weaknesses', [])
        if weaknesses:
            for weakness in weaknesses:
                st.markdown(f"""
                <div class="warning-card">
                    πΈ {weakness}
                </div>
                """, unsafe_allow_html=True)
        else:
            st.success("No major areas for improvement identified")

    # Keyword Analysis — truncated lists (10 found / 5 missing) to keep
    # the layout compact.
    keyword_analysis = analysis.get('keyword_analysis', {})
    if keyword_analysis:
        st.subheader("π Keyword Analysis")

        col1, col2 = st.columns(2)
        with col1:
            found_keywords = keyword_analysis.get('found_keywords', [])
            if found_keywords:
                st.write("**Keywords Found:**")
                st.write(", ".join(found_keywords[:10]))

        with col2:
            missing_keywords = keyword_analysis.get('missing_keywords', [])
            if missing_keywords:
                st.write("**Missing Keywords:**")
                st.write(", ".join(missing_keywords[:5]))
+
def generate_suggestions_markdown(suggestions, profile_data=None):
    """Generate a downloadable markdown report from enhancement suggestions.

    Args:
        suggestions (dict): Category -> suggestion items. The special key
            'ai_generated_content' holds a dict with optional
            'ai_headlines', 'ai_about_section' and
            'ai_experience_descriptions' entries; every other key is a
            plain list (or single value) of suggestion strings.
        profile_data (dict, optional): Scraped profile; only 'name' is
            used, for the report header.

    Returns:
        str: Complete markdown document (header, table of contents,
        per-category sections, implementation-tips footer).
    """
    import re  # local: only needed for stripping numbering prefixes below

    if not suggestions:
        return "# LinkedIn Profile Enhancement Suggestions\n\nNo suggestions available."

    # Get profile name for personalization
    profile_name = profile_data.get('name', 'Your Profile') if profile_data else 'Your Profile'
    current_date = datetime.now().strftime("%B %d, %Y")

    markdown_content = f"""# LinkedIn Profile Enhancement Suggestions

**Profile:** {profile_name}
**Generated on:** {current_date}
**Powered by:** LinkedIn Profile Enhancer AI

---

## π Table of Contents
"""

    # Add table of contents (anchors mirror the section headings below)
    toc_items = []
    for category in suggestions.keys():
        if category == 'ai_generated_content':
            toc_items.append("- [π€ AI-Generated Content Suggestions](#ai-generated-content-suggestions)")
        else:
            category_name = category.replace('_', ' ').title()
            toc_items.append(f"- [π {category_name}](#{category.replace('_', '-').lower()})")

    markdown_content += "\n".join(toc_items) + "\n\n---\n\n"

    # Add suggestions content
    for category, items in suggestions.items():
        if category == 'ai_generated_content':
            markdown_content += "## π€ AI-Generated Content Suggestions\n\n"
            ai_content = items if isinstance(items, dict) else {}

            # Headlines — re-numbered 1..N after stripping any numbering
            # the LLM already baked into the text.
            if 'ai_headlines' in ai_content and ai_content['ai_headlines']:
                markdown_content += "### β¨ Professional Headlines\n\n"
                for i, headline in enumerate(ai_content['ai_headlines'], 1):
                    cleaned_headline = headline.strip('"').replace('\\"', '"')
                    # FIX: strip any leading "N." prefix. The original only
                    # matched '1.'-'5.' and dropped exactly two characters,
                    # which mangled nothing but also missed '6.'+ and '10.'+.
                    cleaned_headline = re.sub(r'^\d+\.\s*', '', cleaned_headline)
                    markdown_content += f"{i}. {cleaned_headline}\n"
                markdown_content += "\n"

            # About Section — fenced so internal markdown is shown verbatim.
            if 'ai_about_section' in ai_content and ai_content['ai_about_section']:
                markdown_content += "### π Enhanced About Section\n\n"
                markdown_content += f"```\n{ai_content['ai_about_section']}\n```\n\n"

            # Experience Descriptions
            if 'ai_experience_descriptions' in ai_content and ai_content['ai_experience_descriptions']:
                markdown_content += "### πΌ Experience Description Ideas\n\n"
                for desc in ai_content['ai_experience_descriptions']:
                    markdown_content += f"- {desc}\n"
                markdown_content += "\n"
        else:
            # Standard categories: bullet list (or a single bullet when the
            # value is not a list).
            category_name = category.replace('_', ' ').title()
            markdown_content += f"## π {category_name}\n\n"
            if isinstance(items, list):
                for item in items:
                    markdown_content += f"- {item}\n"
            else:
                markdown_content += f"- {items}\n"
            markdown_content += "\n"

    # Add footer
    markdown_content += """---

## π Implementation Tips

### Getting Started
1. **Prioritize High-Impact Changes**: Start with headline and about section improvements
2. **Use Keywords Strategically**: Incorporate industry-relevant keywords naturally
3. **Maintain Authenticity**: Ensure all changes reflect your genuine experience and personality
4. **Regular Updates**: Keep your profile fresh with recent achievements and experiences

### Best Practices
- **Professional Photo**: Use a high-quality, professional headshot
- **Active Engagement**: Regularly share industry insights and engage with your network
- **Skills Endorsements**: Ask colleagues to endorse your key skills
- **Recommendations**: Request recommendations from supervisors and colleagues
- **Content Strategy**: Share articles, insights, and achievements regularly

### Measuring Success
- Monitor profile views and connection requests
- Track engagement on your posts and content
- Observe changes in recruiter outreach
- Measure network growth and quality

---

*This report was generated by LinkedIn Profile Enhancer AI. For best results, implement changes gradually and monitor their impact on your profile performance.*

**Need Help?** Contact support or revisit the LinkedIn Profile Enhancer tool for updated suggestions.
"""

    return markdown_content
+
def display_suggestions(suggestions):
    """Render enhancement suggestions and offer them as a markdown download.

    Args:
        suggestions: Mapping of category name -> suggestion items (list, str,
            or, for 'ai_generated_content', a nested dict of AI outputs).
    """
    if not suggestions:
        st.warning("No suggestions available")
        return

    # Download button in a narrow column, usage hint in the wide one.
    dl_col, hint_col = st.columns([1, 4])

    with dl_col:
        # Build the downloadable markdown report from the current suggestions.
        report_md = generate_suggestions_markdown(
            suggestions,
            st.session_state.get('profile_data')
        )

        # Timestamped filename, optionally prefixed with a sanitized profile name.
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        name_prefix = ""
        if st.session_state.get('profile_data'):
            raw_name = st.session_state.profile_data.get('name', '')
            if raw_name:
                # Keep only filename-safe characters before using the name.
                safe_name = "".join(c for c in raw_name if c.isalnum() or c in (' ', '_')).rstrip()
                name_prefix = safe_name.replace(' ', '_') + "_"

        st.download_button(
            label="π₯ Download Suggestions",
            data=report_md,
            file_name=f"linkedin_suggestions_{name_prefix}{stamp}.md",
            mime="text/markdown",
            help="Download all suggestions as a markdown file",
            use_container_width=True
        )

    with hint_col:
        st.markdown("*π‘ Click the download button to save all suggestions as a markdown file for easy reference and implementation.*")

    st.markdown("---")

    # Render each suggestion category in insertion order.
    for category, items in suggestions.items():
        if category == 'ai_generated_content':
            st.subheader("π€ AI-Generated Content Suggestions")
            ai_content = items if isinstance(items, dict) else {}

            # Headline candidates: strip stray quoting and leading "N." numbering.
            if ai_content.get('ai_headlines'):
                st.write("**β¨ Professional Headlines:**")
                for idx, headline in enumerate(ai_content['ai_headlines'], 1):
                    text = headline.strip('"').replace('\\"', '"')
                    if text.startswith(('1.', '2.', '3.', '4.', '5.')):
                        text = text[2:].strip()
                    st.write(f"{idx}. {text}")
                st.write("")

            # Rewritten about section shown verbatim in a code block.
            if ai_content.get('ai_about_section'):
                st.write("**π Enhanced About Section:**")
                st.code(ai_content['ai_about_section'], language='text')
                st.write("")

            # Bullet ideas for experience descriptions.
            if ai_content.get('ai_experience_descriptions'):
                st.write("**πΌ Experience Description Ideas:**")
                for idea in ai_content['ai_experience_descriptions']:
                    st.write(f"β’ {idea}")
                st.write("")
        else:
            # Standard categories: bullet list under a title-cased header.
            title = category.replace('_', ' ').title()
            st.subheader(f"π {title}")
            if isinstance(items, list):
                for item in items:
                    st.write(f"β’ {item}")
            else:
                st.write(f"β’ {items}")
            st.write("")
|
613 |
+
|
614 |
+
def main():
    """Main Streamlit application.

    Flow: initialize session state and header, collect inputs from the
    sidebar, run the scrape -> analyze -> suggest pipeline when the user
    clicks the button, then render results in four tabs.
    """
    initialize_session_state()
    create_header()

    # Sidebar returns the two user inputs driving the pipeline.
    linkedin_url, job_description = create_sidebar()

    # Main content
    if st.button("π Enhance Profile", type="primary", use_container_width=True):
        if not linkedin_url.strip():
            st.error("Please enter a LinkedIn profile URL")
        # NOTE(review): the second pattern is redundant — 'linkedin.com/in/'
        # is already a substring of 'www.linkedin.com/in/'.
        elif not any(pattern in linkedin_url.lower() for pattern in ['linkedin.com/in/', 'www.linkedin.com/in/']):
            st.error("Please enter a valid LinkedIn profile URL")
        else:
            # Clear cached data if URL has changed
            clear_results_if_url_changed(linkedin_url)

            with st.spinner("π Analyzing LinkedIn profile..."):
                try:
                    st.info(f"π Extracting data from: {linkedin_url}")

                    # Get profile data and analysis (force fresh extraction)
                    profile_data = st.session_state.orchestrator.scraper.extract_profile_data(linkedin_url)

                    st.info(f"β… Profile data extracted for: {profile_data.get('name', 'Unknown')}")

                    # Analyze the profile, then derive suggestions from that analysis.
                    analysis = st.session_state.orchestrator.analyzer.analyze_profile(profile_data, job_description)
                    suggestions = st.session_state.orchestrator.content_generator.generate_suggestions(analysis, job_description)

                    # Store in session state so results survive Streamlit reruns.
                    st.session_state.profile_data = profile_data
                    st.session_state.analysis_results = analysis
                    st.session_state.suggestions = suggestions

                    st.success("β… Profile analysis completed!")

                except Exception as e:
                    # Surface any pipeline failure to the user instead of crashing the app.
                    st.error(f"β Error analyzing profile: {str(e)}")

    # Display results if available
    if st.session_state.profile_data or st.session_state.analysis_results:
        st.markdown("---")

        # Create tabs for different views
        tab1, tab2, tab3, tab4 = st.tabs(["π Analysis", "π Scraped Data", "π― Suggestions", "π Implementation"])

        with tab1:
            st.header("π Profile Analysis")
            if st.session_state.analysis_results:
                display_analysis_results(st.session_state.analysis_results)
            else:
                st.info("No analysis results available yet")

        with tab2:
            st.header("π Scraped Profile Data")
            if st.session_state.profile_data:
                display_profile_data(st.session_state.profile_data)
            else:
                st.info("No profile data available yet")

        with tab3:
            st.header("π― Enhancement Suggestions")
            if st.session_state.suggestions:
                display_suggestions(st.session_state.suggestions)
            else:
                st.info("No suggestions available yet")

        with tab4:
            st.header("π Implementation Roadmap")
            if st.session_state.analysis_results:
                recommendations = st.session_state.analysis_results.get('recommendations', [])
                if recommendations:
                    st.subheader("π― Priority Actions")
                    # Show at most the top five recommendations as styled cards.
                    for i, rec in enumerate(recommendations[:5], 1):
                        st.markdown(f"""
                        <div class="metric-card">
                        <strong>{i}.</strong> {rec}
                        </div>
                        """, unsafe_allow_html=True)

                st.subheader("π General Best Practices")
                best_practices = [
                    "Update your profile regularly with new achievements",
                    "Use professional keywords relevant to your industry",
                    "Engage with your network by sharing valuable content",
                    "Ask for recommendations from colleagues and clients",
                    "Monitor profile views and connection requests"
                ]

                for practice in best_practices:
                    st.markdown(f"""
                    <div class="info-card">
                    πΈ {practice}
                    </div>
                    """, unsafe_allow_html=True)
            else:
                st.info("Complete the analysis first to see implementation suggestions")

    # Footer
    st.markdown("---")
    st.markdown("""
    <div style="text-align: center; color: #666; margin-top: 2rem;">
    <p>π <strong>LinkedIn Profile Enhancer</strong> | Powered by AI | Data scraped with respect to LinkedIn's ToS</p>
    <p>Built with β€οΈ using Streamlit, OpenAI GPT-4o-mini, and Apify</p>
    </div>
    """, unsafe_allow_html=True)
|
721 |
|
722 |
+
# Script entry point: run the app when executed directly
# (e.g. via `streamlit run src/streamlit_app.py`).
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/utils/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Utils package initialization
|
src/utils/job_matcher.py
ADDED
@@ -0,0 +1,353 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Job Matching Logic
|
2 |
+
from typing import Dict, Any, List, Tuple
|
3 |
+
import re
|
4 |
+
from collections import Counter
|
5 |
+
|
6 |
+
class JobMatcher:
    """Utility class for matching LinkedIn profiles with job descriptions.

    Produces a weighted 0-100 match score across four dimensions
    (skills, experience, keywords, education), per-dimension details,
    and improvement recommendations.
    """

    def __init__(self):
        # Relative importance of each scoring dimension (weights sum to 1.0).
        self.weight_config = {
            'skills': 0.4,
            'experience': 0.3,
            'keywords': 0.2,
            'education': 0.1
        }

        # Canonical skill name -> accepted aliases, used for fuzzy matching.
        self.skill_synonyms = {
            'javascript': ['js', 'ecmascript', 'node.js', 'nodejs'],
            'python': ['py', 'django', 'flask', 'fastapi'],
            'react': ['reactjs', 'react.js'],
            'angular': ['angularjs', 'angular.js'],
            'machine learning': ['ml', 'ai', 'artificial intelligence'],
            'database': ['db', 'sql', 'mysql', 'postgresql', 'mongodb']
        }

    def calculate_match_score(self, profile_data: Dict[str, Any], job_description: str) -> Dict[str, Any]:
        """
        Calculate comprehensive match score between profile and job.

        Args:
            profile_data (Dict[str, Any]): Cleaned profile data
            job_description (str): Job description text

        Returns:
            Dict[str, Any]: Match analysis with 'overall_score' (0-100),
            per-dimension 'breakdown', 'recommendations', and the parsed
            'job_requirements'.
        """
        job_requirements = self._parse_job_requirements(job_description)

        # Score each dimension independently (each on a 0-100 scale).
        skills_score = self._calculate_skills_match(
            profile_data.get('skills', []),
            job_requirements['skills']
        )
        experience_score = self._calculate_experience_match(
            profile_data.get('experience', []),
            job_requirements
        )
        keywords_score = self._calculate_keywords_match(
            profile_data,
            job_requirements['keywords']
        )
        education_score = self._calculate_education_match(
            profile_data.get('education', []),
            job_requirements
        )

        # Weighted aggregate of the four dimension scores.
        overall_score = (
            skills_score['score'] * self.weight_config['skills'] +
            experience_score['score'] * self.weight_config['experience'] +
            keywords_score['score'] * self.weight_config['keywords'] +
            education_score['score'] * self.weight_config['education']
        )

        return {
            'overall_score': round(overall_score, 2),
            'breakdown': {
                'skills': skills_score,
                'experience': experience_score,
                'keywords': keywords_score,
                'education': education_score
            },
            'recommendations': self._generate_match_recommendations(
                skills_score, experience_score, keywords_score, education_score
            ),
            'job_requirements': job_requirements
        }

    def find_skill_gaps(self, profile_skills: List[str], job_requirements: List[str]) -> Dict[str, List[str]]:
        """
        Identify skill gaps between profile and job requirements.

        Args:
            profile_skills (List[str]): Current profile skills
            job_requirements (List[str]): Required job skills

        Returns:
            Dict[str, List[str]]: 'matching_skills', 'missing_skills' (both
            lowercased), and 'match_percentage' (0-100 float).
        """
        profile_skills_lower = [skill.lower() for skill in profile_skills]
        job_skills_lower = [skill.lower() for skill in job_requirements]

        matching_skills = []
        missing_skills = []

        for job_skill in job_skills_lower:
            if job_skill in profile_skills_lower:
                matching_skills.append(job_skill)
                continue

            # No exact match: fall back to synonym/whole-word similarity.
            if any(self._are_skills_similar(profile_skill, job_skill)
                   for profile_skill in profile_skills_lower):
                matching_skills.append(job_skill)
            else:
                missing_skills.append(job_skill)

        return {
            'matching_skills': matching_skills,
            'missing_skills': missing_skills,
            # max(..., 1) guards against division by zero on empty input.
            'match_percentage': len(matching_skills) / max(len(job_skills_lower), 1) * 100
        }

    def suggest_profile_improvements(self, match_analysis: Dict[str, Any]) -> List[str]:
        """
        Generate specific improvement suggestions based on match analysis.

        Args:
            match_analysis (Dict[str, Any]): Output of calculate_match_score

        Returns:
            List[str]: Improvement suggestions (empty if all scores are high)
        """
        suggestions = []
        breakdown = match_analysis['breakdown']

        # Skills suggestions: surface up to three missing skills.
        if breakdown['skills']['score'] < 70:
            missing_skills = breakdown['skills']['details']['missing_skills'][:3]
            if missing_skills:
                suggestions.append(
                    f"Add these high-priority skills: {', '.join(missing_skills)}"
                )

        # Experience suggestions
        if breakdown['experience']['score'] < 60:
            suggestions.append(
                "Highlight more relevant experience in your current/previous roles"
            )
            suggestions.append(
                "Add quantified achievements that demonstrate impact"
            )

        # Keywords suggestions
        if breakdown['keywords']['score'] < 50:
            suggestions.append(
                "Incorporate more industry-specific keywords throughout your profile"
            )

        # Education suggestions
        if breakdown['education']['score'] < 40:
            suggestions.append(
                "Consider adding relevant certifications or courses"
            )

        return suggestions

    def _parse_job_requirements(self, job_description: str) -> Dict[str, Any]:
        """Parse a job description into skills, keywords and experience needs."""
        requirements = {
            'skills': [],
            'keywords': [],
            'experience_years': 0,
            'education_level': '',
            'industry': '',
            'role_type': ''
        }

        # Extract skills (common technical/management skill names).
        skill_patterns = [
            r'\b(python|javascript|java|react|angular|node\.?js|sql|aws|docker|kubernetes)\b',
            r'\b(machine learning|ai|data science|devops|full.?stack)\b',
            r'\b(project management|agile|scrum|leadership)\b'
        ]

        for pattern in skill_patterns:
            matches = re.findall(pattern, job_description, re.IGNORECASE)
            requirements['skills'].extend([match.lower() for match in matches])

        # Extract required years of experience, e.g. "3+ years experience".
        exp_pattern = r'(\d+)\+?\s*years?\s*(?:of\s*)?experience'
        exp_matches = re.findall(exp_pattern, job_description, re.IGNORECASE)
        if exp_matches:
            requirements['experience_years'] = int(exp_matches[0])

        # Extract keywords: every word of 3+ letters, minus a few stop words.
        keywords = re.findall(r'\b[a-zA-Z]{3,}\b', job_description)
        stop_words = {'the', 'and', 'for', 'with', 'you', 'will', 'are', 'have'}
        requirements['keywords'] = [
            word.lower() for word in keywords
            if word.lower() not in stop_words
        ]

        # Remove duplicates (note: set() does not preserve order).
        requirements['skills'] = list(set(requirements['skills']))
        requirements['keywords'] = list(set(requirements['keywords']))

        return requirements

    def _calculate_skills_match(self, profile_skills: List[str], job_skills: List[str]) -> Dict[str, Any]:
        """Score skills overlap; 100 when the job lists no skills."""
        if not job_skills:
            return {'score': 100, 'details': {'matching_skills': [], 'missing_skills': []}}

        skill_gap_analysis = self.find_skill_gaps(profile_skills, job_skills)

        return {
            # Rounded to 2 decimals, consistent with the other dimension scores.
            'score': round(skill_gap_analysis['match_percentage'], 2),
            'details': skill_gap_analysis
        }

    def _calculate_experience_match(self, profile_experience: List[Dict], job_requirements: Dict) -> Dict[str, Any]:
        """Score experience from total years and role relevance."""
        score = 0
        details = {
            'relevant_roles': 0,
            'total_experience': 0,
            'required_experience': job_requirements.get('experience_years', 0)
        }

        # Sum years of experience and count roles mentioning job keywords.
        total_years = 0
        relevant_roles = 0

        for exp in profile_experience:
            # duration_months is produced by the profile cleaning step;
            # entries without it contribute 0 years.
            duration_info = exp.get('duration_info', {})
            if duration_info.get('duration_months'):
                total_years += duration_info['duration_months'] / 12

            # Check if role is relevant (simple keyword matching against
            # the first 10 parsed job keywords).
            role_text = f"{exp.get('title', '')} {exp.get('description', '')}".lower()
            job_keywords = job_requirements.get('keywords', [])

            if any(keyword in role_text for keyword in job_keywords[:10]):
                relevant_roles += 1

        details['total_experience'] = round(total_years, 1)
        details['relevant_roles'] = relevant_roles

        # 70% weight on years vs required, 30% on share of relevant roles.
        if job_requirements.get('experience_years', 0) > 0:
            exp_ratio = min(total_years / job_requirements['experience_years'], 1.0)
            score = exp_ratio * 70 + (relevant_roles / max(len(profile_experience), 1)) * 30
        else:
            score = 80  # Default good score if no specific experience required

        return {
            'score': round(score, 2),
            'details': details
        }

    def _calculate_keywords_match(self, profile_data: Dict, job_keywords: List[str]) -> Dict[str, Any]:
        """Score how many job keywords appear anywhere in the profile text."""
        if not job_keywords:
            return {'score': 100, 'details': {'matched': 0, 'total': 0}}

        # Flatten all profile text (strings, lists, and dict values in lists).
        profile_text = ""
        for key, value in profile_data.items():
            if isinstance(value, str):
                profile_text += f" {value}"
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict):
                        profile_text += f" {' '.join(str(v) for v in item.values())}"
                    else:
                        profile_text += f" {item}"

        profile_text = profile_text.lower()

        # Count keyword matches (substring containment).
        matched_keywords = 0
        for keyword in job_keywords:
            if keyword.lower() in profile_text:
                matched_keywords += 1

        score = (matched_keywords / len(job_keywords)) * 100

        return {
            'score': round(score, 2),
            'details': {
                'matched': matched_keywords,
                'total': len(job_keywords),
                'percentage': round(score, 2)
            }
        }

    def _calculate_education_match(self, profile_education: List[Dict], job_requirements: Dict) -> Dict[str, Any]:
        """Score education: 70 baseline, 85 with any degree, 95 if relevant."""
        score = 70  # Default score
        details = {
            'has_degree': len(profile_education) > 0,
            'degree_count': len(profile_education)
        }

        if profile_education:
            score = 85  # Boost for having education

            # Boost further if the degree/field mentions a top job keyword.
            job_keywords = job_requirements.get('keywords', [])
            for edu in profile_education:
                edu_text = f"{edu.get('degree', '')} {edu.get('field', '')}".lower()
                if any(keyword in edu_text for keyword in job_keywords[:5]):
                    score = 95
                    break

        return {
            'score': score,
            'details': details
        }

    def _are_skills_similar(self, skill1: str, skill2: str) -> bool:
        """Check whether two skill names refer to the same skill.

        Matches via the synonym table, or when one skill appears as a
        whole word inside the other (e.g. "python" in "python developer").
        """
        skill1_lower = skill1.lower()
        skill2_lower = skill2.lower()

        # Empty names cannot meaningfully match anything.
        if not skill1_lower or not skill2_lower:
            return False

        # Check direct synonyms.
        for main_skill, synonyms in self.skill_synonyms.items():
            if ((skill1_lower == main_skill or skill1_lower in synonyms) and
                    (skill2_lower == main_skill or skill2_lower in synonyms)):
                return True

        # Whole-word containment only: the old plain substring check made
        # unrelated pairs match (e.g. "java" inside "javascript").
        if (re.search(r'\b' + re.escape(skill1_lower) + r'\b', skill2_lower) or
                re.search(r'\b' + re.escape(skill2_lower) + r'\b', skill1_lower)):
            return True

        return False

    def _generate_match_recommendations(self, skills_score: Dict, experience_score: Dict,
                                        keywords_score: Dict, education_score: Dict) -> List[str]:
        """Generate coarse recommendations from the per-dimension scores."""
        recommendations = []

        if skills_score['score'] < 60:
            recommendations.append("Focus on developing missing technical skills")

        if experience_score['score'] < 50:
            recommendations.append("Highlight more relevant work experience")

        if keywords_score['score'] < 40:
            recommendations.append("Optimize profile with job-specific keywords")

        if education_score['score'] < 60:
            recommendations.append("Consider additional certifications or training")

        return recommendations
|
src/utils/linkedin_parser.py
ADDED
@@ -0,0 +1,288 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# LinkedIn Data Parser
|
2 |
+
import re
|
3 |
+
from typing import Dict, Any, List, Optional
|
4 |
+
from datetime import datetime
|
5 |
+
|
6 |
+
class LinkedInParser:
|
7 |
+
"""Utility class for parsing and cleaning LinkedIn profile data"""
|
8 |
+
|
9 |
+
def __init__(self):
    # Keyword lists used by categorize_skills to bucket skill names.
    # Matching is by lowercase substring containment, so 'sql' also
    # catches e.g. 'MySQL'.
    self.skill_categories = {
        'technical': ['python', 'javascript', 'java', 'react', 'node.js', 'sql', 'aws', 'docker'],
        'management': ['leadership', 'project management', 'team management', 'agile', 'scrum'],
        'marketing': ['seo', 'social media', 'content marketing', 'digital marketing', 'analytics'],
        'design': ['ui/ux', 'photoshop', 'figma', 'adobe', 'design thinking']
    }
|
16 |
+
|
17 |
+
def clean_profile_data(self, raw_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Clean and standardize raw profile data.

    Args:
        raw_data (Dict[str, Any]): Raw scraped data

    Returns:
        Dict[str, Any]: Cleaned profile data with normalized text fields,
        cleaned experience/education/skills lists, parsed connection count,
        the original URL, and a 'parsed_at' ISO timestamp.
    """
    # Build the cleaned record in one literal; each value is produced by
    # the dedicated cleaning helper for that field.
    cleaned: Dict[str, Any] = {
        'name': self._clean_text(raw_data.get('name', '')),
        'headline': self._clean_text(raw_data.get('headline', '')),
        'location': self._clean_text(raw_data.get('location', '')),
        'about': self._clean_text(raw_data.get('about', '')),
        'experience': self._clean_experience_list(raw_data.get('experience', [])),
        'education': self._clean_education_list(raw_data.get('education', [])),
        'skills': self._clean_skills_list(raw_data.get('skills', [])),
        'connections': self._parse_connections(raw_data.get('connections', '')),
        'url': raw_data.get('url', ''),
        'parsed_at': datetime.now().isoformat()
    }
    return cleaned
|
59 |
+
|
60 |
+
def extract_keywords(self, text: str, min_length: int = 3) -> List[str]:
    """
    Extract meaningful keywords from text.

    Args:
        text (str): Input text
        min_length (int): Minimum keyword length

    Returns:
        List[str]: Lowercased keywords, stop words removed, deduplicated
        while preserving first-occurrence order.
    """
    # Common stop words to exclude from the result.
    stop_words = {
        'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
        'by', 'from', 'up', 'about', 'into', 'through', 'during', 'before',
        'after', 'above', 'below', 'between', 'among', 'within', 'without',
        'under', 'over', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
        'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these',
        'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him',
        'her', 'us', 'them', 'my', 'your', 'his', 'her', 'its', 'our', 'their'
    }

    # Lowercase and replace punctuation with spaces before tokenizing.
    normalized = re.sub(r'[^\w\s]', ' ', text.lower())

    # Single pass: filter by length/stop words and dedupe, keeping order.
    seen = set()
    keywords: List[str] = []
    for token in normalized.split():
        if len(token) < min_length or token in stop_words:
            continue
        if token not in seen:
            seen.add(token)
            keywords.append(token)

    return keywords
|
104 |
+
|
105 |
+
def parse_duration(self, duration_str: str) -> Dict[str, Any]:
    """
    Parse duration strings like "2020 - Present" or "Jan 2020 - Dec 2022".

    Args:
        duration_str (str): Duration string

    Returns:
        Dict[str, Any]: 'raw', 'start_date'/'end_date' (4-digit year strings
        or None), 'is_current', and 'duration_months' (year-precision
        approximation; 0 when it cannot be derived).
    """
    duration_info = {
        'raw': duration_str,
        'start_date': None,
        'end_date': None,
        'is_current': False,
        'duration_months': 0
    }

    if not duration_str:
        return duration_info

    # Check if current position
    if 'present' in duration_str.lower():
        duration_info['is_current'] = True

    # Non-capturing group is essential here: with the old capturing form
    # (19|20), re.findall returned just "19"/"20" instead of the full year.
    years = re.findall(r'\b(?:19|20)\d{2}\b', duration_str)

    if years:
        duration_info['start_date'] = years[0]
        duration_info['end_date'] = years[1] if len(years) > 1 else None

        # Approximate the span in months (year precision only). This field
        # was previously declared but never populated, even though consumers
        # (e.g. experience scoring) read it.
        start_year = int(years[0])
        if len(years) > 1:
            end_year = int(years[1])
        elif duration_info['is_current']:
            end_year = datetime.now().year
        else:
            end_year = None
        if end_year is not None:
            duration_info['duration_months'] = max((end_year - start_year) * 12, 0)

    return duration_info
|
139 |
+
|
140 |
+
def categorize_skills(self, skills: List[str]) -> Dict[str, List[str]]:
|
141 |
+
"""
|
142 |
+
Categorize skills into different types
|
143 |
+
|
144 |
+
Args:
|
145 |
+
skills (List[str]): List of skills
|
146 |
+
|
147 |
+
Returns:
|
148 |
+
Dict[str, List[str]]: Categorized skills
|
149 |
+
"""
|
150 |
+
categorized = {
|
151 |
+
'technical': [],
|
152 |
+
'management': [],
|
153 |
+
'marketing': [],
|
154 |
+
'design': [],
|
155 |
+
'other': []
|
156 |
+
}
|
157 |
+
|
158 |
+
for skill in skills:
|
159 |
+
skill_lower = skill.lower()
|
160 |
+
categorized_flag = False
|
161 |
+
|
162 |
+
for category, keywords in self.skill_categories.items():
|
163 |
+
if any(keyword in skill_lower for keyword in keywords):
|
164 |
+
categorized[category].append(skill)
|
165 |
+
categorized_flag = True
|
166 |
+
break
|
167 |
+
|
168 |
+
if not categorized_flag:
|
169 |
+
categorized['other'].append(skill)
|
170 |
+
|
171 |
+
return categorized
|
172 |
+
|
173 |
+
def extract_achievements(self, text: str) -> List[str]:
    """
    Extract achievements with numbers/metrics from text.

    Args:
        text (str): Input text

    Returns:
        List[str]: Sentence fragments containing a metric (percentage,
        K/M figure, money amount, or time period). May contain duplicates
        when one fragment matches several patterns.
    """
    achievements = []

    # Patterns for achievements with numbers. All groups are non-capturing:
    # with a capturing group (as in the old time-period pattern), re.findall
    # returns only the group (e.g. "years") instead of the matched phrase.
    patterns = [
        r'[^.]*\b\d+%[^.]*',                      # Percentage achievements
        r'[^.]*\b\d+[kK]\+?[^.]*',                # Numbers with K (thousands)
        r'[^.]*\b\d+[mM]\+?[^.]*',                # Numbers with M (millions)
        r'[^.]*\$\d+[^.]*',                       # Money amounts
        r'[^.]*\b\d+\s*(?:years?|months?)[^.]*',  # Time periods
    ]

    for pattern in patterns:
        matches = re.findall(pattern, text, re.IGNORECASE)
        achievements.extend(match.strip() for match in matches)

    return achievements
|
199 |
+
|
200 |
+
def _clean_text(self, text: str) -> str:
|
201 |
+
"""Clean and normalize text"""
|
202 |
+
if not text:
|
203 |
+
return ""
|
204 |
+
|
205 |
+
# Remove extra whitespace
|
206 |
+
text = re.sub(r'\s+', ' ', text).strip()
|
207 |
+
|
208 |
+
# Remove special characters but keep basic punctuation
|
209 |
+
text = re.sub(r'[^\w\s\-.,!?()&/]', '', text)
|
210 |
+
|
211 |
+
return text
|
212 |
+
|
213 |
+
def _clean_experience_list(self, experience_list: List[Dict]) -> List[Dict]:
|
214 |
+
"""Clean experience entries"""
|
215 |
+
cleaned_experience = []
|
216 |
+
|
217 |
+
for exp in experience_list:
|
218 |
+
if isinstance(exp, dict):
|
219 |
+
cleaned_exp = {
|
220 |
+
'title': self._clean_text(exp.get('title', '')),
|
221 |
+
'company': self._clean_text(exp.get('company', '')),
|
222 |
+
'duration': self._clean_text(exp.get('duration', '')),
|
223 |
+
'description': self._clean_text(exp.get('description', '')),
|
224 |
+
'location': self._clean_text(exp.get('location', '')),
|
225 |
+
}
|
226 |
+
|
227 |
+
# Parse duration
|
228 |
+
cleaned_exp['duration_info'] = self.parse_duration(cleaned_exp['duration'])
|
229 |
+
|
230 |
+
# Extract achievements
|
231 |
+
cleaned_exp['achievements'] = self.extract_achievements(
|
232 |
+
cleaned_exp['description']
|
233 |
+
)
|
234 |
+
|
235 |
+
cleaned_experience.append(cleaned_exp)
|
236 |
+
|
237 |
+
return cleaned_experience
|
238 |
+
|
239 |
+
def _clean_education_list(self, education_list: List[Dict]) -> List[Dict]:
|
240 |
+
"""Clean education entries"""
|
241 |
+
cleaned_education = []
|
242 |
+
|
243 |
+
for edu in education_list:
|
244 |
+
if isinstance(edu, dict):
|
245 |
+
cleaned_edu = {
|
246 |
+
'degree': self._clean_text(edu.get('degree', '')),
|
247 |
+
'school': self._clean_text(edu.get('school', '')),
|
248 |
+
'year': self._clean_text(edu.get('year', '')),
|
249 |
+
'field': self._clean_text(edu.get('field', '')),
|
250 |
+
}
|
251 |
+
cleaned_education.append(cleaned_edu)
|
252 |
+
|
253 |
+
return cleaned_education
|
254 |
+
|
255 |
+
def _clean_skills_list(self, skills_list: List[str]) -> List[str]:
|
256 |
+
"""Clean and deduplicate skills"""
|
257 |
+
if not skills_list:
|
258 |
+
return []
|
259 |
+
|
260 |
+
cleaned_skills = []
|
261 |
+
seen_skills = set()
|
262 |
+
|
263 |
+
for skill in skills_list:
|
264 |
+
cleaned_skill = self._clean_text(str(skill))
|
265 |
+
skill_lower = cleaned_skill.lower()
|
266 |
+
|
267 |
+
if cleaned_skill and skill_lower not in seen_skills:
|
268 |
+
cleaned_skills.append(cleaned_skill)
|
269 |
+
seen_skills.add(skill_lower)
|
270 |
+
|
271 |
+
return cleaned_skills
|
272 |
+
|
273 |
+
def _parse_connections(self, connections_str: str) -> int:
|
274 |
+
"""Parse connection count from string"""
|
275 |
+
if not connections_str:
|
276 |
+
return 0
|
277 |
+
|
278 |
+
# Extract numbers from connection string
|
279 |
+
numbers = re.findall(r'\d+', connections_str)
|
280 |
+
|
281 |
+
if numbers:
|
282 |
+
return int(numbers[0])
|
283 |
+
|
284 |
+
# Handle "500+" format
|
285 |
+
if '500+' in connections_str:
|
286 |
+
return 500
|
287 |
+
|
288 |
+
return 0
|