Akshay Chame commited on
Commit
035c4af
Β·
1 Parent(s): af9b2d5

πŸš€ Add LinkedIn Profile Enhancer Streamlit app with all agents and dependencies

Browse files
.env.example ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment Variables for LinkedIn Profile Enhancer
2
+
3
+ # OpenAI API Configuration
4
+ OPENAI_API_KEY=your_openai_api_key_here
5
+
6
+ # Apify API Configuration
7
+ APIFY_API_TOKEN=your_apify_api_token_here
8
+
9
+ # Optional: Custom model settings
10
+ OPENAI_MODEL=gpt-4o-mini
11
+ TEMPERATURE=0.7
12
+
13
+ # Optional: Debugging
14
+ DEBUG=False
requirements.txt CHANGED
@@ -1,3 +1,14 @@
1
  altair
 
2
  pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
1
  altair
2
+ streamlit
3
  pandas
4
+ requests
5
+ beautifulsoup4
6
+ selenium
7
+ numpy
8
+ python-dotenv
9
+ pydantic
10
+ openai
11
+ anthropic
12
+ apify-client
13
+ plotly
14
+ Pillow
src/agents/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Agents package initialization
src/agents/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (195 Bytes). View file
 
src/agents/__pycache__/analyzer_agent.cpython-311.pyc ADDED
Binary file (13.8 kB). View file
 
src/agents/__pycache__/content_agent.cpython-311.pyc ADDED
Binary file (18.4 kB). View file
 
src/agents/__pycache__/orchestrator.cpython-311.pyc ADDED
Binary file (11.3 kB). View file
 
src/agents/__pycache__/scraper_agent.cpython-311.pyc ADDED
Binary file (16 kB). View file
 
src/agents/analyzer_agent.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Profile Analysis Agent
2
+ import re
3
+ from typing import Dict, Any, List
4
+ from collections import Counter
5
+
6
+ class AnalyzerAgent:
7
+ """Agent responsible for analyzing LinkedIn profiles and providing insights"""
8
+
9
+ def __init__(self):
10
+ self.action_words = [
11
+ 'led', 'managed', 'developed', 'created', 'implemented', 'designed',
12
+ 'built', 'improved', 'increased', 'reduced', 'optimized', 'delivered',
13
+ 'achieved', 'launched', 'established', 'coordinated', 'executed'
14
+ ]
15
+
16
+ def analyze_profile(self, profile_data: Dict[str, Any], job_description: str = "") -> Dict[str, Any]:
17
+ """
18
+ Analyze a LinkedIn profile and provide comprehensive insights
19
+
20
+ Args:
21
+ profile_data (Dict[str, Any]): Extracted profile data
22
+ job_description (str): Optional job description for matching analysis
23
+
24
+ Returns:
25
+ Dict[str, Any]: Analysis results with scores and recommendations
26
+ """
27
+ if not profile_data:
28
+ return self._empty_analysis()
29
+
30
+ try:
31
+ # Calculate completeness score
32
+ completeness_score = self._calculate_completeness(profile_data)
33
+
34
+ # Analyze keywords
35
+ keyword_analysis = self._analyze_keywords(profile_data, job_description)
36
+
37
+ # Assess content quality
38
+ content_quality = self._assess_content_quality(profile_data)
39
+
40
+ # Identify strengths and weaknesses
41
+ strengths = self._identify_strengths(profile_data)
42
+ weaknesses = self._identify_weaknesses(profile_data)
43
+
44
+ # Calculate job match if job description provided
45
+ job_match_score = 0
46
+ if job_description:
47
+ job_match_score = self._calculate_job_match(profile_data, job_description)
48
+
49
+ return {
50
+ 'completeness_score': completeness_score,
51
+ 'keyword_analysis': keyword_analysis,
52
+ 'content_quality': content_quality,
53
+ 'strengths': strengths,
54
+ 'weaknesses': weaknesses,
55
+ 'job_match_score': job_match_score,
56
+ 'recommendations': self._generate_recommendations(profile_data, weaknesses),
57
+ 'overall_rating': self._calculate_overall_rating(completeness_score, content_quality, job_match_score)
58
+ }
59
+
60
+ except Exception as e:
61
+ print(f"Error in profile analysis: {str(e)}")
62
+ return self._empty_analysis()
63
+
64
+ def _calculate_completeness(self, profile_data: Dict[str, Any]) -> float:
65
+ """Calculate profile completeness percentage"""
66
+ score = 0
67
+ total_points = 10
68
+
69
+ # Basic information (2 points)
70
+ if profile_data.get('name'): score += 1
71
+ if profile_data.get('headline'): score += 1
72
+
73
+ # About section (2 points)
74
+ about = profile_data.get('about', '')
75
+ if about and len(about) > 50: score += 1
76
+ if about and len(about) > 200: score += 1
77
+
78
+ # Experience (2 points)
79
+ experience = profile_data.get('experience', [])
80
+ if len(experience) >= 1: score += 1
81
+ if len(experience) >= 2: score += 1
82
+
83
+ # Education (1 point)
84
+ if profile_data.get('education'): score += 1
85
+
86
+ # Skills (2 points)
87
+ skills = profile_data.get('skills', [])
88
+ if len(skills) >= 5: score += 1
89
+ if len(skills) >= 10: score += 1
90
+
91
+ # Location (1 point)
92
+ if profile_data.get('location'): score += 1
93
+
94
+ return (score / total_points) * 100
95
+
96
+ def _analyze_keywords(self, profile_data: Dict[str, Any], job_description: str) -> Dict[str, Any]:
97
+ """Analyze keywords in profile vs job description"""
98
+ profile_text = self._extract_all_text(profile_data).lower()
99
+
100
+ # Extract common tech keywords
101
+ tech_keywords = [
102
+ 'python', 'javascript', 'react', 'node.js', 'sql', 'mongodb',
103
+ 'aws', 'docker', 'kubernetes', 'git', 'agile', 'scrum'
104
+ ]
105
+
106
+ found_keywords = []
107
+ for keyword in tech_keywords:
108
+ if keyword.lower() in profile_text:
109
+ found_keywords.append(keyword)
110
+
111
+ # Analyze job description keywords if provided
112
+ missing_keywords = []
113
+ if job_description:
114
+ job_keywords = re.findall(r'\b[a-zA-Z]{3,}\b', job_description.lower())
115
+ job_keyword_freq = Counter(job_keywords)
116
+
117
+ for keyword, freq in job_keyword_freq.most_common(10):
118
+ if keyword not in profile_text and len(keyword) > 3:
119
+ missing_keywords.append(keyword)
120
+
121
+ return {
122
+ 'found_keywords': found_keywords,
123
+ 'missing_keywords': missing_keywords[:5], # Top 5 missing
124
+ 'keyword_density': len(found_keywords)
125
+ }
126
+
127
+ def _assess_content_quality(self, profile_data: Dict[str, Any]) -> Dict[str, Any]:
128
+ """Assess the quality of content"""
129
+ about_section = profile_data.get('about', '')
130
+ headline = profile_data.get('headline', '')
131
+
132
+ return {
133
+ 'headline_length': len(headline),
134
+ 'about_length': len(about_section),
135
+ 'has_quantified_achievements': self._has_numbers(about_section),
136
+ 'uses_action_words': self._has_action_words(about_section)
137
+ }
138
+
139
+ def _identify_strengths(self, profile_data: Dict[str, Any]) -> List[str]:
140
+ """Identify profile strengths"""
141
+ strengths = []
142
+
143
+ if len(profile_data.get('experience', [])) >= 3:
144
+ strengths.append("Good work experience history")
145
+
146
+ if len(profile_data.get('skills', [])) >= 10:
147
+ strengths.append("Comprehensive skills list")
148
+
149
+ if len(profile_data.get('about', '')) > 200:
150
+ strengths.append("Detailed about section")
151
+
152
+ return strengths
153
+
154
+ def _identify_weaknesses(self, profile_data: Dict[str, Any]) -> List[str]:
155
+ """Identify areas for improvement"""
156
+ weaknesses = []
157
+
158
+ if not profile_data.get('about') or len(profile_data.get('about', '')) < 100:
159
+ weaknesses.append("About section needs improvement")
160
+
161
+ if len(profile_data.get('skills', [])) < 5:
162
+ weaknesses.append("Limited skills listed")
163
+
164
+ if not self._has_numbers(profile_data.get('about', '')):
165
+ weaknesses.append("Lacks quantified achievements")
166
+
167
+ return weaknesses
168
+
169
+ def _calculate_job_match(self, profile_data: Dict[str, Any], job_description: str) -> float:
170
+ """Calculate how well profile matches job description"""
171
+ if not job_description:
172
+ return 0
173
+
174
+ profile_text = self._extract_all_text(profile_data).lower()
175
+ job_text = job_description.lower()
176
+
177
+ # Extract keywords from job description
178
+ job_keywords = set(re.findall(r'\b[a-zA-Z]{4,}\b', job_text))
179
+
180
+ # Count matches
181
+ matches = 0
182
+ for keyword in job_keywords:
183
+ if keyword in profile_text:
184
+ matches += 1
185
+
186
+ return min((matches / len(job_keywords)) * 100, 100) if job_keywords else 0
187
+
188
+ def _extract_all_text(self, profile_data: Dict[str, Any]) -> str:
189
+ """Extract all text from profile for analysis"""
190
+ text_parts = []
191
+
192
+ # Add basic info
193
+ text_parts.append(profile_data.get('headline', ''))
194
+ text_parts.append(profile_data.get('about', ''))
195
+
196
+ # Add experience descriptions
197
+ for exp in profile_data.get('experience', []):
198
+ text_parts.append(exp.get('description', ''))
199
+ text_parts.append(exp.get('title', ''))
200
+
201
+ # Add skills
202
+ text_parts.extend(profile_data.get('skills', []))
203
+
204
+ return ' '.join(text_parts)
205
+
206
+ def _has_numbers(self, text: str) -> bool:
207
+ """Check if text contains numbers/metrics"""
208
+ return bool(re.search(r'\d+', text))
209
+
210
+ def _has_action_words(self, text: str) -> bool:
211
+ """Check if text contains action words"""
212
+ text_lower = text.lower()
213
+ return any(word in text_lower for word in self.action_words)
214
+
215
+ def _generate_recommendations(self, profile_data: Dict[str, Any], weaknesses: List[str]) -> List[str]:
216
+ """Generate specific recommendations based on analysis"""
217
+ recommendations = []
218
+
219
+ for weakness in weaknesses:
220
+ if "about section" in weakness.lower():
221
+ recommendations.append("Add a compelling about section with 150-300 words describing your expertise")
222
+ elif "skills" in weakness.lower():
223
+ recommendations.append("Add more relevant skills to reach at least 10 skills")
224
+ elif "quantified" in weakness.lower():
225
+ recommendations.append("Include specific numbers and metrics in your descriptions")
226
+
227
+ return recommendations
228
+
229
+ def _calculate_overall_rating(self, completeness: float, content_quality: Dict[str, Any], job_match: float) -> str:
230
+ """Calculate overall profile rating"""
231
+ score = completeness * 0.4
232
+
233
+ # Add content quality score
234
+ if content_quality.get('has_quantified_achievements'):
235
+ score += 10
236
+ if content_quality.get('uses_action_words'):
237
+ score += 10
238
+ if content_quality.get('about_length', 0) > 150:
239
+ score += 10
240
+
241
+ # Add job match if available
242
+ if job_match > 0:
243
+ score += job_match * 0.3
244
+
245
+ if score >= 80:
246
+ return "Excellent"
247
+ elif score >= 60:
248
+ return "Good"
249
+ elif score >= 40:
250
+ return "Fair"
251
+ else:
252
+ return "Needs Improvement"
253
+
254
+ def _empty_analysis(self) -> Dict[str, Any]:
255
+ """Return empty analysis structure"""
256
+ return {
257
+ 'completeness_score': 0,
258
+ 'keyword_analysis': {'found_keywords': [], 'missing_keywords': [], 'keyword_density': 0},
259
+ 'content_quality': {'headline_length': 0, 'about_length': 0, 'has_quantified_achievements': False, 'uses_action_words': False},
260
+ 'strengths': [],
261
+ 'weaknesses': ['Profile data not available'],
262
+ 'job_match_score': 0,
263
+ 'recommendations': ['Please provide valid profile data'],
264
+ 'overall_rating': 'Unknown'
265
+ }
src/agents/content_agent.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Content Generation Agent
2
+ import os
3
+ from typing import Dict, Any, List
4
+ from prompts.agent_prompts import ContentPrompts
5
+ from openai import OpenAI
6
+ from dotenv import load_dotenv
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+
11
+ class ContentAgent:
12
+ """Agent responsible for generating content suggestions and improvements using OpenAI"""
13
+
14
+ def __init__(self):
15
+ self.prompts = ContentPrompts()
16
+
17
+ # Initialize OpenAI client
18
+ api_key = os.getenv('OPENAI_API_KEY')
19
+ if not api_key:
20
+ print("Warning: OPENAI_API_KEY not found. Using fallback content generation.")
21
+ self.openai_client = None
22
+ else:
23
+ self.openai_client = OpenAI(api_key=api_key)
24
+
25
+ def generate_suggestions(self, analysis: Dict[str, Any], job_description: str = "") -> Dict[str, Any]:
26
+ """
27
+ Generate enhancement suggestions based on analysis
28
+
29
+ Args:
30
+ analysis (Dict[str, Any]): Profile analysis results
31
+ job_description (str): Optional job description for tailored suggestions
32
+
33
+ Returns:
34
+ Dict[str, Any]: Enhancement suggestions
35
+ """
36
+ try:
37
+ suggestions = {
38
+ 'headline_improvements': self._suggest_headline_improvements(analysis, job_description),
39
+ 'about_section': self._suggest_about_improvements(analysis, job_description),
40
+ 'experience_optimization': self._suggest_experience_improvements(analysis),
41
+ 'skills_enhancement': self._suggest_skills_improvements(analysis, job_description),
42
+ 'keyword_optimization': self._suggest_keyword_improvements(analysis),
43
+ 'content_quality': self._suggest_content_quality_improvements(analysis)
44
+ }
45
+
46
+ # Add AI-generated content if OpenAI is available
47
+ if self.openai_client:
48
+ suggestions['ai_generated_content'] = self._generate_ai_content(analysis, job_description)
49
+
50
+ return suggestions
51
+
52
+ except Exception as e:
53
+ raise Exception(f"Failed to generate suggestions: {str(e)}")
54
+
55
+ def _generate_ai_content(self, analysis: Dict[str, Any], job_description: str) -> Dict[str, Any]:
56
+ """Generate AI-powered content using OpenAI"""
57
+ ai_content = {}
58
+
59
+ try:
60
+ # Generate AI headline suggestions
61
+ ai_content['ai_headlines'] = self._generate_ai_headlines(analysis, job_description)
62
+
63
+ # Generate AI about section
64
+ ai_content['ai_about_section'] = self._generate_ai_about_section(analysis, job_description)
65
+
66
+ # Generate AI experience descriptions
67
+ ai_content['ai_experience_descriptions'] = self._generate_ai_experience_descriptions(analysis)
68
+
69
+ except Exception as e:
70
+ print(f"Error generating AI content: {str(e)}")
71
+ ai_content['error'] = "AI content generation temporarily unavailable"
72
+
73
+ return ai_content
74
+
75
+ def _generate_ai_headlines(self, analysis: Dict[str, Any], job_description: str) -> List[str]:
76
+ """Generate AI-powered headline suggestions"""
77
+ if not self.openai_client:
78
+ return []
79
+
80
+ prompt = f"""
81
+ Generate 5 compelling LinkedIn headlines for this professional profile:
82
+
83
+ Current analysis: {analysis.get('summary', 'No analysis available')}
84
+ Target job (if any): {job_description[:200] if job_description else 'General optimization'}
85
+
86
+ Requirements:
87
+ - Maximum 120 characters each
88
+ - Include relevant keywords
89
+ - Professional and engaging tone - Show value proposition
90
+ - Vary the style (some formal, some creative)
91
+
92
+ Return only the headlines, numbered 1-5:
93
+ """
94
+
95
+ try:
96
+ response = self.openai_client.chat.completions.create(
97
+ model="gpt-4o-mini",
98
+ messages=[{"role": "user", "content": prompt}],
99
+ max_tokens=300,
100
+ temperature=0.7
101
+ )
102
+
103
+ headlines = response.choices[0].message.content.strip().split('\n')
104
+ return [h.strip() for h in headlines if h.strip()][:5]
105
+
106
+ except Exception as e:
107
+ print(f"Error generating AI headlines: {str(e)}")
108
+ return []
109
+
110
+ def _generate_ai_about_section(self, analysis: Dict[str, Any], job_description: str) -> str:
111
+ """Generate AI-powered about section"""
112
+ if not self.openai_client:
113
+ return ""
114
+
115
+ prompt = f"""
116
+ Write a compelling LinkedIn About section for this professional:
117
+
118
+ Profile Analysis: {analysis.get('summary', 'No analysis available')}
119
+ Strengths: {', '.join(analysis.get('strengths', []))}
120
+ Target Role: {job_description[:300] if job_description else 'Career advancement'}
121
+
122
+ Requirements:
123
+ - 150-300 words
124
+ - Professional yet personable tone
125
+ - Include quantified achievements
126
+ - Strong opening hook
127
+ - Clear value proposition
128
+ - Call to action at the end
129
+ - Use bullet points for key skills/achievements
130
+ Write the complete About section:
131
+ """
132
+
133
+ try:
134
+ response = self.openai_client.chat.completions.create(
135
+ model="gpt-4o-mini",
136
+ messages=[{"role": "user", "content": prompt}],
137
+ max_tokens=500,
138
+ temperature=0.7
139
+ )
140
+
141
+ return response.choices[0].message.content.strip()
142
+
143
+ except Exception as e:
144
+ print(f"Error generating AI about section: {str(e)}")
145
+ return ""
146
+
147
+ def _generate_ai_experience_descriptions(self, analysis: Dict[str, Any]) -> List[str]:
148
+ """Generate AI-powered experience descriptions"""
149
+ if not self.openai_client:
150
+ return []
151
+
152
+ # This would ideally take specific experience entries
153
+ # For now, return general improvement suggestions
154
+
155
+ prompt = """
156
+ Generate 3 example bullet points for LinkedIn experience descriptions that:
157
+ - Start with strong action verbs
158
+ - Include quantified achievements
159
+ - Show business impact - Are relevant for tech professionals
160
+
161
+ Format: Return only the bullet points, one per line with β€’ prefix
162
+ """
163
+
164
+ try:
165
+ response = self.openai_client.chat.completions.create(
166
+ model="gpt-4o-mini",
167
+ messages=[{"role": "user", "content": prompt}],
168
+ max_tokens=200,
169
+ temperature=0.7
170
+ )
171
+
172
+ descriptions = response.choices[0].message.content.strip().split('\n')
173
+ return [d.strip() for d in descriptions if d.strip()]
174
+
175
+ except Exception as e:
176
+ print(f"Error generating AI experience descriptions: {str(e)}")
177
+ return []
178
+
179
+ def _suggest_headline_improvements(self, analysis: Dict[str, Any], job_description: str = "") -> List[str]:
180
+ """Generate headline improvement suggestions"""
181
+ suggestions = []
182
+
183
+ content_quality = analysis.get('content_quality', {})
184
+ headline_length = content_quality.get('headline_length', 0)
185
+
186
+ if headline_length < 50:
187
+ suggestions.append("Expand your headline to include more keywords and value proposition")
188
+ elif headline_length > 120:
189
+ suggestions.append("Shorten your headline to be more concise and impactful")
190
+
191
+ suggestions.extend([
192
+ "Include specific technologies or skills you specialize in",
193
+ "Mention your years of experience or seniority level",
194
+ "Add a unique value proposition that sets you apart",
195
+ "Use action-oriented language to show what you do"
196
+ ])
197
+
198
+ return suggestions
199
+
200
+ def _suggest_about_improvements(self, analysis: Dict[str, Any], job_description: str = "") -> List[str]:
201
+ """Generate about section improvement suggestions"""
202
+ suggestions = []
203
+
204
+ content_quality = analysis.get('content_quality', {})
205
+ about_length = content_quality.get('about_length', 0)
206
+ has_numbers = content_quality.get('has_quantified_achievements', False)
207
+ has_action_words = content_quality.get('uses_action_words', False)
208
+
209
+ if about_length < 100:
210
+ suggestions.append("Expand your about section to at least 2-3 paragraphs")
211
+
212
+ if not has_numbers:
213
+ suggestions.append("Add quantified achievements (e.g., 'Increased sales by 30%')")
214
+
215
+ if not has_action_words:
216
+ suggestions.append("Use more action verbs to describe your accomplishments")
217
+
218
+ suggestions.extend([
219
+ "Start with a compelling hook that grabs attention",
220
+ "Include your professional mission or passion",
221
+ "Mention specific technologies, tools, or methodologies you use",
222
+ "End with a call-to-action for potential connections"
223
+ ])
224
+
225
+ return suggestions
226
+
227
+ def _suggest_experience_improvements(self, analysis: Dict[str, Any]) -> List[str]:
228
+ """Generate experience section improvement suggestions"""
229
+ suggestions = [
230
+ "Use bullet points to highlight key achievements in each role",
231
+ "Start each bullet point with an action verb",
232
+ "Include metrics and numbers to quantify your impact",
233
+ "Focus on results rather than just responsibilities",
234
+ "Tailor descriptions to align with your target role"
235
+ ]
236
+
237
+ return suggestions
238
+
239
+ def _suggest_skills_improvements(self, analysis: Dict[str, Any], job_description: str) -> List[str]:
240
+ """Generate skills section improvement suggestions"""
241
+ suggestions = []
242
+
243
+ keyword_analysis = analysis.get('keyword_analysis', {})
244
+ missing_keywords = keyword_analysis.get('missing_keywords', [])
245
+
246
+ if missing_keywords and job_description:
247
+ suggestions.append(f"Consider adding these relevant skills: {', '.join(missing_keywords[:5])}")
248
+
249
+ suggestions.extend([
250
+ "Prioritize your most relevant skills at the top",
251
+ "Include both technical and soft skills",
252
+ "Get endorsements from colleagues for your key skills",
253
+ "Add skills that are trending in your industry"
254
+ ])
255
+
256
+ return suggestions
257
+
258
+ def _suggest_keyword_improvements(self, analysis: Dict[str, Any]) -> List[str]:
259
+ """Generate keyword optimization suggestions"""
260
+ suggestions = []
261
+
262
+ keyword_analysis = analysis.get('keyword_analysis', {})
263
+ keyword_density = keyword_analysis.get('keyword_density', 0)
264
+ missing_keywords = keyword_analysis.get('missing_keywords', [])
265
+
266
+ if keyword_density < 50:
267
+ suggestions.append("Increase keyword density by incorporating more relevant terms")
268
+
269
+ if missing_keywords:
270
+ suggestions.append(f"Consider adding these keywords: {', '.join(missing_keywords[:3])}")
271
+
272
+ suggestions.extend([
273
+ "Use industry-specific terminology naturally throughout your profile",
274
+ "Include location-based keywords if relevant",
275
+ "Add keywords related to your target roles"
276
+ ])
277
+
278
+ return suggestions
279
+
280
+ def _suggest_content_quality_improvements(self, analysis: Dict[str, Any]) -> List[str]:
281
+ """Generate general content quality improvement suggestions"""
282
+ completeness_score = analysis.get('completeness_score', 0)
283
+
284
+ suggestions = []
285
+
286
+ if completeness_score < 80:
287
+ suggestions.append("Complete all sections of your profile for better visibility")
288
+
289
+ suggestions.extend([
290
+ "Use a professional headshot as your profile photo",
291
+ "Add a background image that reflects your industry",
292
+ "Keep your profile updated with recent achievements",
293
+ "Engage regularly by posting and commenting on relevant content",
294
+ "Ask for recommendations from colleagues and clients"
295
+ ])
296
+
297
+ return suggestions
298
+
299
+ def generate_headline_examples(self, current_headline: str, job_description: str = "") -> List[str]:
300
+ """Generate example headlines"""
301
+ examples = [
302
+ "Senior Software Engineer | Full-Stack Developer | React & Node.js Expert",
303
+ "Data Scientist | Machine Learning Engineer | Python & AI Specialist",
304
+ "Digital Marketing Manager | SEO Expert | Growth Hacker",
305
+ "Product Manager | Agile Expert | B2B SaaS Specialist"
306
+ ]
307
+
308
+ return examples
309
+
310
+ def generate_about_template(self, analysis: Dict[str, Any]) -> str:
311
+ """Generate an about section template"""
312
+ template = """
313
+ πŸš€ [Opening Hook - What makes you unique]
314
+
315
+ πŸ’Ό [Years] years of experience in [Industry/Field], specializing in [Key Skills/Technologies]. I'm passionate about [What drives you professionally].
316
+
317
+ 🎯 **What I do:**
318
+ β€’ [Key responsibility/achievement 1]
319
+ β€’ [Key responsibility/achievement 2]
320
+ β€’ [Key responsibility/achievement 3]
321
+
322
+ πŸ“Š **Recent achievements:**
323
+ β€’ [Quantified achievement 1]
324
+ β€’ [Quantified achievement 2]
325
+ β€’ [Quantified achievement 3]
326
+
327
+ πŸ› οΈ **Technical expertise:** [List 5-8 key skills/technologies]
328
+
329
+ 🀝 **Let's connect** if you're interested in [collaboration opportunity/your goals] """
330
+
331
+ return template.strip()
332
+
333
+ def test_openai_connection(self) -> bool:
334
+ """Test if OpenAI connection is working"""
335
+ if not self.openai_client:
336
+ return False
337
+
338
+ try:
339
+ response = self.openai_client.chat.completions.create(
340
+ model="gpt-4o-mini",
341
+ messages=[{"role": "user", "content": "Test connection"}],
342
+ max_tokens=10
343
+ )
344
+ return True
345
+ except Exception as e:
346
+ print(f"OpenAI connection test failed: {str(e)}")
347
+ return False
src/agents/orchestrator.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Main Agent Coordinator
2
+ import time
3
+ from .scraper_agent import ScraperAgent
4
+ from .analyzer_agent import AnalyzerAgent
5
+ from .content_agent import ContentAgent
6
+ from memory.memory_manager import MemoryManager
7
+
8
+ class ProfileOrchestrator:
9
+ """Main coordinator for all LinkedIn profile enhancement agents"""
10
+
11
+ def __init__(self):
12
+ self.scraper = ScraperAgent()
13
+ self.analyzer = AnalyzerAgent()
14
+ self.content_generator = ContentAgent()
15
+ self.memory = MemoryManager()
16
+
17
+ def enhance_profile(self, linkedin_url, job_description="", force_refresh=True):
18
+ """
19
+ Main workflow for enhancing a LinkedIn profile
20
+
21
+ Args:
22
+ linkedin_url (str): LinkedIn profile URL
23
+ job_description (str): Optional job description for tailored suggestions
24
+ force_refresh (bool): Force fresh scraping instead of using cache
25
+
26
+ Returns:
27
+ str: Enhancement suggestions and analysis
28
+ """
29
+ try:
30
+ print(f"🎯 Starting profile enhancement for: {linkedin_url}")
31
+
32
+ # Always clear cache for fresh data extraction
33
+ if force_refresh:
34
+ print("πŸ—‘οΈ Clearing all cached data...")
35
+ self.memory.force_refresh_session(linkedin_url)
36
+ # Clear any session data for this URL
37
+ self.memory.clear_session_cache(linkedin_url)
38
+ # Also clear any general cache
39
+ self.memory.clear_session_cache() # Clear all sessions
40
+
41
+ # Step 1: Scrape LinkedIn profile data
42
+ print("πŸ“‘ Step 1: Scraping profile data...")
43
+ print(f"πŸ”— Target URL: {linkedin_url}")
44
+ profile_data = self.scraper.extract_profile_data(linkedin_url)
45
+
46
+ # Verify we got data for the correct URL
47
+ if profile_data.get('url') != linkedin_url:
48
+ print(f"⚠️ URL mismatch detected!")
49
+ print(f" Expected: {linkedin_url}")
50
+ print(f" Got: {profile_data.get('url', 'Unknown')}")
51
+
52
+ # Step 2: Analyze the profile
53
+ print("πŸ” Step 2: Analyzing profile...")
54
+ analysis = self.analyzer.analyze_profile(profile_data, job_description)
55
+
56
+ # Step 3: Generate enhancement suggestions
57
+ print("πŸ’‘ Step 3: Generating suggestions...")
58
+ suggestions = self.content_generator.generate_suggestions(analysis, job_description)
59
+
60
+ # Step 4: Store in memory for future reference
61
+ session_data = {
62
+ 'profile_data': profile_data,
63
+ 'analysis': analysis,
64
+ 'suggestions': suggestions,
65
+ 'job_description': job_description,
66
+ 'timestamp': time.strftime('%Y-%m-%d %H:%M:%S')
67
+ }
68
+ self.memory.store_session(linkedin_url, session_data)
69
+
70
+ print("βœ… Profile enhancement completed!")
71
+ return self._format_output(analysis, suggestions)
72
+
73
+ except Exception as e:
74
+ return f"Error in orchestration: {str(e)}"
75
+
76
+ def _format_output(self, analysis, suggestions):
77
+ """Format the final output for display"""
78
+ output = []
79
+
80
+ # Profile Analysis Section
81
+ output.append("## πŸ“Š Profile Analysis")
82
+ output.append("")
83
+ output.append(f"**πŸ“ˆ Completeness Score:** {analysis.get('completeness_score', 0):.1f}%")
84
+ output.append(f"**⭐ Overall Rating:** {analysis.get('overall_rating', 'Unknown')}")
85
+ output.append(f"**🎯 Job Match Score:** {analysis.get('job_match_score', 0):.1f}%")
86
+ output.append("")
87
+
88
+ # Strengths
89
+ strengths = analysis.get('strengths', [])
90
+ if strengths:
91
+ output.append("### 🌟 Profile Strengths")
92
+ for strength in strengths:
93
+ output.append(f"βœ… {strength}")
94
+ output.append("")
95
+
96
+ # Areas for Improvement
97
+ weaknesses = analysis.get('weaknesses', [])
98
+ if weaknesses:
99
+ output.append("### πŸ”§ Areas for Improvement")
100
+ for weakness in weaknesses:
101
+ output.append(f"πŸ”Έ {weakness}")
102
+ output.append("")
103
+
104
+ # Keyword Analysis
105
+ keyword_analysis = analysis.get('keyword_analysis', {})
106
+ if keyword_analysis:
107
+ found_keywords = keyword_analysis.get('found_keywords', [])
108
+ missing_keywords = keyword_analysis.get('missing_keywords', [])
109
+
110
+ output.append("### οΏ½ Keyword Analysis")
111
+ output.append(f"**Keywords Found ({len(found_keywords)}):** {', '.join(found_keywords[:10])}")
112
+ if missing_keywords:
113
+ output.append(f"**Missing Keywords:** {', '.join(missing_keywords[:5])}")
114
+ output.append("")
115
+
116
+ # Enhancement Suggestions Section
117
+ output.append("## 🎯 Enhancement Suggestions")
118
+ output.append("")
119
+
120
+ for category, items in suggestions.items():
121
+ if category == 'ai_generated_content':
122
+ # Special formatting for AI content
123
+ output.append("### πŸ€– AI-Generated Content Suggestions")
124
+ ai_content = items if isinstance(items, dict) else {}
125
+
126
+ if 'ai_headlines' in ai_content and ai_content['ai_headlines']:
127
+ output.append("")
128
+ output.append("#### ✨ Professional Headlines")
129
+ for i, headline in enumerate(ai_content['ai_headlines'], 1):
130
+ # Clean up the headline format
131
+ cleaned_headline = headline.strip('"').replace('\\"', '"')
132
+ if cleaned_headline.startswith(('1.', '2.', '3.', '4.', '5.')):
133
+ cleaned_headline = cleaned_headline[2:].strip()
134
+ output.append(f"{i}. {cleaned_headline}")
135
+ output.append("")
136
+
137
+ if 'ai_about_section' in ai_content and ai_content['ai_about_section']:
138
+ output.append("#### πŸ“ Enhanced About Section")
139
+ output.append("```")
140
+ about_content = ai_content['ai_about_section']
141
+ # Clean up the about section
142
+ about_lines = about_content.split('\n')
143
+ for line in about_lines:
144
+ if line.strip():
145
+ output.append(line.strip())
146
+ output.append("```")
147
+ output.append("")
148
+
149
+ if 'ai_experience_descriptions' in ai_content and ai_content['ai_experience_descriptions']:
150
+ output.append("#### πŸ’Ό Experience Description Ideas")
151
+ for desc in ai_content['ai_experience_descriptions']:
152
+ output.append(f"β€’ {desc}")
153
+ output.append("")
154
+ else:
155
+ # Standard formatting for other categories
156
+ category_name = category.replace('_', ' ').title()
157
+ output.append(f"### {category_name}")
158
+ if isinstance(items, list):
159
+ for item in items:
160
+ output.append(f"β€’ {item}")
161
+ else:
162
+ output.append(f"β€’ {items}")
163
+ output.append("")
164
+
165
+ # Next Steps Section
166
+ output.append("## πŸ“ˆ Implementation Roadmap")
167
+ output.append("")
168
+ recommendations = analysis.get('recommendations', [])
169
+ if recommendations:
170
+ output.append("### 🎯 Priority Actions")
171
+ for i, rec in enumerate(recommendations[:5], 1):
172
+ output.append(f"{i}. {rec}")
173
+ output.append("")
174
+
175
+ output.append("### πŸ“Š General Best Practices")
176
+ output.append("πŸ”Έ Update your profile regularly with new achievements")
177
+ output.append("πŸ”Έ Use professional keywords relevant to your industry")
178
+ output.append("πŸ”Έ Engage with your network by sharing valuable content")
179
+ output.append("πŸ”Έ Ask for recommendations from colleagues and clients")
180
+ output.append("πŸ”Έ Monitor profile views and connection requests")
181
+ output.append("")
182
+
183
+ output.append("---")
184
+ output.append("*Analysis powered by AI β€’ Data scraped with respect to LinkedIn's ToS*")
185
+
186
+ return "\n".join(output)
src/agents/scraper_agent.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import json
4
+ import requests
5
+ from typing import Dict, Any
6
+ from dotenv import load_dotenv
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+
11
class ScraperAgent:
    """Agent that extracts LinkedIn profile data via the Apify REST API.

    Talks to the ``dev_fusion~linkedin-profile-scraper`` actor through the
    ``run-sync-get-dataset-items`` endpoint, which starts an actor run and
    returns the scraped dataset items in one blocking HTTP call.
    """

    def __init__(self):
        # Fail fast: every request below requires the token.
        self.apify_token = os.getenv('APIFY_API_TOKEN')
        if not self.apify_token:
            raise ValueError("APIFY_API_TOKEN not found in environment variables")

        # Personal Apify tokens conventionally start with 'apify_api_'.
        # Warn rather than fail so non-standard tokens still work.
        if not self.apify_token.startswith('apify_api_'):
            print(f"⚠️ Warning: Token doesn't start with 'apify_api_'. Current token starts with: {self.apify_token[:10]}...")

        # run-sync-get-dataset-items runs the actor and returns its dataset directly.
        self.api_url = f"https://api.apify.com/v2/acts/dev_fusion~linkedin-profile-scraper/run-sync-get-dataset-items?token={self.apify_token}"

        # Only a prefix is logged; never print the full secret.
        print(f"🔑 Using Apify token: {self.apify_token[:15]}...")

    def extract_profile_data(self, linkedin_url: str) -> Dict[str, Any]:
        """
        Extract profile data from a LinkedIn URL using the Apify REST API.

        Args:
            linkedin_url (str): LinkedIn profile URL (scheme optional).

        Returns:
            Dict[str, Any]: Normalized profile data (see _process_apify_data).

        Raises:
            requests.Timeout: If the scraping call exceeds the timeout.
            requests.RequestException: If the API returns a non-success status.
            Exception: Any other failure, with the original cause chained.
        """
        try:
            print(f"🔍 Starting scraping for: {linkedin_url}")
            print(f"🔗 URL being processed: {linkedin_url}")
            print(f"⏰ Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}")

            # Normalize the URL: trim whitespace and default to HTTPS.
            original_url = linkedin_url
            linkedin_url = linkedin_url.strip()
            if not linkedin_url.startswith('http'):
                linkedin_url = 'https://' + linkedin_url

            print(f"🧹 Cleaned URL: {linkedin_url}")
            if original_url != linkedin_url:
                print(f"🔄 URL normalized: {original_url} → {linkedin_url}")

            run_input = {
                "profileUrls": [linkedin_url],  # this actor expects profileUrls, not startUrls
                "slowDown": True,  # throttle to reduce the chance of being blocked
                "includeSkills": True,
                "includeExperience": True,
                "includeEducation": True,
                "includeRecommendations": False,  # optional and slow
                "saveHtml": False,
                "saveMarkdown": False
            }
            print(f"📋 Apify input: {json.dumps(run_input, indent=2)}")

            print("🚀 Running Apify scraper via REST API...")
            response = requests.post(
                self.api_url,
                json=run_input,
                headers={'Content-Type': 'application/json'},
                timeout=180  # the synchronous actor run can take a few minutes
            )

            if response.status_code in (200, 201):  # Apify may answer 201 on success
                results = response.json()
                print(f"✅ API Response received: {len(results)} items")

                if results:
                    # A single profile was requested, so take the first item.
                    processed_data = self._process_apify_data(results[0], linkedin_url)
                    print("✅ Successfully extracted and processed profile data")
                    return processed_data

                error_msg = "No data returned from Apify API. The profile may be private or the scraper encountered an issue."
                print(f"❌ {error_msg}")
                raise ValueError(error_msg)

            # Non-success status: log a descriptive message and raise.
            self._raise_api_error(response)

        except requests.Timeout:
            error_msg = "Request timed out. The scraping operation took too long to complete."
            print(f"⏰ {error_msg}")
            raise requests.Timeout(error_msg)
        except requests.RequestException:
            # Already logged in _raise_api_error; keep the original type for callers.
            raise
        except Exception as e:
            error_msg = f"Error extracting profile data: {str(e)}"
            print(f"❌ {error_msg}")
            # Chain the cause so the original traceback is not lost.
            raise Exception(error_msg) from e

    def _raise_api_error(self, response) -> None:
        """Log a descriptive message for a failed API response, then raise.

        Always raises requests.RequestException.
        """
        try:
            error_response = response.json()
            error_details = f" - {error_response.get('error', {}).get('message', response.text)}"
        except ValueError:  # body was not JSON (narrowed from a bare except)
            error_details = f" - {response.text}"

        status = response.status_code
        if status == 401:
            error_msg = f"Authentication failed (401): Invalid or expired API token{error_details}"
            print(f"❌ {error_msg}")
            print(f"🔑 Token being used: {self.apify_token[:15]}...")
            print(f"💡 Please check your APIFY_API_TOKEN in your .env file")
        elif status == 404:
            error_msg = f"Actor not found (404): The actor 'dev_fusion~linkedin-profile-scraper' may not exist{error_details}"
            print(f"❌ {error_msg}")
        elif status == 429:
            error_msg = f"Rate limit exceeded (429): Too many requests{error_details}"
            print(f"❌ {error_msg}")
        else:
            error_msg = f"API request failed with status {status}{error_details}"
            print(f"❌ {error_msg}")

        raise requests.RequestException(error_msg)

    def test_apify_connection(self) -> bool:
        """Check that the Apify actor endpoint is reachable with this token.

        Diagnostic helper: reports problems via prints and returns a bool
        instead of raising.
        """
        try:
            test_url = f"https://api.apify.com/v2/acts/dev_fusion~linkedin-profile-scraper?token={self.apify_token}"
            print(f"🔗 Testing connection to: {test_url[:50]}...")

            response = requests.get(test_url, timeout=10)

            if response.status_code == 200:
                actor_info = response.json()
                print(f"✅ Successfully connected to Apify actor: {actor_info.get('name', 'LinkedIn Profile Scraper')}")
                return True
            if response.status_code == 401:
                print(f"❌ Authentication failed (401): Invalid or expired API token")
                print(f"🔑 Token being used: {self.apify_token[:15]}...")
                print(f"💡 Please check your APIFY_API_TOKEN in your .env file")
            elif response.status_code == 404:
                print(f"❌ Actor not found (404): The actor 'dev_fusion~linkedin-profile-scraper' may not exist or be accessible")
            else:
                print(f"❌ Failed to connect to Apify: {response.status_code} - {response.text}")
            return False
        except Exception as e:
            print(f"❌ Failed to connect to Apify: {str(e)}")
            return False

    def _process_apify_data(self, raw_data: Dict[str, Any], url: str) -> Dict[str, Any]:
        """Normalize one raw actor dataset item into the app's profile dict.

        Args:
            raw_data: One dataset item as returned by the actor.
            url: The URL that was actually requested (stored verbatim).

        Returns:
            Dict[str, Any]: Flattened profile data with stable keys.
        """
        print(f"📊 Processing data for URL: {url}")
        print(f"📋 Raw data keys: {list(raw_data.keys())}")

        # Basic fields; the actor's schema uses 'about' (not 'summary') and
        # 'fullName' (not 'name').
        profile_data = {
            'name': raw_data.get('fullName', ''),
            'headline': raw_data.get('headline', ''),
            'location': raw_data.get('addressWithCountry', raw_data.get('addressWithoutCountry', '')),
            'about': raw_data.get('about', ''),
            'connections': raw_data.get('connections', 0),
            'followers': raw_data.get('followers', 0),
            'email': raw_data.get('email', ''),
            'url': url,  # use the URL that was actually requested
            'profile_image': raw_data.get('profilePic', ''),
            'profile_image_hq': raw_data.get('profilePicHighQuality', ''),
            'scraped_at': time.strftime('%Y-%m-%d %H:%M:%S'),
            'job_title': raw_data.get('jobTitle', ''),
            'company_name': raw_data.get('companyName', ''),
            'company_industry': raw_data.get('companyIndustry', ''),
            'company_website': raw_data.get('companyWebsite', ''),
            'company_size': raw_data.get('companySize', ''),
            'current_job_duration': raw_data.get('currentJobDuration', ''),
            'top_skills': raw_data.get('topSkillsByEndorsements', '')
        }

        print(f"✅ Extracted profile for: {profile_data.get('name', 'Unknown')}")
        print(f"🔗 Profile URL stored: {profile_data['url']}")

        profile_data['experience'] = [
            self._parse_experience(exp) for exp in raw_data.get('experiences', [])
        ]
        profile_data['education'] = [
            self._parse_education(edu) for edu in raw_data.get('educations', [])
        ]

        # Skills arrive either as {'title': ...} dicts or bare strings.
        skills_list = []
        for skill in raw_data.get('skills', []):
            if isinstance(skill, dict) and 'title' in skill:
                skills_list.append(skill['title'])
            elif isinstance(skill, str):
                skills_list.append(skill)
        profile_data['skills'] = skills_list

        # Certifications come from 'licenseAndCertificates'.
        profile_data['certifications'] = [
            {
                'title': cert.get('title', ''),
                'issuer': cert.get('subtitle', ''),
                'date': cert.get('caption', ''),
                'credential_id': cert.get('metadata', ''),
                'logo': cert.get('logo', '')
            }
            for cert in raw_data.get('licenseAndCertificates', [])
        ]

        profile_data['languages'] = raw_data.get('languages', [])

        # Keep only dict-shaped volunteer entries; the actor mixes formats here.
        profile_data['volunteer_experience'] = [
            vol for vol in raw_data.get('volunteerAndAwards', []) if isinstance(vol, dict)
        ]

        # Additional rich data passed through untouched.
        profile_data['honors_awards'] = raw_data.get('honorsAndAwards', [])
        profile_data['projects'] = raw_data.get('projects', [])
        profile_data['publications'] = raw_data.get('publications', [])
        profile_data['recommendations'] = raw_data.get('recommendations', [])
        profile_data['interests'] = raw_data.get('interests', [])

        return profile_data

    def _parse_experience(self, exp: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten one raw experience entry; joins subComponents descriptions."""
        caption = exp.get('caption', '')
        item = {
            'title': exp.get('title', ''),
            'company': exp.get('subtitle', '').replace(' · Full-time', '').replace(' · Part-time', ''),
            'duration': caption,
            'description': '',  # filled from subComponents below when present
            'location': exp.get('metadata', ''),
            'company_logo': exp.get('logo', ''),
            # Heuristic: ongoing roles either say "Present" or omit the "·" separator.
            'is_current': 'Present' in caption or '·' not in caption
        }
        for sub in exp.get('subComponents') or []:
            if sub.get('description'):
                texts = [
                    desc['text']
                    for desc in sub['description']
                    if isinstance(desc, dict) and desc.get('text')
                ]
                item['description'] = ' '.join(texts)
        return item

    def _parse_education(self, edu: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten one raw education entry; splits subtitle into degree/field."""
        subtitle = edu.get('subtitle', '')
        item = {
            'degree': subtitle,  # overwritten below if the subtitle splits cleanly
            'school': edu.get('title', ''),
            'field': '',
            'year': edu.get('caption', ''),
            'logo': edu.get('logo', ''),
            'grade': ''
        }

        # Subtitle is usually "<degree> - <field>" or "<degree>, <field>".
        for sep in (' - ', ', '):
            if sep in subtitle:
                degree, _, field = subtitle.partition(sep)
                item['degree'] = degree
                item['field'] = field
                break

        # Grade lines live in subComponents as "Grade: ..." text entries.
        for sub in edu.get('subComponents') or []:
            for desc in sub.get('description') or []:
                if isinstance(desc, dict) and desc.get('text', '').startswith('Grade:'):
                    item['grade'] = desc['text']
        return item
src/memory/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Memory package initialization
src/memory/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (195 Bytes). View file
 
src/memory/__pycache__/memory_manager.cpython-311.pyc ADDED
Binary file (12.4 kB). View file
 
src/memory/memory_manager.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Session & Persistent Memory Manager
2
+ import json
3
+ import os
4
+ from datetime import datetime
5
+ from typing import Dict, Any, Optional
6
+
7
class MemoryManager:
    """Manages session data and persistent storage for the LinkedIn enhancer.

    Session data lives only in memory for the process lifetime; persistent
    data is mirrored to a JSON file under ``storage_dir``.
    """

    def __init__(self, storage_dir: str = "data"):
        """
        Args:
            storage_dir (str): Directory that holds the persistent JSON file.
        """
        self.storage_dir = storage_dir
        self.session_data: Dict[str, Any] = {}
        self.persistent_file = os.path.join(storage_dir, "persistent_data.json")

        # Create storage directory if it doesn't exist.
        os.makedirs(storage_dir, exist_ok=True)

        # Load existing persistent data (empty dict on first run / corrupt file).
        self.persistent_data = self._load_persistent_data()

    def store_session(self, profile_url: str, data: Dict[str, Any]) -> None:
        """
        Store session data for a specific profile.

        Args:
            profile_url (str): LinkedIn profile URL used as the cache key
            data (Dict[str, Any]): Session data to store
        """
        session_key = self._create_session_key(profile_url)
        self.session_data[session_key] = {
            'timestamp': datetime.now().isoformat(),
            'profile_url': profile_url,
            'data': data
        }

    def get_session(self, profile_url: str) -> Optional[Dict[str, Any]]:
        """
        Retrieve session data for a specific profile.

        Args:
            profile_url (str): LinkedIn profile URL

        Returns:
            Optional[Dict[str, Any]]: Session record if present, else None
        """
        return self.session_data.get(self._create_session_key(profile_url))

    def store_persistent(self, key: str, data: Any) -> None:
        """
        Store data persistently to disk.

        Args:
            key (str): Storage key
            data (Any): JSON-serializable data to store
        """
        self.persistent_data[key] = {
            'timestamp': datetime.now().isoformat(),
            'data': data
        }
        self._save_persistent_data()

    def get_persistent(self, key: str) -> Optional[Any]:
        """
        Retrieve persistent data.

        Args:
            key (str): Storage key

        Returns:
            Optional[Any]: Stored payload if present, else None
        """
        stored_item = self.persistent_data.get(key)
        return stored_item['data'] if stored_item else None

    def store_user_preferences(self, user_id: str, preferences: Dict[str, Any]) -> None:
        """
        Store user preferences.

        Args:
            user_id (str): User identifier
            preferences (Dict[str, Any]): User preferences
        """
        self.store_persistent(f"user_preferences_{user_id}", preferences)

    def get_user_preferences(self, user_id: str) -> Dict[str, Any]:
        """
        Retrieve user preferences.

        Args:
            user_id (str): User identifier

        Returns:
            Dict[str, Any]: User preferences ({} when none are stored)
        """
        preferences = self.get_persistent(f"user_preferences_{user_id}")
        return preferences if preferences else {}

    def store_analysis_history(self, profile_url: str, analysis: Dict[str, Any]) -> None:
        """
        Store analysis history for tracking improvements over time.

        Args:
            profile_url (str): LinkedIn profile URL
            analysis (Dict[str, Any]): Analysis results
        """
        history_key = f"analysis_history_{self._create_session_key(profile_url)}"
        history = self.get_persistent(history_key) or []
        history.append({
            'timestamp': datetime.now().isoformat(),
            'analysis': analysis
        })
        # Keep only the last 10 analyses to bound file growth.
        self.store_persistent(history_key, history[-10:])

    def get_analysis_history(self, profile_url: str) -> list:
        """
        Retrieve analysis history for a profile.

        Args:
            profile_url (str): LinkedIn profile URL

        Returns:
            list: Analysis history, oldest first ([] when none)
        """
        history_key = f"analysis_history_{self._create_session_key(profile_url)}"
        return self.get_persistent(history_key) or []

    def clear_session(self, profile_url: Optional[str] = None) -> None:
        """
        Clear session data silently (no console output).

        Args:
            profile_url (Optional[str]): Specific profile to clear; all if None
        """
        if profile_url:
            self.session_data.pop(self._create_session_key(profile_url), None)
        else:
            self.session_data.clear()

    def clear_session_cache(self, profile_url: Optional[str] = None) -> None:
        """
        Clear session cache for one profile or all, logging what was cleared.

        Args:
            profile_url (Optional[str]): URL to clear cache for; all if None
        """
        if profile_url:
            session_key = self._create_session_key(profile_url)
            if session_key in self.session_data:
                del self.session_data[session_key]
                print(f"🗑️ Cleared session cache for: {profile_url}")
        else:
            self.session_data.clear()
            print("🗑️ Cleared all session cache")

    def force_refresh_session(self, profile_url: str) -> None:
        """
        Force a refresh by clearing the cache for a specific profile.

        Args:
            profile_url (str): LinkedIn profile URL
        """
        self.clear_session_cache(profile_url)
        print(f"🔄 Forced refresh for: {profile_url}")

    def get_session_summary(self) -> Dict[str, Any]:
        """
        Get a summary of current session data.

        Returns:
            Dict[str, Any]: Counts, keys, and storage location
        """
        return {
            'active_sessions': len(self.session_data),
            'sessions': list(self.session_data.keys()),
            'storage_location': self.storage_dir
        }

    def _create_session_key(self, profile_url: str) -> str:
        """Create a short, filesystem/JSON-safe key from a profile URL."""
        import hashlib
        # MD5 is fine here: it's a cache key, not a security boundary.
        return hashlib.md5(profile_url.encode()).hexdigest()[:16]

    def _load_persistent_data(self) -> Dict[str, Any]:
        """Load persistent data from disk; returns {} if missing or unreadable."""
        if os.path.exists(self.persistent_file):
            try:
                with open(self.persistent_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except (json.JSONDecodeError, IOError):
                return {}
        return {}

    def _save_persistent_data(self) -> None:
        """Save persistent data to disk atomically.

        Writes to a temp file and renames it over the target so a crash
        mid-write cannot leave a truncated/corrupt JSON store.
        """
        tmp_path = self.persistent_file + ".tmp"
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(self.persistent_data, f, indent=2, ensure_ascii=False)
            os.replace(tmp_path, self.persistent_file)  # atomic on POSIX and Windows
        except IOError as e:
            print(f"Warning: Could not save persistent data: {e}")

    def export_data(self, filename: Optional[str] = None) -> str:
        """
        Export all data to a JSON file.

        Args:
            filename (Optional[str]): Custom filename; timestamped default if None

        Returns:
            str: Path to the exported file
        """
        if not filename:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"linkedin_enhancer_export_{timestamp}.json"

        export_path = os.path.join(self.storage_dir, filename)
        export_payload = {
            'session_data': self.session_data,
            'persistent_data': self.persistent_data,
            'export_timestamp': datetime.now().isoformat()
        }
        with open(export_path, 'w', encoding='utf-8') as f:
            json.dump(export_payload, f, indent=2, ensure_ascii=False)
        return export_path
src/prompts/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Prompts package initialization
src/prompts/__pycache__/agent_prompts.cpython-311.pyc ADDED
Binary file (9.63 kB). View file
 
src/prompts/agent_prompts.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agent Prompts for LinkedIn Profile Enhancer
2
+
3
class ContentPrompts:
    """Collection of prompts for content generation agents"""

    def __init__(self):
        # Aggregates one prompt holder per profile section so a content agent
        # can reach every template family through a single object.
        self.headline_prompts = HeadlinePrompts()
        self.about_prompts = AboutPrompts()
        self.experience_prompts = ExperiencePrompts()
        self.general_prompts = GeneralPrompts()
11
+
12
class HeadlinePrompts:
    """Prompts for headline optimization"""

    # Template placeholders: {headline}, {target_role}, {skills}.
    # Fill with format_prompt() before sending to the model.
    HEADLINE_ANALYSIS = """
    Analyze this LinkedIn headline and provide improvement suggestions:
    
    Current headline: "{headline}"
    Target role: "{target_role}"
    Key skills: {skills}
    
    Consider:
    1. Keyword optimization for the target role
    2. Value proposition clarity
    3. Professional branding
    4. Character limit (120 chars max)
    5. Industry-specific terms
    
    Provide 3-5 alternative headline suggestions.
    """

    # Reusable headline skeletons; each entry has its own placeholder set.
    HEADLINE_TEMPLATES = [
        "{title} | {specialization} | {key_skills}",
        "{seniority} {title} specializing in {domain} | {achievement}",
        "{title} | Helping {target_audience} with {solution} | {technologies}",
        "{role} with {years}+ years in {industry} | {unique_value_prop}"
    ]
38
+
39
class AboutPrompts:
    """Prompts for about section optimization"""

    # Template placeholders: {name}, {current_role}, {experience_years},
    # {key_skills}, {achievements}, {target_audience}.
    ABOUT_STRUCTURE = """
    Create an engaging LinkedIn About section with this structure:
    
    Profile info:
    - Name: {name}
    - Current role: {current_role}
    - Years of experience: {experience_years}
    - Key skills: {key_skills}
    - Notable achievements: {achievements}
    - Target audience: {target_audience}
    
    Structure:
    1. Hook (compelling opening line)
    2. Professional summary (2-3 sentences)
    3. Key expertise and skills
    4. Notable achievements with metrics
    5. Call to action
    
    Keep it conversational, professional, and under 2000 characters.
    """

    # Opening-line templates; each entry has its own placeholder set.
    ABOUT_HOOKS = [
        "🚀 Passionate about transforming {industry} through {technology}",
        "💡 {Years} years of turning complex {domain} challenges into simple solutions",
        "🎯 Helping {target_audience} achieve {outcome} through {approach}",
        "⚡ {Achievement} specialist with a track record of {impact}"
    ]
69
+
70
class ExperiencePrompts:
    """Prompts for experience section optimization"""

    # Template placeholders: {description}, {title}, {company}, {duration},
    # {target_role}.
    EXPERIENCE_ENHANCEMENT = """
    Enhance this work experience entry:
    
    Current description: "{description}"
    Role: {title}
    Company: {company}
    Duration: {duration}
    
    Improve by:
    1. Starting with strong action verbs
    2. Adding quantified achievements
    3. Highlighting relevant skills used
    4. Showing business impact
    5. Using bullet points for readability
    
    Target the experience for: {target_role}
    """
    # Curated action verbs grouped by the kind of accomplishment they describe.
    ACTION_VERBS = {
        "Leadership": ["led", "managed", "directed", "coordinated", "supervised"],
        "Achievement": ["achieved", "delivered", "exceeded", "accomplished", "attained"],
        "Development": ["developed", "created", "built", "designed", "implemented"],
        "Improvement": ["optimized", "enhanced", "streamlined", "upgraded", "modernized"],
        "Problem-solving": ["resolved", "troubleshot", "analyzed", "diagnosed", "solved"]
    }
97
+
98
class GeneralPrompts:
    """General prompts for profile enhancement"""

    # Placeholders: {current_skills}, {target_role}, {job_keywords}.
    SKILLS_OPTIMIZATION = """
    Optimize this skills list for the target role:
    
    Current skills: {current_skills}
    Target role: {target_role}
    Job description keywords: {job_keywords}
    
    Provide:
    1. Priority ranking of current skills
    2. Missing skills to add
    3. Skills to remove or deprioritize
    4. Skill categories organization
    """

    # Placeholders: {profile_content}, {job_description}.
    KEYWORD_OPTIMIZATION = """
    Analyze keyword optimization for this profile:
    
    Profile content: {profile_content}
    Target job description: {job_description}
    
    Identify:
    1. Current keyword density
    2. Missing important keywords
    3. Over-optimized keywords
    4. Natural integration suggestions
    5. Industry-specific terminology gaps
    """

    # Placeholders: {profile_data}, {target_role}, {industry}.
    PROFILE_AUDIT = """
    Conduct a comprehensive LinkedIn profile audit:
    
    Profile data: {profile_data}
    Target role: {target_role}
    Industry: {industry}
    
    Audit areas:
    1. Profile completeness (%)
    2. Keyword optimization
    3. Content quality and engagement potential
    4. Professional branding consistency
    5. Call-to-action effectiveness
    6. Visual elements (photo, banner) recommendations
    
    Provide actionable improvement suggestions with priority levels.
    """
146
+
147
class AnalysisPrompts:
    """Prompts for profile analysis"""

    # Placeholders: {profile_data}, {industry}, {seniority}.
    COMPETITIVE_ANALYSIS = """
    Compare this profile against industry standards:
    
    Profile: {profile_data}
    Industry: {industry}
    Seniority level: {seniority}
    
    Analyze:
    1. Profile completeness vs industry average
    2. Keyword usage vs competitors
    3. Content quality benchmarks
    4. Engagement potential indicators
    5. Areas of competitive advantage
    6. Improvement opportunities
    """

    # Placeholder: {profile_sections}.
    CONTENT_QUALITY = """
    Assess content quality across this LinkedIn profile:
    
    Profile sections: {profile_sections}
    
    Evaluate:
    1. Clarity and readability
    2. Professional tone consistency
    3. Value proposition strength
    4. Quantified achievements presence
    5. Industry relevance
    6. Call-to-action effectiveness
    
    Rate each section 1-10 and provide specific improvement suggestions.
    """
181
+
182
class JobMatchingPrompts:
    """Prompts for job matching analysis"""

    # Placeholders: {profile_data}, {job_description}.
    JOB_MATCH_ANALYSIS = """
    Analyze how well this profile matches the job requirements:
    
    Profile: {profile_data}
    Job description: {job_description}
    
    Match analysis:
    1. Skills alignment (%)
    2. Experience relevance
    3. Keyword overlap
    4. Education/certification fit
    5. Overall match score
    
    Provide specific recommendations to improve match score.
    """

    # Placeholders: {profile_data}, {job_description}, {current_match_score}.
    TAILORING_SUGGESTIONS = """
    Suggest profile modifications to better match this opportunity:
    
    Current profile: {profile_data}
    Target job: {job_description}
    Match score: {current_match_score}
    
    Prioritized suggestions:
    1. High-impact changes (immediate wins)
    2. Medium-impact improvements
    3. Long-term development areas
    4. Skills to highlight/add
    5. Content restructuring recommendations
    """
215
+
216
+ # Utility functions for prompt formatting
217
def format_prompt(template: str, **kwargs) -> str:
    """Fill a prompt template's ``{placeholder}`` fields from keyword arguments.

    Deliberately best-effort: when a placeholder has no matching keyword
    argument, an explanatory error string is returned instead of raising,
    so callers always receive text.
    """
    try:
        rendered = template.format(**kwargs)
    except KeyError as missing:
        return f"Error formatting prompt: Missing variable {missing}"
    return rendered
223
+
224
def get_prompt_by_category(category: str, prompt_name: str) -> str:
    """Look up a prompt template by category and attribute name.

    Matching is case-insensitive on the category and uppercases the prompt
    name to match the class-constant convention. Unknown categories or
    prompt names yield a descriptive error string rather than an exception.
    """
    registry = {
        'headline': HeadlinePrompts(),
        'about': AboutPrompts(),
        'experience': ExperiencePrompts(),
        'general': GeneralPrompts(),
        'analysis': AnalysisPrompts(),
        'job_matching': JobMatchingPrompts(),
    }

    holder = registry.get(category.lower())
    if not holder:
        return f"Category '{category}' not found"

    template = getattr(holder, prompt_name.upper(), None)
    if not template:
        return f"Prompt '{prompt_name}' not found in category '{category}'"
    return template
src/streamlit_app.py CHANGED
@@ -1,40 +1,723 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  """
7
- # Welcome to Streamlit!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
 
 
 
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
 
 
 
 
 
 
 
 
 
 
14
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ import json
3
+ import pandas as pd
4
+ from agents.orchestrator import ProfileOrchestrator
5
+ from agents.scraper_agent import ScraperAgent
6
+ from agents.content_agent import ContentAgent
7
+ import plotly.express as px
8
+ import plotly.graph_objects as go
9
+ from datetime import datetime
10
+
11
+ # Configure Streamlit page
12
+ st.set_page_config(
13
+ page_title="πŸš€ LinkedIn Profile Enhancer",
14
+ page_icon="πŸš€",
15
+ layout="wide",
16
+ initial_sidebar_state="expanded"
17
+ )
18
+
19
# Custom CSS for better styling.
# Injected once at startup; defines the card/header classes referenced by
# the unsafe_allow_html markdown snippets rendered throughout the app.
st.markdown("""
<style>
    .main-header {
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        padding: 2rem;
        border-radius: 10px;
        color: white;
        text-align: center;
        margin-bottom: 2rem;
    }

    .metric-card {
        background: #f8f9fa;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #667eea;
        margin: 0.5rem 0;
    }

    .success-card {
        background: #d4edda;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #28a745;
        margin: 0.5rem 0;
    }

    .warning-card {
        background: #fff3cd;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #ffc107;
        margin: 0.5rem 0;
    }

    .info-card {
        background: #e7f3ff;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #17a2b8;
        margin: 0.5rem 0;
    }

    .stTabs > div > div > div > div {
        padding: 1rem;
    }

    .profile-section {
        background: white;
        padding: 1.5rem;
        border-radius: 10px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        margin: 1rem 0;
    }
</style>
""", unsafe_allow_html=True)
76
+
77
def initialize_session_state():
    """Create every session-state key the app relies on, if missing.

    The orchestrator is constructed lazily on first run; all result slots
    start out as None until an analysis completes.
    """
    if 'orchestrator' not in st.session_state:
        st.session_state.orchestrator = ProfileOrchestrator()

    # Result slots, all initially empty.
    for slot in ('analysis_results', 'profile_data', 'suggestions', 'current_url'):
        if slot not in st.session_state:
            st.session_state[slot] = None
89
+
90
def clear_results_if_url_changed(linkedin_url):
    """Invalidate cached analysis artifacts when a new profile URL arrives.

    No-op when the URL matches the one already analyzed.
    """
    if st.session_state.current_url == linkedin_url:
        return

    for slot in ('analysis_results', 'profile_data', 'suggestions'):
        st.session_state[slot] = None
    st.session_state.current_url = linkedin_url
    st.cache_data.clear()  # Clear any Streamlit cache
    print(f"πŸ”„ URL changed to: {linkedin_url} - Clearing cached data")
99
+
100
def create_header():
    """Create the main header.

    Renders the gradient banner plus the four feature badges as a single
    raw-HTML block (styled by the .main-header CSS class injected above).
    """
    st.markdown("""
    <div class="main-header">
        <h1>πŸš€ LinkedIn Profile Enhancer</h1>
        <p style="font-size: 1.2em; margin: 1rem 0;">AI-powered LinkedIn profile analysis and enhancement suggestions</p>
        <div style="display: flex; justify-content: center; gap: 2rem; margin-top: 1rem;">
            <div style="text-align: center;">
                <div style="font-size: 2em;">πŸ”</div>
                <div>Real Scraping</div>
            </div>
            <div style="text-align: center;">
                <div style="font-size: 2em;">πŸ€–</div>
                <div>AI Analysis</div>
            </div>
            <div style="text-align: center;">
                <div style="font-size: 2em;">🎯</div>
                <div>Smart Suggestions</div>
            </div>
            <div style="text-align: center;">
                <div style="font-size: 2em;">πŸ“Š</div>
                <div>Data Insights</div>
            </div>
        </div>
    </div>
    """, unsafe_allow_html=True)
126
+
127
def create_sidebar():
    """Create the sidebar with input controls.

    Returns:
        tuple[str, str]: (linkedin_url, job_description) as entered by the
        user; either may be an empty string.
    """
    with st.sidebar:
        st.header("πŸ“ Configuration")

        # LinkedIn URL input
        linkedin_url = st.text_input(
            "πŸ”— LinkedIn Profile URL",
            placeholder="https://linkedin.com/in/your-profile",
            help="Enter the full LinkedIn profile URL to analyze"
        )

        # Job description input
        job_description = st.text_area(
            "🎯 Target Job Description (Optional)",
            placeholder="Paste the job description here for tailored suggestions...",
            height=150,
            help="Include job description for personalized optimization"
        )

        # API Status
        st.subheader("πŸ”Œ API Status")

        # Test API connections on demand; each probe is isolated so one
        # provider failing does not hide the other's status.
        if st.button("πŸ”„ Test Connections"):
            with st.spinner("Testing API connections..."):
                # Test Apify
                try:
                    scraper = ScraperAgent()
                    apify_status = scraper.test_apify_connection()
                    if apify_status:
                        st.success("βœ… Apify: Connected")
                    else:
                        st.error("❌ Apify: Failed")
                except Exception as e:
                    st.error(f"❌ Apify: Error - {str(e)}")

                # Test OpenAI
                try:
                    content_agent = ContentAgent()
                    openai_status = content_agent.test_openai_connection()
                    if openai_status:
                        st.success("βœ… OpenAI: Connected")
                    else:
                        st.error("❌ OpenAI: Failed")
                except Exception as e:
                    st.error(f"❌ OpenAI: Error - {str(e)}")

        # Examples
        st.subheader("πŸ’‘ Example URLs")
        example_urls = [
            "https://linkedin.com/in/example-profile",
            "https://www.linkedin.com/in/sample-user"
        ]

        for url in example_urls:
            if st.button(f"πŸ“‹ {url.split('/')[-1]}", key=url):
                # NOTE(review): example_url is written here but not read
                # anywhere visible in this file — confirm it is consumed
                # elsewhere or wire it into the URL text input.
                st.session_state.example_url = url

    return linkedin_url, job_description
187
+
188
def create_metrics_display(analysis):
    """Render the four headline metrics for an analysis result.

    Missing fields fall back to 0 (scores) or 'Unknown' (rating), so a
    partial analysis dict still renders.
    """
    keyword_info = analysis.get('keyword_analysis', {})

    # (label, value) pairs, one per column, left to right.
    metric_specs = [
        ("πŸ“ˆ Completeness Score", f"{analysis.get('completeness_score', 0):.1f}%"),
        ("⭐ Overall Rating", analysis.get('overall_rating', 'Unknown')),
        ("🎯 Job Match Score", f"{analysis.get('job_match_score', 0):.1f}%"),
        ("πŸ” Keywords Found", len(keyword_info.get('found_keywords', []))),
    ]

    for column, (label, value) in zip(st.columns(4), metric_specs):
        with column:
            st.metric(label, value, delta=None)
222
+
223
def create_analysis_charts(analysis):
    """Create analysis charts.

    Left: a static pie chart of the fixed section weights used for the
    completeness score. Right: a gauge of the profile's current
    completeness against a 90% target.
    """
    col1, col2 = st.columns(2)

    with col1:
        # Completeness breakdown — hard-coded weights (sum to 100); these
        # mirror how the analyzer weights each section, not live data.
        scores = {
            'Profile Info': 20,
            'About Section': 25,
            'Experience': 25,
            'Skills': 15,
            'Education': 15
        }

        fig_pie = px.pie(
            values=list(scores.values()),
            names=list(scores.keys()),
            title="Profile Section Weights",
            color_discrete_sequence=px.colors.qualitative.Set3
        )
        fig_pie.update_layout(height=400)
        st.plotly_chart(fig_pie, use_container_width=True)

    with col2:
        # Score comparison gauge: delta arrow is relative to the 90% target,
        # red threshold line marks the target on the dial.
        current_score = analysis.get('completeness_score', 0)
        target_score = 90

        fig_gauge = go.Figure(go.Indicator(
            mode = "gauge+number+delta",
            value = current_score,
            domain = {'x': [0, 1], 'y': [0, 1]},
            title = {'text': "Profile Completeness"},
            delta = {'reference': target_score, 'increasing': {'color': "green"}},
            gauge = {
                'axis': {'range': [None, 100]},
                'bar': {'color': "darkblue"},
                'steps': [
                    {'range': [0, 50], 'color': "lightgray"},
                    {'range': [50, 80], 'color': "gray"},
                    {'range': [80, 100], 'color': "lightgreen"}
                ],
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': 90
                }
            }
        ))
        fig_gauge.update_layout(height=400)
        st.plotly_chart(fig_gauge, use_container_width=True)
274
+
275
def display_profile_data(profile_data):
    """Display scraped profile data in a structured format.

    Expects the dict produced by the scraper agent (keys like 'name',
    'headline', 'experience', 'skills', ...). All lookups use .get with
    fallbacks, so partially-scraped profiles still render.
    """
    if not profile_data:
        st.warning("No profile data available")
        return

    # Profile Header with Image
    st.subheader("πŸ‘€ Profile Overview")

    # Create columns for profile image and basic info
    col1, col2, col3 = st.columns([1, 2, 2])

    with col1:
        # Display profile image; prefer the high-quality variant when present.
        profile_image = profile_data.get('profile_image_hq') or profile_data.get('profile_image')
        if profile_image:
            st.image(profile_image, width=150, caption="Profile Picture")
        else:
            # Placeholder avatar when no image was scraped.
            st.markdown("""
            <div style="width: 150px; height: 150px; background-color: #f0f0f0; border-radius: 50%;
                        display: flex; align-items: center; justify-content: center; font-size: 48px;">
                πŸ‘€
            </div>
            """, unsafe_allow_html=True)

    with col2:
        # NOTE(review): scraped values are interpolated into raw HTML; a
        # profile containing markup would be rendered as-is — confirm the
        # scraper sanitizes these fields.
        st.markdown(f"""
        <div class="info-card">
            <strong>Name:</strong> {profile_data.get('name', 'N/A')}<br>
            <strong>Headline:</strong> {profile_data.get('headline', 'N/A')}<br>
            <strong>Location:</strong> {profile_data.get('location', 'N/A')}<br>
            <strong>Connections:</strong> {profile_data.get('connections', 'N/A')}<br>
            <strong>Followers:</strong> {profile_data.get('followers', 'N/A')}
        </div>
        """, unsafe_allow_html=True)

    with col3:
        st.markdown(f"""
        <div class="info-card">
            <strong>Current Job:</strong> {profile_data.get('job_title', 'N/A')}<br>
            <strong>Company:</strong> {profile_data.get('company_name', 'N/A')}<br>
            <strong>Industry:</strong> {profile_data.get('company_industry', 'N/A')}<br>
            <strong>Email:</strong> {profile_data.get('email', 'N/A')}<br>
            <strong>Profile URL:</strong> <a href="{profile_data.get('url', '#')}" target="_blank">View Profile</a>
        </div>
        """, unsafe_allow_html=True)

    # About Section
    if profile_data.get('about'):
        st.subheader("πŸ“ About Section")
        st.markdown(f"""
        <div class="profile-section">
            {profile_data.get('about', 'No about section available')}
        </div>
        """, unsafe_allow_html=True)

    # Experience — first entry expanded, the rest collapsed.
    if profile_data.get('experience'):
        st.subheader("πŸ’Ό Experience")
        for i, exp in enumerate(profile_data.get('experience', [])):
            with st.expander(f"{exp.get('title', 'Position')} at {exp.get('company', 'Company')}", expanded=i==0):
                col1, col2 = st.columns([2, 1])
                with col1:
                    st.write(f"**Duration:** {exp.get('duration', 'N/A')}")
                    st.write(f"**Location:** {exp.get('location', 'N/A')}")
                    if exp.get('description'):
                        st.write("**Description:**")
                        st.write(exp.get('description'))
                with col2:
                    st.write(f"**Current Role:** {'Yes' if exp.get('is_current') else 'No'}")

    # Skills
    if profile_data.get('skills'):
        st.subheader("πŸ› οΈ Skills")
        skills = profile_data.get('skills', [])
        if skills:
            # Create a DataFrame for better display
            skills_df = pd.DataFrame({'Skills': skills})
            st.dataframe(skills_df, use_container_width=True)

    # Education
    if profile_data.get('education'):
        st.subheader("πŸŽ“ Education")
        for edu in profile_data.get('education', []):
            st.markdown(f"""
            <div class="info-card">
                <strong>{edu.get('degree', 'Degree')}</strong><br>
                {edu.get('school', 'School')} | {edu.get('field', 'Field')}<br>
                <em>{edu.get('year', 'Year')}</em>
            </div>
            """, unsafe_allow_html=True)

    # Raw Data (collapsible) — full scraped payload for debugging.
    with st.expander("πŸ” Raw JSON Data"):
        st.json(profile_data)
370
 
371
def display_analysis_results(analysis):
    """Display analysis results.

    Renders, in order: the metric row, the two charts, strengths vs.
    weaknesses side by side, and (when present) the keyword analysis.
    """
    if not analysis:
        st.warning("No analysis results available")
        return

    # Metrics
    create_metrics_display(analysis)

    # Charts
    st.subheader("πŸ“Š Analysis Visualization")
    create_analysis_charts(analysis)

    # Strengths and Weaknesses
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("🌟 Profile Strengths")
        strengths = analysis.get('strengths', [])
        if strengths:
            for strength in strengths:
                st.markdown(f"""
                <div class="success-card">
                    βœ… {strength}
                </div>
                """, unsafe_allow_html=True)
        else:
            st.info("No specific strengths identified")

    with col2:
        st.subheader("πŸ”§ Areas for Improvement")
        weaknesses = analysis.get('weaknesses', [])
        if weaknesses:
            for weakness in weaknesses:
                st.markdown(f"""
                <div class="warning-card">
                    πŸ”Έ {weakness}
                </div>
                """, unsafe_allow_html=True)
        else:
            st.success("No major areas for improvement identified")

    # Keyword Analysis — capped at 10 found / 5 missing to keep it compact.
    keyword_analysis = analysis.get('keyword_analysis', {})
    if keyword_analysis:
        st.subheader("πŸ” Keyword Analysis")

        col1, col2 = st.columns(2)
        with col1:
            found_keywords = keyword_analysis.get('found_keywords', [])
            if found_keywords:
                st.write("**Keywords Found:**")
                st.write(", ".join(found_keywords[:10]))

        with col2:
            missing_keywords = keyword_analysis.get('missing_keywords', [])
            if missing_keywords:
                st.write("**Missing Keywords:**")
                st.write(", ".join(missing_keywords[:5]))
430
+
431
def generate_suggestions_markdown(suggestions, profile_data=None):
    """Render the suggestions dict as a downloadable markdown report.

    Args:
        suggestions: Mapping of category name -> list/str of tips; the
            special key 'ai_generated_content' holds a nested dict of
            AI-written headlines, about-section text and experience ideas.
        profile_data: Optional scraped profile dict; only 'name' is read,
            for the report header.

    Returns:
        str: A complete markdown document (header, TOC, per-category
        sections, and a static implementation-tips footer).
    """
    if not suggestions:
        return "# LinkedIn Profile Enhancement Suggestions\n\nNo suggestions available."

    # Personalize the header when a profile name is available.
    owner = profile_data.get('name', 'Your Profile') if profile_data else 'Your Profile'
    stamp = datetime.now().strftime("%B %d, %Y")

    parts = [f"""# LinkedIn Profile Enhancement Suggestions

**Profile:** {owner}
**Generated on:** {stamp}
**Powered by:** LinkedIn Profile Enhancer AI

---

## πŸ“‹ Table of Contents
"""]

    # Table of contents: one bullet per category; AI content uses a fixed label.
    toc = []
    for category in suggestions:
        if category == 'ai_generated_content':
            toc.append("- [πŸ€– AI-Generated Content Suggestions](#ai-generated-content-suggestions)")
        else:
            pretty = category.replace('_', ' ').title()
            toc.append(f"- [πŸ“‹ {pretty}](#{category.replace('_', '-').lower()})")
    parts.append("\n".join(toc) + "\n\n---\n\n")

    # One section per category, in dict order.
    for category, items in suggestions.items():
        if category == 'ai_generated_content':
            parts.append("## πŸ€– AI-Generated Content Suggestions\n\n")
            ai_content = items if isinstance(items, dict) else {}

            headlines = ai_content.get('ai_headlines') or []
            if headlines:
                parts.append("### ✨ Professional Headlines\n\n")
                for idx, raw in enumerate(headlines, 1):
                    text = raw.strip('"').replace('\\"', '"')
                    # Drop a leading "1."-style prefix the model sometimes emits.
                    if text.startswith(('1.', '2.', '3.', '4.', '5.')):
                        text = text[2:].strip()
                    parts.append(f"{idx}. {text}\n")
                parts.append("\n")

            about = ai_content.get('ai_about_section')
            if about:
                parts.append("### πŸ“ Enhanced About Section\n\n")
                parts.append(f"```\n{about}\n```\n\n")

            descriptions = ai_content.get('ai_experience_descriptions') or []
            if descriptions:
                parts.append("### πŸ’Ό Experience Description Ideas\n\n")
                parts.extend(f"- {desc}\n" for desc in descriptions)
                parts.append("\n")
        else:
            # Standard categories: simple bullet list.
            pretty = category.replace('_', ' ').title()
            parts.append(f"## πŸ“‹ {pretty}\n\n")
            if isinstance(items, list):
                parts.extend(f"- {item}\n" for item in items)
            else:
                parts.append(f"- {items}\n")
            parts.append("\n")

    # Static footer with implementation guidance.
    parts.append("""---

## πŸ“š Implementation Tips

### Getting Started
1. **Prioritize High-Impact Changes**: Start with headline and about section improvements
2. **Use Keywords Strategically**: Incorporate industry-relevant keywords naturally
3. **Maintain Authenticity**: Ensure all changes reflect your genuine experience and personality
4. **Regular Updates**: Keep your profile fresh with recent achievements and experiences

### Best Practices
- **Professional Photo**: Use a high-quality, professional headshot
- **Active Engagement**: Regularly share industry insights and engage with your network
- **Skills Endorsements**: Ask colleagues to endorse your key skills
- **Recommendations**: Request recommendations from supervisors and colleagues
- **Content Strategy**: Share articles, insights, and achievements regularly

### Measuring Success
- Monitor profile views and connection requests
- Track engagement on your posts and content
- Observe changes in recruiter outreach
- Measure network growth and quality

---

*This report was generated by LinkedIn Profile Enhancer AI. For best results, implement changes gradually and monitor their impact on your profile performance.*

**Need Help?** Contact support or revisit the LinkedIn Profile Enhancer tool for updated suggestions.
""")

    return "".join(parts)
532
+
533
def display_suggestions(suggestions):
    """Display enhancement suggestions with download option.

    Offers the full report as a markdown download (filename includes the
    profile name and a timestamp), then renders each category inline.
    """
    if not suggestions:
        st.warning("No suggestions available")
        return

    # Add download button at the top
    col1, col2 = st.columns([1, 4])

    with col1:
        # Generate markdown content from the same data shown below.
        markdown_content = generate_suggestions_markdown(
            suggestions,
            st.session_state.get('profile_data')
        )

        # Create filename with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        profile_name = ""
        if st.session_state.get('profile_data'):
            name = st.session_state.profile_data.get('name', '')
            if name:
                # Clean name for filename: keep alphanumerics/spaces/underscores,
                # then convert spaces to underscores.
                profile_name = "".join(c for c in name if c.isalnum() or c in (' ', '_')).rstrip()
                profile_name = profile_name.replace(' ', '_') + "_"

        filename = f"linkedin_suggestions_{profile_name}{timestamp}.md"

        st.download_button(
            label="πŸ“₯ Download Suggestions",
            data=markdown_content,
            file_name=filename,
            mime="text/markdown",
            help="Download all suggestions as a markdown file",
            use_container_width=True
        )

    with col2:
        st.markdown("*πŸ’‘ Click the download button to save all suggestions as a markdown file for easy reference and implementation.*")

    st.markdown("---")

    # Display suggestions as before
    for category, items in suggestions.items():
        if category == 'ai_generated_content':
            st.subheader("πŸ€– AI-Generated Content Suggestions")
            ai_content = items if isinstance(items, dict) else {}

            # Headlines — mirrors the cleanup in generate_suggestions_markdown.
            if 'ai_headlines' in ai_content and ai_content['ai_headlines']:
                st.write("**✨ Professional Headlines:**")
                for i, headline in enumerate(ai_content['ai_headlines'], 1):
                    cleaned_headline = headline.strip('"').replace('\\"', '"')
                    if cleaned_headline.startswith(('1.', '2.', '3.', '4.', '5.')):
                        cleaned_headline = cleaned_headline[2:].strip()
                    st.write(f"{i}. {cleaned_headline}")
                st.write("")

            # About Section
            if 'ai_about_section' in ai_content and ai_content['ai_about_section']:
                st.write("**πŸ“ Enhanced About Section:**")
                st.code(ai_content['ai_about_section'], language='text')
                st.write("")

            # Experience Descriptions
            if 'ai_experience_descriptions' in ai_content and ai_content['ai_experience_descriptions']:
                st.write("**πŸ’Ό Experience Description Ideas:**")
                for desc in ai_content['ai_experience_descriptions']:
                    st.write(f"β€’ {desc}")
                st.write("")
        else:
            # Standard categories
            category_name = category.replace('_', ' ').title()
            st.subheader(f"πŸ“‹ {category_name}")
            if isinstance(items, list):
                for item in items:
                    st.write(f"β€’ {item}")
            else:
                st.write(f"β€’ {items}")
            st.write("")
613
+
614
def main():
    """Main Streamlit application.

    Flow: set up session state and header, read inputs from the sidebar,
    run scrape -> analyze -> suggest on button press (results cached in
    session state), then render the four result tabs and the footer.
    """
    initialize_session_state()
    create_header()

    # Sidebar
    linkedin_url, job_description = create_sidebar()

    # Main content
    if st.button("πŸš€ Enhance Profile", type="primary", use_container_width=True):
        if not linkedin_url.strip():
            st.error("Please enter a LinkedIn profile URL")
        elif not any(pattern in linkedin_url.lower() for pattern in ['linkedin.com/in/', 'www.linkedin.com/in/']):
            st.error("Please enter a valid LinkedIn profile URL")
        else:
            # Clear cached data if URL has changed
            clear_results_if_url_changed(linkedin_url)

            with st.spinner("πŸ” Analyzing LinkedIn profile..."):
                try:
                    st.info(f"πŸ” Extracting data from: {linkedin_url}")

                    # Get profile data and analysis (force fresh extraction);
                    # the orchestrator's sub-agents are called directly here
                    # rather than through a single orchestrator entry point.
                    profile_data = st.session_state.orchestrator.scraper.extract_profile_data(linkedin_url)

                    st.info(f"βœ… Profile data extracted for: {profile_data.get('name', 'Unknown')}")

                    analysis = st.session_state.orchestrator.analyzer.analyze_profile(profile_data, job_description)
                    suggestions = st.session_state.orchestrator.content_generator.generate_suggestions(analysis, job_description)

                    # Store in session state so results survive reruns.
                    st.session_state.profile_data = profile_data
                    st.session_state.analysis_results = analysis
                    st.session_state.suggestions = suggestions

                    st.success("βœ… Profile analysis completed!")

                except Exception as e:
                    st.error(f"❌ Error analyzing profile: {str(e)}")

    # Display results if available
    if st.session_state.profile_data or st.session_state.analysis_results:
        st.markdown("---")

        # Create tabs for different views
        tab1, tab2, tab3, tab4 = st.tabs(["πŸ“Š Analysis", "πŸ” Scraped Data", "🎯 Suggestions", "πŸ“ˆ Implementation"])

        with tab1:
            st.header("πŸ“Š Profile Analysis")
            if st.session_state.analysis_results:
                display_analysis_results(st.session_state.analysis_results)
            else:
                st.info("No analysis results available yet")

        with tab2:
            st.header("πŸ” Scraped Profile Data")
            if st.session_state.profile_data:
                display_profile_data(st.session_state.profile_data)
            else:
                st.info("No profile data available yet")

        with tab3:
            st.header("🎯 Enhancement Suggestions")
            if st.session_state.suggestions:
                display_suggestions(st.session_state.suggestions)
            else:
                st.info("No suggestions available yet")

        with tab4:
            st.header("πŸ“ˆ Implementation Roadmap")
            if st.session_state.analysis_results:
                # Top-5 analyzer recommendations first, then static tips.
                recommendations = st.session_state.analysis_results.get('recommendations', [])
                if recommendations:
                    st.subheader("🎯 Priority Actions")
                    for i, rec in enumerate(recommendations[:5], 1):
                        st.markdown(f"""
                        <div class="metric-card">
                            <strong>{i}.</strong> {rec}
                        </div>
                        """, unsafe_allow_html=True)

                st.subheader("πŸ“Š General Best Practices")
                best_practices = [
                    "Update your profile regularly with new achievements",
                    "Use professional keywords relevant to your industry",
                    "Engage with your network by sharing valuable content",
                    "Ask for recommendations from colleagues and clients",
                    "Monitor profile views and connection requests"
                ]

                for practice in best_practices:
                    st.markdown(f"""
                    <div class="info-card">
                        πŸ”Έ {practice}
                    </div>
                    """, unsafe_allow_html=True)
            else:
                st.info("Complete the analysis first to see implementation suggestions")

    # Footer
    st.markdown("---")
    st.markdown("""
    <div style="text-align: center; color: #666; margin-top: 2rem;">
        <p>πŸš€ <strong>LinkedIn Profile Enhancer</strong> | Powered by AI | Data scraped with respect to LinkedIn's ToS</p>
        <p>Built with ❀️ using Streamlit, OpenAI GPT-4o-mini, and Apify</p>
    </div>
    """, unsafe_allow_html=True)
721
 
722
+ if __name__ == "__main__":
723
+ main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Utils package initialization
src/utils/job_matcher.py ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Job Matching Logic
2
+ from typing import Dict, Any, List, Tuple
3
+ import re
4
+ from collections import Counter
5
+
6
class JobMatcher:
    """Utility class for matching LinkedIn profiles with job descriptions"""

    def __init__(self):
        # Relative importance of each dimension in the overall match score;
        # the weights sum to 1.0 so the weighted total stays on a 0-100 scale.
        self.weight_config = {
            'skills': 0.4,
            'experience': 0.3,
            'keywords': 0.2,
            'education': 0.1
        }

        # Canonical skill -> aliases treated as equivalent when comparing
        # profile skills against job requirements (all lowercase).
        self.skill_synonyms = {
            'javascript': ['js', 'ecmascript', 'node.js', 'nodejs'],
            'python': ['py', 'django', 'flask', 'fastapi'],
            'react': ['reactjs', 'react.js'],
            'angular': ['angularjs', 'angular.js'],
            'machine learning': ['ml', 'ai', 'artificial intelligence'],
            'database': ['db', 'sql', 'mysql', 'postgresql', 'mongodb']
        }
25
+
26
    def calculate_match_score(self, profile_data: Dict[str, Any], job_description: str) -> Dict[str, Any]:
        """
        Calculate comprehensive match score between profile and job

        Args:
            profile_data (Dict[str, Any]): Cleaned profile data
            job_description (str): Job description text

        Returns:
            Dict[str, Any]: Match analysis with scores and details
        """
        job_requirements = self._parse_job_requirements(job_description)

        # Calculate individual scores (each sub-score is on a 0-100 scale).
        skills_score = self._calculate_skills_match(
            profile_data.get('skills', []),
            job_requirements['skills']
        )

        experience_score = self._calculate_experience_match(
            profile_data.get('experience', []),
            job_requirements
        )

        keywords_score = self._calculate_keywords_match(
            profile_data,
            job_requirements['keywords']
        )

        education_score = self._calculate_education_match(
            profile_data.get('education', []),
            job_requirements
        )

        # Calculate weighted overall score using the weights from __init__.
        overall_score = (
            skills_score['score'] * self.weight_config['skills'] +
            experience_score['score'] * self.weight_config['experience'] +
            keywords_score['score'] * self.weight_config['keywords'] +
            education_score['score'] * self.weight_config['education']
        )

        return {
            'overall_score': round(overall_score, 2),
            'breakdown': {
                'skills': skills_score,
                'experience': experience_score,
                'keywords': keywords_score,
                'education': education_score
            },
            'recommendations': self._generate_match_recommendations(
                skills_score, experience_score, keywords_score, education_score
            ),
            'job_requirements': job_requirements
        }
81
+
82
    def find_skill_gaps(self, profile_skills: List[str], job_requirements: List[str]) -> Dict[str, List[str]]:
        """
        Identify skill gaps between profile and job requirements

        Args:
            profile_skills (List[str]): Current profile skills
            job_requirements (List[str]): Required job skills

        Returns:
            Dict[str, List[str]]: Missing and matching skills, plus
            'match_percentage' (share of job skills matched, 0-100).
        """
        # Comparison is case-insensitive throughout.
        profile_skills_lower = [skill.lower() for skill in profile_skills]
        job_skills_lower = [skill.lower() for skill in job_requirements]

        # Find exact matches
        matching_skills = []
        missing_skills = []

        for job_skill in job_skills_lower:
            if job_skill in profile_skills_lower:
                matching_skills.append(job_skill)
            else:
                # Check for synonyms via _are_skills_similar (defined later
                # in this class, beyond this excerpt).
                found_synonym = False
                for profile_skill in profile_skills_lower:
                    if self._are_skills_similar(profile_skill, job_skill):
                        matching_skills.append(job_skill)
                        found_synonym = True
                        break

                if not found_synonym:
                    missing_skills.append(job_skill)

        return {
            'matching_skills': matching_skills,
            'missing_skills': missing_skills,
            # max(..., 1) guards against division by zero for empty job lists.
            'match_percentage': len(matching_skills) / max(len(job_skills_lower), 1) * 100
        }
120
+
121
+ def suggest_profile_improvements(self, match_analysis: Dict[str, Any]) -> List[str]:
122
+ """
123
+ Generate specific improvement suggestions based on match analysis
124
+
125
+ Args:
126
+ match_analysis (Dict[str, Any]): Match analysis results
127
+
128
+ Returns:
129
+ List[str]: Improvement suggestions
130
+ """
131
+ suggestions = []
132
+ breakdown = match_analysis['breakdown']
133
+
134
+ # Skills suggestions
135
+ if breakdown['skills']['score'] < 70:
136
+ missing_skills = breakdown['skills']['details']['missing_skills'][:3]
137
+ if missing_skills:
138
+ suggestions.append(
139
+ f"Add these high-priority skills: {', '.join(missing_skills)}"
140
+ )
141
+
142
+ # Experience suggestions
143
+ if breakdown['experience']['score'] < 60:
144
+ suggestions.append(
145
+ "Highlight more relevant experience in your current/previous roles"
146
+ )
147
+ suggestions.append(
148
+ "Add quantified achievements that demonstrate impact"
149
+ )
150
+
151
+ # Keywords suggestions
152
+ if breakdown['keywords']['score'] < 50:
153
+ suggestions.append(
154
+ "Incorporate more industry-specific keywords throughout your profile"
155
+ )
156
+
157
+ # Education suggestions
158
+ if breakdown['education']['score'] < 40:
159
+ suggestions.append(
160
+ "Consider adding relevant certifications or courses"
161
+ )
162
+
163
+ return suggestions
164
+
165
+ def _parse_job_requirements(self, job_description: str) -> Dict[str, Any]:
166
+ """Parse job description to extract requirements"""
167
+ requirements = {
168
+ 'skills': [],
169
+ 'keywords': [],
170
+ 'experience_years': 0,
171
+ 'education_level': '',
172
+ 'industry': '',
173
+ 'role_type': ''
174
+ }
175
+
176
+ # Extract skills (common technical skills)
177
+ skill_patterns = [
178
+ r'\b(python|javascript|java|react|angular|node\.?js|sql|aws|docker|kubernetes)\b',
179
+ r'\b(machine learning|ai|data science|devops|full.?stack)\b',
180
+ r'\b(project management|agile|scrum|leadership)\b'
181
+ ]
182
+
183
+ for pattern in skill_patterns:
184
+ matches = re.findall(pattern, job_description, re.IGNORECASE)
185
+ requirements['skills'].extend([match.lower() for match in matches])
186
+
187
+ # Extract experience years
188
+ exp_pattern = r'(\d+)\+?\s*years?\s*(?:of\s*)?experience'
189
+ exp_matches = re.findall(exp_pattern, job_description, re.IGNORECASE)
190
+ if exp_matches:
191
+ requirements['experience_years'] = int(exp_matches[0])
192
+
193
+ # Extract keywords (all meaningful words)
194
+ keywords = re.findall(r'\b[a-zA-Z]{3,}\b', job_description)
195
+ stop_words = {'the', 'and', 'for', 'with', 'you', 'will', 'are', 'have'}
196
+ requirements['keywords'] = [
197
+ word.lower() for word in keywords
198
+ if word.lower() not in stop_words
199
+ ]
200
+
201
+ # Remove duplicates
202
+ requirements['skills'] = list(set(requirements['skills']))
203
+ requirements['keywords'] = list(set(requirements['keywords']))
204
+
205
+ return requirements
206
+
207
+ def _calculate_skills_match(self, profile_skills: List[str], job_skills: List[str]) -> Dict[str, Any]:
208
+ """Calculate skills match score"""
209
+ if not job_skills:
210
+ return {'score': 100, 'details': {'matching_skills': [], 'missing_skills': []}}
211
+
212
+ skill_gap_analysis = self.find_skill_gaps(profile_skills, job_skills)
213
+
214
+ return {
215
+ 'score': skill_gap_analysis['match_percentage'],
216
+ 'details': skill_gap_analysis
217
+ }
218
+
219
+ def _calculate_experience_match(self, profile_experience: List[Dict], job_requirements: Dict) -> Dict[str, Any]:
220
+ """Calculate experience match score"""
221
+ score = 0
222
+ details = {
223
+ 'relevant_roles': 0,
224
+ 'total_experience': 0,
225
+ 'required_experience': job_requirements.get('experience_years', 0)
226
+ }
227
+
228
+ # Calculate total years of experience
229
+ total_years = 0
230
+ relevant_roles = 0
231
+
232
+ for exp in profile_experience:
233
+ duration_info = exp.get('duration_info', {})
234
+ if duration_info.get('duration_months'):
235
+ total_years += duration_info['duration_months'] / 12
236
+
237
+ # Check if role is relevant (simple keyword matching)
238
+ role_text = f"{exp.get('title', '')} {exp.get('description', '')}".lower()
239
+ job_keywords = job_requirements.get('keywords', [])
240
+
241
+ if any(keyword in role_text for keyword in job_keywords[:10]):
242
+ relevant_roles += 1
243
+
244
+ details['total_experience'] = round(total_years, 1)
245
+ details['relevant_roles'] = relevant_roles
246
+
247
+ # Calculate score based on experience and relevance
248
+ if job_requirements.get('experience_years', 0) > 0:
249
+ exp_ratio = min(total_years / job_requirements['experience_years'], 1.0)
250
+ score = exp_ratio * 70 + (relevant_roles / max(len(profile_experience), 1)) * 30
251
+ else:
252
+ score = 80 # Default good score if no specific experience required
253
+
254
+ return {
255
+ 'score': round(score, 2),
256
+ 'details': details
257
+ }
258
+
259
+ def _calculate_keywords_match(self, profile_data: Dict, job_keywords: List[str]) -> Dict[str, Any]:
260
+ """Calculate keywords match score"""
261
+ if not job_keywords:
262
+ return {'score': 100, 'details': {'matched': 0, 'total': 0}}
263
+
264
+ # Extract all text from profile
265
+ profile_text = ""
266
+ for key, value in profile_data.items():
267
+ if isinstance(value, str):
268
+ profile_text += f" {value}"
269
+ elif isinstance(value, list):
270
+ for item in value:
271
+ if isinstance(item, dict):
272
+ profile_text += f" {' '.join(str(v) for v in item.values())}"
273
+ else:
274
+ profile_text += f" {item}"
275
+
276
+ profile_text = profile_text.lower()
277
+
278
+ # Count keyword matches
279
+ matched_keywords = 0
280
+ for keyword in job_keywords:
281
+ if keyword.lower() in profile_text:
282
+ matched_keywords += 1
283
+
284
+ score = (matched_keywords / len(job_keywords)) * 100
285
+
286
+ return {
287
+ 'score': round(score, 2),
288
+ 'details': {
289
+ 'matched': matched_keywords,
290
+ 'total': len(job_keywords),
291
+ 'percentage': round(score, 2)
292
+ }
293
+ }
294
+
295
+ def _calculate_education_match(self, profile_education: List[Dict], job_requirements: Dict) -> Dict[str, Any]:
296
+ """Calculate education match score"""
297
+ score = 70 # Default score
298
+ details = {
299
+ 'has_degree': len(profile_education) > 0,
300
+ 'degree_count': len(profile_education)
301
+ }
302
+
303
+ if profile_education:
304
+ score = 85 # Boost for having education
305
+
306
+ # Check for relevant fields
307
+ job_keywords = job_requirements.get('keywords', [])
308
+ for edu in profile_education:
309
+ edu_text = f"{edu.get('degree', '')} {edu.get('field', '')}".lower()
310
+ if any(keyword in edu_text for keyword in job_keywords[:5]):
311
+ score = 95
312
+ break
313
+
314
+ return {
315
+ 'score': score,
316
+ 'details': details
317
+ }
318
+
319
+ def _are_skills_similar(self, skill1: str, skill2: str) -> bool:
320
+ """Check if two skills are similar using synonyms"""
321
+ skill1_lower = skill1.lower()
322
+ skill2_lower = skill2.lower()
323
+
324
+ # Check direct synonyms
325
+ for main_skill, synonyms in self.skill_synonyms.items():
326
+ if ((skill1_lower == main_skill or skill1_lower in synonyms) and
327
+ (skill2_lower == main_skill or skill2_lower in synonyms)):
328
+ return True
329
+
330
+ # Check partial matches
331
+ if skill1_lower in skill2_lower or skill2_lower in skill1_lower:
332
+ return True
333
+
334
+ return False
335
+
336
+ def _generate_match_recommendations(self, skills_score: Dict, experience_score: Dict,
337
+ keywords_score: Dict, education_score: Dict) -> List[str]:
338
+ """Generate recommendations based on individual scores"""
339
+ recommendations = []
340
+
341
+ if skills_score['score'] < 60:
342
+ recommendations.append("Focus on developing missing technical skills")
343
+
344
+ if experience_score['score'] < 50:
345
+ recommendations.append("Highlight more relevant work experience")
346
+
347
+ if keywords_score['score'] < 40:
348
+ recommendations.append("Optimize profile with job-specific keywords")
349
+
350
+ if education_score['score'] < 60:
351
+ recommendations.append("Consider additional certifications or training")
352
+
353
+ return recommendations
src/utils/linkedin_parser.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LinkedIn Data Parser
2
+ import re
3
+ from typing import Dict, Any, List, Optional
4
+ from datetime import datetime
5
+
6
class LinkedInParser:
    """Utility for cleaning, normalizing and enriching scraped LinkedIn profile data."""

    def __init__(self):
        # Keyword vocabulary used by categorize_skills() to bucket skills.
        self.skill_categories = {
            'technical': ['python', 'javascript', 'java', 'react', 'node.js', 'sql', 'aws', 'docker'],
            'management': ['leadership', 'project management', 'team management', 'agile', 'scrum'],
            'marketing': ['seo', 'social media', 'content marketing', 'digital marketing', 'analytics'],
            'design': ['ui/ux', 'photoshop', 'figma', 'adobe', 'design thinking']
        }

    def clean_profile_data(self, raw_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Clean and standardize raw profile data.

        Args:
            raw_data (Dict[str, Any]): Raw scraped data.

        Returns:
            Dict[str, Any]: Cleaned profile data with normalized text fields,
            cleaned experience/education/skills lists, a parsed connection
            count, and a 'parsed_at' ISO timestamp.
        """
        cleaned_data = {}

        # Basic text fields: collapse whitespace, strip stray characters.
        cleaned_data['name'] = self._clean_text(raw_data.get('name', ''))
        cleaned_data['headline'] = self._clean_text(raw_data.get('headline', ''))
        cleaned_data['location'] = self._clean_text(raw_data.get('location', ''))
        cleaned_data['about'] = self._clean_text(raw_data.get('about', ''))

        cleaned_data['experience'] = self._clean_experience_list(
            raw_data.get('experience', [])
        )
        cleaned_data['education'] = self._clean_education_list(
            raw_data.get('education', [])
        )
        cleaned_data['skills'] = self._clean_skills_list(
            raw_data.get('skills', [])
        )
        cleaned_data['connections'] = self._parse_connections(
            raw_data.get('connections', '')
        )

        cleaned_data['url'] = raw_data.get('url', '')
        cleaned_data['parsed_at'] = datetime.now().isoformat()

        return cleaned_data

    def extract_keywords(self, text: str, min_length: int = 3) -> List[str]:
        """
        Extract meaningful keywords from text.

        Args:
            text (str): Input text.
            min_length (int): Minimum keyword length.

        Returns:
            List[str]: Lowercase keywords with stop words removed,
            de-duplicated in first-seen order.
        """
        # Strip punctuation and lowercase before splitting into words.
        clean_text = re.sub(r'[^\w\s]', ' ', text.lower())
        words = clean_text.split()

        stop_words = {
            'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
            'by', 'from', 'up', 'about', 'into', 'through', 'during', 'before',
            'after', 'above', 'below', 'between', 'among', 'within', 'without',
            'under', 'over', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
            'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
            'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these',
            'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him',
            'her', 'us', 'them', 'my', 'your', 'his', 'her', 'its', 'our', 'their'
        }

        keywords = [
            word for word in words
            if len(word) >= min_length and word not in stop_words
        ]

        # De-duplicate while preserving first-seen order.
        unique_keywords = []
        seen = set()
        for keyword in keywords:
            if keyword not in seen:
                unique_keywords.append(keyword)
                seen.add(keyword)

        return unique_keywords

    def parse_duration(self, duration_str: str) -> Dict[str, Any]:
        """
        Parse duration strings like "2020 - Present" or "Jan 2020 - Dec 2022".

        Args:
            duration_str (str): Duration string.

        Returns:
            Dict[str, Any]: 'raw' (original string), 'start_date'/'end_date'
            (4-digit year strings or None), 'is_current', and
            'duration_months' (whole-year approximation; 0 when it cannot be
            derived -- month names are ignored, so precision is year-level).
        """
        duration_info = {
            'raw': duration_str,
            'start_date': None,
            'end_date': None,
            'is_current': False,
            'duration_months': 0
        }

        if not duration_str:
            return duration_info

        if 'present' in duration_str.lower():
            duration_info['is_current'] = True

        # NON-capturing group is essential: with r'\b(19|20)\d{2}\b',
        # re.findall returns only the captured century prefix ('19'/'20'),
        # never the full year.
        years = re.findall(r'\b(?:19|20)\d{2}\b', duration_str)
        if years:
            duration_info['start_date'] = years[0]
            if len(years) > 1:
                duration_info['end_date'] = years[1]

        # Approximate tenure in months from the year boundaries. For a
        # current position with only a start year, use the current year.
        start_year = int(years[0]) if years else None
        if len(years) > 1:
            end_year = int(years[1])
        elif duration_info['is_current']:
            end_year = datetime.now().year
        else:
            end_year = None
        if start_year is not None and end_year is not None:
            duration_info['duration_months'] = max(0, (end_year - start_year) * 12)

        return duration_info

    def categorize_skills(self, skills: List[str]) -> Dict[str, List[str]]:
        """
        Categorize skills into different types.

        Args:
            skills (List[str]): List of skills.

        Returns:
            Dict[str, List[str]]: Skills grouped under 'technical',
            'management', 'marketing', 'design'; unmatched ones go to 'other'.
        """
        categorized = {
            'technical': [],
            'management': [],
            'marketing': [],
            'design': [],
            'other': []
        }

        for skill in skills:
            skill_lower = skill.lower()
            placed = False

            # First category whose vocabulary appears in the skill name wins.
            for category, keywords in self.skill_categories.items():
                if any(keyword in skill_lower for keyword in keywords):
                    categorized[category].append(skill)
                    placed = True
                    break

            if not placed:
                categorized['other'].append(skill)

        return categorized

    def extract_achievements(self, text: str) -> List[str]:
        """
        Extract achievements with numbers/metrics from text.

        Args:
            text (str): Input text.

        Returns:
            List[str]: Sentence fragments containing percentages, K/M figures,
            money amounts, or time periods.
        """
        achievements = []

        # Each pattern grabs the surrounding sentence fragment (up to a '.').
        patterns = [
            r'[^.]*\b\d+%[^.]*',                    # Percentage achievements
            r'[^.]*\b\d+[kK]\+?[^.]*',              # Numbers with K (thousands)
            r'[^.]*\b\d+[mM]\+?[^.]*',              # Numbers with M (millions)
            r'[^.]*\$\d+[^.]*',                     # Money amounts
            r'[^.]*\b\d+\s*(years?|months?)[^.]*',  # Time periods
        ]

        for pattern in patterns:
            matches = re.findall(pattern, text, re.IGNORECASE)
            achievements.extend(match.strip() for match in matches
                                if isinstance(match, str))

        return achievements

    def _clean_text(self, text: str) -> str:
        """Collapse whitespace and strip unusual characters, keeping basic punctuation."""
        if not text:
            return ""

        text = re.sub(r'\s+', ' ', text).strip()
        text = re.sub(r'[^\w\s\-.,!?()&/]', '', text)

        return text

    def _clean_experience_list(self, experience_list: List[Dict]) -> List[Dict]:
        """Clean experience entries and enrich each with parsed duration and achievements."""
        cleaned_experience = []

        for exp in experience_list:
            if isinstance(exp, dict):
                cleaned_exp = {
                    'title': self._clean_text(exp.get('title', '')),
                    'company': self._clean_text(exp.get('company', '')),
                    'duration': self._clean_text(exp.get('duration', '')),
                    'description': self._clean_text(exp.get('description', '')),
                    'location': self._clean_text(exp.get('location', '')),
                }
                cleaned_exp['duration_info'] = self.parse_duration(cleaned_exp['duration'])
                cleaned_exp['achievements'] = self.extract_achievements(
                    cleaned_exp['description']
                )
                cleaned_experience.append(cleaned_exp)

        return cleaned_experience

    def _clean_education_list(self, education_list: List[Dict]) -> List[Dict]:
        """Clean education entries, keeping degree/school/year/field text fields."""
        cleaned_education = []

        for edu in education_list:
            if isinstance(edu, dict):
                cleaned_education.append({
                    'degree': self._clean_text(edu.get('degree', '')),
                    'school': self._clean_text(edu.get('school', '')),
                    'year': self._clean_text(edu.get('year', '')),
                    'field': self._clean_text(edu.get('field', '')),
                })

        return cleaned_education

    def _clean_skills_list(self, skills_list: List[str]) -> List[str]:
        """Clean skills and drop case-insensitive duplicates, preserving order."""
        if not skills_list:
            return []

        cleaned_skills = []
        seen_skills = set()

        for skill in skills_list:
            cleaned_skill = self._clean_text(str(skill))
            skill_lower = cleaned_skill.lower()

            if cleaned_skill and skill_lower not in seen_skills:
                cleaned_skills.append(cleaned_skill)
                seen_skills.add(skill_lower)

        return cleaned_skills

    def _parse_connections(self, connections_str: str) -> int:
        """Parse a connection count like "500+ connections" into an int (0 if unknown)."""
        if not connections_str:
            return 0

        # The digit scan already covers the "500+" format (yields 500), so no
        # special-casing is needed.
        numbers = re.findall(r'\d+', connections_str)
        return int(numbers[0]) if numbers else 0