HallD committed
Commit ad7b82e · verified · 1 Parent(s): 4aafe22

Upload 31 files


Initial upload of project

.gitattributes ADDED
@@ -0,0 +1,40 @@
+ *.png filter=lfs diff=lfs merge=lfs -text
+ *.pdf filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.sqlite3 filter=lfs diff=lfs merge=lfs -text
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+
.gitignore ADDED
@@ -0,0 +1,58 @@
+ .env
+ __pycache__/
+ .pytest_cache/
+ .DS_Store
+ chat_log.txt
+ utils/digital-cv.log
+ data/chroma/
+ *.sqlite3
+ *.bin
+ *.log
+ *.lock
+ # Python-generated files
+ __pycache__/
+ *.py[oc]
+ build/
+ dist/
+ wheels/
+ *.egg-info
+
+ # Virtual environments
+ .venv/
+ .env/
+ venv/
+ env/
+
+ # Environment variables and secrets
+ .env
+ .env.local
+ .env.*.local
+
+ # IDE and editor files
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+
+ # Operating system files
+ .DS_Store
+ Thumbs.db
+
+ # Logs
+ *.log
+ logs/
+
+ # Temporary files
+ tmp/
+ temp/
+ .tmp/
+
+ # Coverage and testing
+ .coverage
+ .pytest_cache/
+ htmlcov/
+
+ # Gradio temporary files
+ gradio_cached_examples/
+ flagged/
.python-version ADDED
@@ -0,0 +1 @@
+ 3.11
README.md ADDED
@@ -0,0 +1,231 @@
+ ---
+ title: DigitalDan
+ emoji: 📊
+ colorFrom: purple
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 5.49.1
+ app_file: app.py
+ pinned: false
+ license: mit
+ short_description: Digital twin of me, Daniel Halwell
+ ---
+
+ # Digital CV - Interactive Personal Assistant
+
+ An AI-powered digital CV that allows visitors to chat with Daniel Halwell through an intelligent conversational interface. Built with Gradio and powered by OpenAI's GPT models, this application provides an interactive way to learn about Daniel's professional background, experience, and capabilities.
+
+ ## 🌟 Features
+
+ - **Interactive Chat Interface**: Natural language conversations about Daniel's experience, skills, and projects
+ - **Intelligent Context Awareness**: Draws from a comprehensive professional summary and LinkedIn profile data
+ - **Contact Recording**: Captures visitor contact information with proper consent
+ - **Professional Presentation**: Clean, responsive UI with custom branding
+ - **Question Tracking**: Logs unknown questions to continuously improve the knowledge base
+
+ ## 🔧 Technology Stack
+
+ - **Frontend**: Gradio (Python-based web UI framework)
+ - **AI/LLM**: OpenAI GPT models with function calling
+ - **Document Processing**: PyPDF for resume parsing
+ - **Notifications**: Pushover integration for contact alerts
+ - **Deployment**: Supports containerized deployment
+ - **Python Version**: 3.11+
+
+ ## 📁 Project Structure
+
+ ```
+ digital-cv/
+ ├── app.py                 # Main Gradio application
+ ├── me/
+ │   ├── Profile.pdf        # LinkedIn profile export
+ │   └── summary.txt        # Comprehensive professional summary
+ ├── utils/
+ │   ├── chat.py            # Core chat functionality and AI integration
+ │   ├── tool_calls.py      # Function calling tools (contact recording, etc.)
+ │   └── logging.py         # Application logging setup
+ ├── assets/
+ │   ├── logo.png           # Application logo
+ │   ├── dan.png            # Avatar image
+ │   └── Logo WO Background.png
+ ├── pyproject.toml         # Project dependencies and metadata
+ ├── .env                   # Environment variables (create this file locally; not included in repo)
+ └── README.md              # This file
+ ```
+
+ ## 🚀 Quick Start
+
+ ### Prerequisites
+
+ - Python 3.11 or higher
+ - OpenAI API key
+ - (Optional) Pushover account for notifications
+
+ ### Installation
+
+ 1. **Clone the repository**
+    ```bash
+    git clone https://github.com/CodeHalwell/digital-cv.git
+    cd digital-cv
+    ```
+
+ 2. **Install dependencies**
+    ```bash
+    pip install -e .
+    ```
+
+    Or install key dependencies directly:
+    ```bash
+    pip install gradio openai python-dotenv pypdf requests
+    ```
+
+ 3. **Set up environment variables**
+    Create a `.env` file in the root directory:
+    ```env
+    OPENAI_API_KEY=your_openai_api_key_here
+    # Optional — only needed for Pushover notifications:
+    PUSHOVER_TOKEN=your_pushover_token
+    PUSHOVER_USER=your_pushover_user_key
+    PORT=7860
+    ```
+
+ 4. **Run the application**
+    ```bash
+    python app.py
+    ```
+
+ 5. **Access the interface**
+    Open your browser and navigate to `http://localhost:7860`
+
+ ## 🎯 Usage
+
+ ### For Visitors
+
+ - Start a conversation by typing questions about Daniel's experience, skills, or projects
+ - Example prompts:
+   - "Tell me about your last role"
+   - "How do you design a RAG pipeline?"
+   - "What projects have you worked on?"
+   - "Can you scope a small automation?"
+ - Share your contact information if you'd like to connect directly
+ - Use the "Stop" button to interrupt streaming responses
+
+ ### For Developers
+
+ - The chat interface automatically draws context from `me/summary.txt` and `me/Profile.pdf` (see the sketch below)
+ - Function calling enables contact recording and question tracking
+ - All conversations are logged for analytics and improvement
+
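For orientation, the context-loading step lives in `utils/chat.py`, which is not shown in this commit view; it presumably looks something like the following. This is a minimal sketch assuming `pypdf` (listed in the dependencies) and the two files above — the variable and prompt wording are illustrative, not the shipped implementation:

```python
# Sketch of how me/summary.txt and me/Profile.pdf could be folded into the
# system prompt; the real logic lives in utils/chat.py and may differ.
from pypdf import PdfReader

with open("me/summary.txt", "r", encoding="utf-8") as f:
    summary = f.read()

# Extract plain text from every page of the LinkedIn export.
reader = PdfReader("me/Profile.pdf")
profile = "\n".join(page.extract_text() or "" for page in reader.pages)

system_prompt = (
    "You are acting as Daniel Halwell. Answer questions using the "
    f"summary and LinkedIn profile below.\n\n## Summary\n{summary}\n\n"
    f"## LinkedIn Profile\n{profile}"
)
```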
+ ## 🔧 Configuration
+
+ ### Environment Variables
+
+ | Variable | Description | Required |
+ |----------|-------------|----------|
+ | `OPENAI_API_KEY` | OpenAI API key for GPT models | Yes |
+ | `PUSHOVER_TOKEN` | Pushover application token | No |
+ | `PUSHOVER_USER` | Pushover user key | No |
+ | `PORT` | Server port (default: 7860) | No |
+
+ ### Customization
+
+ - **Personal Content**: Update `me/summary.txt` with your professional background
+ - **Profile**: Replace `me/Profile.pdf` with your LinkedIn export
+ - **Branding**: Update images in the `assets/` directory
+ - **Styling**: Modify the `custom_css` variable in `app.py`
+
+ ## 🛡️ Features Deep Dive
+
+ ### AI Chat System
+
+ The chat system uses OpenAI's GPT models with:
+ - **System prompts** that establish Daniel's professional persona
+ - **Function calling** for structured interactions (contact recording, question logging) — sketched below
+ - **Content guardrails** to ensure appropriate conversations
+ - **Context injection** from professional documents
+
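The actual tool definitions live in `utils/tool_calls.py`, which is not shown in this view. As a minimal sketch of the function-calling pattern described above — `record_contact` is a hypothetical tool name and the model string is a placeholder:

```python
# Minimal sketch of OpenAI function calling; the real tool schemas live in
# utils/tool_calls.py and may differ from this illustration.
import json
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

tools = [{
    "type": "function",
    "function": {
        "name": "record_contact",  # hypothetical tool name
        "description": "Record a visitor's contact details when explicitly shared.",
        "parameters": {
            "type": "object",
            "properties": {
                "email": {"type": "string"},
                "name": {"type": "string"},
                "notes": {"type": "string"},
            },
            "required": ["email"],
        },
    },
}]

response = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder model name
    messages=[
        {"role": "system", "content": "You are Daniel's digital CV."},
        {"role": "user", "content": "My email is jane@example.com — keep in touch!"},
    ],
    tools=tools,
)

# If the model decided to call a tool, dispatch it by name.
for call in response.choices[0].message.tool_calls or []:
    args = json.loads(call.function.arguments)
    print(call.function.name, args)
```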
+ ### Contact Management
+
+ When visitors share contact information:
+ - Details are validated and recorded via Pushover notifications (see the sketch below)
+ - Privacy-conscious approach: details are only recorded when explicitly shared
+ - Structured data capture (email, name, context notes)
+
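Pushover's message API is a single authenticated POST. A minimal sketch of the notification step, assuming the `PUSHOVER_TOKEN`/`PUSHOVER_USER` variables from the table above; the helper name is hypothetical and the shipped code in `utils/tool_calls.py` may differ:

```python
# Sketch of a Pushover notification for a newly shared contact.
import os
import requests

def notify_contact(email: str, name: str = "", notes: str = "") -> None:
    """Send a push notification when a visitor shares contact details."""
    requests.post(
        "https://api.pushover.net/1/messages.json",
        data={
            "token": os.environ["PUSHOVER_TOKEN"],
            "user": os.environ["PUSHOVER_USER"],
            "message": f"New contact: {name} <{email}> — {notes}",
        },
        timeout=10,
    )
```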
+ ### Question Tracking
+
+ Unknown or unanswerable questions are:
+ - Automatically detected and logged
+ - Sent via Pushover for manual review
+ - Used to continuously improve the knowledge base
+
+ ## 📊 Monitoring & Analytics
+
+ - Application logs provide detailed interaction tracking
+ - Pushover notifications alert to new contacts and unknown questions
+ - Chat logs can be analyzed for common themes and improvements
+
+ ## 🚀 Deployment
+
+ ### Local Development
+ ```bash
+ python app.py
+ ```
+
+ ### Production Deployment
+ The application is designed for containerized deployment:
+
+ ```dockerfile
+ # Example Dockerfile approach
+ FROM python:3.11-slim
+
+ WORKDIR /app
+ COPY . .
+ RUN pip install -e .
+
+ CMD ["python", "app.py"]
+ ```
+
+ ### Deployment Considerations
+
+ - Set `debug=False` in production (see the sketch below)
+ - Use environment variables for all secrets
+ - Configure appropriate server limits for Gradio
+ - Consider using a reverse proxy (nginx) for production traffic
+
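One way to wire these considerations into the launch call — a sketch assuming a hypothetical `DEBUG` environment flag; the shipped `app.py` currently hard-codes `debug=True`, so treat this as a suggested pattern rather than the repository's behaviour:

```python
# Environment-driven launch configuration (illustrative, not the shipped code).
import os

demo.launch(  # `demo` is the gr.Blocks app built in app.py
    server_name="0.0.0.0",
    server_port=int(os.getenv("PORT", 7860)),
    debug=os.getenv("DEBUG", "0") == "1",  # off unless explicitly enabled
    show_error=False,  # avoid leaking tracebacks to visitors
    max_threads=int(os.getenv("GRADIO_MAX_THREADS", 40)),  # server limit knob
)
```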
+ ## 🤝 Contributing
+
+ 1. Fork the repository
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
+ 3. Make your changes and test thoroughly
+ 4. Update documentation as needed
+ 5. Commit your changes (`git commit -m 'Add amazing feature'`)
+ 6. Push to the branch (`git push origin feature/amazing-feature`)
+ 7. Open a Pull Request
+
+ ### Development Guidelines
+
+ - Follow existing code style and patterns
+ - Test changes with different conversation flows
+ - Update `me/summary.txt` if adding new professional information
+ - Ensure all dependencies are properly documented
+
+ ## 📄 License
+
+ This project is the personal intellectual property of Daniel Halwell. Contact him for usage permissions.
+
+ ## 📞 Contact
+
+ - **Email**: [email protected] (personal) | [email protected] (business)
+ - **GitHub**: [@CodeHalwell](https://github.com/CodeHalwell)
+ - **Portfolio**: [codehalwell.io](https://codehalwell.io)
+ - **LinkedIn**: [linkedin.com/in/danielhalwell](https://linkedin.com/in/danielhalwell)
+ - **Location**: Northwich, UK
+
+ ## 🎨 About the Project
+
+ This digital CV represents a modern approach to professional networking and self-presentation. Rather than a static resume, it offers an interactive experience that showcases both technical capabilities and communication skills. The project demonstrates expertise in:
+
+ - AI/LLM integration and prompt engineering
+ - Modern Python web development with Gradio
+ - User experience design for professional applications
+ - Privacy-conscious data handling
+ - Scalable application architecture
+
+ ---
+
+ *Made with ❤️ — CoDHe Labs*
app.py ADDED
@@ -0,0 +1,788 @@
+ def run_with_watch():
+     from watchfiles import run_process
+
+     logger.info("Starting watch mode on 'app' directory")
+
+     def _run():
+         logger.info("Reloading app.py")
+         main()
+
+     # watchfiles.run_process takes the watched paths positionally,
+     # not as a `path=` keyword.
+     run_process(
+         ".",
+         target=_run,
+         watch_filter=lambda change, path: path.endswith(".py"),
+     )
+ from dotenv import load_dotenv
+ import os
+ import gradio as gr
+ from utils.app_logging import setup_logging
+ from utils.chat import Me
+
+ load_dotenv(override=True)
+ logger = setup_logging()
+
+ logger.info("Starting digital-cv")
+
+ me = Me()
+ logger.info("Me initialized")
+ # Theming and chat styling for embedding
+ theme = gr.themes.Soft(primary_hue="indigo", neutral_hue="slate")
+ initial_assistant_message = (
+     "Hello, nice to meet you! At any time, feel free to give me your name and email; "
+     "I'll make a note and I can get back to you later."
+ )
+
+ chatbot = gr.Chatbot(
+     label=None,
+     avatar_images=("assets/logo.png", "assets/dan.png"),
+     render_markdown=True,
+     type="messages",
+     value=[{"role": "assistant", "content": initial_assistant_message}],
+     elem_id="chatbot",
+ )
+ logger.info("Chatbot initialized")
+ custom_css = """
45
+ html, body, .gradio-container { height: 100%; }
46
+ body {
47
+ margin: 0;
48
+ font-family: "Inter", "SF Pro Display", "Segoe UI", system-ui, -apple-system, sans-serif;
49
+ background: linear-gradient(135deg, #0f172a 0%, #1e293b 25%, #334155 50%, #1e293b 75%, #0f172a 100%);
50
+ background-attachment: fixed;
51
+ color: #f8fafc;
52
+ font-feature-settings: "kern" 1, "liga" 1, "ss01" 1;
53
+ text-rendering: optimizeLegibility;
54
+ -webkit-font-smoothing: antialiased;
55
+ -moz-osx-font-smoothing: grayscale;
56
+ font-weight: 400;
57
+ line-height: 1.6;
58
+ }
59
+ .gradio-container {
60
+ display: flex;
61
+ background: transparent;
62
+ padding: 28px 0 36px;
63
+ }
64
+ #container {
65
+ max-width: 1280px;
66
+ margin: 0 auto;
67
+ padding: 32px 40px 40px;
68
+ display: flex;
69
+ flex-direction: column;
70
+ flex: 1 1 auto;
71
+ min-height: 0;
72
+ border-radius: 32px;
73
+ background: rgba(15, 23, 42, 0.95);
74
+ box-shadow:
75
+ 0 64px 128px rgba(0, 0, 0, 0.4),
76
+ 0 32px 64px rgba(0, 0, 0, 0.2),
77
+ 0 0 0 1px rgba(148, 163, 184, 0.1),
78
+ inset 0 1px 0 rgba(255, 255, 255, 0.05);
79
+ backdrop-filter: blur(24px);
80
+ border: 1px solid rgba(148, 163, 184, 0.15);
81
+ position: relative;
82
+ overflow: hidden;
83
+ }
84
+ #container::before {
85
+ content: '';
86
+ position: absolute;
87
+ top: 0;
88
+ left: 0;
89
+ right: 0;
90
+ height: 2px;
91
+ background: linear-gradient(90deg,
92
+ transparent,
93
+ rgba(59, 130, 246, 0.6),
94
+ rgba(147, 51, 234, 0.6),
95
+ transparent
96
+ );
97
+ z-index: 1;
98
+ }
99
+ #container::after {
100
+ content: '';
101
+ position: absolute;
102
+ top: 0;
103
+ left: 0;
104
+ right: 0;
105
+ bottom: 0;
106
+ background: radial-gradient(circle at 50% 0%, rgba(59, 130, 246, 0.05) 0%, transparent 50%);
107
+ pointer-events: none;
108
+ z-index: 0;
109
+ }
110
+ #header {
111
+ align-items: center;
112
+ gap: 32px;
113
+ justify-content: flex-start;
114
+ margin-bottom: 16px;
115
+ position: relative;
116
+ z-index: 2;
117
+ }
118
+ #logo img {
119
+ max-height: 240px;
120
+ width: auto;
121
+ border-radius: 24px;
122
+ object-fit: contain;
123
+ box-shadow:
124
+ 0 32px 64px rgba(0, 0, 0, 0.3),
125
+ 0 16px 32px rgba(0, 0, 0, 0.2),
126
+ 0 0 0 1px rgba(148, 163, 184, 0.1),
127
+ inset 0 1px 0 rgba(255, 255, 255, 0.1);
128
+ transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
129
+ filter: brightness(1.05) contrast(1.1);
130
+ }
131
+ #logo img:hover {
132
+ transform: translateY(-4px) scale(1.02);
133
+ box-shadow:
134
+ 0 48px 96px rgba(0, 0, 0, 0.4),
135
+ 0 24px 48px rgba(0, 0, 0, 0.3),
136
+ 0 0 0 1px rgba(59, 130, 246, 0.3),
137
+ inset 0 1px 0 rgba(255, 255, 255, 0.15);
138
+ filter: brightness(1.1) contrast(1.15);
139
+ }
140
+ #intro-card {
141
+ border-radius: 24px;
142
+ padding: 32px 36px;
143
+ background: linear-gradient(135deg,
144
+ rgba(30, 41, 59, 0.8) 0%,
145
+ rgba(51, 65, 85, 0.6) 50%,
146
+ rgba(30, 41, 59, 0.8) 100%
147
+ );
148
+ border: 1px solid rgba(148, 163, 184, 0.2);
149
+ box-shadow:
150
+ inset 0 1px 0 rgba(255, 255, 255, 0.1),
151
+ 0 16px 32px rgba(0, 0, 0, 0.2),
152
+ 0 8px 16px rgba(0, 0, 0, 0.1);
153
+ position: relative;
154
+ overflow: hidden;
155
+ backdrop-filter: blur(16px);
156
+ }
157
+ #intro-card::before {
158
+ content: '';
159
+ position: absolute;
160
+ top: 0;
161
+ left: 0;
162
+ right: 0;
163
+ height: 2px;
164
+ background: linear-gradient(90deg,
165
+ transparent,
166
+ rgba(59, 130, 246, 0.5),
167
+ rgba(147, 51, 234, 0.5),
168
+ transparent
169
+ );
170
+ }
171
+ #intro-card::after {
172
+ content: '';
173
+ position: absolute;
174
+ top: 0;
175
+ left: 0;
176
+ right: 0;
177
+ bottom: 0;
178
+ background: radial-gradient(circle at 50% 0%, rgba(59, 130, 246, 0.03) 0%, transparent 70%);
179
+ pointer-events: none;
180
+ }
181
+ #intro-card ul {
182
+ margin: 0.35rem 0 0.7rem;
183
+ padding-left: 1.1rem;
184
+ }
185
+ #intro-card li { margin-bottom: 0.3rem; }
186
+ #title {
187
+ text-align: center;
188
+ margin: 24px 0 32px;
189
+ letter-spacing: 0.08em;
190
+ text-transform: uppercase;
191
+ font-weight: 800;
192
+ color: #f1f5f9;
193
+ font-size: 1.75rem;
194
+ text-shadow:
195
+ 0 4px 12px rgba(0, 0, 0, 0.4),
196
+ 0 2px 6px rgba(0, 0, 0, 0.2);
197
+ position: relative;
198
+ z-index: 2;
199
+ background: linear-gradient(135deg, #f1f5f9 0%, #cbd5e1 100%);
200
+ -webkit-background-clip: text;
201
+ -webkit-text-fill-color: transparent;
202
+ background-clip: text;
203
+ }
204
+ #title::after {
205
+ content: '';
206
+ position: absolute;
207
+ bottom: -12px;
208
+ left: 50%;
209
+ transform: translateX(-50%);
210
+ width: 80px;
211
+ height: 3px;
212
+ background: linear-gradient(90deg,
213
+ transparent,
214
+ rgba(59, 130, 246, 0.8),
215
+ rgba(147, 51, 234, 0.8),
216
+ transparent
217
+ );
218
+ border-radius: 2px;
219
+ box-shadow: 0 2px 8px rgba(59, 130, 246, 0.3);
220
+ }
221
+ #chat-wrapper {
222
+ display: flex;
223
+ flex-direction: column;
224
+ gap: 16px;
225
+ flex: 1 1 auto;
226
+ min-height: 0;
227
+ }
228
+ #chatbot {
229
+ display: flex;
230
+ flex-direction: column;
231
+ min-height: 680px;
232
+ height: clamp(680px, calc(100dvh - 200px), 1200px);
233
+ border-radius: 28px;
234
+ border: 1px solid rgba(148, 163, 184, 0.2);
235
+ background: linear-gradient(135deg,
236
+ rgba(15, 23, 42, 0.95) 0%,
237
+ rgba(30, 41, 59, 0.9) 50%,
238
+ rgba(15, 23, 42, 0.95) 100%
239
+ );
240
+ box-shadow:
241
+ inset 0 1px 0 rgba(255, 255, 255, 0.1),
242
+ 0 48px 96px rgba(0, 0, 0, 0.3),
243
+ 0 24px 48px rgba(0, 0, 0, 0.2),
244
+ 0 0 0 1px rgba(148, 163, 184, 0.1);
245
+ position: relative;
246
+ overflow: hidden;
247
+ backdrop-filter: blur(20px);
248
+ z-index: 2;
249
+ }
250
+ #chatbot::before {
251
+ content: '';
252
+ position: absolute;
253
+ top: 0;
254
+ left: 0;
255
+ right: 0;
256
+ height: 2px;
257
+ background: linear-gradient(90deg,
258
+ transparent,
259
+ rgba(59, 130, 246, 0.6),
260
+ rgba(147, 51, 234, 0.6),
261
+ transparent
262
+ );
263
+ z-index: 1;
264
+ }
265
+ #chatbot::after {
266
+ content: '';
267
+ position: absolute;
268
+ top: 0;
269
+ left: 0;
270
+ right: 0;
271
+ bottom: 0;
272
+ background: radial-gradient(circle at 50% 0%, rgba(59, 130, 246, 0.02) 0%, transparent 70%);
273
+ pointer-events: none;
274
+ z-index: 0;
275
+ }
276
+ #chatbot .wrapper,
277
+ #chatbot .bubble-wrap,
278
+ #chatbot .message-wrap {
279
+ flex: 1 1 auto;
280
+ display: flex;
281
+ min-height: 0;
282
+ }
283
+ #chatbot .bubble-wrap {
284
+ flex-direction: column;
285
+ overflow-y: auto;
286
+ padding: 12px 16px 20px;
287
+ gap: 16px;
288
+ }
289
+ #chatbot label span {
290
+ color: rgba(221, 230, 255, 0.85);
291
+ font-weight: 600;
292
+ letter-spacing: 0.03em;
293
+ }
294
+ #chatbot .message-wrap .message {
295
+ background: rgba(30, 41, 59, 0.9);
296
+ border-radius: 24px;
297
+ border: 1px solid rgba(148, 163, 184, 0.2);
298
+ box-shadow:
299
+ 0 24px 48px rgba(0, 0, 0, 0.2),
300
+ 0 12px 24px rgba(0, 0, 0, 0.1),
301
+ inset 0 1px 0 rgba(255, 255, 255, 0.05);
302
+ backdrop-filter: blur(12px);
303
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
304
+ position: relative;
305
+ overflow: hidden;
306
+ padding: 20px 24px;
307
+ margin: 8px 0;
308
+ line-height: 1.6;
309
+ word-wrap: break-word;
310
+ overflow-wrap: break-word;
311
+ hyphens: auto;
312
+ }
313
+ #chatbot .message-wrap .message::before {
314
+ content: '';
315
+ position: absolute;
316
+ top: 0;
317
+ left: 0;
318
+ right: 0;
319
+ height: 1px;
320
+ background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.1), transparent);
321
+ }
322
+ #chatbot .message-wrap .message:hover {
323
+ transform: translateY(-2px) scale(1.01);
324
+ box-shadow:
325
+ 0 32px 64px rgba(0, 0, 0, 0.3),
326
+ 0 16px 32px rgba(0, 0, 0, 0.2),
327
+ inset 0 1px 0 rgba(255, 255, 255, 0.1);
328
+ }
329
+ #chatbot .message-wrap .bot .message {
330
+ background: linear-gradient(135deg,
331
+ rgba(30, 41, 59, 0.95) 0%,
332
+ rgba(51, 65, 85, 0.8) 100%
333
+ );
334
+ border-color: rgba(59, 130, 246, 0.3);
335
+ margin-right: 60px;
336
+ margin-left: 8px;
337
+ }
338
+ #chatbot .message-wrap .user .message {
339
+ background: linear-gradient(135deg,
340
+ rgba(30, 41, 59, 0.95) 0%,
341
+ rgba(51, 65, 85, 0.8) 100%
342
+ );
343
+ border-color: rgba(147, 51, 234, 0.3);
344
+ margin-left: 60px;
345
+ margin-right: 8px;
346
+ }
347
+ .suggestion-banner {
348
+ font-weight: 700;
349
+ letter-spacing: 0.06em;
350
+ text-transform: uppercase;
351
+ font-size: 0.95rem;
352
+ color: #cbd5e1;
353
+ margin-bottom: 16px;
354
+ text-shadow: 0 2px 8px rgba(0, 0, 0, 0.3);
355
+ position: relative;
356
+ z-index: 2;
357
+ }
358
+ .suggestion-buttons {
359
+ display: flex;
360
+ gap: 16px;
361
+ flex-wrap: wrap;
362
+ justify-content: space-between;
363
+ margin-bottom: 12px;
364
+ position: relative;
365
+ z-index: 2;
366
+ }
367
+ .suggestion-buttons button {
368
+ flex: 1 1 0;
369
+ min-width: 0;
370
+ padding: 16px 20px;
371
+ border-radius: 16px;
372
+ border: 1px solid rgba(148, 163, 184, 0.3);
373
+ background: linear-gradient(135deg,
374
+ rgba(30, 41, 59, 0.9) 0%,
375
+ rgba(51, 65, 85, 0.7) 100%
376
+ );
377
+ color: #f1f5f9;
378
+ font-weight: 600;
379
+ font-size: 0.95rem;
380
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
381
+ cursor: pointer;
382
+ position: relative;
383
+ overflow: hidden;
384
+ backdrop-filter: blur(12px);
385
+ box-shadow:
386
+ 0 8px 16px rgba(0, 0, 0, 0.1),
387
+ inset 0 1px 0 rgba(255, 255, 255, 0.1);
388
+ }
389
+ .suggestion-buttons button::before {
390
+ content: '';
391
+ position: absolute;
392
+ top: 0;
393
+ left: -100%;
394
+ width: 100%;
395
+ height: 100%;
396
+ background: linear-gradient(90deg,
397
+ transparent,
398
+ rgba(59, 130, 246, 0.1),
399
+ transparent
400
+ );
401
+ transition: left 0.6s ease;
402
+ }
403
+ .suggestion-buttons button::after {
404
+ content: '';
405
+ position: absolute;
406
+ top: 0;
407
+ left: 0;
408
+ right: 0;
409
+ height: 1px;
410
+ background: linear-gradient(90deg,
411
+ transparent,
412
+ rgba(255, 255, 255, 0.2),
413
+ transparent
414
+ );
415
+ }
416
+ .suggestion-buttons button:hover {
417
+ transform: translateY(-4px) scale(1.02);
418
+ box-shadow:
419
+ 0 32px 64px rgba(0, 0, 0, 0.2),
420
+ 0 16px 32px rgba(0, 0, 0, 0.1),
421
+ inset 0 1px 0 rgba(255, 255, 255, 0.15);
422
+ border-color: rgba(59, 130, 246, 0.5);
423
+ background: linear-gradient(135deg,
424
+ rgba(30, 41, 59, 0.95) 0%,
425
+ rgba(51, 65, 85, 0.8) 100%
426
+ );
427
+ }
428
+ .suggestion-buttons button:hover::before {
429
+ left: 100%;
430
+ }
431
+ .suggestion-buttons button:active {
432
+ transform: translateY(-2px) scale(1.01);
433
+ }
434
+ .gradio-container textarea {
435
+ border-radius: 20px !important;
436
+ min-height: 120px !important;
437
+ background: rgba(30, 41, 59, 0.95) !important;
438
+ border: 1px solid rgba(148, 163, 184, 0.3) !important;
439
+ color: #f1f5f9 !important;
440
+ box-shadow:
441
+ inset 0 1px 0 rgba(255, 255, 255, 0.1),
442
+ 0 16px 32px rgba(0, 0, 0, 0.2),
443
+ 0 8px 16px rgba(0, 0, 0, 0.1) !important;
444
+ font-size: 1rem !important;
445
+ font-weight: 500 !important;
446
+ line-height: 1.6 !important;
447
+ padding: 20px 24px !important;
448
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
449
+ backdrop-filter: blur(16px) !important;
450
+ position: relative !important;
451
+ overflow: hidden !important;
452
+ }
453
+ .gradio-container textarea::before {
454
+ content: '';
455
+ position: absolute;
456
+ top: 0;
457
+ left: 0;
458
+ right: 0;
459
+ height: 1px;
460
+ background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.1), transparent);
461
+ }
462
+ .gradio-container textarea:focus {
463
+ outline: none !important;
464
+ border-color: rgba(59, 130, 246, 0.6) !important;
465
+ box-shadow:
466
+ 0 0 0 4px rgba(59, 130, 246, 0.2),
467
+ 0 24px 48px rgba(0, 0, 0, 0.3),
468
+ 0 12px 24px rgba(0, 0, 0, 0.2),
469
+ inset 0 1px 0 rgba(255, 255, 255, 0.15) !important;
470
+ background: rgba(30, 41, 59, 0.98) !important;
471
+ transform: translateY(-1px) !important;
472
+ }
473
+ .gradio-container textarea::placeholder {
474
+ color: rgba(203, 213, 225, 0.7) !important;
475
+ font-weight: 500 !important;
476
+ font-style: italic !important;
477
+ }
478
+ #footer {
479
+ text-align: center;
480
+ opacity: 0.8;
481
+ font-size: 0.9rem;
482
+ margin-top: 32px;
483
+ letter-spacing: 0.06em;
484
+ color: rgba(203, 213, 225, 0.8);
485
+ font-weight: 500;
486
+ text-shadow: 0 2px 8px rgba(0, 0, 0, 0.3);
487
+ position: relative;
488
+ z-index: 2;
489
+ }
490
+
491
+ /* Professional loading states and animations */
492
+ @keyframes pulse {
493
+ 0%, 100% { opacity: 1; }
494
+ 50% { opacity: 0.6; }
495
+ }
496
+ @keyframes shimmer {
497
+ 0% { transform: translateX(-100%); }
498
+ 100% { transform: translateX(100%); }
499
+ }
500
+ @keyframes fadeInUp {
501
+ from {
502
+ opacity: 0;
503
+ transform: translateY(20px);
504
+ }
505
+ to {
506
+ opacity: 1;
507
+ transform: translateY(0);
508
+ }
509
+ }
510
+ @keyframes scaleIn {
511
+ from {
512
+ opacity: 0;
513
+ transform: scale(0.95);
514
+ }
515
+ to {
516
+ opacity: 1;
517
+ transform: scale(1);
518
+ }
519
+ }
520
+ .loading-message {
521
+ animation: pulse 2s ease-in-out infinite;
522
+ }
523
+ .loading-shimmer {
524
+ position: relative;
525
+ overflow: hidden;
526
+ }
527
+ .loading-shimmer::after {
528
+ content: '';
529
+ position: absolute;
530
+ top: 0;
531
+ left: 0;
532
+ width: 100%;
533
+ height: 100%;
534
+ background: linear-gradient(90deg,
535
+ transparent,
536
+ rgba(59, 130, 246, 0.1),
537
+ transparent
538
+ );
539
+ animation: shimmer 2.5s infinite;
540
+ }
541
+
542
+ /* Professional button styles */
543
+ .gradio-container button {
544
+ border-radius: 16px !important;
545
+ font-weight: 600 !important;
546
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
547
+ position: relative !important;
548
+ overflow: hidden !important;
549
+ backdrop-filter: blur(12px) !important;
550
+ box-shadow:
551
+ 0 8px 16px rgba(0, 0, 0, 0.1),
552
+ inset 0 1px 0 rgba(255, 255, 255, 0.1) !important;
553
+ }
554
+ .gradio-container button:hover {
555
+ transform: translateY(-2px) scale(1.02) !important;
556
+ box-shadow:
557
+ 0 16px 32px rgba(0, 0, 0, 0.2),
558
+ 0 8px 16px rgba(0, 0, 0, 0.1),
559
+ inset 0 1px 0 rgba(255, 255, 255, 0.15) !important;
560
+ }
561
+ .gradio-container button:active {
562
+ transform: translateY(-1px) scale(1.01) !important;
563
+ }
564
+
565
+ /* Professional scrollbar styling */
566
+ ::-webkit-scrollbar {
567
+ width: 10px;
568
+ }
569
+ ::-webkit-scrollbar-track {
570
+ background: rgba(30, 41, 59, 0.3);
571
+ border-radius: 6px;
572
+ border: 1px solid rgba(148, 163, 184, 0.1);
573
+ }
574
+ ::-webkit-scrollbar-thumb {
575
+ background: linear-gradient(135deg,
576
+ rgba(59, 130, 246, 0.6) 0%,
577
+ rgba(147, 51, 234, 0.6) 100%
578
+ );
579
+ border-radius: 6px;
580
+ border: 1px solid rgba(148, 163, 184, 0.2);
581
+ box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.1);
582
+ }
583
+ ::-webkit-scrollbar-thumb:hover {
584
+ background: linear-gradient(135deg,
585
+ rgba(59, 130, 246, 0.8) 0%,
586
+ rgba(147, 51, 234, 0.8) 100%
587
+ );
588
+ box-shadow:
589
+ 0 4px 8px rgba(0, 0, 0, 0.2),
590
+ inset 0 1px 0 rgba(255, 255, 255, 0.15);
591
+ }
592
+
593
+ /* Professional responsive design */
594
+ @media (max-width: 1024px) {
595
+ #container {
596
+ padding: 24px 32px;
597
+ border-radius: 28px;
598
+ max-width: 100%;
599
+ }
600
+ #header {
601
+ gap: 24px;
602
+ }
603
+ #logo img {
604
+ max-height: 200px;
605
+ }
606
+ #chatbot {
607
+ height: clamp(620px, calc(100dvh - 180px), 1000px);
608
+ border-radius: 24px;
609
+ }
610
+ #chatbot .message-wrap .bot .message {
611
+ margin-right: 40px;
612
+ }
613
+ #chatbot .message-wrap .user .message {
614
+ margin-left: 40px;
615
+ }
616
+ }
617
+
618
+ @media (max-width: 900px) {
619
+ #container {
620
+ padding: 20px 24px;
621
+ border-radius: 24px;
622
+ }
623
+ #header {
624
+ flex-direction: column;
625
+ text-align: center;
626
+ gap: 24px;
627
+ }
628
+ #logo img {
629
+ max-height: 180px;
630
+ }
631
+ #chatbot {
632
+ height: clamp(580px, calc(100dvh - 160px), 900px);
633
+ border-radius: 20px;
634
+ }
635
+ #chatbot .message-wrap .bot .message {
636
+ margin-right: 20px;
637
+ padding: 16px 20px;
638
+ }
639
+ #chatbot .message-wrap .user .message {
640
+ margin-left: 20px;
641
+ padding: 16px 20px;
642
+ }
643
+ .suggestion-buttons {
644
+ flex-direction: column;
645
+ gap: 12px;
646
+ }
647
+ .suggestion-buttons button {
648
+ min-width: 100%;
649
+ padding: 16px 20px;
650
+ }
651
+ #title {
652
+ font-size: 1.4rem;
653
+ margin: 20px 0 24px;
654
+ }
655
+ #intro-card {
656
+ padding: 24px 28px;
657
+ border-radius: 20px;
658
+ }
659
+ }
660
+
661
+ @media (max-width: 640px) {
662
+ .gradio-container {
663
+ padding: 16px 0 24px;
664
+ }
665
+ #container {
666
+ border-radius: 20px;
667
+ padding: 16px 20px;
668
+ }
669
+ #chatbot {
670
+ height: clamp(520px, calc(100dvh - 140px), 800px);
671
+ border-radius: 18px;
672
+ }
673
+ #chatbot .message-wrap .bot .message {
674
+ margin-right: 12px;
675
+ margin-left: 4px;
676
+ padding: 14px 18px;
677
+ border-radius: 20px;
678
+ }
679
+ #chatbot .message-wrap .user .message {
680
+ margin-left: 12px;
681
+ margin-right: 4px;
682
+ padding: 14px 18px;
683
+ border-radius: 20px;
684
+ }
685
+ #logo img {
686
+ max-height: 160px;
687
+ }
688
+ #intro-card {
689
+ padding: 20px 24px;
690
+ border-radius: 16px;
691
+ }
692
+ .gradio-container textarea {
693
+ min-height: 100px !important;
694
+ padding: 16px 20px !important;
695
+ font-size: 0.95rem !important;
696
+ border-radius: 16px !important;
697
+ }
698
+ #title {
699
+ font-size: 1.2rem;
700
+ margin: 16px 0 20px;
701
+ }
702
+ .suggestion-buttons button {
703
+ padding: 14px 18px;
704
+ border-radius: 14px;
705
+ }
706
+ }
707
+ """
708
+ logger.info("Custom CSS initialized")
709
+ with gr.Blocks(theme=theme, css=custom_css) as demo:
710
+ with gr.Column(elem_id="container"):
711
+ with gr.Row(elem_id="header"):
712
+ with gr.Column(scale=2, min_width=140):
713
+ gr.Image(
714
+ value="assets/Logo WO Background.png",
715
+ height=190,
716
+ show_label=False,
717
+ elem_id="logo",
718
+ )
719
+ with gr.Column(scale=10):
720
+ with gr.Group(elem_id="intro-card"):
721
+ gr.Markdown(
722
+ """
723
+ **Welcome — Chat with Daniel**
724
+
725
+ - **What to ask**: projects, AI/RAG/agents, data pipelines, or career.
726
+ - **Privacy**: if you share an email, I’ll only save it when you ask.
727
+ - **Tip**: streaming is live; use Stop to interrupt and send a follow‑up.
728
+
729
+ Example prompts: “Tell me about your last role”, “How do you design a RAG pipeline?”, “Can you scope a small automation?”
730
+ """,
731
+ )
732
+ gr.Markdown("## Chat with Daniel", elem_id="title")
733
+ with gr.Column(elem_id="chat-wrapper"):
734
+ chat_input = gr.Textbox(
735
+ placeholder="Type your message here…",
736
+ autofocus=True,
737
+ max_lines=5,
738
+ show_copy_button=True,
739
+ container=False,
740
+ scale=1
741
+ )
742
+ chat_iface = gr.ChatInterface(
743
+ me.chat,
744
+ type="messages",
745
+ chatbot=chatbot,
746
+ title="",
747
+ description="Ask about projects, AI workflows, or get in touch.",
748
+ submit_btn="Send",
749
+ stop_btn="Stop",
750
+ textbox=chat_input,
751
+ )
752
+ gr.Markdown("**Need inspiration?** Try asking:", elem_classes="suggestion-banner")
753
+ with gr.Row(elem_classes="suggestion-buttons"):
754
+ examples = [
755
+ "Tell me about your last role and what you do day to day",
756
+ "How would you design a small RAG pipeline for docs?",
757
+ "What Python libraries are you familiar with?",
758
+ ]
759
+ for example in examples:
760
+ gr.Button(
761
+ example,
762
+ variant="secondary",
763
+ size="sm"
764
+ ).click(
765
+ lambda text=example: gr.update(value=text),
766
+ outputs=chat_input,
767
+ )
768
+ gr.Markdown("Made with ❤️ — CoDHe Labs", elem_id="footer")
769
+ logger.info("Blocks app initialized")
770
+
771
+
772
+ def main():
773
+ logger.info("Launching demo")
774
+ demo.launch(
775
+ server_name="0.0.0.0",
776
+ server_port=int(os.getenv("PORT", 7860)),
777
+ favicon_path="assets/logo.png",
778
+ debug=True,
779
+ show_error=True,
780
+ )
781
+
782
+
783
+ if __name__ == "__main__":
784
+ if os.getenv("WATCH_MODE") == "1":
785
+ run_with_watch()
786
+ else:
787
+ main()
788
+ logger.info("Demo launched")
assets/Logo WO Background.png ADDED

Git LFS Details

  • SHA256: d6f3d6a54ea8210e6d669c1c7acd9236025749540b2387a76eab447b1867cac0
  • Pointer size: 132 Bytes
  • Size of remote file: 1.72 MB
assets/dan.png ADDED

Git LFS Details

  • SHA256: 367b6d63fba504c787293e4acbac7edd056b864b6a0e774ddb3cbaaece49a565
  • Pointer size: 131 Bytes
  • Size of remote file: 628 kB
assets/logo.png ADDED

Git LFS Details

  • SHA256: fd4cf842201d6182f070d4d73d6c39945079dd010469eccab0fdd0a22492f758
  • Pointer size: 132 Bytes
  • Size of remote file: 1.3 MB
data/chroma/71809a45-be76-40b0-999a-c4ac152f6a9b/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:19913cb747d4f20ecdb323b45c8e9cc1f007a5d1783888656851a8b1e949c67c
+ size 1242800
data/chroma/71809a45-be76-40b0-999a-c4ac152f6a9b/header.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:871503f03f4549153dc2cf0f77e863e57b9a594b4224b02dda23a9018da3f346
+ size 100
data/chroma/71809a45-be76-40b0-999a-c4ac152f6a9b/length.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7a12e561363385e9dfeeab326368731c030ed4b374e7f5897ac819159d2884c5
+ size 400
data/chroma/71809a45-be76-40b0-999a-c4ac152f6a9b/link_lists.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+ size 0
data/chroma/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:789aa36f96343e8bffdc967b155da66a1d59df09b55e2a0658dc75f0e6018f42
+ size 1576960
me/Daniel Halwell Full CV.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b977e3d5f7dcabe33a54e3114917b18156020ec70891686660d64043003350d
+ size 175573
me/Profile.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c66d2e1c9fd4763f5e50bd5df0f7dc0b81e0045a48f2909c0a35ea12163ccce5
+ size 60938
me/summary.txt ADDED
@@ -0,0 +1,911 @@
+
+ Daniel Halwell — Full Life Story (First‑Person) — Digital CV Narrative (LLM‑Ready)
+ Last updated: 22 September 2025
+
+ [Metadata]
+ Exec summary: Opening context introducing you as a scientist transitioning into AI engineering, highlighting passion for coding and ethical, results-driven principles.
+ Keywords: introduction, AI engineer journey, ethics, automation, passion for coding, momentum
+ Questions:
+ 1. Who are you and what is your current career focus?
+ 2. How long have you been coding and why did you start?
+ 3. What types of work make you happiest day-to-day?
+ [/Metadata]
+
+ Hi, I’m Daniel. I’m a scientist‑turned‑AI engineer who likes building apps, automating processes and AI systems. I've been coding for the last
+ 6 years now, after a colleague suggested I try it. Now it's my favourite hobby, and becoming an AI engineer has become my main career objective.
+ I’m happiest when I’m writing Python, wiring up data, and shipping small, useful tools that unblock people. I care about
+ ethics, clarity, and momentum — make the right thing the easy thing, and prove it with results.
+
+ [Metadata]
+ Exec summary: Expands on your broader interests in AI, data science, and mathematics, anchoring your transition from analytical chemistry to tech.
+ Keywords: data science, automation, problem solving, kaggle, mathematics, analytical chemistry transition
+ Questions:
+ 1. What other technical domains beyond AI interest you?
+ 2. How do you engage with problem solving outside of work?
+ 3. In what ways does your background in analytical chemistry support your move into data science?
+ [/Metadata]
+
+ It's not just AI systems I like; I like data science and general automation too. I really like to solve a problem, so having a look at coding competitions
+ on Kaggle or just trying to come up with solutions is really enjoyable. I like (trying) to learn about all the underpinning mathematics, as it fascinates me.
+ As an analytical chemist, working with lots of data has been part of my day job for some time, so making the leap to data science and then AI felt pretty natural.
+
+
+ Core identity & working values
34
+
35
+ [Metadata]
36
+ Exec summary: Bullet-point overview of your principles, emphasising human-centred tech, ethics, iterative building, communication style, and evidence-based mindset.
37
+ Keywords: working values, ethics, human-first, builder mindset, mentoring, communication style, evidence-driven
38
+ Questions:
39
+ 1. What foundational values steer your approach to technology and product delivery?
40
+ 2. How do you prefer to translate ideas into execution?
41
+ [/Metadata]
42
+
43
+ • Human‑first technologist. Tools are only useful if they help people make better decisions faster.
44
+ • Ethics matter. I avoid work tied to fossil fuels, weapons, surveillance, or anything that harms people.
45
+ • Builder’s mindset. Prefer to get an idea down in a flow diagram, like visual representations. Once I'm clear on the vision I like to start small and iterate quickly
46
+ • Teaching & clarity. Notebooks, diagrams, docstrings, and handovers. Mentoring is part of the job.
47
+ • Plain English, not the most over the top person, pretty laid back, just want to get stuff done and enjoy life, pretty sarcastic, love some dark humout
48
+ • Evidence over adjectives. Numbers, before-and-after, and real bottlenecks solved.
49
+
50
+
51
+ --
52
+ [Metadata]
53
+ Exec summary: Personal origin note grounding your background in Devon and acknowledging relocation for career opportunities.
54
+ Keywords: origin story, Devon, relocation, personal background, career move motivation
55
+ Questions:
56
+ 1. Where are you originally from
57
+ [/Metadata]
58
+
59
+ I'm originally from the south west of England in the lovely county of Devon. Great place to grow up even if it is a bit out of the way.
60
+ Shame to move away but you gotta go where the jobs are. I lived here up until the age of 18, when I moved to Guyana South America for a year and
61
+ then on to University. My first work experiences came here and it was nice to grow up in a quiet place where there was a lot of
62
+ people who knew each other, close to the beach in summer and has great pubs and bars.
63
+ ---
64
+
65
+
66
+ Early graft (pre‑uni): where my work ethic came from
+
+ [Metadata]
+ Exec summary: Details your early jobs across hospitality, retail, and manufacturing, highlighting the foundation of your work ethic, QA mindset, and preference for night shifts.
+ Keywords: early career, bar cleaning, retail experience, factory work, quality assurance, work ethic, night owl
+ Questions:
+ 1. What types of jobs did you hold before university?
+ 2. How did your roles shape your understanding of quality assurance?
+ 3. How did these early experiences influence later method development skills?
+ [/Metadata]
+
+ I started working around 13, cleaning a bar on Saturday mornings — bottles, floors, stocking the bar, etc. By 16 I was at
+ Summerfields, mainly stacking shelves, though I did do some checkout work. I tried a few different shift patterns there
+ (nights, evenings, days) across produce and dairy — receiving deliveries and stacking shelves — and learnt that I'm a bit of a night owl.
+ I also did a stint in a small component factory, making parts by hand from SOPs —
+ counting coil turns, trimming, testing. It wasn't the most exciting job, but it was a good earner.
+ This was my first foray into QA really, where checking work was a priority so things worked;
+ that focus on process and repeatability fed straight into my later method development work.
+ I also worked in an Indian restaurant, mostly behind the bar: taking orders over the phone,
+ making drinks, occasionally serving drinks at the table, and clearing up tables. I always loved Indian cuisine, and
+ working there meant I ate quite a lot of curry — it was amazing.
+ I also worked in a nightclub on weekends, which was a pretty late one to be honest; I used to start around 10pm and work through until
+ about 3am. I did quite a few jobs there: coat check, kitchen (making burgers and lattice fries mainly), and
+ the bar. Pretty hectic.
+
+ Gap year in Guyana: teaching and learning to adapt
+
+ [Metadata]
+ Exec summary: Chronicles your gap-year teaching experience in Guyana, emphasising adaptability, instructional skills, and exposure to challenging environments.
+ Keywords: Guyana, Project Trust, teaching, adaptability, resilience, user-centered design inspiration
+ Questions:
+ 1. What program enabled you to teach in Guyana and what subjects did you cover?
+ 2. How did you handle unexpected challenges during your placement?
+ 3. Which teaching lessons do you carry into your user-facing design work?
+ [/Metadata]
+
+ Before university, I spent a year teaching in Guyana through Project Trust. I trained on the Isle of Coll, then flew out
+ with a cohort and split off into schools. When my volunteer roommate had to return home due to illness, I moved schools and
+ started again with some new roommates.
+ I taught maths, science, and PE to students roughly 11–16. The big lessons:
+ • Teaching is hard; you have to be prepared, things surprise you, and you have to be quick on your feet
+ • Learning isn't the same for everybody; you have to adapt to the individual
+ • Be clear and concise in your delivery; you've got to be fine-tuned
+ Those ideas still shape how I design anything user‑facing — dashboards, APIs, or agentic assistants.
+ This time wasn't without its challenges. My roommate getting ill was a big deal; when you're the only person around
+ to help in a medical emergency, it's quite a challenge. Thankfully things turned out well on that occasion, but there were
+ many challenges living in a country so different from your own. You gain some key perspective on your own situation when
+ viewing the kind of poverty that some people will never see in a lifetime.
+
+ Loughborough & Mars Petcare: chemistry + sensors + software
119
+
120
+ [Metadata]
121
+ Exec summary: Narrates your MChem journey, industrial placement at Mars Petcare, development of analytical methods, and growing interest in statistics and food science.
122
+ Keywords: Loughborough MChem, Mars Petcare, LC-MS, GC-MS, method development, maillard reaction, statistics, sensory science
123
+ Questions:
124
+ 1. Why did you choose Loughborough and pursue an industrial placement at Mars?
125
+ 2. Which analytical instruments and methods did you master during the placement?
126
+ 3. How did your work with the Maillard reaction influence your interests?
127
+ 4. What statistical techniques did you apply in flavour development projects?
128
+ 5. What publication resulted from your work in this period?
129
+ [/Metadata]
130
+
131
+ I’d already accepted a place for MChem at Loughborough. I wanted industry experience in the degree, so I took an
132
+ industrial placement at Mars Petcare in Verden, Germany. I trained on LC‑MS, GC‑MS, GC‑FID; moved from sample prep
133
+ to method development; migrated a tricky amino‑acid analysis from LC to GC with derivatisation; added additional amino
134
+ acids; and demonstrated linearity and accuracy. First taste of method development and optimisation — and I loved it.
135
+ Living in Germany was a great experience and definitely one of the best places I've ever lived.
136
+
137
+ I worked on flavour development of cat food, running feeding trials with recipes that I put together. This is where I started
138
+ to get more invloved and interested in statistics. I set up design of experiments trials to determine the optimum concentration
139
+ of food additives to increase food uptake by the cats, they are quite picky after all. This involved making pilot scale batches on plant,
140
+ running analysis and interpreting the data. All in all it was an amazing experience.
141
+
142
+ The main focus of my project there was the maillard reaction. The Maillard reaction is a non-enzymatic browning reaction between
143
+ amino acids and reducing sugars that generates the complex flavours and brown colours associated with roasted, baked, and fried foods.
144
+ It proceeds through a cascade of steps (Schiff base → Amadori/Heyns → fragmentation and Strecker degradation → melanoidins) and is
145
+ accelerated by heat, dryness, and alkaline conditions. It made me really interested in food and how small changes to cooking can make
146
+ big differences in flavour profiles.
147
+
148
+ Back in the UK, I returned to Mars for a summer project near Loughborough on umami perception. I set up macros and
149
+ a software workflow so sensory panelists could record peak perception while we swabbed to quantify concentrations, and
150
+ we correlated the curves. That work was presented at a flavour symposium. It was instrumentation + sensory science +
151
+ just‑enough software — a pattern I’ve repeated since in other domains. This turned into my first publication
152
+ Relationship between Human Taste Perception and the Persistence of Umami Compounds in the Mouth - Flavour Science
153
+ Proceedings from XIII Weurman Flavour Research Symposium
154
+ 2014, Pages 487-491
155
+
156
+ Side note: the animal care standards and the environment were excellent. It mattered to me that the work respected
157
+ the animals — that balance between scientific rigour and humanity set a tone for my career.
158
+
159
+
160
+ A practical reset: labouring on a building site
+
+ [Metadata]
+ Exec summary: Highlights your post-graduation labouring work, underscoring appreciation for tangible progress and parallels to iterative software development.
+ Keywords: labouring, construction, tangible progress, iteration, motivation, work ethic
+ Questions:
+ 1. What work did you do immediately after graduating?
+ 2. How did labouring influence your appreciation for visible progress?
+ 3. In what way do you connect physical labour to your software development mindset?
+ 4. Why do iterative build-test cycles resonate with you?
+ [/Metadata]
+
+ After graduating, I worked as a labourer in Devon while job‑hunting — hauling materials through houses to back gardens,
+ mixing cement for brickwork and infill, clearing waste. Tangible progress at the end of each day is addictive. I still chase
+ that in my day-to-day, and it was really pointing me towards a career in programming — I just didn't know it yet. It's great to see
+ progress when you finish at the end of the day; those small iterative cycles of build, debug, test, repeat keep me addicted to writing code.
+
+ Sanofi → Recipharm (2012–2021): analytical specialist in a regulated world
+
+ [Metadata]
+ Exec summary: Summarises nearly a decade of analytical chemistry work at Sanofi and Recipharm, covering E&L leadership, method transfers, investigations, and cross-functional support in regulated environments.
+ Keywords: Sanofi, Recipharm, analytical specialist, extractables and leachables, method validation, cGxP, investigations, manufacturing support, statistics
+ Questions:
+ 1. What were your primary responsibilities at Sanofi and Recipharm?
+ 2. How did you lead extractables and leachables studies?
+ 3. What statistical methods did you apply during method transfers and validations?
+ 4. How did you contribute to troubleshooting and manufacturing support?
+ 5. How did this period strengthen your commitment to data integrity and Python/ML?
+ [/Metadata]
+
+ I spent nearly a decade across Sanofi and Recipharm, moving from routine QC to Analytical Specialist. My centre of gravity:
+ non‑routine analysis, method transfers, validations, and inspection support (MHRA, FDA, etc.).
+
+ [Metadata]
+ Exec summary: Enumerates your day-to-day responsibilities at Sanofi and Recipharm, covering E&L leadership, method transfers, investigations, and manufacturing support.
+ Keywords: responsibilities, extractables and leachables, method transfer, validation, investigations, manufacturing support
+ Questions:
+ 1. What specific analytical tasks did you handle in this role?
+ 2. How did you contribute to extractables and leachables programmes?
+ 3. In what ways did you support method transfers and validations?
+ 4. How did you engage in investigations and CAPA activities?
+ 5. What types of cross-functional manufacturing collaboration did you perform?
+ [/Metadata]
+
+ This period is also when I started coding in Python.
+
+ What I did:
+ • Extractables & leachables (E&L). Subject‑matter lead for E&L studies, scoping and interpreting chromatographic &
+ spectroscopic data for materials such as plastics and elastomers. I worked with suppliers to perform testing
+ on our behalf, drew up protocols and reports, and kept up to date on the latest advancements.
+ • Method transfers & validation. Equivalence testing, t‑tests, TOST, precision/accuracy studies, technical reports,
+ and document control in a cGxP environment (a small TOST illustration follows this list). This is another stage in my career where statistics was pushing me
+ towards data science and AI. I didn't quite know it yet, but I loved maths more than I thought I did.
+ I was one of the technical experts when we transferred around 60 methods to Germany following potential rule
+ changes after Brexit. This made me a key contact for troubleshooting, acceptance-criteria setting, and result interpretation;
+ I travelled to Germany to train staff — a bit of everything.
+ • Investigations & CAPA. Practical Problem Solving (PPS), root‑cause analysis across engineering, manufacturing,
+ and quality.
+ • Manufacturing support. Collaborated with scientists, engineers, and microbiologists on urgent issues — from chemical
+ impurities to microbial contamination — often building or adapting analytical methods on the fly. I'd be testing effluent
+ one day and have my head in a metered dose inhaler formulation vessel the next.
+ • I worked in a routine QC environment for quite a few years, doing analysis of nasal products, metered dose inhalers, and also
+ the packaging and raw materials that went into them.
+ • During this time I gained expertise in HPLC, GC, and Karl Fischer titration, and became a super user in GC specifically.
+
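A small illustration of the TOST equivalence check mentioned in the list above — an illustrative sketch with made-up numbers and a hypothetical equivalence margin, not taken from the original work:

```python
# Two one-sided tests (TOST) for mean equivalence, the kind of statistic
# used in analytical method transfers. delta is the pre-agreed margin.
import numpy as np
from scipy import stats

def tost_pvalue(a, b, delta):
    """p-value for H1: |mean(a) - mean(b)| < delta (two one-sided t-tests)."""
    a, b = np.asarray(a, float), np.asarray(b, float)
    # Test 1: mean difference exceeds -delta.
    p_lower = stats.ttest_ind(a, b - delta, alternative="greater").pvalue
    # Test 2: mean difference falls below +delta.
    p_upper = stats.ttest_ind(a, b + delta, alternative="less").pvalue
    return max(p_lower, p_upper)  # both tests must reject

sending_lab = [99.8, 100.1, 100.4, 99.9, 100.2]    # made-up assay results (%)
receiving_lab = [100.0, 100.3, 100.6, 100.1, 100.4]
print(tost_pvalue(sending_lab, receiving_lab, delta=1.0))  # < 0.05 → equivalent
```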
+ [Metadata]
225
+ Exec summary: Highlights the business impact and personal growth outcomes from your Sanofi/Recipharm tenure, including cost savings, data integrity ethos, and development of statistical expertise.
226
+ Keywords: impact, cost savings, data integrity, statistics, practical problem solving, career inflection
227
+ Questions:
228
+ 1. What quantified business result did you deliver during this period?
229
+ 2. How did the work reinforce your commitment to data integrity?
230
+ 3. In what way did statistics influence your transition toward Python and ML?
231
+ 4. What experience did you gain with the PPS tool and complex investigations?
232
+ [/Metadata]
233
+
234
+ Why it mattered:
235
+ • We resolved a critical impurity issue that delivered real cost savings (and a lot of learning).
236
+ • I developed deep respect for data integrity and traceability: if it isn’t documented, it didn’t happen.
237
+ • Statistics became second‑nature and nudged me towards Python and, later, machine learning and AI.
238
+ • I gained invaluable experience in practical problem solving (PPS). I worked on an extensive investigation using the
239
+ PPS tool to untangle extremely complex issues where multivariate root causes made the true root cause difficult to isolate.
240
+
241
+
242
+ AstraZeneca (May 2021 – Present, Macclesfield): chemistry meets code
243
+
244
+ [Metadata]
245
+ Exec summary: Captures your hybrid analytical science and AI engineering role at AstraZeneca, focusing on nitrosamine investigations, automation, and major achievements across RAG assistants, Bayesian optimisation, agentic workflows, and platform reliability.
246
+ Keywords: AstraZeneca, analytical chemistry, nitrosamine, automation, RAG assistant, Bayesian optimisation, agentic workflows, data pipelines, mentorship
247
+ Questions:
248
+ 1. What core responsibilities define your work at AstraZeneca?
249
+ 2. How do you apply automation and AI to analytical challenges such as nitrosamine detection?
250
+ 3. What impact did the RAG laboratory assistant deliver, and how was it developed?
251
+ 4. How did Bayesian optimisation change method development practices?
252
+ 5. What agentic workflow innovations have you introduced?
253
+ 6. Which tooling and platforms do you regularly use in this role?
254
+ 7. How do you support community and mentoring within AstraZeneca?
255
+ [/Metadata]
256
+
257
+ This is where everything clicked. I stayed rooted in analytical science — including trace/nitrosamine risk investigations
258
+ where timelines are tight — but I worked increasingly like a data scientist / engineer.
259
+ One of my key tasks is method development at extremely low concentrations. Nitrosamines have to be monitored at such low
260
+ concentrations that they require specific methods and equipment; we're talking levels of around a billionth of a gram. The one thing we can always do
261
+ is automate better, which is what I love to do. Whether it's processing ticket requests or extracting instrument usage from logs,
262
+ this is where my programming knowledge really started to make an impact (see the sketch below).
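+
+ As a flavour of the kind of automation I mean: a minimal sketch of pulling instrument usage out of plain-text logs
+ with pandas. The log format, file name, and event names here are hypothetical, purely for illustration:
+
+ import re
+ import pandas as pd
+
+ # Hypothetical log lines like: "2024-03-01 09:15 GC-07 run_started"
+ PATTERN = re.compile(r"(?P<date>\d{4}-\d{2}-\d{2}) (?P<time>\d{2}:\d{2}) (?P<instrument>\S+) (?P<event>\w+)")
+
+ rows = []
+ with open("instrument.log") as fh:  # assumed log file name
+     for line in fh:
+         match = PATTERN.match(line)
+         if match:
+             rows.append(match.groupdict())
+
+ df = pd.DataFrame(rows)
+ # Count runs per instrument per day: a crude but useful utilisation signal.
+ usage = df[df["event"] == "run_started"].groupby(["instrument", "date"]).size()
+ print(usage)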
263
+
264
+ [Metadata]
265
+ Exec summary: Bullet list of standout initiatives at AstraZeneca showing your impact across GenAI, optimisation, automation, and data tooling.
266
+ Keywords: key achievements, RAG assistant, Bayesian optimisation, agentic workflows, data pipelines, dashboards, platform correctness, chromatographic prediction, mentoring
267
+ Questions:
268
+ 1. What major projects illustrate your contributions at AstraZeneca?
269
+ 2. How did you leverage GenAI and optimisation to improve lab processes?
270
+ 3. Which data engineering and dashboard efforts reduced friction for colleagues?
271
+ 4. How did you ensure platform correctness and predictive modelling capability?
272
+ 5. In what ways do you support mentoring and community building at work?
273
+ [/Metadata]
274
+
275
+ Key achievements and strands of work:
276
+ • RAG‑based laboratory assistant (GenAI). I led the build of a retrieval‑augmented assistant with a multi‑disciplinary
277
+ team (SMEs, AI engineers, front/back‑end). We took it from PoC through risk assessments, evaluation vs expected
278
+ outputs, and UAT. It reduced troubleshooting lead times by ~20% and made internal knowledge more discoverable.
279
+ • Bayesian optimisation for method development. We matched a historical method‑development context and reached
280
+ the same optimum with ~50% fewer experiments by applying Bayesian optimisation. That moved from a promising study
281
+ to an adopted practice in real projects. This was a great team of individuals with expert knowledge of automation,
282
+ Python, Bayesian optimisation (using BayBE), gas chromatography, and HRMS. I also developed a RAG chatbot for
283
+ writing PAL script code for managing CTC rails.
284
+ • Agentic workflows. I’m actively developing agentic patterns (tool‑use, MCP) to cut manual
285
+ coordination and reduce method‑development effort. In targeted scopes, we’ve seen up to ~80% reductions in the
286
+ human loops required to get to “good enough to ship” (the point is fewer trips round the houses, not magic).
287
+ • Data pipelines & APIs. I engineered pipelines in SQL (Snowflake) and Python; launched FastAPI services so downstream
288
+ tools could call data cleanly; and used those services as foundations for GenAI tools via tool‑use/MCP.
289
+ • Dashboards that people actually use. I built Power BI and Streamlit tooling that gives a clean view of support tickets,
290
+ instrument utilisation, and self‑serve data portals (see the sketch after this list).
291
+ • Worked with large-scale databases to retrieve and clean data, and with external partners to improve the data pipeline.
292
+ • Developed and deployed Streamlit web apps for various purposes.
293
+ • Chromatographic prediction. From fingerprints + XGBoost baselines to neural approaches and, later, attention‑based
294
+ graph models. I pre‑trained on a large open dataset (~70k injections).
295
+ • Mentoring & community. I contribute to the internal Coding Network, support colleagues learning Python, and sit on the
296
+ programming expert panel. I like turning tacit know‑how into repeatable templates.
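+
+ The dashboard sketch referenced above: a minimal Streamlit pattern where the CSV source and column names are
+ hypothetical stand-ins, not the internal schema:
+
+ import pandas as pd
+ import streamlit as st
+
+ st.title("Instrument utilisation")
+
+ # Assumed CSV export with columns: instrument, date, runs
+ df = pd.read_csv("utilisation.csv", parse_dates=["date"])
+
+ instrument = st.selectbox("Instrument", sorted(df["instrument"].unique()))
+ subset = df[df["instrument"] == instrument]
+
+ st.metric("Total runs", int(subset["runs"].sum()))
+ st.bar_chart(subset.set_index("date")["runs"])  # daily runs over time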
297
+
298
+ [Metadata]
299
+ Exec summary: Enumerates the primary tools, languages, and platforms you rely on within AstraZeneca projects.
300
+ Keywords: tooling stack, Python, FastAPI, SQL, Power BI, Streamlit, ML frameworks, cloud platforms
301
+ Questions:
302
+ 1. Which languages and frameworks underpin your daily work at AstraZeneca?
303
+ 2. What visualisation and dashboard tools do you deploy?
304
+ 3. Which machine learning libraries support your modelling efforts?
305
+ 4. What GenAI providers and cloud platforms do you integrate with?
306
+ [/Metadata]
307
+
308
+ Tools I use a lot here:
309
+ Python, FastAPI, SQL/Snowflake, Power BI, Streamlit/Plotly/Matplotlib, scikit‑learn, XGBoost, PyTorch, PyTorch Geometric,
310
+ OpenAI/Anthropic/OpenRouter/Vertex APIs, Docker, GitHub Copilot / Claude Code / Gemini Code Assist, and cloud basics
311
+ across Azure/AWS/GCP.
312
+
313
+
314
+ CoDHe Labs (Jul 2025 – Present, part‑time): ethical AI that ships
315
+
316
+ [Metadata]
317
+ Exec summary: Outlines your part-time independent practice focusing on ethical AI, RAG copilots, agentic automation, and pro bono work for charities, including current projects.
318
+ Keywords: CoDHe Labs, independent work, generative AI copilots, dashboards, agentic workflows, pro bono, charity support, automation tools
319
+ Questions:
320
+ 1. What services does CoDHe Labs provide and what principles guide it?
321
+ 2. Which current initiatives demonstrate your applied skills outside AstraZeneca?
322
+ 3. How do you balance commercial and pro bono engagements?
323
+ 4. What technologies and collaborations are involved in the charity project mentioned?
324
+ 5. What future project do you hint at in this section?
325
+ [/Metadata]
326
+
327
+ Alongside my full‑time role, I formalised my independent work as CoDHe Labs — a small practice focused on:
328
+ • Generative AI “copilots” (RAG) that make internal knowledge instantly useful.
329
+ • ML‑powered insights dashboards wired to a warehouse.
330
+ • Agentic workflow automation that coordinates multi‑step processes via tool‑use.
331
+ • Digital uplift for small teams and non‑profits (including light M365/Azure support).
332
+ This includes setting up invoicing automation, data entry and storage, and user management.
333
+ I also run an “AI for Charities” pro bono strand because capability should compound beyond big budgets.
334
+
335
+ I'm currently working on a project to help a charity with their IT infrastructure and automations using Excel VBA,
336
+ Power Automate and Python. I'm also liaising with external partners to implement additional tools.
337
+ I'm also working on an agentic VS Code extension, but more on that at a later date as it's still in development.
338
+
339
+ [Metadata]
340
+ Exec summary: Outlines your scoping methodology for client engagements, focusing on bottleneck identification, rapid prototyping, transparency, and documentation.
341
+ Keywords: scoping process, bottleneck analysis, prototyping, documentation, transparency, vendor lock avoidance
342
+ Questions:
343
+ 1. How do you prioritise bottlenecks when starting new work?
344
+ 2. What approach do you take to rapid prototyping and iteration?
345
+ 3. How do you handle intellectual property, licensing, and vendor lock concerns?
346
+ 4. What project management practices (SoWs, documentation) do you emphasise?
347
+ [/Metadata]
348
+
349
+ How I scope work:
350
+ • Start with the bottleneck: retrieval? experiment count? brittle handoffs? We pick one.
351
+ • Ship a small, working prototype fast; measure; iterate; document; hand over.
352
+ • Keep IP and licensing clean; be transparent; avoid vendor lock-in where we can.
353
+ • Strong SoWs; clean docs; and honest conversations about risk, safety, and fit.
354
+
355
+
356
+ Achievements I’m proud of (because they changed behaviour)
357
+
358
+ [Metadata]
359
+ Exec summary: Highlights selected achievements demonstrating your competitive performance, hackathon recognition, adoption-driving innovations, and user-focused RAG impact.
360
+ Keywords: achievements, Kaggle competition, Modal Labs award, Bayesian optimisation adoption, RAG assistant impact, behaviour change
361
+ Questions:
362
+ 1. Which competition results do you cite as evidence of capability?
363
+ 2. What recognition did you receive for agentic MCP work and why?
364
+ 3. How did the Bayesian optimisation project influence team practices?
365
+ 4. Why do you value the RAG assistant’s impact on colleagues?
366
+ 5. How do these achievements reflect your focus on behaviour change?
367
+ [/Metadata]
368
+
369
+ • 4th place in a Kaggle binary‑classification competition (Mar 2025) — a nice reminder that fundamentals matter. In this
370
+ challenge, we were tasked with predicting rainfall. I enjoyed working on this one and learnt a few things as I progressed with
371
+ my submissions. I started with the usual EDA and feature engineering before testing a few models. I tend to default to models like
372
+ XGBoost because it works really well out of the box, though I usually fit a random forest as a baseline for most tasks, then
373
+ iterate and start to build a picture of what works best. My go-to toolbox is scikit-learn for models, pandas for data manipulation and seaborn
374
+ for visualisation. Scikit-learn is also great for pre-processing and hyperparameter tuning; a quick RandomizedSearchCV followed by GridSearchCV
375
+ is usually my sequence. On a task like this, I always keep in mind what data I have for predictions, especially whether the dataset
376
+ is well balanced; on this occasion it was quite unbalanced. When that happens, I like to employ SMOTE, generating synthetic minority samples to balance the scales.
377
+ This dramatically improved performance. The final step was something new to me: I used a VotingClassifier to ensemble several models and
378
+ evaluated the combined classifier against the individual ones (a minimal sketch of this pattern follows this list). Initially, I was really impressed with my cross-validation, but the test set
379
+ on Kaggle didn't look like anything special. However, I decided to trust the good cross-val score and it really paid off. I jumped up
380
+ hundreds of places in the final leaderboard. I got my free t-shirt, one of my prized possessions: a really proud moment when I started to realise
381
+ I'm more than OK at this; I can do this as a job. With no classical training and no full-time data job, I was surpassing trained data scientists
382
+ and Kaggle grandmasters. Just imagine what I could do if I did it full time.
383
+ • Modal Labs Choice Award ($5,000) for an agentic Model Context Protocol (MCP) server during a Gradio + Hugging Face
384
+ hackathon (Jul 2025). The joy wasn’t the prize — it was proving a lean, useful pattern quickly with real constraints.
385
+ This was one of the best achievements of my adult career. Prior to this, I hadn't written a single MCP server and hadn't even really used MCP that much, but I felt
386
+ I wanted the challenge. I worked incredibly hard on this, working very late into the night (I couldn't sleep anyway lol), and planned out my
387
+ application. At first, I wanted to build agentic deep research but, the more I thought about it, I wanted something quick with low latency. Shallow Research was born.
388
+ Shallow Research was about code: generating tested and validated code. The MCP server, in essence, took a user input about code, like "how do I perform predictions on a binary classification task?"
389
+ It would then begin a linear "agentic" workflow, where various agents handled very specific tasks. There was a research agent
390
+ to look up best practice on the internet, then a code generation agent, and then the most critical part: code execution.
391
+ The real power of the MCP was the ability to run code in a remote sandbox on the Modal platform, an amazing platform for spinning up CPU or GPU instances.
392
+ The Code Runner agent would run the code and make sure it worked; if the sandbox didn't have the right library, no worries, the library would be installed
393
+ dynamically. A simple image was set up on Modal to decrease latency: it spun up quickly with the core libraries, and other libraries were installed
394
+ only when needed. Finally, all of this would be returned to the user, who could see that the code had been executed and what the result was. They
395
+ could copy and paste the code knowing it would work. The aim was for this to be a great MCP for those learning to code: it'd give you
396
+ working code with good explanations and citations to read more.
397
+ • The Bayesian optimisation result at AZ (same optimum, ~50% fewer experiments) because it moved from “cool idea” to
398
+ “how we actually work”. It was the first of its type in the department and I learned a lot from Bayesian experts.
399
+ • The RAG assistant because it reduced real, everyday friction for colleagues hunting for knowledge in complex systems. The
400
+ tools I develop are designed with the end user in mind: how can I make someone's day better by helping them solve a problem?
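+
+ The sketch referenced in the Kaggle bullet above: a minimal, self-contained version of the SMOTE + VotingClassifier
+ pattern on synthetic data. The real competition pipeline had far more feature engineering and tuning.
+
+ from imblearn.over_sampling import SMOTE
+ from sklearn.datasets import make_classification
+ from sklearn.ensemble import RandomForestClassifier, VotingClassifier
+ from sklearn.linear_model import LogisticRegression
+ from sklearn.metrics import roc_auc_score
+ from sklearn.model_selection import train_test_split
+ from xgboost import XGBClassifier
+
+ # Toy imbalanced binary problem standing in for the rainfall data.
+ X, y = make_classification(n_samples=2000, weights=[0.9, 0.1], random_state=42)
+ X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
+
+ # Oversample the minority class on the training split only.
+ X_res, y_res = SMOTE(random_state=42).fit_resample(X_train, y_train)
+
+ # Soft-voting ensemble over diverse base models.
+ ensemble = VotingClassifier(
+     estimators=[
+         ("rf", RandomForestClassifier(random_state=42)),
+         ("xgb", XGBClassifier(eval_metric="logloss", random_state=42)),
+         ("lr", LogisticRegression(max_iter=1000)),
+     ],
+     voting="soft",
+ )
+ ensemble.fit(X_res, y_res)
+ print("AUC:", roc_auc_score(y_test, ensemble.predict_proba(X_test)[:, 1]))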
401
+
402
+
403
+ Why AI — and why now
404
+
405
+ [Metadata]
406
+ Exec summary: Explains your motivation for pursuing AI, tying together analytical chemistry, statistics, Python, ML, GenAI, and the joy of iterative problem-solving.
407
+ Keywords: motivation, AI transition, analytical chemistry influence, statistics, Python, machine learning, generative AI, iteration
408
+ Questions:
409
+ 1. How did analytical chemistry shape your approach to data and decision-making?
410
+ 2. What role did statistics and Python play in your transition to AI?
411
+ 3. How do you describe the evolution from ML to GenAI and agentic patterns?
412
+ 4. Why do you find programming so engaging and time-dissolving?
413
+ [/Metadata]
414
+
415
+ Analytical chemistry immersed me in noisy data and decisions under constraint. Statistics gave me language for uncertainty.
416
+ Python gave me leverage — automation, analysis, and APIs. ML stitched it together into predictive systems. GenAI widened
417
+ the aperture to text and reasoning, and agentic patterns turn tools into coordinated doers. To be honest, I enjoy the loop:
418
+ frame the question, ship a tiny thing, see if it helps, and keep going. Programming is one of those activities I can start and,
419
+ all of a sudden, it's 8 hours later. That's what I love about it: it tunes my brain. My brain was made to code; it's just a shame
420
+ I found out so late.
421
+
422
+
423
+ How I work (and sound)
424
+
425
+ [Metadata]
426
+ Exec summary: Describes your working style, including goal orientation, iterative focus, accountability, collaboration, documentation habits, and conversational cues.
427
+ Keywords: working style, goal setting, iteration, accountability, collaboration, documentation, communication tone
428
+ Questions:
429
+ 1. How do you define success and plan your skill development?
430
+ 2. What is your approach to starting and finishing projects?
431
+ 3. How do you handle mistakes and team accountability?
432
+ 4. What role does documentation play in your delivery process?
433
+ 5. Which phrases signal your agreement or emphasis during conversations?
434
+ [/Metadata]
435
+
436
+ • “What does success look like in a year? What skill will I need in 6 months?” — then work backwards. I'm always thinking about how things can be done better.
437
+ • Start small and see where it goes. If it's good, I'll fixate on it until it's done.
438
+ • Nothing is ever good enough; we can always improve on processes.
439
+ • I'm honest: if something's my fault, I'll hold my hand up, and I expect the same of others. Pushing blame onto others or nitpicking people doesn't impress me.
440
+ • Opinionated defaults, but collaborative. I’ll propose a pattern and then adapt with the team.
441
+ • Documentation is part of delivery. If someone can’t pick it up without me, I haven’t finished, and that's hard to follow through on. It's not always easy but I try my best. In the world of pharma, if you didn't write it down, it never happened.
442
+ If an action was performed but never written down, an auditor is not going to like it, and that can get you in serious trouble.
443
+ • I’ll say “Yeah, definitely,” when something resonates. I’ll say “to be honest,” when I need to cut through nicely.
444
+
445
+
446
+ Technical highlights (deeper cuts)
447
+
448
+ [Metadata]
449
+ Exec summary: Introduces deep-dive technical case studies covering RAG assistant, agentic workflows, Bayesian optimisation, chromatographic prediction, and API/ETL improvements.
450
+ Keywords: technical highlights, case studies, RAG, agentic workflows, Bayesian optimisation, chromatographic prediction, APIs, ETL correctness
451
+ Questions:
452
+ 1. What advanced technical initiatives do you showcase here?
453
+ 2. How do these highlights expand on earlier achievements?
454
+ 3. Which domains (retrieval, optimisation, prediction, API design) do you emphasise?
455
+ 4. How do these examples demonstrate your end-to-end problem solving?
456
+ [/Metadata]
457
+
458
+ RAG laboratory assistant
459
+ [Metadata]
460
+ Exec summary: Details the rationale, implementation steps, outcomes, and technical methods behind the RAG laboratory assistant PoC and rollout.
461
+ Keywords: RAG assistant, retrieval, embeddings, ChromaDB, prompt augmentation, multimodal troubleshooting, AI governance, evaluation
462
+ Questions:
463
+ 1. Why was the RAG laboratory assistant needed and what problem did it solve?
464
+ 2. How did you architect the retrieval pipeline, including embeddings and databases?
465
+ 3. What prompt augmentation strategy did you use to improve retrieval?
466
+ 4. How did you incorporate multimodal troubleshooting and image context into the solution?
467
+ 5. What governance, evaluation, and deployment steps were taken to roll out the assistant responsibly?
468
+ 6. How did the project balance user experience with accuracy and guardrails?
469
+ [/Metadata]
470
+
471
+ Why: People were wasting time on “Who knows X?” and “Where’s that doc?”. Retrieval needed to be first‑class.
472
+ How: Light doc loaders; chunking; embeddings; vector DB; retrieval‑augmented prompting; guardrails around sources;
473
+ simple UI; risk assessments; evaluation vs expected outputs; UAT with actual users.
474
+ Outcome: ~20% reduction in troubleshooting lead times and noticeably faster answers to routine questions.
475
+ How I did it: I built a PoC using Streamlit. I used OpenAI embeddings to vectorise manuals and troubleshooting guides that I
476
+ selected from the internet, knowing that these ground-truth documents were great sources. What do you do with embeddings? Put
477
+ them in a vector database. Personally I used ChromaDB because it was easy to set up locally, but I have also used Qdrant and Pinecone,
478
+ which are great cloud alternatives. Then I had to layer in the LLM calls. To improve accuracy, I employed a prompt augmentation step:
479
+ I make an extra call to an LLM to come up with 3 or 4 questions related to the user query but slightly different. This helps to widen the potential
480
+ retrieval of documents, especially if it asks questions the user hadn't thought of; it's all about context. From this you can inject the retrieved chunks into the prompt
481
+ and get a grounded answer (although you've got to call set() on those retrieved chunks; you don't want to waste tokens on duplicates, lol). A sketch of this step follows below.
482
+ I also included image-based troubleshooting, early on in the multimodal landscape. Image embeddings weren't common then, so I used models to explain the issue
483
+ in the image and then used this context to perform retrieval. This meant it could be quite dynamic and still give ground-truth results with references (key).
484
+ The other main input into this type of tool is prompt engineering. Users don't want to type War and Peace, and by having a specialist RAG tool you can fine-tune the system
485
+ prompt to abstract away some of the more complex prompting skills like chain of thought; it's been done for them already.
486
+ That's just the PoC. AI governance is key; copyright concerns are key. Deployment becomes a collaborative effort with teams all over the world: sprints in Jira,
487
+ UAT with SMEs, and AI evaluation rounds with SMEs to make sure responses meet requirements. Finally you get an app out in the wild, people start using it, and it
488
+ feels great!!
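+
+ A minimal sketch of the prompt-augmentation + deduplication step (illustrative only: the model name, collection
+ name, and prompt wording are assumptions, not the production values):
+
+ import chromadb
+ from openai import OpenAI
+
+ client = OpenAI()
+ collection = chromadb.Client().get_or_create_collection("manuals")  # assumed collection
+
+ def augmented_retrieve(user_query: str, k: int = 4) -> list[str]:
+     # Ask the LLM for a few rephrasings to widen retrieval coverage.
+     resp = client.chat.completions.create(
+         model="gpt-4o-mini",
+         messages=[{"role": "user", "content":
+                    f"Write 3 short search queries related to: {user_query}"}],
+     )
+     variants = [user_query] + resp.choices[0].message.content.splitlines()
+
+     # Query the vector store once per variant, then dedupe with set().
+     chunks: set[str] = set()
+     for q in variants:
+         if not q.strip():
+             continue
+         hits = collection.query(query_texts=[q.strip()], n_results=k)
+         chunks.update(hits["documents"][0])
+     return sorted(chunks)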
489
+
490
+ Agentic workflows + MCP/tool‑use
491
+ [Metadata]
492
+ Exec summary: Summarises your approach to agentic workflows using Model Context Protocol, focusing on reducing manual coordination through secure tool orchestration.
493
+ Keywords: agentic workflows, MCP, tool-use, automation, coordination reduction, secure interfaces
494
+ Questions:
495
+ 1. What problem do agentic workflows solve for your teams?
496
+ 2. How do you apply MCP and tool-use patterns in these workflows?
497
+ 3. What outcomes have these automations delivered?
498
+ 4. How does scope control factor into your design choices?
499
+ [/Metadata]
500
+
501
+ Why: Multi‑step, cross‑system tasks were brittle and person‑dependent.
502
+ How: Orchestrated tools behind clear interfaces; used MCP/tool‑use patterns so models can call functions securely; kept
503
+ scope tight.
504
+ Outcome: In the right slices, up to ~80% reduction in human loops to reach usable results.
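+
+ For flavour, a minimal MCP tool server in the style I mean, using the Python MCP SDK's FastMCP helper. The tool
+ itself is a hypothetical stand-in; real deployments add auth, logging, and tighter scopes:
+
+ from mcp.server.fastmcp import FastMCP
+
+ mcp = FastMCP("lab-tools")  # assumed server name
+
+ @mcp.tool()
+ def instrument_status(instrument_id: str) -> str:
+     """Return a status string for an instrument (stubbed for illustration)."""
+     # In practice this would call a typed internal API rather than a stub.
+     return f"{instrument_id}: idle"
+
+ if __name__ == "__main__":
+     mcp.run()  # serves the tool over stdio for an MCP-capable client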
505
+
506
+ Bayesian optimisation for method development
507
+ [Metadata]
508
+ Exec summary: Describes your application of Bayesian optimisation to laboratory method development, reducing experiments while matching historical optimums.
509
+ Keywords: Bayesian optimisation, method development, experiment reduction, iterative loop, objective function
510
+ Questions:
511
+ 1. Why was Bayesian optimisation selected for the method development problem?
512
+ 2. How did you structure the optimisation loop and objective?
513
+ 3. What comparison baseline validated the approach?
514
+ 4. What efficiency gains were achieved in experiment count?
515
+ [/Metadata]
516
+
517
+ Why: Parameter spaces are expensive to explore; we needed a principled way to reach “good enough to ship” faster.
518
+ How: Replayed a historical development on the same instrument with bounded variables and a clear objective; ran an
519
+ iterative loop; compared against the known optimum.
520
+ Outcome: Same optimum with ~50% fewer experiments. Clear signal to scale into practice.
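+
+ A minimal sketch of the loop shape: a toy Gaussian-process / expected-improvement optimiser with scikit-learn and a
+ stand-in objective. The real work used BayBE with bounded instrument parameters and a chromatography objective:
+
+ import numpy as np
+ from scipy.stats import norm
+ from sklearn.gaussian_process import GaussianProcessRegressor
+
+ def objective(x):                       # hidden "experiment" standing in for a lab run
+     return -(x - 0.3) ** 2
+
+ rng = np.random.default_rng(0)
+ X = rng.uniform(0, 1, (3, 1))           # a few seed experiments
+ y = objective(X).ravel()
+
+ for _ in range(10):                     # iterate: fit surrogate, pick next experiment
+     gp = GaussianProcessRegressor(normalize_y=True).fit(X, y)
+     grid = np.linspace(0, 1, 200).reshape(-1, 1)
+     mu, sigma = gp.predict(grid, return_std=True)
+     best = y.max()
+     z = (mu - best) / np.maximum(sigma, 1e-9)
+     ei = (mu - best) * norm.cdf(z) + sigma * norm.pdf(z)  # expected improvement
+     x_next = grid[np.argmax(ei)].reshape(1, 1)
+     X = np.vstack([X, x_next])
+     y = np.append(y, objective(x_next).ravel())
+
+ print("best parameter:", X[np.argmax(y)][0], "best value:", y.max())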
521
+
522
+ Chromatographic retention‑time prediction
523
+ [Metadata]
524
+ Exec summary: Covers your progression from baseline models to attention-based graph approaches for predicting chromatographic retention times, leveraging large datasets for pre-training and fine-tuning.
525
+ Keywords: chromatographic prediction, retention time, XGBoost, neural networks, graph models, pre-training, fine-tuning, method development
526
+ Questions:
527
+ 1. Why was chromatographic retention-time prediction valuable for your work?
528
+ 2. Which modelling techniques did you iterate through from baseline to advanced?
529
+ 3. How did you combine open datasets with internal data for training?
530
+ 4. What benefits did attention-based graph models provide over earlier approaches?
531
+ [/Metadata]
532
+
533
+ Why: Better priors mean fewer dead‑ends in method development.
534
+ How: Start with fingerprints + XGBoost baselines; extend to neural models; then pre‑train a graph model with attention on
535
+ ~70k open injections; fine‑tune on internal ~30k; evaluate on held‑out chemistries.
536
+ Outcome: Stronger generalisation and a reusable domain foundation to build on.
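+
+ A minimal sketch of the fingerprint + XGBoost baseline stage (illustrative: the SMILES list and retention times
+ below are made-up placeholders, and the real dataset was far larger):
+
+ import numpy as np
+ from rdkit import Chem
+ from rdkit.Chem import AllChem
+ from xgboost import XGBRegressor
+
+ smiles = ["CCO", "CCCC", "c1ccccc1", "CC(=O)O"]        # placeholder molecules
+ retention_times = np.array([1.2, 3.4, 5.1, 2.0])       # placeholder targets (min)
+
+ def fingerprint(smi: str) -> np.ndarray:
+     # 2048-bit Morgan (ECFP4-style) fingerprint as the molecular feature vector.
+     mol = Chem.MolFromSmiles(smi)
+     fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
+     return np.array(fp)
+
+ X = np.vstack([fingerprint(s) for s in smiles])
+ model = XGBRegressor(n_estimators=200).fit(X, retention_times)
+ print(model.predict(X[:1]))                            # predicted retention time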
537
+
538
+ APIs & ETL correctness
539
+ [Metadata]
540
+ Exec summary: Highlights your focus on API design and ETL integrity, ensuring analysts access clean, typed data via FastAPI services and schema fixes.
541
+ Keywords: APIs, ETL correctness, FastAPI, Pydantic, schema flattening, data reliability, analytics enablement
542
+ Questions:
543
+ 1. Why do you emphasise clean APIs and ETL pipelines for analysts?
544
+ 2. How did you use FastAPI and Pydantic models to improve data access?
545
+ 3. What schema issues did you identify and resolve, and why did they matter?
546
+ 4. How did these efforts reduce friction and bespoke scripting across teams?
547
+ [/Metadata]
548
+
549
+ Why: Analysts shouldn’t screen‑scrape or wrestle nested XML‑ish blobs. Clean tables + typed APIs unlock everything.
550
+ How: FastAPI with Pydantic models; raised/resolved flattening issues so SQL was sane; wrote small services people
551
+ could actually call.
552
+ Outcome: Less friction; fewer bespoke scripts; more reliable dashboards and models.
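+
+ A minimal sketch of the typed-API pattern with FastAPI and Pydantic (the model fields and endpoint are
+ hypothetical examples, not the internal schema):
+
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+
+ app = FastAPI()
+
+ class Injection(BaseModel):
+     # Typed response model: callers get a guaranteed, documented schema.
+     sample_id: str
+     instrument: str
+     retention_time_min: float
+
+ @app.get("/injections/{sample_id}", response_model=Injection)
+ def get_injection(sample_id: str) -> Injection:
+     # Stub standing in for a Snowflake/SQL lookup behind the service.
+     return Injection(sample_id=sample_id, instrument="GC-07", retention_time_min=3.42)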
553
+
554
+
555
+ What’s next
556
+
557
+ [Metadata]
558
+ Exec summary: Signals your future focus on expanding agentic workflows, strengthening data contracts, and sharing lightweight automation patterns for charities and SMEs.
559
+ Keywords: future plans, agentic workflows, data contracts, knowledge sharing, SMEs, charities
560
+ Questions:
561
+ 1. What future technical areas do you plan to invest in?
562
+ 2. How do you intend to help charities and SMEs with automation?
563
+ 3. Why are explicit data contracts a priority for your upcoming work?
564
+ 4. How does knowledge sharing feature in your outlook?
565
+ [/Metadata]
566
+
567
+ More agentic workflows wired to real systems; more explicit data contracts; and more public sharing of light‑weight tools
568
+ and patterns. I want charities and SMEs to have leverage without needing a 50‑person platform team.
569
+
570
+
571
+ Contact & links
572
+
573
+ [Metadata]
574
+ Exec summary: Provides primary contact information and online presence links for reaching you or exploring your work.
575
+ Keywords: contact, email, GitHub, portfolio, LinkedIn, location
576
+ Questions:
577
+ 1. What email addresses can be used to contact you personally or for business?
578
+ 2. Where can someone review your code and projects?
579
+ 3. Which portfolio site showcases your broader work?
580
+ 4. What is your LinkedIn profile and current location?
581
+ [/Metadata]
582
+
583
+ Email: [email protected] (personal) | [email protected] (business)
584
+ GitHub: github.com/CodeHalwell
585
+ Portfolio: codehalwell.io
586
+ LinkedIn: linkedin.com/in/danielhalwell
587
+ Location: Northwich, UK
588
+
589
+ Thanks for reading. If there’s something you want to build — or a process that needs unblocking — I’m happy to chat.
590
+ Let’s make the right thing the easy thing.
591
+
592
+ [Metadata]
593
+ Exec summary: Closing invitation encouraging collaboration and reinforcing your philosophy of making the right approach straightforward.
594
+ Keywords: closing note, collaboration invite, philosophy, call to action, accessibility
595
+ Questions:
596
+ 1. What offer do you extend to potential collaborators?
597
+ 2. How do you summarise your approach to solving problems?
598
+ 3. What tone do you set for prospective conversations?
599
+ 4. Why do you emphasise making the right thing easy?
600
+ [/Metadata]
601
+
602
+
603
+ Selected GitHub Repositories (LLM‑Ready Index)
604
+
605
+ [Metadata]
606
+ Exec summary: Introduces the curated list of your GitHub repositories with metadata for LLM-ready indexing, highlighting focus areas and inferred summaries.
607
+ Keywords: GitHub index, repositories, LLM-ready, project catalogue, focus areas, tags
608
+ Questions:
609
+ 1. What is the purpose of this GitHub repositories section?
610
+ 2. How are the repositories categorised and described?
611
+ 3. Which metadata fields accompany each repository listing?
612
+ 4. How does this section support LLM-friendly retrieval?
613
+ [/Metadata]
614
+
615
+ Daniel Halwell — Repositories Index (LLM-Ready)
616
+
617
+ [Metadata]
618
+ Exec summary: Explains the table-like format used to present repository metadata for quick scanning and indexing.
619
+ Keywords: repository format, metadata fields, presentation structure, LLM-ready, quick reference
620
+ Questions:
621
+ 1. How are the repository entries structured for readability?
622
+ 2. Which metadata columns are included for each repository?
623
+ 3. Why is a consistent format important for LLM-ready indexing?
624
+ 4. How does this format help with retrieval tasks?
625
+ [/Metadata]
626
+
627
+
628
+ Selected GitHub Repositories (Organized by Category)
629
+
630
+
631
+ **LLM Utilities & Language Models**
632
+ • yamllm - YAML ↔ LLM interaction utilities
633
+ • simple_rag - Minimal RAG baseline implementation
634
+ • openai-logp-viewer - Log probability inspection and visualization
635
+
636
+ **Agentic Systems & Automation**
637
+ • gradio-mcp-agent-hack - Model Context Protocol experimentation with Gradio
638
+ • agents-for-art - Creative agent orchestration tools
639
+ • n8n-mcp - n8n integration with Model Context Protocol
640
+ • synthetic-data-agent - Automated synthetic data generation
641
+ • research-agent - Deep research workflow automation
642
+ • coding-agent-cli - Command-line coding assistant
643
+ • agentic-ai-engineering - Agent engineering frameworks and patterns
644
+
645
+ **Web Development & Portfolio**
646
+ • CodeHalwell-Portfolio - Personal portfolio site
647
+ • portfolio-codehalwell - Alternative portfolio implementation
648
+ • WeatherApp - Weather API integration with UI
649
+ • web-page-test - Web development experiments
650
+
651
+ **Data Science & Analytics**
652
+ • washing-line-predictor - Weather-informed predictive modeling
653
+ • openai-logp-viewer - Data visualization for LLM analysis
654
+ • arxiv-scraper - Academic paper collection and processing
655
+
656
+ **Healthcare & Specialized Domains**
657
+ • BabelFHIR - FHIR/HL7 healthcare data processing
658
+
659
+ **Learning & Coursework**
660
+ • ibm-build-genai-apps - IBM watsonx platform exploration
661
+ • ibm-python-data-analysis - IBM data analysis certification work
662
+ • llm_engineering-course - LLM engineering fundamentals
663
+ • LLM101n - Large language model foundations
664
+ • DataCamp_DS_Cert - Data science certification projects
665
+ • oaqjp-final-project-emb-ai - Embedded AI final project
666
+
667
+ **Personal Projects & Apps**
668
+ • MyPoppet / poppet - Personal assistant experiments
669
+ • translator-with-voice-and-watsonx - Voice translation with IBM watsonx
670
+
671
+ **Utilities & Experiments**
672
+ • MyGPT - Quick GPT experimentation
673
+ • Grand-Gardens-AI - AI garden management concepts
674
+ • Useful_Scripts - General automation scripts
675
+ • deep-research - Research workflow tools
676
+ • food_review - Food review analysis
677
+ • podcast-censoring - Podcast content filtering
678
+ • playground_series_september2025 - September 2025 coding experiments
679
+ • pallscripting - Scripting utilities
680
+ • deep-learning-illustrated - Deep learning visualization
681
+ • build_own_chatbot_without_open_ai - Non-OpenAI chatbot implementation
682
+ • code_chat_bot - Code-focused chatbot
683
+ • neurIPS-open-polymer - Polymer research collaboration
684
+
685
+ **Repository List for Automation:**
686
+
687
+ repos = [
688
+ "CodeHalwell/yamllm","CodeHalwell/gradio-mcp-agent-hack","CodeHalwell/CodeHalwell-Portfolio",
689
+ "CodeHalwell/MyGPT","CodeHalwell/agents-for-art","CodeHalwell/Grand-Gardens-AI",
690
+ "CodeHalwell/Useful_Scripts","CodeHalwell/MyPoppet","CodeHalwell/deep-research",
691
+ "CodeHalwell/ibm-build-genai-apps","CodeHalwell/n8n-mcp","CodeHalwell/washing-line-predictor",
692
+ "CodeHalwell/portfolio-codehalwell","CodeHalwell/openai-logp-viewer","CodeHalwell/food_review",
693
+ "CodeHalwell/synthetic-data-agent","CodeHalwell/simple_rag","CodeHalwell/ibm-python-data-analysis",
694
+ "CodeHalwell/podcast-censoring","CodeHalwell/playground_series_september2025","CodeHalwell/poppet",
695
+ "CodeHalwell/arxiv-scraper","RanL703/neurIPS-open-polymer","CodeHalwell/WeatherApp",
696
+ "CodeHalwell/research-agent","CodeHalwell/pallscripting","CodeHalwell/deep-learning-illustrated",
697
+ "quotentiroler/BabelFHIR","CodeHalwell/coding-agent-cli","CodeHalwell/llm_engineering-course",
698
+ "CodeHalwell/agentic-ai-engineering","CodeHalwell/translator-with-voice-and-watsonx",
699
+ "CodeHalwell/build_own_chatbot_without_open_ai","CodeHalwell/oaqjp-final-project-emb-ai",
700
+ "CodeHalwell/LLM101n","CodeHalwell/code_chat_bot","CodeHalwell/DataCamp_DS_Cert",
701
+ "CodeHalwell/web-page-test"
702
+ ]
703
+
704
+ [Metadata]
705
+ Exec summary: Supplies a Python list of repository identifiers to support scripted ingestion or indexing workflows.
706
+ Keywords: repository list, Python array, identifiers, automation, ingestion helper
707
+ Questions:
708
+ 1. What data structure is used to enumerate your repositories for automation?
709
+ 2. How many repositories are captured in this list and what patterns do they follow?
710
+ 3. How might this list be used in vector database or indexing pipelines?
711
+ 4. Why is maintaining a consolidated repository list useful for your digital CV?
712
+ [/Metadata]
713
+
714
+
715
+ How I Work & Tools (Consolidated)
716
+
717
+ [Metadata]
718
+ Exec summary: Consolidated overview of your primary languages, data/ML stack, GenAI tooling, service design experience, orchestration platforms, data platforms, and workplace productivity tools.
719
+ Keywords: skills overview, toolchain, programming languages, ML stack, GenAI platforms, orchestration, DevOps, productivity tools
720
+ Questions:
721
+ 1. Which programming languages and core tools do you rely on daily?
722
+ 2. What data and machine learning libraries form your toolkit?
723
+ 3. Which GenAI and agent orchestration platforms do you use?
724
+ 4. What services, APIs, and orchestration methods do you employ?
725
+ 5. Which data platforms and DevOps tools are integral to your workflow?
726
+ 6. How do you manage documentation, project tracking, and operations?
727
+ [/Metadata]
728
+
729
+ • Languages & Core: Python (heavy daily use), SQL, TypeScript (portfolio/UI), Bash.
730
+ • Data & ML: NumPy, Pandas, scikit-learn, XGBoost, PyTorch, PyTorch Geometric; Power BI, Plotly, Matplotlib.
731
+ • GenAI & Agents: OpenAI API, Anthropic, Watsonx; Retrieval (FAISS/Chroma/Qdrant), RAG patterns; tool-use/MCP; CrewAI/AutoGen/SmolAgents; prompt evaluation and structured output with Pydantic/JSON-schema.
732
+ • Services & APIs: FastAPI (typed models via Pydantic), Flask (legacy), REST design; LangGraph-style orchestration patterns.
733
+ • Orchestration: n8n (daily), lightweight cron, Modal, small Dockerized jobs.
734
+ • Data Platforms: Snowflake/SQL; ETL correctness and schema hygiene are non-negotiable.
735
+ • DevOps/Infra: Docker, GitHub Actions, Azure/AWS/GCP basics.
736
+ • Workplace OS: Notion (docs/CRM/case studies), Linear (projects), Google Workspace, Canva, Miro. Accounting via QuickBooks; banking via Starling (sole trader).
737
+
738
+
739
+ Engagement Policy (Ethics & Fit)
740
+
741
+ [Metadata]
742
+ Exec summary: Defines your ethical guidelines for client engagements, categorising red, amber, and green domains with default operating principles.
743
+ Keywords: ethics, engagement policy, red lines, amber considerations, green projects, governance, transparency
744
+ Questions:
745
+ 1. What types of work do you refuse on ethical grounds?
746
+ 2. Which project domains require additional governance before engagement?
747
+ 3. What sectors align well with your ethical stance?
748
+ 4. What default practices do you implement to maintain ethical standards?
749
+ [/Metadata]
750
+
751
+ Red lines: no fossil fuels, weapons/arms, or harmful surveillance/abusive tech; avoid organisations and conflicts that contradict a people-first stance.
752
+ Ambers: ad tech, scraping of private data without consent, high-risk medical claims — require strict scoping, governance and auditability.
753
+ Greens: health & life sciences; education & upskilling; charities & non-profits; SMEs doing practical automation; research tooling.
754
+ Defaults: minimal lock-in; clear IP/licensing; privacy-by-design; evals and guardrails for GenAI; documented handovers with maintainers named.
755
+
756
+
757
+ Teaching & Mentoring
758
+
759
+ [Metadata]
760
+ Exec summary: Summarises your mentoring philosophy rooted in practical explanations, co-designed exercises, and fast feedback loops inspired by teaching experiences in Guyana.
761
+ Keywords: teaching, mentoring, diagrams, notebooks, handovers, feedback loops, user education
762
+ Questions:
763
+ 1. How do you approach teaching and documentation while building?
764
+ 2. What mentoring activities do you participate in at work?
765
+ 3. How did your time in Guyana influence your teaching style?
766
+ 4. Why do you emphasise tight feedback loops and simple interfaces when mentoring?
767
+ [/Metadata]
768
+
769
+ I explain as I build: diagrams, notebooks, READMEs, and handover sessions. I mentor through internal coding groups,
770
+ co-design small exercises, and prefer “show, don’t tell.” My Guyana year made me comfortable teaching under constraints;
771
+ at work I apply the same approach — tight feedback loops, simple interfaces, and momentum.
772
+
773
+
774
+ Personal Tech Lab (Home Server & Experiments)
775
+
776
+ [Metadata]
777
+ Exec summary: Describes your home lab environment, including server setups and experimentation with local LLM fine-tuning under the ChemGemma project.
778
+ Keywords: home lab, Mac Mini server, Raspberry Pi, automations, LLM lab, ChemGemma, fine-tuning, GRPO
779
+ Questions:
780
+ 1. What infrastructure do you maintain for personal experiments?
781
+ 2. Which services run on your home server and why?
782
+ 3. How do you use local hardware to prototype agents and RAG patterns?
783
+ 4. What is ChemGemma, and how did you fine-tune it?
784
+ [/Metadata]
785
+
786
+ I tinker. I run a Mac Mini home server (Jellyfin, n8n, Nextcloud, Pi‑hole, Home Assistant, web servers) and keep a Raspberry Pi 4B (SSD-boot) for small
787
+ automations. It’s where I test agents, RAG patterns, and light-weight services before hardening them for work.
788
+
789
+ I have an LLM lab for local models (RTX 3090). I've successfully run fine-tuning and reinforcement learning using an open-source Gemma model. I called this
790
+ model ChemGemma, having curated a dataset from Hugging Face to perform supervised fine-tuning and then RL using GRPO to add reasoning.
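+
+ A rough sketch of the GRPO stage, assuming Hugging Face TRL's GRPOTrainer; the model id, dataset, and reward
+ function are illustrative stand-ins, not the actual ChemGemma recipe:
+
+ from datasets import load_dataset
+ from trl import GRPOConfig, GRPOTrainer
+
+ def reasoning_reward(completions, **kwargs):
+     # Toy reward: favour completions that show an explicit reasoning section.
+     return [1.0 if "<think>" in c else 0.0 for c in completions]
+
+ dataset = load_dataset("trl-lib/tldr", split="train")   # placeholder prompt dataset
+
+ trainer = GRPOTrainer(
+     model="google/gemma-2-2b-it",                       # stand-in base model
+     reward_funcs=reasoning_reward,
+     args=GRPOConfig(output_dir="chemgemma-grpo", per_device_train_batch_size=2),
+     train_dataset=dataset,
+ )
+ trainer.train()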
791
+
792
+
793
+ n8n Systems: Daily arXiv Scraper & RAG Pipeline
794
+
795
+ [Metadata]
796
+ Exec summary: Details your n8n automations for research monitoring and RAG querying, outlining workflow steps, design choices, and resulting impact on knowledge retrieval.
797
+ Keywords: n8n workflows, arXiv scraper, RAG pipeline, automation, Qdrant, structured prompts, evaluation, impact
798
+ Questions:
799
+ 1. What goals do your n8n workflows achieve for research ingestion and querying?
800
+ 2. How is the daily arXiv scraper structured from trigger to vector storage?
801
+ 3. What design choices underpin the RAG query pipeline’s accuracy and guardrails?
802
+ 4. How do these systems improve your productivity and knowledge sharing?
803
+ 5. What impact metrics demonstrate the value of these workflows?
804
+ [/Metadata]
805
+
806
+ I’ve built a set of n8n flows that keep me and my tools up to date with AI/CS/DS research and make that corpus queryable.
807
+
808
+ 1) Daily arXiv Scraper (image: /mnt/data/db5d6387-16a9-4004-8a7b-6663d15217a2.png)
809
+ Goal: pull fresh research (AI/CS/DS), normalise it with an LLM, store summaries/metadata in Notion, and index the text in a vector store for search and RAG.
810
+
811
+ High-level steps in the flow:
812
+ • Schedule Trigger → RSS Read: runs daily, fetching new arXiv entries from feeds I care about.
813
+ • Loop Over Items: iterates papers.
814
+ • Message a model (Message Model): composes a clean prompt per item with extracted metadata.
815
+ • AI Agent (Chat Model + Tools): calls an OpenAI chat model; reaches out via an HTTP Request node when extra info is needed (e.g., to fetch the abstract or PDF link); produces structured JSON (title, authors, abstract, URL, categories, license hints).
816
+ • Structured Output Parser: enforces the schema and catches malformed outputs.
817
+ • If (branch): routes by licence/permissiveness or other policy flags.
818
+ • Create a database page (Notion): two variants of the Notion writer — one for permissive/common licences, another for restricted — so that only permissive-license papers are fully “stored” and enriched (restricted ones get a link-only/metadata card).
819
+ • Merge: folds both branches back into a single stream.
820
+ • Qdrant Vector Store: chunk + embed the permitted text (abstract/fulltext when allowed) using OpenAI embeddings; write vectors and metadata for retrieval later (see the sketch after this list).
821
+ Result: a clean, daily-updated Notion knowledge base + vector index of papers I’m allowed to store, with policy-respecting handling of licences. It’s simple, fast to audit, and easy to extend.
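+
+ The embedding/upsert step above, sketched in Python (the n8n node does the equivalent; the collection name and
+ payload fields are illustrative):
+
+ from openai import OpenAI
+ from qdrant_client import QdrantClient
+ from qdrant_client.models import PointStruct
+
+ openai_client = OpenAI()
+ qdrant = QdrantClient(url="http://localhost:6333")      # assumed local Qdrant
+
+ def index_abstract(paper_id: int, title: str, abstract: str) -> None:
+     # Embed the abstract, then store vector + metadata for later retrieval.
+     emb = openai_client.embeddings.create(
+         model="text-embedding-3-small", input=abstract
+     ).data[0].embedding
+     qdrant.upsert(
+         collection_name="arxiv",                        # assumed collection
+         points=[PointStruct(id=paper_id, vector=emb, payload={"title": title})],
+     )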
822
+
823
+ 2) RAG Query Pipeline (image: /mnt/data/b503029c-a157-4c48-9a40-5271840d4327.png)
824
+ Goal: ask natural-language questions over the paper corpus with transparent retrieval and guardrails.
825
+
826
+ High-level steps in the flow:
827
+ • Webhook: entry-point for a query (from my portal or CLI).
828
+ • PromptAugmentation: uses a chat model to clean/expand the user prompt (e.g., add synonyms, normalise acronyms) and emits a structured plan via a Structured Output Parser.
829
+ • Code: tiny glue to format search queries and pass control values (k, filters).
830
+ • Loop Over Items: if the plan has multiple sub-queries, iterate them.
831
+ • AI Agent: coordinates two tools — (a) Qdrant Vector Store search with OpenAI embeddings; (b) a Cohere re-ranker for higher precision.
832
+ • Aggregate → Message a model → Respond to Webhook: aggregates top contexts, prompts the model to answer with citations and explicit “what I don’t know,” then returns the response JSON to the caller.
833
+ Design choices:
834
+ • Retrieval is explicit: top-k, distances/scores, and doc IDs logged.
835
+ • Re-ranking improves answer quality without overloading the LLM.
836
+ • Style/guardrails: British spelling, direct tone; citations mandatory; no hallucinated claims beyond the retrieved contexts.
837
+ • I hooked the RAG pipeline up to Telegram; that way, I can put a message in Telegram and it'll start the RAG pipeline, retrieve relevant papers,
838
+ and then drop the response back in a message a few minutes later.
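+
+ The retrieve-then-rerank core of this pipeline, sketched in Python (the n8n flow wires the same calls together;
+ collection and model names are illustrative):
+
+ import cohere
+ from qdrant_client import QdrantClient
+
+ co = cohere.Client()                                    # reads COHERE_API_KEY from env
+ qdrant = QdrantClient(url="http://localhost:6333")
+
+ def search(query_vector, query_text: str, top_n: int = 5):
+     # query_vector: embedding of query_text, produced upstream (e.g. OpenAI embeddings).
+     # 1) Broad vector search for candidate chunks.
+     hits = qdrant.search(collection_name="arxiv", query_vector=query_vector, limit=25)
+     docs = [h.payload.get("title", "") for h in hits]
+     # 2) Re-rank candidates for precision before prompting the LLM.
+     reranked = co.rerank(model="rerank-english-v3.0", query=query_text,
+                          documents=docs, top_n=top_n)
+     return [docs[r.index] for r in reranked.results]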
839
+
840
+ Impact:
841
+ • I don’t waste time manually scanning feeds; new work lands in Notion and the vector store each morning.
842
+ • I can query “What’s new on tool-use/MCP for agents?” and get a grounded answer with links.
843
+ • The same index powers demos and internal RAG utilities — a single source of truth.
844
+
845
+ Email Triage - I get lost in a sea of emails, and it's so easy to miss things. The answer: have an agent do it for you.
846
+ My triage agent reads through emails and uses a tier system to highlight items for escalation. If an important email comes through, I get a message and can take a look.
847
+
848
+ [Metadata]
849
+ Exec summary: Summarises the benefits of your automated research pipelines, highlighting time savings, improved retrieval, and reusable indexes.
850
+ Keywords: impact summary, time savings, retrieval, demos, single source of truth
851
+ Questions:
852
+ 1. How do the automations reduce your manual research effort?
853
+ 2. What querying capabilities do the pipelines unlock?
854
+ 3. How does the indexed corpus serve multiple applications?
855
+ 4. Why is a single source of truth valuable for your workflows?
856
+ [/Metadata]
857
+
858
+
859
+ What I’m Open To
860
+
861
+ [Metadata]
862
+ Exec summary: Lists the types of roles, pro bono work, and collaborations you are interested in pursuing, emphasising AI engineering and mission-aligned projects.
863
+ Keywords: opportunities, AI engineer roles, pro bono, collaborations, automation, charities
864
+ Questions:
865
+ 1. What kinds of full-time or contract roles are you seeking?
866
+ 2. What pro bono engagements do you offer to charities?
867
+ 3. Which collaborative areas appeal to you for future work?
868
+ 4. How does this section help partners understand fit?
869
+ [/Metadata]
870
+
871
+ • Roles: AI Engineer / ML Engineer / Data Scientist (UK-remote or North West hybrid); full time positions and short build engagements for copilots, RAG, and agentic automations.
872
+ • Pro bono: time-boxed PoCs for UK charities and mission-led organisations.
873
+ • Collaborations: research tooling, open-source scaffolding, educational content.
874
+
875
+
876
+ Why I Love to Code (and the Thing I Obsess About)
877
+
878
+ [Metadata]
879
+ Exec summary: Expresses your passion for coding as the fastest route from problem to improvement, highlighting your focus on bottlenecks and iterative solutions.
880
+ Keywords: passion for coding, problem solving, bottlenecks, iteration, automation, optimisation
881
+ Questions:
882
+ 1. Why do you find coding compelling and energising?
883
+ 2. How do you describe your approach to identifying and solving bottlenecks?
884
+ 3. Which problem types do you align with specific solution strategies (RAG, Bayesian optimisation, APIs)?
885
+ 4. How does iteration and feedback drive your projects?
886
+ [/Metadata]
887
+
888
+ I love writing Python and shipping small tools that unblock people — it’s the quickest route from “problem” to “better.”
889
+ I tend to obsess about the bottleneck: if it’s retrieval, I build RAG; if it’s too many experiments, I reach for Bayesian optimisation;
890
+ if it’s brittle handoffs, I ship a typed API. Solving problems is the through-line for me — discrete questions, clear interfaces,
891
+ quick feedback, and steady iteration.
892
+ I love to help people in general. If someone is struggling with some Python, I like to solve it; if they are having issues with Microsoft Copilot Studio flows,
893
+ I'm more than happy to take a quick look and see what I can do. So far, people really appreciate this approach and I get some really good feedback.
894
+
895
+
896
+ Sport & Judo
897
+
898
+ [Metadata]
899
+ Exec summary: Shares your sporting interests and judo achievements, highlighting discipline, composure, and practical learning skills gained from coaching and competition.
900
+ Keywords: sport, judo, black belt, competition, coaching, discipline, composure, learning by doing
901
+ Questions:
902
+ 1. Which sports do you follow or participate in?
903
+ 2. What level of achievement did you reach in judo and at what age?
904
+ 3. What competitions and coaching experiences shape your discipline and composure?
905
+ 4. How do lessons from judo translate into your daily work habits?
906
+ [/Metadata]
907
+
908
+ I’m into sport — football, rugby, Formula 1, and most things with a scoreboard. As a teenager I was a judoka:
909
+ I earned my black belt at 16 (the youngest age you can), won medals across the country, including a bronze at an
910
+ international competition in London, and trained as a coach for my local club. It taught me discipline, composure under pressure,
911
+ and how to learn by doing — lessons I still apply daily.
pyproject.toml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "digital-cv"
3
+ version = "0.1.0"
4
+ description = "An AI-powered digital CV that allows visitors to chat with Daniel Halwell through an intelligent conversational interface"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ authors = [
8
+ {name = "Daniel Halwell", email = "[email protected]"},
9
+ ]
10
+ classifiers = [
11
+ "Development Status :: 4 - Beta",
12
+ "Intended Audience :: Developers",
13
+ "License :: Other/Proprietary License",
14
+ "Programming Language :: Python :: 3",
15
+ "Programming Language :: Python :: 3.11",
16
+ "Programming Language :: Python :: 3.12",
17
+ ]
18
+ dependencies = [
19
+ "anthropic>=0.49.0",
20
+ "autogen-agentchat>=0.4.9.2",
21
+ "autogen-ext[grpc,mcp,ollama,openai]>=0.4.9.2",
22
+ "bs4>=0.0.2",
23
+ "chroma>=0.2.0",
24
+ "chromadb>=1.1.0",
25
+ "gradio>=5.22.0",
26
+ "httpx>=0.28.1",
27
+ "ipywidgets>=8.1.5",
28
+ "langchain>=1.0.0a9",
29
+ "langchain-community>=0.3.30",
30
+ "langchain-openai>=0.3.33",
31
+ "lxml>=5.3.1",
32
+ "mcp-server-fetch>=2025.1.17",
33
+ "mcp[cli]>=1.5.0",
34
+ "openai>=1.68.2",
35
+ "openai-agents>=0.0.15",
36
+ "playwright>=1.51.0",
37
+ "plotly>=6.0.1",
38
+ "polygon-api-client>=1.14.5",
39
+ "psutil>=7.0.0",
40
+ "pypdf>=5.4.0",
41
+ "pypdf2>=3.0.1",
42
+ "python-dotenv>=1.0.1",
43
+ "requests>=2.32.3",
44
+ "semantic-kernel>=1.25.0",
45
+ "sendgrid>=6.11.0",
46
+ "setuptools>=78.1.0",
47
+ "smithery>=0.1.0",
48
+ "speedtest-cli>=2.1.3",
49
+ "wikipedia>=1.4.0",
50
+ "watchfiles>=0.24.0",
51
+ "huggingface-hub[cli]>=0.35.1",
52
+ ]
53
+
54
+ [dependency-groups]
55
+ dev = [
56
+ "ipykernel>=6.29.5",
57
+ ]
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio>=5.22.0
2
+ openai>=1.68.2
3
+ python-dotenv>=1.0.1
4
+ requests>=2.32.3
5
+ chromadb>=1.1.0
6
+ langchain-openai>=0.3.33
7
+ langchain-text-splitters>=0.3.0
8
+ pypdf>=5.4.0
9
+
tests/test_query.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ from utils.vector_db import VectorDB
3
+
4
+ class TestQuery(unittest.TestCase):
5
+ def test_query(self):
6
+ vector_db = VectorDB()
7
+ result = vector_db.query("What is my name?")
8
+ # query() returns Chroma-style nested lists; assert the top document mentions the name
+ self.assertIn("Daniel Halwell", result["documents"][0][0])
utils/__pycache__/app_logging.cpython-311.pyc ADDED
Binary file (2.8 kB). View file
 
utils/__pycache__/chat.cpython-311.pyc ADDED
Binary file (20.5 kB). View file
 
utils/__pycache__/logging.cpython-311.pyc ADDED
Binary file (2.8 kB). View file
 
utils/__pycache__/text_processing.cpython-311.pyc ADDED
Binary file (7.38 kB). View file
 
utils/__pycache__/vector_db.cpython-311.pyc ADDED
Binary file (9.17 kB). View file
 
utils/app_logging.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+
4
+
5
+ def setup_logging():
6
+ """Setup logging for the application."""
7
+ global logger
8
+ logger = logging.getLogger(__name__)
9
+ logger.setLevel(logging.INFO)
10
+ # Set common formatter
11
+ _formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
12
+
13
+ # Ensure logs appear in terminal even if root isn't configured
14
+ _has_console = any(isinstance(h, logging.StreamHandler) and not isinstance(h, logging.FileHandler) for h in logger.handlers)
15
+ if not _has_console:
16
+ _console_handler = logging.StreamHandler()
17
+ _console_handler.setLevel(logging.INFO)
18
+ _console_handler.setFormatter(_formatter)
19
+ logger.addHandler(_console_handler)
20
+ logger.propagate = False
21
+
22
+ # Ensure logs are also saved to a file next to this script
23
+ _log_file = os.path.join(os.path.dirname(__file__), "digital-cv.log")
24
+ _has_file = any(isinstance(h, logging.FileHandler) and getattr(h, "baseFilename", "") == _log_file for h in logger.handlers)
25
+ if not _has_file:
26
+ try:
27
+ _file_handler = logging.FileHandler(_log_file)
28
+ _file_handler.setLevel(logging.INFO)
29
+ _file_handler.setFormatter(_formatter)
30
+ logger.addHandler(_file_handler)
31
+ except Exception:
32
+ # If file handler can't be created, continue with console-only logging
33
+ pass
34
+ return logger
35
+
utils/chat.py ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ from __future__ import annotations
+
+ from dotenv import load_dotenv
+ from openai import OpenAI
+ import json
+ import os
+ from typing import List, Dict, Any, Optional
+
+ from utils.app_logging import setup_logging
+ from utils.vector_db import VectorDB
+ from utils.tool_calls import record_user_details, record_unknown_question
+
+
+ load_dotenv(override=True)
+ logger = setup_logging()
+
+
+ def chat_log(message, history):
+     """Append each user/assistant exchange to a file after each generation.
+
+     This is not a tool; it is only used for logging.
+
+     Args:
+         message: The latest user message.
+         history: The chat history; the last entry is logged as the assistant reply.
+     """
+     logger.info(f"Saving chat log: {message}")
+     with open("chat_log.txt", "a") as f:
+         f.write(f"User: {message}\n")
+         f.write(f"Assistant: {history[-1]['content']}\n")
+
+
+ record_user_details_json = {
+     "name": "record_user_details",
+     "description": "Use this tool to record that a user is interested in being in touch and provided an email address",
+     "parameters": {
+         "type": "object",
+         "properties": {
+             "email": {
+                 "type": "string",
+                 "description": "The email address of this user",
+             },
+             "name": {
+                 "type": "string",
+                 "description": "The user's name, if they provided it",
+             },
+             "notes": {
+                 "type": "string",
+                 "description": "Any additional information about the conversation that's worth recording to give context",
+             },
+         },
+         "required": ["email"],
+         "additionalProperties": False,
+     },
+ }
+
+ record_unknown_question_json = {
+     "name": "record_unknown_question",
+     "description": "Always use this tool to record any question that couldn't be answered because you didn't know the answer",
+     "parameters": {
+         "type": "object",
+         "properties": {
+             "question": {
+                 "type": "string",
+                 "description": "The question that couldn't be answered",
+             },
+         },
+         "required": ["question"],
+         "additionalProperties": False,
+     },
+ }
+
+ # Responses API-shaped tools schema (name/description/parameters at the top level)
+ tools = [
+     {
+         "type": "function",
+         "name": record_user_details_json["name"],
+         "description": record_user_details_json["description"],
+         "parameters": record_user_details_json["parameters"],
+     },
+     {
+         "type": "function",
+         "name": record_unknown_question_json["name"],
+         "description": record_unknown_question_json["description"],
+         "parameters": record_unknown_question_json["parameters"],
+     },
+ ]
+
+ # Chat Completions-compatible tools schema (nested under "function")
+ chat_tools = [
+     {"type": "function", "function": record_user_details_json},
+     {"type": "function", "function": record_unknown_question_json},
+ ]
+
+
+ class Me:
+     def __init__(self):
+         """Initialize persona context, vector database, and OpenAI client."""
+         self.openai = OpenAI()
+         self.name = "Daniel Halwell"
+         self.vector_db = VectorDB()
+         self.system_context = self._build_system_context()
+         self.email = "[email protected]"
+
+     def _build_system_context(self) -> str:
+         """Render a concise persona context from vector store contents."""
+         try:
+             peek = self.vector_db.collection.peek(5)
+             documents: List[str] = []
+             metadatas: List[Dict[str, Any]] = []
+             if isinstance(peek, dict):
+                 documents = peek.get("documents", []) or []
+                 metadatas = peek.get("metadatas", []) or []
+         except Exception as exc:
+             logger.error(f"Failed to peek vector DB: {exc}")
+             documents, metadatas = [], []
+
+         combined_entries: List[str] = []
+         for text, metadata in zip(documents, metadatas):
+             source = (
+                 metadata.get("source", "unknown")
+                 if isinstance(metadata, dict)
+                 else "unknown"
+             )
+             combined_entries.append(f"Source: {source}\n{text.strip()}")
+
+         if not combined_entries:
+             return (
+                 "You are Daniel Halwell, a scientist-turned-AI engineer who builds"
+                 " practical AI tooling, RAG systems, and automations. Be concise,"
+                 " professional, and acknowledge uncertainty when context is missing."
+             )
+
+         joined = "\n\n".join(combined_entries)
+         return (
+             "You are provided with an indexed knowledge base about Daniel Halwell."
+             " Use it to answer questions faithfully.\n\n" + joined
+         )
+
+     def _compose_retrieval_query(
+         self, message: str, history: Optional[List[Dict[str, Any]]]
+     ) -> str:
+         """Combine the current message with recent user turns for retrieval."""
+         recent_user_msgs: List[str] = []
+         if history:
+             for item in reversed(history):
+                 if not isinstance(item, dict):
+                     continue
+                 if item.get("role") == "user":
+                     content = item.get("content", "") or ""
+                     if content.strip():
+                         recent_user_msgs.append(content.strip())
+                     if len(recent_user_msgs) >= 2:
+                         break
+             recent_user_msgs.reverse()
+         if message.strip():
+             recent_user_msgs.append(message.strip())
+         return "\n\n".join(recent_user_msgs)
+
+     def _build_retrieval_context(
+         self, message: str, history: Optional[List[Dict[str, Any]]]
+     ) -> str:
+         """Retrieve relevant knowledge snippets for the given message."""
+         query = self._compose_retrieval_query(message, history)
+         if not query:
+             return ""
+
+         try:
+             results = self.vector_db.query(
+                 query,
+                 k=4,
+                 include=["documents", "metadatas", "distances"],
+             )
+         except Exception as exc:
+             logger.error(f"Vector DB query failed: {exc}")
+             return ""
+
+         documents = []
+         metadatas = []
+         distances = []
+         if isinstance(results, dict):
+             documents = (results.get("documents") or [[]])[0]
+             metadatas = (results.get("metadatas") or [[]])[0]
+             distances = (results.get("distances") or [[]])[0]
+
+         contexts: List[str] = []
+         for idx, (doc, metadata) in enumerate(zip(documents, metadatas)):
+             if not doc:
+                 continue
+             source = "unknown"
+             if isinstance(metadata, dict):
+                 source = metadata.get("source") or metadata.get("path") or "unknown"
+                 chunk_id = metadata.get("chunk_id")
+                 if chunk_id is not None:
+                     source = f"{source}#chunk-{chunk_id}"
+             score = distances[idx] if idx < len(distances) else None
+             score_str = (
+                 f" (score: {score:.3f})" if isinstance(score, (int, float)) else ""
+             )
+             snippet = doc.strip().replace("\n\n", "\n")
+             contexts.append(f"[{idx + 1}] Source: {source}{score_str}\n{snippet}")
+
+         if not contexts:
+             return ""
+
+         return "Retrieved knowledge snippets:\n" + "\n\n".join(contexts)
+
+     def handle_tool_call(self, tool_calls):
+         """Execute streamed tool calls and return tool result messages.
+
+         Args:
+             tool_calls: Iterable of tool call objects containing name, arguments, and id.
+
+         Returns:
+             A list of tool result message dicts compatible with the OpenAI
+             Chat Completions API.
+         """
+         results = []
+         for tool_call in tool_calls:
+             tool_name = tool_call.function.name
+             arguments = json.loads(tool_call.function.arguments)
+             logger.info(f"Tool called: {tool_name} with arguments: {arguments}")
+             tool = globals().get(tool_name)
+             result = tool(**arguments) if tool else {}
+             results.append(
+                 {
+                     "role": "tool",
+                     "content": json.dumps(result),
+                     "tool_call_id": tool_call.id,
+                 }
+             )
+         return results
+
+     def system_prompt(self):
+         """Construct the system prompt using persona context and vector DB summary."""
+         return f"""
+ You are acting as {self.name}. You are answering questions on {self.name}'s website, particularly questions related to {self.name}'s career, background, skills and experience.
+ Your responsibility is to represent {self.name} for interactions on the website as faithfully as possible.
+ You have access to a retrieval system that stores vetted chunks about {self.name}. Always ground answers in those retrieved contexts.
+ Sound warm, upbeat, and conversational — imagine you are chatting with someone you’d happily grab coffee with. Use friendly acknowledgements (e.g. “Great question,” “Happy to share,” “Thanks for asking”) before giving specifics. Keep explanations concise but encouraging, and invite them to follow up or email you if they want deeper detail.
+ If you cannot answer confidently, log the question via the record_unknown_question tool and gently mention you’ll circle back.
+ Context preview:
+ {self.system_context}
+ """
+
+     def chat_guardrails(self, message, history):
+         """Return True if the user message is appropriate, False otherwise.
+
+         Uses an LLM to classify sentiment and appropriateness without any
+         allow/deny heuristics. Falls back to True on error.
+
+         Args:
+             message: The latest user message string.
+             history: Prior conversation history (unused).
+
+         Returns:
+             Boolean indicating whether the message is appropriate.
+         """
+         system_msg = (
+             "You are a sentiment and safety classifier. First assess sentiment "
+             "(positive, neutral, or negative). Then determine if the message is "
+             "appropriate for a general audience (no PII, hate, harassment, sexual, "
+             "or illegal content). Output only one token: 'True' if appropriate, "
+             "or 'False' if not. Do not output anything else. "
+             "The only exception to PII is email, which is allowed if it's shared "
+             "in the context of the conversation."
+         )
+         try:
+             resp = self.openai.chat.completions.create(
+                 model="gpt-4o",
+                 messages=[
+                     {"role": "system", "content": system_msg},
+                     {"role": "user", "content": message},
+                 ],
+                 temperature=0,
+                 max_tokens=3,
+             )
+             raw = (resp.choices[0].message.content or "").strip()
+             cleaned = "".join(ch for ch in raw if ch.isalpha()).lower()
+             # Default to allowing the message on any unexpected output
+             verdict = cleaned != "false"
+             logger.info(f"Guardrails response: {raw} -> {verdict}")
+             return verdict
+         except Exception as e:
+             logger.error("Guardrails call failed, defaulting to allowing the message")
+             logger.error(f"Exception: {e}")
+             return True
+
+     def chat_guardrails_response(self):
+         """Return a standard response for blocked (inappropriate) messages."""
+         return (
+             "I'm sorry, I can't answer that. Please ask a question that isn't "
+             "about sensitive or inappropriate topics."
+         )
+
+     def chat(self, message, history):
+         """Generator that streams a chat response and handles tool calls.
+
+         Args:
+             message: The latest user message string.
+             history: Prior conversation history as a list of role/content dicts.
+
+         Yields:
+             Progressively longer assistant message strings for streaming UI updates.
+         """
+
+         # Sanitize incoming history to only include role/content pairs
+         def _sanitize(msg):
+             return {"role": msg.get("role"), "content": msg.get("content", "")}
+
+         retrieval_context = self._build_retrieval_context(message, history)
+
+         messages = (
+             [{"role": "system", "content": self.system_prompt()}]
+             + (
+                 [
+                     {
+                         "role": "system",
+                         "content": (
+                             "Use the following retrieved snippets when forming your answer."
+                             " If they are empty, rely on your general knowledge of Daniel Halwell."
+                             " If you don't know the answer, log the question via the"
+                             f" record_unknown_question tool. My email is {self.email}.\n"
+                             + retrieval_context
+                         ),
+                     }
+                 ]
+                 if retrieval_context
+                 else []
+             )
+             + [
+                 _sanitize(m)
+                 for m in (history or [])
+                 if isinstance(m, dict) and m.get("role") in {"user", "assistant"}
+             ]
+             + [{"role": "user", "content": message}]
+         )
+         logger.info(f"User: {message}")
+         if not self.chat_guardrails(message, history):
+             yield self.chat_guardrails_response()
+             return
+
+         while True:
+             stream = self.openai.chat.completions.create(
+                 model="gpt-5-mini",
+                 messages=messages,
+                 tools=chat_tools,
+                 stream=True,
+             )
+
+             content_accumulated = ""
+             streamed_tool_calls = {}
+             finish_reason = None
+
+             for event in stream:
+                 if not getattr(event, "choices", None):
+                     continue
+                 choice = event.choices[0]
+                 delta = getattr(choice, "delta", None)
+                 if delta and getattr(delta, "content", None):
+                     content_accumulated += delta.content
+                     yield content_accumulated
+                 # Collect tool call deltas as they stream in
+                 if delta and getattr(delta, "tool_calls", None):
+                     for tc in delta.tool_calls:
+                         idx = tc.index
+                         if idx not in streamed_tool_calls:
+                             streamed_tool_calls[idx] = {
+                                 "id": getattr(tc, "id", None),
+                                 "name": None,
+                                 "arguments": "",
+                             }
+                         func = getattr(tc, "function", None)
+                         if func and getattr(func, "name", None):
+                             streamed_tool_calls[idx]["name"] = func.name
+                         if func and getattr(func, "arguments", None):
+                             streamed_tool_calls[idx]["arguments"] += func.arguments
+                 if getattr(choice, "finish_reason", None):
+                     finish_reason = choice.finish_reason
+                     break
+
+             # If the model wants tool calls, execute them and continue the loop
+             if finish_reason == "tool_calls" and streamed_tool_calls:
+                 # Build the assistant message that declared the tool calls
+                 assistant_tool_msg = {
+                     "role": "assistant",
+                     "tool_calls": [
+                         {
+                             "id": item.get("id") or f"call_{idx}",
+                             "type": "function",
+                             "function": {
+                                 "name": item["name"],
+                                 "arguments": item.get("arguments", ""),
+                             },
+                         }
+                         for idx, item in sorted(streamed_tool_calls.items())
+                     ],
+                 }
+                 logger.info(f"Assistant tool message: {assistant_tool_msg}")
+
+                 # Lightweight stand-in with the attribute shape handle_tool_call expects
+                 class ToolCall:
+                     def __init__(self, name, arguments, id):
+                         self.function = type("Function", (), {})()
+                         self.function.name = name
+                         self.function.arguments = arguments
+                         self.id = id
+
+                 tool_calls_for_handler = []
+                 for idx, item in sorted(streamed_tool_calls.items()):
+                     logger.info(f"Tool call for handler: {item}")
+                     tool_calls_for_handler.append(
+                         ToolCall(
+                             name=item["name"],
+                             arguments=item.get("arguments", ""),
+                             id=item.get("id") or f"call_{idx}",
+                         )
+                     )
+                 logger.info(f"Tool calls for handler: {tool_calls_for_handler}")
+                 results = self.handle_tool_call(tool_calls_for_handler)
+                 messages.append(assistant_tool_msg)
+                 messages.extend(results)
+                 logger.info(f"Messages: {messages}")
+                 chat_log(message, messages)
+                 continue
+
+             logger.info(f"Assistant final response: {content_accumulated}")
+             return
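A minimal, hypothetical launcher sketch showing one way the streaming chat generator above could be served (the gradio wiring and the app module name are assumptions, not part of this diff):

    # Hypothetical launcher (not in this commit): serve Me().chat via Gradio.
    import gradio as gr

    from app import Me  # assumes the module above is saved as app.py

    me = Me()

    demo = gr.ChatInterface(
        fn=me.chat,       # generator yielding progressively longer strings
        type="messages",  # history arrives as [{"role": ..., "content": ...}] dicts
        title="Chat with Daniel Halwell",
    )

    if __name__ == "__main__":
        demo.launch()

Because chat yields the accumulated reply so far, a streaming-aware UI can render it token by token without extra glue code.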
utils/create_vector_db.py ADDED
@@ -0,0 +1,19 @@
+ import sys
+ from pathlib import Path
+
+ # Ensure project root is on sys.path when running as a script
+ project_root = Path(__file__).resolve().parent.parent
+ if str(project_root) not in sys.path:
+     sys.path.insert(0, str(project_root))
+
+ from utils.text_processing import DocumentProcessing
+
+
+ def main():
+     document_processing = DocumentProcessing()
+     document_processing.create_vector_db_from_directory("me")
+
+
+ if __name__ == "__main__":
+     main()
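The same rebuild can be triggered programmatically. A short sketch, assuming an me/ directory of .pdf/.txt files at the project root and OPENAI_API_KEY set in the environment:

    # Programmatic equivalent of running this script from the project root.
    from utils.text_processing import DocumentProcessing

    processor = DocumentProcessing()
    processor.create_vector_db_from_directory("me")  # indexes me/*.pdf and me/*.txt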
utils/text_processing.py ADDED
@@ -0,0 +1,116 @@
+ from __future__ import annotations
+
+ from typing import Sequence
+
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_openai import OpenAIEmbeddings
+ from langchain_community.document_loaders.pdf import PyPDFLoader
+ from langchain_community.document_loaders.text import TextLoader
+ import os
+ import dotenv
+ import sys
+ from pathlib import Path
+
+ # Ensure project root is on sys.path when running as a script
+ project_root = Path(__file__).resolve().parent.parent
+ if str(project_root) not in sys.path:
+     sys.path.insert(0, str(project_root))
+
+ from utils.vector_db import VectorDB
+
+ dotenv.load_dotenv()
+
+
+ class DocumentProcessing:
+     def __init__(self):
+         self.text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=2000, chunk_overlap=200
+         )
+         self.embeddings = OpenAIEmbeddings(
+             model="text-embedding-3-large", api_key=os.getenv("OPENAI_API_KEY")
+         )
+         self.vector_db = VectorDB(embedding_model=self.embeddings)
+
+     def split_text(self, document):
+         """Split a raw string, or a list of loaded documents, into chunks."""
+         if isinstance(document, list):
+             # Handle a list of Document objects returned by the loaders
+             all_texts = []
+             for doc in document:
+                 all_texts.extend(self.text_splitter.split_text(doc.page_content))
+             return all_texts
+         # Handle a raw text string
+         return self.text_splitter.split_text(document)
+
+     def embed_text(self, texts: Sequence[str]):
+         """Generate embeddings for text chunks."""
+         return self.embeddings.embed_documents(list(texts))
+
+     def create_vector_db(self, texts, metadata=None):
+         """Add texts (with optional per-chunk metadata) to the vector database."""
+         if metadata is None:
+             metadata = [{"source": "unknown"} for _ in texts]
+
+         embeddings = self.embed_text(texts)
+
+         documents = list(texts)
+         metadatas = list(metadata)
+         # Offset ids by the current collection size so chunks from different
+         # files do not collide on the same "doc_i" id
+         start = self.vector_db.count()
+         ids = [f"doc_{start + i}" for i in range(len(documents))]
+
+         self.vector_db.add_documents(
+             documents=documents,
+             metadatas=metadatas,
+             ids=ids,
+             embeddings=embeddings,
+         )
+
+     def create_vector_db_from_file(self, file_path):
+         """Process a single file and add its chunks to the vector database."""
+         if not os.path.exists(file_path):
+             raise FileNotFoundError(f"File not found: {file_path}")
+
+         if file_path.endswith(".pdf"):
+             loader = PyPDFLoader(file_path)
+         elif file_path.endswith(".txt"):
+             loader = TextLoader(file_path)
+         else:
+             raise ValueError(f"Unsupported file type: {file_path}")
+
+         documents = loader.load()
+         texts = self.split_text(documents)
+
+         # Create metadata for each chunk
+         metadata = [{"source": file_path, "chunk_id": i} for i in range(len(texts))]
+
+         self.create_vector_db(texts, metadata)
+         return self.vector_db
+
+     def create_vector_db_from_directory(self, directory_path):
+         """Process all supported files in a directory."""
+         if not os.path.exists(directory_path):
+             raise FileNotFoundError(f"Directory not found: {directory_path}")
+
+         supported_extensions = [".pdf", ".txt"]
+         processed_files = 0
+
+         for file in os.listdir(directory_path):
+             file_path = os.path.join(directory_path, file)
+
+             # Skip directories
+             if os.path.isdir(file_path):
+                 continue
+
+             # Only process files with supported extensions
+             if any(file.endswith(ext) for ext in supported_extensions):
+                 try:
+                     self.create_vector_db_from_file(file_path)
+                     processed_files += 1
+                     print(f"Processed: {file}")
+                 except Exception as e:
+                     print(f"Error processing {file}: {str(e)}")
+             else:
+                 print(f"Skipping unsupported file type: {file}")
+
+         print(f"Successfully processed {processed_files} files")
+         return self.vector_db
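To make the splitter settings concrete (2000-character chunks with a 200-character overlap), a standalone sketch using invented filler text:

    # Standalone sketch: how the configured splitter chunks a long string.
    from langchain_text_splitters import RecursiveCharacterTextSplitter

    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
    long_text = "word " * 2000  # roughly 10,000 characters of filler

    chunks = splitter.split_text(long_text)
    print(len(chunks))     # roughly 6 chunks, since consecutive chunks share ~200 chars
    print(len(chunks[0]))  # each chunk is at most 2000 characters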
utils/tool_calls.py ADDED
@@ -0,0 +1,57 @@
+ import os
+
+ import requests
+
+ from utils.app_logging import setup_logging
+
+ logger = setup_logging()
+
+
+ def push(text):
+     """Send a Pushover notification.
+
+     Args:
+         text: The message text to send.
+     """
+     try:
+         logger.info(f"Sending Pushover notification: {text}")
+         requests.post(
+             "https://api.pushover.net/1/messages.json",
+             data={
+                 "token": os.getenv("PUSHOVER_TOKEN"),
+                 "user": os.getenv("PUSHOVER_USER"),
+                 "message": text,
+             },
+             timeout=10,
+         )
+     except Exception as e:
+         # Swallow notification failures so they never impact the chat UX
+         logger.error(f"Failed to send Pushover notification: {e}")
+
+
+ def record_user_details(email, name="Name not provided", notes="not provided"):
+     """Record a user's contact details via push notification.
+
+     Args:
+         email: The user's email address.
+         name: The user's name, if provided.
+         notes: Additional context to record.
+
+     Returns:
+         A dictionary indicating success, e.g., {"recorded": "ok"}.
+     """
+     logger.info(f"Recording {name} with email {email} and notes {notes}")
+     push(f"Recording {name} with email {email} and notes {notes}")
+     return {"recorded": "ok"}
+
+
+ def record_unknown_question(question):
+     """Record an unanswered user question via push notification.
+
+     Args:
+         question: The question that couldn't be answered.
+
+     Returns:
+         A dictionary indicating success, e.g., {"recorded": "ok"}.
+     """
+     logger.info(f"Recording {question}")
+     push(f"Recording {question}")
+     return {"recorded": "ok"}
utils/vector_db.py ADDED
@@ -0,0 +1,194 @@
+ from __future__ import annotations
+
+ import os
+ from pathlib import Path
+ from typing import Any, Iterable, Optional, Sequence
+
+ import chromadb as cdb
+ import dotenv
+ from langchain_openai import OpenAIEmbeddings
+
+
+ dotenv.load_dotenv()
+
+
+ def _default_storage_path() -> str:
+     """Return the on-disk location for the Chroma persistent client."""
+
+     env_path = os.getenv("VECTOR_DB_PATH")
+     if env_path:
+         return env_path
+
+     project_root = Path(__file__).resolve().parent.parent
+     storage_dir = project_root / "data" / "chroma"
+     storage_dir.mkdir(parents=True, exist_ok=True)
+     return str(storage_dir)
+
+
+ # Re-entrancy guard for auto-initialization: DocumentProcessing constructs its
+ # own VectorDB, which would otherwise recurse while the collection is empty
+ _auto_init_in_progress = False
+
+
+ class VectorDB:
+     """Light wrapper around a persistent Chroma collection."""
+
+     def __init__(
+         self,
+         *,
+         collection_name: str = "me_profile",
+         persist_directory: Optional[str] = None,
+         embedding_model: Optional[OpenAIEmbeddings] = None,
+     ) -> None:
+         self.persist_directory = persist_directory or _default_storage_path()
+         self.client = cdb.PersistentClient(path=self.persist_directory)
+
+         try:
+             self.collection = self.client.get_or_create_collection(collection_name)
+         except Exception:
+             # Fallback for older Chroma versions
+             self.collection = self.client.create_collection(collection_name)
+
+         self.embedding_model = embedding_model or OpenAIEmbeddings(
+             model="text-embedding-3-large",
+             api_key=os.getenv("OPENAI_API_KEY"),
+         )
+
+         # Auto-initialize from 'me/' if the collection is empty, the directory
+         # exists, and an OpenAI key is available for embedding
+         global _auto_init_in_progress
+         me_dir = Path(__file__).resolve().parent.parent / "me"
+         if (
+             not _auto_init_in_progress
+             and me_dir.is_dir()
+             and os.getenv("OPENAI_API_KEY")
+         ):
+             try:
+                 if self.collection.count() == 0:
+                     # Imported here rather than at module top to avoid a circular
+                     # import: utils.text_processing itself imports VectorDB
+                     from utils.text_processing import DocumentProcessing
+
+                     _auto_init_in_progress = True
+                     try:
+                         DocumentProcessing().create_vector_db_from_directory(str(me_dir))
+                     finally:
+                         _auto_init_in_progress = False
+             except Exception:
+                 # If the auto-build fails, continue with an empty DB; the app still runs
+                 pass
+
+     # ------------------------------------------------------------------
+     # Document ingestion helpers
+     # ------------------------------------------------------------------
+     def add_documents(
+         self,
+         documents: Sequence[str],
+         *,
+         metadatas: Optional[Sequence[dict[str, Any]]] = None,
+         ids: Optional[Sequence[str]] = None,
+         embeddings: Optional[Sequence[Sequence[float]]] = None,
+     ) -> None:
+         """Add documents to the Chroma collection."""
+
+         documents = list(documents)
+         if not documents:
+             return
+
+         count = len(documents)
+
+         if metadatas is None:
+             metadatas = [{} for _ in range(count)]
+         if ids is None:
+             ids = [f"doc_{i}" for i in range(count)]
+
+         if embeddings is None:
+             embeddings = self.embedding_model.embed_documents(documents)
+
+         self.collection.add(
+             documents=documents,
+             metadatas=list(metadatas),
+             ids=list(ids),
+             embeddings=list(embeddings),
+         )
+
+     # ------------------------------------------------------------------
+     # Query helpers
+     # ------------------------------------------------------------------
+     def query(
+         self,
+         query_texts: Iterable[str],
+         *,
+         k: int = 5,
+         include: Optional[Sequence[str]] = None,
+     ) -> dict[str, Any]:
+         """Query the collection with one or more natural-language strings."""
+
+         if isinstance(query_texts, str):
+             query_texts = [query_texts]
+         else:
+             query_texts = list(query_texts)
+
+         if not query_texts:
+             raise ValueError("query_texts must contain at least one string")
+
+         query_embeddings = self.embedding_model.embed_documents(list(query_texts))
+
+         # Chroma accepts exactly one of query_texts / query_embeddings, so only
+         # the precomputed embeddings are passed; include is forwarded when given
+         kwargs: dict[str, Any] = {
+             "query_embeddings": list(query_embeddings),
+             "n_results": k,
+         }
+         if include is not None:
+             kwargs["include"] = list(include)
+         return self.collection.query(**kwargs)
+
+     # ------------------------------------------------------------------
+     # Thin wrappers around underlying collection methods
+     # ------------------------------------------------------------------
+     def upsert(
+         self,
+         documents: Sequence[str],
+         *,
+         metadatas: Optional[Sequence[dict[str, Any]]] = None,
+         ids: Optional[Sequence[str]] = None,
+         embeddings: Optional[Sequence[Sequence[float]]] = None,
+     ) -> None:
+         if embeddings is None:
+             embeddings = self.embedding_model.embed_documents(list(documents))
+         self.collection.upsert(
+             documents=list(documents),
+             metadatas=list(metadatas) if metadatas is not None else None,
+             ids=list(ids) if ids is not None else None,
+             embeddings=list(embeddings),
+         )
+
+     def delete(self, ids: Sequence[str]) -> None:
+         self.collection.delete(ids=list(ids))
+
+     def update(
+         self,
+         ids: Sequence[str],
+         documents: Optional[Sequence[str]] = None,
+         metadatas: Optional[Sequence[dict[str, Any]]] = None,
+         embeddings: Optional[Sequence[Sequence[float]]] = None,
+     ) -> None:
+         if documents is not None and embeddings is None:
+             embeddings = self.embedding_model.embed_documents(list(documents))
+         self.collection.update(
+             ids=list(ids),
+             documents=list(documents) if documents is not None else None,
+             metadatas=list(metadatas) if metadatas is not None else None,
+             embeddings=list(embeddings) if embeddings is not None else None,
+         )
+
+     def get(self, ids: Sequence[str]) -> dict[str, Any]:
+         return self.collection.get(ids=list(ids))
+
+     def count(self) -> int:
+         return self.collection.count()
+
+     def list(self) -> list[str]:
+         # Chroma collections expose no list() method; return all ids instead
+         return self.collection.get().get("ids", [])
+
+     def delete_all(self) -> None:
+         # Chroma's delete() requires ids or a filter, so fetch every id first
+         ids = self.collection.get().get("ids", [])
+         if ids:
+             self.collection.delete(ids=ids)
+
+     def get_all(self) -> dict[str, Any]:
+         return self.collection.get()
+
+     def get_all_metadata(self) -> list[dict[str, Any]]:
+         return self.collection.get(include=["metadatas"]).get("metadatas", [])
+
+     def get_all_ids(self) -> list[str]:
+         # "ids" is not a valid include value; Chroma always returns ids
+         return self.collection.get().get("ids", [])
+
+     def get_all_texts(self) -> list[str]:
+         return self.collection.get(include=["documents"]).get("documents", [])
+
+     def get_all_embeddings(self) -> list[list[float]]:
+         return self.collection.get(include=["embeddings"]).get("embeddings", [])
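An end-to-end sketch of the wrapper (assumes OPENAI_API_KEY is set; the texts and ids are invented examples, and a throwaway collection name keeps the demo away from me_profile):

    # Sketch: round-tripping a couple of documents through VectorDB.
    from utils.vector_db import VectorDB

    db = VectorDB(collection_name="scratch_demo")  # same data/chroma store, separate collection
    db.add_documents(
        ["Example chunk about RAG tooling.", "Example chunk about automations."],
        metadatas=[{"source": "demo"}, {"source": "demo"}],
        ids=["demo_0", "demo_1"],
    )

    hits = db.query("RAG tooling", k=1, include=["documents", "distances"])
    print(hits["documents"][0])  # best-matching chunk(s) for the query
    print(db.count())            # 2

    db.delete(["demo_0", "demo_1"])  # clean up the scratch entries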
uv.lock ADDED
The diff for this file is too large to render. See raw diff