Commit 8bf4ef4 · Jinglong Xiong committed · 1 parent: c0f4df5

add dockerfile, add readme

Files changed:
- .dockerignore (+58, -0)
- .gitignore (+1, -0)
- Dockerfile (+54, -0)
- README.md (+170, -0)
- app.py (+31, -7)
- docker-compose.yml (+21, -0)
- requirements.txt (+2, -0)
.dockerignore ADDED
@@ -0,0 +1,58 @@
+results/png/
+results/svg/
+results/*.json
+unsloth_compiled_cache/
+*.ipynb
+SVGDreamer/
+*.parquet
+
+# Git
+.git
+.gitignore
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual Environment
+venv/
+.env.local
+
+# Generated files
+logs/
+*.log
+.ipynb_checkpoints
+results/
+
+# VSCode
+.vscode/
+
+# Model caches
+.cache/
+unsloth_compiled_cache/
+
+# Docker
+Dockerfile
+docker-compose.yml
+.dockerignore
+
+# Documentation
+README-HF.md
.gitignore CHANGED
@@ -7,6 +7,7 @@ star-vector/
 SVGDreamer/
 *.parquet
 *.pth
+diff_image.png
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
Dockerfile ADDED
@@ -0,0 +1,54 @@
+FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    DEBIAN_FRONTEND=noninteractive
+
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    python3-pip \
+    python3-dev \
+    git \
+    wget \
+    libcairo2-dev \
+    pkg-config \
+    libgl1 \
+    libglib2.0-0 \
+    libsm6 \
+    libxrender1 \
+    libxext6 \
+    ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements first to leverage the Docker layer cache
+COPY requirements.txt .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt && \
+    pip install --no-cache-dir 'tensorflow[and-cuda]' && \
+    pip install --no-cache-dir git+https://github.com/openai/CLIP.git
+
+# Copy the whole application
+COPY . .
+
+# Install and build star-vector if it exists
+# COPY star-vector/ ./star-vector/
+# RUN if [ -d "star-vector" ]; then cd star-vector && pip install -e . && cd ..; fi
+
+# Set environment variables for GPU usage
+ENV NVIDIA_VISIBLE_DEVICES=all \
+    NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+# Expose port for Streamlit
+EXPOSE 8501
+
+# Healthcheck (uses wget, since curl is not installed in this image)
+HEALTHCHECK CMD wget --no-verbose --tries=1 --spider http://localhost:8501/_stcore/health || exit 1
+
+# Set entry point
+CMD yes | streamlit run app.py --server.port=8501 --server.address=0.0.0.0
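The `HEALTHCHECK` polls Streamlit's built-in health endpoint at `/_stcore/health`. The same probe can be run by hand against a live container; here is a minimal sketch using only the Python standard library (the URL matches the Dockerfile above, everything else is illustrative):

```python
# Probe the same Streamlit health endpoint the Dockerfile's HEALTHCHECK uses.
import sys
import urllib.request

try:
    with urllib.request.urlopen("http://localhost:8501/_stcore/health", timeout=5) as resp:
        sys.exit(0 if resp.status == 200 else 1)  # 200 means the app is serving
except OSError:
    sys.exit(1)  # Connection refused or timed out: treat as unhealthy
```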
README.md ADDED
@@ -0,0 +1,170 @@
+# Drawing with LLM 🎨
+
+A Streamlit application that converts text descriptions into SVG graphics using multiple AI models.
+
+## Overview
+
+This project allows users to create vector graphics (SVG) from text descriptions using three different approaches:
+1. **ML Model** - Uses Stable Diffusion to generate images and vtracer to convert them to SVG
+2. **DL Model** - Uses Stable Diffusion for initial image creation and StarVector for direct image-to-SVG conversion
+3. **Naive Model** - Uses the Phi-4 LLM to generate SVG code directly from text descriptions
+
+## Features
+
+- Text-to-SVG generation with three different model approaches
+- Adjustable parameters for each model type
+- Real-time SVG preview and code display
+- SVG download functionality
+- GPU acceleration for faster generation
+
+## Requirements
+
+- Python 3.11+
+- CUDA-compatible GPU (recommended)
+- Dependencies listed in `requirements.txt`
+
+## Installation
+
+### Using Miniconda (Recommended)
+
+```bash
+# Install Miniconda
+wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
+bash miniconda.sh -b -p $HOME/miniconda
+echo 'export PATH="$HOME/miniconda/bin:$PATH"' >> ~/.bashrc
+source ~/.bashrc
+
+# Create and activate environment
+conda create -n svg-app python=3.11 -y
+conda activate svg-app
+
+# Install star-vector
+cd star-vector
+pip install -e .
+cd ..
+
+# Install other dependencies
+pip install -r requirements.txt
+```
+
+### Using Docker
+
+```bash
+# Build and run with Docker Compose
+docker-compose up -d
+```
+
+## Usage
+
+Start the Streamlit application:
+
+```bash
+streamlit run app.py
+```
+
+Or pipe `yes` through to auto-answer Streamlit's first-run prompt:
+
+```bash
+yes | streamlit run app.py
+```
+
+The application will be available at http://localhost:8501
+
+## Models
+
+### ML Model (vtracer)
+Uses Stable Diffusion to generate an image from the text prompt, then applies vtracer to convert the raster image to SVG.
+
+Configurable parameters:
+- Simplify SVG
+- Color Precision
+- Filter Speckle
+- Path Precision
+
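As a rough illustration of this two-stage pipeline (not the repo's actual `ml.py`), a minimal sketch assuming the `diffusers` and `vtracer` packages; the model id and function name are placeholders:

```python
# Sketch of the ML approach: prompt -> raster image (Stable Diffusion) -> SVG (vtracer).
# Assumes `pip install diffusers vtracer`; names here are illustrative.
import torch
import vtracer
from diffusers import StableDiffusionPipeline

def text_to_svg(prompt: str, png_path: str = "out.png", svg_path: str = "out.svg") -> str:
    # 1) Generate a raster image from the text prompt.
    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")
    pipe(prompt).images[0].save(png_path)

    # 2) Trace the raster image into an SVG; these keyword arguments
    #    correspond to the sliders exposed in the app's sidebar.
    vtracer.convert_image_to_svg_py(
        png_path,
        svg_path,
        colormode="color",
        color_precision=6,
        filter_speckle=4,
        path_precision=8,
    )
    with open(svg_path) as f:
        return f.read()
```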
+### DL Model (starvector)
+Uses Stable Diffusion for initial image creation, followed by StarVector, a specialized model designed to convert images directly to SVG.
+
+### Naive Model (phi-4)
+Directly generates SVG code using the Phi-4 language model with specialized prompting.
+
+Configurable parameters:
+- Max New Tokens
+
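The naive approach amounts to asking the LLM for markup and keeping only the `<svg>` element from its reply. A hedged sketch of that idea (the prompt wording and extraction regex are assumptions, not the repo's `naive.py`):

```python
# Sketch of the naive approach: ask Phi-4 for SVG markup directly.
# Assumes `pip install transformers accelerate`; prompt and regex are illustrative.
import re
from transformers import pipeline

generator = pipeline("text-generation", model="microsoft/phi-4", device_map="auto")

def describe_to_svg(description: str, max_new_tokens: int = 512) -> str:
    messages = [
        {"role": "system", "content": "You output only valid, self-contained SVG code."},
        {"role": "user", "content": f"Generate an SVG image of: {description}"},
    ]
    reply = generator(messages, max_new_tokens=max_new_tokens)[0]["generated_text"]
    text = reply[-1]["content"] if isinstance(reply, list) else reply
    # The model may wrap the markup in prose; keep only the <svg> element.
    match = re.search(r"<svg.*?</svg>", text, re.DOTALL)
    return match.group(0) if match else text
```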
+## Evaluation Data and Results
+
+### Data
+The `data` directory contains synthetic evaluation data created using custom scripts:
+- The first 15 examples are from the Kaggle competition "Drawing with LLMs"
+- `descriptions.csv` - Text descriptions for generating SVGs
+- `eval.csv` - Evaluation metrics
+- `gen_descriptions.py` - Script for generating synthetic descriptions
+- `gen_vqa.py` - Script for generating visual question answering data
+- Sample images (`gray_coat.png`, `purple_forest.png`) for reference
+
+### Results
+The `results` directory contains evaluation results comparing the models:
+- Evaluation results for both the Naive (Phi-4) and ML (vtracer) models
+- The DL model (StarVector) was not evaluated because it typically fails on natural images, often returning blank SVGs
+- Performance visualizations:
+  - `category_radar.png` - Performance comparison across categories
+  - `complexity_performance.png` - Performance relative to prompt complexity
+  - `quality_vs_time.png` - Quality-time tradeoff analysis
+  - `generation_time.png` - Comparison of generation times
+  - `model_comparison.png` - Overall model performance comparison
+- Generated SVGs and PNGs in respective subdirectories
+- Detailed results in JSON and CSV formats
+
+## Project Structure
+
+```
+drawing-with-llm/                  # Root directory
+│
+├── app.py                         # Main Streamlit application
+├── requirements.txt               # Python dependencies
+├── Dockerfile                     # Docker container definition
+├── docker-compose.yml             # Docker Compose configuration
+│
+├── ml.py                          # ML model implementation (vtracer approach)
+├── dl.py                          # DL model implementation (StarVector approach)
+├── naive.py                       # Naive model implementation (Phi-4 approach)
+├── gen_image.py                   # Common image generation using Stable Diffusion
+│
+├── eval.py                        # Evaluation script for model comparison
+├── eval_analysis.py               # Analysis script for evaluation results
+├── metric.py                      # Metrics implementation for evaluation
+│
+├── data/                          # Evaluation data directory
+│   ├── descriptions.csv           # Text descriptions for evaluation
+│   ├── eval.csv                   # Evaluation metrics
+│   ├── gen_descriptions.py        # Script for generating synthetic descriptions
+│   ├── gen_vqa.py                 # Script for generating VQA data
+│   ├── gray_coat.png              # Sample image by GPT-4o
+│   └── purple_forest.png          # Sample image by GPT-4o
+│
+├── results/                       # Evaluation results directory
+│   ├── category_radar.png         # Performance comparison across categories
+│   ├── complexity_performance.png # Performance by prompt complexity
+│   ├── quality_vs_time.png        # Quality-time tradeoff analysis
+│   ├── generation_time.png        # Comparison of generation times
+│   ├── model_comparison.png       # Overall model performance comparison
+│   ├── summary_*.csv              # Summary metrics in CSV format
+│   ├── results_*.json             # Detailed results in JSON format
+│   ├── svg/                       # Generated SVG outputs
+│   └── png/                       # Generated PNG outputs
+│
+└── star-vector/                   # StarVector dependency (installed locally)
+    └── starvector/                # StarVector Python package
+```
+
+## License
+
+[Specify your license information here]
+
+## Acknowledgments
+
+This project utilizes several key technologies:
+- [Stable Diffusion](https://github.com/CompVis/stable-diffusion) for image generation
+- [StarVector](https://github.com/joanrod/star-vector) for image-to-SVG conversion
+- [vtracer](https://github.com/visioncortex/vtracer) for raster-to-vector conversion
+- [Phi-4](https://huggingface.co/microsoft/phi-4) for text-to-SVG generation
+- [Streamlit](https://streamlit.io/) for the web interface
app.py CHANGED
@@ -1,7 +1,8 @@
 import streamlit as st
 import base64
 from ml import MLModel
-from ...
+from naive import NaiveModel
+import torch
 
 st.set_page_config(page_title="Drawing with LLM", page_icon="🎨", layout="wide")
 
@@ -10,18 +11,38 @@ def load_ml_model():
     return MLModel(device="cuda" if st.session_state.get("use_gpu", True) else "cpu")
 
 @st.cache_resource
-def ...
-    return ...
+def load_naive_model():
+    return NaiveModel(device="cuda" if st.session_state.get("use_gpu", True) else "cpu")
 
 def render_svg(svg_content):
     b64 = base64.b64encode(svg_content.encode("utf-8")).decode("utf-8")
     return f'<img src="data:image/svg+xml;base64,{b64}" width="100%" height="auto"/>'
 
+def clear_gpu_memory():
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.ipc_collect()
+
 st.title("Drawing with LLM 🎨")
 
+# Initialize session state for model type if not already set
+if "current_model_type" not in st.session_state:
+    st.session_state["current_model_type"] = None
+
 with st.sidebar:
     st.header("Settings")
-    ...
+    previous_model_type = st.session_state.get("current_model_type")
+    model_type = st.selectbox("Model Type", ["ML Model (vtracer)", "Naive Model (phi-4)"])
+
+    # Check if model type has changed
+    if previous_model_type is not None and previous_model_type != model_type:
+        st.cache_resource.clear()
+        clear_gpu_memory()
+        st.success(f"Cleared VRAM after switching from {previous_model_type} to {model_type}")
+
+    # Update current model type in session state
+    st.session_state["current_model_type"] = model_type
+
     use_gpu = st.checkbox("Use GPU", value=True)
     st.session_state["use_gpu"] = use_gpu
 
@@ -31,6 +52,9 @@ with st.sidebar:
         color_precision = st.slider("Color Precision", 1, 10, 6)
         filter_speckle = st.slider("Filter Speckle", 0, 10, 4)
        path_precision = st.slider("Path Precision", 1, 10, 8)
+    elif model_type == "Naive Model (phi-4)":
+        st.subheader("Naive Model Settings")
+        max_new_tokens = st.slider("Max New Tokens", 256, 1024, 512)
 
 prompt = st.text_area("Enter your description", "A cat sitting on a windowsill at sunset")
 
@@ -45,9 +69,9 @@ if st.button("Generate SVG"):
             filter_speckle=filter_speckle,
             path_precision=path_precision
         )
-        else:
-            model = ...
-            svg_content = model.predict(prompt)
+        else:  # Naive Model
+            model = load_naive_model()
+            svg_content = model.predict(prompt, max_new_tokens=max_new_tokens)
 
     col1, col2 = st.columns(2)
 
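Worth noting in the diff above: `render_svg` displays the SVG by embedding it in an `<img>` tag as a base64 data URI rather than injecting raw `<svg>` markup. The encoding round-trips losslessly, as this small standalone check shows (the sample SVG string is illustrative):

```python
# Stand-alone round-trip of the data-URI encoding used by render_svg in app.py.
import base64

svg_content = '<svg xmlns="http://www.w3.org/2000/svg" width="10" height="10"><rect width="10" height="10" fill="teal"/></svg>'
b64 = base64.b64encode(svg_content.encode("utf-8")).decode("utf-8")
html = f'<img src="data:image/svg+xml;base64,{b64}" width="100%" height="auto"/>'

# Decoding the payload recovers the original markup exactly.
assert base64.b64decode(b64).decode("utf-8") == svg_content
```

The model-switch logic pairs `st.cache_resource.clear()` with the new `clear_gpu_memory()` helper: clearing the Streamlit cache drops the Python reference to the old model, while `torch.cuda.empty_cache()` returns the freed allocator blocks to the driver before the next model loads.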
docker-compose.yml ADDED
@@ -0,0 +1,21 @@
+version: '3.8'
+
+services:
+  app:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    restart: unless-stopped
+    ports:
+      - "8501:8501"
+    volumes:
+      - ./.env:/app/.env
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
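The `deploy.resources.reservations` block is what requests a GPU from the Docker runtime (it relies on the NVIDIA Container Toolkit being installed on the host). A quick way to confirm the reservation actually reached the container is a check like this run inside it, since `torch` is already pinned in `requirements.txt` (the script name is illustrative):

```python
# Run inside the container (e.g. `docker compose exec app python3 check_gpu.py`)
# to verify the GPU reservation took effect.
import torch

print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device:", torch.cuda.get_device_name(0))
```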
requirements.txt CHANGED
@@ -25,6 +25,8 @@ vtracer==0.6.11
 deepspeed==0.16.7
 torch==2.5.1
 torchvision==0.20.1
+streamlit==1.44.1
+lxml==5.3.2
 
 # pip install 'tensorflow[and-cuda]'
 # pip install git+https://github.com/openai/CLIP.git