deploy

- .dockerignore  +6 -36
- Dockerfile  +22 -17
- README.md  +51 -9
- database_api.py  +0 -426
- main.py  +350 -357
- requirements.txt  +5 -10
- test_api.py  +0 -246
.dockerignore
CHANGED
@@ -1,45 +1,15 @@
-# .dockerignore
 __pycache__/
 *.pyc
 *.pyo
 *.pyd
 .Python
 env/
-.env
-.venv/
 venv/
-
-
-
-
-.pytest_cache/
-.mypy_cache/
-.nox/
-.tox/
-.coverage
-.coverage.*
-coverage.xml
-htmlcov/
-.hypothesis/
-
+.env
+.git
+.gitignore
 *.db
 *.db.wal
-
-
-
-
-# Ignore specific generated files if needed
-api_database.db
-api_database.db.wal
-my_duckdb_api_db.db
-my_duckdb_api_db.db.wal
-exported_db/
-duckdb_api_exports/ # Don't copy local temp exports
-
-# OS-specific files
-.DS_Store
-Thumbs.db
-
-# IDE files
-.idea/
-.vscode/
+data/*.db
+data/*.db.wal
+# Add other files/directories to ignore if needed
Dockerfile
CHANGED
@@ -1,33 +1,38 @@
-#
-
+# Use an official Python runtime as a parent image
 FROM python:3.10-slim
 
+# Set environment variables
 ENV PYTHONDONTWRITEBYTECODE 1
 ENV PYTHONUNBUFFERED 1
+# Set the DuckDB path inside the container
+ENV DUCKDB_PATH /app/data/mydatabase.db
 
 # Create a non-root user and group
-
-ARG GID=1000
-RUN groupadd -g ${GID} --system appgroup && useradd -u ${UID} -g appgroup --system appuser
+RUN adduser --disabled-password --gecos "" appuser
 
+# Set the working directory in the container
 WORKDIR /app
 
-#
-
-# Copy
-COPY
-RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir -r requirements.txt
+# Copy the requirements file into the container at /app
+COPY requirements.txt /app/
+
+# Install any needed packages specified in requirements.txt
+# Use --no-cache-dir to reduce image size
+RUN pip install --no-cache-dir -r requirements.txt
 
-#
-
-RUN chown -R appuser:
+# Copy the current directory contents into the container at /app
+COPY . /app/
+
+# Create the data directory and set permissions
+# Run these steps as root before switching user
+RUN mkdir -p /app/data && chown -R appuser:appuser /app
 
 # Switch to the non-root user
 USER appuser
 
-
-# Run
-
+# Make port 7860 available to the world outside this container (Hugging Face default)
+EXPOSE 7860
+
+# Run main.py when the container launches using Uvicorn
+# Use 0.0.0.0 to make it accessible externally
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
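The `ENV DUCKDB_PATH` line is what ties the container to the application code: `main.py` reads that variable at import time and falls back to a local path when it is unset. A minimal sketch of that lookup, mirroring the line in `main.py` (the override value in the comment is only an illustration, not something the repo sets):

```python
import os

# main.py resolves the database location from the environment set in the Dockerfile,
# falling back to a relative path for local development.
DATABASE_PATH = os.environ.get("DUCKDB_PATH", "data/mydatabase.db")

# When running outside the container you could point the API at another file
# before starting Uvicorn, e.g. os.environ["DUCKDB_PATH"] = "/tmp/scratch.db"
# (illustrative value only).
```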
README.md
CHANGED
@@ -1,13 +1,55 @@
 ---
-title: DuckDB
-emoji:
-colorFrom:
-colorTo:
+title: DuckDB FastAPI API
+emoji: 🦆
+colorFrom: blue
+colorTo: green
 sdk: docker
-
-
-
-port: 8000
+app_port: 7860
+# Optional: specify Python version for clarity, though the Dockerfile defines it
+# python_version: 3.10
 ---
 
-
+# DuckDB FastAPI API
+
+This Space provides a simple API built with FastAPI to interact with a DuckDB database.
+
+**Features:**
+
+* Create tables
+* Read table data (with limit/offset)
+* Insert rows into tables
+* Update rows based on a condition
+* Delete rows based on a condition
+* Download a table as CSV
+* Download the entire database file
+* Health check endpoint
+
+**API Documentation:**
+
+The API documentation (powered by Swagger UI) is available at the `/docs` endpoint of your Space URL.
+
+**Example Usage (using curl):**
+
+```bash
+# Health Check
+curl https://[your-space-subdomain].hf.space/health
+
+# Create a table
+curl -X POST "https://[your-space-subdomain].hf.space/tables/my_data" \
+  -H "Content-Type: application/json" \
+  -d '{"columns": [{"name": "id", "type": "INTEGER"}, {"name": "value", "type": "VARCHAR"}]}'
+
+# Insert rows
+curl -X POST "https://[your-space-subdomain].hf.space/tables/my_data/rows" \
+  -H "Content-Type: application/json" \
+  -d '{"rows": [{"id": 1, "value": "apple"}, {"id": 2, "value": "banana"}]}'
+
+# Read table data
+curl https://[your-space-subdomain].hf.space/tables/my_data
+
+# Download table as CSV
+curl -o my_data.csv https://[your-space-subdomain].hf.space/download/table/my_data
+
+# Download database file
+curl -o downloaded_db.db https://[your-space-subdomain].hf.space/download/database
+```
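The same operations can also be scripted from Python. A minimal sketch using the `requests` library, mirroring the curl examples above (the base URL is a placeholder for your own Space subdomain):

```python
import requests

BASE = "https://[your-space-subdomain].hf.space"  # placeholder, as in the curl examples

# Health check
print(requests.get(f"{BASE}/health").json())

# Create a table
requests.post(
    f"{BASE}/tables/my_data",
    json={"columns": [{"name": "id", "type": "INTEGER"}, {"name": "value", "type": "VARCHAR"}]},
)

# Insert rows
requests.post(
    f"{BASE}/tables/my_data/rows",
    json={"rows": [{"id": 1, "value": "apple"}, {"id": 2, "value": "banana"}]},
)

# Read the data back (limit/offset are optional query parameters)
print(requests.get(f"{BASE}/tables/my_data", params={"limit": 10}).json())
```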
database_api.py
DELETED
@@ -1,426 +0,0 @@

# database_api.py
import duckdb
import pandas as pd
import pyarrow as pa
import pyarrow.ipc
from pathlib import Path
import tempfile
import os
import shutil
from typing import Optional, List, Dict, Any, Union, Iterator, Generator, Tuple
# No need for pybind11 import here anymore

# --- Custom Exceptions ---
class DatabaseAPIError(Exception):
    """Base exception for our custom API."""
    pass

class QueryError(DatabaseAPIError):
    """Exception raised for errors during query execution."""
    pass

# --- Helper function to format COPY options ---
def _format_copy_options(options: Optional[Dict[str, Any]]) -> str:
    if not options:
        return ""
    opts_parts = []
    for k, v in options.items():
        key_upper = k.upper()
        if isinstance(v, bool):
            value_repr = str(v).upper()
        elif isinstance(v, (int, float)):
            value_repr = str(v)
        elif isinstance(v, str):
            escaped_v = v.replace("'", "''")
            value_repr = f"'{escaped_v}'"
        else:
            value_repr = repr(v)
        opts_parts.append(f"{key_upper} {value_repr}")

    opts_str = ", ".join(opts_parts)
    return f"WITH ({opts_str})"

# --- Main DatabaseAPI Class ---
class DatabaseAPI:
    def __init__(self,
                 db_path: Union[str, Path] = ":memory:",
                 read_only: bool = False,
                 config: Optional[Dict[str, str]] = None):
        self._db_path = str(db_path)
        self._config = config or {}
        self._read_only = read_only
        self._conn: Optional[duckdb.DuckDBPyConnection] = None
        try:
            self._conn = duckdb.connect(
                database=self._db_path,
                read_only=self._read_only,
                config=self._config
            )
            print(f"Connected to DuckDB database at '{self._db_path}'")
        except duckdb.Error as e:
            print(f"Failed to connect to DuckDB: {e}")
            raise DatabaseAPIError(f"Failed to connect to DuckDB: {e}") from e

    def _ensure_connection(self):
        if self._conn is None:
            raise DatabaseAPIError("Database connection is not established or has been closed.")
        try:
            self._conn.execute("SELECT 1", [])
        except (duckdb.ConnectionException, RuntimeError) as e:
            if "Connection has already been closed" in str(e) or "connection closed" in str(e).lower():
                self._conn = None
                raise DatabaseAPIError("Database connection is closed.") from e
            else:
                raise DatabaseAPIError(f"Database connection error: {e}") from e

    # --- Basic Query Methods --- (Keep as before)
    def execute_sql(self, sql: str, parameters: Optional[List[Any]] = None) -> None:
        self._ensure_connection()
        print(f"Executing SQL: {sql}")
        try:
            self._conn.execute(sql, parameters)
        except duckdb.Error as e:
            print(f"Error executing SQL: {e}")
            raise QueryError(f"Error executing SQL: {e}") from e

    def query_sql(self, sql: str, parameters: Optional[List[Any]] = None) -> duckdb.DuckDBPyRelation:
        self._ensure_connection()
        print(f"Querying SQL: {sql}")
        try:
            return self._conn.sql(sql, params=parameters)
        except duckdb.Error as e:
            print(f"Error querying SQL: {e}")
            raise QueryError(f"Error querying SQL: {e}") from e

    def query_df(self, sql: str, parameters: Optional[List[Any]] = None) -> pd.DataFrame:
        self._ensure_connection()
        print(f"Querying SQL to DataFrame: {sql}")
        try:
            return self._conn.execute(sql, parameters).df()
        except ImportError:
            print("Pandas library is required for DataFrame operations.")
            raise
        except duckdb.Error as e:
            print(f"Error querying SQL to DataFrame: {e}")
            raise QueryError(f"Error querying SQL to DataFrame: {e}") from e

    def query_arrow(self, sql: str, parameters: Optional[List[Any]] = None) -> pa.Table:
        self._ensure_connection()
        print(f"Querying SQL to Arrow Table: {sql}")
        try:
            return self._conn.execute(sql, parameters).arrow()
        except ImportError:
            print("PyArrow library is required for Arrow operations.")
            raise
        except duckdb.Error as e:
            print(f"Error querying SQL to Arrow Table: {e}")
            raise QueryError(f"Error querying SQL to Arrow Table: {e}") from e

    def query_fetchall(self, sql: str, parameters: Optional[List[Any]] = None) -> List[Tuple[Any, ...]]:
        self._ensure_connection()
        print(f"Querying SQL and fetching all: {sql}")
        try:
            return self._conn.execute(sql, parameters).fetchall()
        except duckdb.Error as e:
            print(f"Error querying SQL: {e}")
            raise QueryError(f"Error querying SQL: {e}") from e

    def query_fetchone(self, sql: str, parameters: Optional[List[Any]] = None) -> Optional[Tuple[Any, ...]]:
        self._ensure_connection()
        print(f"Querying SQL and fetching one: {sql}")
        try:
            return self._conn.execute(sql, parameters).fetchone()
        except duckdb.Error as e:
            print(f"Error querying SQL: {e}")
            raise QueryError(f"Error querying SQL: {e}") from e

    # --- Registration Methods --- (Keep as before)
    def register_df(self, name: str, df: pd.DataFrame):
        self._ensure_connection()
        print(f"Registering DataFrame as '{name}'")
        try:
            self._conn.register(name, df)
        except duckdb.Error as e:
            print(f"Error registering DataFrame: {e}")
            raise QueryError(f"Error registering DataFrame: {e}") from e

    def unregister_df(self, name: str):
        self._ensure_connection()
        print(f"Unregistering virtual table '{name}'")
        try:
            self._conn.unregister(name)
        except duckdb.Error as e:
            if "not found" in str(e).lower():
                print(f"Warning: Virtual table '{name}' not found for unregistering.")
            else:
                print(f"Error unregistering virtual table: {e}")
                raise QueryError(f"Error unregistering virtual table: {e}") from e

    # --- Extension Methods --- (Keep as before)
    def install_extension(self, extension_name: str, force_install: bool = False):
        self._ensure_connection()
        print(f"Installing extension: {extension_name}")
        try:
            self._conn.install_extension(extension_name, force_install=force_install)
        except duckdb.Error as e:
            print(f"Error installing extension '{extension_name}': {e}")
            raise DatabaseAPIError(f"Error installing extension '{extension_name}': {e}") from e

    def load_extension(self, extension_name: str):
        self._ensure_connection()
        print(f"Loading extension: {extension_name}")
        try:
            self._conn.load_extension(extension_name)
        # Catch specific DuckDB errors that indicate failure but aren't API errors
        except (duckdb.IOException, duckdb.CatalogException) as load_err:
            print(f"Error loading extension '{extension_name}': {load_err}")
            raise QueryError(f"Error loading extension '{extension_name}': {load_err}") from load_err
        except duckdb.Error as e:  # Catch other DuckDB errors
            print(f"Unexpected DuckDB error loading extension '{extension_name}': {e}")
            raise DatabaseAPIError(f"Unexpected DuckDB error loading extension '{extension_name}': {e}") from e

    # --- Export Methods ---
    def export_database(self, directory_path: Union[str, Path]):
        self._ensure_connection()
        path_str = str(directory_path)
        if not os.path.isdir(path_str):
            try:
                os.makedirs(path_str)
                print(f"Created export directory: {path_str}")
            except OSError as e:
                raise DatabaseAPIError(f"Could not create export directory '{path_str}': {e}") from e
        print(f"Exporting database to directory: {path_str}")
        sql = f"EXPORT DATABASE '{path_str}' (FORMAT CSV)"
        try:
            self._conn.execute(sql)
            print("Database export completed successfully.")
        except duckdb.Error as e:
            print(f"Error exporting database: {e}")
            raise DatabaseAPIError(f"Error exporting database: {e}") from e

    def _export_data(self,
                     source: str,
                     output_path: Union[str, Path],
                     file_format: str,
                     options: Optional[Dict[str, Any]] = None):
        self._ensure_connection()
        path_str = str(output_path)
        options_str = _format_copy_options(options)
        source_safe = source.strip()
        # --- MODIFIED: Use f-string quoting instead of quote_identifier ---
        if ' ' in source_safe or source_safe.upper().startswith(('SELECT', 'WITH', 'VALUES')):
            copy_source = f"({source})"
        else:
            # Simple quoting, might need refinement for complex identifiers
            copy_source = f'"{source_safe}"'
        # --- END MODIFICATION ---

        sql = f"COPY {copy_source} TO '{path_str}' {options_str}"
        print(f"Exporting data to {path_str} (Format: {file_format}) with options: {options or {}}")
        try:
            self._conn.execute(sql)
            print("Data export completed successfully.")
        except duckdb.Error as e:
            print(f"Error exporting data: {e}")
            raise QueryError(f"Error exporting data to {file_format}: {e}") from e

    # --- Keep export_data_to_csv, parquet, json, jsonl as before ---
    def export_data_to_csv(self,
                           source: str,
                           output_path: Union[str, Path],
                           options: Optional[Dict[str, Any]] = None):
        csv_options = options.copy() if options else {}
        csv_options['FORMAT'] = 'CSV'
        if 'HEADER' not in {k.upper() for k in csv_options}:
            csv_options['HEADER'] = True
        self._export_data(source, output_path, "CSV", csv_options)

    def export_data_to_parquet(self,
                               source: str,
                               output_path: Union[str, Path],
                               options: Optional[Dict[str, Any]] = None):
        parquet_options = options.copy() if options else {}
        parquet_options['FORMAT'] = 'PARQUET'
        self._export_data(source, output_path, "Parquet", parquet_options)

    def export_data_to_json(self,
                            source: str,
                            output_path: Union[str, Path],
                            array_format: bool = True,
                            options: Optional[Dict[str, Any]] = None):
        json_options = options.copy() if options else {}
        json_options['FORMAT'] = 'JSON'
        if 'ARRAY' not in {k.upper() for k in json_options}:
            json_options['ARRAY'] = array_format
        self._export_data(source, output_path, "JSON", json_options)

    def export_data_to_jsonl(self,
                             source: str,
                             output_path: Union[str, Path],
                             options: Optional[Dict[str, Any]] = None):
        self.export_data_to_json(source, output_path, array_format=False, options=options)

    # # --- Streaming Read Methods --- (Keep as before)
    # def stream_query_arrow(self,
    #                        sql: str,
    #                        parameters: Optional[List[Any]] = None,
    #                        batch_size: int = 1000000
    #                        ) -> Iterator[pa.RecordBatch]:
    #     self._ensure_connection()
    #     print(f"Streaming Arrow query (batch size {batch_size}): {sql}")
    #     try:
    #         result_set = self._conn.execute(sql, parameters)
    #         while True:
    #             batch = result_set.fetch_record_batch(batch_size)
    #             if not batch:
    #                 break
    #             yield batch
    #     except ImportError:
    #         print("PyArrow library is required for Arrow streaming.")
    #         raise
    #     except duckdb.Error as e:
    #         print(f"Error streaming Arrow query: {e}")
    #         raise QueryError(f"Error streaming Arrow query: {e}") from e

    def stream_query_df(self,
                        sql: str,
                        parameters: Optional[List[Any]] = None,
                        vectors_per_chunk: int = 1
                        ) -> Iterator[pd.DataFrame]:
        self._ensure_connection()
        print(f"Streaming DataFrame query (vectors per chunk {vectors_per_chunk}): {sql}")
        try:
            result_set = self._conn.execute(sql, parameters)
            while True:
                chunk_df = result_set.fetch_df_chunk(vectors_per_chunk)
                if chunk_df.empty:
                    break
                yield chunk_df
        except ImportError:
            print("Pandas library is required for DataFrame streaming.")
            raise
        except duckdb.Error as e:
            print(f"Error streaming DataFrame query: {e}")
            raise QueryError(f"Error streaming DataFrame query: {e}") from e

    def stream_query_arrow(self,
                           sql: str,
                           parameters: Optional[List[Any]] = None,
                           batch_size: int = 1000000
                           ) -> Iterator[pa.RecordBatch]:
        """
        Executes a SQL query and streams the results as Arrow RecordBatches.
        Useful for processing large results iteratively in Python without
        loading the entire result set into memory.

        Args:
            sql: The SQL query to execute.
            parameters: Optional list of parameters for prepared statements.
            batch_size: The approximate number of rows per Arrow RecordBatch.

        Yields:
            pyarrow.RecordBatch: Chunks of the result set.

        Raises:
            QueryError: If the query execution or fetching fails.
            ImportError: If pyarrow is not installed.
        """
        self._ensure_connection()
        print(f"Streaming Arrow query (batch size {batch_size}): {sql}")
        record_batch_reader = None
        try:
            # Use execute() to get a result object that supports streaming fetch
            result_set = self._conn.execute(sql, parameters)
            # --- MODIFICATION: Get the reader first ---
            record_batch_reader = result_set.fetch_record_batch(batch_size)
            # --- Iterate through the reader ---
            for batch in record_batch_reader:
                yield batch
            # --- END MODIFICATION ---
        except ImportError:
            print("PyArrow library is required for Arrow streaming.")
            raise
        except duckdb.Error as e:
            print(f"Error streaming Arrow query: {e}")
            raise QueryError(f"Error streaming Arrow query: {e}") from e
        finally:
            # Clean up the reader if it was created
            if record_batch_reader is not None:
                # PyArrow readers don't have an explicit close, relying on GC.
                # Forcing cleanup might involve ensuring references are dropped.
                del record_batch_reader  # Help GC potentially
            # The original result_set from execute() might also hold resources,
            # although fetch_record_batch typically consumes it.
            # Explicitly closing it if possible, or letting it go out of scope.
            if 'result_set' in locals() and result_set:
                try:
                    # DuckDBPyResult doesn't have an explicit close, relies on __del__
                    del result_set
                except Exception:
                    pass  # Best effort

    # --- Resource Management Methods --- (Keep as before)
    def close(self):
        if self._conn:
            conn_id = id(self._conn)
            print(f"Closing connection to '{self._db_path}' (ID: {conn_id})")
            try:
                self._conn.close()
            except duckdb.Error as e:
                print(f"Error closing DuckDB connection (ID: {conn_id}): {e}")
            finally:
                self._conn = None
        else:
            print("Connection already closed or never opened.")

    def __enter__(self):
        self._ensure_connection()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __del__(self):
        if self._conn:
            print(f"ResourceWarning: DatabaseAPI for '{self._db_path}' was not explicitly closed. Closing now in __del__.")
            try:
                self.close()
            except Exception as e:
                print(f"Exception during implicit close in __del__: {e}")
            self._conn = None


# --- Example Usage --- (Keep as before)
if __name__ == "__main__":
    # ... (rest of the example usage code from previous response) ...
    temp_dir_obj = tempfile.TemporaryDirectory()
    temp_dir = temp_dir_obj.name
    print(f"\n--- Using temporary directory: {temp_dir} ---")
    db_file = Path(temp_dir) / "export_test.db"
    try:
        with DatabaseAPI(db_path=db_file) as db_api:
            db_api.execute_sql("CREATE OR REPLACE TABLE products(id INTEGER, name VARCHAR, price DECIMAL(8,2))")
            db_api.execute_sql("INSERT INTO products VALUES (101, 'Gadget', 19.99), (102, 'Widget', 35.00), (103, 'Thing''amajig', 9.50)")
            db_api.execute_sql("CREATE OR REPLACE TABLE sales(product_id INTEGER, sale_date DATE, quantity INTEGER)")
            db_api.execute_sql("INSERT INTO sales VALUES (101, '2023-10-26', 5), (102, '2023-10-26', 2), (101, '2023-10-27', 3)")
            export_dir = Path(temp_dir) / "exported_db"
            db_api.export_database(export_dir)
            csv_path = Path(temp_dir) / "products_export.csv"
            db_api.export_data_to_csv('products', csv_path, options={'HEADER': True})
            parquet_path = Path(temp_dir) / "high_value_products.parquet"
            db_api.export_data_to_parquet("SELECT * FROM products WHERE price > 20", parquet_path, options={'COMPRESSION': 'SNAPPY'})
            json_path = Path(temp_dir) / "sales.json"
            db_api.export_data_to_json("SELECT * FROM sales", json_path, array_format=True)
            jsonl_path = Path(temp_dir) / "sales.jsonl"
            db_api.export_data_to_jsonl("SELECT * FROM sales ORDER BY sale_date", jsonl_path)

        with DatabaseAPI() as db_api:
            db_api.execute_sql("CREATE TABLE large_range AS SELECT range AS id, range % 100 AS category FROM range(1000)")
            for batch in db_api.stream_query_arrow("SELECT * FROM large_range", batch_size=200):
                pass
            for df_chunk in db_api.stream_query_df("SELECT * FROM large_range", vectors_per_chunk=1):
                pass
    finally:
        temp_dir_obj.cleanup()
        print(f"\n--- Cleaned up temporary directory: {temp_dir} ---")
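For reference, the removed `_format_copy_options` helper turned an options dict into the `WITH (...)` clause appended to DuckDB `COPY` statements. A small sketch of what it produced when called directly, using the function exactly as defined above (the option values are illustrative):

```python
# Illustrative call to the helper from the deleted module.
options = {"HEADER": True, "DELIMITER": ";", "compression": "gzip"}
print(_format_copy_options(options))
# -> WITH (HEADER TRUE, DELIMITER ';', COMPRESSION 'gzip')
```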
main.py
CHANGED
@@ -1,389 +1,382 @@
|
|
1 |
-
# main.py
|
2 |
import duckdb
|
3 |
-
import pandas as pd
|
4 |
-
import pyarrow as pa
|
5 |
-
import pyarrow.ipc
|
6 |
-
from pathlib import Path
|
7 |
-
import tempfile
|
8 |
import os
|
9 |
-
import
|
10 |
-
from
|
11 |
-
|
12 |
-
from fastapi import FastAPI, HTTPException, Body, Query, BackgroundTasks, Depends
|
13 |
-
from fastapi.responses import StreamingResponse, FileResponse
|
14 |
from pydantic import BaseModel, Field
|
|
|
|
|
|
|
|
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
DUCKDB_API_DB_PATH = os.getenv("DUCKDB_API_DB_PATH", "api_database.db")
|
20 |
-
DUCKDB_API_READ_ONLY = os.getenv("DUCKDB_API_READ_ONLY", False)
|
21 |
-
DUCKDB_API_CONFIG = {}
|
22 |
-
TEMP_EXPORT_DIR = Path(tempfile.gettempdir()) / "duckdb_api_exports"
|
23 |
-
TEMP_EXPORT_DIR.mkdir(exist_ok=True)
|
24 |
-
print(f"Using temporary directory for exports: {TEMP_EXPORT_DIR}")
|
25 |
-
|
26 |
-
# --- Pydantic Models --- (Keep as before)
|
27 |
-
class StatusResponse(BaseModel):
|
28 |
-
status: str
|
29 |
-
message: Optional[str] = None
|
30 |
-
|
31 |
-
class ExecuteRequest(BaseModel):
|
32 |
-
sql: str
|
33 |
-
parameters: Optional[List[Any]] = None
|
34 |
-
|
35 |
-
class QueryRequest(BaseModel):
|
36 |
-
sql: str
|
37 |
-
parameters: Optional[List[Any]] = None
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
records: List[Dict[str, Any]]
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
|
47 |
-
|
48 |
-
extension_name: str
|
49 |
-
|
50 |
-
class ExportDataRequest(BaseModel):
|
51 |
-
source: str = Field(..., description="Table name or SQL SELECT query to export")
|
52 |
-
options: Optional[Dict[str, Any]] = Field(None, description="Format-specific export options")
|
53 |
-
|
54 |
-
# --- FastAPI Application --- (Keep as before)
|
55 |
app = FastAPI(
|
56 |
-
title="DuckDB API
|
57 |
-
description="
|
58 |
-
version="0.
|
59 |
)
|
60 |
|
61 |
-
# ---
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
async def startup_event():
|
66 |
-
global db_api_instance
|
67 |
-
print("Starting up DuckDB API...")
|
68 |
-
try:
|
69 |
-
db_api_instance = DatabaseAPI(db_path=DUCKDB_API_DB_PATH, read_only=DUCKDB_API_READ_ONLY, config=DUCKDB_API_CONFIG)
|
70 |
-
except DatabaseAPIError as e:
|
71 |
-
print(f"FATAL: Could not initialize DatabaseAPI on startup: {e}")
|
72 |
-
db_api_instance = None
|
73 |
-
|
74 |
-
@app.on_event("shutdown")
|
75 |
-
def shutdown_event():
|
76 |
-
print("Shutting down DuckDB API...")
|
77 |
-
if db_api_instance:
|
78 |
-
db_api_instance.close()
|
79 |
-
|
80 |
-
# --- Dependency to get the DB API instance --- (Keep as before)
|
81 |
-
def get_db_api() -> DatabaseAPI:
|
82 |
-
if db_api_instance is None:
|
83 |
-
raise HTTPException(status_code=503, detail="Database service is unavailable (failed to initialize).")
|
84 |
try:
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
#
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
api.execute_sql(request.sql, request.parameters)
|
97 |
-
return {"status": "success", "message": None} # Explicitly return None for message
|
98 |
-
except QueryError as e:
|
99 |
-
raise HTTPException(status_code=400, detail=str(e))
|
100 |
-
except DatabaseAPIError as e:
|
101 |
-
raise HTTPException(status_code=500, detail=str(e))
|
102 |
-
|
103 |
-
@app.post("/query/fetchall", response_model=List[tuple], tags=["Querying"])
|
104 |
-
async def query_fetchall_endpoint(request: QueryRequest, api: DatabaseAPI = Depends(get_db_api)):
|
105 |
-
try:
|
106 |
-
return api.query_fetchall(request.sql, request.parameters)
|
107 |
-
except QueryError as e:
|
108 |
-
raise HTTPException(status_code=400, detail=str(e))
|
109 |
-
except DatabaseAPIError as e:
|
110 |
-
raise HTTPException(status_code=500, detail=str(e))
|
111 |
-
|
112 |
-
@app.post("/query/dataframe", response_model=DataFrameResponse, tags=["Querying"])
|
113 |
-
async def query_dataframe_endpoint(request: QueryRequest, api: DatabaseAPI = Depends(get_db_api)):
|
114 |
-
try:
|
115 |
-
df = api.query_df(request.sql, request.parameters)
|
116 |
-
df_serializable = df.replace({pd.NA: None, pd.NaT: None, float('nan'): None})
|
117 |
-
return {"columns": df_serializable.columns.tolist(), "records": df_serializable.to_dict(orient='records')}
|
118 |
-
except (QueryError, ImportError) as e:
|
119 |
-
raise HTTPException(status_code=400, detail=str(e))
|
120 |
-
except DatabaseAPIError as e:
|
121 |
-
raise HTTPException(status_code=500, detail=str(e))
|
122 |
-
|
123 |
-
# --- Streaming Endpoints ---
|
124 |
-
|
125 |
-
# --- CORRECTED _stream_arrow_ipc ---
|
126 |
-
async def _stream_arrow_ipc(record_batch_iterator: Iterator[pa.RecordBatch]) -> Generator[bytes, None, None]:
|
127 |
-
"""Helper generator to stream Arrow IPC Stream format."""
|
128 |
-
writer = None
|
129 |
-
sink = pa.BufferOutputStream() # Create sink once
|
130 |
-
try:
|
131 |
-
first_batch = next(record_batch_iterator)
|
132 |
-
writer = pa.ipc.new_stream(sink, first_batch.schema)
|
133 |
-
writer.write_batch(first_batch)
|
134 |
-
# Do NOT yield yet, wait for potential subsequent batches or closure
|
135 |
-
|
136 |
-
for batch in record_batch_iterator:
|
137 |
-
# Write subsequent batches to the SAME writer
|
138 |
-
writer.write_batch(batch)
|
139 |
-
|
140 |
-
except StopIteration:
|
141 |
-
# Handles the case where the iterator was empty initially
|
142 |
-
if writer is None: # No batches were ever processed
|
143 |
-
print("Warning: Arrow stream iterator was empty.")
|
144 |
-
# Yield empty bytes or handle as needed, depends on client expectation
|
145 |
-
# yield b'' # Option 1: empty bytes
|
146 |
-
return # Option 2: Just finish generator
|
147 |
-
|
148 |
-
except Exception as e:
|
149 |
-
print(f"Error during Arrow streaming generator: {e}")
|
150 |
-
# Consider how to signal error downstream if possible
|
151 |
finally:
|
152 |
-
if
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
raise HTTPException(status_code=400, detail=str(e))
|
209 |
-
except DatabaseAPIError as e:
|
210 |
-
raise HTTPException(status_code=500, detail=str(e))
|
211 |
|
|
|
212 |
|
213 |
-
|
214 |
-
def
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
suffix: str = ".tmp"
|
228 |
-
) -> Path:
|
229 |
-
fd, temp_path_str = tempfile.mkstemp(suffix=suffix, dir=TEMP_EXPORT_DIR)
|
230 |
-
os.close(fd)
|
231 |
-
temp_file_path = Path(temp_path_str)
|
232 |
|
233 |
try:
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
elif export_format == 'jsonl':
|
242 |
-
api.export_data_to_jsonl(source, temp_file_path, options=options)
|
243 |
-
else:
|
244 |
-
raise ValueError(f"Unsupported export format: {export_format}")
|
245 |
-
return temp_file_path
|
246 |
-
except Exception as e:
|
247 |
-
_cleanup_temp_file(temp_file_path)
|
248 |
raise e
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
try:
|
253 |
-
temp_file_path = await _create_temp_export(api, request.source, 'csv', request.options, suffix=".csv")
|
254 |
-
background_tasks.add_task(_cleanup_temp_file, temp_file_path)
|
255 |
-
filename = f"export_{Path(request.source).stem if '.' not in request.source else 'query'}.csv"
|
256 |
-
return FileResponse(temp_file_path, media_type='text/csv', filename=filename)
|
257 |
-
except (QueryError, ValueError) as e:
|
258 |
-
raise HTTPException(status_code=400, detail=str(e))
|
259 |
-
except DatabaseAPIError as e:
|
260 |
-
raise HTTPException(status_code=500, detail=str(e))
|
261 |
except Exception as e:
|
262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
|
264 |
-
@app.post("/export/data/parquet", response_class=FileResponse, tags=["Export / Download"])
|
265 |
-
async def export_parquet_endpoint(request: ExportDataRequest, background_tasks: BackgroundTasks, api: DatabaseAPI = Depends(get_db_api)):
|
266 |
try:
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
|
|
|
|
|
|
|
|
275 |
except Exception as e:
|
276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
277 |
|
278 |
-
@app.post("/export/data/json", response_class=FileResponse, tags=["Export / Download"])
|
279 |
-
async def export_json_endpoint(request: ExportDataRequest, background_tasks: BackgroundTasks, api: DatabaseAPI = Depends(get_db_api)):
|
280 |
try:
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
|
|
|
|
|
|
|
|
289 |
except Exception as e:
|
290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
291 |
|
292 |
-
@app.post("/export/data/jsonl", response_class=FileResponse, tags=["Export / Download"])
|
293 |
-
async def export_jsonl_endpoint(request: ExportDataRequest, background_tasks: BackgroundTasks, api: DatabaseAPI = Depends(get_db_api)):
|
294 |
-
try:
|
295 |
-
temp_file_path = await _create_temp_export(api, request.source, 'jsonl', request.options, suffix=".jsonl")
|
296 |
-
background_tasks.add_task(_cleanup_temp_file, temp_file_path)
|
297 |
-
filename = f"export_{Path(request.source).stem if '.' not in request.source else 'query'}.jsonl"
|
298 |
-
return FileResponse(temp_file_path, media_type='application/jsonl', filename=filename)
|
299 |
-
except (QueryError, ValueError) as e:
|
300 |
-
raise HTTPException(status_code=400, detail=str(e))
|
301 |
-
except DatabaseAPIError as e:
|
302 |
-
raise HTTPException(status_code=500, detail=str(e))
|
303 |
-
except Exception as e:
|
304 |
-
raise HTTPException(status_code=500, detail=f"Unexpected error during JSONL export: {e}")
|
305 |
-
|
306 |
-
@app.post("/export/database", response_class=FileResponse, tags=["Export / Download"])
|
307 |
-
async def export_database_endpoint(background_tasks: BackgroundTasks, api: DatabaseAPI = Depends(get_db_api)):
|
308 |
-
export_target_dir = Path(tempfile.mkdtemp(dir=TEMP_EXPORT_DIR))
|
309 |
-
fd, zip_path_str = tempfile.mkstemp(suffix=".zip", dir=TEMP_EXPORT_DIR)
|
310 |
-
os.close(fd)
|
311 |
-
zip_file_path = Path(zip_path_str)
|
312 |
try:
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
shutil.rmtree(export_target_dir, ignore_errors=True)
|
325 |
-
_cleanup_temp_file(zip_file_path)
|
326 |
-
if isinstance(e, DatabaseAPIError):
|
327 |
-
raise HTTPException(status_code=500, detail=str(e))
|
328 |
-
else:
|
329 |
-
raise HTTPException(status_code=400, detail=str(e))
|
330 |
except Exception as e:
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
335 |
|
336 |
-
# --- Extension Management Endpoints ---
|
337 |
-
|
338 |
-
@app.post("/extensions/install", response_model=StatusResponse, tags=["Extensions"])
|
339 |
-
async def install_extension_endpoint(request: InstallRequest, api: DatabaseAPI = Depends(get_db_api)):
|
340 |
-
try:
|
341 |
-
api.install_extension(request.extension_name, request.force_install)
|
342 |
-
return {"status": "success", "message": f"Extension '{request.extension_name}' installed."}
|
343 |
-
except DatabaseAPIError as e:
|
344 |
-
raise HTTPException(status_code=500, detail=str(e))
|
345 |
-
# Catch specific DuckDB errors that should be client errors (400)
|
346 |
-
except (duckdb.IOException, duckdb.CatalogException, duckdb.InvalidInputException) as e:
|
347 |
-
raise HTTPException(status_code=400, detail=f"DuckDB Error during install: {e}")
|
348 |
-
except duckdb.Error as e: # Catch other potential DuckDB errors as 500
|
349 |
-
raise HTTPException(status_code=500, detail=f"Unexpected DuckDB Error during install: {e}")
|
350 |
-
|
351 |
-
|
352 |
-
@app.post("/extensions/load", response_model=StatusResponse, tags=["Extensions"])
|
353 |
-
async def load_extension_endpoint(request: LoadRequest, api: DatabaseAPI = Depends(get_db_api)):
|
354 |
-
"""Loads an installed DuckDB extension."""
|
355 |
try:
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
except
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
372 |
async def health_check():
|
373 |
-
"""
|
374 |
try:
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
raise e
|
379 |
except Exception as e:
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
if
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
|
|
|
|
|
|
|
|
|
1 |
import duckdb
|
|
|
|
|
|
|
|
|
|
|
2 |
import os
|
3 |
+
from fastapi import FastAPI, HTTPException, Request, Path as FastPath
|
4 |
+
from fastapi.responses import FileResponse, StreamingResponse
|
|
|
|
|
|
|
5 |
from pydantic import BaseModel, Field
|
6 |
+
from typing import List, Dict, Any, Optional
|
7 |
+
import logging
|
8 |
+
import io
|
9 |
+
import asyncio
|
10 |
|
11 |
+
# --- Configuration ---
|
12 |
+
DATABASE_PATH = os.environ.get("DUCKDB_PATH", "data/mydatabase.db")
|
13 |
+
DATA_DIR = "data"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
+
# Ensure data directory exists
|
16 |
+
os.makedirs(DATA_DIR, exist_ok=True)
|
|
|
17 |
|
18 |
+
# --- Logging ---
|
19 |
+
logging.basicConfig(level=logging.INFO)
|
20 |
+
logger = logging.getLogger(__name__)
|
21 |
|
22 |
+
# --- FastAPI App ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
app = FastAPI(
|
24 |
+
title="DuckDB API",
|
25 |
+
description="An API to interact with a DuckDB database.",
|
26 |
+
version="0.1.0"
|
27 |
)
|
28 |
|
29 |
+
# --- Database Connection ---
|
30 |
+
# For simplicity in this example, we connect within each request.
|
31 |
+
# For production, consider dependency injection or connection pooling.
|
32 |
+
def get_db():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
try:
|
34 |
+
# Check if the database file needs initialization
|
35 |
+
initialize = not os.path.exists(DATABASE_PATH) or os.path.getsize(DATABASE_PATH) == 0
|
36 |
+
conn = duckdb.connect(DATABASE_PATH, read_only=False)
|
37 |
+
if initialize:
|
38 |
+
logger.info(f"Database file not found or empty at {DATABASE_PATH}. Initializing.")
|
39 |
+
# You could add initial schema setup here if needed
|
40 |
+
# conn.execute("CREATE TABLE IF NOT EXISTS initial_table (id INTEGER, name VARCHAR);")
|
41 |
+
yield conn
|
42 |
+
except duckdb.Error as e:
|
43 |
+
logger.error(f"Database connection error: {e}")
|
44 |
+
raise HTTPException(status_code=500, detail=f"Database connection error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
finally:
|
46 |
+
if 'conn' in locals() and conn:
|
47 |
+
conn.close()
|
48 |
+
|
49 |
+
# --- Pydantic Models ---
|
50 |
+
class ColumnDefinition(BaseModel):
|
51 |
+
name: str
|
52 |
+
type: str
|
53 |
+
|
54 |
+
class CreateTableRequest(BaseModel):
|
55 |
+
columns: List[ColumnDefinition]
|
56 |
+
|
57 |
+
class CreateRowRequest(BaseModel):
|
58 |
+
# List of rows, where each row is a dict of column_name: value
|
59 |
+
rows: List[Dict[str, Any]]
|
60 |
+
|
61 |
+
class UpdateRowRequest(BaseModel):
|
62 |
+
updates: Dict[str, Any] # Column value pairs to set
|
63 |
+
condition: str # SQL WHERE clause string to identify rows
|
64 |
+
|
65 |
+
class DeleteRowRequest(BaseModel):
|
66 |
+
condition: str # SQL WHERE clause string to identify rows
|
67 |
+
|
68 |
+
class ApiResponse(BaseModel):
|
69 |
+
message: str
|
70 |
+
details: Optional[Any] = None
|
71 |
+
|
72 |
+
# --- Helper Functions ---
|
73 |
+
def safe_identifier(name: str) -> str:
|
74 |
+
"""Quotes an identifier safely."""
|
75 |
+
if not name.isidentifier():
|
76 |
+
# Basic check, consider more robust validation/sanitization if needed
|
77 |
+
# Use DuckDB's quoting
|
78 |
+
try:
|
79 |
+
conn = duckdb.connect(':memory:')
|
80 |
+
quoted = conn.execute(f"SELECT '{name}'::IDENTIFIER").fetchone()[0]
|
81 |
+
conn.close()
|
82 |
+
return quoted
|
83 |
+
except duckdb.Error:
|
84 |
+
raise HTTPException(status_code=400, detail=f"Invalid identifier: {name}")
|
85 |
+
# Also quote standard identifiers to be safe
|
86 |
+
return f'"{name}"'
|
87 |
+
|
88 |
+
def generate_column_sql(columns: List[ColumnDefinition]) -> str:
|
89 |
+
"""Generates the column definition part of a CREATE TABLE statement."""
|
90 |
+
defs = []
|
91 |
+
for col in columns:
|
92 |
+
col_name_safe = safe_identifier(col.name)
|
93 |
+
# Basic type validation (can be expanded)
|
94 |
+
allowed_types = ['INTEGER', 'VARCHAR', 'TEXT', 'BOOLEAN', 'FLOAT', 'DOUBLE', 'DATE', 'TIMESTAMP', 'BLOB', 'BIGINT', 'DECIMAL']
|
95 |
+
type_upper = col.type.strip().upper()
|
96 |
+
# Allow DECIMAL(p,s) syntax
|
97 |
+
if not (type_upper.startswith('DECIMAL(') and type_upper.endswith(')')) and \
|
98 |
+
not any(base_type in type_upper for base_type in allowed_types):
|
99 |
+
raise HTTPException(status_code=400, detail=f"Unsupported or invalid data type: {col.type}")
|
100 |
+
defs.append(f"{col_name_safe} {col.type}")
|
101 |
+
return ", ".join(defs)
|
|
|
|
|
|
|
102 |
|
103 |
+
# --- API Endpoints ---
|
104 |
|
105 |
+
@app.get("/", summary="API Root", response_model=ApiResponse)
|
106 |
+
async def read_root():
|
107 |
+
"""Provides a welcome message for the API."""
|
108 |
+
return {"message": "Welcome to the DuckDB API!"}
|
109 |
+
|
110 |
+
@app.post("/tables/{table_name}", summary="Create Table", response_model=ApiResponse, status_code=201)
|
111 |
+
async def create_table(
|
112 |
+
table_name: str = FastPath(..., description="Name of the table to create"),
|
113 |
+
schema: CreateTableRequest = ...,
|
114 |
+
):
|
115 |
+
"""Creates a new table with the specified schema."""
|
116 |
+
table_name_safe = safe_identifier(table_name)
|
117 |
+
if not schema.columns:
|
118 |
+
raise HTTPException(status_code=400, detail="Table must have at least one column.")
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
try:
|
121 |
+
columns_sql = generate_column_sql(schema.columns)
|
122 |
+
sql = f"CREATE TABLE {table_name_safe} ({columns_sql});"
|
123 |
+
logger.info(f"Executing SQL: {sql}")
|
124 |
+
for conn in get_db():
|
125 |
+
conn.execute(sql)
|
126 |
+
return {"message": f"Table '{table_name}' created successfully."}
|
127 |
+
except HTTPException as e: # Re-raise validation errors
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
raise e
|
129 |
+
except duckdb.Error as e:
|
130 |
+
logger.error(f"Error creating table '{table_name}': {e}")
|
131 |
+
raise HTTPException(status_code=400, detail=f"Error creating table: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
except Exception as e:
|
133 |
+
logger.error(f"Unexpected error creating table '{table_name}': {e}")
|
134 |
+
raise HTTPException(status_code=500, detail="An unexpected error occurred.")
|
135 |
+
|
136 |
+
@app.get("/tables/{table_name}", summary="Read Table Data")
|
137 |
+
async def read_table(
|
138 |
+
table_name: str = FastPath(..., description="Name of the table to read from"),
|
139 |
+
limit: Optional[int] = None,
|
140 |
+
offset: Optional[int] = None
|
141 |
+
):
|
142 |
+
"""Reads and returns all rows from a specified table. Supports limit and offset."""
|
143 |
+
table_name_safe = safe_identifier(table_name)
|
144 |
+
sql = f"SELECT * FROM {table_name_safe}"
|
145 |
+
params = []
|
146 |
+
if limit is not None:
|
147 |
+
sql += " LIMIT ?"
|
148 |
+
params.append(limit)
|
149 |
+
if offset is not None:
|
150 |
+
sql += " OFFSET ?"
|
151 |
+
params.append(offset)
|
152 |
+
sql += ";"
|
153 |
|
|
|
|
|
154 |
try:
|
155 |
+
logger.info(f"Executing SQL: {sql} with params: {params}")
|
156 |
+
for conn in get_db():
|
157 |
+
result = conn.execute(sql, params).fetchall()
|
158 |
+
# Convert rows to dictionaries for JSON serialization
|
159 |
+
column_names = [desc[0] for desc in conn.description]
|
160 |
+
data = [dict(zip(column_names, row)) for row in result]
|
161 |
+
return data
|
162 |
+
except duckdb.CatalogException as e:
|
163 |
+
raise HTTPException(status_code=404, detail=f"Table '{table_name}' not found.")
|
164 |
+
except duckdb.Error as e:
|
165 |
+
logger.error(f"Error reading table '{table_name}': {e}")
|
166 |
+
raise HTTPException(status_code=400, detail=f"Error reading table: {e}")
|
167 |
except Exception as e:
|
168 |
+
logger.error(f"Unexpected error reading table '{table_name}': {e}")
|
169 |
+
raise HTTPException(status_code=500, detail="An unexpected error occurred.")
|
170 |
+
|
171 |
+
|
172 |
+
@app.post("/tables/{table_name}/rows", summary="Create Rows", response_model=ApiResponse, status_code=201)
|
173 |
+
async def create_rows(
|
174 |
+
table_name: str = FastPath(..., description="Name of the table to insert into"),
|
175 |
+
request: CreateRowRequest = ...,
|
176 |
+
):
|
177 |
+
"""Inserts one or more rows into the specified table."""
|
178 |
+
table_name_safe = safe_identifier(table_name)
|
179 |
+
if not request.rows:
|
180 |
+
raise HTTPException(status_code=400, detail="No rows provided to insert.")
|
181 |
+
|
182 |
+
# Assume all rows have the same columns based on the first row
|
183 |
+
columns = list(request.rows[0].keys())
|
184 |
+
columns_safe = [safe_identifier(col) for col in columns]
|
185 |
+
placeholders = ", ".join(["?"] * len(columns))
|
186 |
+
columns_sql = ", ".join(columns_safe)
|
187 |
+
|
188 |
+
sql = f"INSERT INTO {table_name_safe} ({columns_sql}) VALUES ({placeholders});"
|
189 |
+
|
190 |
+
# Convert list of dicts to list of lists/tuples for executemany
|
191 |
+
params_list = []
|
192 |
+
for row_dict in request.rows:
|
193 |
+
if list(row_dict.keys()) != columns:
|
194 |
+
raise HTTPException(status_code=400, detail="All rows must have the same columns in the same order.")
|
195 |
+
params_list.append(list(row_dict.values()))
|
196 |
|
|
|
|
|
197 |
try:
|
198 |
+
logger.info(f"Executing SQL: {sql} for {len(params_list)} rows")
|
199 |
+
for conn in get_db():
|
200 |
+
conn.executemany(sql, params_list)
|
201 |
+
conn.commit() # Explicit commit after potential bulk insert
|
202 |
+
return {"message": f"Successfully inserted {len(params_list)} rows into '{table_name}'."}
|
203 |
+
except duckdb.CatalogException as e:
|
204 |
+
raise HTTPException(status_code=404, detail=f"Table '{table_name}' not found.")
|
205 |
+
except duckdb.Error as e:
|
206 |
+
logger.error(f"Error inserting rows into '{table_name}': {e}")
|
207 |
+
# Rollback on error might be needed depending on transaction behavior
|
208 |
+
# For get_db creating connection per request, this is less critical
|
209 |
+
raise HTTPException(status_code=400, detail=f"Error inserting rows: {e}")
|
210 |
except Exception as e:
|
211 |
+
logger.error(f"Unexpected error inserting rows into '{table_name}': {e}")
|
212 |
+
raise HTTPException(status_code=500, detail="An unexpected error occurred.")
|
213 |
+
|
214 |
+
|
215 |
+
@app.put("/tables/{table_name}/rows", summary="Update Rows", response_model=ApiResponse)
|
216 |
+
async def update_rows(
|
217 |
+
table_name: str = FastPath(..., description="Name of the table to update"),
|
218 |
+
request: UpdateRowRequest = ...,
|
219 |
+
):
|
220 |
+
"""Updates rows in the table based on a condition."""
|
221 |
+
table_name_safe = safe_identifier(table_name)
|
222 |
+
if not request.updates:
|
223 |
+
raise HTTPException(status_code=400, detail="No updates provided.")
|
224 |
+
if not request.condition:
|
225 |
+
raise HTTPException(status_code=400, detail="Update condition (WHERE clause) is required.")
|
226 |
+
|
227 |
+
set_clauses = []
|
228 |
+
params = []
|
229 |
+
for col, value in request.updates.items():
|
230 |
+
set_clauses.append(f"{safe_identifier(col)} = ?")
|
231 |
+
params.append(value)
|
232 |
+
|
233 |
+
set_sql = ", ".join(set_clauses)
|
234 |
+
# WARNING: Injecting request.condition directly is a security risk.
|
235 |
+
# In a real app, use query parameters or a safer way to build the WHERE clause.
|
236 |
+
sql = f"UPDATE {table_name_safe} SET {set_sql} WHERE {request.condition};"
|
237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
try:
|
239 |
+
logger.info(f"Executing SQL: {sql} with params: {params}")
|
240 |
+
for conn in get_db():
|
241 |
+
# Use execute for safety with parameters
|
242 |
+
conn.execute(sql, params)
|
243 |
+
conn.commit()
|
244 |
+
return {"message": f"Rows in '{table_name}' updated successfully based on condition."}
|
245 |
+
except duckdb.CatalogException as e:
|
246 |
+
raise HTTPException(status_code=404, detail=f"Table '{table_name}' not found.")
|
247 |
+
except duckdb.Error as e:
|
248 |
+
logger.error(f"Error updating rows in '{table_name}': {e}")
|
249 |
+
raise HTTPException(status_code=400, detail=f"Error updating rows: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
except Exception as e:
|
251 |
+
logger.error(f"Unexpected error updating rows in '{table_name}': {e}")
|
252 |
+
raise HTTPException(status_code=500, detail="An unexpected error occurred.")
|
253 |
+
|
254 |
+
@app.delete("/tables/{table_name}/rows", summary="Delete Rows", response_model=ApiResponse)
|
255 |
+
async def delete_rows(
|
256 |
+
table_name: str = FastPath(..., description="Name of the table to delete from"),
|
257 |
+
request: DeleteRowRequest = ...,
|
258 |
+
):
|
259 |
+
"""Deletes rows from the table based on a condition."""
|
260 |
+
table_name_safe = safe_identifier(table_name)
|
261 |
+
if not request.condition:
|
262 |
+
raise HTTPException(status_code=400, detail="Delete condition (WHERE clause) is required.")
|
263 |
+
|
264 |
+
# WARNING: Injecting request.condition directly is a security risk.
|
265 |
+
# In a real app, use query parameters or a safer way to build the WHERE clause.
|
266 |
+
sql = f"DELETE FROM {table_name_safe} WHERE {request.condition};"
|
267 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
try:
|
269 |
+
logger.info(f"Executing SQL: {sql}")
|
270 |
+
for conn in get_db():
|
271 |
+
# Execute does not directly support parameters for WHERE in DELETE like this easily
|
272 |
+
conn.execute(sql)
|
273 |
+
conn.commit()
|
274 |
+
return {"message": f"Rows from '{table_name}' deleted successfully based on condition."}
|
275 |
+
except duckdb.CatalogException as e:
|
276 |
+
raise HTTPException(status_code=404, detail=f"Table '{table_name}' not found.")
|
277 |
+
except duckdb.Error as e:
|
278 |
+
logger.error(f"Error deleting rows from '{table_name}': {e}")
|
279 |
+
raise HTTPException(status_code=400, detail=f"Error deleting rows: {e}")
|
280 |
+
except Exception as e:
|
281 |
+
logger.error(f"Unexpected error deleting rows from '{table_name}': {e}")
|
282 |
+
raise HTTPException(status_code=500, detail="An unexpected error occurred.")
|
+
+# --- Download Endpoints ---
+
+@app.get("/download/table/{table_name}", summary="Download Table as CSV")
+async def download_table_csv(
+    table_name: str = FastPath(..., description="Name of the table to download")
+):
+    """Downloads the entire content of a table as a CSV file."""
+    table_name_safe = safe_identifier(table_name)
+    # Use COPY TO STDOUT for efficient streaming
+    sql = f"COPY (SELECT * FROM {table_name_safe}) TO STDOUT (FORMAT CSV, HEADER)"
+
+    async def stream_csv_data():
+        # We need a non-blocking way to stream data from DuckDB.
+        # DuckDB's Python API is blocking. A simple approach for this demo
+        # is to fetch all data first, then stream it.
+        # A more advanced approach would involve running the DuckDB query
+        # in a separate thread or process pool managed by asyncio.
+
+        try:
+            all_data_io = io.StringIO()
+            # This COPY TO variant isn't directly available in Python API for streaming to a buffer easily.
+            # Let's fetch data and format as CSV manually or use Pandas.
+            for conn in get_db():
+                df = conn.execute(f"SELECT * FROM {table_name_safe}").df()  # Use pandas for CSV conversion
+
+                # Use an in-memory text buffer
+                df.to_csv(all_data_io, index=False)
+                all_data_io.seek(0)
+
+                # Stream the content chunk by chunk
+                chunk_size = 8192
+                while True:
+                    chunk = all_data_io.read(chunk_size)
+                    if not chunk:
+                        break
+                    yield chunk
+                    # Allow other tasks to run
+                    await asyncio.sleep(0)
+                all_data_io.close()
+
+        except duckdb.CatalogException as e:
+            # Stream an error message if the table doesn't exist
+            yield f"Error: Table '{table_name}' not found.".encode('utf-8')
+            logger.error(f"Error downloading table '{table_name}': {e}")
+        except duckdb.Error as e:
+            yield f"Error: Could not export table '{table_name}'. {e}".encode('utf-8')
+            logger.error(f"Error downloading table '{table_name}': {e}")
+        except Exception as e:
+            yield f"Error: An unexpected error occurred.".encode('utf-8')
+            logger.error(f"Unexpected error downloading table '{table_name}': {e}")
+
+
+    return StreamingResponse(
+        stream_csv_data(),
+        media_type="text/csv",
+        headers={"Content-Disposition": f"attachment; filename={table_name}.csv"},
+    )
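(Aside, not part of this commit: the comments inside stream_csv_data describe offloading the blocking DuckDB call to a worker thread managed by asyncio. A rough sketch of that idea, independent of main.py's get_db() generator; the function name and database path are hypothetical:

    import asyncio
    import duckdb

    async def fetch_table_csv(db_path: str, table: str) -> str:
        """Run the blocking DuckDB fetch in a worker thread so the event loop stays free."""
        def blocking_fetch() -> str:
            con = duckdb.connect(db_path, read_only=True)
            try:
                # `table` is assumed to already be a sanitized identifier; .df() needs pandas installed.
                return con.execute(f"SELECT * FROM {table}").df().to_csv(index=False)
            finally:
                con.close()
        return await asyncio.to_thread(blocking_fetch)

The returned string could then be chunked and yielded exactly as the generator above does, without blocking other requests during the fetch.)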
+
+
+@app.get("/download/database", summary="Download Database File")
+async def download_database_file():
+    """Downloads the entire DuckDB database file."""
+    if not os.path.exists(DATABASE_PATH):
+        raise HTTPException(status_code=404, detail="Database file not found.")
+
+    # Ensure connections are closed before downloading to avoid partial writes/locking issues.
+    # This is tricky with the current get_db pattern. A proper app stop/start or
+    # dedicated maintenance mode would be better. For this demo, we hope for the best.
+    logger.warning("Attempting to download database file. Ensure no active writes are occurring.")
+
+    return FileResponse(
+        path=DATABASE_PATH,
+        filename=os.path.basename(DATABASE_PATH),
+        media_type="application/octet-stream"  # Generic binary file type
+    )
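(Aside, not part of this commit: one way to reduce the risk flagged by the warning above is to checkpoint the database before serving the file, so the write-ahead log is merged into the main .db file. A sketch, assuming a short-lived extra connection is acceptable; CHECKPOINT is standard DuckDB SQL:

    import duckdb

    def checkpoint_before_download(db_path: str) -> None:
        # Merge the WAL into the main database file so the .db file on disk
        # is self-contained when it is copied or streamed out.
        con = duckdb.connect(db_path)
        try:
            con.execute("CHECKPOINT")
        finally:
            con.close()

This does not remove the need to avoid concurrent writes during the copy, but it makes serving a stale or torn file less likely.)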
+
+
+# --- Health Check ---
+@app.get("/health", summary="Health Check", response_model=ApiResponse)
 async def health_check():
+    """Checks if the API and database connection are working."""
     try:
+        for conn in get_db():
+            conn.execute("SELECT 1")
+            return {"message": "API is healthy and database connection is successful."}
     except Exception as e:
+        logger.error(f"Health check failed: {e}")
+        raise HTTPException(status_code=503, detail=f"Health check failed: {e}")
+
+# --- Optional: Add Startup/Shutdown events if needed ---
+# @app.on_event("startup")
+# async def startup_event():
+#     # Initialize database connection pool, etc.
+#     logger.info("Application startup.")
+
+# @app.on_event("shutdown")
+# async def shutdown_event():
+#     # Clean up resources, close connections, etc.
+#     logger.info("Application shutdown.")
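(Aside, not part of this commit: if the commented-out startup/shutdown hooks are ever enabled, recent FastAPI versions prefer a lifespan context manager over the deprecated @app.on_event decorators. A minimal sketch of that shape:

    from contextlib import asynccontextmanager
    from fastapi import FastAPI

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        # startup work: open shared connections, create tables, etc.
        yield
        # shutdown work: close connections, checkpoint the database, etc.

    app = FastAPI(lifespan=lifespan)
)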
requirements.txt
CHANGED
@@ -1,10 +1,5 @@
-fastapi
-uvicorn[standard]
-duckdb>=
-
-
-httpx
-requests>=2.20.0
-aiofiles>=0.8.0
-pandas>=1.5.0
-pyarrow>=10.0.0
+fastapi
+uvicorn[standard]
+duckdb>=0.9.0
+python-multipart
+aiofiles
test_api.py
DELETED
@@ -1,246 +0,0 @@
-import pytest
-import os
-import shutil
-import tempfile
-import zipfile
-import json
-from pathlib import Path
-from typing import List, Dict, Any
-from unittest.mock import patch
-
-pd = pytest.importorskip("pandas")
-pa = pytest.importorskip("pyarrow")
-pa_ipc = pytest.importorskip("pyarrow.ipc")
-
-from fastapi.testclient import TestClient
-import main  # Import main to reload and access config
-
-# --- Test Fixtures --- (Keep client fixture as before)
-@pytest.fixture(scope="module")
-def client():
-    with patch.dict(os.environ, {"DUCKDB_API_DB_PATH": ":memory:"}):
-        import importlib
-        importlib.reload(main)
-        main.TEMP_EXPORT_DIR.mkdir(exist_ok=True)
-        print(f"TestClient using temp export dir: {main.TEMP_EXPORT_DIR}")
-        with TestClient(main.app) as c:
-            yield c
-        print(f"Cleaning up test export dir: {main.TEMP_EXPORT_DIR}")
-        for item in main.TEMP_EXPORT_DIR.iterdir():
-            try:
-                if item.is_file():
-                    os.remove(item)
-                elif item.is_dir():
-                    shutil.rmtree(item)
-            except Exception as e:
-                print(f"Error cleaning up {item}: {e}")
-
-# --- Test Classes ---
-
-class TestHealth:  # (Keep as before)
-    def test_health_check(self, client: TestClient):
-        response = client.get("/health")
-        assert response.status_code == 200
-        assert response.json() == {"status": "ok", "message": None}
-
-class TestExecution:  # (Keep as before)
-    def test_execute_create(self, client: TestClient):
-        response = client.post("/execute", json={"sql": "CREATE TABLE test_table(id INTEGER, name VARCHAR);"})
-        assert response.status_code == 200
-        assert response.json() == {"status": "success", "message": None}
-        response_fail = client.post("/execute", json={"sql": "CREATE TABLE test_table(id INTEGER);"})
-        assert response_fail.status_code == 400
-
-    def test_execute_insert(self, client: TestClient):
-        client.post("/execute", json={"sql": "CREATE OR REPLACE TABLE test_table(id INTEGER, name VARCHAR);"})
-        response = client.post("/execute", json={"sql": "INSERT INTO test_table VALUES (1, 'Alice')"})
-        assert response.status_code == 200
-        query_response = client.post("/query/fetchall", json={"sql": "SELECT COUNT(*) FROM test_table"})
-        assert query_response.status_code == 200
-        assert query_response.json() == [[1]]
-
-    def test_execute_insert_params(self, client: TestClient):
-        client.post("/execute", json={"sql": "CREATE OR REPLACE TABLE test_table(id INTEGER, name VARCHAR);"})
-        response = client.post("/execute", json={"sql": "INSERT INTO test_table VALUES (?, ?)", "parameters": [2, "Bob"]})
-        assert response.status_code == 200
-        query_response = client.post("/query/fetchall", json={"sql": "SELECT * FROM test_table WHERE id = 2"})
-        assert query_response.status_code == 200
-        assert query_response.json() == [[2, "Bob"]]
-
-    def test_execute_invalid_sql(self, client: TestClient):
-        response = client.post("/execute", json={"sql": "INVALID SQL STATEMENT"})
-        assert response.status_code == 400
-        assert "Parser Error" in response.json()["detail"]
-
-class TestQuerying:  # (Keep as before)
-    @pytest.fixture(scope="class", autouse=True)
-    def setup_data(self, client: TestClient):
-        client.post("/execute", json={"sql": "CREATE OR REPLACE TABLE query_test(id INTEGER, val VARCHAR)"})
-        client.post("/execute", json={"sql": "INSERT INTO query_test VALUES (1, 'one'), (2, 'two'), (3, 'three')"})
-
-    def test_query_fetchall(self, client: TestClient):
-        response = client.post("/query/fetchall", json={"sql": "SELECT * FROM query_test ORDER BY id"})
-        assert response.status_code == 200
-        assert response.json() == [[1, 'one'], [2, 'two'], [3, 'three']]
-
-    def test_query_fetchall_params(self, client: TestClient):
-        response = client.post("/query/fetchall", json={"sql": "SELECT * FROM query_test WHERE id > ? ORDER BY id", "parameters": [1]})
-        assert response.status_code == 200
-        assert response.json() == [[2, 'two'], [3, 'three']]
-
-    def test_query_fetchall_empty(self, client: TestClient):
-        response = client.post("/query/fetchall", json={"sql": "SELECT * FROM query_test WHERE id > 100"})
-        assert response.status_code == 200
-        assert response.json() == []
-
-    def test_query_dataframe(self, client: TestClient):
-        response = client.post("/query/dataframe", json={"sql": "SELECT * FROM query_test ORDER BY id"})
-        assert response.status_code == 200
-        data = response.json()
-        assert data["columns"] == ["id", "val"]
-        assert data["records"] == [
-            {"id": 1, "val": "one"},
-            {"id": 2, "val": "two"},
-            {"id": 3, "val": "three"}
-        ]
-
-    def test_query_dataframe_invalid_sql(self, client: TestClient):
-        response = client.post("/query/dataframe", json={"sql": "SELECT non_existent FROM query_test"})
-        assert response.status_code == 400
-        assert "Binder Error" in response.json()["detail"]
-
-class TestStreaming:  # (Keep as before)
-    @pytest.fixture(scope="class", autouse=True)
-    def setup_data(self, client: TestClient):
-        client.post("/execute", json={"sql": "CREATE OR REPLACE TABLE stream_test AS SELECT range AS id, range % 5 AS category FROM range(10)"})
-
-    def test_stream_arrow(self, client: TestClient):
-        response = client.post("/query/stream/arrow", json={"sql": "SELECT * FROM stream_test"})
-        assert response.status_code == 200
-        assert response.headers["content-type"] == "application/vnd.apache.arrow.stream"
-        if not response.content:
-            pytest.fail("Arrow stream response content is empty")
-        try:
-            reader = pa_ipc.open_stream(response.content)
-            table = reader.read_all()
-        except pa.ArrowInvalid as e:
-            pytest.fail(f"Failed to read Arrow stream: {e}")
-        assert table.num_rows == 10
-        assert table.column_names == ["id", "category"]
-        assert table.column('id').to_pylist() == list(range(10))
-
-    def test_stream_arrow_empty(self, client: TestClient):
-        response = client.post("/query/stream/arrow", json={"sql": "SELECT * FROM stream_test WHERE id < 0"})
-        assert response.status_code == 200
-        assert response.headers["content-type"] == "application/vnd.apache.arrow.stream"
-        try:
-            reader = pa_ipc.open_stream(response.content)
-            table = reader.read_all()
-            assert table.num_rows == 0
-        except pa.ArrowInvalid as e:
-            print(f"Received ArrowInvalid for empty stream, which is acceptable: {e}")
-            assert response.content == b''
-
-    def test_stream_jsonl(self, client: TestClient):
-        response = client.post("/query/stream/jsonl", json={"sql": "SELECT * FROM stream_test ORDER BY id"})
-        assert response.status_code == 200
-        assert response.headers["content-type"] == "application/jsonl"
-        lines = response.text.strip().split('\n')
-        records = [json.loads(line) for line in lines if line]
-        assert len(records) == 10
-        assert records[0] == {"id": 0, "category": 0}
-        assert records[9] == {"id": 9, "category": 4}
-
-    def test_stream_jsonl_empty(self, client: TestClient):
-        response = client.post("/query/stream/jsonl", json={"sql": "SELECT * FROM stream_test WHERE id < 0"})
-        assert response.status_code == 200
-        assert response.headers["content-type"] == "application/jsonl"
-        assert response.text.strip() == ""
-
-class TestExportDownload:  # (Keep setup_data as before)
-    @pytest.fixture(scope="class", autouse=True)
-    def setup_data(self, client: TestClient):
-        client.post("/execute", json={"sql": "CREATE OR REPLACE TABLE export_table(id INTEGER, name VARCHAR, price DECIMAL(5,2))"})
-        client.post("/execute", json={"sql": "INSERT INTO export_table VALUES (1, 'Apple', 0.50), (2, 'Banana', 0.30), (3, 'Orange', 0.75)"})
-
-    @pytest.mark.parametrize(
-        "endpoint_suffix, expected_content_type, expected_filename_ext, validation_fn",
-        [
-            ("csv", "text/csv", ".csv", lambda c: b"id,name,price\n1,Apple,0.50\n" in c),
-            ("parquet", "application/vnd.apache.parquet", ".parquet", lambda c: c.startswith(b"PAR1")),
-            # --- MODIFIED JSON/JSONL Lambdas ---
-            ("json", "application/json", ".json", lambda c: c.strip().startswith(b'[') and c.strip().endswith(b']')),
-            ("jsonl", "application/jsonl", ".jsonl", lambda c: b'"id":1' in c and b'"name":"Apple"' in c and b'\n' in c),
-            # --- END MODIFICATION ---
-        ]
-    )
-    def test_export_data(self, client: TestClient, endpoint_suffix, expected_content_type, expected_filename_ext, validation_fn, tmp_path):
-        endpoint = f"/export/data/{endpoint_suffix}"
-        payload = {"source": "export_table"}
-        if endpoint_suffix == 'csv':
-            payload['options'] = {'HEADER': True}
-
-        response = client.post(endpoint, json=payload)
-
-        assert response.status_code == 200, f"Request to {endpoint} failed: {response.text}"
-        assert response.headers["content-type"].startswith(expected_content_type)
-        assert "content-disposition" in response.headers
-        assert f'filename="export_export_table{expected_filename_ext}"' in response.headers["content-disposition"]
-
-        downloaded_path = tmp_path / f"downloaded{expected_filename_ext}"
-        with open(downloaded_path, "wb") as f:
-            f.write(response.content)
-        assert downloaded_path.exists()
-        assert validation_fn(response.content), f"Validation failed for {endpoint_suffix}"
-
-        # Test with a query source
-        payload = {"source": "SELECT id, name FROM export_table WHERE price > 0.40 ORDER BY id"}
-        response = client.post(endpoint, json=payload)
-        assert response.status_code == 200
-        assert f'filename="export_query{expected_filename_ext}"' in response.headers["content-disposition"]
-        assert len(response.content) > 0
-
-    # --- Keep test_export_database as before ---
-    def test_export_database(self, client: TestClient, tmp_path):
-        client.post("/execute", json={"sql": "CREATE TABLE IF NOT EXISTS another_table(x int)"})
-        response = client.post("/export/database")
-        assert response.status_code == 200
-        assert response.headers["content-type"] == "application/zip"
-        assert "content-disposition" in response.headers
-        assert response.headers["content-disposition"].startswith("attachment; filename=")
-        assert 'filename="in_memory_db_export.zip"' in response.headers["content-disposition"]
-        zip_path = tmp_path / "db_export.zip"
-        with open(zip_path, "wb") as f:
-            f.write(response.content)
-        assert zip_path.exists()
-        with zipfile.ZipFile(zip_path, 'r') as z:
-            print(f"Zip contents: {z.namelist()}")
-            assert "schema.sql" in z.namelist()
-            assert "load.sql" in z.namelist()
-            assert any(name.startswith("export_table") for name in z.namelist())
-            assert any(name.startswith("another_table") for name in z.namelist())
-
-
-class TestExtensions:  # (Keep as before)
-    def test_install_extension_fail(self, client: TestClient):
-        response = client.post("/extensions/install", json={"extension_name": "nonexistent_dummy_ext"})
-        assert response.status_code >= 400
-        assert "Error during install" in response.json()["detail"] or "Failed to download" in response.json()["detail"]
-
-    def test_load_extension_fail(self, client: TestClient):
-        response = client.post("/extensions/load", json={"extension_name": "nonexistent_dummy_ext"})
-        assert response.status_code == 400
-        # --- MODIFIED Assertion ---
-        assert "Error loading extension" in response.json()["detail"]
-        # --- END MODIFICATION ---
-        assert "not found" in response.json()["detail"].lower()
-
-    @pytest.mark.skip(reason="Requires httpfs extension to be available for install/load")
-    def test_install_and_load_httpfs(self, client: TestClient):
-        install_response = client.post("/extensions/install", json={"extension_name": "httpfs"})
-        assert install_response.status_code == 200
-        assert install_response.json()["status"] == "success"
-
-        load_response = client.post("/extensions/load", json={"extension_name": "httpfs"})
-        assert load_response.status_code == 200
-        assert load_response.json()["status"] == "success"