|
import requests |
|
import json |
|
import os |
|
import os |
|
from dotenv import load_dotenv |
|
import streamlit as st |
|
|
|
# Hugging Face access token, read from Streamlit's secrets store under the
# "hf_token" key. generate_schema() sends it as the Bearer token of the
# Authorization header.
# NOTE(review): this lookup runs at import time; presumably it fails if the
# secret is not configured — confirm the deployment defines "hf_token".
API_KEY = st.secrets["hf_token"]
|
|
|
|
|
|
|
def _parse_schema_text(text):
    """Decode the JSON schema contained in the model output *text*.

    The whole stripped text is tried first (the strict "pure JSON" case).
    If that fails, the first decodable JSON object embedded in the text is
    returned, which tolerates surrounding prose or Markdown code fences.

    Raises:
        ValueError: if no JSON value can be decoded from *text*.
    """
    stripped = text.strip()
    try:
        return json.loads(stripped)
    except ValueError:
        # Not pure JSON; fall through and look for an embedded object.
        pass

    decoder = json.JSONDecoder()
    start = stripped.find("{")
    while start != -1:
        try:
            schema, _ = decoder.raw_decode(stripped, start)
        except ValueError:
            start = stripped.find("{", start + 1)
        else:
            return schema
    raise ValueError("no JSON object found in model output")


def generate_schema(user_prompt, *, timeout=120):
    """Generate a synthetic dataset schema using the Hugging Face API.

    Sends a fixed system prompt followed by *user_prompt* to the endpoint
    named by the module-level ``HF_MODEL_URL`` (defined outside this
    section), authenticating with ``API_KEY``.

    Args:
        user_prompt: Free-text description of the desired dataset.
        timeout: Seconds to wait for the HTTP request before giving up.
            The default is generous because ``wait_for_model`` asks the
            API to hold the request while the model loads.

    Returns:
        The decoded JSON schema on success (expected to hold "columns",
        "types" and "size", as requested in the prompt), otherwise a dict
        of the form ``{"error": "<message>"}``.
    """
    system_prompt = """
    You are an expert data scientist designing synthetic datasets.
    For any given dataset description, generate:
    - Column names
    - Data types (string, int, float, date)
    - Approximate row count

    Output in **pure JSON** format like:
    {
        "columns": ["PatientID", "Age", "Gender", "Diagnosis"],
        "types": ["int", "int", "string", "string"],
        "size": 500
    }
    """

    prompt = system_prompt + "\n\nUser request: " + user_prompt
    payload = {
        "inputs": prompt,
        "options": {"wait_for_model": True},
    }

    try:
        response = requests.post(
            HF_MODEL_URL,
            headers={"Authorization": f"Bearer {API_KEY}"},
            json=payload,
            timeout=timeout,
        )
    except requests.exceptions.RequestException as exc:
        # Connection failures and timeouts are reported the same way as
        # every other failure of this function: as an error dict.
        return {"error": f"API request failed: {exc}"}

    if response.status_code != 200:
        return {"error": f"API request failed. Status Code: {response.status_code}"}

    try:
        output = response.json()[0]["generated_text"]
        # Text-generation endpoints may echo the prompt at the start of the
        # generated text. Drop it so the example JSON inside the prompt is
        # never mistaken for the model's answer.
        if output.startswith(prompt):
            output = output[len(prompt):]
        return _parse_schema_text(output)
    except (ValueError, LookupError, TypeError, AttributeError):
        # ValueError covers JSON decoding failures; the others cover a
        # response body that is not a list of {"generated_text": str}.
        return {"error": "Invalid JSON output from model. Try again."}