# AI-Powered Medicinal Plants Database — Gradio app (Hugging Face Space).
import os | |
import gradio as gr | |
from duckduckgo_search import DDGS | |
import sqlite3 | |
import json | |
import requests | |
from typing import List, Dict, Optional | |
import time | |
from bs4 import BeautifulSoup | |
# Endpoint secret read from the environment.
# NOTE(review): despite the name, this is used as a URL prefix below
# (requests.get(f"{specialtoken}/{prompt}")) — confirm the secret's format.
specialtoken=os.getenv("SPECIALTOKEN")
#plants=['Turmeric', 'Aloe Vera', 'Neem', 'Tulsi', 'Ashwagandha', 'Ginger', 'Basil', 'Peppermint', 'Lavender', 'Eucalyptus', 'Chamomile', 'Sandalwood', 'Giloy', 'Haritaki', 'Brahmi', 'Gotu Kola', 'Holy Basil', 'Fenugreek', 'Licorice', 'Fennel', 'Cinnamon', 'Clove', 'Black Pepper', 'Cardamom', 'Neem', 'Indian Gooseberry', 'Saffron', 'Thyme', 'Valerian', 'Marigold', 'Ginseng', 'Dandelion', 'Hibiscus', 'Milk Thistle', 'Magnolia', "St. John's Wort", 'Yarrow', 'Calendula', 'Coriander', 'Senna', 'Echinacea', 'Moringa', 'Plantain', 'Amla', 'Shatavari', 'Peppermint', 'Chamomile', 'Gotu Kola', 'Ashoka', 'Arnica', 'Burdock Root', "Cat's Claw", "Devil's Claw", 'Elderberry', 'Feverfew', 'Ginkgo Biloba', 'Goldenseal', 'Hawthorn', 'Kava', 'Lemon Balm', 'Marshmallow Root', 'Nettle', 'Olive Leaf', 'Passionflower', 'Red Clover', 'Reishi Mushroom', 'Rhodiola', 'Sage', 'Saw Palmetto', 'Slippery Elm', 'Stinging Nettle', 'Witch Hazel', 'Yellow Dock', 'Ashitaba', 'Bael', 'Bacopa', 'Cumin', 'Guduchi', 'Jamun', 'Jatamansi', 'Karela', 'Gudmar', 'Schisandra', 'Baikal Skullcap', 'Mullein', 'Chrysanthemum', 'Catuaba', 'Dong Quai', 'Jiaogulan', 'Muira Puama', 'Catnip', 'Olive'] | |
#plants = ["Echinacea", "Ginkgo biloba", "Turmeric"] | |
# Instruction template sent to the LLM endpoint: asks for a single JSON
# object with a fixed key set describing one plant, built from scraped
# web content.  Filled in via .format(plant_name=..., content=...).
PROMPT_TEMPLATE = """
Extract plant information from the following content in JSON format with these keys:
["Name", "Scientific Name", "Alternate Names", "Description", "Plant Family",
"Origin", "Growth Habitat", "Active Components", "Treatable Conditions",
"Preparation Methods", "Dosage", "Duration", "Contraindications", "Side Effects",
"Interactions", "Part Used", "Harvesting Time", "Storage Tips", "Images",
"Related Videos", "Sources"]
Plant: {plant_name}
Content:
{content}
Output ONLY valid JSON with the specified keys. Use empty strings for missing information.
"""
def fetch_page_content(url: str) -> str:
    """Fetch a web page and return its visible text, or "" on failure.

    Strips script/style/navigation boilerplate, then returns at most the
    first 5000 characters of the remaining text.  Any network error,
    non-2xx response, or blocked/unavailable page yields an empty string
    so callers can safely concatenate the result into an LLM prompt.
    """
    try:
        response = requests.get(url, timeout=10)
        # Non-2xx responses (403, 404, ...) carry no useful plant content.
        if not response.ok:
            return ""
        soup = BeautifulSoup(response.text, 'html.parser')
        # Remove boilerplate elements that add noise to the extracted text.
        for element in soup(['script', 'style', 'header', 'footer', 'nav']):
            element.decompose()
        text = soup.get_text(separator='\n', strip=True)
        # Some hosts return HTTP 200 for what is really an error page.
        for marker in ["Page not available", "403 Forbidden"]:
            if marker in text:
                return ""
        return text[:5000]  # Cap each page's prompt contribution at 5k chars.
    except Exception as e:
        # BUG FIX: the old code returned f"Error fetching page: {e}", which
        # callers concatenated straight into the LLM prompt.  Log and return
        # "" instead, consistent with the blocked-page path above.
        print(f"Error fetching {url}: {e}")
        return ""
def search_full_plant_information(plant_name: str) -> str:
    """Search the web for medicinal information about *plant_name* and ask
    the LLM endpoint to structure it as JSON.

    Returns the raw LLM response text, or "No data Found!" when the web
    search produced no results (or failed).
    """
    from urllib.parse import quote  # local: only needed by this function

    query = (
        f"{plant_name} plant medicinal uses scientific information "
        "site:.edu OR site:.gov OR site:.org"
    )
    search_results = ""
    try:
        search_results = DDGS().text(keywords=query, max_results=5)
    except Exception as e:
        # DuckDuckGo rate-limits aggressively; treat failure as "no results",
        # but log it instead of silently swallowing (old code used bare pass).
        print(f"Search error for {plant_name!r}: {e}")
    if search_results:
        # Combine snippet bodies with each hit's fetched page text.
        parts = []
        for result in search_results:
            parts.append(result['body'])
            parts.append(fetch_page_content(result['href']))
        content = "".join(parts)
        prompt = PROMPT_TEMPLATE.format(plant_name=plant_name, content=content)
        # BUG FIX: the prompt travels as a URL path segment, so it must be
        # percent-encoded; the raw f-string broke on '#', '?', '/', spaces.
        response = requests.get(f"{specialtoken}/{quote(prompt, safe='')}")
        print(response.text)
        return response.text
    return "No data Found!"
DB_NAME = "plants.db"  # SQLite database file, created in the working directory.

def save_to_db(plant_data: Dict) -> bool:
    """Insert one plant record into the ``plants`` table.

    List-valued fields are flattened to comma-separated strings; column
    names are derived by snake_casing the dict keys.  Returns True on
    success, False on any database error (which is logged to stdout).
    """
    try:
        # Flatten list fields so every stored value is a plain string.
        for field in ["Alternate Names", "Active Components", "Treatable Conditions",
                      "Preparation Methods", "Contraindications", "Side Effects",
                      "Interactions"]:
            if field in plant_data:
                if isinstance(plant_data[field], list):
                    plant_data[field] = ", ".join(plant_data[field])
                elif not isinstance(plant_data[field], str):
                    plant_data[field] = str(plant_data[field])
        columns = []
        values = []
        for key, value in plant_data.items():
            if key.lower() == "error":  # Skip error field
                continue
            columns.append(key.lower().replace(" ", "_"))
            # BUG FIX: the old `if value` test turned 0/False/"" into NULL;
            # only genuinely missing values should be stored as NULL.
            values.append(str(value) if value is not None and value != "" else None)
        # NOTE(review): column names are interpolated into the SQL.  They come
        # from our own prompt-template keys here, but sanitize/whitelist them
        # if this is ever fed user-supplied dicts.
        columns_str = ", ".join(columns)
        placeholders = ", ".join(["?"] * len(columns))
        conn = sqlite3.connect(DB_NAME)
        try:
            conn.execute(
                f"INSERT INTO plants ({columns_str}) VALUES ({placeholders})",
                values,
            )
            conn.commit()
        finally:
            conn.close()  # BUG FIX: old code leaked the connection on error.
        return True
    except Exception as e:
        print(f"Database save error: {e}")
        return False
def process_plants(plants_array: List[str]) -> str:
    """Fetch information for every plant name and return the combined text.

    Blank names are skipped; lookups are spaced 2 s apart for rate limiting.
    BUG FIX: the old version ended with ``return print(results)``, which
    always returned None (violating the ``-> str`` annotation) and never
    appended anything to ``results``.
    """
    results: List[str] = []
    for plant in plants_array:
        plant = plant.strip()
        if not plant:
            continue
        print(f"Processing {plant}...")
        plant_data = search_full_plant_information(plant)
        if plant_data:
            results.append(plant_data)
            # DB persistence stays disabled: search_full_plant_information
            # returns raw text, not the Dict that save_to_db expects.
            # save_success = save_to_db(plant_data)
        time.sleep(2)  # Rate limiting between web searches.
    return "\n\n".join(results)
def split_and_search(text: str) -> str:
    """Gradio handler: look up each comma-separated plant name in *text*.

    Returns the concatenated, lightly-cleaned LLM responses.
    BUG FIX: the original called search_full_plant_information up to three
    times per plant (once unstripped just to probe for a code fence, then
    again stripped in each branch); each plant is now searched exactly once,
    always with the stripped name.
    """
    all_data = ""
    for name in text.split(","):
        raw = search_full_plant_information(name.strip())
        # The LLM may wrap its JSON in a ``` fence; unwrap it if present.
        if "```" in raw:
            raw = raw.split('```')[1]
        # Drop any trailing sponsor blurb and the 'json' fence label.
        all_data += raw.split("**Sponsor**")[0].replace('json', '') + ",\n\n"
    return all_data
#use it here : | |
#process_plants(plants) | |
#or use interface: | |
# Build the Gradio UI: one tab with a comma-separated plant-name textbox,
# a submit button, and a read-only results area.
with gr.Blocks(title="AI-Powered Medicinal Plants Database") as app:
    gr.Markdown("# 🌿 AI-Powered Medicinal Plants Database")
    with gr.Tab("Fetch & Process Plants"):
        gr.Markdown("### Enter plant names (comma separated)")
        with gr.Row():
            plant_input = gr.Textbox(label="Plant Names",
                                     placeholder="e.g., Neem, Peppermint, Aloe Vera")
            fetch_btn = gr.Button("Process Plants", variant="primary")
        output_area = gr.Textbox(label="AI-Processed Results", lines=12, interactive=False)
        #json_output = gr.JSON(label="AI-Processed Results")
        # Wire the button to the search pipeline; output is plain text.
        fetch_btn.click(
            fn=split_and_search,
            #fn=lambda x: process_plants([p.strip() for p in x.split(",")]),
            inputs=plant_input,
            outputs=output_area #json_output
        )

# Launch the UI when run as a script (share=False: no public tunnel link).
if __name__ == "__main__":
    app.launch(debug=True, share=False)