"""FastAPI service that sends an uploaded grocery-list image to Gemini.

Exposes ``POST /extract_text``, which decodes the uploaded image, asks the
Gemini model to extract (and translate) the grocery items, and returns the
JSON parsed from the model's reply.
"""

import io
import json
import os
import re

import google.generativeai as genai
import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image

app = FastAPI()

# CORS Middleware
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive -- confirm this is intended for the deployment target.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Configure Gemini API (the key is read from the environment at import time).
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# Greedy match from the first "[" to the last "]" in the model reply.
_JSON_ARRAY_RE = re.compile(r'\[.*\]', re.DOTALL)

# Instruction text sent to the model together with the image.
# NOTE(review): the example output uses single quotes, which is not valid
# JSON even though the prompt asks for valid JSON -- consider revising.
_EXTRACTION_PROMPT = (
    "Extract all grocery items from this image, but only include items written in Malayalam or English. "
    "Ignore words written in Tamil, Hindi, or any other language. "
    "If an item is not in Malayalam or English, do not include it in the output. "
    "Translate every Malayalam word strictly to English. If translation is unknown, return 'Unknown'. "
    "Also, check if the image contains a date, address, phone number, or note, and return them. "
    "Only return JSON—do NOT include explanations, introductions, or extra text. "
    "Ensure JSON is valid with correct formatting. \n"
    "Output format:\n"
    "{'items': ["
    "{'number': 1, 'item': 'കടുക്', 'english_name': 'Mustard', 'quantity': 100, 'unit': 'gm'},"
    "{'number': 2, 'item': 'Sugar', 'english_name': 'Sugar', 'quantity': 1, 'unit': 'kg'}"
    "],"
    "'date': 'YYYY-MM-DD',"
    "'address': 'Extracted Address',"
    "'phone': 'Extracted Phone Number',"
    "'note': 'Additional notes if any'}"
)


def extract_text_and_translate(image):
    """Extract grocery items from the image and translate Malayalam items to English.

    Args:
        image: The image object passed to ``generate_content`` alongside the
            prompt (the endpoint below passes a PIL image).

    Returns:
        The value parsed by ``extract_valid_json`` from the model reply, or
        ``{"items": [], "message": ...}`` when nothing usable was parsed
        (no match, invalid JSON, or an empty result).

    Raises:
        HTTPException: status 500 if the model call or reply handling fails.
    """
    try:
        model = genai.GenerativeModel('gemini-2.0-flash')
        response = model.generate_content([_EXTRACTION_PROMPT, image])
        extracted_json = extract_valid_json(response.text.strip())
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error extracting text: {str(e)}"
        ) from e

    if not extracted_json:
        return {"items": [], "message": "Gemini response did not contain valid JSON."}
    return extracted_json


def extract_valid_json(response_text):
    """Extract and parse the JSON array embedded in Gemini's response.

    Args:
        response_text: Raw text of the model reply.

    Returns:
        The parsed JSON value of the first-"[" to last-"]" span, or ``None``
        when the text is empty, contains no such span, or the span is not
        valid JSON.
    """
    # Guard against an empty/None reply so re.search never sees a non-string.
    if not response_text:
        return None

    # NOTE(review): only the bracketed array is parsed, so the date, address,
    # phone and note fields requested by the prompt are dropped. Kept as-is to
    # preserve the response shape existing clients receive.
    match = _JSON_ARRAY_RE.search(response_text)
    if match is None:
        return None
    try:
        return json.loads(match.group(0))
    except json.JSONDecodeError:
        return None


@app.post("/extract_text")
async def extract_text(image: UploadFile = File(...)):
    """API Endpoint for extracting and translating grocery list items.

    Args:
        image: The uploaded image file.

    Returns:
        Whatever ``extract_text_and_translate`` returns for the decoded image.

    Raises:
        HTTPException: status 400 if the upload cannot be read or decoded as
            an image; status 500 (propagated unchanged from
            ``extract_text_and_translate``) if extraction fails.
    """
    # Only upload handling is guarded here, so a 500 raised during extraction
    # is no longer caught and re-wrapped as a 400.
    try:
        image_contents = await image.read()
        pil_image = Image.open(io.BytesIO(image_contents))
        # Image.open is lazy; decode now so a corrupt upload is reported as a
        # client error rather than failing later inside the model call.
        pil_image.load()
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    # The Gemini call is blocking; run it in a worker thread so the event
    # loop stays free to serve other requests.
    return await run_in_threadpool(extract_text_and_translate, pil_image)


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)