vrindagopinath committed on
Commit
2078378
·
verified ·
1 Parent(s): 0e8f347

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -26
app.py CHANGED
@@ -2,19 +2,20 @@ import gradio as gr
2
  import google.generativeai as genai
3
  from PIL import Image
4
  import os
 
5
 
6
  # Direct API key placement
7
  API_KEY = 'AIzaSyBMZrhMXRpQKp7M-JcN2Qk73afeta5Mv5Y'
8
 
9
- def extract_malayalam_text(image):
10
  """
11
- Extract text specifically in Malayalam from the image
12
 
13
  Args:
14
  image (PIL.Image): Uploaded image
15
 
16
  Returns:
17
- str: Extracted Malayalam text
18
  """
19
  # Validate input
20
  if image is None:
@@ -27,48 +28,60 @@ def extract_malayalam_text(image):
27
  # Use Gemini 2.0 Flash model
28
  model = genai.GenerativeModel('gemini-2.0-flash')
29
 
30
- # Highly specific prompt for Malayalam text extraction
31
  response = model.generate_content(
32
  [
33
- "CRITICAL INSTRUCTIONS FOR MALAYALAM TEXT EXTRACTION: "
34
- "1. This image contains text in Malayalam script ONLY. "
35
- "2. Extract ONLY Malayalam characters and script. "
36
- "3. Use pure Malayalam Unicode characters. "
37
- "4. If the text is in another language, state 'No Malayalam text found'. "
38
- "5. Preserve exact Malayalam script, including any special characters. "
39
- "6. DO NOT translate or convert to any other script. "
40
- "7. If there are multiple lines, preserve line breaks. "
41
- "8. Focus EXCLUSIVELY on Malayalam script.",
42
  image
43
  ],
44
  generation_config=genai.types.GenerationConfig(
45
- temperature=0.1, # Ultra-low temperature for precise extraction
46
- max_output_tokens=500 # Increased to capture more text
47
  )
48
  )
49
 
50
  # Extract text from response
51
  extracted_text = response.text
52
 
53
- # Validate Malayalam characters
54
- malayalam_chars = [char for char in extracted_text if '\u0D00' <= char <= '\u0D7F']
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- # Check if extracted text contains Malayalam characters
57
- if not malayalam_chars:
58
- return "No Malayalam text detected. Please verify the image contains Malayalam script."
59
 
60
- return extracted_text
61
 
62
  except Exception as e:
63
  return f"An error occurred: {str(e)}"
64
 
65
  # Create Gradio Interface
66
  demo = gr.Interface(
67
- fn=extract_malayalam_text,
68
- inputs=gr.Image(type="pil", label="Upload Malayalam Text Image"),
69
- outputs=gr.Textbox(label="Extracted Malayalam Text", lines=10),
70
- title="Malayalam Text Extractor",
71
- description="Upload an image containing Malayalam text for precise extraction."
72
  )
73
 
74
  # Launch the app
 
2
  import google.generativeai as genai
3
  from PIL import Image
4
  import os
5
+ import re
6
 
7
  # Direct API key placement
8
  API_KEY = 'AIzaSyBMZrhMXRpQKp7M-JcN2Qk73afeta5Mv5Y'
9
 
10
+ def extract_mixed_text(image):
11
  """
12
+ Extract Malayalam letters, English letters, and numbers from the image
13
 
14
  Args:
15
  image (PIL.Image): Uploaded image
16
 
17
  Returns:
18
+ str: Extracted text
19
  """
20
  # Validate input
21
  if image is None:
 
28
  # Use Gemini 2.0 Flash model
29
  model = genai.GenerativeModel('gemini-2.0-flash')
30
 
31
+ # Comprehensive prompt for text extraction
32
  response = model.generate_content(
33
  [
34
+ "CRITICAL INSTRUCTIONS FOR TEXT EXTRACTION: "
35
+ "1. Extract ALL text from this image. "
36
+ "2. Include: "
37
+ " - Malayalam letters "
38
+ " - English letters "
39
+ " - Numbers "
40
+ "3. Preserve original formatting and line breaks. "
41
+ "4. Maintain the exact order of characters. "
42
+ "5. Do not modify or translate the text.",
43
  image
44
  ],
45
  generation_config=genai.types.GenerationConfig(
46
+ temperature=0.1, # Precise extraction
47
+ max_output_tokens=500 # Capture full text
48
  )
49
  )
50
 
51
  # Extract text from response
52
  extracted_text = response.text
53
 
54
+ # Filter to keep only Malayalam letters, English letters, and numbers
55
+ filtered_text = ''.join(
56
+ char for char in extracted_text
57
+ if (
58
+ # Malayalam Unicode range
59
+ ('\u0D00' <= char <= '\u0D7F') or
60
+ # English letters (uppercase and lowercase)
61
+ char.isalpha() or
62
+ # Numbers
63
+ char.isdigit() or
64
+ # Preserve spaces and basic punctuation
65
+ char.isspace() or char in '.,/-'
66
+ )
67
+ )
68
 
69
+ # Validate text extraction
70
+ if not filtered_text.strip():
71
+ return "No valid text detected. Please check the image quality."
72
 
73
+ return filtered_text
74
 
75
  except Exception as e:
76
  return f"An error occurred: {str(e)}"
77
 
78
  # Create Gradio Interface
79
  demo = gr.Interface(
80
+ fn=extract_mixed_text,
81
+ inputs=gr.Image(type="pil", label="Upload Image with Text"),
82
+ outputs=gr.Textbox(label="Extracted Text", lines=10),
83
+ title="Text Extractor",
84
+ description="Upload an image to extract Malayalam letters, English letters, and numbers."
85
  )
86
 
87
  # Launch the app