Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import geopandas as gpd | |
| import sqlite3 | |
| import pandas as pd | |
| import os | |
| import torch | |
| from transformers import pipeline | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| import numpy as np | |
# Export SHAPE_RESTORE_SHX=YES through the process environment before any
# shapefile is opened further down in the script.
os.environ.update({'SHAPE_RESTORE_SHX': 'YES'})

# Choose the torch device: CUDA when a GPU is visible, CPU otherwise,
# and show the choice on the Streamlit page.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
st.write(f"Using device: {device}")
# Open (or create) the SQLite database file in the current working directory
# and keep a cursor around for the schema statement below.
conn = sqlite3.connect('NY.db')
cursor = conn.cursor()

# DDL for the floodland table. IF NOT EXISTS makes the statement a no-op
# when the table is already present in NY.db.
_FLOODLANDS_DDL = '''
    CREATE TABLE IF NOT EXISTS floodlands (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        DFIRM_ID TEXT,
        VERSION_ID TEXT,
        FLD_AR_ID TEXT,
        STUDY_TYP TEXT,
        FLD_ZONE TEXT,
        ZONE_SUBTY TEXT,
        SFHA_TF TEXT,
        STATIC_BFE FLOAT,
        V_DATUM TEXT,
        DEPTH FLOAT,
        LEN_UNIT TEXT,
        VELOCITY FLOAT,
        VEL_UNIT TEXT,
        AR_REVERT TEXT,
        AR_SUBTRV TEXT,
        BFE_REVERT FLOAT,
        DEP_REVERT FLOAT,
        DUAL_ZONE TEXT,
        SOURCE_CIT TEXT,
        geometry TEXT,
        acreage FLOAT,
        usable_area FLOAT
    )
'''
cursor.execute(_FLOODLANDS_DDL)

# Persist the schema change before any data is written.
conn.commit()
# Load and process the shapefile, then store the result in SQLite.
# Everything runs inside try/finally so the SQLite connection opened earlier
# in this script is closed even when reading, reprojecting or writing fails.
try:
    shapefile_path = os.path.join(os.path.dirname(__file__), 'S_FLD_HAZ_AR.shp')
    gdf = gpd.read_file(shapefile_path)

    # Check the initial CRS
    st.write("Initial CRS:", gdf.crs)

    # If the CRS is None, fall back to WGS84 (EPSG:4326).
    # NOTE(review): this is an assumption about the source data; confirm it
    # against the dataset's .prj file or metadata.
    if gdf.crs is None:
        gdf.set_crs(epsg=4326, inplace=True)
        st.write("CRS was missing; set to EPSG:4326 (WGS84).")

    # Validate geometries. Rows whose geometry is missing (None) are treated
    # like invalid ones instead of raising AttributeError on `.is_valid`.
    gdf['geometry'] = gdf['geometry'].apply(
        lambda geom: geom if geom is not None and geom.is_valid else None
    )
    null_count = int(gdf['geometry'].isnull().sum())
    if null_count:
        st.write(f"Found {null_count} invalid or null geometries. Dropping these rows.")
        gdf = gdf.dropna(subset=['geometry'])

    # Reproject to UTM Zone 18N (EPSG:32618) so that areas are in square meters
    gdf = gdf.to_crs(epsg=32618)
    st.write("CRS after reprojection:", gdf.crs)

    # Calculate acreage (1 square meter = 0.000247105 acres)
    gdf['acreage'] = gdf.geometry.area * 0.000247105

    # Calculate usable area: the full acreage for rows outside the listed
    # flood-prone zones, 0 for rows inside them (vectorised, no row-wise apply).
    flood_prone_zones = ['A', 'AE', 'AH', 'AO', 'VE']
    gdf['usable_area'] = gdf['acreage'].where(
        ~gdf['FLD_ZONE'].isin(flood_prone_zones), 0
    )

    # Convert geometry to WKT text so it can be stored in SQLite
    gdf['wkt_geometry'] = gdf['geometry'].apply(lambda geom: geom.wkt)

    # Create a new DataFrame without the geometry column for SQLite storage
    gdf_for_sql = gdf.drop(columns=['geometry'])

    # Store in SQLite. if_exists='replace' drops any existing `floodlands`
    # table, so the stored columns are exactly those of gdf_for_sql.
    rows_inserted = gdf_for_sql.to_sql('floodlands', conn, if_exists='replace', index=False)
    if rows_inserted is None:
        # Some pandas versions / drivers do not report a row count.
        rows_inserted = len(gdf_for_sql)
    st.write(f"Inserted {rows_inserted} rows into the database.")
finally:
    # Close the database connection on success and on failure alike
    conn.close()
# Streamlit re-executes this script on every user interaction. Without
# caching, both models would be reloaded and the whole corpus re-embedded on
# each button click. The decorators are looked up with getattr so the script
# still runs (uncached) on Streamlit releases that lack them.
_cache_resource = getattr(st, "cache_resource", lambda func: func)
_cache_data = getattr(st, "cache_data", lambda func: func)


@_cache_resource
def _load_embedder():
    """Load the sentence-embedding model used for records and queries."""
    return SentenceTransformer('all-MiniLM-L6-v2')


@_cache_resource
def _load_summarizer():
    """Load the summarization pipeline on GPU 0 if CUDA is available, else CPU."""
    return pipeline(
        "summarization",
        model="google/pegasus-xsum",
        device=0 if torch.cuda.is_available() else -1,
    )


@_cache_data
def _embed_texts(texts):
    """Encode a list of strings into a C-contiguous float32 matrix.

    FAISS expects float32, C-contiguous input, so the encoder output is
    normalised to that layout here.
    """
    vectors = _load_embedder().encode(texts, show_progress_bar=True)
    return np.ascontiguousarray(vectors, dtype=np.float32)


# Load sentence transformer for embedding
embedder = _load_embedder()

# Load summarization model (google/pegasus-xsum)
summarizer = _load_summarizer()

# Create a text representation of each floodland record for embedding
gdf['text'] = [
    f"Floodland area with FLD_ZONE: {zone}, ZONE_SUBTY: {subtype}, "
    f"acreage: {acres:.2f} acres, usable area: {usable:.2f} acres"
    for zone, subtype, acres, usable in zip(
        gdf['FLD_ZONE'], gdf['ZONE_SUBTY'], gdf['acreage'], gdf['usable_area']
    )
]

# Embed the text representations (cached on the list of texts)
embeddings = _embed_texts(gdf['text'].tolist())

# Create a FAISS index for retrieval; row i of the index is row i of gdf
d = embeddings.shape[1]  # Dimension of embeddings
index = faiss.IndexFlatL2(d)
index.add(embeddings)

# Store the embeddings alongside the records
gdf['embedding'] = list(embeddings)
# RAG-based summarization function (without prompt)
def rag_summarize(query, gdf, index, k=5):
    """Summarize the floodland records most similar to *query*.

    The query is embedded with the module-level ``embedder``, the nearest
    records are looked up in ``index``, their acreage figures are aggregated
    into a short narrative, and that narrative is passed to the module-level
    ``summarizer``. Intermediate values are echoed to the Streamlit page.

    Args:
        query: Free-text location or description entered by the user.
        gdf: DataFrame whose row positions match the vectors stored in
            ``index``; must provide ``acreage``, ``usable_area`` and
            ``FLD_ZONE`` columns.
        index: FAISS index built from the embeddings of ``gdf``.
        k: Maximum number of records to retrieve.

    Returns:
        The summary text. If the summarizer fails or returns an unexpected
        structure, a manually composed summary is returned instead. If no
        record can be retrieved, a short notice is returned.
    """
    no_data_message = "No floodland records are available to summarize."

    # Never ask FAISS for more neighbours than the index holds: it pads the
    # result with -1, which `iloc` would silently read as "the last row".
    k = min(k, index.ntotal)
    if k <= 0:
        return no_data_message

    # Embed the query as a (1, dim) float32 C-contiguous matrix for FAISS
    query_matrix = np.ascontiguousarray(embedder.encode([query]), dtype=np.float32)

    # Retrieve top-k relevant documents, discarding any -1 padding
    _, indices = index.search(query_matrix, k)
    hit_positions = [int(pos) for pos in indices[0] if pos >= 0]
    if not hit_positions:
        return no_data_message
    retrieved_docs = gdf.iloc[hit_positions]

    # Aggregate acreage and usable area from retrieved documents
    total_acreage = retrieved_docs['acreage'].sum()
    usable_acreage = retrieved_docs['usable_area'].sum()
    top_hit = retrieved_docs.iloc[0]  # closest match, used as the example record

    # Create a simplified narrative context
    context = (f"The floodlands in {query} cover a total of {total_acreage:.2f} acres, with {usable_acreage:.2f} acres usable for development. "
               f"A significant portion, spanning {top_hit['acreage']:.2f} acres, is classified as {top_hit['FLD_ZONE']} flood zone, "
               f"indicating potential flood risks that could impact development projects.")

    # Debug: Display the narrative context
    st.write("Narrative context for summarization:")
    st.write(context)

    # Generate summary without a prompt. Any failure here is deliberately
    # caught so the user still receives the hand-written fallback summary.
    try:
        summary_output = summarizer(context, max_length=150, min_length=100, do_sample=False)
        st.write("Raw summarizer output:", summary_output)
        if isinstance(summary_output, list) and len(summary_output) > 0 and 'summary_text' in summary_output[0]:
            summary = summary_output[0]['summary_text']
        else:
            raise ValueError("Unexpected output format from summarizer.")
    except Exception as e:
        st.write(f"Error in summarization: {e}")
        # Fallback: Generate a basic summary manually
        summary = (f"The floodlands in {query} cover a total of {total_acreage:.2f} acres. "
                   f"Only {usable_acreage:.2f} acres are usable for development due to flood risks. "
                   f"A significant portion, including {top_hit['acreage']:.2f} acres, falls under the {top_hit['FLD_ZONE']} flood zone. "
                   f"This indicates a high risk of flooding, which could impact power plant projects. "
                   f"Overall, the limited usable land poses challenges for development in this region.")
    return summary
# Streamlit interface
st.title("Floodland Summary Bot")

# Input field for the user to enter a location. The placeholder example now
# names a location that matches the "location" wording used in the prompt
# below, instead of asking for a state and suggesting a city.
user_input = st.text_input("Input:", placeholder="Enter a location (e.g., New York)")

# Submit button
if st.button("Submit"):
    # Whitespace-only input is treated as empty so it is not sent to retrieval.
    location = (user_input or "").strip()
    if location:
        st.write("Hi, How can I help you today?")
        st.write(f"User input: {location}")
        # Generate summary
        summary = rag_summarize(location, gdf, index, k=5)
        st.write(summary)
    else:
        st.write("Please enter a location to proceed.")