Spaces:

Ekimetrics
/

climate-question-answering

Running

App Files Files Community

armanddemasson committed on Apr 1

Commit

ef61c09

1 Parent(s): 2745637

feat: added 2 new talk to data plots

Browse files

Files changed (5) hide show

app.py +0 -3
climateqa/engine/talk_to_data/main.py +9 -8
climateqa/engine/talk_to_data/plot.py +180 -14
climateqa/engine/talk_to_data/sql_query.py +32 -3
climateqa/engine/talk_to_data/workflow.py +20 -3

app.py CHANGED Viewed

@@ -152,20 +152,17 @@ def create_drias_tab():
             prev_button = gr.Button("Previous")
             next_button = gr.Button("Next")
-            # Initialisation des données
             sql_queries_state = gr.State([])
             dataframes_state = gr.State([])
             plots_state = gr.State([])
             index_state = gr.State(0)  # To track the current position
-        # Action sur la soumission du texte
         drias_direct_question.submit(
             ask_drias_query,
             inputs=[drias_direct_question, index_state],
             outputs=[drias_sql_query, drias_table, drias_display, sql_queries_state, dataframes_state, plots_state, index_state]
         )
-        # Define functions to navigate history
         def show_previous(index, sql_queries, dataframes, plots):
             if index > 0:
                 index -= 1

             prev_button = gr.Button("Previous")
             next_button = gr.Button("Next")
             sql_queries_state = gr.State([])
             dataframes_state = gr.State([])
             plots_state = gr.State([])
             index_state = gr.State(0)  # To track the current position
         drias_direct_question.submit(
             ask_drias_query,
             inputs=[drias_direct_question, index_state],
             outputs=[drias_sql_query, drias_table, drias_display, sql_queries_state, dataframes_state, plots_state, index_state]
         )
         def show_previous(index, sql_queries, dataframes, plots):
             if index > 0:
                 index -= 1

climateqa/engine/talk_to_data/main.py CHANGED Viewed

@@ -19,16 +19,17 @@ def ask_drias(db_drias_path:str, query:str , index_state: int):
     result_dataframes = []
     figures = []
     for plot_state in final_state['plot_states'].values():
         for table_state in plot_state['table_states'].values():
-            if 'ql_query' in table_state and table_state['sql_query'] is not None:
-                sql_queries.append(table_state['sql_query'])
-            if 'dataframe' in table_state and table_state['dataframe'] is not None:
-                result_dataframes.append(table_state['dataframe'])
-                if 'figure' in table_state and table_state['figure'] is not None:
-                    figures.append(table_state['figure'](table_state['dataframe']))
     return sql_queries[index_state], result_dataframes[index_state], figures[index_state], sql_queries, result_dataframes, figures, index_state

     result_dataframes = []
     figures = []
     for plot_state in final_state['plot_states'].values():
         for table_state in plot_state['table_states'].values():
+            if table_state['status'] == 'OK':
+                if 'sql_query' in table_state and table_state['sql_query'] is not None:
+                    sql_queries.append(table_state['sql_query'])
+                if 'dataframe' in table_state and table_state['dataframe'] is not None:
+                    result_dataframes.append(table_state['dataframe'])
+                    if 'figure' in table_state and table_state['figure'] is not None:
+                        figures.append(table_state['figure'](table_state['dataframe']))
     return sql_queries[index_state], result_dataframes[index_state], figures[index_state], sql_queries, result_dataframes, figures, index_state

climateqa/engine/talk_to_data/plot.py CHANGED Viewed

@@ -1,9 +1,14 @@
 from typing import Callable, TypedDict
 import pandas as pd
 from plotly.graph_objects import Figure
 import plotly.graph_objects as go
-from climateqa.engine.talk_to_data.sql_query import indicator_per_year_at_location_query
 class Plot(TypedDict):
@@ -14,7 +19,7 @@ class Plot(TypedDict):
     sql_query: Callable[..., str]
-def plot_indicator_per_year_at_location(params: dict) -> Callable[..., Figure]:
     """Generate the function to plot a line plot of an indicator per year at a certain location
     Args:
@@ -25,6 +30,7 @@ def plot_indicator_per_year_at_location(params: dict) -> Callable[..., Figure]:
     """
     indicator = params["indicator_column"]
     model = params["model"]
     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
     def plot_data(df: pd.DataFrame) -> Figure:
@@ -74,6 +80,7 @@ def plot_indicator_per_year_at_location(params: dict) -> Callable[..., Figure]:
             y=indicators,
             name=f"Yearly {indicator_label}",
             mode="lines",
         )
         # Sliding average dashed line
@@ -83,10 +90,10 @@ def plot_indicator_per_year_at_location(params: dict) -> Callable[..., Figure]:
             mode="lines",
             name="10 years rolling average",
             line=dict(dash="dash"),
-            marker=dict(color="#1f77b4"),
         )
         fig.update_layout(
-            title=f"Plot of {indicator_label} in {params['location']} (Model Average)",
             xaxis_title="Year",
             yaxis_title=indicator_label,
             template="plotly_white",
@@ -96,16 +103,18 @@ def plot_indicator_per_year_at_location(params: dict) -> Callable[..., Figure]:
     return plot_data
-indicator_per_year_at_location: Plot = {
-    "name": "Indicator per year at location",
-    "description": "Plot an evolution of the indicator at a certain location over the years",
     "params": ["indicator_column", "location", "model"],
-    "plot_function": plot_indicator_per_year_at_location,
     "sql_query": indicator_per_year_at_location_query,
 }
-def plot_indicator_number_of_days_per_year_at_location(params) -> Callable[..., Figure]:
     """Generate the function to plot a line plot of an indicator per year at a certain location
     Args:
@@ -117,10 +126,19 @@ def plot_indicator_number_of_days_per_year_at_location(params) -> Callable[...,
     indicator = params["indicator_column"]
     model = params["model"]
-    def plot_data(df) -> Figure:
         fig = go.Figure()
-        if params["model"] == "ALL":
             df_avg = df.groupby("year", as_index=False)[indicator].mean()
             # Transform to list to avoid pandas encoding
@@ -147,10 +165,10 @@ def plot_indicator_number_of_days_per_year_at_location(params) -> Callable[...,
         indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
         fig.update_layout(
-            title=f"{indicator_label} in {params['location']} (Model Average)",
             xaxis_title="Year",
             yaxis_title=indicator,
-            yaxis=dict(range=[0, 366]),
             bargap=0.5,
             template="plotly_white",
         )
@@ -169,4 +187,152 @@ indicator_number_of_days_per_year_at_location: Plot = {
 }
-PLOTS = [indicator_per_year_at_location, indicator_number_of_days_per_year_at_location]

 from typing import Callable, TypedDict
+from matplotlib.figure import figaspect
 import pandas as pd
 from plotly.graph_objects import Figure
 import plotly.graph_objects as go
+import plotly.express as px
+from climateqa.engine.talk_to_data.sql_query import (
+    indicator_for_given_year_query,
+    indicator_per_year_at_location_query,
+)
 class Plot(TypedDict):
     sql_query: Callable[..., str]
+def plot_indicator_evolution_at_location(params: dict) -> Callable[..., Figure]:
     """Generate the function to plot a line plot of an indicator per year at a certain location
     Args:
     """
     indicator = params["indicator_column"]
     model = params["model"]
+    location = params["location"]
     indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
     def plot_data(df: pd.DataFrame) -> Figure:
             y=indicators,
             name=f"Yearly {indicator_label}",
             mode="lines",
+            marker=dict(color="#1f77b4"),
         )
         # Sliding average dashed line
             mode="lines",
             name="10 years rolling average",
             line=dict(dash="dash"),
+            marker=dict(color="#d62728"),
         )
         fig.update_layout(
+            title=f"Plot of {indicator_label} in {location} {'(Model Average)' if model == 'ALL' else '(Model : ' + model + ')'}",
             xaxis_title="Year",
             yaxis_title=indicator_label,
             template="plotly_white",
     return plot_data
+indicator_evolution_at_location: Plot = {
+    "name": "Indicator evolution at location",
+    "description": "Plot an evolution of the indicator at a certain location",
     "params": ["indicator_column", "location", "model"],
+    "plot_function": plot_indicator_evolution_at_location,
     "sql_query": indicator_per_year_at_location_query,
 }
+def plot_indicator_number_of_days_per_year_at_location(
+    params: dict,
+) -> Callable[..., Figure]:
     """Generate the function to plot a line plot of an indicator per year at a certain location
     Args:
     indicator = params["indicator_column"]
     model = params["model"]
+    location = params["location"]
+    def plot_data(df: pd.DataFrame) -> Figure:
+        """Generate the figure thanks to the dataframe
+        Args:
+            df (pd.DataFrame): pandas dataframe with the required data
+        Returns:
+            Figure: Plotly figure
+        """
         fig = go.Figure()
+        if model == "ALL":
             df_avg = df.groupby("year", as_index=False)[indicator].mean()
             # Transform to list to avoid pandas encoding
         indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])
         fig.update_layout(
+            title=f"{indicator_label} in {location} {'(Model Average)' if model == 'ALL' else '(Model : ' + model + ')'}",
             xaxis_title="Year",
             yaxis_title=indicator,
+            yaxis=dict(range=[0, max(indicators)]),
             bargap=0.5,
             template="plotly_white",
         )
 }
def plot_distribution_of_indicator_for_given_year(
    params: dict,
) -> Callable[..., Figure]:
    """Build the plotting function for the distribution of an indicator in a given year.

    Args:
        params (dict): plot parameters; the keys ``indicator_column``,
            ``model`` and ``year`` are read.

    Returns:
        Callable[..., Figure]: function taking the queried dataframe and
            returning the histogram figure.
    """
    indicator = params["indicator_column"]
    model = params["model"]
    year = params["year"]
    indicator_label = " ".join(word.capitalize() for word in indicator.split("_"))

    def plot_data(df: pd.DataFrame) -> Figure:
        """Draw the histogram from the dataframe.

        Args:
            df (pd.DataFrame): dataframe holding the indicator column, plus
                ``latitude``/``longitude`` (model average) or ``model``
                (single model) columns.

        Returns:
            Figure: Plotly figure
        """
        if params["model"] == "ALL":
            # One value per grid point: the mean of the indicator over all rows
            # sharing the same coordinates.
            per_point = df.groupby(["latitude", "longitude"], as_index=False)[
                indicator
            ].mean()
            series = per_point[indicator]
        else:
            # Keep only the rows produced by the requested model.
            series = df[df["model"] == model][indicator]
        # Plain Python floats, to avoid pandas encoding inside the figure.
        values = series.astype(float).tolist()

        model_suffix = (
            "(Model Average)" if model == "ALL" else "(Model : " + model + ")"
        )

        fig = go.Figure()
        histogram = go.Histogram(
            x=values,
            opacity=0.8,
            histnorm="percent",
            marker=dict(color="#1f77b4"),
        )
        fig.add_trace(histogram)
        fig.update_layout(
            title=f"Distribution of {indicator_label} in {year} {model_suffix}",
            xaxis_title=indicator_label,
            yaxis_title="Frequency",
            plot_bgcolor="rgba(0, 0, 0, 0)",
            showlegend=False,
        )
        return fig

    return plot_data


# Plot definition: display metadata, the parameters to resolve, the figure
# factory and the SQL query builder.
distribution_of_indicator_for_given_year: Plot = {
    "name": "Distribution of an indicator for a given year",
    "description": "Plot an histogram of the distribution for a given year of the values of an indicator ",
    "params": ["indicator_column", "model", "year"],
    "plot_function": plot_distribution_of_indicator_for_given_year,
    "sql_query": indicator_for_given_year_query,
}
def plot_map_of_france_of_indicator_for_given_year(
    params: dict,
) -> Callable[..., Figure]:
    """Generate a plot of the map of France for an indicator at a given year

    Args:
        params (dict): dictionary with the required params : model,
            indicator_column, year

    Returns:
        Callable[..., Figure]: Function which can be called to create the
            figure with the associated dataframe
    """
    indicator = params["indicator_column"]
    model = params["model"]
    year = params["year"]
    indicator_label = " ".join([word.capitalize() for word in indicator.split("_")])

    def plot_data(df: pd.DataFrame) -> Figure:
        """Generate the figure thanks to the dataframe

        Args:
            df (pd.DataFrame): dataframe with ``latitude``, ``longitude`` and
                the indicator column (plus ``model`` when a single model is
                requested)

        Returns:
            Figure: Plotly figure
        """
        if model == "ALL":
            # One value per grid point: mean of the indicator over all rows
            # sharing the same coordinates.
            df_points = df.groupby(["latitude", "longitude"], as_index=False)[
                indicator
            ].mean()
        else:
            df_points = df[df["model"] == model]

        # Transform to list to avoid pandas encoding
        indicators = df_points[indicator].astype(float).tolist()
        latitudes = df_points["latitude"].astype(float).tolist()
        longitudes = df_points["longitude"].astype(float).tolist()

        # min()/max() raise ValueError on an empty sequence; with no rows, let
        # Plotly pick the colour range itself instead of crashing.
        color_min = min(indicators) if indicators else None
        color_max = max(indicators) if indicators else None

        fig = go.Figure()
        # NOTE(review): recent Plotly releases deprecate Scattermapbox in favour
        # of Scattermap - confirm against the pinned plotly version before
        # migrating.
        fig.add_trace(
            go.Scattermapbox(
                lat=latitudes,
                lon=longitudes,
                mode="markers",
                marker=dict(
                    size=10,
                    color=indicators,  # Color mapped to values
                    colorscale="Turbo",  # Color scale (can be 'Plasma', 'Jet', etc.)
                    cmin=color_min,  # Minimum color range
                    cmax=color_max,  # Maximum color range
                    showscale=True,  # Show colorbar
                    # The colorbar belongs to this marker, not to a shared
                    # layout.coloraxis, so its title has to be set here to be
                    # displayed.
                    colorbar=dict(title=dict(text=f"{indicator_label}")),
                ),
            )
        )
        fig.update_layout(
            mapbox_style="open-street-map",  # Use OpenStreetMap
            mapbox_zoom=3,
            mapbox_center={"lat": 46.6, "lon": 2.0},
            title=f"{indicator_label} in {year} in France",  # Title
        )
        return fig

    return plot_data


map_of_france_of_indicator_for_given_year: Plot = {
    "name": "Map of France of an indicator for a given year",
    "description": "Heatmap on the map of France of the values of an in indicator for a given year",
    "params": ["indicator_column", "year", "model"],
    "plot_function": plot_map_of_france_of_indicator_for_given_year,
    "sql_query": indicator_for_given_year_query,
}
# Every plot definition exposed by this module, in declaration order.
PLOTS: list[Plot] = [
    indicator_evolution_at_location,
    indicator_number_of_days_per_year_at_location,
    distribution_of_indicator_for_given_year,
    map_of_france_of_indicator_for_given_year,
]

climateqa/engine/talk_to_data/sql_query.py CHANGED Viewed

@@ -39,10 +39,10 @@ def execute_sql_query(db_path: str, sql_query: str) -> SqlQueryOutput:
 class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):
-    table: str
-    indicator_column: list[str]
     latitude: str
     longitude: str
 def indicator_per_year_at_location_query(
@@ -60,5 +60,34 @@ def indicator_per_year_at_location_query(
     indicator_column = params.get("indicator_column")
     latitude = params.get("latitude")
     longitude = params.get("longitude")
-    sql_query = f"SELECT year, {indicator_column}, model FROM {table} WHERE latitude = {latitude} and longitude={longitude} Order by Year"
     return sql_query

 class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):
+    indicator_column: str
     latitude: str
     longitude: str
+    model: str
 def indicator_per_year_at_location_query(
     indicator_column = params.get("indicator_column")
     latitude = params.get("latitude")
     longitude = params.get("longitude")
+    if indicator_column is None or latitude is None or longitude is None: # If one parameter is missing, returns an empty query
+        return ""
+    sql_query = f"SELECT year, {indicator_column}, model\nFROM {table}\nWHERE latitude = {latitude} \nand longitude={longitude} \nOrder by Year"
     return sql_query
class IndicatorForGivenYearQueryParams(TypedDict, total=False):
    """Parameters accepted by ``indicator_for_given_year_query`` (all optional)."""

    indicator_column: str  # column holding the indicator values
    year: str  # year used in the WHERE clause
    model: str  # declared for callers; not read by the query builder


def indicator_for_given_year_query(
    table: str, params: IndicatorForGivenYearQueryParams
) -> str:
    """SQL Query to get the values of an indicator with their latitudes, longitudes and models for a given year

    ``table`` and ``indicator_column`` are interpolated verbatim as SQL
    identifiers, so they must come from a trusted source. ``year`` is
    validated as an integer before being interpolated, because a free-form
    value could otherwise alter or break the statement.

    Args:
        table (str): sql table of the indicator
        params (IndicatorForGivenYearQueryParams): dictionary with the required params for the query

    Returns:
        str: the sql query, or an empty string when ``indicator_column`` or
            ``year`` is missing or when ``year`` is not an integer
    """
    indicator_column = params.get("indicator_column")
    year = params.get("year")
    if year is None or indicator_column is None:
        # If one parameter is missing, returns an empty query
        return ""

    try:
        # Accept "2050", " 2050 " or 2050; reject anything that is not a
        # plain integer (empty string, "2050; DROP ...", free text).
        year_value = int(str(year).strip())
    except ValueError:
        return ""

    sql_query = f"Select {indicator_column}, latitude, longitude, model\nFrom {table}\nWhere year = {year_value}"
    return sql_query

climateqa/engine/talk_to_data/workflow.py CHANGED Viewed

@@ -9,6 +9,7 @@ from climateqa.engine.talk_to_data.plot import PLOTS, Plot
 from climateqa.engine.talk_to_data.sql_query import execute_sql_query
 from climateqa.engine.talk_to_data.utils import (
     detect_relevant_plots,
     loc2coords,
     detect_location_with_openai,
     nearestNeighbourSQL,
@@ -25,6 +26,7 @@ class TableState(TypedDict):
     sql_query: NotRequired[str]
     dataframe: NotRequired[pd.DataFrame | None]
     figure: NotRequired[Callable[..., Figure]]
 class PlotState(TypedDict):
     plot_name: str
@@ -82,6 +84,7 @@ def drias_workflow(db_drias_path: str, user_input: str) -> State:
             table_state: TableState = {
                 'table_name': table,
                 'params': {},
             }
             table_state['params'] = {
                 'model': 'ALL'
@@ -92,6 +95,11 @@ def drias_workflow(db_drias_path: str, user_input: str) -> State:
                     table_state['params'].update(param)
             sql_query = plot['sql_query'](table, table_state['params'])
             table_state['sql_query'] = sql_query
             results = execute_sql_query(db_drias_path, sql_query)
@@ -134,6 +142,9 @@ def find_param(state: State, param_name:str, table: str, db_path: str) -> dict[s
     if param_name == 'indicator_column':
         indicator_column = find_indicator_column(table)
         return {'indicator_column': indicator_column}
     return None
@@ -155,6 +166,11 @@ def find_location(user_input: str, table: str, db_path: str) -> Location:
         })
     return output
 def find_indicator_column(table: str) -> str:
     """Retrieve the name of the indicator column within the table in the database
@@ -178,12 +194,13 @@ def find_indicator_column(table: str) -> str:
         "mean_annual_temperature": "mean_annual_temperature",
         "number_of_tropical_nights": "number_tropical_nights",
         "maximum_summer_temperature": "maximum_summer_temperature",
-        "number_of_days_with_TX_above_30": "number_of_days_with_tx_above_30",
-        "number_of_days_with_TX_above_35": "number_of_days_with_tx_above_35",
         "number_of_days_with_a_dry_ground": "number_of_days_with_dry_ground"
     }
     return indicator_columns_per_table[table]
 # def make_write_query_node():
 #     def write_query(state):
@@ -230,4 +247,4 @@ def find_indicator_column(table: str) -> str:
 #         output.update(fetch_data_from_sql_query(db_path, sql_query))
 #         return output
-#     return fetch_data

 from climateqa.engine.talk_to_data.sql_query import execute_sql_query
 from climateqa.engine.talk_to_data.utils import (
     detect_relevant_plots,
+    detect_year_with_openai,
     loc2coords,
     detect_location_with_openai,
     nearestNeighbourSQL,
     sql_query: NotRequired[str]
     dataframe: NotRequired[pd.DataFrame | None]
     figure: NotRequired[Callable[..., Figure]]
+    status: str
 class PlotState(TypedDict):
     plot_name: str
             table_state: TableState = {
                 'table_name': table,
                 'params': {},
+                'status': 'OK'
             }
             table_state['params'] = {
                 'model': 'ALL'
                     table_state['params'].update(param)
             sql_query = plot['sql_query'](table, table_state['params'])
+            if sql_query == "":
+                table_state['status'] = 'ERROR'
+                continue
             table_state['sql_query'] = sql_query
             results = execute_sql_query(db_drias_path, sql_query)
     if param_name == 'indicator_column':
         indicator_column = find_indicator_column(table)
         return {'indicator_column': indicator_column}
+    if param_name == 'year':
+        year = find_year(state['user_input'])
+        return {'year': year}
     return None
         })
     return output
def find_year(user_input: str) -> str:
    """Detect the year referred to in the user's question.

    Delegates to ``detect_year_with_openai`` and returns its result unchanged.

    Args:
        user_input (str): text of the user's question

    Returns:
        str: the detected year, as returned by ``detect_year_with_openai``
            (NOTE(review): confirm what the helper returns when no year is found)
    """
    print("---- Find year ---")
    return detect_year_with_openai(user_input)
 def find_indicator_column(table: str) -> str:
     """Retrieve the name of the indicator column within the table in the database
         "mean_annual_temperature": "mean_annual_temperature",
         "number_of_tropical_nights": "number_tropical_nights",
         "maximum_summer_temperature": "maximum_summer_temperature",
+        "number_of_days_with_tx_above_30": "number_of_days_with_tx_above_30",
+        "number_of_days_with_tx_above_35": "number_of_days_with_tx_above_35",
         "number_of_days_with_a_dry_ground": "number_of_days_with_dry_ground"
     }
     return indicator_columns_per_table[table]
 # def make_write_query_node():
 #     def write_query(state):
 #         output.update(fetch_data_from_sql_query(db_path, sql_query))
 #         return output
+#     return fetch_data