Spaces:

Ekimetrics
/

climate-question-answering

Running

App Files Files Community

armanddemasson committed on Jun 5

Commit

80062dd

1 Parent(s): b21471a

feat: added evolution for a specific month plot

Browse files

Files changed (5) hide show

climateqa/engine/talk_to_data/input_processing.py +38 -12
climateqa/engine/talk_to_data/ipcc/config.py +2 -1
climateqa/engine/talk_to_data/ipcc/plot_informations.py +23 -0
climateqa/engine/talk_to_data/ipcc/plots.py +81 -3
climateqa/engine/talk_to_data/ipcc/queries.py +69 -1

climateqa/engine/talk_to_data/input_processing.py CHANGED Viewed

@@ -118,7 +118,28 @@ async def detect_year_with_openai(sentence: str) -> str:
         return years_list[0]
     else:
         return ""
 async def detect_relevant_tables(user_question: str, plot: Plot, llm, table_names_list: list[str]) -> list[str]:
     """Identifies relevant tables for a plot based on user input.
@@ -227,6 +248,14 @@ async def find_year(user_input: str) -> str| None:
         return None
     return year
 async def find_relevant_plots(state: State, llm, plots: list[Plot]) -> list[str]:
     print("---- Find relevant plots ----")
     relevant_plots = await detect_relevant_plots(state['user_input'], llm, plots)
@@ -237,16 +266,9 @@ async def find_relevant_tables_per_plot(state: State, plot: Plot, llm, tables: l
     relevant_tables = await detect_relevant_tables(state['user_input'], plot, llm, tables)
     return relevant_tables
-async def find_param(state: State, param_name:str, mode: Literal['DRIAS', 'IPCC'] = 'DRIAS') -> dict[str, Optional[str]] | Location | None:
-    """Perform the good method to retrieve the desired parameter
-    Args:
-        state (State): state of the workflow
-        param_name (str): name of the desired parameter
-        table (str): name of the table
-    Returns:
-        dict[str, Any] | None:
     """
     if param_name == 'location':
         location = await find_location(state['user_input'], mode)
@@ -254,4 +276,8 @@ async def find_param(state: State, param_name:str, mode: Literal['DRIAS', 'IPCC'
     if param_name == 'year':
         year = await find_year(state['user_input'])
         return {'year': year}
-    return None

         return years_list[0]
     else:
         return ""
+async def detect_month_with_openai(sentence: str) -> str:
+    """
+    Detects month in a sentence using OpenAI's API via LangChain.
+
+    Args:
+        sentence (str): Free-text sentence to inspect.
+
+    Returns:
+        str: The first detected month as an integer string (e.g., "7" for July),
+            or "" if no month is found or the model output cannot be parsed
+            as a non-empty list.
+    """
+    # Local import: the file's top-level import block is not visible in this diff.
+    import ast
+    llm = get_llm()
+    prompt = """
+    Extract the month (as a number from 1 to 12) mentioned in the following sentence.
+    Return the result as a Python list of integers. If no month is mentioned, return an empty list.
+    Sentence: "{sentence}"
+    """
+    prompt = ChatPromptTemplate.from_template(prompt)
+    structured_llm = llm.with_structured_output(ArrayOutput)
+    chain = prompt | structured_llm
+    response: ArrayOutput = await chain.ainvoke({"sentence": sentence})
+    raw_array = response['array']
+    # SECURITY: this used to be eval(), which executes whatever expression the
+    # model returns. ast.literal_eval only accepts Python literals.
+    try:
+        months_list = ast.literal_eval(raw_array)
+    except (ValueError, TypeError, SyntaxError):
+        # Unparseable model output is treated the same as "no month mentioned".
+        return ""
+    if not isinstance(months_list, (list, tuple)) or not months_list:
+        return ""
+    return str(months_list[0])
 async def detect_relevant_tables(user_question: str, plot: Plot, llm, table_names_list: list[str]) -> list[str]:
     """Identifies relevant tables for a plot based on user input.
         return None
     return year
+async def find_month(user_input: str) -> str | None:
+    """Extract the month mentioned in the user input using the LLM detector.
+
+    Args:
+        user_input (str): Raw user question.
+
+    Returns:
+        str | None: The value returned by detect_month_with_openai, or None
+            when that value is the empty string.
+    """
+    print("---- Find month ---")
+    detected = await detect_month_with_openai(user_input)
+    return None if detected == "" else detected
 async def find_relevant_plots(state: State, llm, plots: list[Plot]) -> list[str]:
     print("---- Find relevant plots ----")
     relevant_plots = await detect_relevant_plots(state['user_input'], llm, plots)
     relevant_tables = await detect_relevant_tables(state['user_input'], plot, llm, tables)
     return relevant_tables
+async def find_param(state: State, param_name: str, mode: Literal['DRIAS', 'IPCC'] = 'DRIAS') -> dict[str, Optional[str]] | Location | None:
+    """
+    Perform the good method to retrieve the desired parameter.
     """
     if param_name == 'location':
         location = await find_location(state['user_input'], mode)
     if param_name == 'year':
         year = await find_year(state['user_input'])
         return {'year': year}
+    if param_name == 'month':
+        month = await find_month(state['user_input'])
+        print(month)
+        return {'month': month}
+    return None

climateqa/engine/talk_to_data/ipcc/config.py CHANGED Viewed

@@ -30,7 +30,8 @@ IPCC_MODELS = []
 IPCC_PLOT_PARAMETERS = [
     'year',
-    'location'
 ]
 MACRO_COUNTRIES = ['JP',

 IPCC_PLOT_PARAMETERS = [
     'year',
+    'location',
+    'month'
 ]
 MACRO_COUNTRIES = ['JP',

climateqa/engine/talk_to_data/ipcc/plot_informations.py CHANGED Viewed

@@ -47,4 +47,27 @@ Each grid point is colored according to the value of the indicator ({unit}), all
 - For each grid point of {location} country ({country_name}), the value of {indicator} in {year} and for the selected scenario is extracted and mapped to its geographic coordinates.
 - The grid points correspond to 1-degree squares centered on the grid points of the IPCC dataset. Each grid point has been mapped to a country using [**reverse_geocoder**](https://github.com/thampiman/reverse-geocoder).
 - The coordinates used for each region are those of the closest available grid point in the IPCC database, which uses a regular grid with a spatial resolution of 1 degree.
 """

 - For each grid point of {location} country ({country_name}), the value of {indicator} in {year} and for the selected scenario is extracted and mapped to its geographic coordinates.
 - The grid points correspond to 1-degree squares centered on the grid points of the IPCC dataset. Each grid point has been mapped to a country using [**reverse_geocoder**](https://github.com/thampiman/reverse-geocoder).
 - The coordinates used for each region are those of the closest available grid point in the IPCC database, which uses a regular grid with a spatial resolution of 1 degree.
+"""
+def indicator_specific_month_evolution_informations(
+        indicator: str,
+        params: dict[str, str]
+) -> str:
+    """Build the Markdown description shown with the specific-month evolution plot.
+
+    Args:
+        indicator (str): Indicator name; also used as the key into IPCC_INDICATOR_TO_UNIT.
+        params (dict[str, str]): Must contain "location" and "month".
+
+    Returns:
+        str: Markdown text describing the plot and its data source.
+
+    Raises:
+        ValueError: If "location" or "month" is absent from params.
+    """
+    # Checked in this order so that a missing "location" is reported first.
+    for required_key in ("location", "month"):
+        if required_key not in params:
+            raise ValueError(f'"{required_key}" must be provided in params')
+    location, month = params["location"], params["month"]
+    unit = IPCC_INDICATOR_TO_UNIT[indicator]
+    return f"""
+This plot shows how the climate indicator **{indicator}** evolves over time in **{location}** for the month of **{month}**.
+It combines both historical (from 1950 to 2015) observations and future (from 2016 to 2100) projections for the different SSP climate scenarios (SSP126, SSP245, SSP370 and SSP585).
+The x-axis represents the years (from 1950 to 2100), and the y-axis shows the value of the {indicator} ({unit}) for the selected month.
+Each line corresponds to a different scenario, allowing you to compare how {indicator} for month {month} might change under various future conditions.
+**Data source:**
+- The data comes from the IPCC climate datasets (Parquet files) for the relevant indicator, location, and month.
+- For each year and scenario, the value of {indicator} for month {month} is extracted for the selected location.
+- The coordinates used for {location} correspond to the closest available point in the IPCC database, which uses a regular grid with a spatial resolution of 1 degree.
 """

climateqa/engine/talk_to_data/ipcc/plots.py CHANGED Viewed

@@ -5,8 +5,8 @@ import pandas as pd
 import geojson
 from climateqa.engine.talk_to_data.ipcc.config import IPCC_INDICATOR_TO_COLORSCALE, IPCC_INDICATOR_TO_UNIT, IPCC_SCENARIO
-from climateqa.engine.talk_to_data.ipcc.plot_informations import choropleth_map_informations, indicator_evolution_informations
-from climateqa.engine.talk_to_data.ipcc.queries import indicator_for_given_year_query, indicator_per_year_at_location_query
 from climateqa.engine.talk_to_data.objects.plot import Plot
 def generate_geojson_polygons(latitudes: list[float], longitudes: list[float], indicators: list[float]) -> geojson.FeatureCollection:
@@ -102,6 +102,82 @@ indicator_evolution_at_location_historical_and_projections: Plot = {
     "short_name": "Evolution"
 }
 def plot_choropleth_map_of_country_indicator_for_specific_year(
     params: dict,
 ) -> Callable[[pd.DataFrame], Figure]:
@@ -167,6 +243,7 @@ def plot_choropleth_map_of_country_indicator_for_specific_year(
     return plot_data
 choropleth_map_of_country_indicator_for_specific_year: Plot = {
     "name": "Choropleth Map of a Country's Indicator Distribution for a Specific Year",
     "description": (
@@ -185,5 +262,6 @@ choropleth_map_of_country_indicator_for_specific_year: Plot = {
 IPCC_PLOTS = [
     indicator_evolution_at_location_historical_and_projections,
-    choropleth_map_of_country_indicator_for_specific_year
 ]

 import geojson
 from climateqa.engine.talk_to_data.ipcc.config import IPCC_INDICATOR_TO_COLORSCALE, IPCC_INDICATOR_TO_UNIT, IPCC_SCENARIO
+from climateqa.engine.talk_to_data.ipcc.plot_informations import choropleth_map_informations, indicator_evolution_informations, indicator_specific_month_evolution_informations
+from climateqa.engine.talk_to_data.ipcc.queries import indicator_for_given_year_query, indicator_per_year_and_specific_month_at_location_query, indicator_per_year_at_location_query
 from climateqa.engine.talk_to_data.objects.plot import Plot
 def generate_geojson_polygons(latitudes: list[float], longitudes: list[float], indicators: list[float]) -> geojson.FeatureCollection:
     "short_name": "Evolution"
 }
+def plot_indicator_monthly_evolution_at_location(
+    params: dict,
+) -> Callable[[pd.DataFrame], Figure]:
+    """
+    Build a plotting function for the evolution of an indicator in one month of the year.
+
+    The returned function draws one line per entry of IPCC_SCENARIO. Every
+    non-historical line is prefixed with the last historical point so that the
+    projection curves start where the historical curve ends.
+
+    Args:
+        params (dict): Dictionary with:
+            - indicator_column (str): Name of the climate indicator column to plot.
+            - location (str): Location (e.g., country, city) shown in the title.
+            - month (int): Month number (1-12) shown in the title.
+
+    Returns:
+        Callable[[pd.DataFrame], Figure]: Function that takes a DataFrame with
+            'year', 'scenario' and indicator columns and returns a Plotly Figure.
+    """
+    indicator = params["indicator_column"]
+    location = params["location"]
+    month = params["month"]
+    unit = IPCC_INDICATOR_TO_UNIT.get(indicator, "")
+    # "mean_temperature" -> "Mean Temperature"
+    indicator_label = " ".join(map(str.capitalize, indicator.split("_")))
+    def plot_data(df: pd.DataFrame) -> Figure:
+        ordered = df.sort_values(by='year')
+        # (year, value, scenario) triples, ordered by year.
+        rows = list(zip(
+            ordered['year'].astype(int).tolist(),
+            ordered[indicator].astype(float).tolist(),
+            ordered['scenario'].astype(str).tolist(),
+        ))
+        historical_points = [(yr, val) for yr, val, sc in rows if sc == 'historical']
+        # Last historical point, used to join each projection to the history.
+        anchor_year, anchor_value = historical_points[-1] if historical_points else (None, None)
+        fig = go.Figure()
+        for scenario in IPCC_SCENARIO:
+            xs = [yr for yr, _, sc in rows if sc == scenario]
+            ys = [val for _, val, sc in rows if sc == scenario]
+            if anchor_value is not None and scenario != 'historical':
+                xs.insert(0, anchor_year)
+                ys.insert(0, anchor_value)
+            trace = go.Scatter(x=xs, y=ys, mode='lines', name=scenario)
+            fig.add_trace(trace)
+        layout = dict(
+            title=f'Evolution of {indicator_label} in {location} for Month {month} (Historical + SSP Scenarios)',
+            xaxis_title='Year',
+            yaxis_title=f'{indicator_label} ({unit})',
+            legend_title='Scenario',
+            height=800,
+        )
+        fig.update_layout(**layout)
+        return fig
+    return plot_data
+# Plot definition for the "evolution for a specific month" chart. It bundles
+# the plot builder, the SQL query builder and the description builder, and
+# lists the parameter names associated with this plot under "params".
+indicator_specific_month_evolution_at_location: Plot = {
+    "name": "Indicator specific month Evolution at Location (Historical + Projections)",
+    "description": (
+        "Shows how a climate indicator (e.g., rainfall, temperature) for a specific month changes over time at a specific location, "
+        "including historical data and future projections. "
+        "Useful for questions about the value or trend of an indicator for a given month at a location, "
+        "such as 'How does July temperature evolve in Paris over time?'. "
+        "Parameters: indicator_column (the climate variable), location (e.g., country, city), month (1-12)."
+    ),
+    "params": ["indicator_column", "location", "month"],
+    "plot_function": plot_indicator_monthly_evolution_at_location,
+    "sql_query": indicator_per_year_and_specific_month_at_location_query,
+    "plot_information": indicator_specific_month_evolution_informations,
+    "short_name": "Evolution for a specific month"
+}
 def plot_choropleth_map_of_country_indicator_for_specific_year(
     params: dict,
 ) -> Callable[[pd.DataFrame], Figure]:
     return plot_data
 choropleth_map_of_country_indicator_for_specific_year: Plot = {
     "name": "Choropleth Map of a Country's Indicator Distribution for a Specific Year",
     "description": (
 IPCC_PLOTS = [
     indicator_evolution_at_location_historical_and_projections,
+    choropleth_map_of_country_indicator_for_specific_year,
+    indicator_specific_month_evolution_at_location
 ]

climateqa/engine/talk_to_data/ipcc/queries.py CHANGED Viewed

@@ -74,6 +74,74 @@ def indicator_per_year_at_location_query(
         """
     return sql_query.strip()
 class IndicatorForGivenYearQueryParams(TypedDict, total=False):
     """
     Parameters for querying an indicator's values across locations for a specific year.
@@ -140,4 +208,4 @@ def indicator_for_given_year_query(
         ORDER BY latitude, longitude, scenario
         """
-    return sql_query.strip()

         """
     return sql_query.strip()
+class IndicatorPerYearAndSpecificMonthAtLocationQueryParams(TypedDict, total=False):
+    """
+    Parameters for querying the evolution of an indicator per year for a specific month at a specific location.
+
+    Declared with total=False, so every key is optional for type checkers.
+
+    Attributes:
+        indicator_column (str): Name of the climate indicator column.
+        latitude (str): Latitude of the location.
+        longitude (str): Longitude of the location.
+        country_code (str): Country code.
+        month (str): Targeted month. NOTE(review): presumably a month number
+            from 1 to 12 given as a string; confirm against callers.
+    """
+    indicator_column: str
+    latitude: str
+    longitude: str
+    country_code: str
+    month: str
+def indicator_per_year_and_specific_month_at_location_query(
+    table: str, params: IndicatorPerYearAndSpecificMonthAtLocationQueryParams
+) -> str:
+    """
+    Builds an SQL query to get the evolution of an indicator per year for a specific month at a specific location.
+
+    Args:
+        table (str): SQL table of the indicator (lower-cased to build the Parquet path).
+        params (dict): Dictionary with required params:
+            - indicator_column (str)
+            - latitude (str or float)
+            - longitude (str or float)
+            - country_code (str)
+            - month (int)
+
+    Returns:
+        str: The SQL query string, or "" if any required param is missing or empty.
+    """
+    indicator_column = params.get("indicator_column")
+    latitude = params.get("latitude")
+    longitude = params.get("longitude")
+    country_code = params.get("country_code")
+    month = params.get("month")
+    # Test for None / "" rather than truthiness, so that a numeric coordinate
+    # of 0 is not mistaken for a missing parameter.
+    required_values = (indicator_column, latitude, longitude, country_code, month)
+    if any(value is None or value == "" for value in required_values):
+        return ""
+    # NOTE(review): the values below are interpolated directly into the SQL
+    # text. Callers must only pass trusted or validated values.
+    if country_code in MACRO_COUNTRIES:
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_macro.parquet'"
+        sql_query = f"""
+        SELECT year, scenario, {indicator_column}
+        FROM {table_path}
+        WHERE latitude = {latitude} AND longitude = {longitude} AND year >= 1950 AND month={month}
+        ORDER BY year, scenario
+        """
+    elif country_code in HUGE_MACRO_COUNTRIES:
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_macro.parquet'"
+        # Fixed: this branch previously had no month filter and used
+        # "ORDER year, scenario", which is not valid SQL.
+        sql_query = f"""
+        SELECT year, scenario, {indicator_column}
+        FROM {table_path}
+        WHERE latitude = {latitude} AND longitude = {longitude} AND year >= 1950 AND month={month}
+        ORDER BY year, scenario
+        """
+    else:
+        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}.parquet'"
+        # ORDER BY added so all three branches return rows in the same order.
+        sql_query = f"""
+        SELECT year, scenario, MEDIAN({indicator_column}) AS {indicator_column}
+        FROM {table_path}
+        WHERE latitude = {latitude} AND longitude = {longitude} AND year >= 1950 AND month={month}
+        GROUP BY scenario, year
+        ORDER BY year, scenario
+        """
+    return sql_query.strip()
 class IndicatorForGivenYearQueryParams(TypedDict, total=False):
     """
     Parameters for querying an indicator's values across locations for a specific year.
         ORDER BY latitude, longitude, scenario
         """
+    return sql_query.strip()