Spaces:
Runtime error
Runtime error
Commit
·
de789d7
1
Parent(s):
5918b9c
New design
Browse files- .gitignore +1 -0
- app.py +111 -32
- examples.json +13 -0
- requirements.txt +3 -1
- requirements_local.txt +1 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.venv
|
app.py
CHANGED
|
@@ -2,34 +2,35 @@ import os
|
|
| 2 |
import json
|
| 3 |
|
| 4 |
import spaces
|
| 5 |
-
import gradio
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
| 7 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 8 |
|
| 9 |
import pyparseit
|
| 10 |
|
|
|
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
|
| 18 |
-
|
| 19 |
|
| 20 |
-
prompt_valid = open("/home/user/app/templates/prompt_valid.txt").read()
|
| 21 |
-
prompt_sparql = open("/home/user/app/templates/prompt_sparql.txt").read()
|
| 22 |
-
|
| 23 |
-
system = open("/home/user/app/templates/system1.txt").read()
|
| 24 |
|
| 25 |
@spaces.GPU
|
| 26 |
-
def
|
| 27 |
-
|
| 28 |
|
| 29 |
# load the tokenizer and the model
|
| 30 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
| 31 |
model = AutoModelForCausalLM.from_pretrained(
|
| 32 |
-
|
| 33 |
torch_dtype="auto",
|
| 34 |
device_map="auto"
|
| 35 |
)
|
|
@@ -54,7 +55,53 @@ def generate(messages):
|
|
| 54 |
|
| 55 |
return content
|
| 56 |
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
valid = False
|
| 60 |
|
|
@@ -68,7 +115,7 @@ def is_valid(message):
|
|
| 68 |
|
| 69 |
messages.append({"role": "user", "content": prompt})
|
| 70 |
|
| 71 |
-
generation =
|
| 72 |
|
| 73 |
print("Generated:", generation)
|
| 74 |
|
|
@@ -84,7 +131,7 @@ def is_valid(message):
|
|
| 84 |
|
| 85 |
return valid
|
| 86 |
|
| 87 |
-
def
|
| 88 |
|
| 89 |
sparql = "```sparql\n[code]```"
|
| 90 |
|
|
@@ -98,7 +145,7 @@ def gen_sparql(message):
|
|
| 98 |
|
| 99 |
messages.append({"role": "user", "content": prompt})
|
| 100 |
|
| 101 |
-
generation =
|
| 102 |
|
| 103 |
print("Generated:", generation)
|
| 104 |
|
|
@@ -107,33 +154,65 @@ def gen_sparql(message):
|
|
| 107 |
if len(blocks) >= 1:
|
| 108 |
|
| 109 |
try:
|
| 110 |
-
sparql =
|
| 111 |
|
| 112 |
except Exception as e:
|
| 113 |
print(e)
|
| 114 |
|
| 115 |
return sparql
|
| 116 |
|
| 117 |
-
def respond(
|
| 118 |
-
message,
|
| 119 |
-
history: list[tuple[str, str]],
|
| 120 |
-
):
|
| 121 |
|
| 122 |
-
|
|
|
|
|
|
|
| 123 |
|
| 124 |
-
return
|
| 125 |
|
| 126 |
else:
|
| 127 |
|
| 128 |
-
return "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
-
demo = gr.ChatInterface(
|
| 132 |
-
respond,
|
| 133 |
-
type="messages",
|
| 134 |
-
title="SPARQL generator"
|
| 135 |
-
)
|
| 136 |
|
| 137 |
|
| 138 |
if __name__ == "__main__":
|
| 139 |
-
demo.
|
|
|
|
| 2 |
import json
|
| 3 |
|
| 4 |
import spaces
|
| 5 |
+
import gradio
|
| 6 |
+
|
| 7 |
+
import numpy
|
| 8 |
+
import pandas
|
| 9 |
+
|
| 10 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 11 |
|
| 12 |
import pyparseit
|
| 13 |
|
| 14 |
+
import SPARQLWrapper
|
| 15 |
|
| 16 |
+
MODEL_NAME = os.getenv("MODEL_NAME", "daniel-dona/sparql-model-era-lora-128-qwen3-0.6b")
|
| 17 |
+
SPARQL_ENDPOINT = os.getenv("SPARQL_ENDPOINT", "http://192.168.10.174:8890/sparql")
|
| 18 |
|
| 19 |
+
prompt_valid = open("./templates/prompt_valid.txt").read()
|
| 20 |
+
prompt_sparql = open("./templates/prompt_sparql.txt").read()
|
| 21 |
|
| 22 |
+
system = open("./templates/system1.txt").read()
|
| 23 |
|
| 24 |
+
examples_data = [[e["query"]] for e in json.loads(open("./examples.json").read())]
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
@spaces.GPU
|
| 28 |
+
def model_completion(messages):
|
|
|
|
| 29 |
|
| 30 |
# load the tokenizer and the model
|
| 31 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 32 |
model = AutoModelForCausalLM.from_pretrained(
|
| 33 |
+
MODEL_NAME,
|
| 34 |
torch_dtype="auto",
|
| 35 |
device_map="auto"
|
| 36 |
)
|
|
|
|
| 55 |
|
| 56 |
return content
|
| 57 |
|
| 58 |
+
|
| 59 |
+
def sparql_json_to_df(sparql_json):
    """Convert a SPARQL JSON result document into a pandas DataFrame.

    Args:
        sparql_json: Parsed JSON result, expected to be a dict with a
            ``results.bindings`` list and (normally) a ``head.vars`` list.

    Returns:
        A DataFrame with one column per result variable and one row per
        binding. Variables that are unbound in a row become missing values.
        An empty DataFrame is returned when the payload is not a dict or has
        no ``results.bindings`` section (e.g. an ASK-style response).
    """
    # Anything that is not a JSON object cannot carry result bindings.
    if not isinstance(sparql_json, dict):
        return pandas.DataFrame()

    results = sparql_json.get('results')
    if not isinstance(results, dict) or 'bindings' not in results:
        return pandas.DataFrame()

    bindings = results['bindings'] or []

    # Prefer the declared variable order; tolerate a missing head/vars section
    # instead of raising KeyError.
    cols = list((sparql_json.get('head') or {}).get('vars') or [])
    if not cols:
        # Fall back to the variable names seen in the rows, first-seen order.
        cols = list(dict.fromkeys(name for row in bindings for name in row))

    if not bindings:
        return pandas.DataFrame(columns=cols)

    # Each binding maps variable -> {"type": ..., "value": ...}; keep the value
    # only, and None where the variable is unbound in that row.
    data_rows = [
        [row.get(col, {}).get('value') for col in cols]
        for row in bindings
    ]

    df = pandas.DataFrame(data_rows, columns=cols)

    # Normalise None to NaN before letting pandas pick nullable dtypes.
    df.fillna(value=numpy.nan, inplace=True)
    return df.convert_dtypes()
|
| 85 |
+
|
| 86 |
+
def execute_sparql(sparql, timeout=60):
    """Run a SPARQL query against ``SPARQL_ENDPOINT`` and return the rows.

    Args:
        sparql: Query text. ``None``, non-string or blank values are ignored
            so that clearing the query box does not trigger a request.
        timeout: Client-side timeout in seconds passed to ``setTimeout``.

    Returns:
        The DataFrame built by ``sparql_json_to_df`` on success, or ``None``
        when there is nothing to run or the request fails (the error is
        printed, not raised).
    """
    # Nothing to execute: skip the round trip instead of failing in the client.
    if not isinstance(sparql, str) or not sparql.strip():
        return None

    agent = SPARQLWrapper.SPARQLWrapper(endpoint=SPARQL_ENDPOINT)

    try:
        agent.setTimeout(timeout)
        # Extra "timeout" URI parameter, one second below the client timeout
        # and expressed in milliseconds; clamped so it never reaches zero or
        # goes negative. NOTE(review): presumably a server-side execution
        # limit - confirm against the endpoint's documentation.
        agent.addExtraURITag("timeout", str(int(max(timeout - 1, 1) * 1000)))
        agent.setQuery(sparql)
        agent.setReturnFormat(SPARQLWrapper.JSON)
        results = agent.queryAndConvert()

        return sparql_json_to_df(results)

    except Exception as e:
        # Best effort: report the failure and hand back an explicit "no data".
        print(e)
        print("Error during SPARQL execution")
        return None
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def check_valid_nlq(message):
|
| 105 |
|
| 106 |
valid = False
|
| 107 |
|
|
|
|
| 115 |
|
| 116 |
messages.append({"role": "user", "content": prompt})
|
| 117 |
|
| 118 |
+
generation = model_completion(messages)
|
| 119 |
|
| 120 |
print("Generated:", generation)
|
| 121 |
|
|
|
|
| 131 |
|
| 132 |
return valid
|
| 133 |
|
| 134 |
+
def generate_sparql(message):
|
| 135 |
|
| 136 |
sparql = "```sparql\n[code]```"
|
| 137 |
|
|
|
|
| 145 |
|
| 146 |
messages.append({"role": "user", "content": prompt})
|
| 147 |
|
| 148 |
+
generation = model_completion(messages)
|
| 149 |
|
| 150 |
print("Generated:", generation)
|
| 151 |
|
|
|
|
| 154 |
if len(blocks) >= 1:
|
| 155 |
|
| 156 |
try:
|
| 157 |
+
sparql = blocks[-1].content
|
| 158 |
|
| 159 |
except Exception as e:
|
| 160 |
print(e)
|
| 161 |
|
| 162 |
return sparql
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
+
def process_query(message):
    """Turn a natural-language request into a SPARQL query, or explain why not.

    Args:
        message: The text submitted by the user.

    Returns:
        The result of ``generate_sparql(message)`` when ``check_valid_nlq``
        accepts the message, otherwise a fixed explanatory string.
    """
    # Guard clause: reject requests the validity check does not accept.
    if not check_valid_nlq(message):
        return "Your request cannot be answered by generating a SPARQL query, try a different one that apply to the ERA Knowledge Graph."

    return generate_sparql(message)
|
| 174 |
+
|
| 175 |
+
def clear_query():
    """Return a pair of ``None`` values, one per output it is wired to."""
    cleared = (None, None)
    return cleared
|
| 178 |
+
|
| 179 |
+
with gradio.Blocks() as demo:
|
| 180 |
+
|
| 181 |
+
query = gradio.Textbox(render=False, label="Query", placeholder="Write a query or select an example above", submit_btn="Send query", show_copy_button=True)
|
| 182 |
+
|
| 183 |
+
sparql_block = gradio.Code(render=False, label="Generated SPARQL", interactive=False, language="sql")
|
| 184 |
+
|
| 185 |
+
sparql_results = gradio.DataFrame(render=False, label="Data result", value=None, headers=None, interactive=False, wrap=True, show_row_numbers=True, show_copy_button=True)
|
| 186 |
+
|
| 187 |
+
with gradio.Row():
|
| 188 |
+
|
| 189 |
+
with gradio.Accordion(label="Examples", open=False):
|
| 190 |
+
|
| 191 |
+
gradio.Examples(label="Query examples", examples=examples_data, example_labels=[e[0] for e in examples_data], cache_examples=False, inputs=[query],examples_per_page=10)
|
| 192 |
+
|
| 193 |
+
with gradio.Row():
|
| 194 |
+
query.render()
|
| 195 |
+
|
| 196 |
+
with gradio.Row():
|
| 197 |
+
|
| 198 |
+
with gradio.Accordion(label="Options", open=False):
|
| 199 |
+
|
| 200 |
+
gradio.Dropdown(
|
| 201 |
+
["ran", "swam", "ate", "slept"], multiselect=False, label="Model"
|
| 202 |
+
)
|
| 203 |
+
|
| 204 |
+
with gradio.Row():
|
| 205 |
+
sparql_block.render()
|
| 206 |
|
| 207 |
+
with gradio.Row():
|
| 208 |
+
sparql_results.render()
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
query.submit(process_query, inputs=[query], outputs=[sparql_block])
|
| 212 |
+
query.change(clear_query, inputs=[], outputs=[sparql_block, sparql_results])
|
| 213 |
+
sparql_block.change(execute_sparql, inputs=[sparql_block], outputs=[sparql_results])
|
| 214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
|
| 217 |
if __name__ == "__main__":
|
| 218 |
+
demo.launch()
|
examples.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{"query": "find tracks without any rules or restrictions documents"},
|
| 3 |
+
{"query": "Get the top 100 net elements, showing their labels and lengths in descending order."},
|
| 4 |
+
{"query": "Retrieve system separation info where length >100"},
|
| 5 |
+
{"query": "List all LineReference entities along with their labels and kilometer values."},
|
| 6 |
+
{"query": "Siding labels where era:notApplicable is http://data.europa.eu/949/verificationINF"},
|
| 7 |
+
|
| 8 |
+
{"query": "What's the total number of PhaseInfo entries that include a phaseInfoKm?"},
|
| 9 |
+
{"query": "Sections and their OP start and end"},
|
| 10 |
+
{"query": "How many tracks exist?"},
|
| 11 |
+
{"query": "Show document named T_3_020322_1.pdf"},
|
| 12 |
+
{"query": "Fetch the entities of type era:VehicleType and their labels, skipping the first five results."}
|
| 13 |
+
]
|
requirements.txt
CHANGED
|
@@ -1,4 +1,6 @@
|
|
| 1 |
huggingface_hub
|
| 2 |
transformers
|
| 3 |
accelerate
|
| 4 |
-
pyparseit
|
|
|
|
|
|
|
|
|
| 1 |
huggingface_hub
|
| 2 |
transformers
|
| 3 |
accelerate
|
| 4 |
+
pyparseit
|
| 5 |
+
SPARQLWrapper
|
| 6 |
+
rdflib
|
requirements_local.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
gradio==5.34.2
|