Spaces:
Runtime error
Runtime error
Robin Chiu
committed on
Commit
·
64204b9
1
Parent(s):
f5c4d58
Add application
Browse files- .gitattributes +1 -0
- __init__.py +0 -0
- app.py +62 -0
- chroma_db/bf252843-3061-49cb-984b-ed9693b00042/data_level0.bin +3 -0
- chroma_db/bf252843-3061-49cb-984b-ed9693b00042/header.bin +3 -0
- chroma_db/bf252843-3061-49cb-984b-ed9693b00042/index_metadata.pickle +3 -0
- chroma_db/bf252843-3061-49cb-984b-ed9693b00042/length.bin +3 -0
- chroma_db/bf252843-3061-49cb-984b-ed9693b00042/link_lists.bin +3 -0
- chroma_db/chroma.sqlite3 +3 -0
- requirements.txt +9 -0
- tools/__init__.py +0 -0
- tools/final_answer.py +14 -0
- tools/law_rag_query.py +54 -0
- tools/law_tool.py +31 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
__init__.py
ADDED
File without changes
|
app.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mcp.server.fastmcp import FastMCP
|
2 |
+
from tools.law_tool import LawTool
|
3 |
+
from tools.law_rag_query import LawRAGQuery
|
4 |
+
import gradio as gr
|
5 |
+
|
6 |
+
# Build both tools once at import time; the wrapper functions in this module
# call these shared instances instead of constructing a tool per request.
law_tool = LawTool()
law_rag_query = LawRAGQuery()

# Create an MCP server
# Functions decorated with @mcp.tool() in this module are registered on it.
mcp = FastMCP("Law Tool Service")
|
11 |
+
|
12 |
+
|
13 |
+
# Tool implementation
|
14 |
+
@mcp.tool()
def get_law(category: str, number: int) -> str:
    """
    Look up the text of one law article from its category and article number.

    Args:
        category: the law category (such as 民法, 中華民國刑法, 民事訴訟法, 刑事訴訟法, 律師法 etc).
        number: the law number (such as 23).

    Returns:
        str: The content of the law.
    """
    # Delegate straight to the shared module-level LawTool instance.
    return law_tool(category, number)
|
28 |
+
|
29 |
+
# Tool implementation
|
30 |
+
@mcp.tool()
def rag_query(question: str) -> list:
    """
    Find the law passages most relevant to a free-text question.

    Args:
        question: the question to query the law.

    Returns:
        list: A list of law content related to the question.
    """
    # Delegate straight to the shared module-level LawRAGQuery instance.
    return law_rag_query(question)
|
43 |
+
|
44 |
+
# Gradio front end: one tab per tool, each wired to the matching wrapper function.
# NOTE(review): the original indentation was not visible when this block was
# reviewed; each gr.Row() is assumed to hold only the input widgets, with the
# button and output below it. Confirm against the intended layout.
with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.Tab("Law Tool"):
            with gr.Row():
                category = gr.Dropdown(
                    label="Law Category",
                    choices=["民法", "中華民國刑法", "民事訴訟法", "刑事訴訟法", "律師法"],
                    info="選擇法律類別",
                )
                # precision=0 makes Gradio round the value and pass an int,
                # matching get_law's `number: int` parameter (the default
                # would deliver a float such as 23.0).
                number = gr.Number(label="Law Number", info="ex:23", precision=0)
            query_btn = gr.Button("Submit")
            result = gr.Textbox(label="Result")
            query_btn.click(fn=get_law, inputs=[category, number], outputs=result)

        with gr.Tab("Law RAG Query"):
            with gr.Row():
                text_input = gr.Textbox(label="Question")
            rag_btn = gr.Button("Submit")
            # Two columns, one per element of the (content, score) pairs
            # returned by rag_query.
            text_output = gr.List(headers=["content", "score"], value=[], label="Result", col_count=2)
            rag_btn.click(fn=rag_query, inputs=text_input, outputs=text_output)
|
60 |
+
|
61 |
+
if __name__ == "__main__":
    # Listen on every interface so the hosting container can route traffic in,
    # and expose the app's functions through Gradio's built-in MCP server.
    # allowed_paths=["/"] was removed: it permitted Gradio to serve any file on
    # the host to remote clients, and this app only returns text, so no extra
    # file access is needed.
    demo.launch(mcp_server=True, server_name="0.0.0.0")
|
chroma_db/bf252843-3061-49cb-984b-ed9693b00042/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88220a5c7fbc7492735982b90a409aa09054a1b7870c5b04f9c4ea1aa5457a1e
|
3 |
+
size 8472000
|
chroma_db/bf252843-3061-49cb-984b-ed9693b00042/header.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0b959477622bd3d3a03a215f7b27bc5de1b741bf91bf267fa2b94518e51d0b6
|
3 |
+
size 100
|
chroma_db/bf252843-3061-49cb-984b-ed9693b00042/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fce4cd4579cfb12481086209613d5b102a648159609a3bc71b4d66a04ab6eaf
|
3 |
+
size 113967
|
chroma_db/bf252843-3061-49cb-984b-ed9693b00042/length.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f3e84c4f180dae5c9066041c4050236ae8957e1f2c62ed7df0cd8c26c86d922
|
3 |
+
size 8000
|
chroma_db/bf252843-3061-49cb-984b-ed9693b00042/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c393c07422eede9e16c4ce4b1395b78bcebb6ce004ca5df80248f241e829f8e4
|
3 |
+
size 16976
|
chroma_db/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18a18286966e3457e92e88e042446f0ee8047a9590a4172439162347d3f95563
|
3 |
+
size 21917696
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
duckduckgo_search
|
2 |
+
smolagents
|
3 |
+
gradio[mcp]
|
4 |
+
datasets
|
5 |
+
langchain
|
6 |
+
langchain-chroma
|
7 |
+
langchain-text-splitters
|
8 |
+
langchain-community
|
9 |
+
sentence-transformers
|
tools/__init__.py
ADDED
File without changes
|
tools/final_answer.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any, Optional
|
2 |
+
from smolagents.tools import Tool
|
3 |
+
|
4 |
+
class FinalAnswerTool(Tool):
    """Tool that returns the supplied answer unchanged."""

    name = "final_answer"
    description = "Provides a final answer to the given problem."
    inputs = {
        'answer': {
            'type': 'any',
            'description': 'The final answer to the problem',
        },
    }
    output_type = "any"

    def __init__(self, *args, **kwargs):
        # Only records the flag; positional and keyword arguments are accepted
        # but ignored, and the base-class initializer is not invoked here.
        self.is_initialized = False

    def forward(self, answer: Any) -> Any:
        """Return ``answer`` exactly as it was passed in."""
        return answer
|
tools/law_rag_query.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from smolagents import Tool
|
2 |
+
from langchain_chroma import Chroma
|
3 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
4 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
5 |
+
from datasets import load_dataset
|
6 |
+
import os
|
7 |
+
|
8 |
+
class LawRAGQuery(Tool):
    """Retrieval tool that returns the law passages most similar to a question.

    Passages are embedded and stored in a Chroma vector store that is persisted
    on disk and reused on later start-ups.
    """

    name = "law_rag_query"
    description = """
    This is a tool that returns law content by input a question. It will find the related law and return."""
    inputs = {
        "question": {
            "type": "string",
            "description": "the question",
        }
    }
    output_type = "array"
    # Class-level placeholder; replaced per instance in __init__.
    vectorstore = None

    def __init__(self):
        """Load the law dataset and open (or build) the vector store."""
        dataset = load_dataset("robin0307/law", split='train')
        law = dataset.to_pandas()
        self.vectorstore = self.get_vectorstore("thenlper/gte-large-zh", list(law['content']))
        super().__init__()

    def get_vectorstore(self, model_path, data_list, path="chroma_db"):
        """Return a Chroma store backed by ``path``, building it if absent.

        Args:
            model_path: Hugging Face model name used to embed the text.
            data_list: Raw documents to index; only read when ``path`` does
                not exist yet.
            path: Directory where the Chroma index is persisted.

        Returns:
            The opened or newly created Chroma vector store.
        """
        embeddings = HuggingFaceEmbeddings(model_name=model_path)

        if os.path.isdir(path):
            # A persisted index already exists: reuse it and skip splitting,
            # since the chunks would never be used on this path.
            vectorstore = Chroma(embedding_function=embeddings, persist_directory=path)
        else:
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=2048, chunk_overlap=50)
            # Split every document and flatten the per-document chunk lists.
            splits = [
                chunk
                for text in data_list
                for chunk in text_splitter.split_text(text)
            ]
            vectorstore = Chroma.from_texts(texts=splits, embedding=embeddings, persist_directory=path)

        print("count:", vectorstore._collection.count())
        return vectorstore

    def get_docs(self, input, k=10):
        """Return up to ``k`` ``(content, score)`` pairs for a query.

        Args:
            input: Query text to search for.
            k: Maximum number of results to return.

        Returns:
            A list of ``(page_content, score)`` tuples in the order the vector
            store returned them; empty when ``k`` is not positive.
        """
        if k <= 0:
            return []
        # Ask the store for exactly k neighbours. The previous loop fetched 50
        # and broke only after appending, so it returned k + 1 entries.
        retrieved_documents = self.vectorstore.similarity_search_with_score(input, k=k)
        return [(doc.page_content, score) for doc, score in retrieved_documents]

    def forward(self, question: str):
        """Tool entry point: return the passages retrieved for ``question``."""
        return self.get_docs(question)
|
54 |
+
|
tools/law_tool.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from smolagents import Tool
|
2 |
+
from datasets import load_dataset
|
3 |
+
|
4 |
+
class LawTool(Tool):
    """Tool that looks up one law article by category and article number."""

    name = "law_tool"
    description = """
    This is a tool that returns law content by input the category and number."""
    inputs = {
        "category": {
            "type": "string",
            "description": "the law category (such as 民法, 中華民國刑法, 民事訴訟法, 刑事訴訟法, 律師法 etc)",
        },
        "number": {
            "type": "integer",
            "description": "the law number (such as 23)"
        }
    }
    output_type = "string"
    # Class-level placeholder; replaced per instance in __init__.
    law = None

    def __init__(self):
        """Load the law dataset into a pandas DataFrame kept on the instance."""
        dataset = load_dataset("robin0307/law", split='train')
        self.law = dataset.to_pandas()
        super().__init__()

    def forward(self, category: str, number: int):
        """Return the content of the article matching ``category`` and ``number``.

        Args:
            category: Law category; the short form "刑法" is accepted as an
                alias for "中華民國刑法".
            number: Article number within that category.

        Returns:
            The ``content`` value of the first matching row.

        Raises:
            IndexError: If no row matches. The exception type is the one the
                previous empty-array indexing raised; it now carries a message
                naming the category and number that were requested.
        """
        if category == "刑法":
            category = "中華民國刑法"

        selector = (self.law["category"] == category) & (self.law["number"] == number)
        matches = self.law.loc[selector, "content"].values
        if len(matches) == 0:
            raise IndexError(
                f"No law article found for category {category!r} and number {number!r}."
            )
        return matches[0]
|