Robin Chiu committed on
Commit
64204b9
·
1 Parent(s): f5c4d58

Add application

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.sqlite3 filter=lfs diff=lfs merge=lfs -text
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mcp.server.fastmcp import FastMCP
2
+ from tools.law_tool import LawTool
3
+ from tools.law_rag_query import LawRAGQuery
4
+ import gradio as gr
5
+
6
# Module-level setup: both tool objects are constructed once here and reused by the get_law / rag_query functions defined further down; `mcp` is the FastMCP server those functions register on.
+ law_tool = LawTool()
7
+ law_rag_query = LawRAGQuery()
8
+
9
+ # Create an MCP server
10
+ mcp = FastMCP("Law Tool Service")
11
+
12
+
13
+ # Tool implementation
14
@mcp.tool()
def get_law(category: str, number: int) -> str:
    """
    This is a tool that returns law content by input the category and number.

    Args:
        category: the law category (such as 民法, 中華民國刑法, 民事訴訟法, 刑事訴訟法, 律師法 etc).
        number: the law number (such as 23).

    Returns:
        str: The content of the law.
    """
    # NOTE(review): the docstring wording is kept verbatim because tool
    # frameworks typically surface it to clients as the tool description.
    # The lookup itself is delegated to the shared module-level LawTool.
    return law_tool(category, number)
28
+
29
+ # Tool implementation
30
@mcp.tool()
def rag_query(question: str) -> list:
    """
    This is a tool that returns law content by input a question. It will find the related law and return.

    Args:
        question: the question to query the law.

    Returns:
        list: A list of law content related to the question.
    """
    # NOTE(review): the docstring wording is kept verbatim because tool
    # frameworks typically surface it to clients as the tool description.
    # Retrieval is delegated to the shared module-level LawRAGQuery.
    return law_rag_query(question)
43
+
44
# Gradio UI: a Blocks app with two tabs. The "Law Tool" tab passes the selected category and number to get_law and shows the returned text; the "Law RAG Query" tab passes a free-text question to rag_query and shows the result in a two-column (content, score) list.
# NOTE(review): indentation was lost in this rendering, so how the widgets nest inside the Row/Tab contexts cannot be read from here - confirm against the real app.py.
+ with gr.Blocks() as demo:
45
+ with gr.Tabs():
46
+ with gr.Tab("Law Tool"):
47
+ with gr.Row():
48
+ category = gr.Dropdown(label="Law Category", choices=["民法", "中華民國刑法", "民事訴訟法", "刑事訴訟法", "律師法"], info="選擇法律類別")
49
+ number = gr.Number(label="Law Number", info="ex:23")
50
+ query_btn = gr.Button("Submit")
51
+ result = gr.Textbox(label="Result")
52
+ query_btn.click(fn=get_law, inputs=[category, number], outputs=result)
53
+
54
+ with gr.Tab("Law RAG Query"):
55
+ with gr.Row():
56
+ text_input = gr.Textbox(label="Question")
57
+ rag_btn = gr.Button("Submit")
58
+ text_output = gr.List(headers=["content", "score"], value=[], label="Result", col_count=2)
59
+ rag_btn.click(fn=rag_query, inputs=text_input, outputs=text_output)
60
+
61
if __name__ == "__main__":
    # Start the Gradio app with its MCP server enabled, listening on
    # 0.0.0.0 so it is reachable from outside the host/container.
    #
    # SECURITY: the previous `allowed_paths=["/"]` was removed on purpose.
    # It whitelisted the filesystem root, which lets Gradio serve any file
    # the process can read to every client. This app only returns text and
    # table data, so no extra served paths are needed.
    demo.launch(mcp_server=True, server_name="0.0.0.0")
chroma_db/bf252843-3061-49cb-984b-ed9693b00042/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88220a5c7fbc7492735982b90a409aa09054a1b7870c5b04f9c4ea1aa5457a1e
3
+ size 8472000
chroma_db/bf252843-3061-49cb-984b-ed9693b00042/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0b959477622bd3d3a03a215f7b27bc5de1b741bf91bf267fa2b94518e51d0b6
3
+ size 100
chroma_db/bf252843-3061-49cb-984b-ed9693b00042/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fce4cd4579cfb12481086209613d5b102a648159609a3bc71b4d66a04ab6eaf
3
+ size 113967
chroma_db/bf252843-3061-49cb-984b-ed9693b00042/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f3e84c4f180dae5c9066041c4050236ae8957e1f2c62ed7df0cd8c26c86d922
3
+ size 8000
chroma_db/bf252843-3061-49cb-984b-ed9693b00042/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c393c07422eede9e16c4ce4b1395b78bcebb6ce004ca5df80248f241e829f8e4
3
+ size 16976
chroma_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18a18286966e3457e92e88e042446f0ee8047a9590a4172439162347d3f95563
3
+ size 21917696
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ duckduckgo_search
2
+ smolagents
3
+ gradio[mcp]
4
+ datasets
5
+ langchain
6
+ langchain-chroma
7
+ langchain-text-splitters
8
+ langchain-community
9
+ sentence-transformers
tools/__init__.py ADDED
File without changes
tools/final_answer.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional
2
+ from smolagents.tools import Tool
3
+
4
class FinalAnswerTool(Tool):
    """Tool that hands the supplied answer back unchanged as the final result."""

    # Tool metadata read by the agent framework.
    name = "final_answer"
    description = "Provides a final answer to the given problem."
    inputs = {
        'answer': {
            'type': 'any',
            'description': 'The final answer to the problem',
        },
    }
    output_type = "any"

    def __init__(self, *args, **kwargs):
        # Arguments are accepted for call-site compatibility but ignored.
        # NOTE(review): the base-class initializer is intentionally not
        # invoked here, matching the original behavior.
        self.is_initialized = False

    def forward(self, answer: Any) -> Any:
        """Return ``answer`` exactly as it was received."""
        return answer
tools/law_rag_query.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from langchain_chroma import Chroma
3
+ from langchain.embeddings import HuggingFaceEmbeddings
4
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
5
+ from datasets import load_dataset
6
+ import os
7
+
8
class LawRAGQuery(Tool):
    """Retrieve law passages related to a free-text question.

    Passages are looked up by embedding similarity in a Chroma vector store
    that is either loaded from disk or built from the "robin0307/law" dataset.
    """

    name = "law_rag_query"
    description = """
    This is a tool that returns law content by input a question. It will find the related law and return."""
    inputs = {
        "question": {
            "type": "string",
            "description": "the question",
        }
    }
    output_type = "array"
    # Chroma vector store; populated per instance in __init__.
    vectorstore = None

    def __init__(self):
        """Load the law dataset and prepare the vector store."""
        dataset = load_dataset("robin0307/law", split='train')
        law = dataset.to_pandas()
        self.vectorstore = self.get_vectorstore("thenlper/gte-large-zh", list(law['content']))
        super().__init__()

    def get_vectorstore(self, model_path, data_list, path="chroma_db"):
        """Return a Chroma store persisted at ``path``, building it if absent.

        Args:
            model_path: Hugging Face embedding model name.
            data_list: Raw texts to index; only used when the store has to
                be built because ``path`` does not exist yet.
            path: Directory where the Chroma store is persisted.

        Returns:
            The loaded or newly built Chroma vector store.
        """
        embeddings = HuggingFaceEmbeddings(model_name=model_path)

        if os.path.isdir(path):
            # A persisted store already exists: reuse it as-is.
            vectorstore = Chroma(embedding_function=embeddings, persist_directory=path)
        else:
            # Split the corpus only when we actually need to index it; the
            # chunks are useless when the store is loaded from disk.
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=2048, chunk_overlap=50)
            splits = [
                chunk
                for text in data_list
                for chunk in text_splitter.split_text(text)
            ]
            vectorstore = Chroma.from_texts(texts=splits, embedding=embeddings, persist_directory=path)

        print("count:", vectorstore._collection.count())
        return vectorstore

    def get_docs(self, input, k=10):
        """Return up to ``k`` ``(content, score)`` pairs for the query.

        Args:
            input: The query text.
            k: Maximum number of results to return.

        Returns:
            A list of ``(page_content, score)`` tuples in the order produced
            by the vector store; empty when ``k`` is not positive.
        """
        if k <= 0:
            return []
        # Ask the store for exactly k hits. The earlier version always
        # fetched 50 and, because of an off-by-one, handed back k + 1 rows.
        hits = self.vectorstore.similarity_search_with_score(input, k=k)
        return [(doc.page_content, score) for doc, score in hits]

    def forward(self, question: str):
        """Tool entry point: return the passages related to ``question``."""
        return self.get_docs(question)
54
+
tools/law_tool.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from datasets import load_dataset
3
+
4
class LawTool(Tool):
    """Look up the text of a single law article by category and number.

    The data comes from the "robin0307/law" dataset, loaded into a pandas
    DataFrame when the tool is constructed.
    """

    name = "law_tool"
    description = """
    This is a tool that returns law content by input the category and number."""
    inputs = {
        "category": {
            "type": "string",
            "description": "the law category (such as 民法, 中華民國刑法, 民事訴訟法, 刑事訴訟法, 律師法 etc)",
        },
        "number": {
            "type": "integer",
            "description": "the law number (such as 23)"
        }
    }
    output_type = "string"
    # DataFrame holding the law dataset; populated per instance in __init__.
    law = None

    def __init__(self):
        """Load the law dataset into memory."""
        dataset = load_dataset("robin0307/law", split='train')
        self.law = dataset.to_pandas()
        super().__init__()

    def forward(self, category: str, number: int):
        """Return the content of the law matching ``category`` and ``number``.

        Args:
            category: Law category name; the short form "刑法" is accepted
                as an alias for "中華民國刑法".
            number: Article number within that category.

        Returns:
            The content of the first matching row, or a short explanatory
            message when no row matches.
        """
        if category == "刑法":
            category = "中華民國刑法"

        selector = (self.law["category"] == category) & (self.law["number"] == number)
        matches = self.law.loc[selector, "content"]

        # Previously an unknown category/number crashed with an opaque
        # IndexError from `.values[0]`; report the miss explicitly instead.
        if matches.empty:
            return f"No law found for category '{category}' and number {number}."
        return matches.values[0]