Plong95 committed on
Commit
fa62358
1 Parent(s): f27fd02

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +62 -0
  2. readme.md +28 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import git
3
+ import os
4
+ from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
5
+
6
+
7
def clone_repo(repo_url):
    """Clone the Git repository at ``repo_url`` and return the checkout path.

    The checkout always lives in the fixed ``repo_clone`` directory. A
    checkout left over from an earlier call is deleted first, because git
    refuses to clone into an existing non-empty directory and every call
    after the first would otherwise fail.

    Args:
        repo_url: URL (or local path) of the repository to clone.

    Returns:
        The path of the fresh checkout (``"repo_clone"``).
    """
    import shutil  # local import: only needed for the stale-checkout cleanup

    local_path = "repo_clone"
    if os.path.isdir(local_path):
        shutil.rmtree(local_path)
    git.Repo.clone_from(repo_url, local_path)
    return local_path
11
+
12
+
13
def process_repo(repo_url, option):
    """Clone a repository and build a per-file summary/keyword index.

    Args:
        repo_url: URL of the Git repository to clone and analyse.
        option: ``"Pre-trained"`` selects the default question-answering
            pipeline; any other value loads the model stored in ``./model``.

    Returns:
        A dict mapping each readable file path to a dict with the keys
        ``"summary"`` (first 50 characters, with ``"..."`` appended when the
        text was truncated), ``"text"`` (full file content) and
        ``"keywords"`` (the pipeline's answer for the summary, or ``""`` for
        an empty file).
    """
    # A question-answering pipeline needs both a question and a context;
    # this fixed prompt is what gets asked about every file summary.
    keyword_question = "What is this file about?"

    if option == "Pre-trained":
        qa_pipeline = pipeline('question-answering')
    else:
        model_path = "./model"
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForQuestionAnswering.from_pretrained(model_path)
        qa_pipeline = pipeline('question-answering', model=model, tokenizer=tokenizer)

    repo_path = clone_repo(repo_url)
    result = {}
    for root, dirs, files in os.walk(repo_path):
        # Prune Git's own metadata in place so os.walk never descends into it.
        dirs[:] = [d for d in dirs if d != ".git"]
        for name in files:
            file_path = os.path.join(root, name)
            try:
                with open(file_path, 'r', encoding="utf-8") as f:
                    text = f.read()
            except (UnicodeDecodeError, OSError):
                # Binary or unreadable files cannot be summarised as text;
                # skip them instead of aborting the whole repository.
                continue

            summary = text[:50] + "..." if len(text) > 50 else text
            if summary.strip():
                keywords = qa_pipeline(question=keyword_question, context=summary)['answer']
            else:
                # The pipeline rejects an empty context, so skip the call.
                keywords = ""
            result[file_path] = {"summary": summary, "text": text, "keywords": keywords}
    return result
33
+
34
+
35
def qa_chatbot(repo_dict, question):
    """Answer ``question`` using the text of every file in ``repo_dict``.

    Args:
        repo_dict: Mapping of file path to a dict holding at least the keys
            ``"summary"`` and ``"text"``.
        question: Natural-language question to ask about the repository.

    Returns:
        The ``"answer"`` string produced by the question-answering pipeline,
        or ``""`` when there is no question or no text to search.
    """
    if not repo_dict or not question:
        return ""

    # Join once instead of growing a string with repeated ``+=``.
    context = " ".join(
        entry['summary'] + " " + entry['text'] for entry in repo_dict.values()
    )
    if not context.strip():
        return ""

    # No pipeline exists at module level, so build the default one on first
    # use and cache it on the function to avoid reloading it on every call.
    qa = getattr(qa_chatbot, "_pipeline", None)
    if qa is None:
        qa = pipeline('question-answering')
        qa_chatbot._pipeline = qa
    return qa(question=question, context=context)['answer']
41
+
42
+
43
# UI wiring, written against the Gradio 3.x component API that the Space's
# README requests through ``sdk_version``. Both tools are served from a single
# tabbed app: ``launch()`` blocks, so two consecutive launches would never
# bring up the second interface.


def _process_repo_as_json(repo_url, option):
    """Run ``process_repo`` and serialise the result for display in a Textbox."""
    import json

    return json.dumps(process_repo(repo_url, option), indent=2)


def _qa_chatbot_from_json(repo_json, question):
    """Parse pasted ``process_repo`` JSON and forward it to ``qa_chatbot``."""
    import json

    raw = (repo_json or "").strip()
    try:
        repo_dict = json.loads(raw) if raw else {}
    except json.JSONDecodeError as err:
        raise gr.Error("The processed repository must be valid JSON.") from err
    return qa_chatbot(repo_dict, question)


input_repo = gr.Textbox(label="Enter Git repository URL")
output_processed_repo = gr.Textbox(label="Processed Git repository")
output_qa_chatbot = gr.Textbox(label="Answer")

model_options = ["Pre-trained", "Fine-tuned"]
input_option = gr.Dropdown(choices=model_options, label="Choose a model option")

process_repo_interface = gr.Interface(
    fn=_process_repo_as_json,
    inputs=[input_repo, input_option],
    outputs=output_processed_repo,
    title="Process Git Repository",
)

qa_chatbot_interface = gr.Interface(
    fn=_qa_chatbot_from_json,
    inputs=[
        # Paste the JSON shown by the "Process Git Repository" tab here.
        gr.Textbox(label="Processed Git repository", lines=10),
        gr.Textbox(label="Question"),
    ],
    outputs=output_qa_chatbot,
    title="QA Chatbot",
)

app = gr.TabbedInterface(
    [process_repo_interface, qa_chatbot_interface],
    ["Process Git Repository", "QA Chatbot"],
)

if __name__ == "__main__":
    app.launch()
readme.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Git-Repo-QA-Chatbot-Trainer
3
+ emoji: 🤖
4
+ colorFrom: red
5
+ colorTo: gray
6
+ sdk: gradio
7
+ python_version: 3.9.0
8
+ sdk_version: 3.21.0
9
+ app_file: app.py
10
+ app_port: 7860
11
+ fullWidth: true
12
+ models:
13
+ - distilbert-base-uncased-distilled-squad
14
+ - git-repo-qa-chatbot-finetuned # Name of fine-tuned model for Git-Repo-QA-Chatbot
15
+ datasets:
16
+ - squad # Name of dataset used for fine-tuning the model
17
+ tags:
18
+ - NLP
19
+ - Question-Answering
20
+ - Git
21
+ - Transformer
22
+ pinned: true
23
+ ---
24
+
25
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
26
+
27
+
28
+
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==3.21.0
2
+ torch==1.9.0
3
+ transformers==4.9.2
4
+ gitpython==3.1.24