Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- app.py +62 -0
- readme.md +28 -0
- requirements.txt +4 -0
app.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import git
|
| 3 |
+
import os
|
| 4 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def clone_repo(repo_url):
    """Clone ``repo_url`` into the local ``repo_clone`` directory.

    A checkout left behind by an earlier call is deleted first: cloning into
    a destination that already exists and is not empty fails, which made
    every request after the first one error out.

    Args:
        repo_url: URL (or local path) of the Git repository to clone.

    Returns:
        The path of the fresh working copy (always ``"repo_clone"``).
    """
    import shutil  # local import keeps this block self-contained

    local_path = "repo_clone"
    # Start from a clean slate; ignore_errors covers the first call, when
    # the directory does not exist yet.
    shutil.rmtree(local_path, ignore_errors=True)
    git.Repo.clone_from(repo_url, local_path)
    return local_path
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def process_repo(repo_url, option):
    """Clone a repository and build a per-file summary dictionary.

    Args:
        repo_url: URL of the Git repository to clone.
        option: ``"Pre-trained"`` selects the default question-answering
            pipeline; any other value loads the model and tokenizer stored
            in ``./model``.

    Returns:
        A dict mapping the path of every readable text file to
        ``{"summary": ..., "text": ..., "keywords": ...}`` where ``summary``
        is the first 50 characters of the file (plus ``"..."`` when
        truncated) and ``keywords`` is the pipeline's answer for it.
    """
    # A question-answering pipeline needs a question *and* a context; the
    # summary alone is rejected with a ValueError.
    keyword_question = "What is this file about?"

    if option == "Pre-trained":
        qa_pipeline = pipeline('question-answering')
    else:
        model_path = "./model"
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForQuestionAnswering.from_pretrained(model_path)
        qa_pipeline = pipeline('question-answering', model=model, tokenizer=tokenizer)

    repo_path = clone_repo(repo_url)
    result = {}
    for root, dirs, files in os.walk(repo_path):
        # Prune Git's internal store in place so os.walk never descends into
        # it; its objects are binary and are not part of the project source.
        dirs[:] = [d for d in dirs if d != ".git"]
        for name in files:
            file_path = os.path.join(root, name)
            try:
                with open(file_path, 'r', encoding="utf-8") as f:
                    text = f.read()
            except (UnicodeDecodeError, OSError):
                # Best-effort scan: skip binary files and unreadable entries
                # (e.g. dangling symlinks) instead of aborting the whole run.
                continue

            summary = (text[:50] + "...") if len(text) > 50 else text
            if summary.strip():
                keywords = qa_pipeline(question=keyword_question, context=summary)['answer']
            else:
                # The pipeline refuses an empty context; nothing to extract.
                keywords = ""
            result[file_path] = {"summary": summary, "text": text, "keywords": keywords}
    return result
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def qa_chatbot(repo_dict, question):
    """Answer ``question`` using the text of every processed file as context.

    Args:
        repo_dict: Mapping of file path to a dict holding at least the
            ``"summary"`` and ``"text"`` keys.
        question: The question to answer.

    Returns:
        The ``'answer'`` field of the question-answering pipeline's result.

    Raises:
        ValueError: If ``question`` is blank or ``repo_dict`` is empty.
    """
    global qa_pipeline

    # Fail fast with a clear message before any model is loaded.
    if not question or not question.strip():
        raise ValueError("question must not be empty")
    if not repo_dict:
        raise ValueError("repo_dict must contain at least one processed file")

    context = " ".join(
        f"{entry['summary']} {entry['text']}" for entry in repo_dict.values()
    )

    # Reuse a module-level pipeline if one exists; otherwise build the
    # default one once and cache it. Previously this name was never defined
    # at module level, so every call ended in a NameError.
    try:
        qa = qa_pipeline
    except NameError:
        qa = qa_pipeline = pipeline('question-answering')

    return qa(question=question, context=context)['answer']
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# --- Gradio UI wiring -------------------------------------------------------
# Changes from the previous wiring:
#   * ``gr.inputs.Dictionary`` does not exist, so the script crashed with an
#     AttributeError at start-up. The processed repository is now pasted into
#     a plain Textbox and parsed back into a dict.
#   * ``inputs`` must be a list of components, not a dict.
#   * ``launch()`` blocks, so the second interface was never served. Both
#     interfaces are now tabs of one app that is launched once.
import ast

input_repo = gr.inputs.Textbox(label="Enter Git repository URL")
output_processed_repo = gr.outputs.Textbox(label="Processed Git repository")
output_qa_chatbot = gr.outputs.Textbox(label="Answer")

model_options = ["Pre-trained", "Fine-tuned"]
input_option = gr.inputs.Dropdown(choices=model_options, label="Choose a model option")

process_repo_interface = gr.Interface(
    fn=process_repo,
    inputs=[input_repo, input_option],
    outputs=output_processed_repo,
    title="Process Git Repository",
)


def _qa_chatbot_from_text(repo_text, question):
    """Parse a pasted repository dictionary and forward it to ``qa_chatbot``.

    ``repo_text`` is expected to be the dictionary shown by the processing
    tab. ``ast.literal_eval`` only accepts Python literals, so it is safe to
    use on text typed by the user.
    """
    try:
        repo_dict = ast.literal_eval(repo_text)
    except (ValueError, SyntaxError) as err:
        raise ValueError("Processed repository must be a dictionary literal") from err
    if not isinstance(repo_dict, dict):
        raise ValueError("Processed repository must be a dictionary literal")
    return qa_chatbot(repo_dict, question)


qa_chatbot_interface = gr.Interface(
    fn=_qa_chatbot_from_text,
    inputs=[
        gr.inputs.Textbox(lines=10, label="Processed Git repository"),
        gr.inputs.Textbox(label="Question"),
    ],
    outputs=output_qa_chatbot,
    title="QA Chatbot",
)

# NOTE(review): TabbedInterface is assumed to be available in the Gradio
# version the Space runs (README declares sdk_version 3.21.0) — confirm.
gr.TabbedInterface(
    [process_repo_interface, qa_chatbot_interface],
    ["Process Git Repository", "QA Chatbot"],
).launch()
|
readme.md
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Git-Repo-QA-Chatbot-Trainer
|
| 3 |
+
emoji: 🤖
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: gray
|
| 6 |
+
sdk: gradio
|
| 7 |
+
python_version: 3.9.0
|
| 8 |
+
sdk_version: 3.21.0
|
| 9 |
+
app_file: app.py
|
| 10 |
+
app_port: 7860
|
| 11 |
+
fullWidth: true
|
| 12 |
+
models:
|
| 13 |
+
- distilbert-base-uncased-distilled-squad
|
| 14 |
+
- git-repo-qa-chatbot-finetuned # Name of fine-tuned model for Git-Repo-QA-Chatbot
|
| 15 |
+
datasets:
|
| 16 |
+
- squad # Name of dataset used for fine-tuning the model
|
| 17 |
+
tags:
|
| 18 |
+
- NLP
|
| 19 |
+
- Question-Answering
|
| 20 |
+
- Git
|
| 21 |
+
- Transformer
|
| 22 |
+
pinned: true
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==3.21.0
|
| 2 |
+
torch==1.9.0
|
| 3 |
+
transformers==4.9.2
|
| 4 |
+
gitpython==3.1.24
|