Hjayswal committed on
Commit
4de5d19
1 Parent(s): 9fd84d3

Upload 4 files

Browse files
Files changed (4) hide show
  1. data.csv +11 -0
  2. pasta.py +168 -0
  3. qnacsv.csv +0 -0
  4. requirements.txt +13 -0
data.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Patient_Name,Country,Disease,CUI,Snomed,Oxygen_Rate,Med_Type,Admission_Date
2
+ John,India,Severe Fever,CUI012345,SNO1234,92,Commercial,04-04-2020
3
+ Surya,USA,Cancer,CUI012346,SNO1235,98,Commercial,23-09-2020
4
+ Ajith,Russia,Diabetes,CUI012347,SNO1236,96,Medicare,04-03-2020
5
+ Aman,India,Severe Fever,CUI012348,SNO1237,98,Commercial,07-03-2020
6
+ Ben,USA,Severe Fever,CUI012349,SNO1238,89,Medicare,15-01-2020
7
+ Ravi,India,Edema,CUI012350,SNO1239,99,Medicaid,05-05-2020
8
+ Jitu,USA,Alzheimer,CUI012351,SNO1240,95,Medicaid,02-12-2020
9
+ Anjali,Russia,Alzheimer,CUI012352,SNO1241,94,Medicare,22-04-2020
10
+ Priya,India,Cardiac Arrest,CUI012353,SNO1242,94,Medicaid,11-08-2020
11
+ Dinesh,USA,Pneumonia,CUI012354,SNO1243,93,Medicare,02-09-2020
pasta.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Fri May 26 14:07:22 2023
4
+
5
+ @author: vibin
6
+ """
7
+
8
+ import streamlit as st
9
+ from pandasql import sqldf
10
+ import pandas as pd
11
+ import re
12
+ from typing import List
13
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
14
+ import re
15
+
16
+
17
+
18
+
19
+
20
+
21
+ ### Main
22
+
# Sidebar selector that decides which of the two demo pages is rendered.
nav = st.sidebar.radio("Navigation", ["TAPAS", "Text2SQL"])
if nav == "TAPAS":
    # Page header: the title goes in the middle of three equal columns,
    # the tagline in the wide right-hand column of a 3:12 split.
    _, title_col, _ = st.columns(3)
    title_col.title("TAPAS")

    _, tagline_col = st.columns([3, 12])
    tagline_col.text("Tabular Data Text Extraction using text")

    # Every cell is cast to str before the table is handed to the
    # table-question-answering pipeline (the TAPAS tokenizer documents
    # that all cell values must be text).
    table = pd.read_csv("data.csv").astype(str)
    st.text("DataSet - ")
    st.dataframe(table, width=3000, height=400)

    st.title("")  # empty title; presumably used only as vertical spacing

    # Sample questions offered in the drop-down. Each entry appears once
    # (the original list repeated "Who are the patients from india").
    sample_questions = [
        "Which country has low medicare",
        "Who are the patients from india",
        "Patients who have Edema",
        "CUI code for diabetes patients",
        "Patients having oxygen less than 94 but 91",
    ]
    default_question = st.selectbox("Choose your text", sample_questions, index=0)

    st.title("")

    # The selected sample pre-fills the text area; the user may edit it.
    question = st.text_area("TAPAS Input", default_question)

    if st.button("Predict"):
        if not question.strip():
            # An empty query cannot be answered; tell the user instead of
            # letting the pipeline raise.
            st.warning("Please enter a question before predicting.")
        else:
            tqa = pipeline(
                task="table-question-answering",
                model="google/tapas-base-finetuned-wtq",
            )
            answer = tqa(table=table, query=question)["answer"]
            st.text("Output - ")
            st.success(f"{answer}")
57
+
58
+
59
+
60
+ elif nav == "Text2SQL":
61
+
62
+ ### Function
63
+
def prepare_input(question: str, table: List[str]) -> "torch.Tensor":
    """Build the model prompt for *question* and return its token ids.

    The prompt has the form ``question: <question> table: <col1>,<col2>,...``.

    Relies on the module-level ``tokenizer``, which this script assigns only
    inside the Text2SQL "Predict" handler, so it must be called after that
    assignment has run.

    Args:
        question: Natural-language question to convert.
        table: Column names of the target table.

    Returns:
        The ``input_ids`` produced by the tokenizer with
        ``return_tensors="pt"``.
    """
    table_prefix = "table:"
    question_prefix = "question:"
    join_table = ",".join(table)
    inputs = f"{question_prefix} {question} {table_prefix} {join_table}"
    # Request truncation explicitly: `max_length` on its own relies on an
    # implicit default strategy and makes the tokenizer log a warning.
    encoded = tokenizer(
        inputs,
        max_length=512,
        truncation=True,
        return_tensors="pt",
    )
    return encoded.input_ids
71
+
def inference(question: str, table: List[str]) -> str:
    """Generate text for *question* against the columns listed in *table*.

    Uses the module-level ``model`` and ``tokenizer`` together with
    ``prepare_input``; all three must already be defined when this is called.

    Args:
        question: Natural-language question.
        table: Column names passed on to ``prepare_input``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # Encode the prompt and move it to the device the model lives on.
    prompt_ids = prepare_input(question=question, table=table).to(model.device)
    generated = model.generate(
        inputs=prompt_ids,
        num_beams=10,
        top_k=10,
        max_length=700,
    )
    best_sequence = generated[0]
    return tokenizer.decode(token_ids=best_sequence, skip_special_tokens=True)
78
+
79
+
# Page header: the title sits in the middle of three equal columns, the
# tagline in the wide right-hand column of a 1:20 split.
_, title_col, _ = st.columns(3)
title_col.title("Text2SQL")

_, tagline_col = st.columns([1, 20])
tagline_col.text("Text will be converted to SQL Query and can extract the data from DataSet")

# Import Data
# NOTE: the name `df_qna` is significant - the Predict handler uses the
# string "df_qna" as the SQL table name.
df_qna = pd.read_csv("qnacsv.csv", encoding="unicode_escape")

st.title("")  # empty title; presumably used only as vertical spacing

st.text("DataSet - ")
st.dataframe(df_qna, width=3000, height=500)

st.title("")

# Sample questions offered in the drop-down.
sample_questions = [
    "what interface is measure indicator code = 72_HR_ABX and version is 1 and source is TD",
    "get class code with measure = 72_HR_ABX",
    "get sum of version for Class_Code is Antibiotic Stewardship",
    "what interface is measure indicator code = 72_HR_ABX",
]
v2 = st.selectbox("Choose your text", sample_questions, index=0)

st.title("")

# The selected sample pre-fills the text area; the user may edit it before
# pressing Predict. `sql_txt` is read by the Predict handler.
sql_txt = st.text_area("Text for SQL Conversion", v2)
104
+
105
+
def _quote_sql_values(sql: str) -> str:
    """Wrap every bare value that follows ``=`` in single quotes.

    A value runs from just after ``=`` up to the next upper-case ``AND``
    keyword or the end of the query. Each value is rewritten at its own
    position, so quoting one value never touches other occurrences of the
    same text. Values that are empty or already quoted are left unchanged,
    and single quotes inside a value are doubled.
    """

    def _quote(match: "re.Match") -> str:
        value = match.group(1).strip()
        if not value:
            return match.group(0)
        already_quoted = (
            len(value) >= 2 and value[0] == value[-1] and value[0] in "'\""
        )
        if already_quoted:
            return match.group(0)
        escaped = value.replace("'", "''")
        return f"= '{escaped}'"

    # Lazy value match, terminated by whitespace + AND or by end of string.
    return re.sub(r"=\s*(.+?)(?=\s+AND\b|\s*$)", _quote, sql)


if st.button("Predict"):
    # `tokenizer` and `model` are assigned at module level on purpose:
    # prepare_input() and inference() read them as globals.
    model_id = "juierror/flan-t5-text2sql-with-schema"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

    # Must match the name of the DataFrame variable holding the data;
    # presumably pandasql resolves table names against variables in scope.
    table_name = "df_qna"
    table_col = [
        "Type",
        "Class_Code",
        "Version",
        "Measure_Indicator_Code",
        "Measure_Indicator",
        "Name",
        "Description_Definition",
        "Source",
        "Interfaces",
    ]

    txt_sql = inference(question=sql_txt, table=table_col)

    ### SQL Modification
    # Replace the placeholder table name as a whole word only, so text such
    # as "Notable" inside a value is not rewritten.
    txt_sql = re.sub(r"\btable\b", table_name, txt_sql)
    txt_sql = _quote_sql_values(txt_sql)

    st.success(f"{txt_sql}")
    query_result = sqldf(txt_sql)

    st.text("Output - ")
    st.write(query_result)
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
+
163
+
164
+
165
+
166
+
167
+
168
+
qnacsv.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pip
2
+ Cmake
3
+ wheel
4
+ pandas
5
+ jinja2
6
+ pandasql
7
+ Cython
8
+ datasets
9
+ huggingface-hub
10
+ tapas
11
+ torch
12
+ transformers
13
+ streamlit