# NOTE: "Spaces: Sleeping" page residue removed — this file is the Space's app.py.
import gradio as gr
import pandas as pd
import tensorflow as tf
import nltk
import spacy
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.sequence import pad_sequences
import requests
import pickle

# --- One-time NLP resource setup -------------------------------------------
import spacy.cli

# Load the spaCy model if it is already installed; only download it when
# missing (spacy.load raises OSError for an uninstalled model), so app
# restarts do not re-download the model every time.
try:
    nlp = spacy.load('en_core_web_sm')
except OSError:
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load('en_core_web_sm')

# NLTK data used below: 'punkt_tab' by word_tokenize(), 'stopwords' for the
# stop-word filter. nltk.download() is a no-op when the data already exists.
nltk.download('punkt_tab')
nltk.download('stopwords')

# English stop words removed during preprocessing.
stop_words = set(stopwords.words('english'))
# --- Download and load the classifier from the Hugging Face Hub -------------
model_url = "https://huggingface.co/Zmorell/HIPA_2/resolve/main/saved_keras_model.keras"
local_model_path = "saved_keras_model.keras"

# Fail fast on HTTP errors (raise_for_status) instead of silently writing an
# HTML error page into the .keras file; bound the request with a timeout so a
# hung connection cannot stall startup forever.
response = requests.get(model_url, timeout=60)
response.raise_for_status()
with open(local_model_path, 'wb') as f:
    f.write(response.content)
print(f"Model downloaded to {local_model_path}")

# Load the downloaded Keras model.
model = tf.keras.models.load_model(local_model_path)
print(f"Model loaded from {local_model_path}")
# --- Load the fitted Keras tokenizer ----------------------------------------
# The tokenizer must be the one fitted at training time so that word indices
# match the embedding the model was trained with.
# NOTE(review): pickle.load is only acceptable because tokenizer.pickle ships
# with this app — never unpickle untrusted files.
tokenizer_file_path = "tokenizer.pickle"
with open(tokenizer_file_path, 'rb') as handle:
    tokenizer = pickle.load(handle)
print("Tokenizer loaded from tokenizer.pickle")
def preprocess_text(text):
    """Clean *text* for the model: strip punctuation, lowercase, tokenize,
    drop English stop words, and lemmatize with spaCy.

    Returns a single space-joined string of lemmas.
    """
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    kept = [w for w in word_tokenize(cleaned.lower()) if w not in stop_words]
    # Re-join so spaCy sees one document, then emit its lemmas.
    return ' '.join(tok.lemma_ for tok in nlp(' '.join(kept)))
def predict(text):
    """Run the HIPAA-violation classifier on *text*.

    Returns "True = <score>" when the sigmoid output is >= 0.5, otherwise
    "False = <score>". On any failure the error message is returned so the
    UI shows it instead of crashing.
    """
    try:
        print(f"Input text: {text}")
        inputs = preprocess_text(text)
        print(f"Preprocessed text: {inputs}")
        inputs = tokenizer.texts_to_sequences([inputs])
        print(f"Tokenized text: {inputs}")
        # Pad/truncate to the fixed input length the model was trained on.
        inputs = pad_sequences(inputs, maxlen=750, padding='post')
        print(f"Padded text: {inputs}")
        outputs = model.predict(inputs)
        print(f"Model outputs: {outputs}")
        # Single sigmoid output: threshold at 0.5 for the boolean label.
        prediction = outputs[0][0]
        label = "True" if prediction >= 0.5 else "False"
        return f"{label} = {prediction:.2f}"
    except Exception as e:
        print(f"Error during prediction: {e}")
        return f"Error during prediction: {e}"
# Custom CSS for the Gradio layout. Fix: `font-size: 16` had no unit, which
# is invalid CSS and is ignored by browsers — it must be `16px`.
ui_css = """
#body {
    height: 700px;
    width: 500px;
    background-color: rgb(108, 207, 239);
    border-radius: 15px;
}
#hipaa-image {
    width: 75px;
}
#input-box {
    width: 480px;
    border: 2px solid black;
    margin-left: 8px;
    margin-right: 8px;
    overflow-y: scroll;
    height: 150px;
    max-height: 150px;
}
#output-elems {
    width: 480px;
    border: 2px solid black;
    margin-left: 8px;
    margin-right: 8px;
    padding: 1em;
}
#submit-button, #clear-button {
    color: white;
    height: 45px;
    width: 60px;
    margin: 10px;
    border-radius: 5px;
    border: 5px solid black;
}
#submit-button {
    background-color: red;
}
#clear-button {
    background-color: grey;
}
#addinfo {
    font-size: 16px;
    justify-self: center;
}
"""
# --- Gradio interface --------------------------------------------------------
# Layout: image header + instructions, an input textbox, a read-only output
# textbox, and Submit/Clear buttons wired to predict().
with gr.Blocks(css=ui_css) as demo:
    with gr.Column(elem_id="body"):
        with gr.Row(elem_id="header"):
            with gr.Row(elem_id="hipaa-image"):
                gr.Image(value="hipaa-e1638383751916.png")
            with gr.Row():
                gr.Markdown(
                    "Enter text below to determine if it is a HIPAA violation. Smaller inputs may be less accurate.",
                    elem_id="addinfo",
                )
        with gr.Row(elem_id="interactives"):
            inputs = gr.Textbox(label="Enter Input Text Here", elem_id="input-box", lines=5)
        with gr.Row(elem_id="output-elems"):
            gr.Markdown("This text is a violation: ")
            outputs = gr.Textbox(label="", elem_id="output-box", interactive=False)
        with gr.Row():
            submit_button = gr.Button("Submit", elem_id="submit-button")
            clear_button = gr.Button("Clear", elem_id="clear-button")

    # Wire events: Submit runs the classifier; Clear empties both boxes.
    submit_button.click(predict, inputs=inputs, outputs=outputs)
    clear_button.click(lambda: ("", ""), inputs=None, outputs=[inputs, outputs])

demo.launch()