Rithvickkr
committed on
Commit
·
c19c237
1
Parent(s):
edbf821
DSATP and LlamaIndex with real CVE/IoT corpus
Browse files- app.py +29 -2
- corpus/cve_processed.txt +0 -0
- corpus/nvd_cve.json +0 -0
- requirements.txt +4 -1
app.py
CHANGED
@@ -1,7 +1,12 @@
|
|
1 |
import gradio as gr
|
2 |
import yara
|
|
|
|
|
3 |
import os
|
4 |
|
|
|
|
|
|
|
5 |
# DSATP log parsing (embedded for Spaces)
|
6 |
def dsatp_parse_log(text: str) -> dict:
|
7 |
"""Parse log for IoT threats."""
|
@@ -40,9 +45,22 @@ def dsatp_yara_scan(file_path: str) -> dict:
|
|
40 |
except Exception as e:
|
41 |
return {"error": str(e), "severity": "Unknown", "mitigation": "Check file format"}
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
# Chatbot function
|
44 |
def chatbot_response(user_input, file, history):
|
45 |
-
"""Process input or file with DSATP."""
|
46 |
if history is None:
|
47 |
history = []
|
48 |
input_text = user_input
|
@@ -54,7 +72,16 @@ def chatbot_response(user_input, file, history):
|
|
54 |
else:
|
55 |
scan_result = dsatp_parse_log(input_text)
|
56 |
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
updated_history = history + [(user_input or "File uploaded", response)]
|
59 |
return updated_history, scan_result
|
60 |
|
|
|
1 |
import gradio as gr
|
2 |
import yara
|
3 |
+
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
|
4 |
+
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
5 |
import os
|
6 |
|
7 |
+
# Configure LlamaIndex to use local embeddings
|
8 |
+
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
9 |
+
|
10 |
# DSATP log parsing (embedded for Spaces)
|
11 |
def dsatp_parse_log(text: str) -> dict:
|
12 |
"""Parse log for IoT threats."""
|
|
|
45 |
except Exception as e:
|
46 |
return {"error": str(e), "severity": "Unknown", "mitigation": "Check file format"}
|
47 |
|
48 |
+
# Initialize LlamaIndex with real corpus
|
49 |
+
def init_llama_index(corpus_dir: str = "corpus"):
    """Load the CVE/IoT corpus into a LlamaIndex vector index.

    Args:
        corpus_dir: Directory whose files are read as documents. Defaults to
            "corpus", the path that was previously hard-coded.

    Returns:
        The VectorStoreIndex built from the loaded documents, or None when
        the directory does not exist or loading/indexing raises.
    """
    # Check the directory up front so a missing corpus produces a clear,
    # specific message instead of whatever the reader happens to raise.
    if not os.path.isdir(corpus_dir):
        print(f"Error loading corpus: directory '{corpus_dir}' not found")
        return None
    try:
        documents = SimpleDirectoryReader(corpus_dir, filename_as_id=True).load_data()
        return VectorStoreIndex.from_documents(documents)
    except Exception as e:
        # Deliberately best-effort: callers treat None as "no index available"
        # and keep running without retrieval context.
        print(f"Error loading corpus: {e}")
        return None
|
57 |
+
|
58 |
+
# Build the index once at import time; None means the corpus could not be loaded.
index = init_llama_index()

# Creating the query engine can itself fail (NOTE(review): as_query_engine()
# appears to resolve the globally configured LLM, which may raise when no LLM
# backend/credentials are available - confirm against the installed
# llama-index version). Degrade to None so the app still starts;
# chatbot_response already falls back to "No context available." in that case.
try:
    query_engine = index.as_query_engine() if index else None
except Exception as e:
    print(f"Error creating query engine: {e}")
    query_engine = None
|
60 |
+
|
61 |
# Chatbot function
|
62 |
def chatbot_response(user_input, file, history):
|
63 |
+
"""Process input or file with DSATP and LlamaIndex."""
|
64 |
if history is None:
|
65 |
history = []
|
66 |
input_text = user_input
|
|
|
72 |
else:
|
73 |
scan_result = dsatp_parse_log(input_text)
|
74 |
|
75 |
+
# Query LlamaIndex for context
|
76 |
+
context_str = "No context available."
|
77 |
+
if query_engine:
|
78 |
+
try:
|
79 |
+
context = query_engine.query(f"Mitigation for: {input_text}")
|
80 |
+
context_str = str(context)
|
81 |
+
except Exception as e:
|
82 |
+
context_str = f"Context error: {e}"
|
83 |
+
|
84 |
+
response = f"Security Analyst: {scan_result['classification']}. Severity: {scan_result['severity']}. Mitigation: {scan_result['mitigation']}. Confidence: {scan_result['confidence']:.1f}. Context: {context_str}"
|
85 |
updated_history = history + [(user_input or "File uploaded", response)]
|
86 |
return updated_history, scan_result
|
87 |
|
corpus/cve_processed.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
corpus/nvd_cve.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
CHANGED
@@ -3,4 +3,7 @@ textblob
|
|
3 |
fastapi
|
4 |
uvicorn
|
5 |
yara-python
|
6 |
-
requests
|
|
|
|
|
|
|
|
3 |
fastapi
|
4 |
uvicorn
|
5 |
yara-python
|
6 |
+
requests
|
7 |
+
llama-index-core
|
8 |
+
pandas
|
9 |
+
sentence-transformers
|