Rithvickkr committed on
Commit
c19c237
·
1 Parent(s): edbf821

DSATP and LlamaIndex with real CVE/IoT corpus

Browse files
Files changed (4) hide show
  1. app.py +29 -2
  2. corpus/cve_processed.txt +0 -0
  3. corpus/nvd_cve.json +0 -0
  4. requirements.txt +4 -1
app.py CHANGED
@@ -1,7 +1,12 @@
1
  import gradio as gr
2
  import yara
 
 
3
  import os
4
 
 
 
 
5
  # DSATP log parsing (embedded for Spaces)
6
  def dsatp_parse_log(text: str) -> dict:
7
  """Parse log for IoT threats."""
@@ -40,9 +45,22 @@ def dsatp_yara_scan(file_path: str) -> dict:
40
  except Exception as e:
41
  return {"error": str(e), "severity": "Unknown", "mitigation": "Check file format"}
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # Chatbot function
44
  def chatbot_response(user_input, file, history):
45
- """Process input or file with DSATP."""
46
  if history is None:
47
  history = []
48
  input_text = user_input
@@ -54,7 +72,16 @@ def chatbot_response(user_input, file, history):
54
  else:
55
  scan_result = dsatp_parse_log(input_text)
56
 
57
- response = f"Security Analyst: {scan_result['classification']}. Severity: {scan_result['severity']}. Mitigation: {scan_result['mitigation']}. Confidence: {scan_result['confidence']:.1f}"
 
 
 
 
 
 
 
 
 
58
  updated_history = history + [(user_input or "File uploaded", response)]
59
  return updated_history, scan_result
60
 
 
1
  import gradio as gr
2
  import yara
3
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
4
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
5
  import os
6
 
7
+ # Configure LlamaIndex to use local embeddings
8
+ Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
9
+
10
  # DSATP log parsing (embedded for Spaces)
11
  def dsatp_parse_log(text: str) -> dict:
12
  """Parse log for IoT threats."""
 
45
  except Exception as e:
46
  return {"error": str(e), "severity": "Unknown", "mitigation": "Check file format"}
47
 
48
+ # Initialize LlamaIndex with real corpus
49
+ def init_llama_index():
50
+ """Load CVE/IoT corpus into LlamaIndex."""
51
+ try:
52
+ documents = SimpleDirectoryReader("corpus", filename_as_id=True).load_data()
53
+ return VectorStoreIndex.from_documents(documents)
54
+ except Exception as e:
55
+ print(f"Error loading corpus: {e}")
56
+ return None
57
+
58
+ index = init_llama_index()
59
+ query_engine = index.as_query_engine() if index else None
60
+
61
  # Chatbot function
62
  def chatbot_response(user_input, file, history):
63
+ """Process input or file with DSATP and LlamaIndex."""
64
  if history is None:
65
  history = []
66
  input_text = user_input
 
72
  else:
73
  scan_result = dsatp_parse_log(input_text)
74
 
75
+ # Query LlamaIndex for context
76
+ context_str = "No context available."
77
+ if query_engine:
78
+ try:
79
+ context = query_engine.query(f"Mitigation for: {input_text}")
80
+ context_str = str(context)
81
+ except Exception as e:
82
+ context_str = f"Context error: {e}"
83
+
84
+ response = f"Security Analyst: {scan_result['classification']}. Severity: {scan_result['severity']}. Mitigation: {scan_result['mitigation']}. Confidence: {scan_result['confidence']:.1f}. Context: {context_str}"
85
  updated_history = history + [(user_input or "File uploaded", response)]
86
  return updated_history, scan_result
87
 
corpus/cve_processed.txt ADDED
The diff for this file is too large to render. See raw diff
 
corpus/nvd_cve.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -3,4 +3,7 @@ textblob
3
  fastapi
4
  uvicorn
5
  yara-python
6
- requests
 
 
 
 
3
  fastapi
4
  uvicorn
5
  yara-python
6
+ requests
7
+ llama-index-core
8
+ pandas
9
+ sentence-transformers