Implement the "0. space" heuristic as a fallback for lines whose timestamp can't be parsed
Browse files- logmetric.py +19 -11
logmetric.py
CHANGED
|
@@ -176,21 +176,30 @@ class PredRefScore:
|
|
| 176 |
|
| 177 |
# Split all log entries into timestamps and log messages
|
| 178 |
def split_log_entry(self, pred : str, ref: str):
|
| 179 |
-
|
| 180 |
-
|
| 181 |
|
| 182 |
# One logentry always consists of timestamp + log-message
|
| 183 |
pred_timestamps, pred_logMessages = [], []
|
| 184 |
ref_timestamps, ref_logMessages = [], []
|
| 185 |
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
# We extend the shorter list to the length of the longer one
|
| 196 |
max_logentries = max(len(pred_logMessages), len(ref_logMessages))
|
|
@@ -271,4 +280,3 @@ class PredRefScore:
|
|
| 271 |
pred_timestamps, pred_logMessages, ref_timestamps, ref_logMessages = self.split_log_entry(self.prediction, self.reference)
|
| 272 |
self.all_linecontent_scores(pred_logMessages, ref_logMessages)
|
| 273 |
self.all_timestamp_scores(pred_timestamps, ref_timestamps)
|
| 274 |
-
|
|
|
|
| 176 |
|
| 177 |
# Split all log entries into timestamps and log messages
|
| 178 |
def split_log_entry(self, pred : str, ref: str):
|
| 179 |
+
pred_lines = pred.splitlines()
|
| 180 |
+
ref_lines = ref.splitlines()
|
| 181 |
|
| 182 |
# One logentry always consists of timestamp + log-message
|
| 183 |
pred_timestamps, pred_logMessages = [], []
|
| 184 |
ref_timestamps, ref_logMessages = [], []
|
| 185 |
|
| 186 |
+
for i in range(len(pred_lines)):
|
| 187 |
+
if TIMESTAMP_PATTERN.match(pred_lines[i]) is not None:
|
| 188 |
+
# try to match timestamp
|
| 189 |
+
_, pred_ts, pred_msg = TIMESTAMP_PATTERN.split(pred_lines[i])
|
| 190 |
+
pred_timestamps.append(pred_ts)
|
| 191 |
+
pred_logMessages.append(pred_msg)
|
| 192 |
+
else:
|
| 193 |
+
# 0. space heuristic
|
| 194 |
+
pred_msg = pred_lines[i]
|
| 195 |
+
pred_logMessages.append(pred_msg)
|
| 196 |
+
|
| 197 |
+
for i in range(len(ref_lines)):
|
| 198 |
+
if TIMESTAMP_PATTERN.match(ref_lines[i]) is None:
|
| 199 |
+
raise ValueError("The provided regex can't parse a timestamp in a reference log. Please make sure that the regex can parse a provided reference log format. Line: " + ref_lines[i])
|
| 200 |
+
_, ref_ts, ref_msg = TIMESTAMP_PATTERN.split(ref_lines[i])
|
| 201 |
+
ref_timestamps.append(ref_ts)
|
| 202 |
+
ref_logMessages.append(ref_msg)
|
| 203 |
|
| 204 |
# We extend the shorter list to the length of the longer one
|
| 205 |
max_logentries = max(len(pred_logMessages), len(ref_logMessages))
|
|
|
|
| 280 |
pred_timestamps, pred_logMessages, ref_timestamps, ref_logMessages = self.split_log_entry(self.prediction, self.reference)
|
| 281 |
self.all_linecontent_scores(pred_logMessages, ref_logMessages)
|
| 282 |
self.all_timestamp_scores(pred_timestamps, ref_timestamps)
|
|
|