Spaces:
Running on CPU Upgrade

ziem-io committed on
Commit
325ed03
·
1 Parent(s): 6370ba9

Fix: Add robust lang detection

Browse files
Files changed (1) hide show
  1. app.py +34 -7
app.py CHANGED
@@ -100,15 +100,42 @@ def _predict_np2_compat(self, text, k=1, threshold=0.0, on_unicode_error='strict
100
  lid_model.predict = types.MethodType(_predict_np2_compat, lid_model)
101
 
102
  ### Check if lang is english #####################################################
103
- def is_eng(review: str):
104
- lang_preds = lid_model.predict(review, k=3)
 
105
 
106
- print(lang_preds)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
- for prob, label in lang_preds:
109
- if label == "__label__en":
110
- return True, float(prob)
111
-
112
  return False, 0.0
113
 
114
  ### Do actual prediction #########################################################
 
100
  lid_model.predict = types.MethodType(_predict_np2_compat, lid_model)
101
 
102
  ### Check if lang is english #####################################################
103
+ def is_eng(text: str, k: int = 3, threshold: float = 0.1):
104
+
105
+ out = lid_model.predict(text, k=k)
106
 
107
+ # Normalisieren auf zwei Listen: labels[], probs[]
108
+ labels, probs = [], []
109
+
110
+ # Fall A: (labels, probs)
111
+ if isinstance(out, tuple) and len(out) == 2:
112
+ labels, probs = out
113
+
114
+ # Fall B: [(prob, '__label__xx'), ...]
115
+ elif (
116
+ isinstance(out, (list, tuple))
117
+ and len(out) > 0
118
+ and isinstance(out[0], (list, tuple))
119
+ and len(out[0]) == 2
120
+ and isinstance(out[0][1], str)
121
+ ):
122
+ probs, labels = zip(*out) # entpacken
123
+ labels, probs = list(labels), list(probs)
124
+
125
+ # Fall C: ['__label__en', '__label__de', ...] (ohne Probs)
126
+ elif isinstance(out, (list, tuple)) and (len(out) == 0 or isinstance(out[0], str)):
127
+ labels = list(out)
128
+ probs = [1.0] * len(labels) # Dummy-Prob, falls nicht geliefert
129
+
130
+ else:
131
+ # Unbekanntes Format
132
+ return True, 0.0
133
+
134
+ if "__label__en" in labels:
135
+ i = labels.index("__label__en")
136
+ p = float(probs[i])
137
+ return (p >= threshold), p
138
 
 
 
 
 
139
  return False, 0.0
140
 
141
  ### Do actual prediction #########################################################