Update app.py
app.py
CHANGED
@@ -104,33 +104,20 @@ def load_and_train():
        """
        texts = []
        # Determine the batch size
-
+        num_examples = len(next(iter(examples.values())))  # Get the batch size

-        for i in range(
+        for i in range(num_examples):
            text = ''
-
-
-
-
-
-                text =
-
-
-
-
-                text = whole_func
-            elif 'func_documentation_string' in examples and examples['func_documentation_string'][i]:
-                func_doc = examples['func_documentation_string'][i]
-                if isinstance(func_doc, list):
-                    func_doc = ' '.join(func_doc)
-                text = func_doc
-            else:
-                text = ''  # Assign an empty string if no text is available
-
-            # Make sure 'text' is a string
-            if not isinstance(text, str):
-                text = str(text)
-
+            # Process 'dialog'
+            if 'dialog' in examples and i < len(examples['dialog']) and isinstance(examples['dialog'][i], str) and examples['dialog'][i]:
+                text = examples['dialog'][i]
+            # Process 'whole_func_string'
+            elif 'whole_func_string' in examples and i < len(examples['whole_func_string']) and isinstance(examples['whole_func_string'][i], str) and examples['whole_func_string'][i]:
+                text = examples['whole_func_string'][i]
+            # Process 'func_documentation_string'
+            elif 'func_documentation_string' in examples and i < len(examples['func_documentation_string']) and isinstance(examples['func_documentation_string'][i], str) and examples['func_documentation_string'][i]:
+                text = examples['func_documentation_string'][i]
+            # More fields can be added here if necessary
            texts.append(text)

        examples['text'] = texts
@@ -145,8 +132,8 @@ def load_and_train():
            examples['text'],
            truncation=True,
            padding='max_length',
-            max_length=512,
-            clean_up_tokenization_spaces=True  #
+            max_length=512
+            # clean_up_tokenization_spaces=True  # Removed because it is not recognized
        )

    # Tokenize the dataset
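For context, the first hunk rewrites a batched preprocessing function for the 🤗 datasets library: in a batched map, every value in the examples dict is a list of equal length, so the length of any column gives the batch size, and the loop collapses several possible source fields into a single 'text' column. Below is a minimal, self-contained sketch of how such a function is typically wired into Dataset.map(batched=True); the preprocess_function name, the toy columns, and the in-memory dataset are illustrative assumptions, not code taken from app.py.

from datasets import Dataset

def preprocess_function(examples):
    """Collapse several possible text fields into a single 'text' column."""
    texts = []
    # In a batched map every value is a list, so any column gives the batch size.
    num_examples = len(next(iter(examples.values())))
    for i in range(num_examples):
        text = ''
        if 'dialog' in examples and isinstance(examples['dialog'][i], str) and examples['dialog'][i]:
            text = examples['dialog'][i]
        elif 'whole_func_string' in examples and isinstance(examples['whole_func_string'][i], str) and examples['whole_func_string'][i]:
            text = examples['whole_func_string'][i]
        elif 'func_documentation_string' in examples and isinstance(examples['func_documentation_string'][i], str) and examples['func_documentation_string'][i]:
            text = examples['func_documentation_string'][i]
        texts.append(text)
    examples['text'] = texts
    return examples

# Tiny in-memory example, just to show the batched call shape.
raw = Dataset.from_dict({
    'whole_func_string': ['def add(a, b):\n    return a + b', ''],
    'func_documentation_string': ['', 'Adds two numbers.'],
})
processed = raw.map(preprocess_function, batched=True)
print(processed['text'])  # ['def add(a, b): ...', 'Adds two numbers.']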
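The second hunk keeps truncation, padding, and max_length on the encoding call and comments out clean_up_tokenization_spaces, which the tokenizer reported as not recognized. In transformers that flag is a decoding-time option (decode / batch_decode) and a tokenizer init argument rather than an encoding argument, which is consistent with dropping it here. The sketch below shows the tokenization step under that assumption; the 'gpt2' checkpoint, the tokenize_function name, and the toy dataset are placeholders, not values from app.py.

from datasets import Dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('gpt2')  # placeholder checkpoint
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # GPT-2 defines no pad token by default

def tokenize_function(examples):
    # clean_up_tokenization_spaces is deliberately not passed to the encoding
    # call; it belongs to tokenizer.decode / batch_decode and to the constructor.
    return tokenizer(
        examples['text'],
        truncation=True,
        padding='max_length',
        max_length=512,
    )

toy = Dataset.from_dict({'text': ['def add(a, b):\n    return a + b']})
tokenized = toy.map(tokenize_function, batched=True)
print(tokenized.column_names)  # ['text', 'input_ids', 'attention_mask']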