Awa Mbaye committed on
Commit
693ae46
1 Parent(s): 107b807

Upload rag.py

Browse files
Files changed (1) hide show
  1. rag.py +71 -0
rag.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import PyPDF2
2
+ import spacy
3
+ from impactscoreTest import classify_actions_rse_impact_score
4
+ from ODD import classify_actions_rse_ODD
5
+
6
+ # Chargement du modèle spaCy
7
+ nlp = spacy.load("fr_core_news_sm")
8
+
9
+ # Étape 1 : Prétraitement des documents PDF
10
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to read; it is opened in binary mode.

    Returns:
        A single string made of each page's extracted text, with no
        separator inserted between pages. Pages that yield no text
        contribute an empty string.

    Raises:
        OSError: If the file cannot be opened (propagated from ``open``).
    """
    with open(pdf_path, "rb") as f:
        pdf_reader = PyPDF2.PdfReader(f)
        # Build the result with a single join instead of repeated ``+=``.
        # ``or ""`` is defensive: if a page's extraction returns None (or
        # any falsy value), the concatenation must not raise TypeError.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
17
+
18
+ # Étape 2 : Traitement du langage naturel (NLP)
19
def process_text(text):
    """Run the module-level spaCy pipeline ``nlp`` on *text*.

    Args:
        text: The raw text to analyse.

    Returns:
        Whatever ``nlp(text)`` returns (the processed document).
    """
    return nlp(text)
22
+
23
+ # Étape 3 : Extraction d'informations clés
24
def extract_key_information(doc):
    """Collect the organisations named in *doc*, each prefixed with a French article.

    Args:
        doc: An object exposing ``ents``, an iterable of entities that each
            have a ``label_`` and a ``text`` attribute.

    Returns:
        A list of strings, one per entity whose ``label_`` is ``"ORG"``, in
        iteration order: ``"l'<name>"`` when the name starts with a vowel
        (accented vowels included), otherwise ``"le <name>"``. Entities whose
        text is empty or whitespace-only are skipped.
    """
    # Local import so the block stays self-contained.
    import unicodedata

    vowels = frozenset("aeiouyœæ")
    key_information = []
    for entity in doc.ents:
        if entity.label_ != "ORG":
            continue
        name = entity.text.strip()
        if not name:
            # Nothing to prefix; also avoids an IndexError on name[0].
            continue
        # NFD splits an accented letter into base letter + combining mark,
        # so "É" is tested as "e" and "École" correctly gets the elided "l'".
        initial = unicodedata.normalize("NFD", name[0])[0].lower()
        if initial in vowels:
            key_information.append(f"l'{name}")
        else:
            key_information.append(f"le {name}")
    return key_information
34
+
35
+ # Étape 4 : Modélisation
36
def _format_criteria_section(heading, criteria):
    """Return the text fragments for one criteria section of the plan.

    *heading* is emitted first, then one ``- <criterion>:`` line per key of
    *criteria*, each followed by one `` * name (activity, city)`` line per
    company listed under that key.
    """
    parts = [heading]
    for criterion, companies in criteria.items():
        parts.append(f"- {criterion}:\n")
        parts.extend(
            f" * {company['name']} ({company['activity']}, {company['city']})\n"
            for company in companies
        )
    return parts


def generate_transition_plan(key_information, impact_score_criteria, odd_criteria):
    """Render the transition plan as a single human-readable string.

    Args:
        key_information: Iterable of strings listed under "Informations clés".
        impact_score_criteria: Mapping of criterion -> iterable of company
            mappings; each company must provide the keys ``"name"``,
            ``"activity"`` and ``"city"``.
        odd_criteria: Same structure as *impact_score_criteria*, rendered
            under "Critères ODD".

    Returns:
        The full plan text: an intro line, the key-information bullet list,
        then the impact-score and ODD sections.

    Raises:
        KeyError: If a company mapping lacks one of the required keys.
    """
    # Collect fragments and join once rather than growing a string with +=.
    parts = ["Voici votre plan de transition : \n\n", "Informations clés :\n"]
    parts.extend(f"- {info}\n" for info in key_information)
    # Both criteria sections share one layout, so they share one formatter.
    parts.extend(
        _format_criteria_section("\nCritères d'impact score :\n", impact_score_criteria)
    )
    parts.extend(_format_criteria_section("\nCritères ODD :\n", odd_criteria))
    return "".join(parts)
56
+
57
+
58
+ # Étape 5 : Intégration avec le RAG de Hugging Face
59
def generate_plan_from_pdf(pdf_path):
    """Build a transition plan from the PDF at *pdf_path*.

    Chains the steps defined in this module: extract the PDF text, run it
    through the NLP pipeline, pull out the key information, classify the
    document with the impact-score and ODD classifiers, and render the plan.

    Args:
        pdf_path: Path of the PDF file to process.

    Returns:
        The plan text produced by ``generate_transition_plan``.
    """
    parsed = process_text(extract_text_from_pdf(pdf_path))
    organisations = extract_key_information(parsed)
    # Arguments are evaluated left to right, so the impact-score classifier
    # still runs before the ODD classifier.
    return generate_transition_plan(
        organisations,
        classify_actions_rse_impact_score(parsed),
        classify_actions_rse_ODD(parsed),
    )
67
+
68
+ # Step 6: test and validation - runs the whole pipeline on "trans1.pdf" and prints the resulting plan. NOTE(review): this executes whenever the module is loaded (no __main__ guard) - confirm that is intended.
69
+ pdf_path = "trans1.pdf"
70
+ transition_plan = generate_plan_from_pdf(pdf_path)
71
+ print(transition_plan)