Nt3awnou-rescue-map / src /verified_categorie.py
JulesLambert345's picture
add the categories for the verified request
6aaeb71
raw
history blame
3.31 kB
from typing import List
from enum import Enum
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
# Help texts that, when they match the whole text exactly, make to_category()
# return no category at all.
PHRASE_NO_PROBLEMS = [
    "got food",
    "got food and clothes",
    "got food and covers",
]
# Words that to_category() maps to HelpCategory.HOUSE.
# Matching is exact per word, so spelling variants are listed explicitly.
KEYS_HOUSE = [
    "shelters",
    "mattresses",
    "pillows",
    "blankets",
    "shelter",
    "tentes",
    "housing",
    "couvertures",
    "tents",
    "covers",
    "sdader",  # NOTE(review): meaning not evident from this file - confirm
    "housing_shelter",  # token produced by clean() from 'Housing/Shelter'
]
# Words that to_category() maps to HelpCategory.FOOD.
KEYS_FOOD = [
    "groceries",
    # NOTE(review): looks like a misspelling of French "nourriture"; kept
    # verbatim because matching is exact - confirm against the data.
    "nouriture",
    "food",
    "water",
    "gaz",
    "dishes",
    "oil",
    "sugar",
    "tea",
    "hungry",
]
# Words that to_category() maps to HelpCategory.CLOTHES
# (hygiene requests are grouped under this category).
KEYS_CLOTHES = [
    "clothes",
    "clothing",
    "hygiene",
]
# Words that to_category() maps to HelpCategory.MEDICAL.
KEYS_MEDICAL = [
    "betadine",
    "medical",
    "diabetics",
    "medicaments",
    "diabetes",
    "doliprane",
    "vitamines",
    "drugs",
]
class HelpCategory(Enum):
    """Kinds of help a request can be classified into.

    Each member's value is the lowercase label that ``string_category``
    writes into the joined ``help_category`` string.
    """

    HOUSE = "house"
    FOOD = "food"
    CLOTHES = "clothes"
    MEDICAL = "medical"
    # Fallback used when no keyword matched. The spelling is part of the
    # public name and value, so it must stay as-is.
    UNKNOW = "unknow"
# Fetch the NLTK corpora at import time (presumably the WordNet data the
# lemmatizer relies on - confirm against the NLTK docs).
nltk.download('wordnet')
nltk.download('omw-1.4')

# One shared lemmatizer, plus lemmatized copies of every keyword list so that
# to_category() can also match on a word's lemma.
lemmatizer = WordNetLemmatizer()
lemmatize_house = list(map(lemmatizer.lemmatize, KEYS_HOUSE))
lemmatize_food = list(map(lemmatizer.lemmatize, KEYS_FOOD))
lemmatize_clothes = list(map(lemmatizer.lemmatize, KEYS_CLOTHES))
lemmatize_medical = list(map(lemmatizer.lemmatize, KEYS_MEDICAL))
def to_category(text: str) -> List[HelpCategory]:
    """Classify one help-text fragment into help categories.

    The fragment is split on whitespace. Each word is compared, both as
    written and in its lemmatized form, against the keyword lists of every
    category. Matching is exact per word.

    Args:
        text: A single help-text fragment.

    Returns:
        An empty list when ``text`` is exactly one of ``PHRASE_NO_PROBLEMS``.
        Otherwise the matched categories, each listed once in the order they
        were first hit, or ``[HelpCategory.UNKNOW]`` when nothing matched.
    """
    if text in PHRASE_NO_PROBLEMS:
        return []

    # (raw keywords, lemmatized keywords, category), checked in this order.
    lookups = (
        (KEYS_HOUSE, lemmatize_house, HelpCategory.HOUSE),
        (KEYS_FOOD, lemmatize_food, HelpCategory.FOOD),
        (KEYS_CLOTHES, lemmatize_clothes, HelpCategory.CLOTHES),
        (KEYS_MEDICAL, lemmatize_medical, HelpCategory.MEDICAL),
    )

    hits = []
    for word in text.split():
        # Lemmatize once per word instead of once per category.
        lemma = lemmatizer.lemmatize(word)
        # Raw-word matches first, then lemma matches.
        hits.extend(cat for keys, _, cat in lookups if word in keys)
        hits.extend(cat for _, lemmas, cat in lookups if lemma in lemmas)

    # A keyword usually matches both as a raw word and as a lemma; report each
    # category once, keeping first-hit order.
    categories = list(dict.fromkeys(hits))
    if not categories:
        return [HelpCategory.UNKNOW]
    return categories
def clean(text: str) -> str:
    """Normalise a raw help-details string before it is split.

    The literal 'Housing/Shelter' is collapsed into one token first, so the
    slash-to-comma replacement that follows does not split it. The result is
    then lower-cased and stripped of surrounding whitespace.

    Args:
        text: Raw help-details text.

    Returns:
        The normalised text.
    """
    return (
        text.replace('Housing/Shelter', 'housing_shelter')
        .replace('/', ',')
        .lower()
        .strip()
    )
def to_list(text: str) -> List[str]:
    """Split a cleaned help-details string into comma-separated fragments.

    Within each fragment, dots are replaced by spaces and surrounding
    whitespace is removed. Empty fragments are kept.

    Args:
        text: Comma-separated help text.

    Returns:
        One string per comma-separated fragment, in input order.
    """
    fragments = []
    for piece in text.split(','):
        fragments.append(piece.replace('.', ' ').strip())
    return fragments
def help_text_to_help_category(helps: List[str]) -> List[HelpCategory]:
    """Collect the distinct help categories found across several fragments.

    Args:
        helps: Help-text fragments; each one is passed to ``to_category``.

    Returns:
        Every ``HelpCategory`` matched by at least one fragment, listed once,
        in the enum's declaration order. Empty when no fragment produced a
        category.
    """
    found = set()
    for help_string in helps:
        found.update(to_category(help_string))
    # A bare list(set) has no guaranteed order and may differ between runs,
    # which would make the joined category string unstable. Walking the enum
    # gives the same order for the same set of categories every time.
    return [category for category in HelpCategory if category in found]
def add_category(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``help_category`` column derived from ``Help Details``.

    Each ``Help Details`` value is cleaned, split into fragments and mapped
    to its help categories. The frame is modified in place.

    Args:
        df: Frame with a ``Help Details`` column.

    Returns:
        The same ``df`` object, now carrying the ``help_category`` column.
    """
    cleaned = df['Help Details'].apply(clean)
    fragments = cleaned.apply(to_list)
    df['help_category'] = fragments.apply(help_text_to_help_category)
    return df
def string_category(df: pd.DataFrame) -> pd.DataFrame:
    """Turn each ``help_category`` entry into a comma-joined string.

    Every cell is expected to hold an iterable of objects exposing
    ``.value``. The cell is replaced by those values joined with ','.
    The frame is modified in place.

    Args:
        df: Frame with a ``help_category`` column.

    Returns:
        The same ``df`` object with ``help_category`` rewritten as strings.
    """

    def _join_values(categories):
        return ','.join(member.value for member in categories)

    df['help_category'] = df['help_category'].apply(_join_values)
    return df