Spaces:
Runtime error
Runtime error
File size: 3,307 Bytes
6aaeb71 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
from typing import List
from enum import Enum
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
# Exact help strings for which to_category() returns no category at all
# (the text is compared as a whole, before it is split into words).
PHRASE_NO_PROBLEMS = [
    "got food",
    "got food and clothes",
    "got food and covers",
]
# Keyword tables consulted by to_category(): a whitespace-separated word that
# appears in a table is mapped to the matching HelpCategory member.
# NOTE(review): several entries look like French or misspelled variants
# (e.g. "tentes", "nouriture"); presumably they mirror the raw data, so they
# are kept verbatim.

# Shelter / bedding related words. "housing_shelter" is the token that clean()
# substitutes for the literal 'Housing/Shelter'.
KEYS_HOUSE = [
    "shelters", "mattresses", "pillows", "blankets",
    "shelter", "tentes", "housing", "couvertures",
    "tents", "covers", "sdader", "housing_shelter",
]

# Food, drink and cooking related words.
KEYS_FOOD = [
    "groceries", "nouriture", "food", "water", "gaz",
    "dishes", "oil", "sugar", "tea", "hungry",
]

# Clothing related words ("hygiene" is grouped here as well).
KEYS_CLOTHES = ["clothes", "clothing", "hygiene"]

# Medical supplies and conditions.
KEYS_MEDICAL = [
    "betadine", "medical", "diabetics", "medicaments",
    "diabetes", "doliprane", "vitamines", "drugs",
]
class HelpCategory(Enum):
    """Categories of help that a request can be classified into.

    Each member's value is the short string that string_category() writes
    into the ``help_category`` column.
    """

    HOUSE = "house"
    FOOD = "food"
    CLOTHES = "clothes"
    MEDICAL = "medical"
    # Fallback used by to_category() when no keyword matches. The spelling
    # ("unknow") is part of the emitted data and is kept as is.
    UNKNOW = "unknow"
# Download the NLTK 'wordnet' and 'omw-1.4' resources when the module is
# imported; the WordNet lemmatizer below needs WordNet data to work.
nltk.download('wordnet')
nltk.download('omw-1.4')

# One shared lemmatizer, plus a lemmatized copy of every keyword table so
# that to_category() can also match on the lemma of a word.
lemmatizer = WordNetLemmatizer()
lemmatize_house = list(map(lemmatizer.lemmatize, KEYS_HOUSE))
lemmatize_food = list(map(lemmatizer.lemmatize, KEYS_FOOD))
lemmatize_clothes = list(map(lemmatizer.lemmatize, KEYS_CLOTHES))
lemmatize_medical = list(map(lemmatizer.lemmatize, KEYS_MEDICAL))
def to_category(text: str) -> List[HelpCategory]:
    """Classify one cleaned help string into help categories.

    The text is split on whitespace. Every word is looked up verbatim in the
    ``KEYS_*`` tables and, after lemmatization, in the ``lemmatize_*`` tables.

    Args:
        text: A single, already cleaned help description.

    Returns:
        An empty list when ``text`` is one of ``PHRASE_NO_PROBLEMS``.
        Otherwise the matched categories, each listed once in the order of
        first match, or ``[HelpCategory.UNKNOW]`` when nothing matched.
    """
    if text in PHRASE_NO_PROBLEMS:
        return []

    raw_tables = (
        (KEYS_HOUSE, HelpCategory.HOUSE),
        (KEYS_FOOD, HelpCategory.FOOD),
        (KEYS_CLOTHES, HelpCategory.CLOTHES),
        (KEYS_MEDICAL, HelpCategory.MEDICAL),
    )
    lemma_tables = (
        (lemmatize_house, HelpCategory.HOUSE),
        (lemmatize_food, HelpCategory.FOOD),
        (lemmatize_clothes, HelpCategory.CLOTHES),
        (lemmatize_medical, HelpCategory.MEDICAL),
    )

    categories: List[HelpCategory] = []
    for word in text.split():
        # Lemmatize once per word instead of once per table.
        lemma = lemmatizer.lemmatize(word)
        # Raw matches first, then lemma matches, so first-match order is
        # the same as before.
        hits = [category for keys, category in raw_tables if word in keys]
        hits += [category for keys, category in lemma_tables if lemma in keys]
        for category in hits:
            # A word usually matches both its raw and its lemmatized table;
            # report each category only once.
            if category not in categories:
                categories.append(category)

    if not categories:
        return [HelpCategory.UNKNOW]
    return categories
def clean(text: str) -> str:
    """Normalize a raw help description before it is split into items.

    'Housing/Shelter' becomes the single token 'housing_shelter', every
    remaining '/' becomes ',', and the result is lower-cased and stripped of
    surrounding whitespace.
    """
    # The 'Housing/Shelter' replacement must run before '/' is turned into
    # ',' so that the pair stays one token.
    return (
        text.replace('Housing/Shelter', 'housing_shelter')
        .replace('/', ',')
        .lower()
        .strip()
    )
def to_list(text: str) -> List[str]:
    """Split a help description on commas into individual help items.

    In each item every '.' is replaced by a space and surrounding whitespace
    is removed. Empty items are kept.
    """
    items = []
    for piece in text.split(','):
        items.append(piece.replace('.', ' ').strip())
    return items
def help_text_to_help_category(helps: List[str]) -> List[HelpCategory]:
    """Collect the distinct help categories found in a list of help items.

    Args:
        helps: Help items, each classified on its own with to_category().

    Returns:
        Every category that occurs in at least one item, listed once, in
        order of first appearance.
    """
    ordered: List[HelpCategory] = []
    seen = set()
    for help_string in helps:
        for category in to_category(help_string):
            # Keep first-seen order instead of returning list(set(...)):
            # set order of Enum members depends on string hashing, which can
            # differ between interpreter runs.
            if category not in seen:
                seen.add(category)
                ordered.append(category)
    return ordered
def add_category(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``help_category`` column derived from ``Help Details``.

    Each ``Help Details`` value is cleaned, split into items and classified.
    The frame is modified in place and also returned.
    """
    cleaned = df['Help Details'].apply(clean)
    items = cleaned.apply(to_list)
    df['help_category'] = items.apply(help_text_to_help_category)
    return df
def string_category(df: pd.DataFrame) -> pd.DataFrame:
    """Replace each ``help_category`` entry by a comma-joined string.

    Every entry is expected to be an iterable of enum members; their
    ``value`` attributes are joined with ','. The frame is modified in place
    and also returned.
    """
    def _join_values(categories):
        # One entry -> "value1,value2,..." (empty entry -> "").
        return ','.join([category.value for category in categories])

    df['help_category'] = df['help_category'].apply(_join_values)
    return df
|