"""Gradio entry point for the Babel Machine demo.

Combines the individual task interfaces (CAP, Manifesto, sentiment, emotion,
ILLFRAMES, NER, ONTOLISST) into a single tabbed Gradio application.
"""
import os

PATH = '/data/'  # at least 150GB storage needs to be attached

# Point every model/dataset cache at the attached storage. These assignments
# are deliberately placed before the imports below -- presumably so that
# libraries which read the variables at import time pick them up (confirm
# before reordering).
# NOTE(review): TRANSFORMERS_CACHE is reported as deprecated in favour of
# HF_HOME by recent transformers releases; kept for older versions.
os.environ['TRANSFORMERS_CACHE'] = PATH
os.environ['HF_HOME'] = PATH
os.environ['HF_DATASETS_CACHE'] = PATH
os.environ['TORCH_HOME'] = PATH

import gradio as gr

from interfaces.cap import demo as cap_demo
from interfaces.manifesto import demo as manifesto_demo
from interfaces.sentiment import demo as sentiment_demo
from interfaces.emotion import demo as emotion_demo
from interfaces.ner import demo as ner_demo
from interfaces.ner import download_models as download_spacy_models
from interfaces.illframes import demo as illframes_demo
from interfaces.ontolisst import demo as ontolisst_demo
from interfaces.emotion9 import demo as e9_demo
from utils import download_hf_models

# NOTE(review): the selector below targets a generated Svelte class name, which
# looks tied to a specific Gradio build -- verify after upgrading Gradio.
css = """
/* Make only the active tab bold */
.svelte-1uw5tnk[aria-selected="true"] {
    font-weight: bold;
    background: linear-gradient(to bottom right, var(--primary-100), var(--primary-300));
    color: var(--primary-600)
}
"""

with gr.Blocks(css=css) as demo:
    # Header text shown above the tabs. The font name inside the style
    # attribute is single-quoted: the attribute itself is double-quoted, so a
    # nested double quote would end it early and drop the font-family rule.
    # No blank lines are used inside the HTML so it stays one HTML block.
    gr.Markdown(
        """
        <style>
        @import 'https://fonts.googleapis.com/css?family=Source+Sans+Pro:300,400';
        </style>
        <div style="display: block; text-align: left; padding:0; margin:0;font-family: 'Source Sans Pro', Helvetica, sans-serif;">
            <h1 style="text-align: center;font-size: 17pt;">Babel Machine Demo</h1>
            <p style="font-size: 14pt;">This is a demo for text classification using language models finetuned on data labeled by <a href="https://www.comparativeagendas.net/">CAP</a>, <a href="https://manifesto-project.wzb.eu/">Manifesto Project</a>, sentiment, emotion coding and Named Entity Recognition systems. For the coding of complete datasets, please visit the official <a href="https://babel.poltextlab.com/">Babel Machine</a> site.<br>
            Please note that the sentiment (3) and emotions (6) models have been trained using parliamentary speech data, so the results for generic sentences may not be reliable.
            The emotions (9) models have been trained using <a href="https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/C9SAIX">this dataset</a>. It contains labeled parliamentary speeches and social media data. Under-represented categories were enriched with synthetic data.<br>
            <br>
            The models listed for Manifesto, Sentiment (3) and Emotions (6) tasks are a beta version and thus not publicly available, the Hugging Face link will not work for them for the time being. We expect a public version after tests and improvements in the Fall. Please feel free to check back for model updates, or reach out to us at that point if you wish to ask about a specific model.
            </p>
        </div>
        """)

    # One tab per task interface; tab_names must stay in the same order as
    # interface_list.
    gr.TabbedInterface(
        interface_list=[
            cap_demo,
            manifesto_demo,
            sentiment_demo,
            emotion_demo,
            e9_demo,
            illframes_demo,
            ner_demo,
            ontolisst_demo,
        ],
        tab_names=[
            "CAP",
            "Manifesto",
            "Sentiment (3)",
            "Emotions (6)",
            "Emotions (9)",
            "ILLFRAMES",
            "Named Entity Recognition",
            "ONTOLISST",
        ],
    )

if __name__ == "__main__":
    # Pre-downloading the Hugging Face models is currently disabled.
    # download_hf_models()
    download_spacy_models()
    demo.launch()

# TODO: add all languages & domains