feat: light mode + token counter
- .python-version +0 -1
- .streamlit/config.toml +4 -0
- app.py +22 -4
- pyproject.toml +0 -12
- requirements.txt +2 -1
- src/__init__.py +1 -0
- src/__pycache__/__init__.cpython-312.pyc +0 -0
- src/__pycache__/calculator.cpython-312.pyc +0 -0
- src/__pycache__/constants.cpython-312.pyc +0 -0
- src/__pycache__/content.cpython-312.pyc +0 -0
- src/__pycache__/electricity_mix.cpython-312.pyc +0 -0
- src/__pycache__/expert.cpython-312.pyc +0 -0
- src/__pycache__/impacts.cpython-312.pyc +0 -0
- src/__pycache__/models.cpython-312.pyc +0 -0
- src/__pycache__/token_estimator.cpython-312.pyc +0 -0
- src/__pycache__/utils.cpython-312.pyc +0 -0
- src/content.py +28 -0
- src/style.css +23 -0
- src/token_estimator.py +30 -0
- uv.lock +0 -0
.python-version
DELETED
@@ -1 +0,0 @@
-3.12
.streamlit/config.toml
ADDED
@@ -0,0 +1,4 @@
+[theme]
+base="light"
+textColor="#0B3B36"
+primaryColor = "#00BF63"
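Streamlit reads the [theme] block from .streamlit/config.toml at startup, so no code change is needed for the light theme to take effect. As a quick sanity check — a minimal sketch, not part of this commit, assuming a Streamlit version that exposes theme keys through st.get_option — the loaded values can be read back at runtime:

```python
import streamlit as st

# Sanity-check sketch: read back the theme values Streamlit loaded from
# .streamlit/config.toml. st.get_option returns None for unset keys.
base = st.get_option("theme.base")              # expected: "light"
primary = st.get_option("theme.primaryColor")   # expected: "#00BF63"
text_color = st.get_option("theme.textColor")   # expected: "#0B3B36"

st.caption(f"theme.base={base}, primaryColor={primary}, textColor={text_color}")
```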
app.py
CHANGED
@@ -12,16 +12,30 @@ from src.content import (
 
 from src.expert import expert_mode
 from src.calculator import calculator_mode
+from src.token_estimator import token_estimator
 
-st.set_page_config(
-    …
-    …
+st.set_page_config(
+    layout="wide",
+    page_title="ECOLOGITS",
+    page_icon='💬'
+)
+
+with open( "src/style.css" ) as css:
+    st.markdown( f'<style>{css.read()}</style>' , unsafe_allow_html= True)
 
 st.html(HERO_TEXT)
 
 st.markdown(INTRO_TEXT)
 
-tab_calculator, tab_expert, tab_method, tab_about = st.tabs(…
+tab_calculator, tab_expert, tab_token, tab_method, tab_about = st.tabs(
+    [
+        '🧮 Calculator',
+        '🤓 Expert Mode',
+        '🪙 Tokens estimator',
+        '📖 Methodology',
+        'ℹ️ About'
+    ]
+)
 
 with tab_calculator:
 
@@ -30,6 +44,10 @@ with tab_calculator:
 with tab_expert:
 
     expert_mode()
+
+with tab_token:
+
+    token_estimator()
 
 with tab_method:
 
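Note on the stylesheet injection above: open("src/style.css") resolves the path against the current working directory, so it assumes the app is launched from the repository root. A minimal defensive variant — an illustration only, not part of this commit, and the Path-based layout is an assumption — would anchor the path to app.py itself:

```python
from pathlib import Path
import streamlit as st

# Illustration: resolve the stylesheet relative to this file so the app also
# works when `streamlit run` is invoked from a different working directory.
css_path = Path(__file__).parent / "src" / "style.css"
if css_path.exists():
    st.markdown(f"<style>{css_path.read_text()}</style>", unsafe_allow_html=True)
```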
pyproject.toml
DELETED
@@ -1,12 +0,0 @@
-[project]
-name = "ecologits-calculator-streamlit"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.12"
-dependencies = [
-    "ecologits>=0.6.1",
-    "pint>=0.24.4",
-    "plotly>=6.0.0",
-    "streamlit>=1.43.1",
-]
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
 streamlit
 ecologits
 pint
-plotly
+plotly
+tiktoken
src/__init__.py
CHANGED
@@ -1,6 +1,7 @@
 from .content import *
 from .constants import *
 from .expert import expert_mode
+from .token_estimator import token_estimator
 from .utils import *
 from .calculator import calculator_mode
 from .impacts import get_impacts, display_impacts
src/__pycache__/__init__.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/__init__.cpython-312.pyc and b/src/__pycache__/__init__.cpython-312.pyc differ

src/__pycache__/calculator.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/calculator.cpython-312.pyc and b/src/__pycache__/calculator.cpython-312.pyc differ

src/__pycache__/constants.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/constants.cpython-312.pyc and b/src/__pycache__/constants.cpython-312.pyc differ

src/__pycache__/content.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/content.cpython-312.pyc and b/src/__pycache__/content.cpython-312.pyc differ

src/__pycache__/electricity_mix.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/electricity_mix.cpython-312.pyc and b/src/__pycache__/electricity_mix.cpython-312.pyc differ

src/__pycache__/expert.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/expert.cpython-312.pyc and b/src/__pycache__/expert.cpython-312.pyc differ

src/__pycache__/impacts.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/impacts.cpython-312.pyc and b/src/__pycache__/impacts.cpython-312.pyc differ

src/__pycache__/models.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/models.cpython-312.pyc and b/src/__pycache__/models.cpython-312.pyc differ

src/__pycache__/token_estimator.cpython-312.pyc
ADDED
Binary file (1.9 kB)

src/__pycache__/utils.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/utils.cpython-312.pyc and b/src/__pycache__/utils.cpython-312.pyc differ
src/content.py
CHANGED
@@ -33,6 +33,34 @@ WARNING_BOTH = """
 ⚠️ The model architecture has not been publicly released and is multimodal, expect lower precision of estimations.
 """
 
+TOKEN_ESTIMATOR_TEXT = """
+A token is the basic unit of text that a language model processes. Think of it as a piece of text that the model breaks down for analysis and generation. Tokens can be:
+
+1. Whole words
+2. Parts of words
+3. Punctuation marks
+4. Spaces or other whitespace characters
+
+For example, the sentence "I love EcoLogits!" might be tokenized like this:
+
+- "I" (1 token)
+- "love" (1 token)
+- "EcoLogits" (1 token)
+- "!" (1 token)
+
+Different tokenization methods exist: some models split tokens at spaces while others use more complex algorithms that break words into subwords.
+
+Tokens are crucial because:
+
+- They determine the model's input and output capacity
+- They impact processing speed and memory usage
+- Most LLMs have a maximum token limit (e.g., 4,000 or 8,000 tokens)
+- Longer texts require more tokens, which can increase computational complexity
+- At EcoLogits, they are at the core of our impact assessment methodology!
+
+When you're writing or interacting with an LLM, being aware of token count can help you manage input length and complexity more effectively.
+"""
+
 ABOUT_TEXT = r"""
 ## 🎯 Our goal
 **The main goal of the EcoLogits Calculator is to raise awareness on the environmental impacts of LLM inference.**
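The new TOKEN_ESTIMATOR_TEXT describes tokenization in general terms. For concreteness, a minimal sketch — illustrative only, using the cl100k_base encoding that the new src/token_estimator.py relies on — shows how the example sentence actually splits; the exact pieces and count depend on the encoding, and uncommon words such as "EcoLogits" are typically broken into subwords:

```python
import tiktoken  # provided by the tiktoken dependency added in requirements.txt

# Tokenize the example sentence from TOKEN_ESTIMATOR_TEXT with cl100k_base.
encoding = tiktoken.get_encoding("cl100k_base")
token_ids = encoding.encode("I love EcoLogits!")

# Decode each token id individually to see the actual text pieces.
pieces = [encoding.decode([tid]) for tid in token_ids]
print(len(token_ids), pieces)
```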
src/style.css
ADDED
@@ -0,0 +1,23 @@
+@import url('https://fonts.googleapis.com/css2?family=Montserrat:ital,wght@0,100..900;1,100..900&display=swap');
+
+html, body, [class*="css"] {
+    font-family: 'Montserrat', sans-serif;
+    font-size: 18px;
+    font-weight: 500;
+    color: #091747;
+}
+
+[data-testid="metric-container"] {
+    width: fit-content;
+    margin: auto;
+}
+
+[data-testid="metric-container"] > div {
+    width: fit-content;
+    margin: auto;
+}
+
+[data-testid="metric-container"] label {
+    width: fit-content;
+    margin: auto;
+}
src/token_estimator.py
ADDED
@@ -0,0 +1,30 @@
+import streamlit as st
+import tiktoken
+from .content import TOKEN_ESTIMATOR_TEXT
+
+def num_tokens_from_string(string: str, encoding_name: str) -> int:
+    """Returns the number of tokens in a text string."""
+    encoding = tiktoken.get_encoding(encoding_name)
+    num_tokens = len(encoding.encode(string))
+    return num_tokens
+
+def token_estimator():
+
+    st.markdown("### 🪙 Tokens estimator")
+
+    st.markdown("As our methodology deeply relies on the number of tokens processed by the model *(and as no-one is token-fluent)*, we provide you with a tool to estimate the number of tokens in a given text.")
+
+    st.expander("ℹ️ What is a token anyway ?", expanded = False).markdown(TOKEN_ESTIMATOR_TEXT)
+
+    user_text_input = st.text_area("Type or paste some text to estimate the amount of tokens.", "EcoLogits is a great project!")
+
+    _, col2, _ = st.columns([2, 1, 2])
+
+    with col2:
+
+        st.metric(
+            label = 'tokens estimated amount',
+            #label_visibility = 'hidden',
+            value = num_tokens_from_string(user_text_input, "cl100k_base"),
+            border = True
+        )
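For reference, the helper in this new module can also be exercised outside the Streamlit UI. A minimal sketch — the text and invocation below are illustrative, not part of the commit:

```python
# Illustrative standalone use of the helper added in src/token_estimator.py.
from src.token_estimator import num_tokens_from_string

text = "EcoLogits is a great project!"
count = num_tokens_from_string(text, "cl100k_base")
print(f"~{count} tokens estimated for: {text!r}")
```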
uv.lock
DELETED
The diff for this file is too large to render.