feat: light mode + token counter
- .python-version +0 -1
- .streamlit/config.toml +4 -0
- app.py +22 -4
- pyproject.toml +0 -12
- requirements.txt +2 -1
- src/__init__.py +1 -0
- src/__pycache__/__init__.cpython-312.pyc +0 -0
- src/__pycache__/calculator.cpython-312.pyc +0 -0
- src/__pycache__/constants.cpython-312.pyc +0 -0
- src/__pycache__/content.cpython-312.pyc +0 -0
- src/__pycache__/electricity_mix.cpython-312.pyc +0 -0
- src/__pycache__/expert.cpython-312.pyc +0 -0
- src/__pycache__/impacts.cpython-312.pyc +0 -0
- src/__pycache__/models.cpython-312.pyc +0 -0
- src/__pycache__/token_estimator.cpython-312.pyc +0 -0
- src/__pycache__/utils.cpython-312.pyc +0 -0
- src/content.py +28 -0
- src/style.css +23 -0
- src/token_estimator.py +30 -0
- uv.lock +0 -0
.python-version
DELETED
@@ -1 +0,0 @@
-3.12
.streamlit/config.toml
ADDED
@@ -0,0 +1,4 @@
+[theme]
+base="light"
+textColor="#0B3B36"
+primaryColor = "#00BF63"
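Streamlit reads the [theme] block from .streamlit/config.toml at startup, so no code change is needed for the light theme to take effect. As a quick sanity check — a minimal sketch, not part of this commit, assuming a Streamlit version that exposes theme keys through st.get_option — the loaded values can be read back at runtime:

```python
import streamlit as st

# Sanity-check sketch: read back the theme values Streamlit loaded from
# .streamlit/config.toml. st.get_option returns None for unset keys.
base = st.get_option("theme.base")              # expected: "light"
primary = st.get_option("theme.primaryColor")   # expected: "#00BF63"
text_color = st.get_option("theme.textColor")   # expected: "#0B3B36"

st.caption(f"theme.base={base}, primaryColor={primary}, textColor={text_color}")
```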
app.py
CHANGED
@@ -12,16 +12,30 @@ from src.content import (
 
 from src.expert import expert_mode
 from src.calculator import calculator_mode
+from src.token_estimator import token_estimator
 
-st.set_page_config(
-    …
-    …
+st.set_page_config(
+    layout="wide",
+    page_title="ECOLOGITS",
+    page_icon='💬'
+)
+
+with open( "src/style.css" ) as css:
+    st.markdown( f'<style>{css.read()}</style>' , unsafe_allow_html= True)
 
 st.html(HERO_TEXT)
 
 st.markdown(INTRO_TEXT)
 
-tab_calculator, tab_expert, tab_method, tab_about = st.tabs(…
+tab_calculator, tab_expert, tab_token, tab_method, tab_about = st.tabs(
+    [
+        '🧮 Calculator',
+        '🤓 Expert Mode',
+        '🪙 Tokens estimator',
+        '📖 Methodology',
+        'ℹ️ About'
+    ]
+)
 
 with tab_calculator:
 
@@ -30,6 +44,10 @@ with tab_calculator:
 with tab_expert:
 
     expert_mode()
+
+with tab_token:
+
+    token_estimator()
 
 with tab_method:
 
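Note on the stylesheet injection above: open("src/style.css") resolves the path against the current working directory, so it assumes the app is launched from the repository root. A minimal defensive variant — an illustration only, not part of this commit, and the Path-based layout is an assumption — would anchor the path to app.py itself:

```python
from pathlib import Path
import streamlit as st

# Illustration: resolve the stylesheet relative to this file so the app also
# works when `streamlit run` is invoked from a different working directory.
css_path = Path(__file__).parent / "src" / "style.css"
if css_path.exists():
    st.markdown(f"<style>{css_path.read_text()}</style>", unsafe_allow_html=True)
```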
pyproject.toml
DELETED
@@ -1,12 +0,0 @@
-[project]
-name = "ecologits-calculator-streamlit"
-version = "0.1.0"
-description = "Add your description here"
-readme = "README.md"
-requires-python = ">=3.12"
-dependencies = [
-    "ecologits>=0.6.1",
-    "pint>=0.24.4",
-    "plotly>=6.0.0",
-    "streamlit>=1.43.1",
-]
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
 streamlit
 ecologits
 pint
-plotly
+plotly
+tiktoken
src/__init__.py
CHANGED
@@ -1,6 +1,7 @@
 from .content import *
 from .constants import *
 from .expert import expert_mode
+from .token_estimator import token_estimator
 from .utils import *
 from .calculator import calculator_mode
 from .impacts import get_impacts, display_impacts
src/__pycache__/__init__.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/__init__.cpython-312.pyc and b/src/__pycache__/__init__.cpython-312.pyc differ

src/__pycache__/calculator.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/calculator.cpython-312.pyc and b/src/__pycache__/calculator.cpython-312.pyc differ

src/__pycache__/constants.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/constants.cpython-312.pyc and b/src/__pycache__/constants.cpython-312.pyc differ

src/__pycache__/content.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/content.cpython-312.pyc and b/src/__pycache__/content.cpython-312.pyc differ

src/__pycache__/electricity_mix.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/electricity_mix.cpython-312.pyc and b/src/__pycache__/electricity_mix.cpython-312.pyc differ

src/__pycache__/expert.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/expert.cpython-312.pyc and b/src/__pycache__/expert.cpython-312.pyc differ

src/__pycache__/impacts.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/impacts.cpython-312.pyc and b/src/__pycache__/impacts.cpython-312.pyc differ

src/__pycache__/models.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/models.cpython-312.pyc and b/src/__pycache__/models.cpython-312.pyc differ

src/__pycache__/token_estimator.cpython-312.pyc
ADDED
Binary file (1.9 kB)

src/__pycache__/utils.cpython-312.pyc
CHANGED
Binary files a/src/__pycache__/utils.cpython-312.pyc and b/src/__pycache__/utils.cpython-312.pyc differ
src/content.py
CHANGED
@@ -33,6 +33,34 @@ WARNING_BOTH = """
 ⚠️ The model architecture has not been publicly released and is multimodal, expect lower precision of estimations.
 """
 
+TOKEN_ESTIMATOR_TEXT = """
+A token is the basic unit of text that a language model processes. Think of it as a piece of text that the model breaks down for analysis and generation. Tokens can be:
+
+1. Whole words
+2. Parts of words
+3. Punctuation marks
+4. Spaces or other whitespace characters
+
+For example, the sentence "I love EcoLogits!" might be tokenized like this:
+
+- "I" (1 token)
+- "love" (1 token)
+- "EcoLogits" (1 token)
+- "!" (1 token)
+
+Different tokenization methods exist: some models split tokens at spaces while others use more complex algorithms that break words into subwords.
+
+Tokens are crucial because:
+
+- They determine the model's input and output capacity
+- They impact processing speed and memory usage
+- Most LLMs have a maximum token limit (e.g., 4,000 or 8,000 tokens)
+- Longer texts require more tokens, which can increase computational complexity
+- At EcoLogits, they are at the core of our impact assessment methodology!
+
+When you're writing or interacting with an LLM, being aware of token count can help you manage input length and complexity more effectively.
+"""
+
 ABOUT_TEXT = r"""
 ## 🎯 Our goal
 **The main goal of the EcoLogits Calculator is to raise awareness on the environmental impacts of LLM inference.**
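The new TOKEN_ESTIMATOR_TEXT describes tokenization in general terms. For concreteness, a minimal sketch — illustrative only, using the cl100k_base encoding that the new src/token_estimator.py relies on — shows how the example sentence actually splits; the exact pieces and count depend on the encoding, and uncommon words such as "EcoLogits" are typically broken into subwords:

```python
import tiktoken  # provided by the tiktoken dependency added in requirements.txt

# Tokenize the example sentence from TOKEN_ESTIMATOR_TEXT with cl100k_base.
encoding = tiktoken.get_encoding("cl100k_base")
token_ids = encoding.encode("I love EcoLogits!")

# Decode each token id individually to see the actual text pieces.
pieces = [encoding.decode([tid]) for tid in token_ids]
print(len(token_ids), pieces)
```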
src/style.css
ADDED
@@ -0,0 +1,23 @@
+@import url('https://fonts.googleapis.com/css2?family=Montserrat:ital,wght@0,100..900;1,100..900&display=swap');
+
+html, body, [class*="css"] {
+    font-family: 'Montserrat', sans-serif;
+    font-size: 18px;
+    font-weight: 500;
+    color: #091747;
+}
+
+[data-testid="metric-container"] {
+    width: fit-content;
+    margin: auto;
+}
+
+[data-testid="metric-container"] > div {
+    width: fit-content;
+    margin: auto;
+}
+
+[data-testid="metric-container"] label {
+    width: fit-content;
+    margin: auto;
+}
src/token_estimator.py
ADDED
@@ -0,0 +1,30 @@
+import streamlit as st
+import tiktoken
+from .content import TOKEN_ESTIMATOR_TEXT
+
+def num_tokens_from_string(string: str, encoding_name: str) -> int:
+    """Returns the number of tokens in a text string."""
+    encoding = tiktoken.get_encoding(encoding_name)
+    num_tokens = len(encoding.encode(string))
+    return num_tokens
+
+def token_estimator():
+
+    st.markdown("### 🪙 Tokens estimator")
+
+    st.markdown("As our methodology deeply relies on the number of tokens processed by the model *(and as no-one is token-fluent)*, we provide you with a tool to estimate the number of tokens in a given text.")
+
+    st.expander("ℹ️ What is a token anyway ?", expanded = False).markdown(TOKEN_ESTIMATOR_TEXT)
+
+    user_text_input = st.text_area("Type or paste some text to estimate the amount of tokens.", "EcoLogits is a great project!")
+
+    _, col2, _ = st.columns([2, 1, 2])
+
+    with col2:
+
+        st.metric(
+            label = 'tokens estimated amount',
+            #label_visibility = 'hidden',
+            value = num_tokens_from_string(user_text_input, "cl100k_base"),
+            border = True
+        )
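For reference, the helper in this new module can also be exercised outside the Streamlit UI. A minimal sketch — the text and invocation below are illustrative, not part of the commit:

```python
# Illustrative standalone use of the helper added in src/token_estimator.py.
from src.token_estimator import num_tokens_from_string

text = "EcoLogits is a great project!"
count = num_tokens_from_string(text, "cl100k_base")
print(f"~{count} tokens estimated for: {text!r}")
```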
uv.lock
DELETED
The diff for this file is too large to render.