nyamberekimeu committed on
Commit
a55acce
·
verified ·
1 Parent(s): 137c9a4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer
3
+
4
# Supported Qwen3 checkpoints, ordered by size: dense models from smallest to
# largest, then the mixture-of-experts variants. The first entry is used as the
# default selection in the UI, so keep the smallest model first.
QWEN_MODELS = [
    "Qwen/Qwen3-0.6B",
    "Qwen/Qwen3-1.7B",
    "Qwen/Qwen3-4B",
    "Qwen/Qwen3-8B",
    "Qwen/Qwen3-14B",
    "Qwen/Qwen3-32B",
    "Qwen/Qwen3-30B-A3B",
    "Qwen/Qwen3-235B-A22B",
]

# Tokenizers already loaded in this process, keyed by model name, so each
# model's tokenizer is fetched at most once.
tokenizer_cache = {}
17
+
18
def _read_uploaded_text(file_input):
    """Return the text content of an uploaded file as a ``str``.

    The upload may arrive in several shapes depending on the Gradio version
    and the ``gr.File`` configuration: a filesystem path string, an object
    exposing the path as ``.name``, a readable file-like object, or raw bytes.
    All of them are accepted here.
    """
    if isinstance(file_input, (bytes, bytearray)):
        raw = bytes(file_input)
    else:
        path = file_input if isinstance(file_input, str) else getattr(file_input, "name", None)
        if isinstance(path, str):
            # Re-open by path so the result does not depend on the current
            # read position (or open/closed state) of a temp-file wrapper.
            with open(path, "rb") as handle:
                raw = handle.read()
        else:
            raw = file_input.read()

    if isinstance(raw, str):
        return raw
    # "utf-8-sig" drops a leading BOM so it is not counted as a token;
    # undecodable bytes are replaced instead of aborting the request.
    return raw.decode("utf-8-sig", errors="replace")


def count_tokens(model_name, text_input, file_input):
    """Tokenize text with the selected model's tokenizer.

    Args:
        model_name: Hugging Face model identifier whose tokenizer is used.
        text_input: Text typed by the user; ignored when a file is uploaded.
            May be ``None`` or empty.
        file_input: Optional uploaded file (path string, object with a
            ``.name`` path, file-like object, or bytes), or ``None``.

    Returns:
        A ``(token_count, tokens)`` tuple. ``(0, [])`` is returned when there
        is no non-whitespace text to tokenize.
    """
    # An uploaded file takes precedence over the text box.
    if file_input is not None:
        text = _read_uploaded_text(file_input)
    else:
        text = text_input or ""

    if not text.strip():
        return 0, []

    # Load the tokenizer on first use and reuse it afterwards.
    if model_name not in tokenizer_cache:
        # NOTE(review): trust_remote_code=True lets the model repository run
        # its own Python code at load time. Confirm it is actually required
        # for these checkpoints; drop it if the tokenizers load without it.
        tokenizer_cache[model_name] = AutoTokenizer.from_pretrained(
            model_name, trust_remote_code=True
        )
    tokenizer = tokenizer_cache[model_name]

    # Special tokens (BOS/EOS, etc.) are excluded so only the text is counted.
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    tokens = tokenizer.convert_ids_to_tokens(token_ids)

    return len(token_ids), tokens
40
+
41
# Build the web UI: a model selector, a free-text box and an optional .txt
# upload feed count_tokens; its two return values are shown as a number and a
# JSON list. The interface is started as soon as this module runs.
demo = gr.Interface(
    fn=count_tokens,
    inputs=[
        gr.Dropdown(
            choices=QWEN_MODELS,
            label="Select Qwen Model",
            value=QWEN_MODELS[0],
        ),
        gr.Textbox(lines=5, label="Input Text (ignored if file is uploaded)"),
        gr.File(label="Upload .txt File (optional)", file_types=[".txt"]),
    ],
    outputs=[
        gr.Number(label="Token Count"),
        gr.JSON(label="Tokens"),
    ],
    title="Qwen Token Counter",
    description="Select a Qwen model and input text or upload a .txt file to see token count and token list.",
)
demo.launch()