Spaces: Running on Zero
Commit: adding application
Browse files
app.py CHANGED
@@ -242,14 +242,14 @@ def find_last_token_index(full_ids, word_ids):
|
|
242 |
@GPU # this block runs on a job GPU
|
243 |
def analyse_word(model_name: str, word: str, patchscopes_template: str, context:str = ""):
|
244 |
try:
|
245 |
-
text = context+ " " + word
|
246 |
model, tokenizer = get_model_and_tokenizer(model_name)
|
247 |
|
248 |
# Build extraction prompt (where hidden states will be collected)
|
249 |
extraction_prompt ="X"
|
250 |
|
251 |
# Identify last token position of the *word* inside the prompt IDs
|
252 |
-
word_token_ids = tokenizer.encode(
|
253 |
|
254 |
# Instantiate Patchscopes retriever
|
255 |
patch_retriever = PatchscopesRetriever(
|
@@ -262,7 +262,7 @@ def analyse_word(model_name: str, word: str, patchscopes_template: str, context:
|
|
262 |
|
263 |
# Run retrieval for the word across all layers (one pass)
|
264 |
retrieved_words = patch_retriever.get_hidden_states_and_retrieve_word(
|
265 |
-
|
266 |
num_tokens_to_generate=len(tokenizer.tokenize(word)),
|
267 |
)[0]
|
268 |
|
@@ -309,14 +309,14 @@ with gr.Blocks(theme="soft") as demo:
|
|
309 |
label="Patchscopes prompt (use X as placeholder)",
|
310 |
value="repeat the following word X twice: 1)X 2)",
|
311 |
)
|
312 |
-
context_box = gr.Textbox(label="context", value="")
|
313 |
word_box = gr.Textbox(label="Word to test", value="interpretable")
|
314 |
run_btn = gr.Button("Analyse")
|
315 |
out_html = gr.HTML()
|
316 |
|
317 |
run_btn.click(
|
318 |
analyse_word,
|
319 |
-
inputs=[model_name, word_box, patchscopes_template, context_box],
|
320 |
outputs=out_html,
|
321 |
)
|
322 |
|
|
|
242 |
@GPU # this block runs on a job GPU
|
243 |
def analyse_word(model_name: str, word: str, patchscopes_template: str, context:str = ""):
|
244 |
try:
|
245 |
+
# text = context+ " " + word
|
246 |
model, tokenizer = get_model_and_tokenizer(model_name)
|
247 |
|
248 |
# Build extraction prompt (where hidden states will be collected)
|
249 |
extraction_prompt ="X"
|
250 |
|
251 |
# Identify last token position of the *word* inside the prompt IDs
|
252 |
+
word_token_ids = tokenizer.encode(word, add_special_tokens=False)
|
253 |
|
254 |
# Instantiate Patchscopes retriever
|
255 |
patch_retriever = PatchscopesRetriever(
|
|
|
262 |
|
263 |
# Run retrieval for the word across all layers (one pass)
|
264 |
retrieved_words = patch_retriever.get_hidden_states_and_retrieve_word(
|
265 |
+
word,
|
266 |
num_tokens_to_generate=len(tokenizer.tokenize(word)),
|
267 |
)[0]
|
268 |
|
|
|
309 |
label="Patchscopes prompt (use X as placeholder)",
|
310 |
value="repeat the following word X twice: 1)X 2)",
|
311 |
)
|
312 |
+
# context_box = gr.Textbox(label="context", value="")
|
313 |
word_box = gr.Textbox(label="Word to test", value="interpretable")
|
314 |
run_btn = gr.Button("Analyse")
|
315 |
out_html = gr.HTML()
|
316 |
|
317 |
run_btn.click(
|
318 |
analyse_word,
|
319 |
+
inputs=[model_name, word_box, patchscopes_template], #, context_box],
|
320 |
outputs=out_html,
|
321 |
)
|
322 |
|