miaoyibo commited on
Commit
4079598
·
1 Parent(s): 5d6758b
Files changed (3) hide show
  1. .gradio/certificate.pem +31 -0
  2. app.py +14 -5
  3. kimi_vl/serve/inference.py +8 -5
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import argparse
2
  import gradio as gr
3
  import os
 
4
  from PIL import Image
5
  import spaces
6
  import copy
@@ -27,7 +28,7 @@ from kimi_vl.serve.chat_utils import (
27
  to_gradio_chatbot,
28
  to_gradio_history,
29
  )
30
- from kimi_vl.serve.inference import kimi_vl_generate, load_model
31
  from kimi_vl.serve.examples import get_examples
32
 
33
  TITLE = """<h1 align="left" style="min-width:200px; margin-top:0;">Chat with Kimi-Dev-72B🤔 </h1>"""
@@ -127,7 +128,7 @@ def predict(
127
  """
128
  print("running the prediction function")
129
  try:
130
- model = fetch_model(args.model)
131
 
132
  if text == "":
133
  yield chatbot, history, "Empty context."
@@ -136,6 +137,8 @@ def predict(
136
  yield [[text, "No Model Found"]], [], "No Model Found"
137
  return
138
 
 
 
139
  if images is None:
140
  images = []
141
 
@@ -165,10 +168,11 @@ def predict(
165
  gradio_chatbot_output = to_gradio_chatbot(conversation)
166
 
167
  full_response = ""
168
- for x in kimi_vl_generate(
169
  conversations=all_conv,
170
  model=model,
171
- processor=processor,
 
172
  stop_words=stop_words,
173
  max_length=max_length_tokens,
174
  temperature=temperature,
@@ -340,13 +344,18 @@ def main(args: argparse.Namespace):
340
 
341
  # concurrency_count=CONCURRENT_COUNT, max_size=MAX_EVENTS
342
  favicon_path = os.path.join("kimi_vl/serve/assets/favicon.ico")
 
 
 
 
 
343
  demo.queue().launch(
344
  favicon_path=favicon_path,
345
  server_name=args.ip,
346
  server_port=args.port,
 
347
  )
348
 
349
-
350
  if __name__ == "__main__":
351
  args = parse_args()
352
  print(args)
 
1
  import argparse
2
  import gradio as gr
3
  import os
4
+ os.environ["HF_HOME"] = "/mnt/moonfs/miaoyibo-ksyun/hf_home"
5
  from PIL import Image
6
  import spaces
7
  import copy
 
28
  to_gradio_chatbot,
29
  to_gradio_history,
30
  )
31
+ from kimi_vl.serve.inference import kimi_dev_generate, load_model
32
  from kimi_vl.serve.examples import get_examples
33
 
34
  TITLE = """<h1 align="left" style="min-width:200px; margin-top:0;">Chat with Kimi-Dev-72B🤔 </h1>"""
 
128
  """
129
  print("running the prediction function")
130
  try:
131
+ model, tokenizer = fetch_model(args.model)
132
 
133
  if text == "":
134
  yield chatbot, history, "Empty context."
 
137
  yield [[text, "No Model Found"]], [], "No Model Found"
138
  return
139
 
140
+
141
+
142
  if images is None:
143
  images = []
144
 
 
168
  gradio_chatbot_output = to_gradio_chatbot(conversation)
169
 
170
  full_response = ""
171
+ for x in kimi_dev_generate(
172
  conversations=all_conv,
173
  model=model,
174
+ tokenizer=tokenizer,
175
+ # processor=processor,
176
  stop_words=stop_words,
177
  max_length=max_length_tokens,
178
  temperature=temperature,
 
344
 
345
  # concurrency_count=CONCURRENT_COUNT, max_size=MAX_EVENTS
346
  favicon_path = os.path.join("kimi_vl/serve/assets/favicon.ico")
347
+ # demo.queue().launch(
348
+ # favicon_path=favicon_path,
349
+ # server_name=args.ip,
350
+ # server_port=args.port,
351
+ # )
352
  demo.queue().launch(
353
  favicon_path=favicon_path,
354
  server_name=args.ip,
355
  server_port=args.port,
356
+ share=True  # for local debugging
357
  )
358
 
 
359
  if __name__ == "__main__":
360
  args = parse_args()
361
  print(args)
kimi_vl/serve/inference.py CHANGED
@@ -12,6 +12,7 @@ from transformers import (
12
  StoppingCriteria,
13
  StoppingCriteriaList,
14
  TextIteratorStreamer,
 
15
  )
16
 
17
  from .chat_utils import Conversation, get_conv_template
@@ -35,8 +36,9 @@ def load_model(model_path: str = "moonshotai/Kimi-Dev-72B"):
35
  trust_remote_code=True,
36
  )
37
  # processor = AutoProcessor.from_pretrained(model_path, config=config, trust_remote_code=True)
 
38
 
39
- return model
40
 
41
 
42
  class StoppingCriteriaSub(StoppingCriteria):
@@ -155,9 +157,10 @@ def preprocess(
155
 
156
  @torch.no_grad()
157
  @torch.inference_mode()
158
- def kimi_vl_generate(
159
  model: torch.nn.Module,
160
- processor: AutoProcessor,
 
161
  conversations: list[Conversation],
162
  stop_words: list,
163
  max_length: int = 256,
@@ -167,12 +170,12 @@ def kimi_vl_generate(
167
  ):
168
  # convert conversation to inputs
169
  print(f"conversations = {conversations}")
170
- inputs = preprocess(conversations, processor=processor)
 
171
  inputs = inputs.to(model.device)
172
 
173
  return generate(
174
  model,
175
- processor,
176
  inputs,
177
  max_gen_len=max_length,
178
  temperature=temperature,
 
12
  StoppingCriteria,
13
  StoppingCriteriaList,
14
  TextIteratorStreamer,
15
+ AutoTokenizer
16
  )
17
 
18
  from .chat_utils import Conversation, get_conv_template
 
36
  trust_remote_code=True,
37
  )
38
  # processor = AutoProcessor.from_pretrained(model_path, config=config, trust_remote_code=True)
39
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
40
 
41
+ return model, tokenizer
42
 
43
 
44
  class StoppingCriteriaSub(StoppingCriteria):
 
157
 
158
  @torch.no_grad()
159
  @torch.inference_mode()
160
+ def kimi_dev_generate(
161
  model: torch.nn.Module,
162
+ tokenizer,
163
+ # processor: AutoProcessor,
164
  conversations: list[Conversation],
165
  stop_words: list,
166
  max_length: int = 256,
 
170
  ):
171
  # convert conversation to inputs
172
  print(f"conversations = {conversations}")
173
+ # inputs = preprocess(conversations)
174
+ inputs = tokenizer.tokenize(conversations)
175
  inputs = inputs.to(model.device)
176
 
177
  return generate(
178
  model,
 
179
  inputs,
180
  max_gen_len=max_length,
181
  temperature=temperature,