Text Generation · Russian · conversational
IlyaGusev committed
Commit f568b29
1 parent: 6609b53

Update llama.cpp version

Files changed (3)
  1. README.md +6 -6
  2. ggml-model-q4_1.bin +2 -2
  3. interact.py +0 -73
README.md CHANGED
@@ -11,17 +11,17 @@ pipeline_tag: text2text-generation
 
 Llama.cpp compatible version of an original [30B model](https://huggingface.co/IlyaGusev/saiga_30b_lora).
 
+* Download `ggml-model-q4_1.bin`.
+* Download [interact_llamacpp.py](https://raw.githubusercontent.com/IlyaGusev/rulm/master/self_instruct/src/interact_llamacpp.py)
+
 How to run:
 ```
 sudo apt-get install git-lfs
-pip install llama-cpp-python==0.1.38 fire
-
-git clone https://huggingface.co/IlyaGusev/saiga_30b_lora_llamacpp
+pip install llama-cpp-python fire
 
-cd saiga_30b_lora_llamacpp
-python3 interact.py ggml-model-q4_1.bin
+python3 interact_llamacpp.py ggml-model-q4_1.bin
 ```
 
 System requirements:
 * 32GB RAM
-* CPU with 4 cores
+* CPU with 4 cores
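In short, the updated README drops the `==0.1.38` pin on llama-cpp-python and replaces the clone-and-cd steps with two direct downloads: the quantized model from this repo and the interaction script from the rulm repository. For reference, a minimal sketch of doing both downloads from Python, assuming the `huggingface_hub` package (not mentioned in the README itself) and the raw GitHub URL above:

```python
# Sketch: fetch the two files the updated README asks for.
# Assumes `pip install huggingface_hub`; repo and file names as in this commit.
import urllib.request

from huggingface_hub import hf_hub_download

# Pull the quantized model straight from the Hub (resolves the LFS pointer
# to the actual 20 GB file).
model_path = hf_hub_download(
    repo_id="IlyaGusev/saiga_30b_lora_llamacpp",
    filename="ggml-model-q4_1.bin",
)

# Fetch the interaction script from the rulm repository.
script_url = (
    "https://raw.githubusercontent.com/IlyaGusev/rulm/"
    "master/self_instruct/src/interact_llamacpp.py"
)
urllib.request.urlretrieve(script_url, "interact_llamacpp.py")

print(model_path)
```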
ggml-model-q4_1.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4e6bf295d3e2eee786610e147b885193a148425df46d8ac2e45b61151dd7172
-size 24399792512
+oid sha256:b2b25d918f5e2b02152a3d2469b1cc0d49de9c4c592ea9f5ae67aad0e66dd8da
+size 20333775232
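The new pointer records a noticeably smaller file (20,333,775,232 bytes, down from 24,399,792,512), presumably because the q4_1 file format changed with the llama.cpp version this commit targets. A downloaded copy can be checked against the pointer with the standard library alone; a sketch, with the oid and size copied from the diff above:

```python
# Verify a downloaded ggml-model-q4_1.bin against the new LFS pointer.
import hashlib
import os

EXPECTED_SHA256 = "b2b25d918f5e2b02152a3d2469b1cc0d49de9c4c592ea9f5ae67aad0e66dd8da"
EXPECTED_SIZE = 20333775232

def verify(path="ggml-model-q4_1.bin"):
    # Size check first: cheap, and catches truncated downloads immediately.
    if os.path.getsize(path) != EXPECTED_SIZE:
        return False
    sha = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks so the 20 GB file never sits in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == EXPECTED_SHA256

if __name__ == "__main__":
    print("OK" if verify() else "MISMATCH")
```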
interact.py DELETED
@@ -1,73 +0,0 @@
-import fire
-from llama_cpp import Llama
-
-SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
-SYSTEM_TOKEN = 1788
-USER_TOKEN = 1404
-BOT_TOKEN = 9225
-LINEBREAK_TOKEN = 13
-
-ROLE_TOKENS = {
-    "user": USER_TOKEN,
-    "bot": BOT_TOKEN,
-    "system": SYSTEM_TOKEN
-}
-
-
-def get_message_tokens(model, role, content):
-    message_tokens = model.tokenize(content.encode("utf-8"))
-    message_tokens.insert(1, ROLE_TOKENS[role])
-    message_tokens.insert(2, LINEBREAK_TOKEN)
-    message_tokens.append(model.token_eos())
-    return message_tokens
-
-
-def get_system_tokens(model):
-    system_message = {
-        "role": "system",
-        "content": SYSTEM_PROMPT
-    }
-    return get_message_tokens(model, **system_message)
-
-
-def interact(
-    model_path,
-    n_ctx=2000,
-    top_k=30,
-    top_p=0.9,
-    temperature=0.2,
-    repeat_penalty=1.1
-):
-    model = Llama(
-        model_path=model_path,
-        n_ctx=n_ctx,
-        n_parts=1,
-    )
-
-    system_tokens = get_system_tokens(model)
-    tokens = system_tokens
-    model.eval(tokens)
-
-    while True:
-        user_message = input("User: ")
-        message_tokens = get_message_tokens(model=model, role="user", content=user_message)
-        role_tokens = [model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]
-        tokens += message_tokens + role_tokens
-        generator = model.generate(
-            tokens,
-            top_k=top_k,
-            top_p=top_p,
-            temp=temperature,
-            repeat_penalty=repeat_penalty
-        )
-        for token in generator:
-            token_str = model.detokenize([token]).decode("utf-8")
-            tokens.append(token)
-            if token == model.token_eos():
-                break
-            print(token_str, end="", flush=True)
-        print()
-
-
-if __name__ == "__main__":
-    fire.Fire(interact)
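For the record, the deleted script built each conversation turn in the Saiga prompt format: `model.tokenize()` returns `[BOS, ...content ids...]`, into which a role token and a linebreak token were spliced right after BOS, with EOS appended at the end; the Russian system prompt translates as "You are Saiga, a Russian-language automated assistant. You talk to people and help them." A self-contained sketch of that token layout, with a stub in place of `model.tokenize()` (the stub's ids are arbitrary):

```python
# Illustration of the token layout the deleted interact.py produced.
# Role and linebreak ids are the constants from that script; BOS/EOS are
# the LLaMA special-token ids that token_bos()/token_eos() return.
BOS, EOS = 1, 2
SYSTEM_TOKEN, USER_TOKEN, BOT_TOKEN, LINEBREAK_TOKEN = 1788, 1404, 9225, 13

def fake_tokenize(text):
    # Stand-in for model.tokenize(): real tokenization maps text to
    # vocabulary ids after a leading BOS.
    return [BOS] + [ord(c) % 1000 + 100 for c in text]

def message_tokens(role_token, text):
    # Same splicing as get_message_tokens():
    # [BOS, role, "\n", content..., EOS]
    tokens = fake_tokenize(text)
    tokens.insert(1, role_token)
    tokens.insert(2, LINEBREAK_TOKEN)
    tokens.append(EOS)
    return tokens

# A full prompt, as assembled in interact(): system turn, user turn, then
# [BOS, BOT, "\n"] to prime the model, which generates until it emits EOS.
prompt = (
    message_tokens(SYSTEM_TOKEN, "system prompt here")
    + message_tokens(USER_TOKEN, "user message here")
    + [BOS, BOT_TOKEN, LINEBREAK_TOKEN]
)
print(prompt[:6])  # [1, 1788, 13, ...] -- BOS, system role, linebreak, content
```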