Commit 
							
							·
						
						7dd2893
	
1
								Parent(s):
							
							c60dccb
								
Update info (#1005)
Browse files

### What problem does this PR solve?
_Briefly describe what this PR aims to solve. Include background context
that will help reviewers understand the purpose of the PR._
### Type of change
- [x] Refactoring

Signed-off-by: Jin Hai <[email protected]>
- rag/app/manual.py +16 -0
 - rag/llm/rpc_server.py +16 -0
 - rag/nlp/__init__.py +16 -0
 - rag/nlp/query.py +15 -1
 - rag/nlp/rag_tokenizer.py +15 -1
 - rag/nlp/search.py +16 -1
 - rag/nlp/surname.py +16 -1
 - rag/nlp/synonym.py +16 -0
 - rag/nlp/term_weight.py +16 -1
 - rag/svr/cache_file_svr.py +15 -0
 - rag/utils/__init__.py +16 -0
 
    	
        rag/app/manual.py
    CHANGED
    
    | 
         @@ -1,3 +1,19 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1 | 
         
             
            import copy
         
     | 
| 2 | 
         
             
            import re
         
     | 
| 3 | 
         | 
| 
         | 
|
| 1 | 
         
            +
            #
         
     | 
| 2 | 
         
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         
     | 
| 3 | 
         
            +
            #
         
     | 
| 4 | 
         
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 5 | 
         
            +
            #  you may not use this file except in compliance with the License.
         
     | 
| 6 | 
         
            +
            #  You may obtain a copy of the License at
         
     | 
| 7 | 
         
            +
            #
         
     | 
| 8 | 
         
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 9 | 
         
            +
            #
         
     | 
| 10 | 
         
            +
            #  Unless required by applicable law or agreed to in writing, software
         
     | 
| 11 | 
         
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 12 | 
         
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 13 | 
         
            +
            #  See the License for the specific language governing permissions and
         
     | 
| 14 | 
         
            +
            #  limitations under the License.
         
     | 
| 15 | 
         
            +
            #
         
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
             
            import copy
         
     | 
| 18 | 
         
             
            import re
         
     | 
| 19 | 
         | 
    	
        rag/llm/rpc_server.py
    CHANGED
    
    | 
         @@ -1,3 +1,19 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1 | 
         
             
            import argparse
         
     | 
| 2 | 
         
             
            import pickle
         
     | 
| 3 | 
         
             
            import random
         
     | 
| 
         | 
|
| 1 | 
         
            +
            #
         
     | 
| 2 | 
         
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         
     | 
| 3 | 
         
            +
            #
         
     | 
| 4 | 
         
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 5 | 
         
            +
            #  you may not use this file except in compliance with the License.
         
     | 
| 6 | 
         
            +
            #  You may obtain a copy of the License at
         
     | 
| 7 | 
         
            +
            #
         
     | 
| 8 | 
         
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 9 | 
         
            +
            #
         
     | 
| 10 | 
         
            +
            #  Unless required by applicable law or agreed to in writing, software
         
     | 
| 11 | 
         
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 12 | 
         
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 13 | 
         
            +
            #  See the License for the specific language governing permissions and
         
     | 
| 14 | 
         
            +
            #  limitations under the License.
         
     | 
| 15 | 
         
            +
            #
         
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
             
            import argparse
         
     | 
| 18 | 
         
             
            import pickle
         
     | 
| 19 | 
         
             
            import random
         
     | 
    	
        rag/nlp/__init__.py
    CHANGED
    
    | 
         @@ -1,3 +1,19 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1 | 
         
             
            import random
         
     | 
| 2 | 
         
             
            from collections import Counter
         
     | 
| 3 | 
         | 
| 
         | 
|
| 1 | 
         
            +
            #
         
     | 
| 2 | 
         
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         
     | 
| 3 | 
         
            +
            #
         
     | 
| 4 | 
         
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 5 | 
         
            +
            #  you may not use this file except in compliance with the License.
         
     | 
| 6 | 
         
            +
            #  You may obtain a copy of the License at
         
     | 
| 7 | 
         
            +
            #
         
     | 
| 8 | 
         
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 9 | 
         
            +
            #
         
     | 
| 10 | 
         
            +
            #  Unless required by applicable law or agreed to in writing, software
         
     | 
| 11 | 
         
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 12 | 
         
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 13 | 
         
            +
            #  See the License for the specific language governing permissions and
         
     | 
| 14 | 
         
            +
            #  limitations under the License.
         
     | 
| 15 | 
         
            +
            #
         
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
             
            import random
         
     | 
| 18 | 
         
             
            from collections import Counter
         
     | 
| 19 | 
         | 
    	
        rag/nlp/query.py
    CHANGED
    
    | 
         @@ -1,4 +1,18 @@ 
     | 
|
| 1 | 
         
            -
            # 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 2 | 
         | 
| 3 | 
         
             
            import json
         
     | 
| 4 | 
         
             
            import math
         
     | 
| 
         | 
|
| 1 | 
         
            +
            #
         
     | 
| 2 | 
         
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         
     | 
| 3 | 
         
            +
            #
         
     | 
| 4 | 
         
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 5 | 
         
            +
            #  you may not use this file except in compliance with the License.
         
     | 
| 6 | 
         
            +
            #  You may obtain a copy of the License at
         
     | 
| 7 | 
         
            +
            #
         
     | 
| 8 | 
         
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 9 | 
         
            +
            #
         
     | 
| 10 | 
         
            +
            #  Unless required by applicable law or agreed to in writing, software
         
     | 
| 11 | 
         
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 12 | 
         
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 13 | 
         
            +
            #  See the License for the specific language governing permissions and
         
     | 
| 14 | 
         
            +
            #  limitations under the License.
         
     | 
| 15 | 
         
            +
            #
         
     | 
| 16 | 
         | 
| 17 | 
         
             
            import json
         
     | 
| 18 | 
         
             
            import math
         
     | 
    	
        rag/nlp/rag_tokenizer.py
    CHANGED
    
    | 
         @@ -1,4 +1,18 @@ 
     | 
|
| 1 | 
         
            -
            # 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 2 | 
         | 
| 3 | 
         
             
            import copy
         
     | 
| 4 | 
         
             
            import datrie
         
     | 
| 
         | 
|
| 1 | 
         
            +
            #
         
     | 
| 2 | 
         
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         
     | 
| 3 | 
         
            +
            #
         
     | 
| 4 | 
         
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 5 | 
         
            +
            #  you may not use this file except in compliance with the License.
         
     | 
| 6 | 
         
            +
            #  You may obtain a copy of the License at
         
     | 
| 7 | 
         
            +
            #
         
     | 
| 8 | 
         
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 9 | 
         
            +
            #
         
     | 
| 10 | 
         
            +
            #  Unless required by applicable law or agreed to in writing, software
         
     | 
| 11 | 
         
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 12 | 
         
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 13 | 
         
            +
            #  See the License for the specific language governing permissions and
         
     | 
| 14 | 
         
            +
            #  limitations under the License.
         
     | 
| 15 | 
         
            +
            #
         
     | 
| 16 | 
         | 
| 17 | 
         
             
            import copy
         
     | 
| 18 | 
         
             
            import datrie
         
     | 
    	
        rag/nlp/search.py
    CHANGED
    
    | 
         @@ -1,4 +1,19 @@ 
     | 
|
| 1 | 
         
            -
            # 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 2 | 
         
             
            import json
         
     | 
| 3 | 
         
             
            import re
         
     | 
| 4 | 
         
             
            from copy import deepcopy
         
     | 
| 
         | 
|
| 1 | 
         
            +
            #
         
     | 
| 2 | 
         
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         
     | 
| 3 | 
         
            +
            #
         
     | 
| 4 | 
         
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 5 | 
         
            +
            #  you may not use this file except in compliance with the License.
         
     | 
| 6 | 
         
            +
            #  You may obtain a copy of the License at
         
     | 
| 7 | 
         
            +
            #
         
     | 
| 8 | 
         
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 9 | 
         
            +
            #
         
     | 
| 10 | 
         
            +
            #  Unless required by applicable law or agreed to in writing, software
         
     | 
| 11 | 
         
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 12 | 
         
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 13 | 
         
            +
            #  See the License for the specific language governing permissions and
         
     | 
| 14 | 
         
            +
            #  limitations under the License.
         
     | 
| 15 | 
         
            +
            #
         
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
             
            import json
         
     | 
| 18 | 
         
             
            import re
         
     | 
| 19 | 
         
             
            from copy import deepcopy
         
     | 
    	
        rag/nlp/surname.py
    CHANGED
    
    | 
         @@ -1,4 +1,19 @@ 
     | 
|
| 1 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 2 | 
         
             
            m = set(["赵","钱","孙","李",
         
     | 
| 3 | 
         
             
            "周","吴","郑","王",
         
     | 
| 4 | 
         
             
            "冯","陈","褚","卫",
         
     | 
| 
         | 
|
| 1 | 
         
            +
            #
         
     | 
| 2 | 
         
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         
     | 
| 3 | 
         
            +
            #
         
     | 
| 4 | 
         
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 5 | 
         
            +
            #  you may not use this file except in compliance with the License.
         
     | 
| 6 | 
         
            +
            #  You may obtain a copy of the License at
         
     | 
| 7 | 
         
            +
            #
         
     | 
| 8 | 
         
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 9 | 
         
            +
            #
         
     | 
| 10 | 
         
            +
            #  Unless required by applicable law or agreed to in writing, software
         
     | 
| 11 | 
         
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 12 | 
         
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 13 | 
         
            +
            #  See the License for the specific language governing permissions and
         
     | 
| 14 | 
         
            +
            #  limitations under the License.
         
     | 
| 15 | 
         
            +
            #
         
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
             
            m = set(["赵","钱","孙","李",
         
     | 
| 18 | 
         
             
            "周","吴","郑","王",
         
     | 
| 19 | 
         
             
            "冯","陈","褚","卫",
         
     | 
    	
        rag/nlp/synonym.py
    CHANGED
    
    | 
         @@ -1,3 +1,19 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1 | 
         
             
            import json
         
     | 
| 2 | 
         
             
            import os
         
     | 
| 3 | 
         
             
            import time
         
     | 
| 
         | 
|
| 1 | 
         
            +
            #
         
     | 
| 2 | 
         
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         
     | 
| 3 | 
         
            +
            #
         
     | 
| 4 | 
         
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 5 | 
         
            +
            #  you may not use this file except in compliance with the License.
         
     | 
| 6 | 
         
            +
            #  You may obtain a copy of the License at
         
     | 
| 7 | 
         
            +
            #
         
     | 
| 8 | 
         
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 9 | 
         
            +
            #
         
     | 
| 10 | 
         
            +
            #  Unless required by applicable law or agreed to in writing, software
         
     | 
| 11 | 
         
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 12 | 
         
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 13 | 
         
            +
            #  See the License for the specific language governing permissions and
         
     | 
| 14 | 
         
            +
            #  limitations under the License.
         
     | 
| 15 | 
         
            +
            #
         
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
             
            import json
         
     | 
| 18 | 
         
             
            import os
         
     | 
| 19 | 
         
             
            import time
         
     | 
    	
        rag/nlp/term_weight.py
    CHANGED
    
    | 
         @@ -1,4 +1,19 @@ 
     | 
|
| 1 | 
         
            -
            # 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 2 | 
         
             
            import math
         
     | 
| 3 | 
         
             
            import json
         
     | 
| 4 | 
         
             
            import re
         
     | 
| 
         | 
|
| 1 | 
         
            +
            #
         
     | 
| 2 | 
         
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         
     | 
| 3 | 
         
            +
            #
         
     | 
| 4 | 
         
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 5 | 
         
            +
            #  you may not use this file except in compliance with the License.
         
     | 
| 6 | 
         
            +
            #  You may obtain a copy of the License at
         
     | 
| 7 | 
         
            +
            #
         
     | 
| 8 | 
         
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 9 | 
         
            +
            #
         
     | 
| 10 | 
         
            +
            #  Unless required by applicable law or agreed to in writing, software
         
     | 
| 11 | 
         
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 12 | 
         
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 13 | 
         
            +
            #  See the License for the specific language governing permissions and
         
     | 
| 14 | 
         
            +
            #  limitations under the License.
         
     | 
| 15 | 
         
            +
            #
         
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
             
            import math
         
     | 
| 18 | 
         
             
            import json
         
     | 
| 19 | 
         
             
            import re
         
     | 
    	
        rag/svr/cache_file_svr.py
    CHANGED
    
    | 
         @@ -1,3 +1,18 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1 | 
         
             
            import random
         
     | 
| 2 | 
         
             
            import time
         
     | 
| 3 | 
         
             
            import traceback
         
     | 
| 
         | 
|
| 1 | 
         
            +
            #
         
     | 
| 2 | 
         
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         
     | 
| 3 | 
         
            +
            #
         
     | 
| 4 | 
         
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 5 | 
         
            +
            #  you may not use this file except in compliance with the License.
         
     | 
| 6 | 
         
            +
            #  You may obtain a copy of the License at
         
     | 
| 7 | 
         
            +
            #
         
     | 
| 8 | 
         
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 9 | 
         
            +
            #
         
     | 
| 10 | 
         
            +
            #  Unless required by applicable law or agreed to in writing, software
         
     | 
| 11 | 
         
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 12 | 
         
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 13 | 
         
            +
            #  See the License for the specific language governing permissions and
         
     | 
| 14 | 
         
            +
            #  limitations under the License.
         
     | 
| 15 | 
         
            +
            #
         
     | 
| 16 | 
         
             
            import random
         
     | 
| 17 | 
         
             
            import time
         
     | 
| 18 | 
         
             
            import traceback
         
     | 
    	
        rag/utils/__init__.py
    CHANGED
    
    | 
         @@ -1,3 +1,19 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1 | 
         
             
            import os
         
     | 
| 2 | 
         
             
            import re
         
     | 
| 3 | 
         
             
            import tiktoken
         
     | 
| 
         | 
|
| 1 | 
         
            +
            #
         
     | 
| 2 | 
         
            +
            #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
         
     | 
| 3 | 
         
            +
            #
         
     | 
| 4 | 
         
            +
            #  Licensed under the Apache License, Version 2.0 (the "License");
         
     | 
| 5 | 
         
            +
            #  you may not use this file except in compliance with the License.
         
     | 
| 6 | 
         
            +
            #  You may obtain a copy of the License at
         
     | 
| 7 | 
         
            +
            #
         
     | 
| 8 | 
         
            +
            #      http://www.apache.org/licenses/LICENSE-2.0
         
     | 
| 9 | 
         
            +
            #
         
     | 
| 10 | 
         
            +
            #  Unless required by applicable law or agreed to in writing, software
         
     | 
| 11 | 
         
            +
            #  distributed under the License is distributed on an "AS IS" BASIS,
         
     | 
| 12 | 
         
            +
            #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         
     | 
| 13 | 
         
            +
            #  See the License for the specific language governing permissions and
         
     | 
| 14 | 
         
            +
            #  limitations under the License.
         
     | 
| 15 | 
         
            +
            #
         
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
             
            import os
         
     | 
| 18 | 
         
             
            import re
         
     | 
| 19 | 
         
             
            import tiktoken
         
     |