diff --git a/Dockerfile.cuda b/Dockerfile.cuda
index 0886ebf9a257c4876136c58c2e0c5d5c2c5a9594..b96966638e76ce3eaf4ca29feb876278b62f725c 100644
--- a/Dockerfile.cuda
+++ b/Dockerfile.cuda
@@ -1,27 +1,27 @@
-FROM infiniflow/ragflow-base:v2.0
-USER root
-
-WORKDIR /ragflow
-
-## for cuda > 12.0
-RUN pip uninstall -y onnxruntime-gpu
-RUN pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
-
-
-ADD ./web ./web
-RUN cd ./web && npm i --force && npm run build
-
-ADD ./api ./api
-ADD ./conf ./conf
-ADD ./deepdoc ./deepdoc
-ADD ./rag ./rag
-ADD ./agent ./agent
-ADD ./graphrag ./graphrag
-
-ENV PYTHONPATH=/ragflow/
-ENV HF_ENDPOINT=https://hf-mirror.com
-
-ADD docker/entrypoint.sh ./entrypoint.sh
-RUN chmod +x ./entrypoint.sh
-
-ENTRYPOINT ["./entrypoint.sh"]
+FROM infiniflow/ragflow-base:v2.0
+USER root
+
+WORKDIR /ragflow
+
+## for cuda > 12.0
+RUN pip uninstall -y onnxruntime-gpu
+RUN pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
+
+
+ADD ./web ./web
+RUN cd ./web && npm i --force && npm run build
+
+ADD ./api ./api
+ADD ./conf ./conf
+ADD ./deepdoc ./deepdoc
+ADD ./rag ./rag
+ADD ./agent ./agent
+ADD ./graphrag ./graphrag
+
+ENV PYTHONPATH=/ragflow/
+ENV HF_ENDPOINT=https://hf-mirror.com
+
+ADD docker/entrypoint.sh ./entrypoint.sh
+RUN chmod +x ./entrypoint.sh
+
+ENTRYPOINT ["./entrypoint.sh"]
diff --git a/Dockerfile.scratch b/Dockerfile.scratch
index abb3b254d64340ca0bf40c7ddb3a78af2120e4f1..a95cc559dfb499f225dfe7cb5a415ed6f9245948 100644
--- a/Dockerfile.scratch
+++ b/Dockerfile.scratch
@@ -1,56 +1,56 @@
-FROM ubuntu:22.04
-USER root
-
-WORKDIR /ragflow
-
-RUN apt-get update && apt-get install -y wget curl build-essential libopenmpi-dev
-
-RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
-    bash ~/miniconda.sh -b -p /root/miniconda3 && \
-    rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
-    echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
-    echo "conda activate base" >> ~/.bashrc
-
-ENV PATH /root/miniconda3/bin:$PATH
-
-RUN conda create -y --name py11 python=3.11
-
-ENV CONDA_DEFAULT_ENV py11
-ENV CONDA_PREFIX /root/miniconda3/envs/py11
-ENV PATH $CONDA_PREFIX/bin:$PATH
-
-RUN curl -sL https://deb.nodesource.com/setup_14.x | bash -
-RUN apt-get install -y nodejs
-
-RUN apt-get install -y nginx
-
-ADD ./web ./web
-ADD ./api ./api
-ADD ./conf ./conf
-ADD ./deepdoc ./deepdoc
-ADD ./rag ./rag
-ADD ./requirements.txt ./requirements.txt
-ADD ./agent ./agent
-ADD ./graphrag ./graphrag
-
-RUN apt install openmpi-bin openmpi-common libopenmpi-dev
-ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/openmpi/lib:$LD_LIBRARY_PATH
-RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
-RUN cd ./web && npm i --force && npm run build
-RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ -r ./requirements.txt
-
-RUN apt-get update && \
-    apt-get install -y libglib2.0-0 libgl1-mesa-glx && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ ollama
-RUN conda run -n py11 python -m nltk.downloader punkt
-RUN conda run -n py11 python -m nltk.downloader wordnet
-
-ENV PYTHONPATH=/ragflow/
-ENV HF_ENDPOINT=https://hf-mirror.com
-
-ADD docker/entrypoint.sh ./entrypoint.sh
-RUN chmod +x ./entrypoint.sh
-
-ENTRYPOINT ["./entrypoint.sh"]
+FROM ubuntu:22.04
+USER root
+
+WORKDIR /ragflow
+
+RUN apt-get update && apt-get install -y wget curl build-essential libopenmpi-dev
+
+RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
+    bash ~/miniconda.sh -b -p /root/miniconda3 && \
+    rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+    echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
+    echo "conda activate base" >> ~/.bashrc
+
+ENV PATH /root/miniconda3/bin:$PATH
+
+RUN conda create -y --name py11 python=3.11
+
+ENV CONDA_DEFAULT_ENV py11
+ENV CONDA_PREFIX /root/miniconda3/envs/py11
+ENV PATH $CONDA_PREFIX/bin:$PATH
+
+RUN curl -sL https://deb.nodesource.com/setup_14.x | bash -
+RUN apt-get install -y nodejs
+
+RUN apt-get install -y nginx
+
+ADD ./web ./web
+ADD ./api ./api
+ADD ./conf ./conf
+ADD ./deepdoc ./deepdoc
+ADD ./rag ./rag
+ADD ./requirements.txt ./requirements.txt
+ADD ./agent ./agent
+ADD ./graphrag ./graphrag
+
+RUN apt install openmpi-bin openmpi-common libopenmpi-dev
+ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/openmpi/lib:$LD_LIBRARY_PATH
+RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
+RUN cd ./web && npm i --force && npm run build
+RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ -r ./requirements.txt
+
+RUN apt-get update && \
+    apt-get install -y libglib2.0-0 libgl1-mesa-glx && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ ollama
+RUN conda run -n py11 python -m nltk.downloader punkt
+RUN conda run -n py11 python -m nltk.downloader wordnet
+
+ENV PYTHONPATH=/ragflow/
+ENV HF_ENDPOINT=https://hf-mirror.com
+
+ADD docker/entrypoint.sh ./entrypoint.sh
+RUN chmod +x ./entrypoint.sh
+
+ENTRYPOINT ["./entrypoint.sh"]
diff --git a/Dockerfile.scratch.oc9 b/Dockerfile.scratch.oc9
index b21cc8ff12cb695ad1a2384edb14a606b6bacc62..cc2071bb36a4ae0ff404443d84bd36b193a62e56 100644
--- a/Dockerfile.scratch.oc9
+++ b/Dockerfile.scratch.oc9
@@ -1,58 +1,58 @@
-FROM opencloudos/opencloudos:9.0
-USER root
-
-WORKDIR /ragflow
-
-RUN dnf update -y && dnf install -y wget curl gcc-c++ openmpi-devel
-
-RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
-    bash ~/miniconda.sh -b -p /root/miniconda3 && \
-    rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
-    echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
-    echo "conda activate base" >> ~/.bashrc
-
-ENV PATH /root/miniconda3/bin:$PATH
-
-RUN conda create -y --name py11 python=3.11
-
-ENV CONDA_DEFAULT_ENV py11
-ENV CONDA_PREFIX /root/miniconda3/envs/py11
-ENV PATH $CONDA_PREFIX/bin:$PATH
-
-# RUN curl -sL https://rpm.nodesource.com/setup_14.x | bash -
-RUN dnf install -y nodejs
-
-RUN dnf install -y nginx
-
-ADD ./web ./web
-ADD ./api ./api
-ADD ./conf ./conf
-ADD ./deepdoc ./deepdoc
-ADD ./rag ./rag
-ADD ./requirements.txt ./requirements.txt
-ADD ./agent ./agent
-ADD ./graphrag ./graphrag
-
-RUN dnf install -y openmpi openmpi-devel python3-openmpi
-ENV C_INCLUDE_PATH /usr/include/openmpi-x86_64:$C_INCLUDE_PATH
-ENV LD_LIBRARY_PATH /usr/lib64/openmpi/lib:$LD_LIBRARY_PATH
-RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
-RUN cd ./web && npm i --force && npm run build
-RUN conda run -n py11 pip install $(grep -ivE "mpi4py" ./requirements.txt) # without mpi4py==3.1.5
-RUN conda run -n py11 pip install redis
-
-RUN dnf update -y && \
-    dnf install -y glib2 mesa-libGL && \
-    dnf clean all
-
-RUN conda run -n py11 pip install ollama
-RUN conda run -n py11 python -m nltk.downloader punkt
-RUN conda run -n py11 python -m nltk.downloader wordnet
-
-ENV PYTHONPATH=/ragflow/
-ENV HF_ENDPOINT=https://hf-mirror.com
-
-ADD docker/entrypoint.sh ./entrypoint.sh
-RUN chmod +x ./entrypoint.sh
-
-ENTRYPOINT ["./entrypoint.sh"]
+FROM opencloudos/opencloudos:9.0
+USER root
+
+WORKDIR /ragflow
+
+RUN dnf update -y && dnf install -y wget curl gcc-c++ openmpi-devel
+
+RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
+    bash ~/miniconda.sh -b -p /root/miniconda3 && \
+    rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+    echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
+    echo "conda activate base" >> ~/.bashrc
+
+ENV PATH /root/miniconda3/bin:$PATH
+
+RUN conda create -y --name py11 python=3.11
+
+ENV CONDA_DEFAULT_ENV py11
+ENV CONDA_PREFIX /root/miniconda3/envs/py11
+ENV PATH $CONDA_PREFIX/bin:$PATH
+
+# RUN curl -sL https://rpm.nodesource.com/setup_14.x | bash -
+RUN dnf install -y nodejs
+
+RUN dnf install -y nginx
+
+ADD ./web ./web
+ADD ./api ./api
+ADD ./conf ./conf
+ADD ./deepdoc ./deepdoc
+ADD ./rag ./rag
+ADD ./requirements.txt ./requirements.txt
+ADD ./agent ./agent
+ADD ./graphrag ./graphrag
+
+RUN dnf install -y openmpi openmpi-devel python3-openmpi
+ENV C_INCLUDE_PATH /usr/include/openmpi-x86_64:$C_INCLUDE_PATH
+ENV LD_LIBRARY_PATH /usr/lib64/openmpi/lib:$LD_LIBRARY_PATH
+RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
+RUN cd ./web && npm i --force && npm run build
+RUN conda run -n py11 pip install $(grep -ivE "mpi4py" ./requirements.txt) # without mpi4py==3.1.5
+RUN conda run -n py11 pip install redis
+
+RUN dnf update -y && \
+    dnf install -y glib2 mesa-libGL && \
+    dnf clean all
+
+RUN conda run -n py11 pip install ollama
+RUN conda run -n py11 python -m nltk.downloader punkt
+RUN conda run -n py11 python -m nltk.downloader wordnet
+
+ENV PYTHONPATH=/ragflow/
+ENV HF_ENDPOINT=https://hf-mirror.com
+
+ADD docker/entrypoint.sh ./entrypoint.sh
+RUN chmod +x ./entrypoint.sh
+
+ENTRYPOINT ["./entrypoint.sh"]
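
Dockerfile.cuda swaps the stock onnxruntime-gpu for the CUDA 12 build from the aiinfra index. A quick way to confirm the swap took effect inside a built image is to list the available execution providers; this is a minimal standalone check (not part of any Dockerfile above), using the real onnxruntime API:

    import onnxruntime as ort

    # The CUDA 12 wheel only exposes CUDAExecutionProvider when a matching
    # CUDA runtime is visible; otherwise the list degrades to CPU only.
    providers = ort.get_available_providers()
    print(providers)
    assert "CUDAExecutionProvider" in providers, "GPU build not active"
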
/root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \ + echo "conda activate base" >> ~/.bashrc + +ENV PATH /root/miniconda3/bin:$PATH + +RUN conda create -y --name py11 python=3.11 + +ENV CONDA_DEFAULT_ENV py11 +ENV CONDA_PREFIX /root/miniconda3/envs/py11 +ENV PATH $CONDA_PREFIX/bin:$PATH + +# RUN curl -sL https://rpm.nodesource.com/setup_14.x | bash - +RUN dnf install -y nodejs + +RUN dnf install -y nginx + +ADD ./web ./web +ADD ./api ./api +ADD ./conf ./conf +ADD ./deepdoc ./deepdoc +ADD ./rag ./rag +ADD ./requirements.txt ./requirements.txt +ADD ./agent ./agent +ADD ./graphrag ./graphrag + +RUN dnf install -y openmpi openmpi-devel python3-openmpi +ENV C_INCLUDE_PATH /usr/include/openmpi-x86_64:$C_INCLUDE_PATH +ENV LD_LIBRARY_PATH /usr/lib64/openmpi/lib:$LD_LIBRARY_PATH +RUN rm /root/miniconda3/envs/py11/compiler_compat/ld +RUN cd ./web && npm i --force && npm run build +RUN conda run -n py11 pip install $(grep -ivE "mpi4py" ./requirements.txt) # without mpi4py==3.1.5 +RUN conda run -n py11 pip install redis + +RUN dnf update -y && \ + dnf install -y glib2 mesa-libGL && \ + dnf clean all + +RUN conda run -n py11 pip install ollama +RUN conda run -n py11 python -m nltk.downloader punkt +RUN conda run -n py11 python -m nltk.downloader wordnet + +ENV PYTHONPATH=/ragflow/ +ENV HF_ENDPOINT=https://hf-mirror.com + +ADD docker/entrypoint.sh ./entrypoint.sh +RUN chmod +x ./entrypoint.sh + +ENTRYPOINT ["./entrypoint.sh"] diff --git a/agent/component/baidu.py b/agent/component/baidu.py index 0a866aab0c4faa2006ec0568fdcc5fefa54f9583..cb2b66ed68af229daf90c4d63a0eba597902ae75 100644 --- a/agent/component/baidu.py +++ b/agent/component/baidu.py @@ -1,69 +1,69 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import random -from abc import ABC -from functools import partial -import pandas as pd -import requests -import re -from agent.settings import DEBUG -from agent.component.base import ComponentBase, ComponentParamBase - - -class BaiduParam(ComponentParamBase): - """ - Define the Baidu component parameters. 
- """ - - def __init__(self): - super().__init__() - self.top_n = 10 - - def check(self): - self.check_positive_integer(self.top_n, "Top N") - - -class Baidu(ComponentBase, ABC): - component_name = "Baidu" - - def _run(self, history, **kwargs): - ans = self.get_input() - ans = " - ".join(ans["content"]) if "content" in ans else "" - if not ans: - return Baidu.be_output("") - - try: - url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n) - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'} - response = requests.get(url=url, headers=headers) - - url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text) - title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text) - body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text) - baidu_res = [{"content": re.sub('|', '', '' + title + ' ' + body)} for - url, title, body in zip(url_res, title_res, body_res)] - del body_res, url_res, title_res - except Exception as e: - return Baidu.be_output("**ERROR**: " + str(e)) - - if not baidu_res: - return Baidu.be_output("") - - df = pd.DataFrame(baidu_res) - if DEBUG: print(df, ":::::::::::::::::::::::::::::::::") - return df - +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import random +from abc import ABC +from functools import partial +import pandas as pd +import requests +import re +from agent.settings import DEBUG +from agent.component.base import ComponentBase, ComponentParamBase + + +class BaiduParam(ComponentParamBase): + """ + Define the Baidu component parameters. 
+ """ + + def __init__(self): + super().__init__() + self.top_n = 10 + + def check(self): + self.check_positive_integer(self.top_n, "Top N") + + +class Baidu(ComponentBase, ABC): + component_name = "Baidu" + + def _run(self, history, **kwargs): + ans = self.get_input() + ans = " - ".join(ans["content"]) if "content" in ans else "" + if not ans: + return Baidu.be_output("") + + try: + url = 'https://www.baidu.com/s?wd=' + ans + '&rn=' + str(self._param.top_n) + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'} + response = requests.get(url=url, headers=headers) + + url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text) + title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text) + body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text) + baidu_res = [{"content": re.sub('|', '', '' + title + ' ' + body)} for + url, title, body in zip(url_res, title_res, body_res)] + del body_res, url_res, title_res + except Exception as e: + return Baidu.be_output("**ERROR**: " + str(e)) + + if not baidu_res: + return Baidu.be_output("") + + df = pd.DataFrame(baidu_res) + if DEBUG: print(df, ":::::::::::::::::::::::::::::::::") + return df + diff --git a/agent/component/baidufanyi.py b/agent/component/baidufanyi.py index c1f1d246c70506745c5ae14007c359ddfc0cadab..84061a30b0e4dacc07f304f9cad243f904f9a53b 100644 --- a/agent/component/baidufanyi.py +++ b/agent/component/baidufanyi.py @@ -1,99 +1,99 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import random -from abc import ABC -import requests -from agent.component.base import ComponentBase, ComponentParamBase -from hashlib import md5 - - -class BaiduFanyiParam(ComponentParamBase): - """ - Define the BaiduFanyi component parameters. 
- """ - - def __init__(self): - super().__init__() - self.appid = "xxx" - self.secret_key = "xxx" - self.trans_type = 'translate' - self.parameters = [] - self.source_lang = 'auto' - self.target_lang = 'auto' - self.domain = 'finance' - - def check(self): - self.check_positive_integer(self.top_n, "Top N") - self.check_empty(self.appid, "BaiduFanyi APPID") - self.check_empty(self.secret_key, "BaiduFanyi Secret Key") - self.check_valid_value(self.trans_type, "Translate type", ['translate', 'fieldtranslate']) - self.check_valid_value(self.trans_type, "Translate domain", - ['it', 'finance', 'machinery', 'senimed', 'novel', 'academic', 'aerospace', 'wiki', - 'news', 'law', 'contract']) - self.check_valid_value(self.source_lang, "Source language", - ['auto', 'zh', 'en', 'yue', 'wyw', 'jp', 'kor', 'fra', 'spa', 'th', 'ara', 'ru', 'pt', - 'de', 'it', 'el', 'nl', 'pl', 'bul', 'est', 'dan', 'fin', 'cs', 'rom', 'slo', 'swe', - 'hu', 'cht', 'vie']) - self.check_valid_value(self.target_lang, "Target language", - ['auto', 'zh', 'en', 'yue', 'wyw', 'jp', 'kor', 'fra', 'spa', 'th', 'ara', 'ru', 'pt', - 'de', 'it', 'el', 'nl', 'pl', 'bul', 'est', 'dan', 'fin', 'cs', 'rom', 'slo', 'swe', - 'hu', 'cht', 'vie']) - self.check_valid_value(self.domain, "Translate field", - ['it', 'finance', 'machinery', 'senimed', 'novel', 'academic', 'aerospace', 'wiki', - 'news', 'law', 'contract']) - - -class BaiduFanyi(ComponentBase, ABC): - component_name = "BaiduFanyi" - - def _run(self, history, **kwargs): - - ans = self.get_input() - ans = " - ".join(ans["content"]) if "content" in ans else "" - if not ans: - return BaiduFanyi.be_output("") - - try: - source_lang = self._param.source_lang - target_lang = self._param.target_lang - appid = self._param.appid - salt = random.randint(32768, 65536) - secret_key = self._param.secret_key - - if self._param.trans_type == 'translate': - sign = md5((appid + ans + salt + secret_key).encode('utf-8')).hexdigest() - url = 'http://api.fanyi.baidu.com/api/trans/vip/translate?' + 'q=' + ans + '&from=' + source_lang + '&to=' + target_lang + '&appid=' + appid + '&salt=' + salt + '&sign=' + sign - headers = {"Content-Type": "application/x-www-form-urlencoded"} - response = requests.post(url=url, headers=headers).json() - - if response.get('error_code'): - BaiduFanyi.be_output("**Error**:" + response['error_msg']) - - return BaiduFanyi.be_output(response['trans_result'][0]['dst']) - elif self._param.trans_type == 'fieldtranslate': - domain = self._param.domain - sign = md5((appid + ans + salt + domain + secret_key).encode('utf-8')).hexdigest() - url = 'http://api.fanyi.baidu.com/api/trans/vip/fieldtranslate?' + 'q=' + ans + '&from=' + source_lang + '&to=' + target_lang + '&appid=' + appid + '&salt=' + salt + '&domain=' + domain + '&sign=' + sign - headers = {"Content-Type": "application/x-www-form-urlencoded"} - response = requests.post(url=url, headers=headers).json() - - if response.get('error_code'): - BaiduFanyi.be_output("**Error**:" + response['error_msg']) - - return BaiduFanyi.be_output(response['trans_result'][0]['dst']) - - except Exception as e: - BaiduFanyi.be_output("**Error**:" + str(e)) +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import random +from abc import ABC +import requests +from agent.component.base import ComponentBase, ComponentParamBase +from hashlib import md5 + + +class BaiduFanyiParam(ComponentParamBase): + """ + Define the BaiduFanyi component parameters. + """ + + def __init__(self): + super().__init__() + self.appid = "xxx" + self.secret_key = "xxx" + self.trans_type = 'translate' + self.parameters = [] + self.source_lang = 'auto' + self.target_lang = 'auto' + self.domain = 'finance' + + def check(self): + self.check_positive_integer(self.top_n, "Top N") + self.check_empty(self.appid, "BaiduFanyi APPID") + self.check_empty(self.secret_key, "BaiduFanyi Secret Key") + self.check_valid_value(self.trans_type, "Translate type", ['translate', 'fieldtranslate']) + self.check_valid_value(self.trans_type, "Translate domain", + ['it', 'finance', 'machinery', 'senimed', 'novel', 'academic', 'aerospace', 'wiki', + 'news', 'law', 'contract']) + self.check_valid_value(self.source_lang, "Source language", + ['auto', 'zh', 'en', 'yue', 'wyw', 'jp', 'kor', 'fra', 'spa', 'th', 'ara', 'ru', 'pt', + 'de', 'it', 'el', 'nl', 'pl', 'bul', 'est', 'dan', 'fin', 'cs', 'rom', 'slo', 'swe', + 'hu', 'cht', 'vie']) + self.check_valid_value(self.target_lang, "Target language", + ['auto', 'zh', 'en', 'yue', 'wyw', 'jp', 'kor', 'fra', 'spa', 'th', 'ara', 'ru', 'pt', + 'de', 'it', 'el', 'nl', 'pl', 'bul', 'est', 'dan', 'fin', 'cs', 'rom', 'slo', 'swe', + 'hu', 'cht', 'vie']) + self.check_valid_value(self.domain, "Translate field", + ['it', 'finance', 'machinery', 'senimed', 'novel', 'academic', 'aerospace', 'wiki', + 'news', 'law', 'contract']) + + +class BaiduFanyi(ComponentBase, ABC): + component_name = "BaiduFanyi" + + def _run(self, history, **kwargs): + + ans = self.get_input() + ans = " - ".join(ans["content"]) if "content" in ans else "" + if not ans: + return BaiduFanyi.be_output("") + + try: + source_lang = self._param.source_lang + target_lang = self._param.target_lang + appid = self._param.appid + salt = random.randint(32768, 65536) + secret_key = self._param.secret_key + + if self._param.trans_type == 'translate': + sign = md5((appid + ans + salt + secret_key).encode('utf-8')).hexdigest() + url = 'http://api.fanyi.baidu.com/api/trans/vip/translate?' + 'q=' + ans + '&from=' + source_lang + '&to=' + target_lang + '&appid=' + appid + '&salt=' + salt + '&sign=' + sign + headers = {"Content-Type": "application/x-www-form-urlencoded"} + response = requests.post(url=url, headers=headers).json() + + if response.get('error_code'): + BaiduFanyi.be_output("**Error**:" + response['error_msg']) + + return BaiduFanyi.be_output(response['trans_result'][0]['dst']) + elif self._param.trans_type == 'fieldtranslate': + domain = self._param.domain + sign = md5((appid + ans + salt + domain + secret_key).encode('utf-8')).hexdigest() + url = 'http://api.fanyi.baidu.com/api/trans/vip/fieldtranslate?' 
+ 'q=' + ans + '&from=' + source_lang + '&to=' + target_lang + '&appid=' + appid + '&salt=' + salt + '&domain=' + domain + '&sign=' + sign + headers = {"Content-Type": "application/x-www-form-urlencoded"} + response = requests.post(url=url, headers=headers).json() + + if response.get('error_code'): + BaiduFanyi.be_output("**Error**:" + response['error_msg']) + + return BaiduFanyi.be_output(response['trans_result'][0]['dst']) + + except Exception as e: + BaiduFanyi.be_output("**Error**:" + str(e)) diff --git a/agent/component/bing.py b/agent/component/bing.py index 14fce05559e63db60d7cef3be496fd3b35b10670..dce3c7ea827a9c1cec0fc3e6b8381ca6855e4953 100644 --- a/agent/component/bing.py +++ b/agent/component/bing.py @@ -1,85 +1,85 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from abc import ABC -import requests -import pandas as pd -from agent.settings import DEBUG -from agent.component.base import ComponentBase, ComponentParamBase - - -class BingParam(ComponentParamBase): - """ - Define the Bing component parameters. - """ - - def __init__(self): - super().__init__() - self.top_n = 10 - self.channel = "Webpages" - self.api_key = "YOUR_ACCESS_KEY" - self.country = "CN" - self.language = "en" - - def check(self): - self.check_positive_integer(self.top_n, "Top N") - self.check_valid_value(self.channel, "Bing Web Search or Bing News", ["Webpages", "News"]) - self.check_empty(self.api_key, "Bing subscription key") - self.check_valid_value(self.country, "Bing Country", - ['AR', 'AU', 'AT', 'BE', 'BR', 'CA', 'CL', 'DK', 'FI', 'FR', 'DE', 'HK', 'IN', 'ID', - 'IT', 'JP', 'KR', 'MY', 'MX', 'NL', 'NZ', 'NO', 'CN', 'PL', 'PT', 'PH', 'RU', 'SA', - 'ZA', 'ES', 'SE', 'CH', 'TW', 'TR', 'GB', 'US']) - self.check_valid_value(self.language, "Bing Languages", - ['ar', 'eu', 'bn', 'bg', 'ca', 'ns', 'nt', 'hr', 'cs', 'da', 'nl', 'en', 'gb', 'et', - 'fi', 'fr', 'gl', 'de', 'gu', 'he', 'hi', 'hu', 'is', 'it', 'jp', 'kn', 'ko', 'lv', - 'lt', 'ms', 'ml', 'mr', 'nb', 'pl', 'br', 'pt', 'pa', 'ro', 'ru', 'sr', 'sk', 'sl', - 'es', 'sv', 'ta', 'te', 'th', 'tr', 'uk', 'vi']) - - -class Bing(ComponentBase, ABC): - component_name = "Bing" - - def _run(self, history, **kwargs): - ans = self.get_input() - ans = " - ".join(ans["content"]) if "content" in ans else "" - if not ans: - return Bing.be_output("") - - try: - headers = {"Ocp-Apim-Subscription-Key": self._param.api_key, 'Accept-Language': self._param.language} - params = {"q": ans, "textDecorations": True, "textFormat": "HTML", "cc": self._param.country, - "answerCount": 1, "promote": self._param.channel} - if self._param.channel == "Webpages": - response = requests.get("https://api.bing.microsoft.com/v7.0/search", headers=headers, params=params) - response.raise_for_status() - search_results = response.json() - bing_res = [{"content": '' + i["name"] + ' ' + i["snippet"]} for i in - search_results["webPages"]["value"]] - elif self._param.channel == "News": - response = 
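
BaiduFanyi._run signs requests with the documented Baidu Translate scheme: sign = MD5(appid + q + salt + secret_key), with the domain inserted before the key for field translation. Since salt comes from random.randint, it has to be stringified before concatenation, which is what the str(salt) calls above guard against. A standalone sketch with placeholder credentials:

    from hashlib import md5
    import random

    appid, secret_key = "xxx", "xxx"  # placeholders, as in the component defaults
    q = "hello"
    salt = random.randint(32768, 65536)

    # md5() hashes bytes; appid + q + salt with an int salt would raise
    # TypeError before the request is ever built.
    sign = md5((appid + q + str(salt) + secret_key).encode("utf-8")).hexdigest()
    params = {"q": q, "from": "auto", "to": "auto", "appid": appid,
              "salt": str(salt), "sign": sign}
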
requests.get("https://api.bing.microsoft.com/v7.0/news/search", headers=headers, - params=params) - response.raise_for_status() - search_results = response.json() - bing_res = [{"content": '' + i["name"] + ' ' + i["description"]} for i - in search_results['news']['value']] - except Exception as e: - return Bing.be_output("**ERROR**: " + str(e)) - - if not bing_res: - return Bing.be_output("") - - df = pd.DataFrame(bing_res) - if DEBUG: print(df, ":::::::::::::::::::::::::::::::::") - return df +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from abc import ABC +import requests +import pandas as pd +from agent.settings import DEBUG +from agent.component.base import ComponentBase, ComponentParamBase + + +class BingParam(ComponentParamBase): + """ + Define the Bing component parameters. + """ + + def __init__(self): + super().__init__() + self.top_n = 10 + self.channel = "Webpages" + self.api_key = "YOUR_ACCESS_KEY" + self.country = "CN" + self.language = "en" + + def check(self): + self.check_positive_integer(self.top_n, "Top N") + self.check_valid_value(self.channel, "Bing Web Search or Bing News", ["Webpages", "News"]) + self.check_empty(self.api_key, "Bing subscription key") + self.check_valid_value(self.country, "Bing Country", + ['AR', 'AU', 'AT', 'BE', 'BR', 'CA', 'CL', 'DK', 'FI', 'FR', 'DE', 'HK', 'IN', 'ID', + 'IT', 'JP', 'KR', 'MY', 'MX', 'NL', 'NZ', 'NO', 'CN', 'PL', 'PT', 'PH', 'RU', 'SA', + 'ZA', 'ES', 'SE', 'CH', 'TW', 'TR', 'GB', 'US']) + self.check_valid_value(self.language, "Bing Languages", + ['ar', 'eu', 'bn', 'bg', 'ca', 'ns', 'nt', 'hr', 'cs', 'da', 'nl', 'en', 'gb', 'et', + 'fi', 'fr', 'gl', 'de', 'gu', 'he', 'hi', 'hu', 'is', 'it', 'jp', 'kn', 'ko', 'lv', + 'lt', 'ms', 'ml', 'mr', 'nb', 'pl', 'br', 'pt', 'pa', 'ro', 'ru', 'sr', 'sk', 'sl', + 'es', 'sv', 'ta', 'te', 'th', 'tr', 'uk', 'vi']) + + +class Bing(ComponentBase, ABC): + component_name = "Bing" + + def _run(self, history, **kwargs): + ans = self.get_input() + ans = " - ".join(ans["content"]) if "content" in ans else "" + if not ans: + return Bing.be_output("") + + try: + headers = {"Ocp-Apim-Subscription-Key": self._param.api_key, 'Accept-Language': self._param.language} + params = {"q": ans, "textDecorations": True, "textFormat": "HTML", "cc": self._param.country, + "answerCount": 1, "promote": self._param.channel} + if self._param.channel == "Webpages": + response = requests.get("https://api.bing.microsoft.com/v7.0/search", headers=headers, params=params) + response.raise_for_status() + search_results = response.json() + bing_res = [{"content": '' + i["name"] + ' ' + i["snippet"]} for i in + search_results["webPages"]["value"]] + elif self._param.channel == "News": + response = requests.get("https://api.bing.microsoft.com/v7.0/news/search", headers=headers, + params=params) + response.raise_for_status() + search_results = response.json() + bing_res = [{"content": '' + i["name"] + ' ' + i["description"]} for i + in 
search_results['news']['value']] + except Exception as e: + return Bing.be_output("**ERROR**: " + str(e)) + + if not bing_res: + return Bing.be_output("") + + df = pd.DataFrame(bing_res) + if DEBUG: print(df, ":::::::::::::::::::::::::::::::::") + return df diff --git a/agent/component/deepl.py b/agent/component/deepl.py index 50f27b82c9d4f5311f96c56aa954c9be43a8aa1a..d5247735a81e2a8aee7adbad2d36e887fab76768 100644 --- a/agent/component/deepl.py +++ b/agent/component/deepl.py @@ -1,62 +1,62 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from abc import ABC -import re -from agent.component.base import ComponentBase, ComponentParamBase -import deepl - - -class DeepLParam(ComponentParamBase): - """ - Define the DeepL component parameters. - """ - - def __init__(self): - super().__init__() - self.auth_key = "xxx" - self.parameters = [] - self.source_lang = 'ZH' - self.target_lang = 'EN-GB' - - def check(self): - self.check_positive_integer(self.top_n, "Top N") - self.check_valid_value(self.source_lang, "Source language", - ['AR', 'BG', 'CS', 'DA', 'DE', 'EL', 'EN', 'ES', 'ET', 'FI', 'FR', 'HU', 'ID', 'IT', - 'JA', 'KO', 'LT', 'LV', 'NB', 'NL', 'PL', 'PT', 'RO', 'RU', 'SK', 'SL', 'SV', 'TR', - 'UK', 'ZH']) - self.check_valid_value(self.target_lang, "Target language", - ['AR', 'BG', 'CS', 'DA', 'DE', 'EL', 'EN-GB', 'EN-US', 'ES', 'ET', 'FI', 'FR', 'HU', - 'ID', 'IT', 'JA', 'KO', 'LT', 'LV', 'NB', 'NL', 'PL', 'PT-BR', 'PT-PT', 'RO', 'RU', - 'SK', 'SL', 'SV', 'TR', 'UK', 'ZH']) - - -class DeepL(ComponentBase, ABC): - component_name = "GitHub" - - def _run(self, history, **kwargs): - ans = self.get_input() - ans = " - ".join(ans["content"]) if "content" in ans else "" - if not ans: - return DeepL.be_output("") - - try: - translator = deepl.Translator(self._param.auth_key) - result = translator.translate_text(ans, source_lang=self._param.source_lang, - target_lang=self._param.target_lang) - - return DeepL.be_output(result.text) - except Exception as e: - DeepL.be_output("**Error**:" + str(e)) +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from abc import ABC +import re +from agent.component.base import ComponentBase, ComponentParamBase +import deepl + + +class DeepLParam(ComponentParamBase): + """ + Define the DeepL component parameters. 
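
The Bing component targets Microsoft's Web Search v7 endpoint with a subscription-key header. A minimal request sketch with a placeholder key (endpoint, header, and parameter names mirror the component above and the public API):

    import requests

    headers = {"Ocp-Apim-Subscription-Key": "YOUR_ACCESS_KEY", "Accept-Language": "en"}
    params = {"q": "ragflow", "cc": "CN", "answerCount": 1, "promote": "Webpages",
              "textDecorations": True, "textFormat": "HTML"}
    resp = requests.get("https://api.bing.microsoft.com/v7.0/search",
                        headers=headers, params=params, timeout=10)
    resp.raise_for_status()
    # Web results live under webPages.value; news results under news.value.
    for page in resp.json()["webPages"]["value"]:
        print(page["name"], "-", page["snippet"])
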
+ """ + + def __init__(self): + super().__init__() + self.auth_key = "xxx" + self.parameters = [] + self.source_lang = 'ZH' + self.target_lang = 'EN-GB' + + def check(self): + self.check_positive_integer(self.top_n, "Top N") + self.check_valid_value(self.source_lang, "Source language", + ['AR', 'BG', 'CS', 'DA', 'DE', 'EL', 'EN', 'ES', 'ET', 'FI', 'FR', 'HU', 'ID', 'IT', + 'JA', 'KO', 'LT', 'LV', 'NB', 'NL', 'PL', 'PT', 'RO', 'RU', 'SK', 'SL', 'SV', 'TR', + 'UK', 'ZH']) + self.check_valid_value(self.target_lang, "Target language", + ['AR', 'BG', 'CS', 'DA', 'DE', 'EL', 'EN-GB', 'EN-US', 'ES', 'ET', 'FI', 'FR', 'HU', + 'ID', 'IT', 'JA', 'KO', 'LT', 'LV', 'NB', 'NL', 'PL', 'PT-BR', 'PT-PT', 'RO', 'RU', + 'SK', 'SL', 'SV', 'TR', 'UK', 'ZH']) + + +class DeepL(ComponentBase, ABC): + component_name = "GitHub" + + def _run(self, history, **kwargs): + ans = self.get_input() + ans = " - ".join(ans["content"]) if "content" in ans else "" + if not ans: + return DeepL.be_output("") + + try: + translator = deepl.Translator(self._param.auth_key) + result = translator.translate_text(ans, source_lang=self._param.source_lang, + target_lang=self._param.target_lang) + + return DeepL.be_output(result.text) + except Exception as e: + DeepL.be_output("**Error**:" + str(e)) diff --git a/agent/component/github.py b/agent/component/github.py index 98180431e96de7e9e6ba390e5489134fee87970a..5e56340e6c42be6be37f557cfa760eb9d1416eae 100644 --- a/agent/component/github.py +++ b/agent/component/github.py @@ -1,61 +1,61 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from abc import ABC -import pandas as pd -import requests -from agent.settings import DEBUG -from agent.component.base import ComponentBase, ComponentParamBase - - -class GitHubParam(ComponentParamBase): - """ - Define the GitHub component parameters. - """ - - def __init__(self): - super().__init__() - self.top_n = 10 - - def check(self): - self.check_positive_integer(self.top_n, "Top N") - - -class GitHub(ComponentBase, ABC): - component_name = "GitHub" - - def _run(self, history, **kwargs): - ans = self.get_input() - ans = " - ".join(ans["content"]) if "content" in ans else "" - if not ans: - return GitHub.be_output("") - - try: - url = 'https://api.github.com/search/repositories?q=' + ans + '&sort=stars&order=desc&per_page=' + str( - self._param.top_n) - headers = {"Content-Type": "application/vnd.github+json", "X-GitHub-Api-Version": '2022-11-28'} - response = requests.get(url=url, headers=headers).json() - - github_res = [{"content": '' + i["name"] + '' + str( - i["description"]) + '\n stars:' + str(i['watchers'])} for i in response['items']] - except Exception as e: - return GitHub.be_output("**ERROR**: " + str(e)) - - if not github_res: - return GitHub.be_output("") - - df = pd.DataFrame(github_res) - if DEBUG: print(df, ":::::::::::::::::::::::::::::::::") - return df +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. 
diff --git a/agent/component/github.py b/agent/component/github.py
index 98180431e96de7e9e6ba390e5489134fee87970a..5e56340e6c42be6be37f557cfa760eb9d1416eae 100644
--- a/agent/component/github.py
+++ b/agent/component/github.py
@@ -1,61 +1,61 @@
-#
-# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-from abc import ABC
-import pandas as pd
-import requests
-from agent.settings import DEBUG
-from agent.component.base import ComponentBase, ComponentParamBase
-
-
-class GitHubParam(ComponentParamBase):
-    """
-    Define the GitHub component parameters.
-    """
-
-    def __init__(self):
-        super().__init__()
-        self.top_n = 10
-
-    def check(self):
-        self.check_positive_integer(self.top_n, "Top N")
-
-
-class GitHub(ComponentBase, ABC):
-    component_name = "GitHub"
-
-    def _run(self, history, **kwargs):
-        ans = self.get_input()
-        ans = " - ".join(ans["content"]) if "content" in ans else ""
-        if not ans:
-            return GitHub.be_output("")
-
-        try:
-            url = 'https://api.github.com/search/repositories?q=' + ans + '&sort=stars&order=desc&per_page=' + str(
-                self._param.top_n)
-            headers = {"Content-Type": "application/vnd.github+json", "X-GitHub-Api-Version": '2022-11-28'}
-            response = requests.get(url=url, headers=headers).json()
-
-            github_res = [{"content": '<a href="' + i["html_url"] + '">' + i["name"] + '</a>' + str(
-                i["description"]) + '\n stars:' + str(i['watchers'])} for i in response['items']]
-        except Exception as e:
-            return GitHub.be_output("**ERROR**: " + str(e))
-
-        if not github_res:
-            return GitHub.be_output("")
-
-        df = pd.DataFrame(github_res)
-        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
-        return df
+#
+# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from abc import ABC
+import pandas as pd
+import requests
+from agent.settings import DEBUG
+from agent.component.base import ComponentBase, ComponentParamBase
+
+
+class GitHubParam(ComponentParamBase):
+    """
+    Define the GitHub component parameters.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.top_n = 10
+
+    def check(self):
+        self.check_positive_integer(self.top_n, "Top N")
+
+
+class GitHub(ComponentBase, ABC):
+    component_name = "GitHub"
+
+    def _run(self, history, **kwargs):
+        ans = self.get_input()
+        ans = " - ".join(ans["content"]) if "content" in ans else ""
+        if not ans:
+            return GitHub.be_output("")
+
+        try:
+            url = 'https://api.github.com/search/repositories?q=' + ans + '&sort=stars&order=desc&per_page=' + str(
+                self._param.top_n)
+            headers = {"Content-Type": "application/vnd.github+json", "X-GitHub-Api-Version": '2022-11-28'}
+            response = requests.get(url=url, headers=headers).json()
+
+            github_res = [{"content": '<a href="' + i["html_url"] + '">' + i["name"] + '</a>' + str(
+                i["description"]) + '\n stars:' + str(i['watchers'])} for i in response['items']]
+        except Exception as e:
+            return GitHub.be_output("**ERROR**: " + str(e))
+
+        if not github_res:
+            return GitHub.be_output("")
+
+        df = pd.DataFrame(github_res)
+        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
+        return df
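
GitHub's repository search is a plain REST call; unauthenticated requests work for a smoke test, just with a lower rate limit. A sketch of the same query the component builds:

    import requests

    url = "https://api.github.com/search/repositories"
    params = {"q": "ragflow", "sort": "stars", "order": "desc", "per_page": 5}
    items = requests.get(url, params=params, timeout=10).json().get("items", [])
    for repo in items:
        print(repo["name"], repo["watchers"], repo["html_url"])
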
- """ - - def __init__(self): - super().__init__() - self.top_n = 10 - self.api_key = "xxx" - self.country = "cn" - self.language = "en" - - def check(self): - self.check_positive_integer(self.top_n, "Top N") - self.check_empty(self.api_key, "SerpApi API key") - self.check_valid_value(self.country, "Google Country", - ['af', 'al', 'dz', 'as', 'ad', 'ao', 'ai', 'aq', 'ag', 'ar', 'am', 'aw', 'au', 'at', - 'az', 'bs', 'bh', 'bd', 'bb', 'by', 'be', 'bz', 'bj', 'bm', 'bt', 'bo', 'ba', 'bw', - 'bv', 'br', 'io', 'bn', 'bg', 'bf', 'bi', 'kh', 'cm', 'ca', 'cv', 'ky', 'cf', 'td', - 'cl', 'cn', 'cx', 'cc', 'co', 'km', 'cg', 'cd', 'ck', 'cr', 'ci', 'hr', 'cu', 'cy', - 'cz', 'dk', 'dj', 'dm', 'do', 'ec', 'eg', 'sv', 'gq', 'er', 'ee', 'et', 'fk', 'fo', - 'fj', 'fi', 'fr', 'gf', 'pf', 'tf', 'ga', 'gm', 'ge', 'de', 'gh', 'gi', 'gr', 'gl', - 'gd', 'gp', 'gu', 'gt', 'gn', 'gw', 'gy', 'ht', 'hm', 'va', 'hn', 'hk', 'hu', 'is', - 'in', 'id', 'ir', 'iq', 'ie', 'il', 'it', 'jm', 'jp', 'jo', 'kz', 'ke', 'ki', 'kp', - 'kr', 'kw', 'kg', 'la', 'lv', 'lb', 'ls', 'lr', 'ly', 'li', 'lt', 'lu', 'mo', 'mk', - 'mg', 'mw', 'my', 'mv', 'ml', 'mt', 'mh', 'mq', 'mr', 'mu', 'yt', 'mx', 'fm', 'md', - 'mc', 'mn', 'ms', 'ma', 'mz', 'mm', 'na', 'nr', 'np', 'nl', 'an', 'nc', 'nz', 'ni', - 'ne', 'ng', 'nu', 'nf', 'mp', 'no', 'om', 'pk', 'pw', 'ps', 'pa', 'pg', 'py', 'pe', - 'ph', 'pn', 'pl', 'pt', 'pr', 'qa', 're', 'ro', 'ru', 'rw', 'sh', 'kn', 'lc', 'pm', - 'vc', 'ws', 'sm', 'st', 'sa', 'sn', 'rs', 'sc', 'sl', 'sg', 'sk', 'si', 'sb', 'so', - 'za', 'gs', 'es', 'lk', 'sd', 'sr', 'sj', 'sz', 'se', 'ch', 'sy', 'tw', 'tj', 'tz', - 'th', 'tl', 'tg', 'tk', 'to', 'tt', 'tn', 'tr', 'tm', 'tc', 'tv', 'ug', 'ua', 'ae', - 'uk', 'gb', 'us', 'um', 'uy', 'uz', 'vu', 've', 'vn', 'vg', 'vi', 'wf', 'eh', 'ye', - 'zm', 'zw']) - self.check_valid_value(self.language, "Google languages", - ['af', 'ak', 'sq', 'ws', 'am', 'ar', 'hy', 'az', 'eu', 'be', 'bem', 'bn', 'bh', - 'xx-bork', 'bs', 'br', 'bg', 'bt', 'km', 'ca', 'chr', 'ny', 'zh-cn', 'zh-tw', 'co', - 'hr', 'cs', 'da', 'nl', 'xx-elmer', 'en', 'eo', 'et', 'ee', 'fo', 'tl', 'fi', 'fr', - 'fy', 'gaa', 'gl', 'ka', 'de', 'el', 'kl', 'gn', 'gu', 'xx-hacker', 'ht', 'ha', 'haw', - 'iw', 'hi', 'hu', 'is', 'ig', 'id', 'ia', 'ga', 'it', 'ja', 'jw', 'kn', 'kk', 'rw', - 'rn', 'xx-klingon', 'kg', 'ko', 'kri', 'ku', 'ckb', 'ky', 'lo', 'la', 'lv', 'ln', 'lt', - 'loz', 'lg', 'ach', 'mk', 'mg', 'ms', 'ml', 'mt', 'mv', 'mi', 'mr', 'mfe', 'mo', 'mn', - 'sr-me', 'my', 'ne', 'pcm', 'nso', 'no', 'nn', 'oc', 'or', 'om', 'ps', 'fa', - 'xx-pirate', 'pl', 'pt', 'pt-br', 'pt-pt', 'pa', 'qu', 'ro', 'rm', 'nyn', 'ru', 'gd', - 'sr', 'sh', 'st', 'tn', 'crs', 'sn', 'sd', 'si', 'sk', 'sl', 'so', 'es', 'es-419', 'su', - 'sw', 'sv', 'tg', 'ta', 'tt', 'te', 'th', 'ti', 'to', 'lua', 'tum', 'tr', 'tk', 'tw', - 'ug', 'uk', 'ur', 'uz', 'vu', 'vi', 'cy', 'wo', 'xh', 'yi', 'yo', 'zu'] - ) - - -class Google(ComponentBase, ABC): - component_name = "Google" - - def _run(self, history, **kwargs): - ans = self.get_input() - ans = " - ".join(ans["content"]) if "content" in ans else "" - if not ans: - return Google.be_output("") - - try: - client = GoogleSearch( - {"engine": "google", "q": ans, "api_key": self._param.api_key, "gl": self._param.country, - "hl": self._param.language, "num": self._param.top_n}) - google_res = [{"content": '' + i["title"] + ' ' + i["snippet"]} for i in - client.get_dict()["organic_results"]] - except Exception as e: - return Google.be_output("**ERROR**: Existing Unavailable Parameters!") - - if not google_res: - return 
Google.be_output("") - - df = pd.DataFrame(google_res) - if DEBUG: print(df, ":::::::::::::::::::::::::::::::::") - return df +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from abc import ABC +from serpapi import GoogleSearch +import pandas as pd +from agent.settings import DEBUG +from agent.component.base import ComponentBase, ComponentParamBase + + +class GoogleParam(ComponentParamBase): + """ + Define the Google component parameters. + """ + + def __init__(self): + super().__init__() + self.top_n = 10 + self.api_key = "xxx" + self.country = "cn" + self.language = "en" + + def check(self): + self.check_positive_integer(self.top_n, "Top N") + self.check_empty(self.api_key, "SerpApi API key") + self.check_valid_value(self.country, "Google Country", + ['af', 'al', 'dz', 'as', 'ad', 'ao', 'ai', 'aq', 'ag', 'ar', 'am', 'aw', 'au', 'at', + 'az', 'bs', 'bh', 'bd', 'bb', 'by', 'be', 'bz', 'bj', 'bm', 'bt', 'bo', 'ba', 'bw', + 'bv', 'br', 'io', 'bn', 'bg', 'bf', 'bi', 'kh', 'cm', 'ca', 'cv', 'ky', 'cf', 'td', + 'cl', 'cn', 'cx', 'cc', 'co', 'km', 'cg', 'cd', 'ck', 'cr', 'ci', 'hr', 'cu', 'cy', + 'cz', 'dk', 'dj', 'dm', 'do', 'ec', 'eg', 'sv', 'gq', 'er', 'ee', 'et', 'fk', 'fo', + 'fj', 'fi', 'fr', 'gf', 'pf', 'tf', 'ga', 'gm', 'ge', 'de', 'gh', 'gi', 'gr', 'gl', + 'gd', 'gp', 'gu', 'gt', 'gn', 'gw', 'gy', 'ht', 'hm', 'va', 'hn', 'hk', 'hu', 'is', + 'in', 'id', 'ir', 'iq', 'ie', 'il', 'it', 'jm', 'jp', 'jo', 'kz', 'ke', 'ki', 'kp', + 'kr', 'kw', 'kg', 'la', 'lv', 'lb', 'ls', 'lr', 'ly', 'li', 'lt', 'lu', 'mo', 'mk', + 'mg', 'mw', 'my', 'mv', 'ml', 'mt', 'mh', 'mq', 'mr', 'mu', 'yt', 'mx', 'fm', 'md', + 'mc', 'mn', 'ms', 'ma', 'mz', 'mm', 'na', 'nr', 'np', 'nl', 'an', 'nc', 'nz', 'ni', + 'ne', 'ng', 'nu', 'nf', 'mp', 'no', 'om', 'pk', 'pw', 'ps', 'pa', 'pg', 'py', 'pe', + 'ph', 'pn', 'pl', 'pt', 'pr', 'qa', 're', 'ro', 'ru', 'rw', 'sh', 'kn', 'lc', 'pm', + 'vc', 'ws', 'sm', 'st', 'sa', 'sn', 'rs', 'sc', 'sl', 'sg', 'sk', 'si', 'sb', 'so', + 'za', 'gs', 'es', 'lk', 'sd', 'sr', 'sj', 'sz', 'se', 'ch', 'sy', 'tw', 'tj', 'tz', + 'th', 'tl', 'tg', 'tk', 'to', 'tt', 'tn', 'tr', 'tm', 'tc', 'tv', 'ug', 'ua', 'ae', + 'uk', 'gb', 'us', 'um', 'uy', 'uz', 'vu', 've', 'vn', 'vg', 'vi', 'wf', 'eh', 'ye', + 'zm', 'zw']) + self.check_valid_value(self.language, "Google languages", + ['af', 'ak', 'sq', 'ws', 'am', 'ar', 'hy', 'az', 'eu', 'be', 'bem', 'bn', 'bh', + 'xx-bork', 'bs', 'br', 'bg', 'bt', 'km', 'ca', 'chr', 'ny', 'zh-cn', 'zh-tw', 'co', + 'hr', 'cs', 'da', 'nl', 'xx-elmer', 'en', 'eo', 'et', 'ee', 'fo', 'tl', 'fi', 'fr', + 'fy', 'gaa', 'gl', 'ka', 'de', 'el', 'kl', 'gn', 'gu', 'xx-hacker', 'ht', 'ha', 'haw', + 'iw', 'hi', 'hu', 'is', 'ig', 'id', 'ia', 'ga', 'it', 'ja', 'jw', 'kn', 'kk', 'rw', + 'rn', 'xx-klingon', 'kg', 'ko', 'kri', 'ku', 'ckb', 'ky', 'lo', 'la', 'lv', 'ln', 'lt', + 'loz', 'lg', 'ach', 'mk', 'mg', 'ms', 'ml', 'mt', 'mv', 'mi', 'mr', 'mfe', 'mo', 'mn', + 'sr-me', 'my', 'ne', 'pcm', 'nso', 'no', 'nn', 'oc', 'or', 'om', 
'ps', 'fa', + 'xx-pirate', 'pl', 'pt', 'pt-br', 'pt-pt', 'pa', 'qu', 'ro', 'rm', 'nyn', 'ru', 'gd', + 'sr', 'sh', 'st', 'tn', 'crs', 'sn', 'sd', 'si', 'sk', 'sl', 'so', 'es', 'es-419', 'su', + 'sw', 'sv', 'tg', 'ta', 'tt', 'te', 'th', 'ti', 'to', 'lua', 'tum', 'tr', 'tk', 'tw', + 'ug', 'uk', 'ur', 'uz', 'vu', 'vi', 'cy', 'wo', 'xh', 'yi', 'yo', 'zu'] + ) + + +class Google(ComponentBase, ABC): + component_name = "Google" + + def _run(self, history, **kwargs): + ans = self.get_input() + ans = " - ".join(ans["content"]) if "content" in ans else "" + if not ans: + return Google.be_output("") + + try: + client = GoogleSearch( + {"engine": "google", "q": ans, "api_key": self._param.api_key, "gl": self._param.country, + "hl": self._param.language, "num": self._param.top_n}) + google_res = [{"content": '' + i["title"] + ' ' + i["snippet"]} for i in + client.get_dict()["organic_results"]] + except Exception as e: + return Google.be_output("**ERROR**: Existing Unavailable Parameters!") + + if not google_res: + return Google.be_output("") + + df = pd.DataFrame(google_res) + if DEBUG: print(df, ":::::::::::::::::::::::::::::::::") + return df diff --git a/agent/component/googlescholar.py b/agent/component/googlescholar.py index f895c6cf1fe0f271369141882cdb429c39232fdb..19e1110e26b28a66ddb3b8f4050da8a9dc438698 100644 --- a/agent/component/googlescholar.py +++ b/agent/component/googlescholar.py @@ -1,70 +1,70 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from abc import ABC -import pandas as pd -from agent.settings import DEBUG -from agent.component.base import ComponentBase, ComponentParamBase -from scholarly import scholarly - - -class GoogleScholarParam(ComponentParamBase): - """ - Define the GoogleScholar component parameters. 
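
The Google component goes through SerpApi rather than scraping; GoogleSearch(...).get_dict() returns the parsed result page, with hits under "organic_results". A sketch with a placeholder key ("gl", "hl", and "num" are the same knobs the component exposes as country, language, and top_n):

    from serpapi import GoogleSearch

    search = GoogleSearch({"engine": "google", "q": "ragflow", "api_key": "xxx",
                           "gl": "cn", "hl": "en", "num": 10})
    for hit in search.get_dict().get("organic_results", []):
        print(hit["title"], "-", hit["snippet"])
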
- """ - - def __init__(self): - super().__init__() - self.top_n = 6 - self.sort_by = 'relevance' - self.year_low = None - self.year_high = None - self.patents = True - - def check(self): - self.check_positive_integer(self.top_n, "Top N") - self.check_valid_value(self.sort_by, "GoogleScholar Sort_by", ['date', 'relevance']) - self.check_boolean(self.patents, "Whether or not to include patents, defaults to True") - - -class GoogleScholar(ComponentBase, ABC): - component_name = "GoogleScholar" - - def _run(self, history, **kwargs): - ans = self.get_input() - ans = " - ".join(ans["content"]) if "content" in ans else "" - if not ans: - return GoogleScholar.be_output("") - - scholar_client = scholarly.search_pubs(ans, patents=self._param.patents, year_low=self._param.year_low, - year_high=self._param.year_high, sort_by=self._param.sort_by) - scholar_res = [] - for i in range(self._param.top_n): - try: - pub = next(scholar_client) - scholar_res.append({"content": 'Title: ' + pub['bib']['title'] + '\n_Url: ' + "\n author: " + ",".join(pub['bib']['author']) + '\n Abstract: ' + pub[ - 'bib'].get('abstract', 'no abstract')}) - - except StopIteration or Exception as e: - print("**ERROR** " + str(e)) - break - - if not scholar_res: - return GoogleScholar.be_output("") - - df = pd.DataFrame(scholar_res) - if DEBUG: print(df, ":::::::::::::::::::::::::::::::::") - return df +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from abc import ABC +import pandas as pd +from agent.settings import DEBUG +from agent.component.base import ComponentBase, ComponentParamBase +from scholarly import scholarly + + +class GoogleScholarParam(ComponentParamBase): + """ + Define the GoogleScholar component parameters. 
+ """ + + def __init__(self): + super().__init__() + self.top_n = 6 + self.sort_by = 'relevance' + self.year_low = None + self.year_high = None + self.patents = True + + def check(self): + self.check_positive_integer(self.top_n, "Top N") + self.check_valid_value(self.sort_by, "GoogleScholar Sort_by", ['date', 'relevance']) + self.check_boolean(self.patents, "Whether or not to include patents, defaults to True") + + +class GoogleScholar(ComponentBase, ABC): + component_name = "GoogleScholar" + + def _run(self, history, **kwargs): + ans = self.get_input() + ans = " - ".join(ans["content"]) if "content" in ans else "" + if not ans: + return GoogleScholar.be_output("") + + scholar_client = scholarly.search_pubs(ans, patents=self._param.patents, year_low=self._param.year_low, + year_high=self._param.year_high, sort_by=self._param.sort_by) + scholar_res = [] + for i in range(self._param.top_n): + try: + pub = next(scholar_client) + scholar_res.append({"content": 'Title: ' + pub['bib']['title'] + '\n_Url: ' + "\n author: " + ",".join(pub['bib']['author']) + '\n Abstract: ' + pub[ + 'bib'].get('abstract', 'no abstract')}) + + except StopIteration or Exception as e: + print("**ERROR** " + str(e)) + break + + if not scholar_res: + return GoogleScholar.be_output("") + + df = pd.DataFrame(scholar_res) + if DEBUG: print(df, ":::::::::::::::::::::::::::::::::") + return df diff --git a/agent/component/qweather.py b/agent/component/qweather.py index cba07a4d8adb71b94ac5df4e299e298e4f6c4e43..833e2922c3477cec659771f8249ef7581c17e814 100644 --- a/agent/component/qweather.py +++ b/agent/component/qweather.py @@ -1,111 +1,111 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from abc import ABC -import pandas as pd -import requests -from agent.component.base import ComponentBase, ComponentParamBase - - -class QWeatherParam(ComponentParamBase): - """ - Define the QWeather component parameters. - """ - - def __init__(self): - super().__init__() - self.web_apikey = "xxx" - self.lang = "zh" - self.type = "weather" - self.user_type = 'free' - self.error_code = { - "204": "The request was successful, but the region you are querying does not have the data you need at this time.", - "400": "Request error, may contain incorrect request parameters or missing mandatory request parameters.", - "401": "Authentication fails, possibly using the wrong KEY, wrong digital signature, wrong type of KEY (e.g. 
diff --git a/agent/component/qweather.py b/agent/component/qweather.py
index cba07a4d8adb71b94ac5df4e299e298e4f6c4e43..833e2922c3477cec659771f8249ef7581c17e814 100644
--- a/agent/component/qweather.py
+++ b/agent/component/qweather.py
@@ -1,111 +1,111 @@
-#
-# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-from abc import ABC
-import pandas as pd
-import requests
-from agent.component.base import ComponentBase, ComponentParamBase
-
-
-class QWeatherParam(ComponentParamBase):
-    """
-    Define the QWeather component parameters.
-    """
-
-    def __init__(self):
-        super().__init__()
-        self.web_apikey = "xxx"
-        self.lang = "zh"
-        self.type = "weather"
-        self.user_type = 'free'
-        self.error_code = {
-            "204": "The request was successful, but the region you are querying does not have the data you need at this time.",
-            "400": "Request error, may contain incorrect request parameters or missing mandatory request parameters.",
-            "401": "Authentication fails, possibly using the wrong KEY, wrong digital signature, wrong type of KEY (e.g. using the SDK's KEY to access the Web API).",
-            "402": "Exceeded the number of accesses or the balance is not enough to support continued access to the service, you can recharge, upgrade the accesses or wait for the accesses to be reset.",
-            "403": "No access, may be the binding PackageName, BundleID, domain IP address is inconsistent, or the data that requires additional payment.",
-            "404": "The queried data or region does not exist.",
-            "429": "Exceeded the limited QPM (number of accesses per minute), please refer to the QPM description",
-            "500": "No response or timeout, interface service abnormality please contact us"
-        }
-        # Weather
-        self.time_period = 'now'
-
-    def check(self):
-        self.check_empty(self.web_apikey, "BaiduFanyi APPID")
-        self.check_valid_value(self.type, "Type", ["weather", "indices", "airquality"])
-        self.check_valid_value(self.user_type, "Free subscription or paid subscription", ["free", "paid"])
-        self.check_valid_value(self.lang, "Use language",
-                               ['zh', 'zh-hant', 'en', 'de', 'es', 'fr', 'it', 'ja', 'ko', 'ru', 'hi', 'th', 'ar', 'pt',
-                                'bn', 'ms', 'nl', 'el', 'la', 'sv', 'id', 'pl', 'tr', 'cs', 'et', 'vi', 'fil', 'fi',
-                                'he', 'is', 'nb'])
-        self.check_vaild_value(self.time_period, "Time period", ['now', '3d', '7d', '10d', '15d', '30d'])
-
-
-class QWeather(ComponentBase, ABC):
-    component_name = "QWeather"
-
-    def _run(self, history, **kwargs):
-        ans = self.get_input()
-        ans = "".join(ans["content"]) if "content" in ans else ""
-        if not ans:
-            return QWeather.be_output("")
-
-        try:
-            response = requests.get(
-                url="https://geoapi.qweather.com/v2/city/lookup?location=" + ans + "&key=" + self._param.web_apikey).json()
-            if response["code"] == "200":
-                location_id = response["location"][0]["id"]
-            else:
-                return QWeather.be_output("**Error**" + self._param.error_code[response["code"]])
-
-            base_url = "https://api.qweather.com/v7/" if self._param.user_type == 'paid' else "https://devapi.qweather.com/v7/"
-
-            if self._param.type == "weather":
-                url = base_url + "weather/" + self._param.time_period + "?location=" + location_id + "&key=" + self._param.web_apikey + "&lang=" + self._param.lang
-                response = requests.get(url=url).json()
-                if response["code"] == "200":
-                    if self._param.time_period == "now":
-                        return QWeather.be_output(str(response["now"]))
-                    else:
-                        qweather_res = [{"content": str(i) + "\n"} for i in response["daily"]]
-                        if not qweather_res:
-                            return QWeather.be_output("")
-
-                        df = pd.DataFrame(qweather_res)
-                        return df
-                else:
-                    return QWeather.be_output("**Error**" + self._param.error_code[response["code"]])
-
-            elif self._param.type == "indices":
-                url = base_url + "indices/1d?type=0&location=" + location_id + "&key=" + self._param.web_apikey + "&lang=" + self._param.lang
-                response = requests.get(url=url).json()
-                if response["code"] == "200":
-                    indices_res = response["daily"][0]["date"] + "\n" + "\n".join(
-                        [i["name"] + ": " + i["category"] + ", " + i["text"] for i in response["daily"]])
-                    return QWeather.be_output(indices_res)
-
-                else:
-                    return QWeather.be_output("**Error**" + self._param.error_code[response["code"]])
-
-            elif self._param.type == "airquality":
-                url = base_url + "air/now?location=" + location_id + "&key=" + self._param.web_apikey + "&lang=" + self._param.lang
-                response = requests.get(url=url).json()
-                if response["code"] == "200":
-                    return QWeather.be_output(str(response["now"]))
-                else:
-                    return QWeather.be_output("**Error**" + self._param.error_code[response["code"]])
-        except Exception as e:
-            return QWeather.be_output("**Error**" + str(e))
+#
+# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from abc import ABC
+import pandas as pd
+import requests
+from agent.component.base import ComponentBase, ComponentParamBase
+
+
+class QWeatherParam(ComponentParamBase):
+    """
+    Define the QWeather component parameters.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.web_apikey = "xxx"
+        self.lang = "zh"
+        self.type = "weather"
+        self.user_type = 'free'
+        self.error_code = {
+            "204": "The request was successful, but the region you are querying does not have the data you need at this time.",
+            "400": "Request error, may contain incorrect request parameters or missing mandatory request parameters.",
+            "401": "Authentication fails, possibly using the wrong KEY, wrong digital signature, wrong type of KEY (e.g. using the SDK's KEY to access the Web API).",
+            "402": "Exceeded the number of accesses or the balance is not enough to support continued access to the service, you can recharge, upgrade the accesses or wait for the accesses to be reset.",
+            "403": "No access, may be the binding PackageName, BundleID, domain IP address is inconsistent, or the data that requires additional payment.",
+            "404": "The queried data or region does not exist.",
+            "429": "Exceeded the limited QPM (number of accesses per minute), please refer to the QPM description",
+            "500": "No response or timeout, interface service abnormality please contact us"
+        }
+        # Weather
+        self.time_period = 'now'
+
+    def check(self):
+        self.check_empty(self.web_apikey, "QWeather API key")
+        self.check_valid_value(self.type, "Type", ["weather", "indices", "airquality"])
+        self.check_valid_value(self.user_type, "Free subscription or paid subscription", ["free", "paid"])
+        self.check_valid_value(self.lang, "Use language",
+                               ['zh', 'zh-hant', 'en', 'de', 'es', 'fr', 'it', 'ja', 'ko', 'ru', 'hi', 'th', 'ar', 'pt',
+                                'bn', 'ms', 'nl', 'el', 'la', 'sv', 'id', 'pl', 'tr', 'cs', 'et', 'vi', 'fil', 'fi',
+                                'he', 'is', 'nb'])
+        self.check_valid_value(self.time_period, "Time period", ['now', '3d', '7d', '10d', '15d', '30d'])
+
+
+class QWeather(ComponentBase, ABC):
+    component_name = "QWeather"
+
+    def _run(self, history, **kwargs):
+        ans = self.get_input()
+        ans = "".join(ans["content"]) if "content" in ans else ""
+        if not ans:
+            return QWeather.be_output("")
+
+        try:
+            response = requests.get(
+                url="https://geoapi.qweather.com/v2/city/lookup?location=" + ans + "&key=" + self._param.web_apikey).json()
+            if response["code"] == "200":
+                location_id = response["location"][0]["id"]
+            else:
+                return QWeather.be_output("**Error**" + self._param.error_code[response["code"]])
+
+            base_url = "https://api.qweather.com/v7/" if self._param.user_type == 'paid' else "https://devapi.qweather.com/v7/"
+
+            if self._param.type == "weather":
+                url = base_url + "weather/" + self._param.time_period + "?location=" + location_id + "&key=" + self._param.web_apikey + "&lang=" + self._param.lang
+                response
= requests.get(url=url).json() + if response["code"] == "200": + if self._param.time_period == "now": + return QWeather.be_output(str(response["now"])) + else: + qweather_res = [{"content": str(i) + "\n"} for i in response["daily"]] + if not qweather_res: + return QWeather.be_output("") + + df = pd.DataFrame(qweather_res) + return df + else: + return QWeather.be_output("**Error**" + self._param.error_code[response["code"]]) + + elif self._param.type == "indices": + url = base_url + "indices/1d?type=0&location=" + location_id + "&key=" + self._param.web_apikey + "&lang=" + self._param.lang + response = requests.get(url=url).json() + if response["code"] == "200": + indices_res = response["daily"][0]["date"] + "\n" + "\n".join( + [i["name"] + ": " + i["category"] + ", " + i["text"] for i in response["daily"]]) + return QWeather.be_output(indices_res) + + else: + return QWeather.be_output("**Error**" + self._param.error_code[response["code"]]) + + elif self._param.type == "airquality": + url = base_url + "air/now?location=" + location_id + "&key=" + self._param.web_apikey + "&lang=" + self._param.lang + response = requests.get(url=url).json() + if response["code"] == "200": + return QWeather.be_output(str(response["now"])) + else: + return QWeather.be_output("**Error**" + self._param.error_code[response["code"]]) + except Exception as e: + return QWeather.be_output("**Error**" + str(e)) diff --git a/agent/templates/websearch_assistant.json b/agent/templates/websearch_assistant.json index 794a35a30f548d4db425853c93d6133a1ab22bf4..4ec9382f5625493eecc9bc6914237447731926a3 100644 --- a/agent/templates/websearch_assistant.json +++ b/agent/templates/websearch_assistant.json @@ -1,547 +1,547 @@ -{ - "id": 0, - "title": "WebSearch Assistant", - "description": "A chat assistant that combines information both from knowledge base and web search engines. It integrates information from the knowledge base and relevant search engines to answer a given question. 
What you need to do is setting up knowleage base in 'Retrieval'.", - "canvas_type": "chatbot", - "dsl": { - "answer": [], - "components": { - "Answer:PoorMapsCover": { - "downstream": [ - "Retrieval:BetterRocksJump", - "KeywordExtract:LegalIdeasTurn" - ], - "obj": { - "component_name": "Answer", - "params": {} - }, - "upstream": ["Generate:FullYearsStick", "begin"] - }, - "Baidu:OliveAreasCall": { - "downstream": ["Generate:FullYearsStick"], - "obj": { - "component_name": "Baidu", - "params": { - "top_n": 2 - } - }, - "upstream": ["KeywordExtract:LegalIdeasTurn"] - }, - "DuckDuckGo:SoftButtonsRefuse": { - "downstream": ["Generate:FullYearsStick"], - "obj": { - "component_name": "DuckDuckGo", - "params": { - "channel": "text", - "top_n": 2 - } - }, - "upstream": ["KeywordExtract:LegalIdeasTurn"] - }, - "Generate:FullYearsStick": { - "downstream": ["Answer:PoorMapsCover"], - "obj": { - "component_name": "Generate", - "params": { - "cite": true, - "frequency_penalty": 0.7, - "llm_id": "deepseek-chat", - "message_history_window_size": 12, - "parameters": [ - { - "component_id": "Retrieval:BetterRocksJump", - "id": "69415446-49bf-4d4b-8ec9-ac86066f7709", - "key": "kb_input" - }, - { - "component_id": "DuckDuckGo:SoftButtonsRefuse", - "id": "83363c2a-00a8-402f-a45c-ddc4097d7d8b", - "key": "duckduckgo" - }, - { - "component_id": "Wikipedia:WittyRiceLearn", - "id": "92c1e8e4-1597-4e65-a08d-c8cac4ac150f", - "key": "wikipedia" - }, - { - "component_id": "Baidu:OliveAreasCall", - "id": "19b5445a-7a6e-4a26-9aa9-47dfe3a03bea", - "key": "baidu" - } - ], - "presence_penalty": 0.4, - "prompt": "Role: You are an intelligent assistant. \nTask: Chat with user. Answer the question based on the provided content from: Knowledge Base, Wikipedia, Duckduckgo, Baidu.\nRequirements:\n - Answer should be in markdown format.\n - Summarize and label the sources of the cited content separately: (Knowledge Base, Wikipedia, Duckduckgo, Baidu).\n - Attach URL links to the content which is quoted from Wikipedia, DuckDuckGo or Baidu.\n - Do not make thing up when there's no relevant information to user's question. 
\n\n## Knowledge base content\n {kb_input}\n\n\n## Wikipedia content\n{wikipedia}\n\n\n## Duckduckgo content\n{duckduckgo}\n\n\n## Baidu content\n{baidu}", - "temperature": 0.1, - "top_p": 0.3 - } - }, - "upstream": [ - "DuckDuckGo:SoftButtonsRefuse", - "Baidu:OliveAreasCall", - "Wikipedia:WittyRiceLearn", - "Retrieval:BetterRocksJump" - ] - }, - "KeywordExtract:LegalIdeasTurn": { - "downstream": [ - "Baidu:OliveAreasCall", - "DuckDuckGo:SoftButtonsRefuse", - "Wikipedia:WittyRiceLearn" - ], - "obj": { - "component_name": "KeywordExtract", - "params": { - "frequencyPenaltyEnabled": true, - "frequency_penalty": 0.7, - "llm_id": "deepseek-chat", - "maxTokensEnabled": true, - "max_tokens": 256, - "parameter": "Precise", - "presencePenaltyEnabled": true, - "presence_penalty": 0.4, - "temperature": 0.1, - "temperatureEnabled": true, - "topPEnabled": true, - "top_n": 2, - "top_p": 0.3 - } - }, - "upstream": ["Answer:PoorMapsCover"] - }, - "Retrieval:BetterRocksJump": { - "downstream": ["Generate:FullYearsStick"], - "obj": { - "component_name": "Retrieval", - "params": { - "empty_response": "The answer you want was not found in the knowledge base!", - "kb_ids": [], - "keywords_similarity_weight": 0.3, - "similarity_threshold": 0.2, - "top_n": 8 - } - }, - "upstream": ["Answer:PoorMapsCover"] - }, - "Wikipedia:WittyRiceLearn": { - "downstream": ["Generate:FullYearsStick"], - "obj": { - "component_name": "Wikipedia", - "params": { - "language": "en", - "top_n": 2 - } - }, - "upstream": ["KeywordExtract:LegalIdeasTurn"] - }, - "begin": { - "downstream": ["Answer:PoorMapsCover"], - "obj": { - "component_name": "Begin", - "params": {} - }, - "upstream": [] - } - }, - "graph": { - "edges": [ - { - "id": "reactflow__edge-Answer:PoorMapsCovera-Retrieval:BetterRocksJumpc", - "markerEnd": "logo", - "source": "Answer:PoorMapsCover", - "sourceHandle": "a", - "style": { - "stroke": "rgb(202 197 245)", - "strokeWidth": 2 - }, - "target": "Retrieval:BetterRocksJump", - "targetHandle": "c", - "type": "buttonEdge" - }, - { - "id": "reactflow__edge-Answer:PoorMapsCoverb-KeywordExtract:LegalIdeasTurnc", - "markerEnd": "logo", - "source": "Answer:PoorMapsCover", - "sourceHandle": "b", - "style": { - "stroke": "rgb(202 197 245)", - "strokeWidth": 2 - }, - "target": "KeywordExtract:LegalIdeasTurn", - "targetHandle": "c", - "type": "buttonEdge" - }, - { - "id": "reactflow__edge-KeywordExtract:LegalIdeasTurnb-Baidu:OliveAreasCallc", - "markerEnd": "logo", - "source": "KeywordExtract:LegalIdeasTurn", - "sourceHandle": "b", - "style": { - "stroke": "rgb(202 197 245)", - "strokeWidth": 2 - }, - "target": "Baidu:OliveAreasCall", - "targetHandle": "c", - "type": "buttonEdge" - }, - { - "id": "reactflow__edge-KeywordExtract:LegalIdeasTurnb-DuckDuckGo:SoftButtonsRefusec", - "markerEnd": "logo", - "source": "KeywordExtract:LegalIdeasTurn", - "sourceHandle": "b", - "style": { - "stroke": "rgb(202 197 245)", - "strokeWidth": 2 - }, - "target": "DuckDuckGo:SoftButtonsRefuse", - "targetHandle": "c", - "type": "buttonEdge" - }, - { - "id": "reactflow__edge-KeywordExtract:LegalIdeasTurnb-Wikipedia:WittyRiceLearnc", - "markerEnd": "logo", - "source": "KeywordExtract:LegalIdeasTurn", - "sourceHandle": "b", - "style": { - "stroke": "rgb(202 197 245)", - "strokeWidth": 2 - }, - "target": "Wikipedia:WittyRiceLearn", - "targetHandle": "c", - "type": "buttonEdge" - }, - { - "id": "reactflow__edge-DuckDuckGo:SoftButtonsRefuseb-Generate:FullYearsSticka", - "markerEnd": "logo", - "source": "DuckDuckGo:SoftButtonsRefuse", - "sourceHandle": "b", 
- "style": { - "stroke": "rgb(202 197 245)", - "strokeWidth": 2 - }, - "target": "Generate:FullYearsStick", - "targetHandle": "a", - "type": "buttonEdge" - }, - { - "id": "reactflow__edge-Baidu:OliveAreasCallb-Generate:FullYearsSticka", - "markerEnd": "logo", - "source": "Baidu:OliveAreasCall", - "sourceHandle": "b", - "style": { - "stroke": "rgb(202 197 245)", - "strokeWidth": 2 - }, - "target": "Generate:FullYearsStick", - "targetHandle": "a", - "type": "buttonEdge" - }, - { - "id": "reactflow__edge-Wikipedia:WittyRiceLearnb-Generate:FullYearsSticka", - "markerEnd": "logo", - "source": "Wikipedia:WittyRiceLearn", - "sourceHandle": "b", - "style": { - "stroke": "rgb(202 197 245)", - "strokeWidth": 2 - }, - "target": "Generate:FullYearsStick", - "targetHandle": "a", - "type": "buttonEdge" - }, - { - "id": "reactflow__edge-Retrieval:BetterRocksJumpb-Generate:FullYearsSticka", - "markerEnd": "logo", - "source": "Retrieval:BetterRocksJump", - "sourceHandle": "b", - "style": { - "stroke": "rgb(202 197 245)", - "strokeWidth": 2 - }, - "target": "Generate:FullYearsStick", - "targetHandle": "a", - "type": "buttonEdge" - }, - { - "id": "reactflow__edge-Generate:FullYearsStickd-Answer:PoorMapsCoverd", - "markerEnd": "logo", - "source": "Generate:FullYearsStick", - "sourceHandle": "d", - "style": { - "stroke": "rgb(202 197 245)", - "strokeWidth": 2 - }, - "target": "Answer:PoorMapsCover", - "targetHandle": "d", - "type": "buttonEdge" - }, - { - "id": "reactflow__edge-begin-Answer:PoorMapsCoverc", - "markerEnd": "logo", - "source": "begin", - "sourceHandle": null, - "style": { - "stroke": "rgb(202 197 245)", - "strokeWidth": 2 - }, - "target": "Answer:PoorMapsCover", - "targetHandle": "c", - "type": "buttonEdge" - } - ], - "nodes": [ - { - "data": { - "label": "Begin", - "name": "opening" - }, - "dragging": false, - "height": 50, - "id": "begin", - "position": { - "x": -1020.0423250754997, - "y": 54.07040832453751 - }, - "positionAbsolute": { - "x": -1020.0423250754997, - "y": 54.07040832453751 - }, - "selected": false, - "sourcePosition": "left", - "targetPosition": "right", - "type": "beginNode", - "width": 50 - }, - { - "data": { - "form": {}, - "label": "Answer", - "name": "interface" - }, - "dragging": false, - "height": 100, - "id": "Answer:PoorMapsCover", - "position": { - "x": -880.5773333116513, - "y": 29.2721628695582 - }, - "positionAbsolute": { - "x": -880.5773333116513, - "y": 29.2721628695582 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "logicNode", - "width": 100 - }, - { - "data": { - "form": { - "frequencyPenaltyEnabled": true, - "frequency_penalty": 0.7, - "llm_id": "deepseek-chat", - "maxTokensEnabled": true, - "max_tokens": 256, - "parameter": "Precise", - "presencePenaltyEnabled": true, - "presence_penalty": 0.4, - "temperature": 0.1, - "temperatureEnabled": true, - "topPEnabled": true, - "top_n": 2, - "top_p": 0.3 - }, - "label": "KeywordExtract", - "name": "get keywords" - }, - "dragging": false, - "height": 70, - "id": "KeywordExtract:LegalIdeasTurn", - "position": { - "x": -727.0680233991866, - "y": 43.6827878582167 - }, - "positionAbsolute": { - "x": -727.0680233991866, - "y": 43.6827878582167 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "logicNode", - "width": 70 - }, - { - "data": { - "form": { - "empty_response": "The answer you want was not found in the knowledge base!", - "kb_ids": [], - "keywords_similarity_weight": 0.3, - "similarity_threshold": 0.2, - "top_n": 8 - }, - 
"label": "Retrieval", - "name": "Search KB" - }, - "dragging": false, - "height": 100, - "id": "Retrieval:BetterRocksJump", - "position": { - "x": -453.6381242126441, - "y": 245.01328822547293 - }, - "positionAbsolute": { - "x": -453.6381242126441, - "y": 245.01328822547293 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "logicNode", - "width": 100 - }, - { - "data": { - "form": { - "language": "en", - "top_n": 2 - }, - "label": "Wikipedia", - "name": "Wikipedia" - }, - "dragging": false, - "height": 100, - "id": "Wikipedia:WittyRiceLearn", - "position": { - "x": -552.2594439551717, - "y": 155.22722562174718 - }, - "positionAbsolute": { - "x": -552.2594439551717, - "y": 155.22722562174718 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "ragNode", - "width": 100 - }, - { - "data": { - "form": { - "top_n": 2 - }, - "label": "Baidu", - "name": "Baidu" - }, - "dragging": false, - "height": 100, - "id": "Baidu:OliveAreasCall", - "position": { - "x": -555.1646448972449, - "y": 22.458226784453046 - }, - "positionAbsolute": { - "x": -555.1646448972449, - "y": 22.458226784453046 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "ragNode", - "width": 100 - }, - { - "data": { - "form": { - "channel": "text", - "top_n": 2 - }, - "label": "DuckDuckGo", - "name": "DuckDuckGo" - }, - "dragging": false, - "height": 100, - "id": "DuckDuckGo:SoftButtonsRefuse", - "position": { - "x": -554.7669080287701, - "y": -111.86266788597959 - }, - "positionAbsolute": { - "x": -554.7669080287701, - "y": -111.86266788597959 - }, - "selected": false, - "sourcePosition": "right", - "targetPosition": "left", - "type": "ragNode", - "width": 100 - }, - { - "data": { - "form": { - "cite": true, - "frequencyPenaltyEnabled": true, - "frequency_penalty": 0.7, - "llm_id": "deepseek-chat", - "message_history_window_size": 12, - "parameter": "Precise", - "parameters": [ - { - "component_id": "Retrieval:BetterRocksJump", - "id": "69415446-49bf-4d4b-8ec9-ac86066f7709", - "key": "kb_input" - }, - { - "component_id": "DuckDuckGo:SoftButtonsRefuse", - "id": "83363c2a-00a8-402f-a45c-ddc4097d7d8b", - "key": "duckduckgo" - }, - { - "component_id": "Wikipedia:WittyRiceLearn", - "id": "92c1e8e4-1597-4e65-a08d-c8cac4ac150f", - "key": "wikipedia" - }, - { - "component_id": "Baidu:OliveAreasCall", - "id": "19b5445a-7a6e-4a26-9aa9-47dfe3a03bea", - "key": "baidu" - } - ], - "presencePenaltyEnabled": true, - "presence_penalty": 0.4, - "prompt": "Role: You are an intelligent assistant. \nTask: Chat with user. Answer the question based on the provided content from: Knowledge Base, Wikipedia, Duckduckgo, Baidu.\nRequirements:\n - Answer should be in markdown format.\n - Answer should include all sources(Knowledge Base, Wikipedia, Duckduckgo, Baidu) as long as they are relevant, and label the sources of the cited content separately.\n - Attach URL links to the content which is quoted from Wikipedia, DuckDuckGo or Baidu.\n - Do not make thing up when there's no relevant information to user's question. 
\n\n## Knowledge base content\n {kb_input}\n\n\n## Wikipedia content\n{wikipedia}\n\n\n## Duckduckgo content\n{duckduckgo}\n\n\n## Baidu content\n{baidu}", - "temperature": 0.1, - "temperatureEnabled": true, - "topPEnabled": true, - "top_p": 0.3 - }, - "label": "Generate", - "name": "LLM" - }, - "dragging": false, - "height": 150, - "id": "Generate:FullYearsStick", - "position": { - "x": -355.85244068796055, - "y": -225.5280777950136 - }, - "positionAbsolute": { - "x": -355.85244068796055, - "y": -225.5280777950136 - }, - "selected": true, - "sourcePosition": "right", - "targetPosition": "left", - "type": "logicNode", - "width": 150 - } - ] - }, - "history": [], - "messages": [], - "path": [], - "reference": [] - }, - "avatar": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAYEAAAGDCAYAAADNkawvAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAAFiUAABYlAUlSJPAAAGlsSURBVHhe7Z1nmBTV9vVflaBgIqgYMJElM+Scc5acc845JwkKiAnMICqCJAOIimK8hosoAiIgOQw5K3j9f9pvrdMUNj17unYNM91d1fvD77n3kTlnVVVX7XXiPv9vxYoVpCiKosQnxgT++usv+vPPP12Dcn///Tf973//cw3Kqa4M1ZWjunJUV46fdY0JoMCFCxdcg3Ko5P/+7/9cg3KqK0N15aiuHNWV42ddNQEXqK4c1ZWjunJUV45UV03ABaorR3XlqK4c1ZUj1VUTcIHqylFdOaorR3XlSHXVBFygunJUV47qylFdOVJdNQEXqK4c1ZWjunJUV45UV03ABaorR3XlqK4c1ZUj1VUTcIHqylFdOaorR3XlSHXVBFygunJUV47qylFdOVJdNQEXqK4c1ZWjunJUV45UV03ABaorR3XlqK4c1ZUj1VUTcIHqylFdOaorR3XlSHXVBFygunJUV47qylFdOVJdYwJ+zY7HobpyVFeO6spRXTmR0DUmoA9GhurKUV05qitHdeVIdY0JoADXTXAC5VAJ1xVxAuVUV4bqylFdOaorx8+6agIuUF05qitHdeWorhyprpqAC1RXjurKUV05qitHqqsm4ALVlaO6clRXjurKkeqqCbhAdeWorhzVlaO6cqS6agIuUF05qitHdeWorhyprpqAC1RXjurKUV05qitHqqsm4ALVlaO6clRXjurKkeqqCbhAdeWorhzVlaO6cqS6agIuUF05qitHdeWorhyprpqAC1RXjurKUV05qitHqqsm4ALVlaO6clRXjurKkeqqCbhAdeWorhzVlaO6cqS6xgT8mh2PQ3XlqK4c1ZWjunIioWtMQB+MDNWVo7pyVFeO6sqR6hoTQAGum+AEyqESriviBMqprgzVlaO6clRXjp911QRcoLpyVFeO6spRXTlSXTUBF6iuHNWVo7pyVFeOVFdNwAWqK0d15aiuHNWVI9VVE3CB6spRXTmqK0d15Uh11QRcoLpyVFeO6spRXTlSXTUBF6iuHNWVo7pyVFeOVFdNwAWqK0d15aiuHNWVI9VVE3CB6spRXTmqK0d15Uh11QRcoLpyVFeO6spRXTlSXTUBF6iuHNWVo7pyVFeOVFdNwAWqK0d15aiuHNWVI9VVE3CB6spRXTmqK0d15Uh1jQn4NTseh+rKUV05qitHdeVEQteYgD4YGaorR3XlqK4c1ZUj1TUmgAJcN8EJlEMlXFfECZRTXRmqK8fWvXz5b/rT+gDOX7hIZ89foFNnztKJU2fCcvzkaTqceIwOHk40HE48SkePn6DjJ07SyVOn6YxVB6cJ4vU5c/U6obpyIqGrJuAC1ZWTVrrnz5+nU6dP0zErMCNQ7zt4iHbvO0C79u6n3//YQ9t37aHfLL7+8Wd679Ov6LV3P6CnXn6LRj35AvWfNMeRPuOfvMIsGjb9GZr+wkJ68e2V9O6a9fTZtz/Slu07aduOP4wWNKGNazhy9DidOXuOLl2+zN5TOPT3laO6cqS6agIuUF05qaF75uxZ0wo/dCSR9uw/SDt276Vftv1Oqz/5wgrsb5pg3aLfWKrevj8Va9CBHqzYhG4rXJ3+36Nl04wbcpWjWwtVowcrNKFiDTsa7Zb9x9KAyXNo3utL6eOvvqc/9h2k/ZZBJR63ehCnz5jeyGWra87dK4jX35er1wnVlSPVVRNwgerKcaP7zz//0F+XLtGZc+fpqBX09x9KpK1Wa/uTr76j5994lwZNmUv1ugyhPNVbUMb8ldjgHEukz1uR8tdqTY17jaARs56nV5e9T19ZPZPd+w/RwSPHzD3iXnHPuH+//76hqK6cSOiqCbhAdeWE00XQR+sY4/CHEo/Rjj37ae0X/6Hp8xdRuyGTrFZ9xzRv0Uca9CDuKFqTSjbpQu2HTqaZLy6mdV9+T3sOHLaM4aiZizhx8pQZ7uKeZ3LoeyVHdXnUBFygunKCdRH0MUl78vRZK+gfp20795ix+n4TZ1OFlr3o9iI12MDpd2AMdxarRZVb96H+k2bTq0vfo59+/Y127dlnJqUx98E922Di+b1yi+ryqAm4QHXlXLx4kc6cO2fGxdHSX/XJFzR0+rNUtnkPurlAZTYoxjswhUyPVaVyj/cwk9LL1643k9B7DxyiY8dPsL2EeHuvVFeOVFdNwAWqG56zZ8+ZJZVYMbNpy3Z6ZuFSathjuNXarckGPSU8N+YuT9lK1KFG1jPEs/zp121mgvzYiRN0zjJYPPN4eK+CUV05Ul01AReoblIQ+I8cPUa7rNb+Nz/+bJZUVmvXz7RoucCmpAz0EjBPUrfLYJr9ylv0w89bjNkeP3mKLqdgWSrQ91mOn3XVBFygugHswL9zzz7a8J//0oS5L1H5Fj1ieuXODbkr0I15K9FN+ZyGosqZv7sxb2W6IU8F+n9W8OX/LrrAZGG2Mxe8QRu3/EaHsU/h3Hn6+2/5b63vsxw/66oJuCCedTEeffLUKTMc8dUPP9GYp+ZTycZd2AAVfaxAnqcipStQjTIUrk03F29Id1bpTPc0GEh31x/A/P2/IPhnq9mT7mk4mLJU7UyZEpqYOlAX6uTKRJuM+StTpdZ96IkXFtFPW36nY1bvwF5+Go54fp/d4mddNQEXxKMuJngxzo+duEs//IQa9xxBtxSowgajaGMC/2M1TNDPVqsXPdJxJhUZvZ
TKztpAZZ/8gsrM/JweG/w6W9YmvVU+T89nzN/bZVDHw51mWHX2tOqub2lUj0lDCKw2qkkdh00xm9aw/BZLcbE6K7nfN97eZ9VNipqAC+JJF7l3kEvn562/myGHEo06myDDBZ+oYl3TTfmqUMai9Shrje6Uu/s8Spiy9moQD8aYwCCJCcxjy5d9coOpO1e3OUYrQ5E6RpurJ9pkyFeJKrbqTQveXkV/WL035E4KfQ/i6X0GqstjTMCv2fE4VDd58HcXrJY/WpDf/rTZrOPPUaYBG2SuB4zPp8tfldIXrJnyMfcrwf/mEg3p/sfHULEx7zJB+1rkJvBvTyA5ysz6nIqNXU73NR9FGQrDDK5j2euVe8EzMfMQ3N+kkJvylKeHKzejMbMXmLkD7FbGng379/bz+xyK6vIYE9AHI8OvuhjyQZZM5OZZse4zatJrZCoP+QQmWzGMghb7HVU60P0txprhmhvdBk8TMCvTzcUb0H3NRlKxcavYIM1x/T2BpBgzGLeC7m063Mwd3JTXvRlgHiJ7rV6WmY2mO6t2Nj0MM+RkPTM8O65MSshUsCq1HzKJ1nz+jdl7cNrqHfjxfU4O1eUxJoACXDfBCZRDJVxXxAmUU10ZaaWLyV4kaMOGpJffWUVlmnVjg0dKuCFXedO6RWsfY/R31elLubrOsQLmSiox4T1jAly5cCAo3lysgRVwR5hWOBeUwxEwgYVs3TbGBHo49wRCQd1FR79LORoNoQyFal8J4LwGB4wtR6OhlDD5Qyo+8T3K1W0uZa/Tj24p0cg8QzxLPFOurFuwiqtq2770xso1JmXFxT//Yt+dcOh3JCfWddUEXOAXXTv4b/19F81/c4XJ1cMFC9dcaaUjCN5apoUVrEdS/v6vUKknPg4EylkbrOC9gu6uF36FTiiYi0Ar+666/ajIyHeSBGApaWkCNtAoPOJtK4D3MZPUboa7MNmctUYPKj7eMuQrk9mlZ6y3nuHLptdza9kWlL5QLfOM3dSbHIF5g1709vsfmyHACxf/ZN8hDv2O5MS6rpqAC7yui+CPJGVo+T+7cCkVqdeeDQ6usIIRWr1ord6S0NgMixQcsihpcJyFlvIyurNKJ76eZEDdmUo2pdyCcXon5MNB169VevqnlLvHPOuZNHK1kgjzJbdXaEtFMcdxxQhsysz4jB4bspDubTLcPBM8c7c9Dg6YQfkWPenN1evoyLETuryUwc+6agIu8LLu6TNnzK7et1Z/RKWbdmWDgRsCyzGr080lGpm19/n6LjCBLzhoXQ1elgEUGbWUbq/Ylq2LxTIXBLnsdftS8QnvsfW6RWYC1V3NCYTD7vlkq9nD7DPg9DhuyF2ebivfioqNWZHECGxKTf/EeuYvml4V5kdSY9kq9hvgfIRPvv7B7DUIt/EsXr8jrl4nYl1XTcAFXtRFSoH9h46YyUDk5L8xd8qHETAsg2D273LMuckux7QxPQDLAO6o3J6tkwNBEIHt4Q5PmPJcvSkh0iZgg+Gwh9pNpQyFaomHceweQTgjCBC8bLUHZShSl27CKqPrGC7KXKgadRg2mb7b9Ks5FIfbZxBv35GfddUEXOAlXXy456yyP2/bQZ2HT72uzJ2YkERLM3Pp5lYwm2ZWw/AB6VrQEsawxp2V5XMOaM1i7PuxQa+xdV4PxgQcN4ulvgmAMhjb7/cS3ZLQxAR4TjsU/N0dFdtR8XErrTrCGUGAwEqlVfRgmynWb9XMGPb1TCYjxfew6c/Szj0HkswXxMt3ZONnXTUBF3hFF+fc4rCSZxYto3vLNGQ/cAlokWOM/NayLenRzrOo1LTABK+EwFDIcspSVT4HgKWSWap2MROjXJ3XSzRNANjzIndUai/eD2CMoHIHKj5hNVtncpR64hN6xPrNMpdpYQwcvyVXvxPYZ5C3RiszX5B4/MTVIaJ4+I6C8bOumoALYl0Xf4Ox3E+/+dGM7XIftQQEHozHYzgCSxVLWwGFCzTJYhkAglb2Or3Z+jmwBDJrze6Ow0vXQ7SGg64Bz8YyOUyQS8fw8XdZqnejhMnun02paZ+YoSL8llitJO2FhHLLY1WoZf9x9N/Nv5lEdX7+jjj8rKsm4IJY1cXQD9Z6/7ZrLw2eNi/FaZzROsUSRLQ8sUIGyxO5wBKeDVawWuNqHwDGsLPX7kMlp65j6ks9ot0T+JcNZq9E1hrdxEaATWg5Gg6+utzWLfZqJQwvpS9omUEKdiZjnuHuUvXMMaAHDifSmbNn2ffVCf1+5URCV03ABbGoi+450ggvef8Teqx2G/bjdQKtQ0xaYrdqvr7z2SAipdS0dfRIp5msDgdSJdxVb4AZvuDqS01ixwQCJExZYzKW3phHtswTY/wPtpuaQnMOUHr6esrbe775rY0ZpKBngIP0cfrZB599RYeOHGVPPAuHfr9yIqGrJuCCWNJF6//s+Qv04+Zt5nD2dCnJOYNlmI/VMK3DfL1fsIKk8+RjONDazNvnBTO0w+pdQ2ClEZaXJre0NLWRDQe5SxtxvWCHMDKeStf7Y5gub+/nrXv5jK1PCowkn/Vb3W799oEJZPeriW4vUp0GTJ5DW7bvNPtPuHeXQ79fOZHQVRNwQazo/u9//9ABqwX2wpsrUpbgzfrgMQSTqWQTeqTzk6kShBFgCw5dRBmL1OU1ryFgAPc0GmIFo8gYAJCbwPVvFnNDySlr6a66fWVGYP12GYvVt571Gw5LR2WYCeROMyhTqabWO+E+XxQmjgvUam31RD82vQLu/Q0l3r9fN0RCV03ABdHWtcf+N2/fRa0Hjmc/yvBgd29lk48mZ+uJqTcJi5VA41bQ7RVkw1EwoHsaDLquYY2UEGvDQcHACJBqQjI8YzaTlWtNCRPfZ+tyjfX7JUz6kB5oMc5kRMU7wumG49ZC1WjglLlmQyI2JnLvsU28fr9cvU5EQteYgF+z43F4VffSpUtmS/87H3xKuau1YD/EcGACEpu8EHyLpfISzJJT1tB9TUeyuqGgtYuNZimd4LweYnE4KBjsjL4dy0cFa/vtieLUHEozS1jHrKB7Gg4yuZqkk9Y2mCso2aQLffLV93QMKauZdxnE4/cby7rGBPTByIiWLvK/b/l9F/Wd8CT78YUDASV9oZomdUGhYYvZj/96QBDK0+tZUcBAKxdj0CVSqwXrklgdDrLB3ooio5eZvECSncVY/5+r+1xzX1x9KQXzDYWGvkFZq3czGtJdzjZ3FK1BM19cbBot3Dsfb99vrOsaE0ABrpvgBMqhEq4r4gTKqW54MPyDnZpf/bDJZHvkPrhwmORrpZpR7m5zTCuP++CvB9RZaPhbpofB6QcDM8K1IMhxdUUCmQlEZzjIBtdYcPAic6YAd33XYAVnpNYuMmIJW9f1AoNH+m8MH7rtFZh9BQPG0a69B+iyFYyC3+t4+X5tYl1XTcAFkdSFAeB4Rwz/3F+uEfuhJUeg9V8rkHwtjXbf2mvds1QTJKPDZGaRumY1Cl9XZPCCCQAEX6zpl6yyQu8KO5BLTv2Iret6sXd+43zldAXc9QqwYg3DQ99s3HxN2ol4+H6DiXVdNQEXR
EoXq3/2HTpCT7yw0HXOH7v1j12iadH6t8GYPhK8cdcQCoIHNo9x9UQS+XBQdE0AlJy2ju5tPopuELTAMdH+QKsJZhiHqys1KD39E8rVZbbVK2joqleApacPlG9s0k6cPH32yvutcUNKJHTVBFwQCV1s/sLOX6z95z6qZLE+Nrv1n9oTv6GYIYuhi0yrmb2WIGBK2Wv1jvhKII6ACaTtoTKpCfYQ3Fm1kwmkodcZCn57s2yUqSe1QKMCZzmnpFdwW+HqNHzmc+aMYz9/vxyxrqsm4IK01sVhHpu2/k5V2vRlP6TkQMssU0JjytV1NvvxpiqzkPtmNd1ZxTkzKJYy3lquVdQmgkPxmgkg6OIkNRzPGXqdoZhhocodXCX5SynoFTza+UlzXW56BTjjGCmqcaQl941I0LghR6qrJuCCtNLF+P/5CxdN4rf8tVqzHxCL1RJDiww7TqXpna8XBBnkxmevJxjr2m4uVt8Kuq+y9UQD+XBQbJgAQA8KO4QlG7kwLJSz1cQ0HRaygUHhTGUs93WzyQynmCG54aat21OUe0jjhhyprpqAC9JCFwZw6sxZMwHsJu0zhggyFqljxtojNdRihoGGLLKMxzlBHYLpg+2msPVEC5kJRH9iOBTkY7r/8dGi+QFs9io0PPWXAidHqakf0f0tx1K6gjUsfdnwEHYZF6nfnj779gc6eUqebgJo3JAj1VUTcEFq68IAjp44Rc8vXk6ZC7o5frCCOc83d4SDVYmJ71EWwRnBSIiWrXYfKh2BFqkbjAnE6I5hJ8z8AA7ncRiHN8NC1m+EIRuunrQgsJrpGbNUWHqIzY25y9NDlZrSio8+17xDYYiErpqAC1JTF///4JFjNPX58EEpFGzpR274wiPeZj/ItMKsGe82h72mYNBDQU6i1DoXODXxsglg+KXwiLcofeHa7HUHgyHCR7s8ZZW7/txCUswGs2Fv0K1lWrqaJ8hWog69tux9Onb8JPvdhBLvccMNUl01ARekli6WgO7ef4gGTX1atPLDYP0ddm8i7z7yvHAfYppxZa04eh/stQWBVSq5u8deEAXyOYHYvH4ke3uowxOOZwEEjLip6T1w9aQZWDQwYbXZne5mniBbQh16+rV36HCi84RxPMcNt0h11QRckBq6WAK6/Y+9rhLABcb/61LOluOjstQSY9JILsZdWzBmOWjt3mwdsYDcBGJnYjgUBPY7KnVIct2hYKPZ/Y+PMffM1ZOWmDkM633Bs5QuI8US0lGzXqCDhxPZ78cmXuMGV68TUl01ARdcr+6lS5dp2849VL/bUPZD4MAYL4ZXopfPBqkh3jRHE3LXdxXrY0dPIVKrlFKC13sCIDCkhcl55zkkNByKjFrK1pPW4PCaPD2fM2mvpfMEmQtWNfmxwhlBPMaNtNZVE3DB9eieP3+Btvz+BzXoNoz9ADgwtnpb+TYRH/8PBi3P7HX7JLm2UDBU9XCH6WwdsYIfegIg0DMb4xhc0TO7q07fqG3Uw/PGBrbMpZqbxgx3jaHYRnDg0BH2O4q3uBEJXWMCfs2OxxENXRjAz1t/pzqdB7EvPgcmgHEYebFxK9kPLBKUmfEZ5ev3Ent9wZgVKRXbmcljrp5YIdCK9nZPwAYb8JDYjbuHYHASWf4BL7N1RILAfoJldHvFtuIJ48yFqlKvcbPMITWh31I8xQ0QCV1jAvpgZKREF0NAm7fvpJodBrAvfFICp35lr9XLnEHLfViRAoEGRsRf578g93y+vgvYOmIJv/QEAAz30a5POU8SW/+O1BNRNWizy3yVSTaI3gl3naHcWrga9ZnwFB07ceqa7yle4oZNJHSNCaAA101wAuVQCdcVcQLl/K6LSeAtO/4wOyS5Fz0JuQJn/ppTtyJw8Ho4MISQp/fzjhN7aN1lq9nLfOhcPbGEzARic4koB4bqcAgNdx/B4EB57Drm6ogkCZM/oLvq9hOvHLKN4MSpM1e/qXiIG8FEQldNwAVudGEAW3fsprpdBrMveBKsYIsW9QMtx1FpK1hxH1EkwVI/7Edgr9XGumbksy888h22jlhD3hPwhglgbX6Bga84tq7NcF2lDjExXBdYOTRGNLENcGwlllJrBlK+7nBIddUEXCDVDWQC3UONeoxgX+xQsAQUeXYe6TyL/XAiDYJF7u5Ps9caDJYh3td8JFtHLBIwAe8kkJNgH1Ifeh+hYOIeZxRwdUQac7h9xxnmWUtSTWD56LAZz5r0Kn6OGxyR0FUTcIFEF/++c+8BatJLduYuDAA52vP2ju6BK/8SGL+9tYzDOcbmuhtZPYa0TVudmvjRBHBPWIFzU77w504go+vtFdpYBh/dYUab0jMCx5KKTlCzuL1IDRo750U6ffacL+NGckRCV03ABU66yAW0/3AidRs1nX2RQzE9gOINYmpS1Uw4dnmKvd5g7INMuDpiFflwkHdMAOBUsbsbDGTvJxhzJnG3OWwd0SCQIfUFc13c9YaSpXgtmvPqEjp56jT7fTqh8YpHTcAF4XRhAInHT9LEea+wL3ASLAPIWLSuOf+X+0CiBZLE3VauJX/NVwikJWhCCRM/YOuIVWQm4J2JYRuTVwgb+hzG2U1voGLbmJgbsAkMPc4VG8FdJevR6+9+QKdOuzcCjVc8agIuSE4XBoAVDE+//g7dZH1o3Mt7DVYQzVCotsnLz30Y0QL7AvL3W2Cuj73uK6TLX40eahtbaaIl+NUEAHoDORo7b0TESqF8feezdUQLDFFhPgy9S+6ag0EDBGduv/fJF67PI9B4xaMm4ILkdM+ev0BvrPxIeB5wYBkocqvE2rLKhMlrKGvNHsw1B3GlF1AyynsYUoKfTQDvUpFR7zi2qLGkN2uNHtaziK003zizOmebSaLD9ZGGOk/1FvTl9z/RuXPn2G+VQ+MVj5qACzhdHAm57qvvTSZE7oUNBV32HI2Hmi489zFEi8CQwluOyw0Dp1d5ay7AxtcmYIHD6e9rNsqxJ5ehUC0qNPRNto5ogt7MvU1HmN3y3HUHky5PBSrZuAtt/X0X+61yaLziURNwQaguloJu2rpDfCQkWjnYLBOtXC7hMB9gE4clrVZwwYqgWDkz2C1+NwH0BpCiIX2hmuy92SDI3tNgsGX8sbfBr+TUtda1DbSu0TnFBHrejXuOoENHwmcetYn3eJUcagIuCNbF/+JMgMY9hUtBrW74nVU6m12eMIGYYvqnVHTUUit4hD+wBMsQczQewtfhATDkgDw63L3ZmPX03eew5b0A3q/7mjm/k8gwioyvXB3R5VMqMWE1Za3elSTZR7F0dMxT80Wnk8VzvAqHmoALbF17JdCoJ+ezLyYHTnt6uP0TlLvnMzFHru5zRZOK6bAstMU4tg5P0ONpcyYzd2826K3laDSUL+8FesyjnK0nsfcWDAz9rnoD+DqijXUPD7efZubOuGsPJXtCHVq0/EPHieJ4jVdcvcGoCbjA1j134SK9uux99oVUFCWyYMXQw5Wb0Vc//ETnz59nv10Qr/GKqzcYYwJ+zY7Hcb26F63/Xf/Nj3R3qXrsC6koSuRJn7ciVWjRi/bsP8h+
uyAe45VE15iAPhgZly5dos2/7aQSjTqzL6KiKNED5xD0mfAknT13nv1+4y1eSXWNCaAA101wAuVQCdcVcQLlvKSLeYCDR45S99GylBCKokSe7Al1aeHyNXTp8uUk33A8xSsg1VUTEIINYS+8uUK2I1hRlKhgzw98+9PmJN9wPMUrINVVExBw2epWffnDJspaIvwSSkVRog/mByq17k3HT52+5juOl3hlI9VVE3AAw0B7Dhym2p3k5wMrihJdAqmnF5gd/fa3HA/xKhiprpqAA8hfPvuVt9kX7RqsbijWXmNLvhfAAeSOm3Gse0KaC668F0lv4Zit0mf3DCQZOvEu4J3gysca+B0lR1TmKNOAPvvPRtOQw7ccD/EqGKmumkAYMLn0ufUSIY8595IFg5cyR6MhVGjYYk+AM2fTOWzGQTB8uMN0trwXweEryFbJ3auNnRuJK+9FCg57w2wGxBGT3P3aILsoDnnh6og1Hhv8Ot3/+GiTDI+7FxvkFyr3eA86cuyE+Z79Hq9CkeqqCSQDWg+79h0wY4vcCxYMPjDkacdRf1w+lFgD2/Pz9nHY7Wy1iJEnCKkWuDq8iCx3kPcOlXEiYdIHdFvZ8GdEwPAf6TyTLR+LID3GnVU6mfeUux8bHE059IlnzbCQn+MVh1RXTSAZTp4+Q1OefY19sa7BeglxPjBamdzLGouYc2nrDeDv5womT1CTYWx5r+L7BHLJACN/sE34VBI3WK3mLNW6xFx22+TAdRYa/halL+y8WOOe0vXp029+9HW84pDqqgkwYBjoi+9/Eg0DoQXltdTKxcatpAwOHw/GXREwufJeJV5NANlCC4942zL28GnCcd4vzpfm6ohFcGD9wx1nioaFyrfoScdOnvJlvEoOqa6aQAgYBtp78AjV6TyYfaGCwTAQMoPG0nF9TmAoCGO/3P3YYJIwc9kWJmhydXiVeDUBcHX4hLlnG3sOiCsfq5iDkGp0cxwWuqNoTXP067kwuYXCEavxKhxSXTWBEM6dv0DPL17OvkjXgGGgEg2tFtYS9uWMVTAUlL1OX/6eruDlg2PCEc8mYI5w7PRk2GCJIaE7q3bylPmjl1Nk1FLKWNQ5l9cD5RvTtxt/YeOCE7Ear8Ih1VUTCKn3l992mjFE7iUKBitrHmr/BPtixi4bqNjYFWaoh7snm4xF6njO3CQYExgcnyYQ+O2XOy4XzVC4DhUbs5wpH7ugJ56r+9OOJ5JlyFfJ7Pc5dvwEGxvCEYvxygmprppAEDgjoOuoJ9gXKBhzTmv17lQ6xs5pdQIfS27rY+HuyQZDXHdUahdz5x+nBvHcEwCmF1g7/Go3DAk91H4aWz6WCSx26O+49yVr8dr04tsrw6ac5ojFeOWEVNeYgF+z43Ekp4sU0Ws+/8b5sHirO31LQmPTquJexlgGh8Nnq9WLv68reHFcWEq8mwDmg3L3fDb8kBDmuSp38MwqoatYjRZ8kzeXaMTel81NecpT0QYdaN/Bw2x8SI5Yi1cSpLrGBGL5AjlSW/fixYu0Y/c+qtauH/viBIPutFeDJFYFOZ3WZI4d9KDBSYh3EwDFx60yG8O4e7cxQ0LW33HlY5nAsNBcx9VCmCSe8PTLdOnSZTZGcMRSvJIi1TUmgAJcN8EJlEMlXFfECZSLFd3jJ07SC4LJ4Btyl6fbK7T11GogmzIzP6P8/V9i78smsOmtHVveD6gJfEEJ6A3W7Mneuw0aOrm6zWXLxzqBVVAdk9xTMCbTaKWm9NuuvWyM4IileCVFqhv3JoCxwV+2/U65q7VgX5hgsLY+X5/57MsX65Sato7ubz6KvS8bsyqo9US2vB+I74nhAGjAPNL5SfbebW7MW4nurj+ALR/roLFTYNBrZrMjd282mQpWpW6jp7PnDnDESrxyg1Q37k0g8dhxmvLsq+yLEgy6mNlq9vLshGkJpA4o34q9NxuYXCEP7Xx2i/YELKz3t+jopSbQc/dvsFrKmUo2oTIz1vN1xDglp35E9zQaEnbuA9xXriH98MtWNk6EEivxyg1S3bg2gXPnztGmLdvpgfLhJ5PwMt1crAEVHvkO+9LFPFhLPXpZ2A8fXeTMpZubQMnW4QPUBAIglxCGNbn7t0G2Ts82CK687xkKhd8Vf8tjVajjsCnXpJtOjliIV26R6sa1CRw5eowmzA0/Tg6QIfT+x8fwL5wHMBNmXWez92ZjcgU1Hs6W9wtqAgFKTfuYcraayN6/DYYGH2w7hS3vBZAv6SHr+jGPx92fzb1lG9I3G5OeQhZKLMQrt0h149YE0AvYuHmbYy/AbiFjezr3snkBs4a6bviVT8gln6/fAra8X5CZALKI+tsE8BwKDnkjbIC0E8p5dfgTFJ/wHmUu1Yy9Pxv0BtoOnujYG4h2vOLqdUKqG7cmcDjxKI15yiGdsgWW0z3a5Sn2JfMKxSesNsNZ3P0FKEc3F29IJaeuY8v7BbkJ+CuVNEeJie9TpoQm7DOwwXJhNCC48l7A5Mnq+azjklEcPoPjY7l4YRPteMXV64RUNy5NAHWgF3Cv9eNzL4WN2T1buaN5mbiXzAuYwDdkEXt/NjfmqWR2knLl/YT2BP4Fk6c5Gg9ln4ENGkD5+rzAlvcK6MFnqd6NvT8b9AbaDJoQtjcQzXiV1rpxaQJHj5+gsbMXsC9EMMixk6/fi+zL5RUCueQns/dng13Cj3Z5ki3vJ7Qn8C/mYKHez7HPwOamfFXovqYj2fJewf7Nw66GskBvINzcQDTjVVrrxp0JXL58mX769TdhL8CD2+dDuJpql7lHGywN9ezKJxfITMD/E8M2RccuD79iDJsHK3g/j5TZIFejO3uPNugNtB8yOdl9A9GKV5HQjTsTwIlh4+a8yL4IwWCiFHn3uZfKS2A+IGPRuuw9GrAmvFRTz5udBB0OuhZz7GT51uxzsDHzAlO9Oy8AzAay/i85zg2E2zcQrXgVCd24MgEcGPPbrj2Us0Jj9iWwwaqJ28q38fRcADBH8A17g71HG7QEcdQkV95v6HDQtZSc9hHd23Q4+xxs0BjK3+8ltryXkByqg13EnUdMo7//ThpbohGvQCR048oEzl24SE+//g77AgSDswIe7TaHfZm8ROD4vfCpsTEfgDQCXHm/ocNB12KvnuGegw32CzzQajxb3kuYe+39vBni4u7TJmeFJrT9j31JYkc04hWIhK4xgbTOUscR6ax8+Pu9Bw9T2ebhxwbN0YplHjcBlHuZvITJH183/CligfkA/x0gw6EmkBQzL5An+XkBDKFkqdaVLes1AqlTwg9/3V6khskwysUPv8ZJYwKxfIEcKdE9c/Ysvb/+S7rRYQehyaffcQb7EnmNEhPfo1vCrQXHfEBCY88djpNS1ASSgsB4a7nwOaWwxwTHU3LlvYTZOW/18MNtkkN8KFS3HR07cfKa+BHpeGUTCV1jAijAdROcQDlUwnVFnEC5SOruO3iIWg8cz/7wNtgdnKlUM5Nxk3uJPAXyp4x8x/RsuHsFZj7A6imw5X2ImkBSAvsFhrHPwgZLpQsOWcSW9xpoGOEb5+7TJluJ2rRw+Qf
XxI9IxyubSOjGhQkgRcQPP2+h2wqHP1/V5Etp4918KcHYrR7uPm3MfECnmWx5PyKfGI4fE8BYee6ez7DPwgbviffO0+YJHLg/M2zjKGP+StSg+1A6febM1RgSyXgVTCR048IEkC560ryX2R/8KlYv4JYSDc2yOe7l8RqmhddwCH+vV4in+QAQMIGF7LOwMSbQIz5WBwWweoxjloWdMA30GPswZb0JegPh06iUNfuIvt/069UYEsl4FUwkdOPCBH7/Yw+VaNSZ/bFtkEUTy+W4l8aLmEmwcuEnwZAvyOvLYN0g7wnEkwkgKL5PmUuHGSKx95JYz48r7zWQRfWBFuP4e70CRg1GP/nC1RgSyXgVTCR0fW8C6NJ9+NlXZryf+7FtMhSuZU6d4l4aL1J8/Cqz1JW7V4CWX5aqndmyfkVmAvE1JwCwiixb7d7s87DBucN4p7jyXsPsnxn+Vtjd0jfmLkeF67YzR88ijkQqXoUSCV3fm8CBQ0eo68hp7A9tE9gc1so3LR3cB1IFc/dqY/LCNPN2Xhi3qAnwIL9UztaT2OdhE0g17u08WsFINo/dXaoerVz3uYkjkYpXoURC19cmYJ8fjB+T+5Ft0hWoSg+188eEMMCkMBLCcfdqg8PEc3f35mHiKUVmAvE1MQxKz/jMnCXBPQ+bdOaQmclseS8S+EaeMkNd3P0Cc/LY0MlmYUkk4hVHJHR9bQInTp6il5asYn/gYDIWrUfFxq1gXxYvgknhe5uEX/YXmBR+my3vV+QmEF9zAqDomOWCyWF/LSdGXq0MhWqy92uDQ6f+2HsgIvGKIxK6vjYB7A14vO9o9se1wY7IrNW7sS+JV0FX945K7ZPcazBmA5APdkW7QYeDkgeTw7eUSD6nFpZU3lqmheczigaDxtLdDQay92uTpXgtmvf6kojEK45I6PrWBDAU9Ov2nXRP6frsj2uDydM8PlsSaFo4Vks/9F5t0OK7o1IHtqyfURNIHpNuuVZP9pnYIKNogodPGgulzIzPTHK8cDuIM+SrRDXa96ez5875Mk4C35rAyVOn6J0PPmZ/2Ktgb0BCYyrphx3CNman8BL+fq9gDpVv4u9D5TnUBJLHLJtsNYF9JjaYHH5s0Ktsea9ijtks2ZS9X5scZerTN//92ZdxEvjWBA4eTqRe42ayP6qN2RvQZCj7cngVTHjl7v40e7826QpU9/y5ySlBTSB5sF8kb+8X2GdiE8irNZ0t71VgfjkdzO/WQtVo8LSnTS4eLh45EctxEqS6CWAmHWtrsUsXgRjj8rv3HaBde/Zfw+59B+nA4aN0KPG4a1AO5UPrDObbjb9Q/prhE2PZK0GwscovFBu7XJQjPm+fF9jyfqb4hPdMOmHumdgEUmvPZMv7GqtFjH0y3DOxCUwO9+fLe5WJ71H+AS+HXSWEpHL5rFiy04orXDxyQhKvkmPvgUOUePwEnTh1hs6cO08XLv5pmZEsXrsygevNUnfu/HkT9HHR/9m4mV58eyVNfuYV6jdxNrXsP46qt+9PJRt3oZJNrqVU064pJrSuUArXa8f+oMHckKcCZS7dnG4t19I3IA02zI27XxvMCWQu1ZQt72vKtjDDf9wzscH4MCbN2fI+J+yuYWAFSpgkV9bL4L6dTh3D3EDBOm3ZWCSBi1FOoBxiJw7BHzhlLj3xwiJ6/d0P6fuftxhjwSmJf/11iY3NwFUW0ZSkOb148aLlTqdp9/6DtOG7jTRy1vMm0HMPUFEURUkdYEgwiLGzF9CG/2w0vYwTJ08nidHSNNTGBFCA6yZwYNXNUat78vPW7TRjwSIqWr8De6GKoihK2gJDQON79stv0Y7de+nY8UCaC4C4jiDPDRUF48oEkIdn244/6MmXFtODFcIcVqIoiqJEjJvylKdHqzSnOa+8beZgz5w5m7omgNb/sRMn6JOvvqMKLcOvJVYURVGiQ8b8lala2370xXcb6dTpM6ljAjAAJGHDZK9TDh5FURQlumA1U84KjWnxqrV0+sw5NvAHE9YEYABYojRj/iJKnzf8Kf2KoihK7ICUF88sXEpnz19gg79NsiZgG8DkZ15lBSQgh3+6PBUoQ76KdHOByoqiKI7geMebwqRyAIgtXFk/gaEdTPwihjqdh5IcdxarSU+//g6dC2MErAnYQ0AzF4TPSR+K/cNktRwI2fewwaJ0065Uq+MAatp7hKIoiiP1uw6hByuE38+RuWBVtqyfaNJzhBU7B1KZZt3Mxtf7yzUyrfubLXPgnklyZEuoQy8uWW02molNABu/Xlj8rnFkrtJQEPwzF6pGD1dqSs37jKLnFy2lzdu207FjxxRFUVzx+84/qPWA8Mc/PmTFGq6sX0lMPEpbfvudnrFa9U17jaD7yjawYm5VuoF5Nhx3laxLy9asp8tM6oskJnDq9GlatW6DcRyusmBwATCKRyo3oz7jZ9HGzVvYG1AURZECE2jVfywbc8ANucpSrqrN2bLxQGJiIv24aTP1HjeTHrB6B5LGOhrqWEL63aYt4U0Aw0A4ieux2m3YioJBpUisVK/LYPr6h43sxSqKorhFewIyDh85Qp998z3V6TSIbingPESEuYWKrXoRchAlawJHjh6jCXNfYisIBgZwR9Ea9Hif0bRr9x72AhVFUVKC9gTkHLXYsv13atF3NGUqWJV9XsGYFUOLltE///yT1ASQ/XPj5m2Uo0wDtrCNbQBdR06lffsPsBemKIqSUrQn4J4du3ZT5xFTzLnI3POywR6CQnXbUeLxk0lNAL2AMU/NZwvaYA4AQ0Bw6QMHD7EXoyiKcj1oTyBlbN+xi1r2H+M4R4DewJxXlyQ1gS2/73LMB4TKMQewa48OASmKkjZoTyDlbPt9J9XpPMiM2HDPDSDPUPGGnczegatZRM+cPUcvLVnNFrBBpVgF9M0PP7HiiqIoqYFzT6Ac5a76OFs23sHKoS/+8wNlLZH8GeMAS0aXfviJ6QAYEziUeMxyj8HsH9tgc0bvsTNZYUVRlNRCTeD62Ll7D/UdPytsbwCbetsOmmBWhBoT2PzbTvMfuT8GqAwbwX7YtJkVVRRFSS3UBK6PxKNH6evvN1K2ML0BPMNc1jPExmBjAotWrGH/0AYG0bLfGFZQURQlNfHixPBRK/AePHSIdu3eS7/t2Em/bttOP23eSht/2UK/bP2Ntm7fYVbwYEUlhmy4OlITybwKskJ/sP6rgAkMmDyX/SMbOMrLb69kxRRFUVITSQCLhYlhBP69+/abyVjs4F284kMaNGUO1e82hIrUa2/iJobR81RvSVXa9KGOQyfR7JcX0/qvvzOmAMM4kkaGsP/gQXrxrRVhh4TuKFqTpj77WsAE6nUdyv6RzYMVm9Dvu/5gxRRFUVKTWO8JIPjvO3DAtPJnzl9IdToNpByl67PXyoFVlhVa9KTBU+bS59/8kGZm8N+ff6UsxZJP/4PNZV1GTguYADYPcH8EsJwooXFnVkRRFCW1ieWeAIZyNm/dTs+8voQqXucpi0iX/ZDVwLbNYI/Vq+A0UwoSziGLM6cNkKa6RocBARN4uHJT9o8AzgKoaf
0hJ6IoipLaOPcEojMxfORIIn39w0/UpNcI9rpSCswAy++nPfca7fhjN6udEn7bsYsadh/GagJ797AxARxFxv0RwKRwC50UVhQlQsTicBCStX3y5bdUtEEH9ppSA8wfdBs1zQRv7hrcAkNBfZyWDXpUxgQeCGMCGDdCniBORFEUJbWJteGgI5YBrP38K3NKF3ctqQlOE2vUYxj9sWcvey1uQB0jpj/L6tjg8C81AUVRYopYGg7CHMCGb39wNfF7vdxWuDqNmPEs7T9wkL0mKbv37KNRs55jNWzuL9dQTUBRlNgiVkwABvD9T79Q/S5D2Otwwj5fPdwyzeTA8tJX31lFhw4fZq9NgpqAoiieJBZMAMtAcURui75j2GvgSJ+3osnQeU+pema4qmzzblSjQ38zf5GjTH2TzyeTQ6pnG0wWF6jVysxDcNcnQU1AURRPEgsmgH0AWAbK6YeC68EZK5Va9qJnrTIfff612R1s14XW/Bf/+ZFefGu52TB2Z9Gaot4BYm/n4VNSnLZfTUBRFE8SbRNALwAbwST7ALCPCnnVBk2ec03gT449e/fT9OdfpzzVWoh6BeaA+A8+ZutyQmICOjGsKErMEW0TQCqIGfMXstrB3JSnAhWu144++cL9kA0yMLQfMtHxbGA7b9vBQ+7nBtQEFEXxJNE2AeQCqtG+H6ttg41W+Wq2oo2bt7B1SNi+cxc17zPK1MVp2CDR24frv2TrCIeagKIoniSaJoChoO82/mwmeDltAP27S9alea+9zdYh5ehR5PfZQg9Xasbq2NhLRrk6wqEmoCiKJ4mmCWBt/vNvLGN1bexjdvfuv/5cP9Bb8ObysOe5pMtbwSScO3T4CFtHcqgJKIriSaJpAibVAjJrMro26CXMe30JWz4lYPipervww0/3lmlAX/+wkS2fHGoCiqJ4kmiawHZLu2W/5LUBxujf/+QLtnxK2GkZT/fR01ktG6wSevfDT9jyyaEmoCiKJ4mmCaBVXrfLIFbXBumfcY1c+ZSwd/8BmrXgDVbLBjuIX1ni7mAvNQFFUTxJNE0AJ35VCLM/AKkgyjTrxpZNKYcPH6HV6z5j9WxwOMzcV95iyyeHmoCiKJ4k2iZQqVUvVhcEzlfpz5ZNKchRhCMnOT0bZDCdtWARWz451AQURfEk0TQBbOJqO3A8qwugjTOD0XrnyqeEwHnAy1k9m6zFa9OCxe+y5ZNDTUBRFE8STRP4QxA4sVLn2x83seVTAvL+D502j9WyyZ5Qh95avZYtnxxqAoqieJJomsC+/QfMJjBO1wYBefGKD9jyKQFDUA26DWW1bJCZdO3nX7Plk0NNQFEUTxJNE0DGz6UffMzq2tg7eGEYXB1ugN6qjz4zK444LRAYgmphgjpXR3KoCSiK4kmiaQKJR4/Sl9/9l25xyPCJMfr5i5ezdUhJTDxqDq0p36IHq2GTuVA16jN+FltHONQEFEXxJNE0AYCD3ht2Dz88g0NfitZvL0ofnRyS3ckAR1uid8LVEQ41AUVRPEm0TSAwL7DE5Ozh9G0QG3FIDDaYHUlMZOviQJI6TAavXvc5W28wOK0Mh9W4HQoCagKKoniSaJsAwNGSheq0Y/WDQTK5Gu37m2Mgd+8NH6gR/HFi2Y8//0rDpz/D1hcM7hMpKuYvXsbW54TEBPRkMUVRYo5YMAEE9PFzFrD6HDheEn+/6dettOW33809oA4cUIMhI6wA+mHTZnpm4RLHOQAbGEz9rkNNplHuGp1QE1AUxZPEgglgM9h7HzsP1wSDw2HuKlmHSjXpYq5/3Jz5NHP+InNOMHYhP1CuEVuOA/eIAP3Gig/Z65OgJqAoiieJtgmYZZvrPjd5gjj9SJDZiru9xs5gr0+KmoCiKJ4kmiaAs3xXfrTeDMVw2pEAK48K1W1nVilx1yhFTUBRFE8SLRM4eOgQrVjzqWmFc7qRAAaAs4tTsiQ0FDUBRVE8STRM4MDBQ+bQltsL12A10xrcE2Jt4XrtaM1nX7HX6BY1AUVRPEmkTeDAwYPGAMIdLh/MjVcCNvdvKQFzD9gQ1m3UNPpl62/sNaYENQFFUTxJJE0Ayy+XfrDOHN/IaYVyU57ylKtqcxMT7yvb0OQRwvVwf+sEzATnBJRo1ImefvVt1wfJO6EmoCiKJ4mUCWDj1pL3PjKtcE4nlJusFnuR+u3p6+83mh3CP27Cpq9n6ZHKzej2IjUofb6KJrBzZW1w7bcUqExZrV5H3uotaNK8l+jnX7ex13e9qAkoiuJJ0toEjlogNQQMwKRNYDRCgQEUa9DB7PYNrsuYgfXfxs2eT9Xb9aMCtVqbHgJ2+iLJHFr6+F+kn0Y66EerNKfWA8bRC28sS7Pgb6MmoCiKJ0lLE4ABYBfvW6vWhk3fHIxtABs3b2HrDObIkSO08Zct9N4nG2jBm++aIyFxIhgOhFn72Vfm5DKuXFrgzgTKqwkoihIbOJtAWTMuz5UNx9Gjx2iPZQCLV35IuawWOVd3KG4MINZwZQIPVmzK/gHI9FgV6jxiCiuiKIqS2sAEMGTCxSObhyo1ZcsmB5K37dm7jxYtf58ertyMrTMUrNrxqgEAZCrF4TfcvdlczSKKg5O5PwA3569MTXuNZEUURVFSm7QYDkIqiNXrPhP3ADDJm9Cos2cNAOz8Yw/1GD2dvT+bBzEkBhMo1rAj+wcAObUrtOzJiiiKoqQ2aTEchIRwaz//iko27uJ4TkBGq+GLTJ9eNgCwfccuqwE/gr1HG6xsMiZQo0N/9g8AljwVqNXKTHhwQoqiKKlJWgwHAScjuMECx0pWa9fX8wYAkNK6bLPuSe7T5sbc5ahYA6sDABPoNuoJ9o9s7i3bgL7b+DMrpCiKkpqk1cQwSM4IMMSEs3zrdh7sCwPAHAjOL85SvPY1zy6YmwtUpmZ9RgdMYMb8Rewf2WCtK5Y6cWKKoiipSVr1BGxCjQAGcKtlAI17DPeFAQDkQnp5yUr22dlgt/OoJ+cHTODDz742ByJwfwiQVrVBt6E6JKQoSpqTlj0Bm2Aj8JsBAOxHaDd4Avv8bJAqY9ma9QET+P2PvZQ9IXzujHvLNKCvvvsvK6goipJaiHoCFZuwZd1gG4HfDCDx6FGT2iJcOgz0frBUds+BwwET2H/oCDXrPYr9YxtMmLQbNMEsteKEFUVRUoO0Hg4KBkbgJwMA6AX0GBN+aWiGfJWoTpfB9H//938BEzh56jS9/u77JkMeV8AG+TBw7iYmHThxRVGU6yUSw0F+Baa22orRmQuFT3WdLaEOvfzOe/+awJ9//kk7du+lR60HyxWwwSRKqaZdaFMaJz5SFCV+iWRPwE8kJibSt//dRKWbdmWfmQ3mf/PXak2Jx09eawKJx47T5GdeCTtBDLCDGGNoO3btZi9EURTletCegHswD7Dp161hn5sN0l6Pn/uSMYBrTOD8+fO0+bcdlDNMRlEbrKfFSfg4CFmHhhRFSU20J+AO9AB+2ryVBk6ewz6rYOxewB/7DyU1gQsXLtDR4yfoyZcWm23TXAXBYKNBw+7DTC5tnNDPXZyiKIpbtCcgA2mxsR/gy
+/+S417DmefVShYBTrn1SVXDSCJCYDd+w5Q9fb9zRIirpJgMEdQvGFHk5sbvQJMSnAXqyiKIkV7As5gleav27bT68veoyL12rPPKBQ07ut3G0oXrHifxAT++usvYwTg3Lnz9NUPm+g+5JlmKuII9AqG0up1n5t8FUhhqhvLFEVJCdoT4DlsxVTE1l+2/EbL13xKtTsNpJsc5nBtMAxUoFYb2rT1d/rf//53DcYE/v7772v+41nLCLB86I6iNdkKkyNDvopUukkXGjnjWVqz/iszUfHrtt9NL2HnH7sVRVEc+XHTZsd9SznLN2bL+okdFoidiKH//eVX+uDTL2jYE/OoOJK+Mc8kOWAAODfgxbdXXm3sB2NMAIE/uHsATp89R1Oefc3kl+AqdgLCOFezRMNO1KTnCOo+6gnqMVpRFCU8HYZOorxhzjgBiEtcWT/RdeQ0M9aPTJ/ZSiSfCC4cGNbPUaYBTZz38tUh/1CSNQFw8vRZGj7zuRQbgaIoihId0BC/p3R9GjnzeTb424Q1AQAjGGFVkqV4LdFksaIoihJdAkNAjWncnBfNPC8X/G0cTQCcPX+Bnlm4zOwoRkZRTlRRFEWJLmioZypYlQrWaUuvLnvfjPlzgT8YkQmAv//+H/36+y5q0msk3VmsFnsBiqIoSnSwh386DZ9qsoMirqeqCdicOnOWps9fZA4oRh5uHSJSFEWJHgj+WMn5WO229Pzi5XT5779NrE4zEwCXL/9N23buocHT5lHuai0oW4k6Zq+AGoKiKErag1ibuWA1k9m5aP2ONPW512nfocRr4nSamoAN9hccPHKUFi7/kNoMnGAZwuPmtBokKMpcsKoxhvR5Kxqn4m5EURRFSR4cBo/c/zjPBSMvWKCDIZ/8NVtT34lP0ZoN39KZc+fZ+BwREwgVOXHyFC1fu54mPv0y9Rn/JD3edwxVadOHCtVtRw9XapaERyo3TzFcfcFgcwQMiHuwNjfkKk8ZCtemjMXq+4ei9eim/OFzifvyviVYzyZDofDzWWhhpS9Ygy/vczIUqcM+k38pRzflq8KW9SxFrfsubN23wyhGujwVzBA4F4skcDHKiUcqNzMpIWp1Gkgdhk6hIU88Y/L+fP6fjXTxz7/YmBxMVEzADSiXlro79+wz5sP9oDbprYBQYNBrVPbJL3xDqWnr6P4WY9j7tcF9FxyyiC3vZ8rM/JweG/Q6+0xs0j9WnfL0nMeW9zcbqNiYd+mGML32G/NWorvrD2TKepcyszZQ4ZFLzL1x9wzQMMhTvSUdP3majUdOxHKcBL41gcOJR2n4jGfZH9XmxnyV6Z5GQ9mXw6uUnr6ecveYx96vTboC1emRzk+y5f2MmkDylJ6xnvL1mc8+E5t0BarRQ+2nseW9SqlpH1POVhPZ+7XBUEzHYZN9GSeBb03gzNmz9MlX34UfErIc/uYSjajk1I/YF8SLmJbNiLfCdm9hfjka+8v8JKgJJE+pJ6xg2Dp8MExfsCblH/AyW96rlJjwHmUuHf5ERaS/WfL+x76Mk8C3JgB27d1PReq3Z39Ym3TWR5+r+9PsC+JVik9Ybca1ufsFN+SuQLdXbGf97YYkZf2MmkDylJyyhrLX7sk+E5uMRepS8fGr2fJepMzMz+ixga+EbTBhKChvjZZ07MRJ38ZJYwKhWUSloFxwGmo3oFxa6+LIzFFPvsD+uDY35qlIWWt0t1rQn7MvihdJmPwh3VGpfZJ7DQaTpOgKc+X9ippA8pSY+D5lSghzqqAVDDMlNKGyVk+TK+9FSk5dS/c0GMjf7xUwFNRlxLSIxCuOSOgaE4jlC+SQ6v516RJ9+cMms8SK+4FtsDqg2LgV7IviRTC8laPRUPZebbBKpuCwN9nyfsWYwGA1AY5iY5ebBhH3TAD+LVutXmxZb7LBuucVlL5Q+OycWIe/6uMvIhKvOCKha0wABbhughMoh0pCuyESUC4SuocSj1GFlr3YH9gGSypztpnMvCjepPT0T+jhTjPZe7XB5PCjXZ5iy/sV7QnwlMGkcN/wk8L4Rh5oOZ4t70VKT/+UcnWZzd6rDdboF63fwSzHjFS8CiUSur43ASS/m/vaO2Zsj/uhAZbF3Vq+lVkhwb0wXiMQ7F5j79XmJkwONxnOlvcragI8ZlK4zST2edikf6wG5en1LFvei2DINEvVzknuMxhseh0ze4GJI5GKV6FEQtf3JvDPP//Qtp276c5i4U9J89uegeLjVoZf+5y7At1RuSNb1q+oCfCUnLKWstfuzT4PG2wuLDJ6GVvea0j2BgCkYv5x8zYTRyIVr0KJhK7vTQAcPXGKWg0Yz/7QNlf3DPhk4qvEpPfp1jKPs/dqgx2TGDriyvsRmQlYLd44MwEzKVyqKfs8bG4u3tA3PWUsiHigVfh4gKXl1dr1N41IxJBIxqtgIqEbFyaAMb2lH66nm/KEyWGUC3sGGpoPgntxvAZad3fV68/f6xUwOVxo6BtseT8iN4Fn2PL+ZAMVHbM8fK8xT0XKUq0rU9aLbKBiVi/5loQm7L3aZEuoQy8tWX01hkQyXgUTCd24MAGw79ARylO9BfuD22Dy68HWE82Lwr9A3gEt/Ec6zmDv0wY7QB/u8ARb3o/ocFBS0Lp32mEemBSewJb3GpgQxm557j5tMH+Yr2YrSjx+8mr8iHS8somEbtyYAA7On/TMK+yPfhWshS7ZxLSiuRfISyDgFbRa+UgWx96rBZb9ZffVsr/wqAkkRbKc2OwU7vciW95rYCPlrWXDNwaRrXPglLnXxI9IxyubSOjGjQng73/etsPx0PxA63g6+wJ5DWyJz1i4LnufBsv0bklobJYIcuX9hppAUszckUNQxMbChIkfsOW9BHo9WOHE3aMNegGYEP7Ppl+TxA+/xsm4MQFw9MRJaj0w/IQQXoLMZZqbZXPci+QlEiavoazVu7P3aWPmBYa/xZb3G2oCIczCfMAyujF3BfZZACyfvq1CW768xygx8T26vWJb9j5tsLG0VsdBVyeEbaIRr0AkdOPKBP786y9auW6D4zkDJp9Q19nsi+QlAknBwq//Rs/nEZ/0fJxQE7gW0zJ2mg/IV4XuazqCLe8lcK8mS6rVyOPu0waHYr25el2S2BGNeAUioRtXJgD2H0qkhMbhN4mY1k/51mYSiXuhvAISZBUYED5BFlaF3FWnD1vebxgT0LQRVzHzAU2Gs8/BBqul8vb0/iaxhEkfUJYq4b97rB4s0aizmT8MjRvRileR0I07Ezh77jy9tGSVY2/ALzski49fZVJEcPdoiKN5Ae0JXEsJKzDeWrYl+xxsMhbxfl6tQGPo5bCLJACWhT63eDkbN6IVryKhG3cmgHK79uyn4g07sS+CjdlRW6WT5zfImIyilTskub9gMhSqTYVHvM2W9xNqAkHMCpwkdmOe8Cdq3Vq6uec3UOIbyFqzR5L7C8buBZw6czbZuOHXOGlMwK/Z8ThQ7uSp0zT/zRXm3FDuhbAxS+P6LmBfLK9gjpt8fDR7fzZmXqDTTLa8n1AT+JfA/oBn2Gdgc2PeynRPg0Fsea8QGAJcGDZD
KsAB7vMWLmVjBohmvEprXWMCsXyBHKmhu/fgYSpSP3wL2fQGKnXwdGqFMjM+o3yWkYWdFzBpgnv6Klc8h5rAv2A+4J6Gg9hnYIMFErk9fuAShryy1nDuBRRr0NGcP8LFDBDteMXV64RU15gACnDdBCdQDpVwXREnUC6auucvXKQX33aeG8CH8KhZKeTdAOl00hjAqVF+OmaTI9AqVBMAeCduSWjEPgMb7A8oPmEVW94LBPYFPMfeWzDoBTz18ptsvLCJdrzi6nVCqhu3JoB6sFII44Dci2GDcdFMJZuacUXuRfMCCVPWUNaa4fcLwCTy9p3PlvcL2hMIgOdQcOgi9v5tsEIucAQpX0fMgzmPcSsps0MSRbsXcOToMTZe2MRCvHKLVDeuTQC9gVeWvu/YG7gpfxV6oOVY8/GwL1yMU+qJT+jhjtPZe7OJh8Pn1QQCYJ7ogRbj2Pu3MfmCWiGPFl9HrIM9Mg+2ncreWzBZi9emea+/w8aKYGIhXrlFqhvXJgAOHD5KCY27sC9IMOgaFxnlzXzqgfzpb5s5Du7eDOjxlGrm66WiagIBME5+W/lW7P3b4HyNxwYvYsvHOnjfi4xcYs5A4O7NBr2Akk260LETJ9hYEUysxCs3SHXj3gQuXPyTFq5Y43gOsdlUVbefZ5eMIkcM0mFw92YTSC3tzQ9fgswEfH6eAALkqHfMcA93/wYzBNrYs+96ySlrKEfDwfy9BZGtRB16YfFyNk6EEivxyg1S3bg3AYBziMu36Mm+KMGYJaP9X2ZfvFgHk773Ng2/O9SctdzaHymDOeQm4N/zBMzQYIfwKcbR4Lm73gC2fKyDjWGPDXo17P4HgOXhZZt3F/UCQCzFKylSXTUBiz//ukQr131BtzxWhX1hbDCccnuFtp5MLodWXd4+L4RdKhpYEtve6k57c+7DCR0OCiQVzFK9K3vvNoGloXPZ8rEOksRlqeqwEdT6BnKUaUCLV65lYwRHLMUrKVJdNYErIMNoy/7hJ8tAYGPVLDPuyL2EsUyxcavMB87dl02GInWo+PjVbHmvo8NBG6jY2OV0U76q7L3bmKWh4723NBS5vnJ1mxu2oQMy5q9EjXoMp3PnzrMxgiPW4pUEqa6awBUuX/6bvt24mW4vEn49PV6wW0o0Nh8T9yLGMlgqms1h+7wxuY4z2PJeJ96HgxAkcyNIMvdtg97gnVU6seVjGqtRVnT0MuvbbMjelw16AQ9Xakpffr+RjQ/JEWvxSoJUV00giJOnz9CImc+bVQPcC2Rj77At5bGdxFdbSo+GHxK6s3InEzC5OrxMvPcEzH6RGuH3iwQOVfJeIwD7eO6uPzDJ/YRinxrGxYZwxGK8ckKqqyYQBA6S2LFnP+WpHj6zIkBmTpxV6rXxc7N7OFxWUQssrSsyailb3svEdU8ALeUx75rzAbj7tsFQULGx3soaano4PZ5m7yeYG3OXp4J12tKuvfvY2BCOWIxXTkh11QRCwAay15d/6DhJjGGhjMUbeC5Y4vzk7LX78Pd0hcBGofFseS8jMwF/TgyXNquCnmDv2Qa9wCxVO7PlYxU0wpABF2lPuHuywTDQPaXrmzTyXFxwIlbjVTikumoCDInHT1LzvqPNi8O9UDY35Al8NNiByb2gsUjpGVaryQpyYVcJ5SpPt5Zp4buNY/FsAjhUBSu/uHu2sRc9cOVjkw1UYuL7lL1WL/Z+gkGjrtWA8VYjj48LTsRyvEoOqa4xAb9mx+OQ6F66dJk2/vob3Vc2/CQTQKv5wbZTrADzGfOSxiZY/ZOhcC32fmzMjlErYHLlvUq8moDZMW61lsOdJQzMUNA476wKwp6HRzrPclwNhGGgArXa0G+79vgyXiWHVNeYgD6YpBw/eYpmv2J1M/NXZl+sYDIUrkMFh3hnp23JqWvprvr92XuxuQm5hJoMY8t7lXg1gVLTPqacLcez92sT6NV2YcvHIvgtCw19wzE7Lnrz2RPq0BzrW/ZzvOKQ6hoTQAGum+AEyqESriviBMrFuu7ufQeoZseBzsNCVgvr9grtPDMsZDaO9X4u/H1Z/5apZBMz6cbV4UWMCcRhKmlsoMLwHne/Nv+mTOfriCmsng0WOGSpJsj5lb8SNeg+zBwk5fd4FYpUV00gDGfOnKUvv//J7C7kXrBgsOrivmYjqfi4lZ6g4OCFjhvH8O+5us1hy3sR7O3I3WMee682gSWST7DlvQhW+uTtM99qqIRvyCBvVMEhC9k6Yg3sB8AQLHcfwWAYKF/NVvTjL1vN9+z3eBWKVFdNwIGjx0+YAyduLuA8LKQoSmyAXu5dJevS3Fffvvotx0O8CkaqqyYgYM/+g1S3yxDTsuBeOEVRYgs02rDC7/SZM1e/43iJVzZSXTUBAWfPnqNv//sLPVA+/HF8iqJEH+z4L1SnHf26fec133G8xCsbqa6agJATJ0/RK0vfM9vOuRdPUZTog2EgLO1+bdn7Sb7heIpXQKqrJuCCw4nHqP/kOY7HUSqKEh2QALL/pNl0/nzSDKHxFq+kumoCLrh48SLtPXiYKrbqrfMDihJjYDlo3S6DzWIO7vuNt3gl1VUTcAHK/fXXJfr+562Us0Jj9kVUFCXyYB6gcN129PPW7ey3C+IxXkl01QRcYOviXOI3V69zPnsgmFzlTIoJrEOPParSjXkFS2DNPVRhynsHpyyauEc8C66sV8BvxN7bNeA+K7HlYwF8K8hhxV/7taBXjkbZW6s/Yr9bm3iNV1y9wagJuCBY99SZszT6qfni/QN4qe9tOtJsRIo1Hmo3le6qP8BxZzQCaI5Gg9k6vMBD7adZ1z+UvTcbpMu4q24ftrwXwD3e53CWNIDRZavdi60j2uAecB420npz1x4M3lkcGD929gL2mw0mnuNVONQEXBCqe+TYCXNMndMhNAAH0WSt0cMcfsFuhY8yxSesMgnEuGu3MbuiLSPDtn2ujlgnHnIH4fzrh9pNZu/tX8rRzcUbULFYPEISKSHGrzYH3fPXfi2ZClal1gPH02mrUcZ9s8HEe7xKDjUBF4Tq4v9v/2MvFanf3rEVDdD9zl6nrznsm/0AogiSjD1gkoyFu49A8EAuGq6OWMf/JrCBio1bYf1G4bPfohdwT8PBTPkoYxkAUkPnaDyMve5QMuSrRFXa9DGbObnvNZR4j1fJYUzAr9nxOFJb90/rv63/9ke6r5xz2mkAI7i7Xn9zuAv7IUQL6wPEATlO+YSuHjjjwd6AzAS8e7xk6emfmPMAuPsKBgewFBr2JltH1LDeJ5x5cH/z0Y6poYHZEFa3nckLxH2rHBqveIwJ6IORkZwuMhS+uvQ9urNYTfaFDcWkabZaOyVjLOtoyakfOY6Zm+yiCU1idlgrHHIT8ObxksXHr6LMDtlCzfnYNXuy5aMGDGDyh/RAi3FJrpcDE8GPVmlOK9Z9zn6nyaHxiseYAApw3QQnUA6VcF0RJ1DOT7qJx47T9PkLxTuKA1lHR5lhGPbDiAIIksjRjt4Kd8026a4cpOO13oAxAZ+mkkbK71zdZrP3FAyyhebv/zJbR3SwDMDqFeds4zS
PEQAGcH+5RvT84uXsNxoOjVc8agIucNI9dOQoDZwyx/l84itgaCVnq4lmMo//QCJPwpQ1lM3puD6rN3BL8UZUfIK35gZ8OxxkmTHmAm6xemjcPdng3Is7KrYz5/Ky9UScDab3+XDHGda1OS+usDODjp/7Ev3zzz/sNxoOjVc8agIukOjuO3iI2gyaYHYvci9yKFgTjSVxsWIEOCazwMBXHHsDpifTfJQJrFw9sYhfh4NwzOKDbaey9xNM+kI1KU+v59g6Ig8MYC092mmW47sGYAAYbu074Slfxo3kiISumoALpLqB1NODzeoF7oUOJV3+amZ4JVZOJgv0Bnqy1xpMhsK1zWQyV0cs4svhIKsXUHTMMspYpA57PzboBdxZpSNfR6QxcwBrzJ4AzFFw1xsMDOC2wtWtxtV4Onn6tG/jBkckdNUEXCDVRfKq33buphodBoiNINCyHm1aR+yHE0ECwXIh3Zgv/Ea4wEqnAVRmhjcO2Zf1BLxlAhhOubfZSPZegsFcQL6+C9g6IoplACUmfUA5W00QDwFhnq1FvzF05OgxX8cNjkjoqgm4wI0ujGDrjl1mHbM066hZv91gYEysvMES1rvq9UtyjaGkL1iTHhv4KltHrOE3E8D9FBz6hpmo5+7FxhwiX72L9fdRnsi3DABnA9/XdIRoGahtAI/3HW3m2/Bd+T1uhBIJXTUBF7jVPXfuHG3+bQdVaNnThRFUNEMx2DSDj4b9mCIAJg8LD3/TMQ+NmWys1METB9L7zQSwrl42bFfHuu/oGnUZ613G5PXd9fuz1xgKDCBzwarUtNfIqwYA4iFuBBMJXTUBF6REF0aAzIaVWvcWDw1hnDRLtS5UbPxK8/FwH1UkwFADdpaG30VsH0g/1yoT20tGA8Nc/jCB0jPWU94+L7D3EAzepey1erF1RAo0KIqOedfqjXRjrzEUuwfQrPcoOnDoyDXfU7zEDZtI6KoJuCClujACHHVXtW1fsRGghX17hbZUdPSyqC3pg26REW9T+oLhE3mZVlvp5jG/gcw/JoD8OqvotnIt2Xv4l3ImH1ShYYuZOiIDnnmRUe/Q7RXbMdeXlOAhoIOHE5N8S/EUN0AkdNUEXHA9umDHnn1UrV0/F0ZQnm4t04IKW4EYLT/uI0trsGLpgZbjzMfJXaONSSfRYpz56Ll6YgG/mACWE2NlDXf9wQR2pg9l64gEGCIsOGQRZS7zOHt9oeAdwyqglv3H0uHEf4eAgom3uBEJXTUBF1yv7uXLl2nXvgNm1VDG/LIU1PgwbinekPL1mR+1JaTINnlLCefcSBh7LjT8LbaOWMCYgMfnBNA7Q94fpzTLeG8ylWoSmFti6klTZgU2geXt/bzVE6nLXl8ouF6cz9FqwLhkDQDEY9xIa101ARekhi52Ou45eJjqdxsm3lkMzO5iq0VuMpBGeJ4ALbpHu842q0y4a7Mxq1Cqdjabl7h6oo0fegIlJr1vzgHgrj0YzNM83HE6W0daApPCCqCcrSfSjXmEPV7LALIUr0Udhk426Ve478cmXuMGV68TUl01ARekli6M4OCRY9R28ERxriFgziSo3pWKjV1uBbTIrs3HeP/tldonuaZQsAP6kU4zozaPEQ5ZTyB200aY/EDd59INucMPzWEYEfNJkV6xhSFLjP9nq9mDvS4O5AK6p3R96jfxKTpx8hT77QQTz3HDLVJdYwJ+zY7HEUu6x0+epoFT5tIdRWXZRwGO3MtUqikVGPhqRFvcdjoJjDNz13UVq1WHfPbFxixj64kmchOIwbQRVu+v6OildHOxBux1B4Od3JHdGLbBDFXm7/cSZUqQn70NA3igfCOa+txr7DfDoXFDjlTXmIA+GBlpoXv0+Ama8uyrlLV4bfZDSQ6Tc6jdtCvnEkRmeCiwZHSQ6b5z12RjpyuOtWEh2XBQLPYEArts76rrvHkPu7hxcBFfTxpgmRPmHR7uMN0MWXLXxIHzAHJXa0EvvbOa/V6SQ+OGHKmuMQEU4LoJTqAcKuG6Ik6gnOoGOH7iJC14awXdXaqeaR1xHw0HPnjs6sVywYgMD5nW6Lui1ihM6mEMC1mBl60rCgR6AgvZ67UxJtAjtnoCpS0zfbTLk+z1XoNlzrdYLXEMF3L1pDZIF4L1/3cLdpYHg42TBeu0pQ8//5b9VsKhcUOOVFdNwAVpqXvy1Cla8dFn5rCMdHnDT8AGY5aRlm1BBQa8HJHVQ2ZcuutswaRfOcpYrJ5Z3srVEw28aAK45kJDF5shntBrDQXX/nDHmWw9qYpZ/bPWDP/g3eOuhQM9yJsLVKayzbvTj5u3sd+JExo35Eh11QRckNa6Z86epS+//4mKN+wk3ktgg/QO9z8+1gwbpHWvAFlGcWi+005irBa6s0onM4zE1RNpAibgoTkBK9iil4fd49y1BnMDdplX7ZLm+0nQ+kdPA8dAOiUYDAYGgEUQ9boMod//2OvL7zc5Yl1XTcAFkdC18w3V7DiAMheSj7EC7DK+tWwrk9AtLXsFWPlTeMRbJj89dx3BmA1LjYbGxPyAzARiZYloIN3y/Y+PYa/zGq5Mxhce+Q5TTyphWv8fmdY/doez15EMGOLMnlDHLAHFLmA/f78csa6rJuCCSOkiAynOJOg5doZZP819WOFIV6A6PdDC6hVMfC/NegXYtfpQuylmEpi7hmDM/ECH6VHb9Wwj7wlE3wRg4lhqK0m3bH7v1hPZelIDvENI/nb/46OdV4eFgAngBys2oXFzFtDpM2fN++337zeUWNdVE3BBpHWPHT9Bzyx8x6yjxsfEfWTJYXIPlW9Njw1+zQooaXNqGfYO3Fm1s+NqIYCU0/n6vWgCMVdXJAiYQOzPCWDeBcnhbsrn3BO0h4HS5De+0vovMOAVurVcK1Y/HNgV/1jtNrRszafXvNfx8v3axLqumoALoqF76vRpWvflf8w8gZsdxjYmp0/L8WYZX2pvHjIJ5ka+I0opYVauWH9n0kpYwYWrL63xwpwArhH5dpD4jbu+YGC+mUo2oaJjUn81EN4VM/b/+BjXrX9cF/a+1Ok8iDZt+S3JOx1P3y+IdV01ARdESxfs2nuAWg0YT1lL1Ba1vIPBkEKmUs3MblO03lNziKjMjPVmY5JkjbhZyWS1KDHZydWV1hgTiOG0EUgbjuM6M5cRrLix3oGMRepaPYbn2bpSCt6NEhPes96Vpymz9c6w2mHAyjZsABs4ea45CYx7l+Pt+411XTUBF0Rb9+z5C/TcG8vp4crNXK8eAhg6wAEwBQa9ZjaZpVZqBwxFmFwxgoNzzFkJ1btaZhT5YzTlPYEomIBlAMXGrTSrqbjrCgXzLPe3GMvXlQLwbBImfUj5+79Md1Rub0yG000ONEzQUy3RqBMtef9j9j22idfvl6vXiUjoqgm4IBZ0L//9N/3wy1aq3r6/ybrIfZBOmE1mdfuaFT5mFVEqDM8EDqfvIZrIxPDCPY0GR3zpaMAEYnBOwHr+xa3W9z2NhrDXFAp+P+TnSZXhPUsbvwOOqZScUsZh5/9pO3iCWf7JvcPBxPP365ZI6KoJuCCWdE+dOU
vj5r5IOco0SFGvAAT2Fow2Y7/XvYTTCiZFx6wIbB4StCJhBPc2GXYl7QVTXxog7wlE0ASs54bhl3sbD2OvJxRM+N9Wvo3J1MnWJ8XSxQov7ADHQfVuUj7YoPWPzV/5a7a2eqjL2HeXQ79fOZHQVRNwQazp/nXpEn35wybTK7izWC3XcwXAfMjF6tFDHZ4wrdFS01NuBgiyyHV/c7H6rFYo5mD9+gMjZgQxNxxkegCrKYfUAK4kDyw8Yglfn4Qrwb/Y2BWUs81kcwYEp+UExv7vLduA2gwab/a1cO9tcuj3KycSumoCLohV3TPnztOsl940cwVomXEfrRMIMLckNKGHO84w+wsQKNgg4gD2AuTr84IZs+Z0QjFG0HBQRIwgloaDMAlcfPxKuqfBQPY6kmDMun7Ks4Mi+E9D8F9OD7adQjeXaMTrOIBGA4YhSzbpQotXrmXfVyf0+5UTCV1jAn7NjsfhV91Lly7Tz9t2UPO+YyhbiTqu9xXYwAyw+/TBdlPMRKVZf24FEDawJAN6E0ged1M+2ZJWjHHfXa+/mVfg6kstYmU4CAaAlnj2Or3Za0hKOcpQqBY9koJDYqCFeR8kesvZagJlLOK89DQ5MOyIhsbgafPo4JGjvvyOksPPusYE9MHI8ILuyVOn6fV3P6CiDTpQ5oLux3lt0OLD0YA5W403AaSkywlk/P19zUaalj5XfyiB9NM9TO4jt6YjJRaGg7Aiq+joZWaFFKeflHLmlLB7mw539VyggwnfIiOXWL/DKFECuuSw0z7U7jSIPvnqe/Oe+f07CsXPusYEUIDrJjiBcqiE64o4gXKqK8OtLvIP7di9l/pPmk05ytRP8cSxwTID7Pa9r+lIc2oUAguCKRd4QkGmSYz5o6XP1h2COTmtRncqbvVApBpuiPZwENbgYx9AFuEyUGMABarRPQ0Hi1cCQQNDa4WHv2WMI71lIHzdzqAhgGWfmPid++qSa07+iofvKBg/66oJuMBruqfPnKHP//NfqtN5sNWSq+sqRTUHctTkaDzUbLhKsFrsyHPvdKANAhIOQ5EagUmCV641FRq2ONVzDcl7AqltAoHJ2PwDXjYTu5wuhzGABgOt5+BgABjyserHXA5Of7u7/gDxUByH6QXmr0Q5KzSmLiOm0radfyR5t+LpOwJ+1lUTcIFXdc+cPUeLVqyl0s260Z3Faro6uIYDm85whu0jHWeYiUYEeqQYZgOURcKkNZS9dm+xEdi7YfP0fDZV9xLITSAVh4OsAI0hrkc6zzLDOpwmB5Zs3lWvf1gDgEliHgU9tAfbTKZby7a0TNT9CrFg0uWpYHqPDboNpY82fMu+UyAevyO/6qoJuMDrulhF9NTLb1HeGi3ptsIpHya4ihWs01lBE8EKq1aQnyi5iWQYRfbafeRGYIHWbI4mw1Pt5LRIDwchSGP83+3JW+lsA2D2bmCsHxO9eCZIGZGtdi/z91w9bsBCAiwoKPd4d3pr9UfsuxRMPH9Hbol1XTUBF/hBF/9/78EjNHLWC/RA+cZmuR+6/1xgcIPJT1SyqVmBgslI5CjCEAVWp9gBzBhB3b5moxhXB4fJN1TmcSow4CUT/IIDolsiZgLWPeP+8/R+zhz3GKqRPIE5AAznBG/eCwT+j02PAsNk9zUf47Le5EHLH+dbl2jUmZ5ZuNScec29R6HE+3fkhljXVRNwgZ90//77f7R7/yEa89QCc6QlzOB6h4lsMMGLHDgPtZtqjpcsMQk9hHWmNY/hnRyNhrjeoWrvbr6ebKhpPyewwbq2T0zrH3sfuPqTxTJiaGPjGIaAMLyGZ4VxfgR+mOsdFdtZv9H1zevYIPhnK1GbSjbuQs8uWppssrfk0O9ITqzrqgm4wI+6+Ld9h47QuDkvUb4arcycwfVOIAeDid7MVksey0Xz93/R7ErG/oOUpCoIZENtQrm7P52ibKhpOSeAoR/s/sXYP+YzuLqTBQZQsKaZdEdrH88Hw2v3NhlOmUs3u+5xfpvAhG9lurtUPSrfoie9/M5qOnHq3xU/btDvSE6s66oJuMDPuvibw0eP0+xX3qYyzbvTXSXrmhUiXDBJMVYQwnr1u+r2p0c6zQi0llMwFGUykVbtYjJemlVK4uWTqTwcdGVVTvHxqylPr2etlnp7tk4nsJcie50+9GjX2WYCPX0h96fJhQPBP3OhaibFc8Mew2nph5/SxT//8vX7zKG6PGoCLogX3QsX/6Rla9ZTo57DTX4Yt2cdi7GCk5uJ4lBwkD1WKT3aZbZJwhbYw5B87yC1TABnKGB+A5vocMSm2zN3k3CdzyE5MNmLnFK5q7WgPhOeom9/2nzN7xwv77ON6vKoCbgg3nT//Osv+vrHTdR7/CzTiryjaI0Up6NIS5DqImPR+pSjyTDKP+AVM28AQwjdZ5ByE8BY/6cm8GNVTr6+800q7vQFU5bKOy1Bqx+bA9GTS2jchZ58+S3adyiR/X3j7X1WXR41ARfEq669A/mJ5183h4YghUBKE9WlNZiDwCol7JbFOb0YpzcrlaatM0M3TnMCWPKau8e8QNC3jATDTdgLgZO27q43wORV4spFG5gzlv1ixVf9bsPo3Y8+o/MXL7K/q028vs9cvU74WVdNwAWqe8GkDnj7vXXUeuB4s6MUhoDUAqmxzDTVsa4Ju5zvrNKR7ms+inJ1e5oe7fIU/7dXwBLNnK0mWX87l+5tPJRuq9DG1ZLWSIIJfAR+DNlhieeoJ+fTdz9vYX9LDn2f5fhZV03ABar7L+fPn6cDh47Qm6s+ok7Dp1C+mq3MEESmx6qm2lJTJSlY2onlvPeVbUilm3al0U++YM6UuHT5b/Y3DIe+z3L8rGtMwK/Z8ThUV44bXWQvfXfteuo1diYVqNXGLEO8tVC1mJxD8BL2GD/mYzAvU6FlT5o07xUzV4Pn7vf3KhTVlSPVNSagD0aG6so4c+4crfn8Gxo4ZS4Vrd/BDFdg/wHmEWJy2CjGgHFiSSfSODxUqSnV7DiAnnzpTfppy/Ykv0c8vVdAdeVIdY0JoADXTXAC5VAJ1xVxAuVUV4aXdU+eOkVf/fATzVzwBjXuOcLsTkYvAWPZ2LjEBcF4A8NnmQpWpawlatP95RpRicadzZLOt977mHYfOMQ+X5t4fa+4ep1QXR41AReorhxOF/MIhxOP0qqPN9CYp+ZT3c6DTUsXWSuzFK9lJpgx5s0FSr+AnhA24WGoDJPqGOLBAUCdhk+l5xcvp41Wa597nsmh75Uc1eVRE3CB6sqR6qKngDMPnn5tCXUZOZXKNu9OD1ZsYoaQcAYCJkExjOS1yWYE+/R5K5oWPsbzMWmOydw81VtQ/a5Dafj0Z+nNVWvp1+07ff37cqiunEjoqgm4QHXlpFQXvYVjJ07Sd5u20OJVH5mcRi36jaXC9dqbJan3lK5vhk3QkoY5oOcQTYPA+D0mbtGLgWGhdX9vmQZm2Ktiy17UdeQTNGP+Ilq+dj39su139p7j6fcFqisnErpqAi5QXTmprYusp4cSj5lew4K3V9GQJ+bR4/3GUOXWfahgnbbmAHSYBFrbMAoEY
wwxITDjrGUYhhsQ1DE5i/KY1MYkLVrzGLrCuD16K7mqPm525dbuPIg6DptCE55+iRavXEvfb/qVjp88yd4bh/6+clRXjlRXTcAFqisn0rowif2HE80KmnVf/MfqRaylea+/YwJzv4lPUdtBE6hF/zGOtBo4jloPGk9trL/vOXYmjX5qvkmq9/ryD+n99V/Ttxs30/Y/9lq9lVPX6MfLc7ZRXTmxrqsm4ALVlaO6clRXjurKkeqqCbhAdeWorhzVlaO6cqS6agIuUF05qitHdeWorhyprpqAC1RXjurKUV05qitHqqsm4ALVlaO6clRXjurKkeqqCbhAdeWorhzVlaO6cqS6agIuUF05qitHdeWorhyprjEBv2bH41BdOaorR3XlqK6cSOgaE9AHI0N15aiuHNWVo7pypLrGBFCA6yY4gXKohOuKOIFyqitDdeWorhzVleNnXTUBF6iuHNWVo7pyVFeOVFdNwAWqK0d15aiuHNWVI9VVE3CB6spRXTmqK0d15Uh11QRcoLpyVFeO6spRXTlSXTUBF6iuHNWVo7pyVFeOVFdNwAWqK0d15aiuHNWVI9VVE3CB6spRXTmqK0d15Uh11QRcoLpyVFeO6spRXTlSXTUBF6iuHNWVo7pyVFeOVFdNwAWqK0d15aiuHNWVI9VVE3CB6spRXTmqK0d15Uh11QRcoLpyVFeO6spRXTlSXWMCfs2Ox6G6clRXjurKUV05kdA1JqAPRobqylFdOaorR3XlSHWNCaAA101wAuVQCdcVcQLlVFeG6spRXTmqK8fPumoCLlBdOaorR3XlqK4cqa6agAtUV47qylFdOaorR6qrJuAC1ZWjunJUV47qypHqqgm4QHXlqK4c1ZWjunKkumoCLlBdOaorR3XlqK4cqa6agAtUV47qylFdOaorR6qrJuAC1ZWjunJUV47qypHqqgm4QHXlqK4c1ZWjunKkumoCLlBdOaorR3XlqK4cqa6agAtUV47qylFdOaorR6qrJuAC1ZWjunJUV47qypHqqgm4QHXlqK4c1ZWjunKkusYE/Jodj0N15aiuHNWVo7pyIqFrTEAfjAzVlaO6clRXjurKkeoaE0ABrpvgBMqhEq4r4gTKqa4M1ZWjunJUV46fddUEXKC6clRXjurKUV05Ul01AReorhzVlaO6clRXjlRXTcAFqitHdeWorhzVlSPVVRNwgerKUV05qitHdeVIddUEXKC6clRXjurKUV05Ul01AReorhzVlaO6clRXjlRXTcAFqitHdeWorhzVlSPVNSagKIqixCf/j3MQRVEUJR74P/r/T/I0iJMTx7EAAAAASUVORK5CYII=" -} +{ + "id": 0, + "title": "WebSearch Assistant", + "description": "A chat assistant that combines information both from knowledge base and web search engines. It integrates information from the knowledge base and relevant search engines to answer a given question. What you need to do is setting up knowleage base in 'Retrieval'.", + "canvas_type": "chatbot", + "dsl": { + "answer": [], + "components": { + "Answer:PoorMapsCover": { + "downstream": [ + "Retrieval:BetterRocksJump", + "KeywordExtract:LegalIdeasTurn" + ], + "obj": { + "component_name": "Answer", + "params": {} + }, + "upstream": ["Generate:FullYearsStick", "begin"] + }, + "Baidu:OliveAreasCall": { + "downstream": ["Generate:FullYearsStick"], + "obj": { + "component_name": "Baidu", + "params": { + "top_n": 2 + } + }, + "upstream": ["KeywordExtract:LegalIdeasTurn"] + }, + "DuckDuckGo:SoftButtonsRefuse": { + "downstream": ["Generate:FullYearsStick"], + "obj": { + "component_name": "DuckDuckGo", + "params": { + "channel": "text", + "top_n": 2 + } + }, + "upstream": ["KeywordExtract:LegalIdeasTurn"] + }, + "Generate:FullYearsStick": { + "downstream": ["Answer:PoorMapsCover"], + "obj": { + "component_name": "Generate", + "params": { + "cite": true, + "frequency_penalty": 0.7, + "llm_id": "deepseek-chat", + "message_history_window_size": 12, + "parameters": [ + { + "component_id": "Retrieval:BetterRocksJump", + "id": "69415446-49bf-4d4b-8ec9-ac86066f7709", + "key": "kb_input" + }, + { + "component_id": "DuckDuckGo:SoftButtonsRefuse", + "id": "83363c2a-00a8-402f-a45c-ddc4097d7d8b", + "key": "duckduckgo" + }, + { + "component_id": "Wikipedia:WittyRiceLearn", + "id": "92c1e8e4-1597-4e65-a08d-c8cac4ac150f", + "key": "wikipedia" + }, + { + "component_id": "Baidu:OliveAreasCall", + "id": "19b5445a-7a6e-4a26-9aa9-47dfe3a03bea", + "key": "baidu" + } + ], + "presence_penalty": 0.4, + "prompt": "Role: You are an intelligent assistant. \nTask: Chat with user. 
Answer the question based on the provided content from: Knowledge Base, Wikipedia, Duckduckgo, Baidu.\nRequirements:\n - Answer should be in markdown format.\n - Summarize and label the sources of the cited content separately: (Knowledge Base, Wikipedia, Duckduckgo, Baidu).\n - Attach URL links to the content which is quoted from Wikipedia, DuckDuckGo or Baidu.\n - Do not make thing up when there's no relevant information to user's question. \n\n## Knowledge base content\n {kb_input}\n\n\n## Wikipedia content\n{wikipedia}\n\n\n## Duckduckgo content\n{duckduckgo}\n\n\n## Baidu content\n{baidu}", + "temperature": 0.1, + "top_p": 0.3 + } + }, + "upstream": [ + "DuckDuckGo:SoftButtonsRefuse", + "Baidu:OliveAreasCall", + "Wikipedia:WittyRiceLearn", + "Retrieval:BetterRocksJump" + ] + }, + "KeywordExtract:LegalIdeasTurn": { + "downstream": [ + "Baidu:OliveAreasCall", + "DuckDuckGo:SoftButtonsRefuse", + "Wikipedia:WittyRiceLearn" + ], + "obj": { + "component_name": "KeywordExtract", + "params": { + "frequencyPenaltyEnabled": true, + "frequency_penalty": 0.7, + "llm_id": "deepseek-chat", + "maxTokensEnabled": true, + "max_tokens": 256, + "parameter": "Precise", + "presencePenaltyEnabled": true, + "presence_penalty": 0.4, + "temperature": 0.1, + "temperatureEnabled": true, + "topPEnabled": true, + "top_n": 2, + "top_p": 0.3 + } + }, + "upstream": ["Answer:PoorMapsCover"] + }, + "Retrieval:BetterRocksJump": { + "downstream": ["Generate:FullYearsStick"], + "obj": { + "component_name": "Retrieval", + "params": { + "empty_response": "The answer you want was not found in the knowledge base!", + "kb_ids": [], + "keywords_similarity_weight": 0.3, + "similarity_threshold": 0.2, + "top_n": 8 + } + }, + "upstream": ["Answer:PoorMapsCover"] + }, + "Wikipedia:WittyRiceLearn": { + "downstream": ["Generate:FullYearsStick"], + "obj": { + "component_name": "Wikipedia", + "params": { + "language": "en", + "top_n": 2 + } + }, + "upstream": ["KeywordExtract:LegalIdeasTurn"] + }, + "begin": { + "downstream": ["Answer:PoorMapsCover"], + "obj": { + "component_name": "Begin", + "params": {} + }, + "upstream": [] + } + }, + "graph": { + "edges": [ + { + "id": "reactflow__edge-Answer:PoorMapsCovera-Retrieval:BetterRocksJumpc", + "markerEnd": "logo", + "source": "Answer:PoorMapsCover", + "sourceHandle": "a", + "style": { + "stroke": "rgb(202 197 245)", + "strokeWidth": 2 + }, + "target": "Retrieval:BetterRocksJump", + "targetHandle": "c", + "type": "buttonEdge" + }, + { + "id": "reactflow__edge-Answer:PoorMapsCoverb-KeywordExtract:LegalIdeasTurnc", + "markerEnd": "logo", + "source": "Answer:PoorMapsCover", + "sourceHandle": "b", + "style": { + "stroke": "rgb(202 197 245)", + "strokeWidth": 2 + }, + "target": "KeywordExtract:LegalIdeasTurn", + "targetHandle": "c", + "type": "buttonEdge" + }, + { + "id": "reactflow__edge-KeywordExtract:LegalIdeasTurnb-Baidu:OliveAreasCallc", + "markerEnd": "logo", + "source": "KeywordExtract:LegalIdeasTurn", + "sourceHandle": "b", + "style": { + "stroke": "rgb(202 197 245)", + "strokeWidth": 2 + }, + "target": "Baidu:OliveAreasCall", + "targetHandle": "c", + "type": "buttonEdge" + }, + { + "id": "reactflow__edge-KeywordExtract:LegalIdeasTurnb-DuckDuckGo:SoftButtonsRefusec", + "markerEnd": "logo", + "source": "KeywordExtract:LegalIdeasTurn", + "sourceHandle": "b", + "style": { + "stroke": "rgb(202 197 245)", + "strokeWidth": 2 + }, + "target": "DuckDuckGo:SoftButtonsRefuse", + "targetHandle": "c", + "type": "buttonEdge" + }, + { + "id": 
"reactflow__edge-KeywordExtract:LegalIdeasTurnb-Wikipedia:WittyRiceLearnc", + "markerEnd": "logo", + "source": "KeywordExtract:LegalIdeasTurn", + "sourceHandle": "b", + "style": { + "stroke": "rgb(202 197 245)", + "strokeWidth": 2 + }, + "target": "Wikipedia:WittyRiceLearn", + "targetHandle": "c", + "type": "buttonEdge" + }, + { + "id": "reactflow__edge-DuckDuckGo:SoftButtonsRefuseb-Generate:FullYearsSticka", + "markerEnd": "logo", + "source": "DuckDuckGo:SoftButtonsRefuse", + "sourceHandle": "b", + "style": { + "stroke": "rgb(202 197 245)", + "strokeWidth": 2 + }, + "target": "Generate:FullYearsStick", + "targetHandle": "a", + "type": "buttonEdge" + }, + { + "id": "reactflow__edge-Baidu:OliveAreasCallb-Generate:FullYearsSticka", + "markerEnd": "logo", + "source": "Baidu:OliveAreasCall", + "sourceHandle": "b", + "style": { + "stroke": "rgb(202 197 245)", + "strokeWidth": 2 + }, + "target": "Generate:FullYearsStick", + "targetHandle": "a", + "type": "buttonEdge" + }, + { + "id": "reactflow__edge-Wikipedia:WittyRiceLearnb-Generate:FullYearsSticka", + "markerEnd": "logo", + "source": "Wikipedia:WittyRiceLearn", + "sourceHandle": "b", + "style": { + "stroke": "rgb(202 197 245)", + "strokeWidth": 2 + }, + "target": "Generate:FullYearsStick", + "targetHandle": "a", + "type": "buttonEdge" + }, + { + "id": "reactflow__edge-Retrieval:BetterRocksJumpb-Generate:FullYearsSticka", + "markerEnd": "logo", + "source": "Retrieval:BetterRocksJump", + "sourceHandle": "b", + "style": { + "stroke": "rgb(202 197 245)", + "strokeWidth": 2 + }, + "target": "Generate:FullYearsStick", + "targetHandle": "a", + "type": "buttonEdge" + }, + { + "id": "reactflow__edge-Generate:FullYearsStickd-Answer:PoorMapsCoverd", + "markerEnd": "logo", + "source": "Generate:FullYearsStick", + "sourceHandle": "d", + "style": { + "stroke": "rgb(202 197 245)", + "strokeWidth": 2 + }, + "target": "Answer:PoorMapsCover", + "targetHandle": "d", + "type": "buttonEdge" + }, + { + "id": "reactflow__edge-begin-Answer:PoorMapsCoverc", + "markerEnd": "logo", + "source": "begin", + "sourceHandle": null, + "style": { + "stroke": "rgb(202 197 245)", + "strokeWidth": 2 + }, + "target": "Answer:PoorMapsCover", + "targetHandle": "c", + "type": "buttonEdge" + } + ], + "nodes": [ + { + "data": { + "label": "Begin", + "name": "opening" + }, + "dragging": false, + "height": 50, + "id": "begin", + "position": { + "x": -1020.0423250754997, + "y": 54.07040832453751 + }, + "positionAbsolute": { + "x": -1020.0423250754997, + "y": 54.07040832453751 + }, + "selected": false, + "sourcePosition": "left", + "targetPosition": "right", + "type": "beginNode", + "width": 50 + }, + { + "data": { + "form": {}, + "label": "Answer", + "name": "interface" + }, + "dragging": false, + "height": 100, + "id": "Answer:PoorMapsCover", + "position": { + "x": -880.5773333116513, + "y": 29.2721628695582 + }, + "positionAbsolute": { + "x": -880.5773333116513, + "y": 29.2721628695582 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "logicNode", + "width": 100 + }, + { + "data": { + "form": { + "frequencyPenaltyEnabled": true, + "frequency_penalty": 0.7, + "llm_id": "deepseek-chat", + "maxTokensEnabled": true, + "max_tokens": 256, + "parameter": "Precise", + "presencePenaltyEnabled": true, + "presence_penalty": 0.4, + "temperature": 0.1, + "temperatureEnabled": true, + "topPEnabled": true, + "top_n": 2, + "top_p": 0.3 + }, + "label": "KeywordExtract", + "name": "get keywords" + }, + "dragging": false, + "height": 70, + "id": 
"KeywordExtract:LegalIdeasTurn", + "position": { + "x": -727.0680233991866, + "y": 43.6827878582167 + }, + "positionAbsolute": { + "x": -727.0680233991866, + "y": 43.6827878582167 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "logicNode", + "width": 70 + }, + { + "data": { + "form": { + "empty_response": "The answer you want was not found in the knowledge base!", + "kb_ids": [], + "keywords_similarity_weight": 0.3, + "similarity_threshold": 0.2, + "top_n": 8 + }, + "label": "Retrieval", + "name": "Search KB" + }, + "dragging": false, + "height": 100, + "id": "Retrieval:BetterRocksJump", + "position": { + "x": -453.6381242126441, + "y": 245.01328822547293 + }, + "positionAbsolute": { + "x": -453.6381242126441, + "y": 245.01328822547293 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "logicNode", + "width": 100 + }, + { + "data": { + "form": { + "language": "en", + "top_n": 2 + }, + "label": "Wikipedia", + "name": "Wikipedia" + }, + "dragging": false, + "height": 100, + "id": "Wikipedia:WittyRiceLearn", + "position": { + "x": -552.2594439551717, + "y": 155.22722562174718 + }, + "positionAbsolute": { + "x": -552.2594439551717, + "y": 155.22722562174718 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "ragNode", + "width": 100 + }, + { + "data": { + "form": { + "top_n": 2 + }, + "label": "Baidu", + "name": "Baidu" + }, + "dragging": false, + "height": 100, + "id": "Baidu:OliveAreasCall", + "position": { + "x": -555.1646448972449, + "y": 22.458226784453046 + }, + "positionAbsolute": { + "x": -555.1646448972449, + "y": 22.458226784453046 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "ragNode", + "width": 100 + }, + { + "data": { + "form": { + "channel": "text", + "top_n": 2 + }, + "label": "DuckDuckGo", + "name": "DuckDuckGo" + }, + "dragging": false, + "height": 100, + "id": "DuckDuckGo:SoftButtonsRefuse", + "position": { + "x": -554.7669080287701, + "y": -111.86266788597959 + }, + "positionAbsolute": { + "x": -554.7669080287701, + "y": -111.86266788597959 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "ragNode", + "width": 100 + }, + { + "data": { + "form": { + "cite": true, + "frequencyPenaltyEnabled": true, + "frequency_penalty": 0.7, + "llm_id": "deepseek-chat", + "message_history_window_size": 12, + "parameter": "Precise", + "parameters": [ + { + "component_id": "Retrieval:BetterRocksJump", + "id": "69415446-49bf-4d4b-8ec9-ac86066f7709", + "key": "kb_input" + }, + { + "component_id": "DuckDuckGo:SoftButtonsRefuse", + "id": "83363c2a-00a8-402f-a45c-ddc4097d7d8b", + "key": "duckduckgo" + }, + { + "component_id": "Wikipedia:WittyRiceLearn", + "id": "92c1e8e4-1597-4e65-a08d-c8cac4ac150f", + "key": "wikipedia" + }, + { + "component_id": "Baidu:OliveAreasCall", + "id": "19b5445a-7a6e-4a26-9aa9-47dfe3a03bea", + "key": "baidu" + } + ], + "presencePenaltyEnabled": true, + "presence_penalty": 0.4, + "prompt": "Role: You are an intelligent assistant. \nTask: Chat with user. 
Answer the question based on the provided content from: Knowledge Base, Wikipedia, Duckduckgo, Baidu.\nRequirements:\n - Answer should be in markdown format.\n - Answer should include all sources(Knowledge Base, Wikipedia, Duckduckgo, Baidu) as long as they are relevant, and label the sources of the cited content separately.\n - Attach URL links to the content which is quoted from Wikipedia, DuckDuckGo or Baidu.\n - Do not make thing up when there's no relevant information to user's question. \n\n## Knowledge base content\n {kb_input}\n\n\n## Wikipedia content\n{wikipedia}\n\n\n## Duckduckgo content\n{duckduckgo}\n\n\n## Baidu content\n{baidu}", + "temperature": 0.1, + "temperatureEnabled": true, + "topPEnabled": true, + "top_p": 0.3 + }, + "label": "Generate", + "name": "LLM" + }, + "dragging": false, + "height": 150, + "id": "Generate:FullYearsStick", + "position": { + "x": -355.85244068796055, + "y": -225.5280777950136 + }, + "positionAbsolute": { + "x": -355.85244068796055, + "y": -225.5280777950136 + }, + "selected": true, + "sourcePosition": "right", + "targetPosition": "left", + "type": "logicNode", + "width": 150 + } + ] + }, + "history": [], + "messages": [], + "path": [], + "reference": [] + }, + "avatar": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAYEAAAGDCAYAAADNkawvAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAAFiUAABYlAUlSJPAAAGlsSURBVHhe7Z1nmBTV9vVflaBgIqgYMJElM+Scc5acc845JwkKiAnMICqCJAOIimK8hosoAiIgOQw5K3j9f9pvrdMUNj17unYNM91d1fvD77n3kTlnVVVX7XXiPv9vxYoVpCiKosQnxgT++usv+vPPP12Dcn///Tf973//cw3Kqa4M1ZWjunJUV46fdY0JoMCFCxdcg3Ko5P/+7/9cg3KqK0N15aiuHNWV42ddNQEXqK4c1ZWjunJUV45UV03ABaorR3XlqK4c1ZUj1VUTcIHqylFdOaorR3XlSHXVBFygunJUV47qylFdOVJdNQEXqK4c1ZWjunJUV45UV03ABaorR3XlqK4c1ZUj1VUTcIHqylFdOaorR3XlSHXVBFygunJUV47qylFdOVJdNQEXqK4c1ZWjunJUV45UV03ABaorR3XlqK4c1ZUj1VUTcIHqylFdOaorR3XlSHXVBFygunJUV47qylFdOVJdYwJ+zY7HobpyVFeO6spRXTmR0DUmoA9GhurKUV05qitHdeVIdY0JoADXTXAC5VAJ1xVxAuVUV4bqylFdOaorx8+6agIuUF05qitHdeWorhyprpqAC1RXjurKUV05qitHqqsm4ALVlaO6clRXjurKkeqqCbhAdeWorhzVlaO6cqS6agIuUF05qitHdeWorhyprpqAC1RXjurKUV05qitHqqsm4ALVlaO6clRXjurKkeqqCbhAdeWorhzVlaO6cqS6agIuUF05qitHdeWorhyprpqAC1RXjurKUV05qitHqqsm4ALVlaO6clRXjurKkeqqCbhAdeWorhzVlaO6cqS6xgT8mh2PQ3XlqK4c1ZWjunIioWtMQB+MDNWVo7pyVFeO6sqR6hoTQAGum+AEyqESriviBMqprgzVlaO6clRXjp911QRcoLpyVFeO6spRXTlSXTUBF6iuHNWVo7pyVFeOVFdNwAWqK0d15aiuHNWVI9VVE3CB6spRXTmqK0d15Uh11QRcoLpyVFeO6spRXTlSXTUBF6iuHNWVo7pyVFeOVFdNwAWqK0d15aiuHNWVI9VVE3CB6spRXTmqK0d15Uh11QRcoLpyVFeO6spRXTlSXTUBF6iuHNWVo7pyVFeOVFdNwAWqK0d15aiuHNWVI9VVE3CB6spRXTmqK0d15Uh1jQn4NTseh+rKUV05qitHdeVEQteYgD4YGaorR3XlqK4c1ZUj1TUmgAJcN8EJlEMlXFfECZRTXRmqK8fWvXz5b/rT+gDOX7hIZ89foFNnztKJU2fCcvzkaTqceIwOHk40HE48SkePn6DjJ07SyVOn6YxVB6cJ4vU5c/U6obpyIqGrJuAC1ZWTVrrnz5+nU6dP0zErMCNQ7zt4iHbvO0C79u6n3//YQ9t37aHfLL7+8Wd679Ov6LV3P6CnXn6LRj35AvWfNMeRPuOfvMIsGjb9GZr+wkJ68e2V9O6a9fTZtz/Slu07aduOP4wWNKGNazhy9DidOXuOLl2+zN5TOPT3laO6cqS6agIuUF05qaF75uxZ0wo/dCSR9uw/SDt276Vftv1Oqz/5wgrsb5pg3aLfWKrevj8Va9CBHqzYhG4rXJ3+36Nl04wbcpWjWwtVowcrNKFiDTsa7Zb9x9KAyXNo3utL6eOvvqc/9h2k/ZZBJR63ehCnz5jeyGWra87dK4jX35er1wnVlSPVVRNwgerKcaP7zz//0F+XLtGZc+fpqBX09x9KpK1Wa/uTr76j5994lwZNmUv1ugyhPNVbUMb8ldjgHEukz1uR8tdqTY17jaARs56nV5e9T19ZPZPd+w/RwSPHzD3iXnHPuH+//76hqK6cSOiqCbhAdeWE00XQR+sY4/CHEo/Rjj37ae0X/6Hp8xdRuyGTrFZ9xzRv0Uca9CDuKFqTSjbpQu2HTqaZLy6mdV9+T3sOHLaM4aiZizhx8pQZ7uKeZ3LoeyVHdXnUBFygunKCdRH0MUl78vRZK+gfp20795ix+n4TZ1OFlr3o9iI12MDpd2AMdxarRZVb96H+k2bTq0vfo59+/Y127dlnJqUx98E922Di+b1yi+ryqAm4QHXlXLx4kc6cO2fGxdHSX/XJFzR0+rNUtnkPurlAZTYoxjswhUyPVaVyj/cwk9LL1643k9B7DxyiY8dPsL2EeHuvVFeOVFdNwAWqG56zZ8+ZJZVYMbNpy3Z6
ZuFSathjuNXarckGPSU8N+YuT9lK1KFG1jPEs/zp121mgvzYiRN0zjJYPPN4eK+CUV05Ul01AReoblIQ+I8cPUa7rNb+Nz/+bJZUVmvXz7RoucCmpAz0EjBPUrfLYJr9ylv0w89bjNkeP3mKLqdgWSrQ91mOn3XVBFygugHswL9zzz7a8J//0oS5L1H5Fj1ieuXODbkr0I15K9FN+ZyGosqZv7sxb2W6IU8F+n9W8OX/LrrAZGG2Mxe8QRu3/EaHsU/h3Hn6+2/5b63vsxw/66oJuCCedTEeffLUKTMc8dUPP9GYp+ZTycZd2AAVfaxAnqcipStQjTIUrk03F29Id1bpTPc0GEh31x/A/P2/IPhnq9mT7mk4mLJU7UyZEpqYOlAX6uTKRJuM+StTpdZ96IkXFtFPW36nY1bvwF5+Go54fp/d4mddNQEXxKMuJngxzo+duEs//IQa9xxBtxSowgajaGMC/2M1TNDPVqsXPdJxJhUZvZTKztpAZZ/8gsrM/JweG/w6W9YmvVU+T89nzN/bZVDHw51mWHX2tOqub2lUj0lDCKw2qkkdh00xm9aw/BZLcbE6K7nfN97eZ9VNipqAC+JJF7l3kEvn562/myGHEo06myDDBZ+oYl3TTfmqUMai9Shrje6Uu/s8Spiy9moQD8aYwCCJCcxjy5d9coOpO1e3OUYrQ5E6RpurJ9pkyFeJKrbqTQveXkV/WL035E4KfQ/i6X0GqstjTMCv2fE4VDd58HcXrJY/WpDf/rTZrOPPUaYBG2SuB4zPp8tfldIXrJnyMfcrwf/mEg3p/sfHULEx7zJB+1rkJvBvTyA5ysz6nIqNXU73NR9FGQrDDK5j2euVe8EzMfMQ3N+kkJvylKeHKzejMbMXmLkD7FbGng379/bz+xyK6vIYE9AHI8OvuhjyQZZM5OZZse4zatJrZCoP+QQmWzGMghb7HVU60P0txprhmhvdBk8TMCvTzcUb0H3NRlKxcavYIM1x/T2BpBgzGLeC7m063Mwd3JTXvRlgHiJ7rV6WmY2mO6t2Nj0MM+RkPTM8O65MSshUsCq1HzKJ1nz+jdl7cNrqHfjxfU4O1eUxJoACXDfBCZRDJVxXxAmUU10ZaaWLyV4kaMOGpJffWUVlmnVjg0dKuCFXedO6RWsfY/R31elLubrOsQLmSiox4T1jAly5cCAo3lysgRVwR5hWOBeUwxEwgYVs3TbGBHo49wRCQd1FR79LORoNoQyFal8J4LwGB4wtR6OhlDD5Qyo+8T3K1W0uZa/Tj24p0cg8QzxLPFOurFuwiqtq2770xso1JmXFxT//Yt+dcOh3JCfWddUEXOAXXTv4b/19F81/c4XJ1cMFC9dcaaUjCN5apoUVrEdS/v6vUKknPg4EylkbrOC9gu6uF36FTiiYi0Ar+666/ajIyHeSBGApaWkCNtAoPOJtK4D3MZPUboa7MNmctUYPKj7eMuQrk9mlZ6y3nuHLptdza9kWlL5QLfOM3dSbHIF5g1709vsfmyHACxf/ZN8hDv2O5MS6rpqAC7yui+CPJGVo+T+7cCkVqdeeDQ6usIIRWr1ord6S0NgMixQcsihpcJyFlvIyurNKJ76eZEDdmUo2pdyCcXon5MNB169VevqnlLvHPOuZNHK1kgjzJbdXaEtFMcdxxQhsysz4jB4bspDubTLcPBM8c7c9Dg6YQfkWPenN1evoyLETuryUwc+6agIu8LLu6TNnzK7et1Z/RKWbdmWDgRsCyzGr080lGpm19/n6LjCBLzhoXQ1elgEUGbWUbq/Ylq2LxTIXBLnsdftS8QnvsfW6RWYC1V3NCYTD7vlkq9nD7DPg9DhuyF2ebivfioqNWZHECGxKTf/EeuYvml4V5kdSY9kq9hvgfIRPvv7B7DUIt/EsXr8jrl4nYl1XTcAFXtRFSoH9h46YyUDk5L8xd8qHETAsg2D273LMuckux7QxPQDLAO6o3J6tkwNBEIHt4Q5PmPJcvSkh0iZgg+Gwh9pNpQyFaomHceweQTgjCBC8bLUHZShSl27CKqPrGC7KXKgadRg2mb7b9Ks5FIfbZxBv35GfddUEXOAlXXy456yyP2/bQZ2HT72uzJ2YkERLM3Pp5lYwm2ZWw/AB6VrQEsawxp2V5XMOaM1i7PuxQa+xdV4PxgQcN4ulvgmAMhjb7/cS3ZLQxAR4TjsU/N0dFdtR8XErrTrCGUGAwEqlVfRgmynWb9XMGPb1TCYjxfew6c/Szj0HkswXxMt3ZONnXTUBF3hFF+fc4rCSZxYto3vLNGQ/cAlokWOM/NayLenRzrOo1LTABK+EwFDIcspSVT4HgKWSWap2MROjXJ3XSzRNANjzIndUai/eD2CMoHIHKj5hNVtncpR64hN6xPrNMpdpYQwcvyVXvxPYZ5C3RiszX5B4/MTVIaJ4+I6C8bOumoALYl0Xf4Ox3E+/+dGM7XIftQQEHozHYzgCSxVLWwGFCzTJYhkAglb2Or3Z+jmwBDJrze6Ow0vXQ7SGg64Bz8YyOUyQS8fw8XdZqnejhMnun02paZ+YoSL8llitJO2FhHLLY1WoZf9x9N/Nv5lEdX7+jjj8rKsm4IJY1cXQD9Z6/7ZrLw2eNi/FaZzROsUSRLQ8sUIGyxO5wBKeDVawWuNqHwDGsLPX7kMlp65j6ks9ot0T+JcNZq9E1hrdxEaATWg5Gg6+utzWLfZqJQwvpS9omUEKdiZjnuHuUvXMMaAHDifSmbNn2ffVCf1+5URCV03ABbGoi+450ggvef8Teqx2G/bjdQKtQ0xaYrdqvr7z2SAipdS0dfRIp5msDgdSJdxVb4AZvuDqS01ixwQCJExZYzKW3phHtswTY/wPtpuaQnMOUHr6esrbe775rY0ZpKBngIP0cfrZB599RYeOHGVPPAuHfr9yIqGrJuCCWNJF6//s+Qv04+Zt5nD2dCnJOYNlmI/VMK3DfL1fsIKk8+RjONDazNvnBTO0w+pdQ2ClEZaXJre0NLWRDQe5SxtxvWCHMDKeStf7Y5gub+/nrXv5jK1PCowkn/Vb3W799oEJZPeriW4vUp0GTJ5DW7bvNPtPuHeXQ79fOZHQVRNwQazo/u9//9ABqwX2wpsrUpbgzfrgMQSTqWQTeqTzk6kShBFgCw5dRBmL1OU1ryFgAPc0GmIFo8gYAJCbwPVvFnNDySlr6a66fWVGYP12GYvVt571Gw5LR2WYCeROMyhTqabWO+E+XxQmjgvUam31RD82vQLu/Q0l3r9fN0RCV03ABdHWtcf+N2/fRa0Hjmc/yvBgd29lk48mZ+uJqTcJi5VA41bQ7RVkw1EwoHsaDLquYY2UEGvDQcHACJBqQjI8YzaTlWtNCRPfZ+tyjfX7JUz6kB5oMc5kRMU7wumG49ZC1WjglLlmQyI2JnLvsU28fr9cvU5EQteYgF+z43F4VffSpUtmS/87H3xKuau1YD/EcGACEpu8EHyLpfISzJJT1tB9TUeyuqGgtYuNZimd4LweYnE4KBjsjL4dy0cFa/vtieLUHEozS1jHrKB7Gg4yuZqkk9Y2mCso2aQ
LffLV93QMKauZdxnE4/cby7rGBPTByIiWLvK/b/l9F/Wd8CT78YUDASV9oZomdUGhYYvZj/96QBDK0+tZUcBAKxdj0CVSqwXrklgdDrLB3ooio5eZvECSncVY/5+r+1xzX1x9KQXzDYWGvkFZq3czGtJdzjZ3FK1BM19cbBot3Dsfb99vrOsaE0ABrpvgBMqhEq4r4gTKqW54MPyDnZpf/bDJZHvkPrhwmORrpZpR7m5zTCuP++CvB9RZaPhbpofB6QcDM8K1IMhxdUUCmQlEZzjIBtdYcPAic6YAd33XYAVnpNYuMmIJW9f1AoNH+m8MH7rtFZh9BQPG0a69B+iyFYyC3+t4+X5tYl1XTcAFkdSFAeB4Rwz/3F+uEfuhJUeg9V8rkHwtjXbf2mvds1QTJKPDZGaRumY1Cl9XZPCCCQAEX6zpl6yyQu8KO5BLTv2Iret6sXd+43zldAXc9QqwYg3DQ99s3HxN2ol4+H6DiXVdNQEXREoXq3/2HTpCT7yw0HXOH7v1j12iadH6t8GYPhK8cdcQCoIHNo9x9UQS+XBQdE0AlJy2ju5tPopuELTAMdH+QKsJZhiHqys1KD39E8rVZbbVK2joqleApacPlG9s0k6cPH32yvutcUNKJHTVBFwQCV1s/sLOX6z95z6qZLE+Nrv1n9oTv6GYIYuhi0yrmb2WIGBK2Wv1jvhKII6ACaTtoTKpCfYQ3Fm1kwmkodcZCn57s2yUqSe1QKMCZzmnpFdwW+HqNHzmc+aMYz9/vxyxrqsm4IK01sVhHpu2/k5V2vRlP6TkQMssU0JjytV1NvvxpiqzkPtmNd1ZxTkzKJYy3lquVdQmgkPxmgkg6OIkNRzPGXqdoZhhocodXCX5SynoFTza+UlzXW56BTjjGCmqcaQl941I0LghR6qrJuCCtNLF+P/5CxdN4rf8tVqzHxCL1RJDiww7TqXpna8XBBnkxmevJxjr2m4uVt8Kuq+y9UQD+XBQbJgAQA8KO4QlG7kwLJSz1cQ0HRaygUHhTGUs93WzyQynmCG54aat21OUe0jjhhyprpqAC9JCFwZw6sxZMwHsJu0zhggyFqljxtojNdRihoGGLLKMxzlBHYLpg+2msPVEC5kJRH9iOBTkY7r/8dGi+QFs9io0PPWXAidHqakf0f0tx1K6gjUsfdnwEHYZF6nfnj779gc6eUqebgJo3JAj1VUTcEFq68IAjp44Rc8vXk6ZC7o5frCCOc83d4SDVYmJ71EWwRnBSIiWrXYfKh2BFqkbjAnE6I5hJ8z8AA7ncRiHN8NC1m+EIRuunrQgsJrpGbNUWHqIzY25y9NDlZrSio8+17xDYYiErpqAC1JTF///4JFjNPX58EEpFGzpR274wiPeZj/ItMKsGe82h72mYNBDQU6i1DoXODXxsglg+KXwiLcofeHa7HUHgyHCR7s8ZZW7/txCUswGs2Fv0K1lWrqaJ8hWog69tux9Onb8JPvdhBLvccMNUl01ARekli6WgO7ef4gGTX1atPLDYP0ddm8i7z7yvHAfYppxZa04eh/stQWBVSq5u8deEAXyOYHYvH4ke3uowxOOZwEEjLip6T1w9aQZWDQwYbXZne5mniBbQh16+rV36HCi84RxPMcNt0h11QRckBq6WAK6/Y+9rhLABcb/61LOluOjstQSY9JILsZdWzBmOWjt3mwdsYDcBGJnYjgUBPY7KnVIct2hYKPZ/Y+PMffM1ZOWmDkM633Bs5QuI8US0lGzXqCDhxPZ78cmXuMGV68TUl01ARdcr+6lS5dp2849VL/bUPZD4MAYL4ZXopfPBqkh3jRHE3LXdxXrY0dPIVKrlFKC13sCIDCkhcl55zkkNByKjFrK1pPW4PCaPD2fM2mvpfMEmQtWNfmxwhlBPMaNtNZVE3DB9eieP3+Btvz+BzXoNoz9ADgwtnpb+TYRH/8PBi3P7HX7JLm2UDBU9XCH6WwdsYIfegIg0DMb4xhc0TO7q07fqG3Uw/PGBrbMpZqbxgx3jaHYRnDg0BH2O4q3uBEJXWMCfs2OxxENXRjAz1t/pzqdB7EvPgcmgHEYebFxK9kPLBKUmfEZ5ev3Ent9wZgVKRXbmcljrp5YIdCK9nZPwAYb8JDYjbuHYHASWf4BL7N1RILAfoJldHvFtuIJ48yFqlKvcbPMITWh31I8xQ0QCV1jAvpgZKREF0NAm7fvpJodBrAvfFICp35lr9XLnEHLfViRAoEGRsRf578g93y+vgvYOmIJv/QEAAz30a5POU8SW/+O1BNRNWizy3yVSTaI3gl3naHcWrga9ZnwFB07ceqa7yle4oZNJHSNCaAA101wAuVQCdcVcQLl/K6LSeAtO/4wOyS5Fz0JuQJn/ppTtyJw8Ho4MISQp/fzjhN7aN1lq9nLfOhcPbGEzARic4koB4bqcAgNdx/B4EB57Drm6ogkCZM/oLvq9hOvHLKN4MSpM1e/qXiIG8FEQldNwAVudGEAW3fsprpdBrMveBKsYIsW9QMtx1FpK1hxH1EkwVI/7Edgr9XGumbksy888h22jlhD3hPwhglgbX6Bga84tq7NcF2lDjExXBdYOTRGNLENcGwlllJrBlK+7nBIddUEXCDVDWQC3UONeoxgX+xQsAQUeXYe6TyL/XAiDYJF7u5Ps9caDJYh3td8JFtHLBIwAe8kkJNgH1Ifeh+hYOIeZxRwdUQac7h9xxnmWUtSTWD56LAZz5r0Kn6OGxyR0FUTcIFEF/++c+8BatJLduYuDAA52vP2ju6BK/8SGL+9tYzDOcbmuhtZPYa0TVudmvjRBHBPWIFzU77w504go+vtFdpYBh/dYUab0jMCx5KKTlCzuL1IDRo750U6ffacL+NGckRCV03ABU66yAW0/3AidRs1nX2RQzE9gOINYmpS1Uw4dnmKvd5g7INMuDpiFflwkHdMAOBUsbsbDGTvJxhzJnG3OWwd0SCQIfUFc13c9YaSpXgtmvPqEjp56jT7fTqh8YpHTcAF4XRhAInHT9LEea+wL3ASLAPIWLSuOf+X+0CiBZLE3VauJX/NVwikJWhCCRM/YOuIVWQm4J2JYRuTVwgb+hzG2U1voGLbmJgbsAkMPc4VG8FdJevR6+9+QKdOuzcCjVc8agIuSE4XBoAVDE+//g7dZH1o3Mt7DVYQzVCotsnLz30Y0QL7AvL3W2Cuj73uK6TLX40eahtbaaIl+NUEAHoDORo7b0TESqF8feezdUQLDFFhPgy9S+6ag0EDBGduv/fJF67PI9B4xaMm4ILkdM+ev0BvrPxIeB5wYBkocqvE2rLKhMlrKGvNHsw1B3GlF1AyynsYUoKfTQDvUpFR7zi2qLGkN2uNHtaziK003zizOmebSaLD9ZGGOk/1FvTl9z/RuXPn2G+VQ+MVj5qACzhdHAm57qvvTSZE7oUNBV32HI2Hmi489zFEi8CQwluOyw0Dp1d5ay7AxtcmYIHD6e9rNsqxJ5ehUC0qNPRNto5ogt7MvU1HmN3y3HUHky5PBSrZuAtt/X0X+61yaLziURNwQaguloJu2rpDfCQkWjnYLB
OtXC7hMB9gE4clrVZwwYqgWDkz2C1+NwH0BpCiIX2hmuy92SDI3tNgsGX8sbfBr+TUtda1DbSu0TnFBHrejXuOoENHwmcetYn3eJUcagIuCNbF/+JMgMY9hUtBrW74nVU6m12eMIGYYvqnVHTUUit4hD+wBMsQczQewtfhATDkgDw63L3ZmPX03eew5b0A3q/7mjm/k8gwioyvXB3R5VMqMWE1Za3elSTZR7F0dMxT80Wnk8VzvAqHmoALbF17JdCoJ+ezLyYHTnt6uP0TlLvnMzFHru5zRZOK6bAstMU4tg5P0ONpcyYzd2826K3laDSUL+8FesyjnK0nsfcWDAz9rnoD+DqijXUPD7efZubOuGsPJXtCHVq0/EPHieJ4jVdcvcGoCbjA1j134SK9uux99oVUFCWyYMXQw5Wb0Vc//ETnz59nv10Qr/GKqzcYYwJ+zY7Hcb26F63/Xf/Nj3R3qXrsC6koSuRJn7ciVWjRi/bsP8h+uyAe45VE15iAPhgZly5dos2/7aQSjTqzL6KiKNED5xD0mfAknT13nv1+4y1eSXWNCaAA101wAuVQCdcVcQLlvKSLeYCDR45S99GylBCKokSe7Al1aeHyNXTp8uUk33A8xSsg1VUTEIINYS+8uUK2I1hRlKhgzw98+9PmJN9wPMUrINVVExBw2epWffnDJspaIvwSSkVRog/mByq17k3HT52+5juOl3hlI9VVE3AAw0B7Dhym2p3k5wMrihJdAqmnF5gd/fa3HA/xKhiprpqAA8hfPvuVt9kX7RqsbijWXmNLvhfAAeSOm3Gse0KaC668F0lv4Zit0mf3DCQZOvEu4J3gysca+B0lR1TmKNOAPvvPRtOQw7ccD/EqGKmumkAYMLn0ufUSIY8595IFg5cyR6MhVGjYYk+AM2fTOWzGQTB8uMN0trwXweEryFbJ3auNnRuJK+9FCg57w2wGxBGT3P3aILsoDnnh6og1Hhv8Ot3/+GiTDI+7FxvkFyr3eA86cuyE+Z79Hq9CkeqqCSQDWg+79h0wY4vcCxYMPjDkacdRf1w+lFgD2/Pz9nHY7Wy1iJEnCKkWuDq8iCx3kPcOlXEiYdIHdFvZ8GdEwPAf6TyTLR+LID3GnVU6mfeUux8bHE059IlnzbCQn+MVh1RXTSAZTp4+Q1OefY19sa7BeglxPjBamdzLGouYc2nrDeDv5womT1CTYWx5r+L7BHLJACN/sE34VBI3WK3mLNW6xFx22+TAdRYa/halL+y8WOOe0vXp029+9HW84pDqqgkwYBjoi+9/Eg0DoQXltdTKxcatpAwOHw/GXREwufJeJV5NANlCC4942zL28GnCcd4vzpfm6ohFcGD9wx1nioaFyrfoScdOnvJlvEoOqa6aQAgYBtp78AjV6TyYfaGCwTAQMoPG0nF9TmAoCGO/3P3YYJIwc9kWJmhydXiVeDUBcHX4hLlnG3sOiCsfq5iDkGp0cxwWuqNoTXP067kwuYXCEavxKhxSXTWBEM6dv0DPL17OvkjXgGGgEg2tFtYS9uWMVTAUlL1OX/6eruDlg2PCEc8mYI5w7PRk2GCJIaE7q3bylPmjl1Nk1FLKWNQ5l9cD5RvTtxt/YeOCE7Ear8Ih1VUTCKn3l992mjFE7iUKBitrHmr/BPtixi4bqNjYFWaoh7snm4xF6njO3CQYExgcnyYQ+O2XOy4XzVC4DhUbs5wpH7ugJ56r+9OOJ5JlyFfJ7Pc5dvwEGxvCEYvxygmprppAEDgjoOuoJ9gXKBhzTmv17lQ6xs5pdQIfS27rY+HuyQZDXHdUahdz5x+nBvHcEwCmF1g7/Go3DAk91H4aWz6WCSx26O+49yVr8dr04tsrw6ac5ojFeOWEVNeYgF+z43Ekp4sU0Ws+/8b5sHirO31LQmPTquJexlgGh8Nnq9WLv68reHFcWEq8mwDmg3L3fDb8kBDmuSp38MwqoatYjRZ8kzeXaMTel81NecpT0QYdaN/Bw2x8SI5Yi1cSpLrGBGL5AjlSW/fixYu0Y/c+qtauH/viBIPutFeDJFYFOZ3WZI4d9KDBSYh3EwDFx60yG8O4e7cxQ0LW33HlY5nAsNBcx9VCmCSe8PTLdOnSZTZGcMRSvJIi1TUmgAJcN8EJlEMlXFfECZSLFd3jJ07SC4LJ4Btyl6fbK7T11GogmzIzP6P8/V9i78smsOmtHVveD6gJfEEJ6A3W7Mneuw0aOrm6zWXLxzqBVVAdk9xTMCbTaKWm9NuuvWyM4IileCVFqhv3JoCxwV+2/U65q7VgX5hgsLY+X5/57MsX65Sato7ubz6KvS8bsyqo9US2vB+I74nhAGjAPNL5SfbebW7MW4nurj+ALR/roLFTYNBrZrMjd282mQpWpW6jp7PnDnDESrxyg1Q37k0g8dhxmvLsq+yLEgy6mNlq9vLshGkJpA4o34q9NxuYXCEP7Xx2i/YELKz3t+jopSbQc/dvsFrKmUo2oTIz1vN1xDglp35E9zQaEnbuA9xXriH98MtWNk6EEivxyg1S3bg2gXPnztGmLdvpgfLhJ5PwMt1crAEVHvkO+9LFPFhLPXpZ2A8fXeTMpZubQMnW4QPUBAIglxCGNbn7t0G2Ts82CK687xkKhd8Vf8tjVajjsCnXpJtOjliIV26R6sa1CRw5eowmzA0/Tg6QIfT+x8fwL5wHMBNmXWez92ZjcgU1Hs6W9wtqAgFKTfuYcraayN6/DYYGH2w7hS3vBZAv6SHr+jGPx92fzb1lG9I3G5OeQhZKLMQrt0h149YE0AvYuHmbYy/AbiFjezr3snkBs4a6bviVT8gln6/fAra8X5CZALKI+tsE8BwKDnkjbIC0E8p5dfgTFJ/wHmUu1Yy9Pxv0BtoOnujYG4h2vOLqdUKqG7cmcDjxKI15yiGdsgWW0z3a5Sn2JfMKxSesNsNZ3P0FKEc3F29IJaeuY8v7BbkJ+CuVNEeJie9TpoQm7DOwwXJhNCC48l7A5Mnq+azjklEcPoPjY7l4YRPteMXV64RUNy5NAHWgF3Cv9eNzL4WN2T1buaN5mbiXzAuYwDdkEXt/NjfmqWR2knLl/YT2BP4Fk6c5Gg9ln4ENGkD5+rzAlvcK6MFnqd6NvT8b9AbaDJoQtjcQzXiV1rpxaQJHj5+gsbMXsC9EMMixk6/fi+zL5RUCueQns/dng13Cj3Z5ki3vJ7Qn8C/mYKHez7HPwOamfFXovqYj2fJewf7Nw66GskBvINzcQDTjVVrrxp0JXL58mX769TdhL8CD2+dDuJpql7lHGywN9ezKJxfITMD/E8M2RccuD79iDJsHK3g/j5TZIFejO3uPNugNtB8yOdl9A9GKV5HQjTsTwIlh4+a8yL4IwWCiFHn3uZfKS2A+IGPRuuw9GrAmvFRTz5udBB0OuhZz7GT51uxzsDHzAlO9Oy8AzAay/i85zg2E2zcQrXgVCd24MgEcGPPbrj2Us0Jj9iWwwaqJ28q38fRcADBH8A17g71HG7QEcdQkV95v6HDQtZSc9hHd23Q4+xxs0BjK3+8ltryXkByqg13EnUdMo
7//ThpbohGvQCR048oEzl24SE+//g77AgSDswIe7TaHfZm8ROD4vfCpsTEfgDQCXHm/ocNB12KvnuGegw32CzzQajxb3kuYe+39vBni4u7TJmeFJrT9j31JYkc04hWIhK4xgbTOUscR6ax8+Pu9Bw9T2ebhxwbN0YplHjcBlHuZvITJH183/CligfkA/x0gw6EmkBQzL5An+XkBDKFkqdaVLes1AqlTwg9/3V6khskwysUPv8ZJYwKxfIEcKdE9c/Ysvb/+S7rRYQehyaffcQb7EnmNEhPfo1vCrQXHfEBCY88djpNS1ASSgsB4a7nwOaWwxwTHU3LlvYTZOW/18MNtkkN8KFS3HR07cfKa+BHpeGUTCV1jAijAdROcQDlUwnVFnEC5SOruO3iIWg8cz/7wNtgdnKlUM5Nxk3uJPAXyp4x8x/RsuHsFZj7A6imw5X2ImkBSAvsFhrHPwgZLpQsOWcSW9xpoGOEb5+7TJluJ2rRw+QfXxI9IxyubSOjGhQkgRcQPP2+h2wqHP1/V5Etp4918KcHYrR7uPm3MfECnmWx5PyKfGI4fE8BYee6ez7DPwgbviffO0+YJHLg/M2zjKGP+StSg+1A6febM1RgSyXgVTCR048IEkC560ryX2R/8KlYv4JYSDc2yOe7l8RqmhddwCH+vV4in+QAQMIGF7LOwMSbQIz5WBwWweoxjloWdMA30GPswZb0JegPh06iUNfuIvt/069UYEsl4FUwkdOPCBH7/Yw+VaNSZ/bFtkEUTy+W4l8aLmEmwcuEnwZAvyOvLYN0g7wnEkwkgKL5PmUuHGSKx95JYz48r7zWQRfWBFuP4e70CRg1GP/nC1RgSyXgVTCR0fW8C6NJ9+NlXZryf+7FtMhSuZU6d4l4aL1J8/Cqz1JW7V4CWX5aqndmyfkVmAvE1JwCwiixb7d7s87DBucN4p7jyXsPsnxn+Vtjd0jfmLkeF67YzR88ijkQqXoUSCV3fm8CBQ0eo68hp7A9tE9gc1so3LR3cB1IFc/dqY/LCNPN2Xhi3qAnwIL9UztaT2OdhE0g17u08WsFINo/dXaoerVz3uYkjkYpXoURC19cmYJ8fjB+T+5Ft0hWoSg+188eEMMCkMBLCcfdqg8PEc3f35mHiKUVmAvE1MQxKz/jMnCXBPQ+bdOaQmclseS8S+EaeMkNd3P0Cc/LY0MlmYUkk4hVHJHR9bQInTp6il5asYn/gYDIWrUfFxq1gXxYvgknhe5uEX/YXmBR+my3vV+QmEF9zAqDomOWCyWF/LSdGXq0MhWqy92uDQ6f+2HsgIvGKIxK6vjYB7A14vO9o9se1wY7IrNW7sS+JV0FX945K7ZPcazBmA5APdkW7QYeDkgeTw7eUSD6nFpZU3lqmheczigaDxtLdDQay92uTpXgtmvf6kojEK45I6PrWBDAU9Ov2nXRP6frsj2uDydM8PlsSaFo4Vks/9F5t0OK7o1IHtqyfURNIHpNuuVZP9pnYIKNogodPGgulzIzPTHK8cDuIM+SrRDXa96ez5875Mk4C35rAyVOn6J0PPmZ/2Ktgb0BCYyrphx3CNman8BL+fq9gDpVv4u9D5TnUBJLHLJtsNYF9JjaYHH5s0Ktsea9ijtks2ZS9X5scZerTN//92ZdxEvjWBA4eTqRe42ayP6qN2RvQZCj7cngVTHjl7v40e7826QpU9/y5ySlBTSB5sF8kb+8X2GdiE8irNZ0t71VgfjkdzO/WQtVo8LSnTS4eLh45EctxEqS6CWAmHWtrsUsXgRjj8rv3HaBde/Zfw+59B+nA4aN0KPG4a1AO5UPrDObbjb9Q/prhE2PZK0GwscovFBu7XJQjPm+fF9jyfqb4hPdMOmHumdgEUmvPZMv7GqtFjH0y3DOxCUwO9+fLe5WJ71H+AS+HXSWEpHL5rFiy04orXDxyQhKvkmPvgUOUePwEnTh1hs6cO08XLv5pmZEsXrsygevNUnfu/HkT9HHR/9m4mV58eyVNfuYV6jdxNrXsP46qt+9PJRt3oZJNrqVU064pJrSuUArXa8f+oMHckKcCZS7dnG4t19I3IA02zI27XxvMCWQu1ZQt72vKtjDDf9wzscH4MCbN2fI+J+yuYWAFSpgkV9bL4L6dTh3D3EDBOm3ZWCSBi1FOoBxiJw7BHzhlLj3xwiJ6/d0P6fuftxhjwSmJf/11iY3NwFUW0ZSkOb148aLlTqdp9/6DtOG7jTRy1vMm0HMPUFEURUkdYEgwiLGzF9CG/2w0vYwTJ08nidHSNNTGBFCA6yZwYNXNUat78vPW7TRjwSIqWr8De6GKoihK2gJDQON79stv0Y7de+nY8UCaC4C4jiDPDRUF48oEkIdn244/6MmXFtODFcIcVqIoiqJEjJvylKdHqzSnOa+8beZgz5w5m7omgNb/sRMn6JOvvqMKLcOvJVYURVGiQ8b8lala2370xXcb6dTpM6ljAjAAJGHDZK9TDh5FURQlumA1U84KjWnxqrV0+sw5NvAHE9YEYABYojRj/iJKnzf8Kf2KoihK7ICUF88sXEpnz19gg79NsiZgG8DkZ15lBSQgh3+6PBUoQ76KdHOByoqiKI7geMebwqRyAIgtXFk/gaEdTPwihjqdh5IcdxarSU+//g6dC2MErAnYQ0AzF4TPSR+K/cNktRwI2fewwaJ0065Uq+MAatp7hKIoiiP1uw6hByuE38+RuWBVtqyfaNJzhBU7B1KZZt3Mxtf7yzUyrfubLXPgnklyZEuoQy8uWW02molNABu/Xlj8rnFkrtJQEPwzF6pGD1dqSs37jKLnFy2lzdu207FjxxRFUVzx+84/qPWA8Mc/PmTFGq6sX0lMPEpbfvudnrFa9U17jaD7yjawYm5VuoF5Nhx3laxLy9asp8tM6oskJnDq9GlatW6DcRyusmBwATCKRyo3oz7jZ9HGzVvYG1AURZECE2jVfywbc8ANucpSrqrN2bLxQGJiIv24aTP1HjeTHrB6B5LGOhrqWEL63aYt4U0Aw0A4ieux2m3YioJBpUisVK/LYPr6h43sxSqKorhFewIyDh85Qp998z3V6TSIbingPESEuYWKrXoRchAlawJHjh6jCXNfYisIBgZwR9Ea9Hif0bRr9x72AhVFUVKC9gTkHLXYsv13atF3NGUqWJV9XsGYFUOLltE///yT1ASQ/XPj5m2Uo0wDtrCNbQBdR06lffsPsBemKIqSUrQn4J4du3ZT5xFTzLnI3POywR6CQnXbUeLxk0lNAL2AMU/NZwvaYA4AQ0Bw6QMHD7EXoyiKcj1oTyBlbN+xi1r2H+M4R4DewJxXlyQ1gS2/73LMB4TKMQewa48OASmKkjZoTyDlbPt9J9XpPMiM2HDPDSDPUPGGnczegatZRM+cPUcvLVnNFrBBpVgF9M0PP7HiiqIoqYFzT6Ac5a76OFs23sHKoS/+8wNlLZH8GeMAS0aXfviJ6QAYEziUeMxyj8HsH9tgc0bvsTNZYUVRlNRCTeD62Ll7D/UdPytsbwCbetsOmmBWhBoT2PzbTvMfuT8GqAwbwX7YtJkV
VRRFSS3UBK6PxKNH6evvN1K2ML0BPMNc1jPExmBjAotWrGH/0AYG0bLfGFZQURQlNfHixPBRK/AePHSIdu3eS7/t2Em/bttOP23eSht/2UK/bP2Ntm7fYVbwYEUlhmy4OlITybwKskJ/sP6rgAkMmDyX/SMbOMrLb69kxRRFUVITSQCLhYlhBP69+/abyVjs4F284kMaNGUO1e82hIrUa2/iJobR81RvSVXa9KGOQyfR7JcX0/qvvzOmAMM4kkaGsP/gQXrxrRVhh4TuKFqTpj77WsAE6nUdyv6RzYMVm9Dvu/5gxRRFUVKTWO8JIPjvO3DAtPJnzl9IdToNpByl67PXyoFVlhVa9KTBU+bS59/8kGZm8N+ff6UsxZJP/4PNZV1GTguYADYPcH8EsJwooXFnVkRRFCW1ieWeAIZyNm/dTs+8voQqXucpi0iX/ZDVwLbNYI/Vq+A0UwoSziGLM6cNkKa6RocBARN4uHJT9o8AzgKoaf0hJ6IoipLaOPcEojMxfORIIn39w0/UpNcI9rpSCswAy++nPfca7fhjN6udEn7bsYsadh/GagJ797AxARxFxv0RwKRwC50UVhQlQsTicBCStX3y5bdUtEEH9ppSA8wfdBs1zQRv7hrcAkNBfZyWDXpUxgQeCGMCGDdCniBORFEUJbWJteGgI5YBrP38K3NKF3ctqQlOE2vUYxj9sWcvey1uQB0jpj/L6tjg8C81AUVRYopYGg7CHMCGb39wNfF7vdxWuDqNmPEs7T9wkL0mKbv37KNRs55jNWzuL9dQTUBRlNgiVkwABvD9T79Q/S5D2Otwwj5fPdwyzeTA8tJX31lFhw4fZq9NgpqAoiieJBZMAMtAcURui75j2GvgSJ+3osnQeU+pema4qmzzblSjQ38zf5GjTH2TzyeTQ6pnG0wWF6jVysxDcNcnQU1AURRPEgsmgH0AWAbK6YeC68EZK5Va9qJnrTIfff612R1s14XW/Bf/+ZFefGu52TB2Z9Gaot4BYm/n4VNSnLZfTUBRFE8SbRNALwAbwST7ALCPCnnVBk2ec03gT449e/fT9OdfpzzVWoh6BeaA+A8+ZutyQmICOjGsKErMEW0TQCqIGfMXstrB3JSnAhWu144++cL9kA0yMLQfMtHxbGA7b9vBQ+7nBtQEFEXxJNE2AeQCqtG+H6ttg41W+Wq2oo2bt7B1SNi+cxc17zPK1MVp2CDR24frv2TrCIeagKIoniSaJoChoO82/mwmeDltAP27S9alea+9zdYh5ehR5PfZQg9Xasbq2NhLRrk6wqEmoCiKJ4mmCWBt/vNvLGN1bexjdvfuv/5cP9Bb8ObysOe5pMtbwSScO3T4CFtHcqgJKIriSaJpAibVAjJrMro26CXMe30JWz4lYPipervww0/3lmlAX/+wkS2fHGoCiqJ4kmiawHZLu2W/5LUBxujf/+QLtnxK2GkZT/fR01ktG6wSevfDT9jyyaEmoCiKJ4mmCaBVXrfLIFbXBumfcY1c+ZSwd/8BmrXgDVbLBjuIX1ni7mAvNQFFUTxJNE0AJ35VCLM/AKkgyjTrxpZNKYcPH6HV6z5j9WxwOMzcV95iyyeHmoCiKJ4k2iZQqVUvVhcEzlfpz5ZNKchRhCMnOT0bZDCdtWARWz451AQURfEk0TQBbOJqO3A8qwugjTOD0XrnyqeEwHnAy1k9m6zFa9OCxe+y5ZNDTUBRFE8STRP4QxA4sVLn2x83seVTAvL+D502j9WyyZ5Qh95avZYtnxxqAoqieJJomsC+/QfMJjBO1wYBefGKD9jyKQFDUA26DWW1bJCZdO3nX7Plk0NNQFEUTxJNE0DGz6UffMzq2tg7eGEYXB1ugN6qjz4zK444LRAYgmphgjpXR3KoCSiK4kmiaQKJR4/Sl9/9l25xyPCJMfr5i5ezdUhJTDxqDq0p36IHq2GTuVA16jN+FltHONQEFEXxJNE0AYCD3ht2Dz88g0NfitZvL0ofnRyS3ckAR1uid8LVEQ41AUVRPEm0TSAwL7DE5Ozh9G0QG3FIDDaYHUlMZOviQJI6TAavXvc5W28wOK0Mh9W4HQoCagKKoniSaJsAwNGSheq0Y/WDQTK5Gu37m2Mgd+8NH6gR/HFi2Y8//0rDpz/D1hcM7hMpKuYvXsbW54TEBPRkMUVRYo5YMAEE9PFzFrD6HDheEn+/6dettOW33809oA4cUIMhI6wA+mHTZnpm4RLHOQAbGEz9rkNNplHuGp1QE1AUxZPEgglgM9h7HzsP1wSDw2HuKlmHSjXpYq5/3Jz5NHP+InNOMHYhP1CuEVuOA/eIAP3Gig/Z65OgJqAoiieJtgmYZZvrPjd5gjj9SJDZiru9xs5gr0+KmoCiKJ4kmiaAs3xXfrTeDMVw2pEAK48K1W1nVilx1yhFTUBRFE8SLRM4eOgQrVjzqWmFc7qRAAaAs4tTsiQ0FDUBRVE8STRM4MDBQ+bQltsL12A10xrcE2Jt4XrtaM1nX7HX6BY1AUVRPEmkTeDAwYPGAMIdLh/MjVcCNvdvKQFzD9gQ1m3UNPpl62/sNaYENQFFUTxJJE0Ayy+XfrDOHN/IaYVyU57ylKtqcxMT7yvb0OQRwvVwf+sEzATnBJRo1ImefvVt1wfJO6EmoCiKJ4mUCWDj1pL3PjKtcE4nlJusFnuR+u3p6+83mh3CP27Cpq9n6ZHKzej2IjUofb6KJrBzZW1w7bcUqExZrV5H3uotaNK8l+jnX7ex13e9qAkoiuJJ0toEjlogNQQMwKRNYDRCgQEUa9DB7PYNrsuYgfXfxs2eT9Xb9aMCtVqbHgJ2+iLJHFr6+F+kn0Y66EerNKfWA8bRC28sS7Pgb6MmoCiKJ0lLE4ABYBfvW6vWhk3fHIxtABs3b2HrDObIkSO08Zct9N4nG2jBm++aIyFxIhgOhFn72Vfm5DKuXFrgzgTKqwkoihIbOJtAWTMuz5UNx9Gjx2iPZQCLV35IuawWOVd3KG4MINZwZQIPVmzK/gHI9FgV6jxiCiuiKIqS2sAEMGTCxSObhyo1ZcsmB5K37dm7jxYtf58ertyMrTMUrNrxqgEAZCrF4TfcvdlczSKKg5O5PwA3569MTXuNZEUURVFSm7QYDkIqiNXrPhP3ADDJm9Cos2cNAOz8Yw/1GD2dvT+bBzEkBhMo1rAj+wcAObUrtOzJiiiKoqQ2aTEchIRwaz//iko27uJ4TkBGq+GLTJ9eNgCwfccuqwE/gr1HG6xsMiZQo0N/9g8AljwVqNXKTHhwQoqiKKlJWgwHAScjuMECx0pWa9fX8wYAkNK6bLPuSe7T5sbc5ahYA6sDABPoNuoJ9o9s7i3bgL7b+DMrpCiKkpqk1cQwSM4IMMSEs3zrdh7sCwPAHAjOL85SvPY1zy6YmwtUpmZ9RgdMYMb8Rewf2WCtK5Y6cWKKoiipSVr1BGxCjQAGcKtlAI17DPeFAQDkQnp5yUr22dlgt/OoJ+cHTODDz742ByJwfwiQVrVBt6E6JKQoSpqTlj0Bm2Aj8JsBAOxHaDd4Avv8bJAqY9ma9QET+P2PvZQ
9IXzujHvLNKCvvvsvK6goipJaiHoCFZuwZd1gG4HfDCDx6FGT2iJcOgz0frBUds+BwwET2H/oCDXrPYr9YxtMmLQbNMEsteKEFUVRUoO0Hg4KBkbgJwMA6AX0GBN+aWiGfJWoTpfB9H//938BEzh56jS9/u77JkMeV8AG+TBw7iYmHThxRVGU6yUSw0F+Baa22orRmQuFT3WdLaEOvfzOe/+awJ9//kk7du+lR60HyxWwwSRKqaZdaFMaJz5SFCV+iWRPwE8kJibSt//dRKWbdmWfmQ3mf/PXak2Jx09eawKJx47T5GdeCTtBDLCDGGNoO3btZi9EURTletCegHswD7Dp161hn5sN0l6Pn/uSMYBrTOD8+fO0+bcdlDNMRlEbrKfFSfg4CFmHhhRFSU20J+AO9AB+2ryVBk6ewz6rYOxewB/7DyU1gQsXLtDR4yfoyZcWm23TXAXBYKNBw+7DTC5tnNDPXZyiKIpbtCcgA2mxsR/gy+/+S417DmefVShYBTrn1SVXDSCJCYDd+w5Q9fb9zRIirpJgMEdQvGFHk5sbvQJMSnAXqyiKIkV7As5gleav27bT68veoyL12rPPKBQ07ut3G0oXrHifxAT++usvYwTg3Lnz9NUPm+g+5JlmKuII9AqG0up1n5t8FUhhqhvLFEVJCdoT4DlsxVTE1l+2/EbL13xKtTsNpJsc5nBtMAxUoFYb2rT1d/rf//53DcYE/v7772v+41nLCLB86I6iNdkKkyNDvopUukkXGjnjWVqz/iszUfHrtt9NL2HnH7sVRVEc+XHTZsd9SznLN2bL+okdFoidiKH//eVX+uDTL2jYE/OoOJK+Mc8kOWAAODfgxbdXXm3sB2NMAIE/uHsATp89R1Oefc3kl+AqdgLCOFezRMNO1KTnCOo+6gnqMVpRFCU8HYZOorxhzjgBiEtcWT/RdeQ0M9aPTJ/ZSiSfCC4cGNbPUaYBTZz38tUh/1CSNQFw8vRZGj7zuRQbgaIoihId0BC/p3R9GjnzeTb424Q1AQAjGGFVkqV4LdFksaIoihJdAkNAjWncnBfNPC8X/G0cTQCcPX+Bnlm4zOwoRkZRTlRRFEWJLmioZypYlQrWaUuvLnvfjPlzgT8YkQmAv//+H/36+y5q0msk3VmsFnsBiqIoSnSwh386DZ9qsoMirqeqCdicOnOWps9fZA4oRh5uHSJSFEWJHgj+WMn5WO229Pzi5XT5779NrE4zEwCXL/9N23buocHT5lHuai0oW4k6Zq+AGoKiKErag1ibuWA1k9m5aP2ONPW512nfocRr4nSamoAN9hccPHKUFi7/kNoMnGAZwuPmtBokKMpcsKoxhvR5Kxqn4m5EURRFSR4cBo/c/zjPBSMvWKCDIZ/8NVtT34lP0ZoN39KZc+fZ+BwREwgVOXHyFC1fu54mPv0y9Rn/JD3edwxVadOHCtVtRw9XapaERyo3TzFcfcFgcwQMiHuwNjfkKk8ZCtemjMXq+4ei9eim/OFzifvyviVYzyZDofDzWWhhpS9Ygy/vczIUqcM+k38pRzflq8KW9SxFrfsubN23wyhGujwVzBA4F4skcDHKiUcqNzMpIWp1Gkgdhk6hIU88Y/L+fP6fjXTxz7/YmBxMVEzADSiXlro79+wz5sP9oDbprYBQYNBrVPbJL3xDqWnr6P4WY9j7tcF9FxyyiC3vZ8rM/JweG/Q6+0xs0j9WnfL0nMeW9zcbqNiYd+mGML32G/NWorvrD2TKepcyszZQ4ZFLzL1x9wzQMMhTvSUdP3majUdOxHKcBL41gcOJR2n4jGfZH9XmxnyV6Z5GQ9mXw6uUnr6ecveYx96vTboC1emRzk+y5f2MmkDylJ6xnvL1mc8+E5t0BarRQ+2nseW9SqlpH1POVhPZ+7XBUEzHYZN9GSeBb03gzNmz9MlX34UfErIc/uYSjajk1I/YF8SLmJbNiLfCdm9hfjka+8v8JKgJJE+pJ6xg2Dp8MExfsCblH/AyW96rlJjwHmUuHf5ERaS/WfL+x76Mk8C3JgB27d1PReq3Z39Ym3TWR5+r+9PsC+JVik9Ybca1ufsFN+SuQLdXbGf97YYkZf2MmkDylJyyhrLX7sk+E5uMRepS8fGr2fJepMzMz+ixga+EbTBhKChvjZZ07MRJ38ZJYwKhWUSloFxwGmo3oFxa6+LIzFFPvsD+uDY35qlIWWt0t1rQn7MvihdJmPwh3VGpfZJ7DQaTpOgKc+X9ippA8pSY+D5lSghzqqAVDDMlNKGyVk+TK+9FSk5dS/c0GMjf7xUwFNRlxLSIxCuOSOgaE4jlC+SQ6v516RJ9+cMms8SK+4FtsDqg2LgV7IviRTC8laPRUPZebbBKpuCwN9nyfsWYwGA1AY5iY5ebBhH3TAD+LVutXmxZb7LBuucVlL5Q+OycWIe/6uMvIhKvOCKha0wABbhughMoh0pCuyESUC4SuocSj1GFlr3YH9gGSypztpnMvCjepPT0T+jhTjPZe7XB5PCjXZ5iy/sV7QnwlMGkcN/wk8L4Rh5oOZ4t70VKT/+UcnWZzd6rDdboF63fwSzHjFS8CiUSur43ASS/m/vaO2Zsj/uhAZbF3Vq+lVkhwb0wXiMQ7F5j79XmJkwONxnOlvcragI8ZlK4zST2edikf6wG5en1LFvei2DINEvVzknuMxhseh0ze4GJI5GKV6FEQtf3JvDPP//Qtp276c5i4U9J89uegeLjVoZf+5y7At1RuSNb1q+oCfCUnLKWstfuzT4PG2wuLDJ6GVvea0j2BgCkYv5x8zYTRyIVr0KJhK7vTQAcPXGKWg0Yz/7QNlf3DPhk4qvEpPfp1jKPs/dqgx2TGDriyvsRmQlYLd44MwEzKVyqKfs8bG4u3tA3PWUsiHigVfh4gKXl1dr1N41IxJBIxqtgIqEbFyaAMb2lH66nm/KEyWGUC3sGGpoPgntxvAZad3fV68/f6xUwOVxo6BtseT8iN4Fn2PL+ZAMVHbM8fK8xT0XKUq0rU9aLbKBiVi/5loQm7L3aZEuoQy8tWX01hkQyXgUTCd24MAGw79ARylO9BfuD22Dy68HWE82Lwr9A3gEt/Ec6zmDv0wY7QB/u8ARb3o/ocFBS0Lp32mEemBSewJb3GpgQxm557j5tMH+Yr2YrSjx+8mr8iHS8somEbtyYAA7On/TMK+yPfhWshS7ZxLSiuRfISyDgFbRa+UgWx96rBZb9ZffVsr/wqAkkRbKc2OwU7vciW95rYCPlrWXDNwaRrXPglLnXxI9IxyubSOjGjQng73/etsPx0PxA63g6+wJ5DWyJz1i4LnufBsv0bklobJYIcuX9hppAUszckUNQxMbChIkfsOW9BHo9WOHE3aMNegGYEP7Ppl+TxA+/xsm4MQFw9MRJaj0w/IQQXoLMZZqbZXPci+QlEiavoazVu7P3aWPmBYa/xZb3G2oCIczCfMAyujF3BfZZACyfvq1CW768xygx8T26vWJb9j5tsLG0VsdBVyeEbaIRr0AkdOPKBP786y9auW6D4zkDJp9Q19nsi+QlAknBwq//Rs
/nEZ/0fJxQE7gW0zJ2mg/IV4XuazqCLe8lcK8mS6rVyOPu0waHYr25el2S2BGNeAUioRtXJgD2H0qkhMbhN4mY1k/51mYSiXuhvAISZBUYED5BFlaF3FWnD1vebxgT0LQRVzHzAU2Gs8/BBqul8vb0/iaxhEkfUJYq4b97rB4s0aizmT8MjRvRileR0I07Ezh77jy9tGSVY2/ALzski49fZVJEcPdoiKN5Ae0JXEsJKzDeWrYl+xxsMhbxfl6tQGPo5bCLJACWhT63eDkbN6IVryKhG3cmgHK79uyn4g07sS+CjdlRW6WT5zfImIyilTskub9gMhSqTYVHvM2W9xNqAkHMCpwkdmOe8Cdq3Vq6uec3UOIbyFqzR5L7C8buBZw6czbZuOHXOGlMwK/Z8ThQ7uSp0zT/zRXm3FDuhbAxS+P6LmBfLK9gjpt8fDR7fzZmXqDTTLa8n1AT+JfA/oBn2Gdgc2PeynRPg0Fsea8QGAJcGDZDKsAB7vMWLmVjBohmvEprXWMCsXyBHKmhu/fgYSpSP3wL2fQGKnXwdGqFMjM+o3yWkYWdFzBpgnv6Klc8h5rAv2A+4J6Gg9hnYIMFErk9fuAShryy1nDuBRRr0NGcP8LFDBDteMXV64RU15gACnDdBCdQDpVwXREnUC6auucvXKQX33aeG8CH8KhZKeTdAOl00hjAqVF+OmaTI9AqVBMAeCduSWjEPgMb7A8oPmEVW94LBPYFPMfeWzDoBTz18ptsvLCJdrzi6nVCqhu3JoB6sFII44Dci2GDcdFMJZuacUXuRfMCCVPWUNaa4fcLwCTy9p3PlvcL2hMIgOdQcOgi9v5tsEIucAQpX0fMgzmPcSsps0MSRbsXcOToMTZe2MRCvHKLVDeuTQC9gVeWvu/YG7gpfxV6oOVY8/GwL1yMU+qJT+jhjtPZe7OJh8Pn1QQCYJ7ogRbj2Pu3MfmCWiGPFl9HrIM9Mg+2ncreWzBZi9emea+/w8aKYGIhXrlFqhvXJgAOHD5KCY27sC9IMOgaFxnlzXzqgfzpb5s5Du7eDOjxlGrm66WiagIBME5+W/lW7P3b4HyNxwYvYsvHOnjfi4xcYs5A4O7NBr2Akk260LETJ9hYEUysxCs3SHXj3gQuXPyTFq5Y43gOsdlUVbefZ5eMIkcM0mFw92YTSC3tzQ9fgswEfH6eAALkqHfMcA93/wYzBNrYs+96ySlrKEfDwfy9BZGtRB16YfFyNk6EEivxyg1S3bg3AYBziMu36Mm+KMGYJaP9X2ZfvFgHk773Ng2/O9SctdzaHymDOeQm4N/zBMzQYIfwKcbR4Lm73gC2fKyDjWGPDXo17P4HgOXhZZt3F/UCQCzFKylSXTUBiz//ukQr131BtzxWhX1hbDCccnuFtp5MLodWXd4+L4RdKhpYEtve6k57c+7DCR0OCiQVzFK9K3vvNoGloXPZ8rEOksRlqeqwEdT6BnKUaUCLV65lYwRHLMUrKVJdNYErIMNoy/7hJ8tAYGPVLDPuyL2EsUyxcavMB87dl02GInWo+PjVbHmvo8NBG6jY2OV0U76q7L3bmKWh4723NBS5vnJ1mxu2oQMy5q9EjXoMp3PnzrMxgiPW4pUEqa6awBUuX/6bvt24mW4vEn49PV6wW0o0Nh8T9yLGMlgqms1h+7wxuY4z2PJeJ96HgxAkcyNIMvdtg97gnVU6seVjGqtRVnT0MuvbbMjelw16AQ9Xakpffr+RjQ/JEWvxSoJUV00giJOnz9CImc+bVQPcC2Rj77At5bGdxFdbSo+GHxK6s3InEzC5OrxMvPcEzH6RGuH3iwQOVfJeIwD7eO6uPzDJ/YRinxrGxYZwxGK8ckKqqyYQBA6S2LFnP+WpHj6zIkBmTpxV6rXxc7N7OFxWUQssrSsyailb3svEdU8ALeUx75rzAbj7tsFQULGx3soaano4PZ5m7yeYG3OXp4J12tKuvfvY2BCOWIxXTkh11QRCwAay15d/6DhJjGGhjMUbeC5Y4vzk7LX78Pd0hcBGofFseS8jMwF/TgyXNquCnmDv2Qa9wCxVO7PlYxU0wpABF2lPuHuywTDQPaXrmzTyXFxwIlbjVTikumoCDInHT1LzvqPNi8O9UDY35Al8NNiByb2gsUjpGVaryQpyYVcJ5SpPt5Zp4buNY/FsAjhUBSu/uHu2sRc9cOVjkw1UYuL7lL1WL/Z+gkGjrtWA8VYjj48LTsRyvEoOqa4xAb9mx+OQ6F66dJk2/vob3Vc2/CQTQKv5wbZTrADzGfOSxiZY/ZOhcC32fmzMjlErYHLlvUq8moDZMW61lsOdJQzMUNA476wKwp6HRzrPclwNhGGgArXa0G+79vgyXiWHVNeYgD6YpBw/eYpmv2J1M/NXZl+sYDIUrkMFh3hnp23JqWvprvr92XuxuQm5hJoMY8t7lXg1gVLTPqacLcez92sT6NV2YcvHIvgtCw19wzE7Lnrz2RPq0BzrW/ZzvOKQ6hoTQAGum+AEyqESriviBMrFuu7ufQeoZseBzsNCVgvr9grtPDMsZDaO9X4u/H1Z/5apZBMz6cbV4UWMCcRhKmlsoMLwHne/Nv+mTOfriCmsng0WOGSpJsj5lb8SNeg+zBwk5fd4FYpUV00gDGfOnKUvv//J7C7kXrBgsOrivmYjqfi4lZ6g4OCFjhvH8O+5us1hy3sR7O3I3WMee682gSWST7DlvQhW+uTtM99qqIRvyCBvVMEhC9k6Yg3sB8AQLHcfwWAYKF/NVvTjL1vN9+z3eBWKVFdNwIGjx0+YAyduLuA8LKQoSmyAXu5dJevS3Fffvvotx0O8CkaqqyYgYM/+g1S3yxDTsuBeOEVRYgs02rDC7/SZM1e/43iJVzZSXTUBAWfPnqNv//sLPVA+/HF8iqJEH+z4L1SnHf26fec133G8xCsbqa6agJATJ0/RK0vfM9vOuRdPUZTog2EgLO1+bdn7Sb7heIpXQKqrJuCCw4nHqP/kOY7HUSqKEh2QALL/pNl0/nzSDKHxFq+kumoCLrh48SLtPXiYKrbqrfMDihJjYDlo3S6DzWIO7vuNt3gl1VUTcAHK/fXXJfr+562Us0Jj9kVUFCXyYB6gcN129PPW7ey3C+IxXkl01QRcYOviXOI3V69zPnsgmFzlTIoJrEOPParSjXkFS2DNPVRhynsHpyyauEc8C66sV8BvxN7bNeA+K7HlYwF8K8hhxV/7taBXjkbZW6s/Yr9bm3iNV1y9wagJuCBY99SZszT6qfni/QN4qe9tOtJsRIo1Hmo3le6qP8BxZzQCaI5Gg9k6vMBD7adZ1z+UvTcbpMu4q24ftrwXwD3e53CWNIDRZavdi60j2uAecB420npz1x4M3lkcGD929gL2mw0mnuNVONQEXBCqe+TYCXNMndMhNAAH0WSt0cMcfsFuhY8yxSesMgnEuGu3MbuiLSPDtn2ujlgnHnIH4fzrh9pNZu/tX8rRzcUbULFYPEISKSHGrzYH3fPXfi2ZClal1gPH02mrUcZ9s8HEe7xKDjUBF4Tq4v9v/2MvFanf3rEVDdD9z
l6nrznsm/0AogiSjD1gkoyFu49A8EAuGq6OWMf/JrCBio1bYf1G4bPfohdwT8PBTPkoYxkAUkPnaDyMve5QMuSrRFXa9DGbObnvNZR4j1fJYUzAr9nxOFJb90/rv63/9ke6r5xz2mkAI7i7Xn9zuAv7IUQL6wPEATlO+YSuHjjjwd6AzAS8e7xk6emfmPMAuPsKBgewFBr2JltH1LDeJ5x5cH/z0Y6poYHZEFa3nckLxH2rHBqveIwJ6IORkZwuMhS+uvQ9urNYTfaFDcWkabZaOyVjLOtoyakfOY6Zm+yiCU1idlgrHHIT8ObxksXHr6LMDtlCzfnYNXuy5aMGDGDyh/RAi3FJrpcDE8GPVmlOK9Z9zn6nyaHxiseYAApw3QQnUA6VcF0RJ1DOT7qJx47T9PkLxTuKA1lHR5lhGPbDiAIIksjRjt4Kd8026a4cpOO13oAxAZ+mkkbK71zdZrP3FAyyhebv/zJbR3SwDMDqFeds4zSPEQAGcH+5RvT84uXsNxoOjVc8agIucNI9dOQoDZwyx/l84itgaCVnq4lmMo//QCJPwpQ1lM3puD6rN3BL8UZUfIK35gZ8OxxkmTHmAm6xemjcPdng3Is7KrYz5/Ky9UScDab3+XDHGda1OS+usDODjp/7Ev3zzz/sNxoOjVc8agIukOjuO3iI2gyaYHYvci9yKFgTjSVxsWIEOCazwMBXHHsDpifTfJQJrFw9sYhfh4NwzOKDbaey9xNM+kI1KU+v59g6Ig8MYC092mmW47sGYAAYbu074Slfxo3kiISumoALpLqB1NODzeoF7oUOJV3+amZ4JVZOJgv0Bnqy1xpMhsK1zWQyV0cs4svhIKsXUHTMMspYpA57PzboBdxZpSNfR6QxcwBrzJ4AzFFw1xsMDOC2wtWtxtV4Onn6tG/jBkckdNUEXCDVRfKq33buphodBoiNINCyHm1aR+yHE0ECwXIh3Zgv/Ea4wEqnAVRmhjcO2Zf1BLxlAhhOubfZSPZegsFcQL6+C9g6IoplACUmfUA5W00QDwFhnq1FvzF05OgxX8cNjkjoqgm4wI0ujGDrjl1mHbM066hZv91gYEysvMES1rvq9UtyjaGkL1iTHhv4KltHrOE3E8D9FBz6hpmo5+7FxhwiX72L9fdRnsi3DABnA9/XdIRoGahtAI/3HW3m2/Bd+T1uhBIJXTUBF7jVPXfuHG3+bQdVaNnThRFUNEMx2DSDj4b9mCIAJg8LD3/TMQ+NmWys1METB9L7zQSwrl42bFfHuu/oGnUZ613G5PXd9fuz1xgKDCBzwarUtNfIqwYA4iFuBBMJXTUBF6REF0aAzIaVWvcWDw1hnDRLtS5UbPxK8/FwH1UkwFADdpaG30VsH0g/1yoT20tGA8Nc/jCB0jPWU94+L7D3EAzepey1erF1RAo0KIqOedfqjXRjrzEUuwfQrPcoOnDoyDXfU7zEDZtI6KoJuCClujACHHVXtW1fsRGghX17hbZUdPSyqC3pg26REW9T+oLhE3mZVlvp5jG/gcw/JoD8OqvotnIt2Xv4l3ImH1ShYYuZOiIDnnmRUe/Q7RXbMdeXlOAhoIOHE5N8S/EUN0AkdNUEXHA9umDHnn1UrV0/F0ZQnm4t04IKW4EYLT/uI0trsGLpgZbjzMfJXaONSSfRYpz56Ll6YgG/mACWE2NlDXf9wQR2pg9l64gEGCIsOGQRZS7zOHt9oeAdwyqglv3H0uHEf4eAgom3uBEJXTUBF1yv7uXLl2nXvgNm1VDG/LIU1PgwbinekPL1mR+1JaTINnlLCefcSBh7LjT8LbaOWMCYgMfnBNA7Q94fpzTLeG8ylWoSmFti6klTZgU2geXt/bzVE6nLXl8ouF6cz9FqwLhkDQDEY9xIa101ARekhi52Ou45eJjqdxsm3lkMzO5iq0VuMpBGeJ4ALbpHu842q0y4a7Mxq1Cqdjabl7h6oo0fegIlJr1vzgHgrj0YzNM83HE6W0daApPCCqCcrSfSjXmEPV7LALIUr0Udhk426Ve478cmXuMGV68TUl01ARekli6M4OCRY9R28ERxriFgziSo3pWKjV1uBbTIrs3HeP/tldonuaZQsAP6kU4zozaPEQ5ZTyB200aY/EDd59INucMPzWEYEfNJkV6xhSFLjP9nq9mDvS4O5AK6p3R96jfxKTpx8hT77QQTz3HDLVJdYwJ+zY7HEUu6x0+epoFT5tIdRWXZRwGO3MtUqikVGPhqRFvcdjoJjDNz13UVq1WHfPbFxixj64kmchOIwbQRVu+v6OildHOxBux1B4Od3JHdGLbBDFXm7/cSZUqQn70NA3igfCOa+txr7DfDoXFDjlTXmIA+GBlpoXv0+Ama8uyrlLV4bfZDSQ6Tc6jdtCvnEkRmeCiwZHSQ6b5z12RjpyuOtWEh2XBQLPYEArts76rrvHkPu7hxcBFfTxpgmRPmHR7uMN0MWXLXxIHzAHJXa0EvvbOa/V6SQ+OGHKmuMQEU4LoJTqAcKuG6Ik6gnOoGOH7iJC14awXdXaqeaR1xHw0HPnjs6sVywYgMD5nW6Lui1ihM6mEMC1mBl60rCgR6AgvZ67UxJtAjtnoCpS0zfbTLk+z1XoNlzrdYLXEMF3L1pDZIF4L1/3cLdpYHg42TBeu0pQ8//5b9VsKhcUOOVFdNwAVpqXvy1Cla8dFn5rCMdHnDT8AGY5aRlm1BBQa8HJHVQ2ZcuutswaRfOcpYrJ5Z3srVEw28aAK45kJDF5shntBrDQXX/nDHmWw9qYpZ/bPWDP/g3eOuhQM9yJsLVKayzbvTj5u3sd+JExo35Eh11QRckNa6Z86epS+//4mKN+wk3ktgg/QO9z8+1gwbpHWvAFlGcWi+005irBa6s0onM4zE1RNpAibgoTkBK9iil4fd49y1BnMDdplX7ZLm+0nQ+kdPA8dAOiUYDAYGgEUQ9boMod//2OvL7zc5Yl1XTcAFkdC18w3V7DiAMheSj7EC7DK+tWwrk9AtLXsFWPlTeMRbJj89dx3BmA1LjYbGxPyAzARiZYloIN3y/Y+PYa/zGq5Mxhce+Q5TTyphWv8fmdY/doez15EMGOLMnlDHLAHFLmA/f78csa6rJuCCSOkiAynOJOg5doZZP819WOFIV6A6PdDC6hVMfC/NegXYtfpQuylmEpi7hmDM/ECH6VHb9Wwj7wlE3wRg4lhqK0m3bH7v1hPZelIDvENI/nb/46OdV4eFgAngBys2oXFzFtDpM2fN++337zeUWNdVE3BBpHWPHT9Bzyx8x6yjxsfEfWTJYXIPlW9Njw1+zQooaXNqGfYO3Fm1s+NqIYCU0/n6vWgCMVdXJAiYQOzPCWDeBcnhbsrn3BO0h4HS5De+0vovMOAVurVcK1Y/HNgV/1jtNrRszafXvNfx8v3axLqumoALoqF76vRpWvflf8w8gZsdxjYmp0/L8WYZX2pvHjIJ5ka+I0opYVauWH9n0kpYwYWrL63xwpwArhH5dpD4jbu+YGC+mUo2oaJjUn81EN4VM/b/+BjXrX9cF/a+1Ok8iDZt+S3JOx1P
3y+IdV01ARdESxfs2nuAWg0YT1lL1Ba1vIPBkEKmUs3MblO03lNziKjMjPVmY5JkjbhZyWS1KDHZydWV1hgTiOG0EUgbjuM6M5cRrLix3oGMRepaPYbn2bpSCt6NEhPes96Vpymz9c6w2mHAyjZsABs4ea45CYx7l+Pt+411XTUBF0Rb9+z5C/TcG8vp4crNXK8eAhg6wAEwBQa9ZjaZpVZqBwxFmFwxgoNzzFkJ1btaZhT5YzTlPYEomIBlAMXGrTSrqbjrCgXzLPe3GMvXlQLwbBImfUj5+79Md1Rub0yG000ONEzQUy3RqBMtef9j9j22idfvl6vXiUjoqgm4IBZ0L//9N/3wy1aq3r6/ybrIfZBOmE1mdfuaFT5mFVEqDM8EDqfvIZrIxPDCPY0GR3zpaMAEYnBOwHr+xa3W9z2NhrDXFAp+P+TnSZXhPUsbvwOOqZScUsZh5/9pO3iCWf7JvcPBxPP365ZI6KoJuCCWdE+dOUvj5r5IOco0SFGvAAT2Fow2Y7/XvYTTCiZFx6wIbB4StCJhBPc2GXYl7QVTXxog7wlE0ASs54bhl3sbD2OvJxRM+N9Wvo3J1MnWJ8XSxQov7ADHQfVuUj7YoPWPzV/5a7a2eqjL2HeXQ79fOZHQVRNwQazp/nXpEn35wybTK7izWC3XcwXAfMjF6tFDHZ4wrdFS01NuBgiyyHV/c7H6rFYo5mD9+gMjZgQxNxxkegCrKYfUAK4kDyw8Yglfn4Qrwb/Y2BWUs81kcwYEp+UExv7vLduA2gwab/a1cO9tcuj3KycSumoCLohV3TPnztOsl940cwVomXEfrRMIMLckNKGHO84w+wsQKNgg4gD2AuTr84IZs+Z0QjFG0HBQRIwgloaDMAlcfPxKuqfBQPY6kmDMun7Ks4Mi+E9D8F9OD7adQjeXaMTrOIBGA4YhSzbpQotXrmXfVyf0+5UTCV1jAn7NjsfhV91Lly7Tz9t2UPO+YyhbiTqu9xXYwAyw+/TBdlPMRKVZf24FEDawJAN6E0ged1M+2ZJWjHHfXa+/mVfg6kstYmU4CAaAlnj2Or3Za0hKOcpQqBY9koJDYqCFeR8kesvZagJlLOK89DQ5MOyIhsbgafPo4JGjvvyOksPPusYE9MHI8ILuyVOn6fV3P6CiDTpQ5oLux3lt0OLD0YA5W403AaSkywlk/P19zUaalj5XfyiB9NM9TO4jt6YjJRaGg7Aiq+joZWaFFKeflHLmlLB7mw539VyggwnfIiOXWL/DKFECuuSw0z7U7jSIPvnqe/Oe+f07CsXPusYEUIDrJjiBcqiE64o4gXKqK8OtLvIP7di9l/pPmk05ytRP8cSxwTID7Pa9r+lIc2oUAguCKRd4QkGmSYz5o6XP1h2COTmtRncqbvVApBpuiPZwENbgYx9AFuEyUGMABarRPQ0Hi1cCQQNDa4WHv2WMI71lIHzdzqAhgGWfmPid++qSa07+iofvKBg/66oJuMBruqfPnKHP//NfqtN5sNWSq+sqRTUHctTkaDzUbLhKsFrsyHPvdKANAhIOQ5EagUmCV641FRq2ONVzDcl7AqltAoHJ2PwDXjYTu5wuhzGABgOt5+BgABjyserHXA5Of7u7/gDxUByH6QXmr0Q5KzSmLiOm0radfyR5t+LpOwJ+1lUTcIFXdc+cPUeLVqyl0s260Z3Faro6uIYDm85whu0jHWeYiUYEeqQYZgOURcKkNZS9dm+xEdi7YfP0fDZV9xLITSAVh4OsAI0hrkc6zzLDOpwmB5Zs3lWvf1gDgEliHgU9tAfbTKZby7a0TNT9CrFg0uWpYHqPDboNpY82fMu+UyAevyO/6qoJuMDrulhF9NTLb1HeGi3ptsIpHya4ihWs01lBE8EKq1aQnyi5iWQYRfbafeRGYIHWbI4mw1Pt5LRIDwchSGP83+3JW+lsA2D2bmCsHxO9eCZIGZGtdi/z91w9bsBCAiwoKPd4d3pr9UfsuxRMPH9Hbol1XTUBF/hBF/9/78EjNHLWC/RA+cZmuR+6/1xgcIPJT1SyqVmBgslI5CjCEAVWp9gBzBhB3b5moxhXB4fJN1TmcSow4CUT/IIDolsiZgLWPeP+8/R+zhz3GKqRPIE5AAznBG/eCwT+j02PAsNk9zUf47Le5EHLH+dbl2jUmZ5ZuNScec29R6HE+3fkhljXVRNwgZ90//77f7R7/yEa89QCc6QlzOB6h4lsMMGLHDgPtZtqjpcsMQk9hHWmNY/hnRyNhrjeoWrvbr6ebKhpPyewwbq2T0zrH3sfuPqTxTJiaGPjGIaAMLyGZ4VxfgR+mOsdFdtZv9H1zevYIPhnK1GbSjbuQs8uWppssrfk0O9ITqzrqgm4wI+6+Ld9h47QuDkvUb4arcycwfVOIAeDid7MVksey0Xz93/R7ErG/oOUpCoIZENtQrm7P52ibKhpOSeAoR/s/sXYP+YzuLqTBQZQsKaZdEdrH88Hw2v3NhlOmUs3u+5xfpvAhG9lurtUPSrfoie9/M5qOnHq3xU/btDvSE6s66oJuMDPuvibw0eP0+xX3qYyzbvTXSXrmhUiXDBJMVYQwnr1u+r2p0c6zQi0llMwFGUykVbtYjJemlVK4uWTqTwcdGVVTvHxqylPr2etlnp7tk4nsJcie50+9GjX2WYCPX0h96fJhQPBP3OhaibFc8Mew2nph5/SxT//8vX7zKG6PGoCLogX3QsX/6Rla9ZTo57DTX4Yt2cdi7GCk5uJ4lBwkD1WKT3aZbZJwhbYw5B87yC1TABnKGB+A5vocMSm2zN3k3CdzyE5MNmLnFK5q7WgPhOeom9/2nzN7xwv77ON6vKoCbgg3nT//Osv+vrHTdR7/CzTiryjaI0Up6NIS5DqImPR+pSjyTDKP+AVM28AQwjdZ5ByE8BY/6cm8GNVTr6+800q7vQFU5bKOy1Bqx+bA9GTS2jchZ58+S3adyiR/X3j7X1WXR41ARfEq669A/mJ5183h4YghUBKE9WlNZiDwCol7JbFOb0YpzcrlaatM0M3TnMCWPKau8e8QNC3jATDTdgLgZO27q43wORV4spFG5gzlv1ixVf9bsPo3Y8+o/MXL7K/q028vs9cvU74WVdNwAWqe8GkDnj7vXXUeuB4s6MUhoDUAqmxzDTVsa4Ju5zvrNKR7ms+inJ1e5oe7fIU/7dXwBLNnK0mWX87l+5tPJRuq9DG1ZLWSIIJfAR+DNlhieeoJ+fTdz9vYX9LDn2f5fhZV03ABar7L+fPn6cDh47Qm6s+ok7Dp1C+mq3MEESmx6qm2lJTJSlY2onlvPeVbUilm3al0U++YM6UuHT5b/Y3DIe+z3L8rGtMwK/Z8ThUV44bXWQvfXfteuo1diYVqNXGLEO8tVC1mJxD8BL2GD/mYzAvU6FlT5o07xUzV4Pn7vf3KhTVlSPVNSagD0aG6so4c+4crfn8Gxo4ZS4Vrd/BDFdg/wHmEWJy2CjGgHFiSSfSODxUqSnV7DiAnnzpTfppy/Ykv0c8vVdAdeVIdY0JoADXTXAC5VAJ1xVxAuVUV4aXdU+eOkV
f/fATzVzwBjXuOcLsTkYvAWPZ2LjEBcF4A8NnmQpWpawlatP95RpRicadzZLOt977mHYfOMQ+X5t4fa+4ep1QXR41AReorhxOF/MIhxOP0qqPN9CYp+ZT3c6DTUsXWSuzFK9lJpgx5s0FSr+AnhA24WGoDJPqGOLBAUCdhk+l5xcvp41Wa597nsmh75Uc1eVRE3CB6sqR6qKngDMPnn5tCXUZOZXKNu9OD1ZsYoaQcAYCJkExjOS1yWYE+/R5K5oWPsbzMWmOydw81VtQ/a5Dafj0Z+nNVWvp1+07ff37cqiunEjoqgm4QHXlpFQXvYVjJ07Sd5u20OJVH5mcRi36jaXC9dqbJan3lK5vhk3QkoY5oOcQTYPA+D0mbtGLgWGhdX9vmQZm2Ktiy17UdeQTNGP+Ilq+dj39su139p7j6fcFqisnErpqAi5QXTmprYusp4cSj5lew4K3V9GQJ+bR4/3GUOXWfahgnbbmAHSYBFrbMAoEYwwxITDjrGUYhhsQ1DE5i/KY1MYkLVrzGLrCuD16K7mqPm525dbuPIg6DptCE55+iRavXEvfb/qVjp88yd4bh/6+clRXjlRXTcAFqisn0rowif2HE80KmnVf/MfqRaylea+/YwJzv4lPUdtBE6hF/zGOtBo4jloPGk9trL/vOXYmjX5qvkmq9/ryD+n99V/Ttxs30/Y/9lq9lVPX6MfLc7ZRXTmxrqsm4ALVlaO6clRXjurKkeqqCbhAdeWorhzVlaO6cqS6agIuUF05qitHdeWorhyprpqAC1RXjurKUV05qitHqqsm4ALVlaO6clRXjurKkeqqCbhAdeWorhzVlaO6cqS6agIuUF05qitHdeWorhyprjEBv2bH41BdOaorR3XlqK6cSOgaE9AHI0N15aiuHNWVo7pypLrGBFCA6yY4gXKohOuKOIFyqitDdeWorhzVleNnXTUBF6iuHNWVo7pyVFeOVFdNwAWqK0d15aiuHNWVI9VVE3CB6spRXTmqK0d15Uh11QRcoLpyVFeO6spRXTlSXTUBF6iuHNWVo7pyVFeOVFdNwAWqK0d15aiuHNWVI9VVE3CB6spRXTmqK0d15Uh11QRcoLpyVFeO6spRXTlSXTUBF6iuHNWVo7pyVFeOVFdNwAWqK0d15aiuHNWVI9VVE3CB6spRXTmqK0d15Uh11QRcoLpyVFeO6spRXTlSXWMCfs2Ox6G6clRXjurKUV05kdA1JqAPRobqylFdOaorR3XlSHWNCaAA101wAuVQCdcVcQLlVFeG6spRXTmqK8fPumoCLlBdOaorR3XlqK4cqa6agAtUV47qylFdOaorR6qrJuAC1ZWjunJUV47qypHqqgm4QHXlqK4c1ZWjunKkumoCLlBdOaorR3XlqK4cqa6agAtUV47qylFdOaorR6qrJuAC1ZWjunJUV47qypHqqgm4QHXlqK4c1ZWjunKkumoCLlBdOaorR3XlqK4cqa6agAtUV47qylFdOaorR6qrJuAC1ZWjunJUV47qypHqqgm4QHXlqK4c1ZWjunKkusYE/Jodj0N15aiuHNWVo7pyIqFrTEAfjAzVlaO6clRXjurKkeoaE0ABrpvgBMqhEq4r4gTKqa4M1ZWjunJUV46fddUEXKC6clRXjurKUV05Ul01AReorhzVlaO6clRXjlRXTcAFqitHdeWorhzVlSPVVRNwgerKUV05qitHdeVIddUEXKC6clRXjurKUV05Ul01AReorhzVlaO6clRXjlRXTcAFqitHdeWorhzVlSPVNSagKIqixCf/j3MQRVEUJR74P/r/T/I0iJMTx7EAAAAASUVORK5CYII=" +} diff --git a/agent/test/dsl_examples/keyword_wikipedia_and_generate.json b/agent/test/dsl_examples/keyword_wikipedia_and_generate.json index b7ad9cb14252c71bde4c850f1cf38aef1973a43b..fa1d62194f10977721f92878a695fa1629d155cd 100644 --- a/agent/test/dsl_examples/keyword_wikipedia_and_generate.json +++ b/agent/test/dsl_examples/keyword_wikipedia_and_generate.json @@ -1,62 +1,62 @@ -{ - "components": { - "begin": { - "obj":{ - "component_name": "Begin", - "params": { - "prologue": "Hi there!" - } - }, - "downstream": ["answer:0"], - "upstream": [] - }, - "answer:0": { - "obj": { - "component_name": "Answer", - "params": {} - }, - "downstream": ["keyword:0"], - "upstream": ["begin"] - }, - "keyword:0": { - "obj": { - "component_name": "KeywordExtract", - "params": { - "llm_id": "deepseek-chat", - "prompt": "- Role: You're a question analyzer.\n - Requirements:\n - Summarize user's question, and give top %s important keyword/phrase.\n - Use comma as a delimiter to separate keywords/phrases.\n - Answer format: (in language of user's question)\n - keyword: ", - "temperature": 0.2, - "top_n": 1 - } - }, - "downstream": ["wikipedia:0"], - "upstream": ["answer:0"] - }, - "wikipedia:0": { - "obj":{ - "component_name": "Wikipedia", - "params": { - "top_n": 10 - } - }, - "downstream": ["generate:0"], - "upstream": ["keyword:0"] - }, - "generate:1": { - "obj": { - "component_name": "Generate", - "params": { - "llm_id": "deepseek-chat", - "prompt": "You are an intelligent assistant. Please answer the question based on content from Wikipedia. When the answer from Wikipedia is incomplete, you need to output the URL link of the corresponding content as well. 
When all the content searched from Wikipedia is irrelevant to the question, your answer must include the sentence, \"The answer you are looking for is not found in the Wikipedia!\". Answers need to consider chat history.\n The content of Wikipedia is as follows:\n {input}\n The above is the content of Wikipedia.",
-                    "temperature": 0.2
-                }
-            },
-            "downstream": ["answer:0"],
-            "upstream": ["wikipedia:0"]
-        }
-    },
-    "history": [],
-    "path": [],
-    "messages": [],
-    "reference": {},
-    "answer": []
-}
+{
+    "components": {
+        "begin": {
+            "obj":{
+                "component_name": "Begin",
+                "params": {
+                    "prologue": "Hi there!"
+                }
+            },
+            "downstream": ["answer:0"],
+            "upstream": []
+        },
+        "answer:0": {
+            "obj": {
+                "component_name": "Answer",
+                "params": {}
+            },
+            "downstream": ["keyword:0"],
+            "upstream": ["begin"]
+        },
+        "keyword:0": {
+            "obj": {
+                "component_name": "KeywordExtract",
+                "params": {
+                    "llm_id": "deepseek-chat",
+                    "prompt": "- Role: You're a question analyzer.\n - Requirements:\n - Summarize user's question, and give top %s important keyword/phrase.\n - Use comma as a delimiter to separate keywords/phrases.\n - Answer format: (in language of user's question)\n - keyword: ",
+                    "temperature": 0.2,
+                    "top_n": 1
+                }
+            },
+            "downstream": ["wikipedia:0"],
+            "upstream": ["answer:0"]
+        },
+        "wikipedia:0": {
+            "obj":{
+                "component_name": "Wikipedia",
+                "params": {
+                    "top_n": 10
+                }
+            },
+            "downstream": ["generate:0"],
+            "upstream": ["keyword:0"]
+        },
+        "generate:0": {
+            "obj": {
+                "component_name": "Generate",
+                "params": {
+                    "llm_id": "deepseek-chat",
+                    "prompt": "You are an intelligent assistant. Please answer the question based on content from Wikipedia. When the answer from Wikipedia is incomplete, you need to output the URL link of the corresponding content as well. When all the content searched from Wikipedia is irrelevant to the question, your answer must include the sentence, \"The answer you are looking for is not found in Wikipedia!\". Answers need to consider chat history.\n The content of Wikipedia is as follows:\n {input}\n The above is the content of Wikipedia.",
+                    "temperature": 0.2
+                }
+            },
+            "downstream": ["answer:0"],
+            "upstream": ["wikipedia:0"]
+        }
+    },
+    "history": [],
+    "path": [],
+    "messages": [],
+    "reference": {},
+    "answer": []
+}
diff --git a/api/apps/__init__.py b/api/apps/__init__.py
index 0c0ace7f0ae195e1e6dd03a4b3685974628f4bd7..29b67a09d09867444ecb2eee7e69ba8dbc658244 100644
--- a/api/apps/__init__.py
+++ b/api/apps/__init__.py
@@ -1,125 +1,125 @@
-#
-# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# -import logging -import os -import sys -from importlib.util import module_from_spec, spec_from_file_location -from pathlib import Path -from flask import Blueprint, Flask -from werkzeug.wrappers.request import Request -from flask_cors import CORS - -from api.db import StatusEnum -from api.db.db_models import close_connection -from api.db.services import UserService -from api.utils import CustomJSONEncoder, commands - -from flask_session import Session -from flask_login import LoginManager -from api.settings import SECRET_KEY, stat_logger -from api.settings import API_VERSION, access_logger -from api.utils.api_utils import server_error_response -from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer - -__all__ = ['app'] - - -logger = logging.getLogger('flask.app') -for h in access_logger.handlers: - logger.addHandler(h) - -Request.json = property(lambda self: self.get_json(force=True, silent=True)) - -app = Flask(__name__) -CORS(app, supports_credentials=True,max_age=2592000) -app.url_map.strict_slashes = False -app.json_encoder = CustomJSONEncoder -app.errorhandler(Exception)(server_error_response) - - -## convince for dev and debug -#app.config["LOGIN_DISABLED"] = True -app.config["SESSION_PERMANENT"] = False -app.config["SESSION_TYPE"] = "filesystem" -app.config['MAX_CONTENT_LENGTH'] = int(os.environ.get("MAX_CONTENT_LENGTH", 128 * 1024 * 1024)) - -Session(app) -login_manager = LoginManager() -login_manager.init_app(app) - -commands.register_commands(app) - - -def search_pages_path(pages_dir): - app_path_list = [path for path in pages_dir.glob('*_app.py') if not path.name.startswith('.')] - api_path_list = [path for path in pages_dir.glob('*_api.py') if not path.name.startswith('.')] - app_path_list.extend(api_path_list) - return app_path_list - - -def register_page(page_path): - path = f'{page_path}' - - page_name = page_path.stem.rstrip('_api') if "_api" in path else page_path.stem.rstrip('_app') - module_name = '.'.join(page_path.parts[page_path.parts.index('api'):-1] + (page_name,)) - - spec = spec_from_file_location(module_name, page_path) - page = module_from_spec(spec) - page.app = app - page.manager = Blueprint(page_name, module_name) - sys.modules[module_name] = page - spec.loader.exec_module(page) - page_name = getattr(page, 'page_name', page_name) - url_prefix = f'/api/{API_VERSION}/{page_name}' if "_api" in path else f'/{API_VERSION}/{page_name}' - - app.register_blueprint(page.manager, url_prefix=url_prefix) - return url_prefix - - -pages_dir = [ - Path(__file__).parent, - Path(__file__).parent.parent / 'api' / 'apps', # FIXME: ragflow/api/api/apps, can be remove? -] - -client_urls_prefix = [ - register_page(path) - for dir in pages_dir - for path in search_pages_path(dir) -] - - -@login_manager.request_loader -def load_user(web_request): - jwt = Serializer(secret_key=SECRET_KEY) - authorization = web_request.headers.get("Authorization") - if authorization: - try: - access_token = str(jwt.loads(authorization)) - user = UserService.query(access_token=access_token, status=StatusEnum.VALID.value) - if user: - return user[0] - else: - return None - except Exception as e: - stat_logger.exception(e) - return None - else: - return None - - -@app.teardown_request -def _db_close(exc): +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import logging
+import os
+import sys
+from importlib.util import module_from_spec, spec_from_file_location
+from pathlib import Path
+from flask import Blueprint, Flask
+from werkzeug.wrappers.request import Request
+from flask_cors import CORS
+
+from api.db import StatusEnum
+from api.db.db_models import close_connection
+from api.db.services import UserService
+from api.utils import CustomJSONEncoder, commands
+
+from flask_session import Session
+from flask_login import LoginManager
+from api.settings import SECRET_KEY, stat_logger
+from api.settings import API_VERSION, access_logger
+from api.utils.api_utils import server_error_response
+from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
+
+__all__ = ['app']
+
+
+logger = logging.getLogger('flask.app')
+for h in access_logger.handlers:
+    logger.addHandler(h)
+
+Request.json = property(lambda self: self.get_json(force=True, silent=True))
+
+app = Flask(__name__)
+CORS(app, supports_credentials=True, max_age=2592000)
+app.url_map.strict_slashes = False
+app.json_encoder = CustomJSONEncoder
+app.errorhandler(Exception)(server_error_response)
+
+
+## convenience for dev and debug
+#app.config["LOGIN_DISABLED"] = True
+app.config["SESSION_PERMANENT"] = False
+app.config["SESSION_TYPE"] = "filesystem"
+app.config['MAX_CONTENT_LENGTH'] = int(os.environ.get("MAX_CONTENT_LENGTH", 128 * 1024 * 1024))
+
+Session(app)
+login_manager = LoginManager()
+login_manager.init_app(app)
+
+commands.register_commands(app)
+
+
+def search_pages_path(pages_dir):
+    app_path_list = [path for path in pages_dir.glob('*_app.py') if not path.name.startswith('.')]
+    api_path_list = [path for path in pages_dir.glob('*_api.py') if not path.name.startswith('.')]
+    app_path_list.extend(api_path_list)
+    return app_path_list
+
+
+def register_page(page_path):
+    path = f'{page_path}'
+
+    page_name = page_path.stem.removesuffix('_api') if "_api" in path else page_path.stem.removesuffix('_app')  # rstrip() strips a character set, not a suffix
+    module_name = '.'.join(page_path.parts[page_path.parts.index('api'):-1] + (page_name,))
+
+    spec = spec_from_file_location(module_name, page_path)
+    page = module_from_spec(spec)
+    page.app = app
+    page.manager = Blueprint(page_name, module_name)
+    sys.modules[module_name] = page
+    spec.loader.exec_module(page)
+    page_name = getattr(page, 'page_name', page_name)
+    url_prefix = f'/api/{API_VERSION}/{page_name}' if "_api" in path else f'/{API_VERSION}/{page_name}'
+
+    app.register_blueprint(page.manager, url_prefix=url_prefix)
+    return url_prefix
+
+
+pages_dir = [
+    Path(__file__).parent,
+    Path(__file__).parent.parent / 'api' / 'apps',  # FIXME: ragflow/api/api/apps, can be removed?
+] + +client_urls_prefix = [ + register_page(path) + for dir in pages_dir + for path in search_pages_path(dir) +] + + +@login_manager.request_loader +def load_user(web_request): + jwt = Serializer(secret_key=SECRET_KEY) + authorization = web_request.headers.get("Authorization") + if authorization: + try: + access_token = str(jwt.loads(authorization)) + user = UserService.query(access_token=access_token, status=StatusEnum.VALID.value) + if user: + return user[0] + else: + return None + except Exception as e: + stat_logger.exception(e) + return None + else: + return None + + +@app.teardown_request +def _db_close(exc): close_connection() \ No newline at end of file diff --git a/api/apps/api_app.py b/api/apps/api_app.py index be712f9b1e2af8afaf5567e71b13622a34f54506..502dee6cdf3ce6c1ee13b770e9f65f24d2b7f7d1 100644 --- a/api/apps/api_app.py +++ b/api/apps/api_app.py @@ -1,735 +1,735 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import json -import os -import re -from datetime import datetime, timedelta -from flask import request, Response -from api.db.services.llm_service import TenantLLMService -from flask_login import login_required, current_user - -from api.db import FileType, LLMType, ParserType, FileSource -from api.db.db_models import APIToken, API4Conversation, Task, File -from api.db.services import duplicate_name -from api.db.services.api_service import APITokenService, API4ConversationService -from api.db.services.dialog_service import DialogService, chat -from api.db.services.document_service import DocumentService -from api.db.services.file2document_service import File2DocumentService -from api.db.services.file_service import FileService -from api.db.services.knowledgebase_service import KnowledgebaseService -from api.db.services.task_service import queue_tasks, TaskService -from api.db.services.user_service import UserTenantService -from api.settings import RetCode, retrievaler -from api.utils import get_uuid, current_timestamp, datetime_format -from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request -from itsdangerous import URLSafeTimedSerializer - -from api.utils.file_utils import filename_type, thumbnail -from rag.nlp import keyword_extraction -from rag.utils.minio_conn import MINIO - -from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService -from agent.canvas import Canvas -from functools import partial - - -def generate_confirmation_token(tenent_id): - serializer = URLSafeTimedSerializer(tenent_id) - return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34] - - -@manager.route('/new_token', methods=['POST']) -@login_required -def new_token(): - req = request.json - try: - tenants = UserTenantService.query(user_id=current_user.id) - if not tenants: - return get_data_error_result(retmsg="Tenant not found!") - - tenant_id = tenants[0].tenant_id - obj = {"tenant_id": tenant_id, "token": 
generate_confirmation_token(tenant_id), - "create_time": current_timestamp(), - "create_date": datetime_format(datetime.now()), - "update_time": None, - "update_date": None - } - if req.get("canvas_id"): - obj["dialog_id"] = req["canvas_id"] - obj["source"] = "agent" - else: - obj["dialog_id"] = req["dialog_id"] - - if not APITokenService.save(**obj): - return get_data_error_result(retmsg="Fail to new a dialog!") - - return get_json_result(data=obj) - except Exception as e: - return server_error_response(e) - - -@manager.route('/token_list', methods=['GET']) -@login_required -def token_list(): - try: - tenants = UserTenantService.query(user_id=current_user.id) - if not tenants: - return get_data_error_result(retmsg="Tenant not found!") - - id = request.args["dialog_id"] if "dialog_id" in request.args else request.args["canvas_id"] - objs = APITokenService.query(tenant_id=tenants[0].tenant_id, dialog_id=id) - return get_json_result(data=[o.to_dict() for o in objs]) - except Exception as e: - return server_error_response(e) - - -@manager.route('/rm', methods=['POST']) -@validate_request("tokens", "tenant_id") -@login_required -def rm(): - req = request.json - try: - for token in req["tokens"]: - APITokenService.filter_delete( - [APIToken.tenant_id == req["tenant_id"], APIToken.token == token]) - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - -@manager.route('/stats', methods=['GET']) -@login_required -def stats(): - try: - tenants = UserTenantService.query(user_id=current_user.id) - if not tenants: - return get_data_error_result(retmsg="Tenant not found!") - objs = API4ConversationService.stats( - tenants[0].tenant_id, - request.args.get( - "from_date", - (datetime.now() - - timedelta( - days=7)).strftime("%Y-%m-%d 00:00:00")), - request.args.get( - "to_date", - datetime.now().strftime("%Y-%m-%d %H:%M:%S")), - "agent" if "canvas_id" in request.args else None) - res = { - "pv": [(o["dt"], o["pv"]) for o in objs], - "uv": [(o["dt"], o["uv"]) for o in objs], - "speed": [(o["dt"], float(o["tokens"]) / (float(o["duration"] + 0.1))) for o in objs], - "tokens": [(o["dt"], float(o["tokens"]) / 1000.) 
for o in objs], - "round": [(o["dt"], o["round"]) for o in objs], - "thumb_up": [(o["dt"], o["thumb_up"]) for o in objs] - } - return get_json_result(data=res) - except Exception as e: - return server_error_response(e) - - -@manager.route('/new_conversation', methods=['GET']) -def set_conversation(): - token = request.headers.get('Authorization').split()[1] - objs = APIToken.query(token=token) - if not objs: - return get_json_result( - data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) - req = request.json - try: - if objs[0].source == "agent": - e, c = UserCanvasService.get_by_id(objs[0].dialog_id) - if not e: - return server_error_response("canvas not found.") - conv = { - "id": get_uuid(), - "dialog_id": c.id, - "user_id": request.args.get("user_id", ""), - "message": [{"role": "assistant", "content": "Hi there!"}], - "source": "agent" - } - API4ConversationService.save(**conv) - return get_json_result(data=conv) - else: - e, dia = DialogService.get_by_id(objs[0].dialog_id) - if not e: - return get_data_error_result(retmsg="Dialog not found") - conv = { - "id": get_uuid(), - "dialog_id": dia.id, - "user_id": request.args.get("user_id", ""), - "message": [{"role": "assistant", "content": dia.prompt_config["prologue"]}] - } - API4ConversationService.save(**conv) - return get_json_result(data=conv) - except Exception as e: - return server_error_response(e) - - -@manager.route('/completion', methods=['POST']) -@validate_request("conversation_id", "messages") -def completion(): - token = request.headers.get('Authorization').split()[1] - objs = APIToken.query(token=token) - if not objs: - return get_json_result( - data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) - req = request.json - e, conv = API4ConversationService.get_by_id(req["conversation_id"]) - if not e: - return get_data_error_result(retmsg="Conversation not found!") - if "quote" not in req: req["quote"] = False - - msg = [] - for m in req["messages"]: - if m["role"] == "system": - continue - if m["role"] == "assistant" and not msg: - continue - msg.append({"role": m["role"], "content": m["content"]}) - - def fillin_conv(ans): - nonlocal conv - if not conv.reference: - conv.reference.append(ans["reference"]) - else: - conv.reference[-1] = ans["reference"] - conv.message[-1] = {"role": "assistant", "content": ans["answer"]} - - def rename_field(ans): - reference = ans['reference'] - if not isinstance(reference, dict): - return - for chunk_i in reference.get('chunks', []): - if 'docnm_kwd' in chunk_i: - chunk_i['doc_name'] = chunk_i['docnm_kwd'] - chunk_i.pop('docnm_kwd') - - try: - if conv.source == "agent": - stream = req.get("stream", True) - conv.message.append(msg[-1]) - e, cvs = UserCanvasService.get_by_id(conv.dialog_id) - if not e: - return server_error_response("canvas not found.") - del req["conversation_id"] - del req["messages"] - - if not isinstance(cvs.dsl, str): - cvs.dsl = json.dumps(cvs.dsl, ensure_ascii=False) - - if not conv.reference: - conv.reference = [] - conv.message.append({"role": "assistant", "content": ""}) - conv.reference.append({"chunks": [], "doc_aggs": []}) - - final_ans = {"reference": [], "content": ""} - canvas = Canvas(cvs.dsl, objs[0].tenant_id) - - canvas.messages.append(msg[-1]) - canvas.add_user_input(msg[-1]["content"]) - answer = canvas.run(stream=stream) - - assert answer is not None, "Nothing. Is it over?" - - if stream: - assert isinstance(answer, partial), "Nothing. Is it over?" 
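-                # NOTE: the sse() generator below emits Server-Sent Events.
-                # Each event is a single "data:" line carrying one JSON
-                # payload and is terminated by a blank line ("\n\n"), e.g.
-                #     data:{"retcode": 0, "retmsg": "", "data": {"answer": "...", "reference": [...]}}
-                # A final event whose "data" field is true marks end-of-stream.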
- - def sse(): - nonlocal answer, cvs, conv - try: - for ans in answer(): - for k in ans.keys(): - final_ans[k] = ans[k] - ans = {"answer": ans["content"], "reference": ans.get("reference", [])} - fillin_conv(ans) - rename_field(ans) - yield "data:" + json.dumps({"retcode": 0, "retmsg": "", "data": ans}, - ensure_ascii=False) + "\n\n" - - canvas.messages.append({"role": "assistant", "content": final_ans["content"]}) - if final_ans.get("reference"): - canvas.reference.append(final_ans["reference"]) - cvs.dsl = json.loads(str(canvas)) - API4ConversationService.append_message(conv.id, conv.to_dict()) - except Exception as e: - yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e), - "data": {"answer": "**ERROR**: " + str(e), "reference": []}}, - ensure_ascii=False) + "\n\n" - yield "data:" + json.dumps({"retcode": 0, "retmsg": "", "data": True}, ensure_ascii=False) + "\n\n" - - resp = Response(sse(), mimetype="text/event-stream") - resp.headers.add_header("Cache-control", "no-cache") - resp.headers.add_header("Connection", "keep-alive") - resp.headers.add_header("X-Accel-Buffering", "no") - resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8") - return resp - - final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else "" - canvas.messages.append({"role": "assistant", "content": final_ans["content"]}) - if final_ans.get("reference"): - canvas.reference.append(final_ans["reference"]) - cvs.dsl = json.loads(str(canvas)) - - result = {"answer": final_ans["content"], "reference": final_ans.get("reference", [])} - fillin_conv(result) - API4ConversationService.append_message(conv.id, conv.to_dict()) - rename_field(result) - return get_json_result(data=result) - - #******************For dialog****************** - conv.message.append(msg[-1]) - e, dia = DialogService.get_by_id(conv.dialog_id) - if not e: - return get_data_error_result(retmsg="Dialog not found!") - del req["conversation_id"] - del req["messages"] - - if not conv.reference: - conv.reference = [] - conv.message.append({"role": "assistant", "content": ""}) - conv.reference.append({"chunks": [], "doc_aggs": []}) - - def stream(): - nonlocal dia, msg, req, conv - try: - for ans in chat(dia, msg, True, **req): - fillin_conv(ans) - rename_field(ans) - yield "data:" + json.dumps({"retcode": 0, "retmsg": "", "data": ans}, - ensure_ascii=False) + "\n\n" - API4ConversationService.append_message(conv.id, conv.to_dict()) - except Exception as e: - yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e), - "data": {"answer": "**ERROR**: " + str(e), "reference": []}}, - ensure_ascii=False) + "\n\n" - yield "data:" + json.dumps({"retcode": 0, "retmsg": "", "data": True}, ensure_ascii=False) + "\n\n" - - if req.get("stream", True): - resp = Response(stream(), mimetype="text/event-stream") - resp.headers.add_header("Cache-control", "no-cache") - resp.headers.add_header("Connection", "keep-alive") - resp.headers.add_header("X-Accel-Buffering", "no") - resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8") - return resp - - answer = None - for ans in chat(dia, msg, **req): - answer = ans - fillin_conv(ans) - API4ConversationService.append_message(conv.id, conv.to_dict()) - break - rename_field(answer) - return get_json_result(data=answer) - - except Exception as e: - return server_error_response(e) - - -@manager.route('/conversation/', methods=['GET']) -# @login_required -def get(conversation_id): - try: - e, conv = API4ConversationService.get_by_id(conversation_id) - if not e: - 
return get_data_error_result(retmsg="Conversation not found!") - - conv = conv.to_dict() - for referenct_i in conv['reference']: - if referenct_i is None or len(referenct_i) == 0: - continue - for chunk_i in referenct_i['chunks']: - if 'docnm_kwd' in chunk_i.keys(): - chunk_i['doc_name'] = chunk_i['docnm_kwd'] - chunk_i.pop('docnm_kwd') - return get_json_result(data=conv) - except Exception as e: - return server_error_response(e) - - -@manager.route('/document/upload', methods=['POST']) -@validate_request("kb_name") -def upload(): - token = request.headers.get('Authorization').split()[1] - objs = APIToken.query(token=token) - if not objs: - return get_json_result( - data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) - - kb_name = request.form.get("kb_name").strip() - tenant_id = objs[0].tenant_id - - try: - e, kb = KnowledgebaseService.get_by_name(kb_name, tenant_id) - if not e: - return get_data_error_result( - retmsg="Can't find this knowledgebase!") - kb_id = kb.id - except Exception as e: - return server_error_response(e) - - if 'file' not in request.files: - return get_json_result( - data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR) - - file = request.files['file'] - if file.filename == '': - return get_json_result( - data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR) - - root_folder = FileService.get_root_folder(tenant_id) - pf_id = root_folder["id"] - FileService.init_knowledgebase_docs(pf_id, tenant_id) - kb_root_folder = FileService.get_kb_folder(tenant_id) - kb_folder = FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"]) - - try: - if DocumentService.get_doc_count(kb.tenant_id) >= int(os.environ.get('MAX_FILE_NUM_PER_USER', 8192)): - return get_data_error_result( - retmsg="Exceed the maximum file number of a free user!") - - filename = duplicate_name( - DocumentService.query, - name=file.filename, - kb_id=kb_id) - filetype = filename_type(filename) - if not filetype: - return get_data_error_result( - retmsg="This type of file has not been supported yet!") - - location = filename - while MINIO.obj_exist(kb_id, location): - location += "_" - blob = request.files['file'].read() - MINIO.put(kb_id, location, blob) - doc = { - "id": get_uuid(), - "kb_id": kb.id, - "parser_id": kb.parser_id, - "parser_config": kb.parser_config, - "created_by": kb.tenant_id, - "type": filetype, - "name": filename, - "location": location, - "size": len(blob), - "thumbnail": thumbnail(filename, blob) - } - - form_data = request.form - if "parser_id" in form_data.keys(): - if request.form.get("parser_id").strip() in list(vars(ParserType).values())[1:-3]: - doc["parser_id"] = request.form.get("parser_id").strip() - if doc["type"] == FileType.VISUAL: - doc["parser_id"] = ParserType.PICTURE.value - if doc["type"] == FileType.AURAL: - doc["parser_id"] = ParserType.AUDIO.value - if re.search(r"\.(ppt|pptx|pages)$", filename): - doc["parser_id"] = ParserType.PRESENTATION.value - - doc_result = DocumentService.insert(doc) - FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id) - except Exception as e: - return server_error_response(e) - - if "run" in form_data.keys(): - if request.form.get("run").strip() == "1": - try: - info = {"run": 1, "progress": 0} - info["progress_msg"] = "" - info["chunk_num"] = 0 - info["token_num"] = 0 - DocumentService.update_by_id(doc["id"], info) - # if str(req["run"]) == TaskStatus.CANCEL.value: - tenant_id = DocumentService.get_tenant_id(doc["id"]) - if not tenant_id: - return 
get_data_error_result(retmsg="Tenant not found!") - - # e, doc = DocumentService.get_by_id(doc["id"]) - TaskService.filter_delete([Task.doc_id == doc["id"]]) - e, doc = DocumentService.get_by_id(doc["id"]) - doc = doc.to_dict() - doc["tenant_id"] = tenant_id - bucket, name = File2DocumentService.get_minio_address(doc_id=doc["id"]) - queue_tasks(doc, bucket, name) - except Exception as e: - return server_error_response(e) - - return get_json_result(data=doc_result.to_json()) - - -@manager.route('/list_chunks', methods=['POST']) -# @login_required -def list_chunks(): - token = request.headers.get('Authorization').split()[1] - objs = APIToken.query(token=token) - if not objs: - return get_json_result( - data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) - - req = request.json - - try: - if "doc_name" in req.keys(): - tenant_id = DocumentService.get_tenant_id_by_name(req['doc_name']) - doc_id = DocumentService.get_doc_id_by_doc_name(req['doc_name']) - - elif "doc_id" in req.keys(): - tenant_id = DocumentService.get_tenant_id(req['doc_id']) - doc_id = req['doc_id'] - else: - return get_json_result( - data=False, retmsg="Can't find doc_name or doc_id" - ) - - res = retrievaler.chunk_list(doc_id=doc_id, tenant_id=tenant_id) - res = [ - { - "content": res_item["content_with_weight"], - "doc_name": res_item["docnm_kwd"], - "img_id": res_item["img_id"] - } for res_item in res - ] - - except Exception as e: - return server_error_response(e) - - return get_json_result(data=res) - - -@manager.route('/list_kb_docs', methods=['POST']) -# @login_required -def list_kb_docs(): - token = request.headers.get('Authorization').split()[1] - objs = APIToken.query(token=token) - if not objs: - return get_json_result( - data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) - - req = request.json - tenant_id = objs[0].tenant_id - kb_name = req.get("kb_name", "").strip() - - try: - e, kb = KnowledgebaseService.get_by_name(kb_name, tenant_id) - if not e: - return get_data_error_result( - retmsg="Can't find this knowledgebase!") - kb_id = kb.id - - except Exception as e: - return server_error_response(e) - - page_number = int(req.get("page", 1)) - items_per_page = int(req.get("page_size", 15)) - orderby = req.get("orderby", "create_time") - desc = req.get("desc", True) - keywords = req.get("keywords", "") - - try: - docs, tol = DocumentService.get_by_kb_id( - kb_id, page_number, items_per_page, orderby, desc, keywords) - docs = [{"doc_id": doc['id'], "doc_name": doc['name']} for doc in docs] - - return get_json_result(data={"total": tol, "docs": docs}) - - except Exception as e: - return server_error_response(e) - - -@manager.route('/document', methods=['DELETE']) -# @login_required -def document_rm(): - token = request.headers.get('Authorization').split()[1] - objs = APIToken.query(token=token) - if not objs: - return get_json_result( - data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) - - tenant_id = objs[0].tenant_id - req = request.json - doc_ids = [] - try: - doc_ids = [DocumentService.get_doc_id_by_doc_name(doc_name) for doc_name in req.get("doc_names", [])] - for doc_id in req.get("doc_ids", []): - if doc_id not in doc_ids: - doc_ids.append(doc_id) - - if not doc_ids: - return get_json_result( - data=False, retmsg="Can't find doc_names or doc_ids" - ) - - except Exception as e: - return server_error_response(e) - - root_folder = FileService.get_root_folder(tenant_id) - pf_id = root_folder["id"] - 
FileService.init_knowledgebase_docs(pf_id, tenant_id) - - errors = "" - for doc_id in doc_ids: - try: - e, doc = DocumentService.get_by_id(doc_id) - if not e: - return get_data_error_result(retmsg="Document not found!") - tenant_id = DocumentService.get_tenant_id(doc_id) - if not tenant_id: - return get_data_error_result(retmsg="Tenant not found!") - - b, n = File2DocumentService.get_minio_address(doc_id=doc_id) - - if not DocumentService.remove_document(doc, tenant_id): - return get_data_error_result( - retmsg="Database error (Document removal)!") - - f2d = File2DocumentService.get_by_document_id(doc_id) - FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id]) - File2DocumentService.delete_by_document_id(doc_id) - - MINIO.rm(b, n) - except Exception as e: - errors += str(e) - - if errors: - return get_json_result(data=False, retmsg=errors, retcode=RetCode.SERVER_ERROR) - - return get_json_result(data=True) - - -@manager.route('/completion_aibotk', methods=['POST']) -@validate_request("Authorization", "conversation_id", "word") -def completion_faq(): - import base64 - req = request.json - - token = req["Authorization"] - objs = APIToken.query(token=token) - if not objs: - return get_json_result( - data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) - - e, conv = API4ConversationService.get_by_id(req["conversation_id"]) - if not e: - return get_data_error_result(retmsg="Conversation not found!") - if "quote" not in req: req["quote"] = True - - msg = [] - msg.append({"role": "user", "content": req["word"]}) - - try: - conv.message.append(msg[-1]) - e, dia = DialogService.get_by_id(conv.dialog_id) - if not e: - return get_data_error_result(retmsg="Dialog not found!") - del req["conversation_id"] - - if not conv.reference: - conv.reference = [] - conv.message.append({"role": "assistant", "content": ""}) - conv.reference.append({"chunks": [], "doc_aggs": []}) - - def fillin_conv(ans): - nonlocal conv - if not conv.reference: - conv.reference.append(ans["reference"]) - else: - conv.reference[-1] = ans["reference"] - conv.message[-1] = {"role": "assistant", "content": ans["answer"]} - - data_type_picture = { - "type": 3, - "url": "base64 content" - } - data = [ - { - "type": 1, - "content": "" - } - ] - ans = "" - for a in chat(dia, msg, stream=False, **req): - ans = a - break - data[0]["content"] += re.sub(r'##\d\$\$', '', ans["answer"]) - fillin_conv(ans) - API4ConversationService.append_message(conv.id, conv.to_dict()) - - chunk_idxs = [int(match[2]) for match in re.findall(r'##\d\$\$', ans["answer"])] - for chunk_idx in chunk_idxs[:1]: - if ans["reference"]["chunks"][chunk_idx]["img_id"]: - try: - bkt, nm = ans["reference"]["chunks"][chunk_idx]["img_id"].split("-") - response = MINIO.get(bkt, nm) - data_type_picture["url"] = base64.b64encode(response).decode('utf-8') - data.append(data_type_picture) - break - except Exception as e: - return server_error_response(e) - - response = {"code": 200, "msg": "success", "data": data} - return response - - except Exception as e: - return server_error_response(e) - - -@manager.route('/retrieval', methods=['POST']) -@validate_request("kb_id", "question") -def retrieval(): - token = request.headers.get('Authorization').split()[1] - objs = APIToken.query(token=token) - if not objs: - return get_json_result( - data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) - - req = request.json - kb_ids = req.get("kb_id",[]) - doc_ids = req.get("doc_ids", []) - 
question = req.get("question") - page = int(req.get("page", 1)) - size = int(req.get("size", 30)) - similarity_threshold = float(req.get("similarity_threshold", 0.2)) - vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3)) - top = int(req.get("top_k", 1024)) - - try: - kbs = KnowledgebaseService.get_by_ids(kb_ids) - embd_nms = list(set([kb.embd_id for kb in kbs])) - if len(embd_nms) != 1: - return get_json_result( - data=False, retmsg='Knowledge bases use different embedding models or does not exist."', retcode=RetCode.AUTHENTICATION_ERROR) - - embd_mdl = TenantLLMService.model_instance( - kbs[0].tenant_id, LLMType.EMBEDDING.value, llm_name=kbs[0].embd_id) - rerank_mdl = None - if req.get("rerank_id"): - rerank_mdl = TenantLLMService.model_instance( - kbs[0].tenant_id, LLMType.RERANK.value, llm_name=req["rerank_id"]) - if req.get("keyword", False): - chat_mdl = TenantLLMService.model_instance(kbs[0].tenant_id, LLMType.CHAT) - question += keyword_extraction(chat_mdl, question) - ranks = retrievaler.retrieval(question, embd_mdl, kbs[0].tenant_id, kb_ids, page, size, - similarity_threshold, vector_similarity_weight, top, - doc_ids, rerank_mdl=rerank_mdl) - for c in ranks["chunks"]: - if "vector" in c: - del c["vector"] - return get_json_result(data=ranks) - except Exception as e: - if str(e).find("not_found") > 0: - return get_json_result(data=False, retmsg=f'No chunk found! Check the chunk status please!', - retcode=RetCode.DATA_ERROR) +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+import json
+import os
+import re
+from datetime import datetime, timedelta
+from flask import request, Response
+from api.db.services.llm_service import TenantLLMService
+from flask_login import login_required, current_user
+
+from api.db import FileType, LLMType, ParserType, FileSource
+from api.db.db_models import APIToken, API4Conversation, Task, File
+from api.db.services import duplicate_name
+from api.db.services.api_service import APITokenService, API4ConversationService
+from api.db.services.dialog_service import DialogService, chat
+from api.db.services.document_service import DocumentService
+from api.db.services.file2document_service import File2DocumentService
+from api.db.services.file_service import FileService
+from api.db.services.knowledgebase_service import KnowledgebaseService
+from api.db.services.task_service import queue_tasks, TaskService
+from api.db.services.user_service import UserTenantService
+from api.settings import RetCode, retrievaler
+from api.utils import get_uuid, current_timestamp, datetime_format
+from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
+from itsdangerous import URLSafeTimedSerializer
+
+from api.utils.file_utils import filename_type, thumbnail
+from rag.nlp import keyword_extraction
+from rag.utils.minio_conn import MINIO
+
+from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
+from agent.canvas import Canvas
+from functools import partial
+
+
+def generate_confirmation_token(tenant_id):
+    serializer = URLSafeTimedSerializer(tenant_id)
+    return "ragflow-" + serializer.dumps(get_uuid(), salt=tenant_id)[2:34]
+
+
+@manager.route('/new_token', methods=['POST'])
+@login_required
+def new_token():
+    req = request.json
+    try:
+        tenants = UserTenantService.query(user_id=current_user.id)
+        if not tenants:
+            return get_data_error_result(retmsg="Tenant not found!")
+
+        tenant_id = tenants[0].tenant_id
+        obj = {"tenant_id": tenant_id, "token": generate_confirmation_token(tenant_id),
+               "create_time": current_timestamp(),
+               "create_date": datetime_format(datetime.now()),
+               "update_time": None,
+               "update_date": None
+               }
+        if req.get("canvas_id"):
+            obj["dialog_id"] = req["canvas_id"]
+            obj["source"] = "agent"
+        else:
+            obj["dialog_id"] = req["dialog_id"]
+
+        if not APITokenService.save(**obj):
+            return get_data_error_result(retmsg="Failed to create a new token!")
+
+        return get_json_result(data=obj)
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route('/token_list', methods=['GET'])
+@login_required
+def token_list():
+    try:
+        tenants = UserTenantService.query(user_id=current_user.id)
+        if not tenants:
+            return get_data_error_result(retmsg="Tenant not found!")
+
+        dialog_id = request.args["dialog_id"] if "dialog_id" in request.args else request.args["canvas_id"]
+        objs = APITokenService.query(tenant_id=tenants[0].tenant_id, dialog_id=dialog_id)
+        return get_json_result(data=[o.to_dict() for o in objs])
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route('/rm', methods=['POST'])
+@validate_request("tokens", "tenant_id")
+@login_required
+def rm():
+    req = request.json
+    try:
+        for token in req["tokens"]:
+            APITokenService.filter_delete(
+                [APIToken.tenant_id == req["tenant_id"], APIToken.token == token])
+        return get_json_result(data=True)
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route('/stats', methods=['GET'])
+@login_required
+def stats():
+    try:
+        tenants =
UserTenantService.query(user_id=current_user.id) + if not tenants: + return get_data_error_result(retmsg="Tenant not found!") + objs = API4ConversationService.stats( + tenants[0].tenant_id, + request.args.get( + "from_date", + (datetime.now() - + timedelta( + days=7)).strftime("%Y-%m-%d 00:00:00")), + request.args.get( + "to_date", + datetime.now().strftime("%Y-%m-%d %H:%M:%S")), + "agent" if "canvas_id" in request.args else None) + res = { + "pv": [(o["dt"], o["pv"]) for o in objs], + "uv": [(o["dt"], o["uv"]) for o in objs], + "speed": [(o["dt"], float(o["tokens"]) / (float(o["duration"] + 0.1))) for o in objs], + "tokens": [(o["dt"], float(o["tokens"]) / 1000.) for o in objs], + "round": [(o["dt"], o["round"]) for o in objs], + "thumb_up": [(o["dt"], o["thumb_up"]) for o in objs] + } + return get_json_result(data=res) + except Exception as e: + return server_error_response(e) + + +@manager.route('/new_conversation', methods=['GET']) +def set_conversation(): + token = request.headers.get('Authorization').split()[1] + objs = APIToken.query(token=token) + if not objs: + return get_json_result( + data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) + req = request.json + try: + if objs[0].source == "agent": + e, c = UserCanvasService.get_by_id(objs[0].dialog_id) + if not e: + return server_error_response("canvas not found.") + conv = { + "id": get_uuid(), + "dialog_id": c.id, + "user_id": request.args.get("user_id", ""), + "message": [{"role": "assistant", "content": "Hi there!"}], + "source": "agent" + } + API4ConversationService.save(**conv) + return get_json_result(data=conv) + else: + e, dia = DialogService.get_by_id(objs[0].dialog_id) + if not e: + return get_data_error_result(retmsg="Dialog not found") + conv = { + "id": get_uuid(), + "dialog_id": dia.id, + "user_id": request.args.get("user_id", ""), + "message": [{"role": "assistant", "content": dia.prompt_config["prologue"]}] + } + API4ConversationService.save(**conv) + return get_json_result(data=conv) + except Exception as e: + return server_error_response(e) + + +@manager.route('/completion', methods=['POST']) +@validate_request("conversation_id", "messages") +def completion(): + token = request.headers.get('Authorization').split()[1] + objs = APIToken.query(token=token) + if not objs: + return get_json_result( + data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) + req = request.json + e, conv = API4ConversationService.get_by_id(req["conversation_id"]) + if not e: + return get_data_error_result(retmsg="Conversation not found!") + if "quote" not in req: req["quote"] = False + + msg = [] + for m in req["messages"]: + if m["role"] == "system": + continue + if m["role"] == "assistant" and not msg: + continue + msg.append({"role": m["role"], "content": m["content"]}) + + def fillin_conv(ans): + nonlocal conv + if not conv.reference: + conv.reference.append(ans["reference"]) + else: + conv.reference[-1] = ans["reference"] + conv.message[-1] = {"role": "assistant", "content": ans["answer"]} + + def rename_field(ans): + reference = ans['reference'] + if not isinstance(reference, dict): + return + for chunk_i in reference.get('chunks', []): + if 'docnm_kwd' in chunk_i: + chunk_i['doc_name'] = chunk_i['docnm_kwd'] + chunk_i.pop('docnm_kwd') + + try: + if conv.source == "agent": + stream = req.get("stream", True) + conv.message.append(msg[-1]) + e, cvs = UserCanvasService.get_by_id(conv.dialog_id) + if not e: + return server_error_response("canvas not found.") + del 
req["conversation_id"] + del req["messages"] + + if not isinstance(cvs.dsl, str): + cvs.dsl = json.dumps(cvs.dsl, ensure_ascii=False) + + if not conv.reference: + conv.reference = [] + conv.message.append({"role": "assistant", "content": ""}) + conv.reference.append({"chunks": [], "doc_aggs": []}) + + final_ans = {"reference": [], "content": ""} + canvas = Canvas(cvs.dsl, objs[0].tenant_id) + + canvas.messages.append(msg[-1]) + canvas.add_user_input(msg[-1]["content"]) + answer = canvas.run(stream=stream) + + assert answer is not None, "Nothing. Is it over?" + + if stream: + assert isinstance(answer, partial), "Nothing. Is it over?" + + def sse(): + nonlocal answer, cvs, conv + try: + for ans in answer(): + for k in ans.keys(): + final_ans[k] = ans[k] + ans = {"answer": ans["content"], "reference": ans.get("reference", [])} + fillin_conv(ans) + rename_field(ans) + yield "data:" + json.dumps({"retcode": 0, "retmsg": "", "data": ans}, + ensure_ascii=False) + "\n\n" + + canvas.messages.append({"role": "assistant", "content": final_ans["content"]}) + if final_ans.get("reference"): + canvas.reference.append(final_ans["reference"]) + cvs.dsl = json.loads(str(canvas)) + API4ConversationService.append_message(conv.id, conv.to_dict()) + except Exception as e: + yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e), + "data": {"answer": "**ERROR**: " + str(e), "reference": []}}, + ensure_ascii=False) + "\n\n" + yield "data:" + json.dumps({"retcode": 0, "retmsg": "", "data": True}, ensure_ascii=False) + "\n\n" + + resp = Response(sse(), mimetype="text/event-stream") + resp.headers.add_header("Cache-control", "no-cache") + resp.headers.add_header("Connection", "keep-alive") + resp.headers.add_header("X-Accel-Buffering", "no") + resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8") + return resp + + final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else "" + canvas.messages.append({"role": "assistant", "content": final_ans["content"]}) + if final_ans.get("reference"): + canvas.reference.append(final_ans["reference"]) + cvs.dsl = json.loads(str(canvas)) + + result = {"answer": final_ans["content"], "reference": final_ans.get("reference", [])} + fillin_conv(result) + API4ConversationService.append_message(conv.id, conv.to_dict()) + rename_field(result) + return get_json_result(data=result) + + #******************For dialog****************** + conv.message.append(msg[-1]) + e, dia = DialogService.get_by_id(conv.dialog_id) + if not e: + return get_data_error_result(retmsg="Dialog not found!") + del req["conversation_id"] + del req["messages"] + + if not conv.reference: + conv.reference = [] + conv.message.append({"role": "assistant", "content": ""}) + conv.reference.append({"chunks": [], "doc_aggs": []}) + + def stream(): + nonlocal dia, msg, req, conv + try: + for ans in chat(dia, msg, True, **req): + fillin_conv(ans) + rename_field(ans) + yield "data:" + json.dumps({"retcode": 0, "retmsg": "", "data": ans}, + ensure_ascii=False) + "\n\n" + API4ConversationService.append_message(conv.id, conv.to_dict()) + except Exception as e: + yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e), + "data": {"answer": "**ERROR**: " + str(e), "reference": []}}, + ensure_ascii=False) + "\n\n" + yield "data:" + json.dumps({"retcode": 0, "retmsg": "", "data": True}, ensure_ascii=False) + "\n\n" + + if req.get("stream", True): + resp = Response(stream(), mimetype="text/event-stream") + resp.headers.add_header("Cache-control", "no-cache") + 
resp.headers.add_header("Connection", "keep-alive") + resp.headers.add_header("X-Accel-Buffering", "no") + resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8") + return resp + + answer = None + for ans in chat(dia, msg, **req): + answer = ans + fillin_conv(ans) + API4ConversationService.append_message(conv.id, conv.to_dict()) + break + rename_field(answer) + return get_json_result(data=answer) + + except Exception as e: + return server_error_response(e) + + +@manager.route('/conversation/', methods=['GET']) +# @login_required +def get(conversation_id): + try: + e, conv = API4ConversationService.get_by_id(conversation_id) + if not e: + return get_data_error_result(retmsg="Conversation not found!") + + conv = conv.to_dict() + for referenct_i in conv['reference']: + if referenct_i is None or len(referenct_i) == 0: + continue + for chunk_i in referenct_i['chunks']: + if 'docnm_kwd' in chunk_i.keys(): + chunk_i['doc_name'] = chunk_i['docnm_kwd'] + chunk_i.pop('docnm_kwd') + return get_json_result(data=conv) + except Exception as e: + return server_error_response(e) + + +@manager.route('/document/upload', methods=['POST']) +@validate_request("kb_name") +def upload(): + token = request.headers.get('Authorization').split()[1] + objs = APIToken.query(token=token) + if not objs: + return get_json_result( + data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) + + kb_name = request.form.get("kb_name").strip() + tenant_id = objs[0].tenant_id + + try: + e, kb = KnowledgebaseService.get_by_name(kb_name, tenant_id) + if not e: + return get_data_error_result( + retmsg="Can't find this knowledgebase!") + kb_id = kb.id + except Exception as e: + return server_error_response(e) + + if 'file' not in request.files: + return get_json_result( + data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR) + + file = request.files['file'] + if file.filename == '': + return get_json_result( + data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR) + + root_folder = FileService.get_root_folder(tenant_id) + pf_id = root_folder["id"] + FileService.init_knowledgebase_docs(pf_id, tenant_id) + kb_root_folder = FileService.get_kb_folder(tenant_id) + kb_folder = FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"]) + + try: + if DocumentService.get_doc_count(kb.tenant_id) >= int(os.environ.get('MAX_FILE_NUM_PER_USER', 8192)): + return get_data_error_result( + retmsg="Exceed the maximum file number of a free user!") + + filename = duplicate_name( + DocumentService.query, + name=file.filename, + kb_id=kb_id) + filetype = filename_type(filename) + if not filetype: + return get_data_error_result( + retmsg="This type of file has not been supported yet!") + + location = filename + while MINIO.obj_exist(kb_id, location): + location += "_" + blob = request.files['file'].read() + MINIO.put(kb_id, location, blob) + doc = { + "id": get_uuid(), + "kb_id": kb.id, + "parser_id": kb.parser_id, + "parser_config": kb.parser_config, + "created_by": kb.tenant_id, + "type": filetype, + "name": filename, + "location": location, + "size": len(blob), + "thumbnail": thumbnail(filename, blob) + } + + form_data = request.form + if "parser_id" in form_data.keys(): + if request.form.get("parser_id").strip() in list(vars(ParserType).values())[1:-3]: + doc["parser_id"] = request.form.get("parser_id").strip() + if doc["type"] == FileType.VISUAL: + doc["parser_id"] = ParserType.PICTURE.value + if doc["type"] == FileType.AURAL: + doc["parser_id"] = 
ParserType.AUDIO.value + if re.search(r"\.(ppt|pptx|pages)$", filename): + doc["parser_id"] = ParserType.PRESENTATION.value + + doc_result = DocumentService.insert(doc) + FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id) + except Exception as e: + return server_error_response(e) + + if "run" in form_data.keys(): + if request.form.get("run").strip() == "1": + try: + info = {"run": 1, "progress": 0} + info["progress_msg"] = "" + info["chunk_num"] = 0 + info["token_num"] = 0 + DocumentService.update_by_id(doc["id"], info) + # if str(req["run"]) == TaskStatus.CANCEL.value: + tenant_id = DocumentService.get_tenant_id(doc["id"]) + if not tenant_id: + return get_data_error_result(retmsg="Tenant not found!") + + # e, doc = DocumentService.get_by_id(doc["id"]) + TaskService.filter_delete([Task.doc_id == doc["id"]]) + e, doc = DocumentService.get_by_id(doc["id"]) + doc = doc.to_dict() + doc["tenant_id"] = tenant_id + bucket, name = File2DocumentService.get_minio_address(doc_id=doc["id"]) + queue_tasks(doc, bucket, name) + except Exception as e: + return server_error_response(e) + + return get_json_result(data=doc_result.to_json()) + + +@manager.route('/list_chunks', methods=['POST']) +# @login_required +def list_chunks(): + token = request.headers.get('Authorization').split()[1] + objs = APIToken.query(token=token) + if not objs: + return get_json_result( + data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) + + req = request.json + + try: + if "doc_name" in req.keys(): + tenant_id = DocumentService.get_tenant_id_by_name(req['doc_name']) + doc_id = DocumentService.get_doc_id_by_doc_name(req['doc_name']) + + elif "doc_id" in req.keys(): + tenant_id = DocumentService.get_tenant_id(req['doc_id']) + doc_id = req['doc_id'] + else: + return get_json_result( + data=False, retmsg="Can't find doc_name or doc_id" + ) + + res = retrievaler.chunk_list(doc_id=doc_id, tenant_id=tenant_id) + res = [ + { + "content": res_item["content_with_weight"], + "doc_name": res_item["docnm_kwd"], + "img_id": res_item["img_id"] + } for res_item in res + ] + + except Exception as e: + return server_error_response(e) + + return get_json_result(data=res) + + +@manager.route('/list_kb_docs', methods=['POST']) +# @login_required +def list_kb_docs(): + token = request.headers.get('Authorization').split()[1] + objs = APIToken.query(token=token) + if not objs: + return get_json_result( + data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) + + req = request.json + tenant_id = objs[0].tenant_id + kb_name = req.get("kb_name", "").strip() + + try: + e, kb = KnowledgebaseService.get_by_name(kb_name, tenant_id) + if not e: + return get_data_error_result( + retmsg="Can't find this knowledgebase!") + kb_id = kb.id + + except Exception as e: + return server_error_response(e) + + page_number = int(req.get("page", 1)) + items_per_page = int(req.get("page_size", 15)) + orderby = req.get("orderby", "create_time") + desc = req.get("desc", True) + keywords = req.get("keywords", "") + + try: + docs, tol = DocumentService.get_by_kb_id( + kb_id, page_number, items_per_page, orderby, desc, keywords) + docs = [{"doc_id": doc['id'], "doc_name": doc['name']} for doc in docs] + + return get_json_result(data={"total": tol, "docs": docs}) + + except Exception as e: + return server_error_response(e) + + +@manager.route('/document', methods=['DELETE']) +# @login_required +def document_rm(): + token = request.headers.get('Authorization').split()[1] + objs = APIToken.query(token=token) + if 
not objs: + return get_json_result( + data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) + + tenant_id = objs[0].tenant_id + req = request.json + doc_ids = [] + try: + doc_ids = [DocumentService.get_doc_id_by_doc_name(doc_name) for doc_name in req.get("doc_names", [])] + for doc_id in req.get("doc_ids", []): + if doc_id not in doc_ids: + doc_ids.append(doc_id) + + if not doc_ids: + return get_json_result( + data=False, retmsg="Can't find doc_names or doc_ids" + ) + + except Exception as e: + return server_error_response(e) + + root_folder = FileService.get_root_folder(tenant_id) + pf_id = root_folder["id"] + FileService.init_knowledgebase_docs(pf_id, tenant_id) + + errors = "" + for doc_id in doc_ids: + try: + e, doc = DocumentService.get_by_id(doc_id) + if not e: + return get_data_error_result(retmsg="Document not found!") + tenant_id = DocumentService.get_tenant_id(doc_id) + if not tenant_id: + return get_data_error_result(retmsg="Tenant not found!") + + b, n = File2DocumentService.get_minio_address(doc_id=doc_id) + + if not DocumentService.remove_document(doc, tenant_id): + return get_data_error_result( + retmsg="Database error (Document removal)!") + + f2d = File2DocumentService.get_by_document_id(doc_id) + FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id]) + File2DocumentService.delete_by_document_id(doc_id) + + MINIO.rm(b, n) + except Exception as e: + errors += str(e) + + if errors: + return get_json_result(data=False, retmsg=errors, retcode=RetCode.SERVER_ERROR) + + return get_json_result(data=True) + + +@manager.route('/completion_aibotk', methods=['POST']) +@validate_request("Authorization", "conversation_id", "word") +def completion_faq(): + import base64 + req = request.json + + token = req["Authorization"] + objs = APIToken.query(token=token) + if not objs: + return get_json_result( + data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR) + + e, conv = API4ConversationService.get_by_id(req["conversation_id"]) + if not e: + return get_data_error_result(retmsg="Conversation not found!") + if "quote" not in req: req["quote"] = True + + msg = [] + msg.append({"role": "user", "content": req["word"]}) + + try: + conv.message.append(msg[-1]) + e, dia = DialogService.get_by_id(conv.dialog_id) + if not e: + return get_data_error_result(retmsg="Dialog not found!") + del req["conversation_id"] + + if not conv.reference: + conv.reference = [] + conv.message.append({"role": "assistant", "content": ""}) + conv.reference.append({"chunks": [], "doc_aggs": []}) + + def fillin_conv(ans): + nonlocal conv + if not conv.reference: + conv.reference.append(ans["reference"]) + else: + conv.reference[-1] = ans["reference"] + conv.message[-1] = {"role": "assistant", "content": ans["answer"]} + + data_type_picture = { + "type": 3, + "url": "base64 content" + } + data = [ + { + "type": 1, + "content": "" + } + ] + ans = "" + for a in chat(dia, msg, stream=False, **req): + ans = a + break + data[0]["content"] += re.sub(r'##\d\$\$', '', ans["answer"]) + fillin_conv(ans) + API4ConversationService.append_message(conv.id, conv.to_dict()) + + chunk_idxs = [int(match[2]) for match in re.findall(r'##\d\$\$', ans["answer"])] + for chunk_idx in chunk_idxs[:1]: + if ans["reference"]["chunks"][chunk_idx]["img_id"]: + try: + bkt, nm = ans["reference"]["chunks"][chunk_idx]["img_id"].split("-") + response = MINIO.get(bkt, nm) + data_type_picture["url"] = base64.b64encode(response).decode('utf-8') + 
data.append(data_type_picture)
+                    break
+                except Exception as e:
+                    return server_error_response(e)
+
+        response = {"code": 200, "msg": "success", "data": data}
+        return response
+
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route('/retrieval', methods=['POST'])
+@validate_request("kb_id", "question")
+def retrieval():
+    token = request.headers.get('Authorization').split()[1]
+    objs = APIToken.query(token=token)
+    if not objs:
+        return get_json_result(
+            data=False, retmsg='Token is not valid!', retcode=RetCode.AUTHENTICATION_ERROR)
+
+    req = request.json
+    kb_ids = req.get("kb_id", [])
+    doc_ids = req.get("doc_ids", [])
+    question = req.get("question")
+    page = int(req.get("page", 1))
+    size = int(req.get("size", 30))
+    similarity_threshold = float(req.get("similarity_threshold", 0.2))
+    vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
+    top = int(req.get("top_k", 1024))
+
+    try:
+        kbs = KnowledgebaseService.get_by_ids(kb_ids)
+        embd_nms = list(set([kb.embd_id for kb in kbs]))
+        if len(embd_nms) != 1:
+            return get_json_result(
+                data=False, retmsg='Knowledge bases use different embedding models or do not exist.', retcode=RetCode.AUTHENTICATION_ERROR)
+
+        embd_mdl = TenantLLMService.model_instance(
+            kbs[0].tenant_id, LLMType.EMBEDDING.value, llm_name=kbs[0].embd_id)
+        rerank_mdl = None
+        if req.get("rerank_id"):
+            rerank_mdl = TenantLLMService.model_instance(
+                kbs[0].tenant_id, LLMType.RERANK.value, llm_name=req["rerank_id"])
+        if req.get("keyword", False):
+            chat_mdl = TenantLLMService.model_instance(kbs[0].tenant_id, LLMType.CHAT)
+            question += keyword_extraction(chat_mdl, question)
+        ranks = retrievaler.retrieval(question, embd_mdl, kbs[0].tenant_id, kb_ids, page, size,
+                                      similarity_threshold, vector_similarity_weight, top,
+                                      doc_ids, rerank_mdl=rerank_mdl)
+        for c in ranks["chunks"]:
+            if "vector" in c:
+                del c["vector"]
+        return get_json_result(data=ranks)
+    except Exception as e:
+        if str(e).find("not_found") > 0:
+            return get_json_result(data=False, retmsg='No chunk found! Please check the chunk status.',
+                                   retcode=RetCode.DATA_ERROR)
        return server_error_response(e)
\ No newline at end of file
diff --git a/api/apps/chunk_app.py b/api/apps/chunk_app.py
index f65c53b396c9613ad82d36c7d62c1696091fe269..f4e1b67c25348c3d663e2b7d33d3600e42f46e37 100644
--- a/api/apps/chunk_app.py
+++ b/api/apps/chunk_app.py
@@ -1,318 +1,318 @@
-#
-# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# -import datetime -import json -import traceback - -from flask import request -from flask_login import login_required, current_user -from elasticsearch_dsl import Q - -from rag.app.qa import rmPrefix, beAdoc -from rag.nlp import search, rag_tokenizer, keyword_extraction -from rag.utils.es_conn import ELASTICSEARCH -from rag.utils import rmSpace -from api.db import LLMType, ParserType -from api.db.services.knowledgebase_service import KnowledgebaseService -from api.db.services.llm_service import TenantLLMService -from api.db.services.user_service import UserTenantService -from api.utils.api_utils import server_error_response, get_data_error_result, validate_request -from api.db.services.document_service import DocumentService -from api.settings import RetCode, retrievaler, kg_retrievaler -from api.utils.api_utils import get_json_result -import hashlib -import re - - -@manager.route('/list', methods=['POST']) -@login_required -@validate_request("doc_id") -def list_chunk(): - req = request.json - doc_id = req["doc_id"] - page = int(req.get("page", 1)) - size = int(req.get("size", 30)) - question = req.get("keywords", "") - try: - tenant_id = DocumentService.get_tenant_id(req["doc_id"]) - if not tenant_id: - return get_data_error_result(retmsg="Tenant not found!") - e, doc = DocumentService.get_by_id(doc_id) - if not e: - return get_data_error_result(retmsg="Document not found!") - query = { - "doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True - } - if "available_int" in req: - query["available_int"] = int(req["available_int"]) - sres = retrievaler.search(query, search.index_name(tenant_id)) - res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()} - for id in sres.ids: - d = { - "chunk_id": id, - "content_with_weight": rmSpace(sres.highlight[id]) if question and id in sres.highlight else sres.field[ - id].get( - "content_with_weight", ""), - "doc_id": sres.field[id]["doc_id"], - "docnm_kwd": sres.field[id]["docnm_kwd"], - "important_kwd": sres.field[id].get("important_kwd", []), - "img_id": sres.field[id].get("img_id", ""), - "available_int": sres.field[id].get("available_int", 1), - "positions": sres.field[id].get("position_int", "").split("\t") - } - if len(d["positions"]) % 5 == 0: - poss = [] - for i in range(0, len(d["positions"]), 5): - poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]), - float(d["positions"][i + 3]), float(d["positions"][i + 4])]) - d["positions"] = poss - res["chunks"].append(d) - return get_json_result(data=res) - except Exception as e: - if str(e).find("not_found") > 0: - return get_json_result(data=False, retmsg=f'No chunk found!', - retcode=RetCode.DATA_ERROR) - return server_error_response(e) - - -@manager.route('/get', methods=['GET']) -@login_required -def get(): - chunk_id = request.args["chunk_id"] - try: - tenants = UserTenantService.query(user_id=current_user.id) - if not tenants: - return get_data_error_result(retmsg="Tenant not found!") - res = ELASTICSEARCH.get( - chunk_id, search.index_name( - tenants[0].tenant_id)) - if not res.get("found"): - return server_error_response("Chunk not found") - id = res["_id"] - res = res["_source"] - res["chunk_id"] = id - k = [] - for n in res.keys(): - if re.search(r"(_vec$|_sm_|_tks|_ltks)", n): - k.append(n) - for n in k: - del res[n] - - return get_json_result(data=res) - except Exception as e: - if str(e).find("NotFoundError") >= 0: - return get_json_result(data=False, retmsg=f'Chunk not found!', - 
retcode=RetCode.DATA_ERROR) - return server_error_response(e) - - -@manager.route('/set', methods=['POST']) -@login_required -@validate_request("doc_id", "chunk_id", "content_with_weight", - "important_kwd") -def set(): - req = request.json - d = { - "id": req["chunk_id"], - "content_with_weight": req["content_with_weight"]} - d["content_ltks"] = rag_tokenizer.tokenize(req["content_with_weight"]) - d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"]) - d["important_kwd"] = req["important_kwd"] - d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_kwd"])) - if "available_int" in req: - d["available_int"] = req["available_int"] - - try: - tenant_id = DocumentService.get_tenant_id(req["doc_id"]) - if not tenant_id: - return get_data_error_result(retmsg="Tenant not found!") - - embd_id = DocumentService.get_embd_id(req["doc_id"]) - embd_mdl = TenantLLMService.model_instance( - tenant_id, LLMType.EMBEDDING.value, embd_id) - - e, doc = DocumentService.get_by_id(req["doc_id"]) - if not e: - return get_data_error_result(retmsg="Document not found!") - - if doc.parser_id == ParserType.QA: - arr = [ - t for t in re.split( - r"[\n\t]", - req["content_with_weight"]) if len(t) > 1] - if len(arr) != 2: - return get_data_error_result( - retmsg="Q&A must be separated by TAB/ENTER key.") - q, a = rmPrefix(arr[0]), rmPrefix(arr[1]) - d = beAdoc(d, arr[0], arr[1], not any( - [rag_tokenizer.is_chinese(t) for t in q + a])) - - v, c = embd_mdl.encode([doc.name, req["content_with_weight"]]) - v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1] - d["q_%d_vec" % len(v)] = v.tolist() - ELASTICSEARCH.upsert([d], search.index_name(tenant_id)) - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - -@manager.route('/switch', methods=['POST']) -@login_required -@validate_request("chunk_ids", "available_int", "doc_id") -def switch(): - req = request.json - try: - tenant_id = DocumentService.get_tenant_id(req["doc_id"]) - if not tenant_id: - return get_data_error_result(retmsg="Tenant not found!") - if not ELASTICSEARCH.upsert([{"id": i, "available_int": int(req["available_int"])} for i in req["chunk_ids"]], - search.index_name(tenant_id)): - return get_data_error_result(retmsg="Index updating failure") - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - -@manager.route('/rm', methods=['POST']) -@login_required -@validate_request("chunk_ids", "doc_id") -def rm(): - req = request.json - try: - if not ELASTICSEARCH.deleteByQuery( - Q("ids", values=req["chunk_ids"]), search.index_name(current_user.id)): - return get_data_error_result(retmsg="Index updating failure") - e, doc = DocumentService.get_by_id(req["doc_id"]) - if not e: - return get_data_error_result(retmsg="Document not found!") - deleted_chunk_ids = req["chunk_ids"] - chunk_number = len(deleted_chunk_ids) - DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0) - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - -@manager.route('/create', methods=['POST']) -@login_required -@validate_request("doc_id", "content_with_weight") -def create(): - req = request.json - md5 = hashlib.md5() - md5.update((req["content_with_weight"] + req["doc_id"]).encode("utf-8")) - chunck_id = md5.hexdigest() - d = {"id": chunck_id, "content_ltks": rag_tokenizer.tokenize(req["content_with_weight"]), - "content_with_weight": req["content_with_weight"]} - d["content_sm_ltks"] 
= rag_tokenizer.fine_grained_tokenize(d["content_ltks"]) - d["important_kwd"] = req.get("important_kwd", []) - d["important_tks"] = rag_tokenizer.tokenize(" ".join(req.get("important_kwd", []))) - d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19] - d["create_timestamp_flt"] = datetime.datetime.now().timestamp() - - try: - e, doc = DocumentService.get_by_id(req["doc_id"]) - if not e: - return get_data_error_result(retmsg="Document not found!") - d["kb_id"] = [doc.kb_id] - d["docnm_kwd"] = doc.name - d["doc_id"] = doc.id - - tenant_id = DocumentService.get_tenant_id(req["doc_id"]) - if not tenant_id: - return get_data_error_result(retmsg="Tenant not found!") - - embd_id = DocumentService.get_embd_id(req["doc_id"]) - embd_mdl = TenantLLMService.model_instance( - tenant_id, LLMType.EMBEDDING.value, embd_id) - - v, c = embd_mdl.encode([doc.name, req["content_with_weight"]]) - v = 0.1 * v[0] + 0.9 * v[1] - d["q_%d_vec" % len(v)] = v.tolist() - ELASTICSEARCH.upsert([d], search.index_name(tenant_id)) - - DocumentService.increment_chunk_num( - doc.id, doc.kb_id, c, 1, 0) - return get_json_result(data={"chunk_id": chunck_id}) - except Exception as e: - return server_error_response(e) - - -@manager.route('/retrieval_test', methods=['POST']) -@login_required -@validate_request("kb_id", "question") -def retrieval_test(): - req = request.json - page = int(req.get("page", 1)) - size = int(req.get("size", 30)) - question = req["question"] - kb_id = req["kb_id"] - doc_ids = req.get("doc_ids", []) - similarity_threshold = float(req.get("similarity_threshold", 0.2)) - vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3)) - top = int(req.get("top_k", 1024)) - try: - e, kb = KnowledgebaseService.get_by_id(kb_id) - if not e: - return get_data_error_result(retmsg="Knowledgebase not found!") - - embd_mdl = TenantLLMService.model_instance( - kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id) - - rerank_mdl = None - if req.get("rerank_id"): - rerank_mdl = TenantLLMService.model_instance( - kb.tenant_id, LLMType.RERANK.value, llm_name=req["rerank_id"]) - - if req.get("keyword", False): - chat_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.CHAT) - question += keyword_extraction(chat_mdl, question) - - retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler - ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, [kb_id], page, size, - similarity_threshold, vector_similarity_weight, top, - doc_ids, rerank_mdl=rerank_mdl) - for c in ranks["chunks"]: - if "vector" in c: - del c["vector"] - - return get_json_result(data=ranks) - except Exception as e: - if str(e).find("not_found") > 0: - return get_json_result(data=False, retmsg=f'No chunk found! Check the chunk status please!', - retcode=RetCode.DATA_ERROR) - return server_error_response(e) - - -@manager.route('/knowledge_graph', methods=['GET']) -@login_required -def knowledge_graph(): - doc_id = request.args["doc_id"] - req = { - "doc_ids":[doc_id], - "knowledge_graph_kwd": ["graph", "mind_map"] - } - tenant_id = DocumentService.get_tenant_id(doc_id) - sres = retrievaler.search(req, search.index_name(tenant_id)) - obj = {"graph": {}, "mind_map": {}} - for id in sres.ids[:2]: - ty = sres.field[id]["knowledge_graph_kwd"] - try: - obj[ty] = json.loads(sres.field[id]["content_with_weight"]) - except Exception as e: - print(traceback.format_exc(), flush=True) - - return get_json_result(data=obj) - +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. 
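+#
+# NOTE: this module manages chunks for logged-in users (every route here is
+# @login_required, unlike the token-authenticated api_app.py above): listing
+# (/list), fetching (/get), editing (/set), enabling or disabling (/switch),
+# deleting (/rm), creating (/create), plus /retrieval_test and
+# /knowledge_graph.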
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import datetime
+import hashlib
+import json
+import re
+import traceback
+
+from flask import request
+from flask_login import login_required, current_user
+from elasticsearch_dsl import Q
+
+from rag.app.qa import rmPrefix, beAdoc
+from rag.nlp import search, rag_tokenizer, keyword_extraction
+from rag.utils.es_conn import ELASTICSEARCH
+from rag.utils import rmSpace
+from api.db import LLMType, ParserType
+from api.db.services.knowledgebase_service import KnowledgebaseService
+from api.db.services.llm_service import TenantLLMService
+from api.db.services.user_service import UserTenantService
+from api.db.services.document_service import DocumentService
+from api.settings import RetCode, retrievaler, kg_retrievaler
+from api.utils.api_utils import server_error_response, get_data_error_result, validate_request, get_json_result
+
+
+@manager.route('/list', methods=['POST'])
+@login_required
+@validate_request("doc_id")
+def list_chunk():
+    req = request.json
+    doc_id = req["doc_id"]
+    page = int(req.get("page", 1))
+    size = int(req.get("size", 30))
+    question = req.get("keywords", "")
+    try:
+        tenant_id = DocumentService.get_tenant_id(req["doc_id"])
+        if not tenant_id:
+            return get_data_error_result(retmsg="Tenant not found!")
+        e, doc = DocumentService.get_by_id(doc_id)
+        if not e:
+            return get_data_error_result(retmsg="Document not found!")
+        query = {
+            "doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True
+        }
+        if "available_int" in req:
+            query["available_int"] = int(req["available_int"])
+        sres = retrievaler.search(query, search.index_name(tenant_id))
+        res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
+        for id in sres.ids:
+            d = {
+                "chunk_id": id,
+                "content_with_weight": rmSpace(sres.highlight[id]) if question and id in sres.highlight
+                else sres.field[id].get("content_with_weight", ""),
+                "doc_id": sres.field[id]["doc_id"],
+                "docnm_kwd": sres.field[id]["docnm_kwd"],
+                "important_kwd": sres.field[id].get("important_kwd", []),
+                "img_id": sres.field[id].get("img_id", ""),
+                "available_int": sres.field[id].get("available_int", 1),
+                "positions": sres.field[id].get("position_int", "").split("\t")
+            }
+            # "position_int" stores tab-separated groups of five numbers; unpack
+            # them into five-element position lists when the layout matches.
+            if len(d["positions"]) % 5 == 0:
+                poss = []
+                for i in range(0, len(d["positions"]), 5):
+                    poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]),
+                                 float(d["positions"][i + 3]), float(d["positions"][i + 4])])
+                d["positions"] = poss
+            res["chunks"].append(d)
+        return get_json_result(data=res)
+    except Exception as e:
+        if str(e).find("not_found") >= 0:
+            return get_json_result(data=False, retmsg='No chunk found!',
+                                   retcode=RetCode.DATA_ERROR)
+        return server_error_response(e)
+
+
+@manager.route('/get', methods=['GET'])
+@login_required
+def get():
+    chunk_id = request.args["chunk_id"]
+    try:
+        tenants = UserTenantService.query(user_id=current_user.id)
+        if not tenants:
+            return get_data_error_result(retmsg="Tenant not found!")
+        res = ELASTICSEARCH.get(
+            chunk_id, search.index_name(
+                tenants[0].tenant_id))
+        if not res.get("found"):
+            return server_error_response("Chunk not found")
+        id = res["_id"]
+        res = res["_source"]
+        res["chunk_id"] = id
+        # Strip the internal token and vector fields before returning the chunk.
+        k = []
+        for n in res.keys():
+            if re.search(r"(_vec$|_sm_|_tks|_ltks)", n):
+                k.append(n)
+        for n in k:
+            del res[n]
+
+        return get_json_result(data=res)
+    except Exception as e:
+        if str(e).find("NotFoundError") >= 0:
+            return get_json_result(data=False, retmsg='Chunk not found!',
+                                   retcode=RetCode.DATA_ERROR)
+        return server_error_response(e)
+
+
+@manager.route('/set', methods=['POST'])
+@login_required
+@validate_request("doc_id", "chunk_id", "content_with_weight",
+                  "important_kwd")
+def set():
+    req = request.json
+    d = {
+        "id": req["chunk_id"],
+        "content_with_weight": req["content_with_weight"]}
+    d["content_ltks"] = rag_tokenizer.tokenize(req["content_with_weight"])
+    d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
+    d["important_kwd"] = req["important_kwd"]
+    d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_kwd"]))
+    if "available_int" in req:
+        d["available_int"] = req["available_int"]
+
+    try:
+        tenant_id = DocumentService.get_tenant_id(req["doc_id"])
+        if not tenant_id:
+            return get_data_error_result(retmsg="Tenant not found!")
+
+        embd_id = DocumentService.get_embd_id(req["doc_id"])
+        embd_mdl = TenantLLMService.model_instance(
+            tenant_id, LLMType.EMBEDDING.value, embd_id)
+
+        e, doc = DocumentService.get_by_id(req["doc_id"])
+        if not e:
+            return get_data_error_result(retmsg="Document not found!")
+
+        if doc.parser_id == ParserType.QA:
+            arr = [
+                t for t in re.split(
+                    r"[\n\t]",
+                    req["content_with_weight"]) if len(t) > 1]
+            if len(arr) != 2:
+                return get_data_error_result(
+                    retmsg="Q&A must be separated by TAB/ENTER key.")
+            q, a = rmPrefix(arr[0]), rmPrefix(arr[1])
+            d = beAdoc(d, arr[0], arr[1], not any(
+                [rag_tokenizer.is_chinese(t) for t in q + a]))
+
+        # Blend the document-title embedding into the chunk embedding (1:9);
+        # Q&A chunks use the content embedding only.
+        v, c = embd_mdl.encode([doc.name, req["content_with_weight"]])
+        v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
+        d["q_%d_vec" % len(v)] = v.tolist()
+        ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
+        return get_json_result(data=True)
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route('/switch', methods=['POST'])
+@login_required
+@validate_request("chunk_ids", "available_int", "doc_id")
+def switch():
+    req = request.json
+    try:
+        tenant_id = DocumentService.get_tenant_id(req["doc_id"])
+        if not tenant_id:
+            return get_data_error_result(retmsg="Tenant not found!")
+        if not ELASTICSEARCH.upsert([{"id": i, "available_int": int(req["available_int"])} for i in req["chunk_ids"]],
+                                    search.index_name(tenant_id)):
+            return get_data_error_result(retmsg="Failed to update the index!")
+        return get_json_result(data=True)
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route('/rm', methods=['POST'])
+@login_required
+@validate_request("chunk_ids", "doc_id")
+def rm():
+    req = request.json
+    try:
+        if not ELASTICSEARCH.deleteByQuery(
+                Q("ids", values=req["chunk_ids"]), search.index_name(current_user.id)):
+            return get_data_error_result(retmsg="Failed to update the index!")
+        e, doc = DocumentService.get_by_id(req["doc_id"])
+        if not e:
+            return get_data_error_result(retmsg="Document not found!")
+        deleted_chunk_ids = req["chunk_ids"]
+        chunk_number = len(deleted_chunk_ids)
+        DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0)
+        return get_json_result(data=True)
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route('/create', methods=['POST'])
+@login_required
+@validate_request("doc_id", "content_with_weight")
+def create():
+    req = request.json
+    # The chunk id is the MD5 of content plus document id, so re-creating
+    # identical content upserts the same chunk instead of duplicating it.
+    md5 = hashlib.md5()
+    md5.update((req["content_with_weight"] + req["doc_id"]).encode("utf-8"))
+    chunk_id = md5.hexdigest()
+    d = {"id": chunk_id, "content_ltks": rag_tokenizer.tokenize(req["content_with_weight"]),
+         "content_with_weight": req["content_with_weight"]}
+    d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
+    d["important_kwd"] = req.get("important_kwd", [])
+    d["important_tks"] = rag_tokenizer.tokenize(" ".join(req.get("important_kwd", [])))
+    d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
+    d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
+
+    try:
+        e, doc = DocumentService.get_by_id(req["doc_id"])
+        if not e:
+            return get_data_error_result(retmsg="Document not found!")
+        d["kb_id"] = [doc.kb_id]
+        d["docnm_kwd"] = doc.name
+        d["doc_id"] = doc.id
+
+        tenant_id = DocumentService.get_tenant_id(req["doc_id"])
+        if not tenant_id:
+            return get_data_error_result(retmsg="Tenant not found!")
+
+        embd_id = DocumentService.get_embd_id(req["doc_id"])
+        embd_mdl = TenantLLMService.model_instance(
+            tenant_id, LLMType.EMBEDDING.value, embd_id)
+
+        v, c = embd_mdl.encode([doc.name, req["content_with_weight"]])
+        v = 0.1 * v[0] + 0.9 * v[1]
+        d["q_%d_vec" % len(v)] = v.tolist()
+        ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
+
+        DocumentService.increment_chunk_num(
+            doc.id, doc.kb_id, c, 1, 0)
+        return get_json_result(data={"chunk_id": chunk_id})
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route('/retrieval_test', methods=['POST'])
+@login_required
+@validate_request("kb_id", "question")
+def retrieval_test():
+    req = request.json
+    page = int(req.get("page", 1))
+    size = int(req.get("size", 30))
+    question = req["question"]
+    kb_id = req["kb_id"]
+    doc_ids = req.get("doc_ids", [])
+    similarity_threshold = float(req.get("similarity_threshold", 0.2))
+    vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
+    top = int(req.get("top_k", 1024))
+    try:
+        e, kb = KnowledgebaseService.get_by_id(kb_id)
+        if not e:
+            return get_data_error_result(retmsg="Knowledgebase not found!")
+
+        embd_mdl = TenantLLMService.model_instance(
+            kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)
+
+        rerank_mdl = None
+        if req.get("rerank_id"):
+            rerank_mdl = TenantLLMService.model_instance(
+                kb.tenant_id, LLMType.RERANK.value, llm_name=req["rerank_id"])
+
+        # Optionally expand the question with LLM-extracted keywords.
+        if req.get("keyword", False):
+            chat_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.CHAT)
+            question += keyword_extraction(chat_mdl, question)
+
+        retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler
+        ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, [kb_id], page, size,
+                               similarity_threshold, vector_similarity_weight, top,
+                               doc_ids, rerank_mdl=rerank_mdl)
+        for c in ranks["chunks"]:
+            if "vector" in c:
+                del c["vector"]
+
+        return get_json_result(data=ranks)
+    except Exception as e:
+        if str(e).find("not_found") >= 0:
+            return get_json_result(data=False, retmsg='No chunk found! 
Check the chunk status please!', + retcode=RetCode.DATA_ERROR) + return server_error_response(e) + + +@manager.route('/knowledge_graph', methods=['GET']) +@login_required +def knowledge_graph(): + doc_id = request.args["doc_id"] + req = { + "doc_ids":[doc_id], + "knowledge_graph_kwd": ["graph", "mind_map"] + } + tenant_id = DocumentService.get_tenant_id(doc_id) + sres = retrievaler.search(req, search.index_name(tenant_id)) + obj = {"graph": {}, "mind_map": {}} + for id in sres.ids[:2]: + ty = sres.field[id]["knowledge_graph_kwd"] + try: + obj[ty] = json.loads(sres.field[id]["content_with_weight"]) + except Exception as e: + print(traceback.format_exc(), flush=True) + + return get_json_result(data=obj) + diff --git a/api/apps/conversation_app.py b/api/apps/conversation_app.py index b3088f76ce8c9f5c4644b523f665e0ccf0742e1e..9d6d623914f7cca1486ccc66fa1a19abd9176953 100644 --- a/api/apps/conversation_app.py +++ b/api/apps/conversation_app.py @@ -1,177 +1,177 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from copy import deepcopy -from flask import request, Response -from flask_login import login_required -from api.db.services.dialog_service import DialogService, ConversationService, chat -from api.utils.api_utils import server_error_response, get_data_error_result, validate_request -from api.utils import get_uuid -from api.utils.api_utils import get_json_result -import json - - -@manager.route('/set', methods=['POST']) -@login_required -def set_conversation(): - req = request.json - conv_id = req.get("conversation_id") - if conv_id: - del req["conversation_id"] - try: - if not ConversationService.update_by_id(conv_id, req): - return get_data_error_result(retmsg="Conversation not found!") - e, conv = ConversationService.get_by_id(conv_id) - if not e: - return get_data_error_result( - retmsg="Fail to update a conversation!") - conv = conv.to_dict() - return get_json_result(data=conv) - except Exception as e: - return server_error_response(e) - - try: - e, dia = DialogService.get_by_id(req["dialog_id"]) - if not e: - return get_data_error_result(retmsg="Dialog not found") - conv = { - "id": get_uuid(), - "dialog_id": req["dialog_id"], - "name": req.get("name", "New conversation"), - "message": [{"role": "assistant", "content": dia.prompt_config["prologue"]}] - } - ConversationService.save(**conv) - e, conv = ConversationService.get_by_id(conv["id"]) - if not e: - return get_data_error_result(retmsg="Fail to new a conversation!") - conv = conv.to_dict() - return get_json_result(data=conv) - except Exception as e: - return server_error_response(e) - - -@manager.route('/get', methods=['GET']) -@login_required -def get(): - conv_id = request.args["conversation_id"] - try: - e, conv = ConversationService.get_by_id(conv_id) - if not e: - return get_data_error_result(retmsg="Conversation not found!") - conv = conv.to_dict() - return get_json_result(data=conv) - except Exception as e: - return server_error_response(e) - - 
-@manager.route('/rm', methods=['POST']) -@login_required -def rm(): - conv_ids = request.json["conversation_ids"] - try: - for cid in conv_ids: - ConversationService.delete_by_id(cid) - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - -@manager.route('/list', methods=['GET']) -@login_required -def list_convsersation(): - dialog_id = request.args["dialog_id"] - try: - convs = ConversationService.query( - dialog_id=dialog_id, - order_by=ConversationService.model.create_time, - reverse=True) - convs = [d.to_dict() for d in convs] - return get_json_result(data=convs) - except Exception as e: - return server_error_response(e) - - -@manager.route('/completion', methods=['POST']) -@login_required -#@validate_request("conversation_id", "messages") -def completion(): - req = request.json - #req = {"conversation_id": "9aaaca4c11d311efa461fa163e197198", "messages": [ - # {"role": "user", "content": "上海有吗?"} - #]} - msg = [] - for m in req["messages"]: - if m["role"] == "system": - continue - if m["role"] == "assistant" and not msg: - continue - msg.append({"role": m["role"], "content": m["content"]}) - if "doc_ids" in m: - msg[-1]["doc_ids"] = m["doc_ids"] - try: - e, conv = ConversationService.get_by_id(req["conversation_id"]) - if not e: - return get_data_error_result(retmsg="Conversation not found!") - conv.message.append(deepcopy(msg[-1])) - e, dia = DialogService.get_by_id(conv.dialog_id) - if not e: - return get_data_error_result(retmsg="Dialog not found!") - del req["conversation_id"] - del req["messages"] - - if not conv.reference: - conv.reference = [] - conv.message.append({"role": "assistant", "content": ""}) - conv.reference.append({"chunks": [], "doc_aggs": []}) - - def fillin_conv(ans): - nonlocal conv - if not conv.reference: - conv.reference.append(ans["reference"]) - else: conv.reference[-1] = ans["reference"] - conv.message[-1] = {"role": "assistant", "content": ans["answer"]} - - def stream(): - nonlocal dia, msg, req, conv - try: - for ans in chat(dia, msg, True, **req): - fillin_conv(ans) - yield "data:"+json.dumps({"retcode": 0, "retmsg": "", "data": ans}, ensure_ascii=False) + "\n\n" - ConversationService.update_by_id(conv.id, conv.to_dict()) - except Exception as e: - yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e), - "data": {"answer": "**ERROR**: "+str(e), "reference": []}}, - ensure_ascii=False) + "\n\n" - yield "data:"+json.dumps({"retcode": 0, "retmsg": "", "data": True}, ensure_ascii=False) + "\n\n" - - if req.get("stream", True): - resp = Response(stream(), mimetype="text/event-stream") - resp.headers.add_header("Cache-control", "no-cache") - resp.headers.add_header("Connection", "keep-alive") - resp.headers.add_header("X-Accel-Buffering", "no") - resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8") - return resp - - else: - answer = None - for ans in chat(dia, msg, **req): - answer = ans - fillin_conv(ans) - ConversationService.update_by_id(conv.id, conv.to_dict()) - break - return get_json_result(data=answer) - except Exception as e: - return server_error_response(e) - +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import json
+from copy import deepcopy
+
+from flask import request, Response
+from flask_login import login_required
+
+from api.db.services.dialog_service import DialogService, ConversationService, chat
+from api.utils import get_uuid
+from api.utils.api_utils import server_error_response, get_data_error_result, validate_request, get_json_result
+
+
+@manager.route('/set', methods=['POST'])
+@login_required
+def set_conversation():
+    req = request.json
+    conv_id = req.get("conversation_id")
+    if conv_id:
+        del req["conversation_id"]
+        try:
+            if not ConversationService.update_by_id(conv_id, req):
+                return get_data_error_result(retmsg="Conversation not found!")
+            e, conv = ConversationService.get_by_id(conv_id)
+            if not e:
+                return get_data_error_result(
+                    retmsg="Failed to update the conversation!")
+            conv = conv.to_dict()
+            return get_json_result(data=conv)
+        except Exception as e:
+            return server_error_response(e)
+
+    try:
+        e, dia = DialogService.get_by_id(req["dialog_id"])
+        if not e:
+            return get_data_error_result(retmsg="Dialog not found!")
+        conv = {
+            "id": get_uuid(),
+            "dialog_id": req["dialog_id"],
+            "name": req.get("name", "New conversation"),
+            "message": [{"role": "assistant", "content": dia.prompt_config["prologue"]}]
+        }
+        ConversationService.save(**conv)
+        e, conv = ConversationService.get_by_id(conv["id"])
+        if not e:
+            return get_data_error_result(retmsg="Failed to create the conversation!")
+        conv = conv.to_dict()
+        return get_json_result(data=conv)
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route('/get', methods=['GET'])
+@login_required
+def get():
+    conv_id = request.args["conversation_id"]
+    try:
+        e, conv = ConversationService.get_by_id(conv_id)
+        if not e:
+            return get_data_error_result(retmsg="Conversation not found!")
+        conv = conv.to_dict()
+        return get_json_result(data=conv)
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route('/rm', methods=['POST'])
+@login_required
+def rm():
+    conv_ids = request.json["conversation_ids"]
+    try:
+        for cid in conv_ids:
+            ConversationService.delete_by_id(cid)
+        return get_json_result(data=True)
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route('/list', methods=['GET'])
+@login_required
+def list_conversation():
+    dialog_id = request.args["dialog_id"]
+    try:
+        convs = ConversationService.query(
+            dialog_id=dialog_id,
+            order_by=ConversationService.model.create_time,
+            reverse=True)
+        convs = [d.to_dict() for d in convs]
+        return get_json_result(data=convs)
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route('/completion', methods=['POST'])
+@login_required
+#@validate_request("conversation_id", "messages")
+def completion():
+    req = request.json
+    #req = {"conversation_id": "9aaaca4c11d311efa461fa163e197198", "messages": [
+    #    {"role": "user", "content": "上海有吗?"}
+    #]}
+    # Drop system messages and a leading assistant prologue before sending
+    # the history to the model.
+    msg = []
+    for m in req["messages"]:
+        if m["role"] == "system":
+            continue
+        if m["role"] == "assistant" and not msg:
+            continue
+        msg.append({"role": m["role"], "content": m["content"]})
+        if "doc_ids" in m:
+            
msg[-1]["doc_ids"] = m["doc_ids"] + try: + e, conv = ConversationService.get_by_id(req["conversation_id"]) + if not e: + return get_data_error_result(retmsg="Conversation not found!") + conv.message.append(deepcopy(msg[-1])) + e, dia = DialogService.get_by_id(conv.dialog_id) + if not e: + return get_data_error_result(retmsg="Dialog not found!") + del req["conversation_id"] + del req["messages"] + + if not conv.reference: + conv.reference = [] + conv.message.append({"role": "assistant", "content": ""}) + conv.reference.append({"chunks": [], "doc_aggs": []}) + + def fillin_conv(ans): + nonlocal conv + if not conv.reference: + conv.reference.append(ans["reference"]) + else: conv.reference[-1] = ans["reference"] + conv.message[-1] = {"role": "assistant", "content": ans["answer"]} + + def stream(): + nonlocal dia, msg, req, conv + try: + for ans in chat(dia, msg, True, **req): + fillin_conv(ans) + yield "data:"+json.dumps({"retcode": 0, "retmsg": "", "data": ans}, ensure_ascii=False) + "\n\n" + ConversationService.update_by_id(conv.id, conv.to_dict()) + except Exception as e: + yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e), + "data": {"answer": "**ERROR**: "+str(e), "reference": []}}, + ensure_ascii=False) + "\n\n" + yield "data:"+json.dumps({"retcode": 0, "retmsg": "", "data": True}, ensure_ascii=False) + "\n\n" + + if req.get("stream", True): + resp = Response(stream(), mimetype="text/event-stream") + resp.headers.add_header("Cache-control", "no-cache") + resp.headers.add_header("Connection", "keep-alive") + resp.headers.add_header("X-Accel-Buffering", "no") + resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8") + return resp + + else: + answer = None + for ans in chat(dia, msg, **req): + answer = ans + fillin_conv(ans) + ConversationService.update_by_id(conv.id, conv.to_dict()) + break + return get_json_result(data=answer) + except Exception as e: + return server_error_response(e) + diff --git a/api/apps/dialog_app.py b/api/apps/dialog_app.py index ce428947ea00234504112ba6cb64d29540de7c2d..5c4c2202099b564646784d45df87a2dac99eef10 100644 --- a/api/apps/dialog_app.py +++ b/api/apps/dialog_app.py @@ -1,172 +1,172 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from flask import request -from flask_login import login_required, current_user -from api.db.services.dialog_service import DialogService -from api.db import StatusEnum -from api.db.services.knowledgebase_service import KnowledgebaseService -from api.db.services.user_service import TenantService -from api.utils.api_utils import server_error_response, get_data_error_result, validate_request -from api.utils import get_uuid -from api.utils.api_utils import get_json_result - - -@manager.route('/set', methods=['POST']) -@login_required -def set_dialog(): - req = request.json - dialog_id = req.get("dialog_id") - name = req.get("name", "New Dialog") - description = req.get("description", "A helpful Dialog") - icon = req.get("icon", "") - top_n = req.get("top_n", 6) - top_k = req.get("top_k", 1024) - rerank_id = req.get("rerank_id", "") - if not rerank_id: req["rerank_id"] = "" - similarity_threshold = req.get("similarity_threshold", 0.1) - vector_similarity_weight = req.get("vector_similarity_weight", 0.3) - if vector_similarity_weight is None: vector_similarity_weight = 0.3 - llm_setting = req.get("llm_setting", {}) - default_prompt = { - "system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。 -以下是知识库: -{knowledge} -以上是知识库。""", - "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?", - "parameters": [ - {"key": "knowledge", "optional": False} - ], - "empty_response": "Sorry! 知识库中未找到相关内容!" - } - prompt_config = req.get("prompt_config", default_prompt) - - if not prompt_config["system"]: - prompt_config["system"] = default_prompt["system"] - # if len(prompt_config["parameters"]) < 1: - # prompt_config["parameters"] = default_prompt["parameters"] - # for p in prompt_config["parameters"]: - # if p["key"] == "knowledge":break - # else: prompt_config["parameters"].append(default_prompt["parameters"][0]) - - for p in prompt_config["parameters"]: - if p["optional"]: - continue - if prompt_config["system"].find("{%s}" % p["key"]) < 0: - return get_data_error_result( - retmsg="Parameter '{}' is not used".format(p["key"])) - - try: - e, tenant = TenantService.get_by_id(current_user.id) - if not e: - return get_data_error_result(retmsg="Tenant not found!") - llm_id = req.get("llm_id", tenant.llm_id) - if not dialog_id: - if not req.get("kb_ids"): - return get_data_error_result( - retmsg="Fail! 
Please select knowledgebase!") - dia = { - "id": get_uuid(), - "tenant_id": current_user.id, - "name": name, - "kb_ids": req["kb_ids"], - "description": description, - "llm_id": llm_id, - "llm_setting": llm_setting, - "prompt_config": prompt_config, - "top_n": top_n, - "top_k": top_k, - "rerank_id": rerank_id, - "similarity_threshold": similarity_threshold, - "vector_similarity_weight": vector_similarity_weight, - "icon": icon - } - if not DialogService.save(**dia): - return get_data_error_result(retmsg="Fail to new a dialog!") - e, dia = DialogService.get_by_id(dia["id"]) - if not e: - return get_data_error_result(retmsg="Fail to new a dialog!") - return get_json_result(data=dia.to_json()) - else: - del req["dialog_id"] - if "kb_names" in req: - del req["kb_names"] - if not DialogService.update_by_id(dialog_id, req): - return get_data_error_result(retmsg="Dialog not found!") - e, dia = DialogService.get_by_id(dialog_id) - if not e: - return get_data_error_result(retmsg="Fail to update a dialog!") - dia = dia.to_dict() - dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"]) - return get_json_result(data=dia) - except Exception as e: - return server_error_response(e) - - -@manager.route('/get', methods=['GET']) -@login_required -def get(): - dialog_id = request.args["dialog_id"] - try: - e, dia = DialogService.get_by_id(dialog_id) - if not e: - return get_data_error_result(retmsg="Dialog not found!") - dia = dia.to_dict() - dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"]) - return get_json_result(data=dia) - except Exception as e: - return server_error_response(e) - - -def get_kb_names(kb_ids): - ids, nms = [], [] - for kid in kb_ids: - e, kb = KnowledgebaseService.get_by_id(kid) - if not e or kb.status != StatusEnum.VALID.value: - continue - ids.append(kid) - nms.append(kb.name) - return ids, nms - - -@manager.route('/list', methods=['GET']) -@login_required -def list_dialogs(): - try: - diags = DialogService.query( - tenant_id=current_user.id, - status=StatusEnum.VALID.value, - reverse=True, - order_by=DialogService.model.create_time) - diags = [d.to_dict() for d in diags] - for d in diags: - d["kb_ids"], d["kb_names"] = get_kb_names(d["kb_ids"]) - return get_json_result(data=diags) - except Exception as e: - return server_error_response(e) - - -@manager.route('/rm', methods=['POST']) -@login_required -@validate_request("dialog_ids") -def rm(): - req = request.json - try: - DialogService.update_many_by_id( - [{"id": id, "status": StatusEnum.INVALID.value} for id in req["dialog_ids"]]) - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from flask import request +from flask_login import login_required, current_user +from api.db.services.dialog_service import DialogService +from api.db import StatusEnum +from api.db.services.knowledgebase_service import KnowledgebaseService +from api.db.services.user_service import TenantService +from api.utils.api_utils import server_error_response, get_data_error_result, validate_request +from api.utils import get_uuid +from api.utils.api_utils import get_json_result + + +@manager.route('/set', methods=['POST']) +@login_required +def set_dialog(): + req = request.json + dialog_id = req.get("dialog_id") + name = req.get("name", "New Dialog") + description = req.get("description", "A helpful Dialog") + icon = req.get("icon", "") + top_n = req.get("top_n", 6) + top_k = req.get("top_k", 1024) + rerank_id = req.get("rerank_id", "") + if not rerank_id: req["rerank_id"] = "" + similarity_threshold = req.get("similarity_threshold", 0.1) + vector_similarity_weight = req.get("vector_similarity_weight", 0.3) + if vector_similarity_weight is None: vector_similarity_weight = 0.3 + llm_setting = req.get("llm_setting", {}) + default_prompt = { + "system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。 +以下是知识库: +{knowledge} +以上是知识库。""", + "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?", + "parameters": [ + {"key": "knowledge", "optional": False} + ], + "empty_response": "Sorry! 知识库中未找到相关内容!" + } + prompt_config = req.get("prompt_config", default_prompt) + + if not prompt_config["system"]: + prompt_config["system"] = default_prompt["system"] + # if len(prompt_config["parameters"]) < 1: + # prompt_config["parameters"] = default_prompt["parameters"] + # for p in prompt_config["parameters"]: + # if p["key"] == "knowledge":break + # else: prompt_config["parameters"].append(default_prompt["parameters"][0]) + + for p in prompt_config["parameters"]: + if p["optional"]: + continue + if prompt_config["system"].find("{%s}" % p["key"]) < 0: + return get_data_error_result( + retmsg="Parameter '{}' is not used".format(p["key"])) + + try: + e, tenant = TenantService.get_by_id(current_user.id) + if not e: + return get_data_error_result(retmsg="Tenant not found!") + llm_id = req.get("llm_id", tenant.llm_id) + if not dialog_id: + if not req.get("kb_ids"): + return get_data_error_result( + retmsg="Fail! 
Please select knowledgebase!") + dia = { + "id": get_uuid(), + "tenant_id": current_user.id, + "name": name, + "kb_ids": req["kb_ids"], + "description": description, + "llm_id": llm_id, + "llm_setting": llm_setting, + "prompt_config": prompt_config, + "top_n": top_n, + "top_k": top_k, + "rerank_id": rerank_id, + "similarity_threshold": similarity_threshold, + "vector_similarity_weight": vector_similarity_weight, + "icon": icon + } + if not DialogService.save(**dia): + return get_data_error_result(retmsg="Fail to new a dialog!") + e, dia = DialogService.get_by_id(dia["id"]) + if not e: + return get_data_error_result(retmsg="Fail to new a dialog!") + return get_json_result(data=dia.to_json()) + else: + del req["dialog_id"] + if "kb_names" in req: + del req["kb_names"] + if not DialogService.update_by_id(dialog_id, req): + return get_data_error_result(retmsg="Dialog not found!") + e, dia = DialogService.get_by_id(dialog_id) + if not e: + return get_data_error_result(retmsg="Fail to update a dialog!") + dia = dia.to_dict() + dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"]) + return get_json_result(data=dia) + except Exception as e: + return server_error_response(e) + + +@manager.route('/get', methods=['GET']) +@login_required +def get(): + dialog_id = request.args["dialog_id"] + try: + e, dia = DialogService.get_by_id(dialog_id) + if not e: + return get_data_error_result(retmsg="Dialog not found!") + dia = dia.to_dict() + dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"]) + return get_json_result(data=dia) + except Exception as e: + return server_error_response(e) + + +def get_kb_names(kb_ids): + ids, nms = [], [] + for kid in kb_ids: + e, kb = KnowledgebaseService.get_by_id(kid) + if not e or kb.status != StatusEnum.VALID.value: + continue + ids.append(kid) + nms.append(kb.name) + return ids, nms + + +@manager.route('/list', methods=['GET']) +@login_required +def list_dialogs(): + try: + diags = DialogService.query( + tenant_id=current_user.id, + status=StatusEnum.VALID.value, + reverse=True, + order_by=DialogService.model.create_time) + diags = [d.to_dict() for d in diags] + for d in diags: + d["kb_ids"], d["kb_names"] = get_kb_names(d["kb_ids"]) + return get_json_result(data=diags) + except Exception as e: + return server_error_response(e) + + +@manager.route('/rm', methods=['POST']) +@login_required +@validate_request("dialog_ids") +def rm(): + req = request.json + try: + DialogService.update_many_by_id( + [{"id": id, "status": StatusEnum.INVALID.value} for id in req["dialog_ids"]]) + return get_json_result(data=True) + except Exception as e: + return server_error_response(e) diff --git a/api/apps/document_app.py b/api/apps/document_app.py index 7e4580f1aa1d368fcd0c4cfda531753eac52dc31..baaf7fe3062e99fcb210c05a3e690c01d1af1e44 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -1,586 +1,586 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License -# -import datetime -import hashlib -import json -import os -import pathlib -import re -import traceback -from concurrent.futures import ThreadPoolExecutor -from copy import deepcopy -from io import BytesIO - -import flask -from elasticsearch_dsl import Q -from flask import request -from flask_login import login_required, current_user - -from api.db.db_models import Task, File -from api.db.services.dialog_service import DialogService, ConversationService -from api.db.services.file2document_service import File2DocumentService -from api.db.services.file_service import FileService -from api.db.services.llm_service import LLMBundle -from api.db.services.task_service import TaskService, queue_tasks -from api.db.services.user_service import TenantService -from graphrag.mind_map_extractor import MindMapExtractor -from rag.app import naive -from rag.nlp import search -from rag.utils.es_conn import ELASTICSEARCH -from api.db.services import duplicate_name -from api.db.services.knowledgebase_service import KnowledgebaseService -from api.utils.api_utils import server_error_response, get_data_error_result, validate_request -from api.utils import get_uuid -from api.db import FileType, TaskStatus, ParserType, FileSource, LLMType -from api.db.services.document_service import DocumentService -from api.settings import RetCode, stat_logger -from api.utils.api_utils import get_json_result -from rag.utils.minio_conn import MINIO -from api.utils.file_utils import filename_type, thumbnail, get_project_base_directory -from api.utils.web_utils import html2pdf, is_valid_url - - -@manager.route('/upload', methods=['POST']) -@login_required -@validate_request("kb_id") -def upload(): - kb_id = request.form.get("kb_id") - if not kb_id: - return get_json_result( - data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR) - if 'file' not in request.files: - return get_json_result( - data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR) - - file_objs = request.files.getlist('file') - for file_obj in file_objs: - if file_obj.filename == '': - return get_json_result( - data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR) - - e, kb = KnowledgebaseService.get_by_id(kb_id) - if not e: - raise LookupError("Can't find this knowledgebase!") - - err, _ = FileService.upload_document(kb, file_objs) - if err: - return get_json_result( - data=False, retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR) - return get_json_result(data=True) - - -@manager.route('/web_crawl', methods=['POST']) -@login_required -@validate_request("kb_id", "name", "url") -def web_crawl(): - kb_id = request.form.get("kb_id") - if not kb_id: - return get_json_result( - data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR) - name = request.form.get("name") - url = request.form.get("url") - if not is_valid_url(url): - return get_json_result( - data=False, retmsg='The URL format is invalid', retcode=RetCode.ARGUMENT_ERROR) - e, kb = KnowledgebaseService.get_by_id(kb_id) - if not e: - raise LookupError("Can't find this knowledgebase!") - - blob = html2pdf(url) - if not blob: return server_error_response(ValueError("Download failure.")) - - root_folder = FileService.get_root_folder(current_user.id) - pf_id = root_folder["id"] - FileService.init_knowledgebase_docs(pf_id, current_user.id) - kb_root_folder = FileService.get_kb_folder(current_user.id) - kb_folder = 
FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"]) - - try: - filename = duplicate_name( - DocumentService.query, - name=name + ".pdf", - kb_id=kb.id) - filetype = filename_type(filename) - if filetype == FileType.OTHER.value: - raise RuntimeError("This type of file has not been supported yet!") - - location = filename - while MINIO.obj_exist(kb_id, location): - location += "_" - MINIO.put(kb_id, location, blob) - doc = { - "id": get_uuid(), - "kb_id": kb.id, - "parser_id": kb.parser_id, - "parser_config": kb.parser_config, - "created_by": current_user.id, - "type": filetype, - "name": filename, - "location": location, - "size": len(blob), - "thumbnail": thumbnail(filename, blob) - } - if doc["type"] == FileType.VISUAL: - doc["parser_id"] = ParserType.PICTURE.value - if doc["type"] == FileType.AURAL: - doc["parser_id"] = ParserType.AUDIO.value - if re.search(r"\.(ppt|pptx|pages)$", filename): - doc["parser_id"] = ParserType.PRESENTATION.value - DocumentService.insert(doc) - FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id) - except Exception as e: - return server_error_response(e) - return get_json_result(data=True) - - -@manager.route('/create', methods=['POST']) -@login_required -@validate_request("name", "kb_id") -def create(): - req = request.json - kb_id = req["kb_id"] - if not kb_id: - return get_json_result( - data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR) - - try: - e, kb = KnowledgebaseService.get_by_id(kb_id) - if not e: - return get_data_error_result( - retmsg="Can't find this knowledgebase!") - - if DocumentService.query(name=req["name"], kb_id=kb_id): - return get_data_error_result( - retmsg="Duplicated document name in the same knowledgebase.") - - doc = DocumentService.insert({ - "id": get_uuid(), - "kb_id": kb.id, - "parser_id": kb.parser_id, - "parser_config": kb.parser_config, - "created_by": current_user.id, - "type": FileType.VIRTUAL, - "name": req["name"], - "location": "", - "size": 0 - }) - return get_json_result(data=doc.to_json()) - except Exception as e: - return server_error_response(e) - - -@manager.route('/list', methods=['GET']) -@login_required -def list_docs(): - kb_id = request.args.get("kb_id") - if not kb_id: - return get_json_result( - data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR) - keywords = request.args.get("keywords", "") - - page_number = int(request.args.get("page", 1)) - items_per_page = int(request.args.get("page_size", 15)) - orderby = request.args.get("orderby", "create_time") - desc = request.args.get("desc", True) - try: - docs, tol = DocumentService.get_by_kb_id( - kb_id, page_number, items_per_page, orderby, desc, keywords) - return get_json_result(data={"total": tol, "docs": docs}) - except Exception as e: - return server_error_response(e) - - -@manager.route('/thumbnails', methods=['GET']) -@login_required -def thumbnails(): - doc_ids = request.args.get("doc_ids").split(",") - if not doc_ids: - return get_json_result( - data=False, retmsg='Lack of "Document ID"', retcode=RetCode.ARGUMENT_ERROR) - - try: - docs = DocumentService.get_thumbnails(doc_ids) - return get_json_result(data={d["id"]: d["thumbnail"] for d in docs}) - except Exception as e: - return server_error_response(e) - - -@manager.route('/change_status', methods=['POST']) -@login_required -@validate_request("doc_id", "status") -def change_status(): - req = request.json - if str(req["status"]) not in ["0", "1"]: - get_json_result( - data=False, - retmsg='"Status" must be either 0 or 1!', - 
retcode=RetCode.ARGUMENT_ERROR) - - try: - e, doc = DocumentService.get_by_id(req["doc_id"]) - if not e: - return get_data_error_result(retmsg="Document not found!") - e, kb = KnowledgebaseService.get_by_id(doc.kb_id) - if not e: - return get_data_error_result( - retmsg="Can't find this knowledgebase!") - - if not DocumentService.update_by_id( - req["doc_id"], {"status": str(req["status"])}): - return get_data_error_result( - retmsg="Database error (Document update)!") - - if str(req["status"]) == "0": - ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]), - scripts="ctx._source.available_int=0;", - idxnm=search.index_name( - kb.tenant_id) - ) - else: - ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]), - scripts="ctx._source.available_int=1;", - idxnm=search.index_name( - kb.tenant_id) - ) - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - -@manager.route('/rm', methods=['POST']) -@login_required -@validate_request("doc_id") -def rm(): - req = request.json - doc_ids = req["doc_id"] - if isinstance(doc_ids, str): doc_ids = [doc_ids] - root_folder = FileService.get_root_folder(current_user.id) - pf_id = root_folder["id"] - FileService.init_knowledgebase_docs(pf_id, current_user.id) - errors = "" - for doc_id in doc_ids: - try: - e, doc = DocumentService.get_by_id(doc_id) - if not e: - return get_data_error_result(retmsg="Document not found!") - tenant_id = DocumentService.get_tenant_id(doc_id) - if not tenant_id: - return get_data_error_result(retmsg="Tenant not found!") - - b, n = File2DocumentService.get_minio_address(doc_id=doc_id) - - if not DocumentService.remove_document(doc, tenant_id): - return get_data_error_result( - retmsg="Database error (Document removal)!") - - f2d = File2DocumentService.get_by_document_id(doc_id) - FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id]) - File2DocumentService.delete_by_document_id(doc_id) - - MINIO.rm(b, n) - except Exception as e: - errors += str(e) - - if errors: - return get_json_result(data=False, retmsg=errors, retcode=RetCode.SERVER_ERROR) - - return get_json_result(data=True) - - -@manager.route('/run', methods=['POST']) -@login_required -@validate_request("doc_ids", "run") -def run(): - req = request.json - try: - for id in req["doc_ids"]: - info = {"run": str(req["run"]), "progress": 0} - if str(req["run"]) == TaskStatus.RUNNING.value: - info["progress_msg"] = "" - info["chunk_num"] = 0 - info["token_num"] = 0 - DocumentService.update_by_id(id, info) - # if str(req["run"]) == TaskStatus.CANCEL.value: - tenant_id = DocumentService.get_tenant_id(id) - if not tenant_id: - return get_data_error_result(retmsg="Tenant not found!") - ELASTICSEARCH.deleteByQuery( - Q("match", doc_id=id), idxnm=search.index_name(tenant_id)) - - if str(req["run"]) == TaskStatus.RUNNING.value: - TaskService.filter_delete([Task.doc_id == id]) - e, doc = DocumentService.get_by_id(id) - doc = doc.to_dict() - doc["tenant_id"] = tenant_id - bucket, name = File2DocumentService.get_minio_address(doc_id=doc["id"]) - queue_tasks(doc, bucket, name) - - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - -@manager.route('/rename', methods=['POST']) -@login_required -@validate_request("doc_id", "name") -def rename(): - req = request.json - try: - e, doc = DocumentService.get_by_id(req["doc_id"]) - if not e: - return get_data_error_result(retmsg="Document not found!") - if 
pathlib.Path(req["name"].lower()).suffix != pathlib.Path( - doc.name.lower()).suffix: - return get_json_result( - data=False, - retmsg="The extension of file can't be changed", - retcode=RetCode.ARGUMENT_ERROR) - for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id): - if d.name == req["name"]: - return get_data_error_result( - retmsg="Duplicated document name in the same knowledgebase.") - - if not DocumentService.update_by_id( - req["doc_id"], {"name": req["name"]}): - return get_data_error_result( - retmsg="Database error (Document rename)!") - - informs = File2DocumentService.get_by_document_id(req["doc_id"]) - if informs: - e, file = FileService.get_by_id(informs[0].file_id) - FileService.update_by_id(file.id, {"name": req["name"]}) - - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - -@manager.route('/get/', methods=['GET']) -# @login_required -def get(doc_id): - try: - e, doc = DocumentService.get_by_id(doc_id) - if not e: - return get_data_error_result(retmsg="Document not found!") - - b, n = File2DocumentService.get_minio_address(doc_id=doc_id) - response = flask.make_response(MINIO.get(b, n)) - - ext = re.search(r"\.([^.]+)$", doc.name) - if ext: - if doc.type == FileType.VISUAL.value: - response.headers.set('Content-Type', 'image/%s' % ext.group(1)) - else: - response.headers.set( - 'Content-Type', - 'application/%s' % - ext.group(1)) - return response - except Exception as e: - return server_error_response(e) - - -@manager.route('/change_parser', methods=['POST']) -@login_required -@validate_request("doc_id", "parser_id") -def change_parser(): - req = request.json - try: - e, doc = DocumentService.get_by_id(req["doc_id"]) - if not e: - return get_data_error_result(retmsg="Document not found!") - if doc.parser_id.lower() == req["parser_id"].lower(): - if "parser_config" in req: - if req["parser_config"] == doc.parser_config: - return get_json_result(data=True) - else: - return get_json_result(data=True) - - if doc.type == FileType.VISUAL or re.search( - r"\.(ppt|pptx|pages)$", doc.name): - return get_data_error_result(retmsg="Not supported yet!") - - e = DocumentService.update_by_id(doc.id, - {"parser_id": req["parser_id"], "progress": 0, "progress_msg": "", - "run": TaskStatus.UNSTART.value}) - if not e: - return get_data_error_result(retmsg="Document not found!") - if "parser_config" in req: - DocumentService.update_parser_config(doc.id, req["parser_config"]) - if doc.token_num > 0: - e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, - doc.process_duation * -1) - if not e: - return get_data_error_result(retmsg="Document not found!") - tenant_id = DocumentService.get_tenant_id(req["doc_id"]) - if not tenant_id: - return get_data_error_result(retmsg="Tenant not found!") - ELASTICSEARCH.deleteByQuery( - Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id)) - - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) - - -@manager.route('/image/', methods=['GET']) -# @login_required -def get_image(image_id): - try: - bkt, nm = image_id.split("-") - response = flask.make_response(MINIO.get(bkt, nm)) - response.headers.set('Content-Type', 'image/JPEG') - return response - except Exception as e: - return server_error_response(e) - - -@manager.route('/upload_and_parse', methods=['POST']) -@login_required -@validate_request("conversation_id") -def upload_and_parse(): - from rag.app import presentation, picture, naive, audio, email - 
if 'file' not in request.files: - return get_json_result( - data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR) - - file_objs = request.files.getlist('file') - for file_obj in file_objs: - if file_obj.filename == '': - return get_json_result( - data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR) - - e, conv = ConversationService.get_by_id(request.form.get("conversation_id")) - if not e: - return get_data_error_result(retmsg="Conversation not found!") - e, dia = DialogService.get_by_id(conv.dialog_id) - kb_id = dia.kb_ids[0] - e, kb = KnowledgebaseService.get_by_id(kb_id) - if not e: - raise LookupError("Can't find this knowledgebase!") - - idxnm = search.index_name(kb.tenant_id) - if not ELASTICSEARCH.indexExist(idxnm): - ELASTICSEARCH.createIdx(idxnm, json.load( - open(os.path.join(get_project_base_directory(), "conf", "mapping.json"), "r"))) - - embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING, llm_name=kb.embd_id, lang=kb.language) - - err, files = FileService.upload_document(kb, file_objs) - if err: - return get_json_result( - data=False, retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR) - - def dummy(prog=None, msg=""): - pass - - FACTORY = { - ParserType.PRESENTATION.value: presentation, - ParserType.PICTURE.value: picture, - ParserType.AUDIO.value: audio, - ParserType.EMAIL.value: email - } - parser_config = {"chunk_token_num": 4096, "delimiter": "\n!?;。;!?", "layout_recognize": False} - exe = ThreadPoolExecutor(max_workers=12) - threads = [] - for d, blob in files: - kwargs = { - "callback": dummy, - "parser_config": parser_config, - "from_page": 0, - "to_page": 100000, - "tenant_id": kb.tenant_id, - "lang": kb.language - } - threads.append(exe.submit(FACTORY.get(d["parser_id"], naive).chunk, d["name"], blob, **kwargs)) - - for (docinfo,_), th in zip(files, threads): - docs = [] - doc = { - "doc_id": docinfo["id"], - "kb_id": [kb.id] - } - for ck in th.result(): - d = deepcopy(doc) - d.update(ck) - md5 = hashlib.md5() - md5.update((ck["content_with_weight"] + - str(d["doc_id"])).encode("utf-8")) - d["_id"] = md5.hexdigest() - d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19] - d["create_timestamp_flt"] = datetime.datetime.now().timestamp() - if not d.get("image"): - docs.append(d) - continue - - output_buffer = BytesIO() - if isinstance(d["image"], bytes): - output_buffer = BytesIO(d["image"]) - else: - d["image"].save(output_buffer, format='JPEG') - - MINIO.put(kb.id, d["_id"], output_buffer.getvalue()) - d["img_id"] = "{}-{}".format(kb.id, d["_id"]) - del d["image"] - docs.append(d) - - parser_ids = {d["id"]: d["parser_id"] for d, _ in files} - docids = [d["id"] for d, _ in files] - chunk_counts = {id: 0 for id in docids} - token_counts = {id: 0 for id in docids} - es_bulk_size = 64 - - def embedding(doc_id, cnts, batch_size=16): - nonlocal embd_mdl, chunk_counts, token_counts - vects = [] - for i in range(0, len(cnts), batch_size): - vts, c = embd_mdl.encode(cnts[i: i + batch_size]) - vects.extend(vts.tolist()) - chunk_counts[doc_id] += len(cnts[i:i + batch_size]) - token_counts[doc_id] += c - return vects - - _, tenant = TenantService.get_by_id(kb.tenant_id) - llm_bdl = LLMBundle(kb.tenant_id, LLMType.CHAT, tenant.llm_id) - for doc_id in docids: - cks = [c for c in docs if c["doc_id"] == doc_id] - - if False and parser_ids[doc_id] != ParserType.PICTURE.value: - mindmap = MindMapExtractor(llm_bdl) - try: - mind_map = json.dumps(mindmap([c["content_with_weight"] for c in docs if c["doc_id"] == doc_id]).output, 
ensure_ascii=False, indent=2) - if len(mind_map) < 32: raise Exception("Few content: "+mind_map) - cks.append({ - "doc_id": doc_id, - "kb_id": [kb.id], - "content_with_weight": mind_map, - "knowledge_graph_kwd": "mind_map" - }) - except Exception as e: - stat_logger.error("Mind map generation error:", traceback.format_exc()) - - vects = embedding(doc_id, [c["content_with_weight"] for c in cks]) - assert len(cks) == len(vects) - for i, d in enumerate(cks): - v = vects[i] - d["q_%d_vec" % len(v)] = v - for b in range(0, len(cks), es_bulk_size): - ELASTICSEARCH.bulk(cks[b:b + es_bulk_size], idxnm) - - DocumentService.increment_chunk_num( - doc_id, kb.id, token_counts[doc_id], chunk_counts[doc_id], 0) - - return get_json_result(data=[d["id"] for d,_ in files]) +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License +# +import datetime +import hashlib +import json +import os +import pathlib +import re +import traceback +from concurrent.futures import ThreadPoolExecutor +from copy import deepcopy +from io import BytesIO + +import flask +from elasticsearch_dsl import Q +from flask import request +from flask_login import login_required, current_user + +from api.db.db_models import Task, File +from api.db.services.dialog_service import DialogService, ConversationService +from api.db.services.file2document_service import File2DocumentService +from api.db.services.file_service import FileService +from api.db.services.llm_service import LLMBundle +from api.db.services.task_service import TaskService, queue_tasks +from api.db.services.user_service import TenantService +from graphrag.mind_map_extractor import MindMapExtractor +from rag.app import naive +from rag.nlp import search +from rag.utils.es_conn import ELASTICSEARCH +from api.db.services import duplicate_name +from api.db.services.knowledgebase_service import KnowledgebaseService +from api.utils.api_utils import server_error_response, get_data_error_result, validate_request +from api.utils import get_uuid +from api.db import FileType, TaskStatus, ParserType, FileSource, LLMType +from api.db.services.document_service import DocumentService +from api.settings import RetCode, stat_logger +from api.utils.api_utils import get_json_result +from rag.utils.minio_conn import MINIO +from api.utils.file_utils import filename_type, thumbnail, get_project_base_directory +from api.utils.web_utils import html2pdf, is_valid_url + + +@manager.route('/upload', methods=['POST']) +@login_required +@validate_request("kb_id") +def upload(): + kb_id = request.form.get("kb_id") + if not kb_id: + return get_json_result( + data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR) + if 'file' not in request.files: + return get_json_result( + data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR) + + file_objs = request.files.getlist('file') + for file_obj in file_objs: + if file_obj.filename == '': + return get_json_result( + data=False, retmsg='No file selected!', 
retcode=RetCode.ARGUMENT_ERROR) + + e, kb = KnowledgebaseService.get_by_id(kb_id) + if not e: + raise LookupError("Can't find this knowledgebase!") + + err, _ = FileService.upload_document(kb, file_objs) + if err: + return get_json_result( + data=False, retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR) + return get_json_result(data=True) + + +@manager.route('/web_crawl', methods=['POST']) +@login_required +@validate_request("kb_id", "name", "url") +def web_crawl(): + kb_id = request.form.get("kb_id") + if not kb_id: + return get_json_result( + data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR) + name = request.form.get("name") + url = request.form.get("url") + if not is_valid_url(url): + return get_json_result( + data=False, retmsg='The URL format is invalid', retcode=RetCode.ARGUMENT_ERROR) + e, kb = KnowledgebaseService.get_by_id(kb_id) + if not e: + raise LookupError("Can't find this knowledgebase!") + + blob = html2pdf(url) + if not blob: return server_error_response(ValueError("Download failure.")) + + root_folder = FileService.get_root_folder(current_user.id) + pf_id = root_folder["id"] + FileService.init_knowledgebase_docs(pf_id, current_user.id) + kb_root_folder = FileService.get_kb_folder(current_user.id) + kb_folder = FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"]) + + try: + filename = duplicate_name( + DocumentService.query, + name=name + ".pdf", + kb_id=kb.id) + filetype = filename_type(filename) + if filetype == FileType.OTHER.value: + raise RuntimeError("This type of file has not been supported yet!") + + location = filename + while MINIO.obj_exist(kb_id, location): + location += "_" + MINIO.put(kb_id, location, blob) + doc = { + "id": get_uuid(), + "kb_id": kb.id, + "parser_id": kb.parser_id, + "parser_config": kb.parser_config, + "created_by": current_user.id, + "type": filetype, + "name": filename, + "location": location, + "size": len(blob), + "thumbnail": thumbnail(filename, blob) + } + if doc["type"] == FileType.VISUAL: + doc["parser_id"] = ParserType.PICTURE.value + if doc["type"] == FileType.AURAL: + doc["parser_id"] = ParserType.AUDIO.value + if re.search(r"\.(ppt|pptx|pages)$", filename): + doc["parser_id"] = ParserType.PRESENTATION.value + DocumentService.insert(doc) + FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id) + except Exception as e: + return server_error_response(e) + return get_json_result(data=True) + + +@manager.route('/create', methods=['POST']) +@login_required +@validate_request("name", "kb_id") +def create(): + req = request.json + kb_id = req["kb_id"] + if not kb_id: + return get_json_result( + data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR) + + try: + e, kb = KnowledgebaseService.get_by_id(kb_id) + if not e: + return get_data_error_result( + retmsg="Can't find this knowledgebase!") + + if DocumentService.query(name=req["name"], kb_id=kb_id): + return get_data_error_result( + retmsg="Duplicated document name in the same knowledgebase.") + + doc = DocumentService.insert({ + "id": get_uuid(), + "kb_id": kb.id, + "parser_id": kb.parser_id, + "parser_config": kb.parser_config, + "created_by": current_user.id, + "type": FileType.VIRTUAL, + "name": req["name"], + "location": "", + "size": 0 + }) + return get_json_result(data=doc.to_json()) + except Exception as e: + return server_error_response(e) + + +@manager.route('/list', methods=['GET']) +@login_required +def list_docs(): + kb_id = request.args.get("kb_id") + if not kb_id: + return get_json_result( + data=False, 
retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR) + keywords = request.args.get("keywords", "") + + page_number = int(request.args.get("page", 1)) + items_per_page = int(request.args.get("page_size", 15)) + orderby = request.args.get("orderby", "create_time") + desc = request.args.get("desc", True) + try: + docs, tol = DocumentService.get_by_kb_id( + kb_id, page_number, items_per_page, orderby, desc, keywords) + return get_json_result(data={"total": tol, "docs": docs}) + except Exception as e: + return server_error_response(e) + + +@manager.route('/thumbnails', methods=['GET']) +@login_required +def thumbnails(): + doc_ids = request.args.get("doc_ids").split(",") + if not doc_ids: + return get_json_result( + data=False, retmsg='Lack of "Document ID"', retcode=RetCode.ARGUMENT_ERROR) + + try: + docs = DocumentService.get_thumbnails(doc_ids) + return get_json_result(data={d["id"]: d["thumbnail"] for d in docs}) + except Exception as e: + return server_error_response(e) + + +@manager.route('/change_status', methods=['POST']) +@login_required +@validate_request("doc_id", "status") +def change_status(): + req = request.json + if str(req["status"]) not in ["0", "1"]: + get_json_result( + data=False, + retmsg='"Status" must be either 0 or 1!', + retcode=RetCode.ARGUMENT_ERROR) + + try: + e, doc = DocumentService.get_by_id(req["doc_id"]) + if not e: + return get_data_error_result(retmsg="Document not found!") + e, kb = KnowledgebaseService.get_by_id(doc.kb_id) + if not e: + return get_data_error_result( + retmsg="Can't find this knowledgebase!") + + if not DocumentService.update_by_id( + req["doc_id"], {"status": str(req["status"])}): + return get_data_error_result( + retmsg="Database error (Document update)!") + + if str(req["status"]) == "0": + ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]), + scripts="ctx._source.available_int=0;", + idxnm=search.index_name( + kb.tenant_id) + ) + else: + ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]), + scripts="ctx._source.available_int=1;", + idxnm=search.index_name( + kb.tenant_id) + ) + return get_json_result(data=True) + except Exception as e: + return server_error_response(e) + + +@manager.route('/rm', methods=['POST']) +@login_required +@validate_request("doc_id") +def rm(): + req = request.json + doc_ids = req["doc_id"] + if isinstance(doc_ids, str): doc_ids = [doc_ids] + root_folder = FileService.get_root_folder(current_user.id) + pf_id = root_folder["id"] + FileService.init_knowledgebase_docs(pf_id, current_user.id) + errors = "" + for doc_id in doc_ids: + try: + e, doc = DocumentService.get_by_id(doc_id) + if not e: + return get_data_error_result(retmsg="Document not found!") + tenant_id = DocumentService.get_tenant_id(doc_id) + if not tenant_id: + return get_data_error_result(retmsg="Tenant not found!") + + b, n = File2DocumentService.get_minio_address(doc_id=doc_id) + + if not DocumentService.remove_document(doc, tenant_id): + return get_data_error_result( + retmsg="Database error (Document removal)!") + + f2d = File2DocumentService.get_by_document_id(doc_id) + FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id]) + File2DocumentService.delete_by_document_id(doc_id) + + MINIO.rm(b, n) + except Exception as e: + errors += str(e) + + if errors: + return get_json_result(data=False, retmsg=errors, retcode=RetCode.SERVER_ERROR) + + return get_json_result(data=True) + + +@manager.route('/run', methods=['POST']) +@login_required +@validate_request("doc_ids", 
"run") +def run(): + req = request.json + try: + for id in req["doc_ids"]: + info = {"run": str(req["run"]), "progress": 0} + if str(req["run"]) == TaskStatus.RUNNING.value: + info["progress_msg"] = "" + info["chunk_num"] = 0 + info["token_num"] = 0 + DocumentService.update_by_id(id, info) + # if str(req["run"]) == TaskStatus.CANCEL.value: + tenant_id = DocumentService.get_tenant_id(id) + if not tenant_id: + return get_data_error_result(retmsg="Tenant not found!") + ELASTICSEARCH.deleteByQuery( + Q("match", doc_id=id), idxnm=search.index_name(tenant_id)) + + if str(req["run"]) == TaskStatus.RUNNING.value: + TaskService.filter_delete([Task.doc_id == id]) + e, doc = DocumentService.get_by_id(id) + doc = doc.to_dict() + doc["tenant_id"] = tenant_id + bucket, name = File2DocumentService.get_minio_address(doc_id=doc["id"]) + queue_tasks(doc, bucket, name) + + return get_json_result(data=True) + except Exception as e: + return server_error_response(e) + + +@manager.route('/rename', methods=['POST']) +@login_required +@validate_request("doc_id", "name") +def rename(): + req = request.json + try: + e, doc = DocumentService.get_by_id(req["doc_id"]) + if not e: + return get_data_error_result(retmsg="Document not found!") + if pathlib.Path(req["name"].lower()).suffix != pathlib.Path( + doc.name.lower()).suffix: + return get_json_result( + data=False, + retmsg="The extension of file can't be changed", + retcode=RetCode.ARGUMENT_ERROR) + for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id): + if d.name == req["name"]: + return get_data_error_result( + retmsg="Duplicated document name in the same knowledgebase.") + + if not DocumentService.update_by_id( + req["doc_id"], {"name": req["name"]}): + return get_data_error_result( + retmsg="Database error (Document rename)!") + + informs = File2DocumentService.get_by_document_id(req["doc_id"]) + if informs: + e, file = FileService.get_by_id(informs[0].file_id) + FileService.update_by_id(file.id, {"name": req["name"]}) + + return get_json_result(data=True) + except Exception as e: + return server_error_response(e) + + +@manager.route('/get/', methods=['GET']) +# @login_required +def get(doc_id): + try: + e, doc = DocumentService.get_by_id(doc_id) + if not e: + return get_data_error_result(retmsg="Document not found!") + + b, n = File2DocumentService.get_minio_address(doc_id=doc_id) + response = flask.make_response(MINIO.get(b, n)) + + ext = re.search(r"\.([^.]+)$", doc.name) + if ext: + if doc.type == FileType.VISUAL.value: + response.headers.set('Content-Type', 'image/%s' % ext.group(1)) + else: + response.headers.set( + 'Content-Type', + 'application/%s' % + ext.group(1)) + return response + except Exception as e: + return server_error_response(e) + + +@manager.route('/change_parser', methods=['POST']) +@login_required +@validate_request("doc_id", "parser_id") +def change_parser(): + req = request.json + try: + e, doc = DocumentService.get_by_id(req["doc_id"]) + if not e: + return get_data_error_result(retmsg="Document not found!") + if doc.parser_id.lower() == req["parser_id"].lower(): + if "parser_config" in req: + if req["parser_config"] == doc.parser_config: + return get_json_result(data=True) + else: + return get_json_result(data=True) + + if doc.type == FileType.VISUAL or re.search( + r"\.(ppt|pptx|pages)$", doc.name): + return get_data_error_result(retmsg="Not supported yet!") + + e = DocumentService.update_by_id(doc.id, + {"parser_id": req["parser_id"], "progress": 0, "progress_msg": "", + "run": TaskStatus.UNSTART.value}) + if not e: + 
return get_data_error_result(retmsg="Document not found!") + if "parser_config" in req: + DocumentService.update_parser_config(doc.id, req["parser_config"]) + if doc.token_num > 0: + e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, + doc.process_duation * -1) + if not e: + return get_data_error_result(retmsg="Document not found!") + tenant_id = DocumentService.get_tenant_id(req["doc_id"]) + if not tenant_id: + return get_data_error_result(retmsg="Tenant not found!") + ELASTICSEARCH.deleteByQuery( + Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id)) + + return get_json_result(data=True) + except Exception as e: + return server_error_response(e) + + +@manager.route('/image/', methods=['GET']) +# @login_required +def get_image(image_id): + try: + bkt, nm = image_id.split("-") + response = flask.make_response(MINIO.get(bkt, nm)) + response.headers.set('Content-Type', 'image/JPEG') + return response + except Exception as e: + return server_error_response(e) + + +@manager.route('/upload_and_parse', methods=['POST']) +@login_required +@validate_request("conversation_id") +def upload_and_parse(): + from rag.app import presentation, picture, naive, audio, email + if 'file' not in request.files: + return get_json_result( + data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR) + + file_objs = request.files.getlist('file') + for file_obj in file_objs: + if file_obj.filename == '': + return get_json_result( + data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR) + + e, conv = ConversationService.get_by_id(request.form.get("conversation_id")) + if not e: + return get_data_error_result(retmsg="Conversation not found!") + e, dia = DialogService.get_by_id(conv.dialog_id) + kb_id = dia.kb_ids[0] + e, kb = KnowledgebaseService.get_by_id(kb_id) + if not e: + raise LookupError("Can't find this knowledgebase!") + + idxnm = search.index_name(kb.tenant_id) + if not ELASTICSEARCH.indexExist(idxnm): + ELASTICSEARCH.createIdx(idxnm, json.load( + open(os.path.join(get_project_base_directory(), "conf", "mapping.json"), "r"))) + + embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING, llm_name=kb.embd_id, lang=kb.language) + + err, files = FileService.upload_document(kb, file_objs) + if err: + return get_json_result( + data=False, retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR) + + def dummy(prog=None, msg=""): + pass + + FACTORY = { + ParserType.PRESENTATION.value: presentation, + ParserType.PICTURE.value: picture, + ParserType.AUDIO.value: audio, + ParserType.EMAIL.value: email + } + parser_config = {"chunk_token_num": 4096, "delimiter": "\n!?;。;!?", "layout_recognize": False} + exe = ThreadPoolExecutor(max_workers=12) + threads = [] + for d, blob in files: + kwargs = { + "callback": dummy, + "parser_config": parser_config, + "from_page": 0, + "to_page": 100000, + "tenant_id": kb.tenant_id, + "lang": kb.language + } + threads.append(exe.submit(FACTORY.get(d["parser_id"], naive).chunk, d["name"], blob, **kwargs)) + + for (docinfo,_), th in zip(files, threads): + docs = [] + doc = { + "doc_id": docinfo["id"], + "kb_id": [kb.id] + } + for ck in th.result(): + d = deepcopy(doc) + d.update(ck) + md5 = hashlib.md5() + md5.update((ck["content_with_weight"] + + str(d["doc_id"])).encode("utf-8")) + d["_id"] = md5.hexdigest() + d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19] + d["create_timestamp_flt"] = datetime.datetime.now().timestamp() + if not d.get("image"): + docs.append(d) + continue + + 
+
+
+@manager.route('/upload_and_parse', methods=['POST'])
+@login_required
+@validate_request("conversation_id")
+def upload_and_parse():
+    from rag.app import presentation, picture, naive, audio, email
+    if 'file' not in request.files:
+        return get_json_result(
+            data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
+
+    file_objs = request.files.getlist('file')
+    for file_obj in file_objs:
+        if file_obj.filename == '':
+            return get_json_result(
+                data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
+
+    e, conv = ConversationService.get_by_id(request.form.get("conversation_id"))
+    if not e:
+        return get_data_error_result(retmsg="Conversation not found!")
+    e, dia = DialogService.get_by_id(conv.dialog_id)
+    kb_id = dia.kb_ids[0]
+    e, kb = KnowledgebaseService.get_by_id(kb_id)
+    if not e:
+        raise LookupError("Can't find this knowledgebase!")
+
+    idxnm = search.index_name(kb.tenant_id)
+    if not ELASTICSEARCH.indexExist(idxnm):
+        ELASTICSEARCH.createIdx(idxnm, json.load(
+            open(os.path.join(get_project_base_directory(), "conf", "mapping.json"), "r")))
+
+    embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING, llm_name=kb.embd_id, lang=kb.language)
+
+    err, files = FileService.upload_document(kb, file_objs)
+    if err:
+        return get_json_result(
+            data=False, retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
+
+    def dummy(prog=None, msg=""):
+        pass
+
+    FACTORY = {
+        ParserType.PRESENTATION.value: presentation,
+        ParserType.PICTURE.value: picture,
+        ParserType.AUDIO.value: audio,
+        ParserType.EMAIL.value: email
+    }
+    parser_config = {"chunk_token_num": 4096, "delimiter": "\n!?;。;!?", "layout_recognize": False}
+    exe = ThreadPoolExecutor(max_workers=12)
+    threads = []
+    for d, blob in files:
+        kwargs = {
+            "callback": dummy,
+            "parser_config": parser_config,
+            "from_page": 0,
+            "to_page": 100000,
+            "tenant_id": kb.tenant_id,
+            "lang": kb.language
+        }
+        threads.append(exe.submit(FACTORY.get(d["parser_id"], naive).chunk, d["name"], blob, **kwargs))
+
+    # Collect chunks across all uploaded files; the per-doc_id filter below
+    # relies on this list holding every file's chunks.
+    docs = []
+    for (docinfo, _), th in zip(files, threads):
+        doc = {
+            "doc_id": docinfo["id"],
+            "kb_id": [kb.id]
+        }
+        for ck in th.result():
+            d = deepcopy(doc)
+            d.update(ck)
+            md5 = hashlib.md5()
+            md5.update((ck["content_with_weight"] +
+                        str(d["doc_id"])).encode("utf-8"))
+            d["_id"] = md5.hexdigest()
+            d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
+            d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
+            if not d.get("image"):
+                docs.append(d)
+                continue
+
+            output_buffer = BytesIO()
+            if isinstance(d["image"], bytes):
+                output_buffer = BytesIO(d["image"])
+            else:
+                d["image"].save(output_buffer, format='JPEG')
+
+            MINIO.put(kb.id, d["_id"], output_buffer.getvalue())
+            d["img_id"] = "{}-{}".format(kb.id, d["_id"])
+            del d["image"]
+            docs.append(d)
+
+    parser_ids = {d["id"]: d["parser_id"] for d, _ in files}
+    docids = [d["id"] for d, _ in files]
+    chunk_counts = {id: 0 for id in docids}
+    token_counts = {id: 0 for id in docids}
+    es_bulk_size = 64
+
+    def embedding(doc_id, cnts, batch_size=16):
+        nonlocal embd_mdl, chunk_counts, token_counts
+        vects = []
+        for i in range(0, len(cnts), batch_size):
+            vts, c = embd_mdl.encode(cnts[i: i + batch_size])
+            vects.extend(vts.tolist())
+            chunk_counts[doc_id] += len(cnts[i:i + batch_size])
+            token_counts[doc_id] += c
+        return vects
+
+    _, tenant = TenantService.get_by_id(kb.tenant_id)
+    llm_bdl = LLMBundle(kb.tenant_id, LLMType.CHAT, tenant.llm_id)
+    for doc_id in docids:
+        cks = [c for c in docs if c["doc_id"] == doc_id]
+
+        if False and parser_ids[doc_id] != ParserType.PICTURE.value:
+            mindmap = MindMapExtractor(llm_bdl)
+            try:
+                mind_map = json.dumps(mindmap([c["content_with_weight"] for c in docs if c["doc_id"] == doc_id]).output, ensure_ascii=False, indent=2)
+                if len(mind_map) < 32:
+                    raise Exception("Few content: " + mind_map)
+                cks.append({
+                    "doc_id": doc_id,
+                    "kb_id": [kb.id],
+                    "content_with_weight": mind_map,
+                    "knowledge_graph_kwd": "mind_map"
+                })
+            except Exception as e:
+                stat_logger.error("Mind map generation error: " + traceback.format_exc())
+
+        vects = embedding(doc_id, [c["content_with_weight"] for c in cks])
+        assert len(cks) == len(vects)
+        for i, d in enumerate(cks):
+            v = vects[i]
+            d["q_%d_vec" % len(v)] = v
+        for b in range(0, len(cks), es_bulk_size):
+            ELASTICSEARCH.bulk(cks[b:b + es_bulk_size], idxnm)
+
+        DocumentService.increment_chunk_num(
+            doc_id, kb.id, token_counts[doc_id], chunk_counts[doc_id], 0)
+
+    return get_json_result(data=[d["id"] for d, _ in files])
diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py
index 26b78dd4206ec60369d737b6dad1c421683d0714..5072d14a97349a125efa846f4181345719fad630 100644
--- a/api/apps/kb_app.py
+++ b/api/apps/kb_app.py
@@ -1,153 +1,153 @@
-#
-# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# -from elasticsearch_dsl import Q -from flask import request -from flask_login import login_required, current_user - -from api.db.services import duplicate_name -from api.db.services.document_service import DocumentService -from api.db.services.file2document_service import File2DocumentService -from api.db.services.file_service import FileService -from api.db.services.user_service import TenantService, UserTenantService -from api.utils.api_utils import server_error_response, get_data_error_result, validate_request -from api.utils import get_uuid, get_format_time -from api.db import StatusEnum, UserTenantRole, FileSource -from api.db.services.knowledgebase_service import KnowledgebaseService -from api.db.db_models import Knowledgebase, File -from api.settings import stat_logger, RetCode -from api.utils.api_utils import get_json_result -from rag.nlp import search -from rag.utils.es_conn import ELASTICSEARCH - - -@manager.route('/create', methods=['post']) -@login_required -@validate_request("name") -def create(): - req = request.json - req["name"] = req["name"].strip() - req["name"] = duplicate_name( - KnowledgebaseService.query, - name=req["name"], - tenant_id=current_user.id, - status=StatusEnum.VALID.value) - try: - req["id"] = get_uuid() - req["tenant_id"] = current_user.id - req["created_by"] = current_user.id - e, t = TenantService.get_by_id(current_user.id) - if not e: - return get_data_error_result(retmsg="Tenant not found.") - req["embd_id"] = t.embd_id - if not KnowledgebaseService.save(**req): - return get_data_error_result() - return get_json_result(data={"kb_id": req["id"]}) - except Exception as e: - return server_error_response(e) - - -@manager.route('/update', methods=['post']) -@login_required -@validate_request("kb_id", "name", "description", "permission", "parser_id") -def update(): - req = request.json - req["name"] = req["name"].strip() - try: - if not KnowledgebaseService.query( - created_by=current_user.id, id=req["kb_id"]): - return get_json_result( - data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR) - - e, kb = KnowledgebaseService.get_by_id(req["kb_id"]) - if not e: - return get_data_error_result( - retmsg="Can't find this knowledgebase!") - - if req["name"].lower() != kb.name.lower() \ - and len(KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) > 1: - return get_data_error_result( - retmsg="Duplicated knowledgebase name.") - - del req["kb_id"] - if not KnowledgebaseService.update_by_id(kb.id, req): - return get_data_error_result() - - e, kb = KnowledgebaseService.get_by_id(kb.id) - if not e: - return get_data_error_result( - retmsg="Database error (Knowledgebase rename)!") - - return get_json_result(data=kb.to_json()) - except Exception as e: - return server_error_response(e) - - -@manager.route('/detail', methods=['GET']) -@login_required -def detail(): - kb_id = request.args["kb_id"] - try: - kb = KnowledgebaseService.get_detail(kb_id) - if not kb: - return get_data_error_result( - retmsg="Can't find this knowledgebase!") - return get_json_result(data=kb) - except Exception as e: - return server_error_response(e) - - -@manager.route('/list', methods=['GET']) -@login_required -def list_kbs(): - page_number = request.args.get("page", 1) - items_per_page = request.args.get("page_size", 150) - orderby = request.args.get("orderby", "create_time") - desc = request.args.get("desc", True) - try: - tenants = 
TenantService.get_joined_tenants_by_user_id(current_user.id) - kbs = KnowledgebaseService.get_by_tenant_ids( - [m["tenant_id"] for m in tenants], current_user.id, page_number, items_per_page, orderby, desc) - return get_json_result(data=kbs) - except Exception as e: - return server_error_response(e) - - -@manager.route('/rm', methods=['post']) -@login_required -@validate_request("kb_id") -def rm(): - req = request.json - try: - kbs = KnowledgebaseService.query( - created_by=current_user.id, id=req["kb_id"]) - if not kbs: - return get_json_result( - data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR) - - for doc in DocumentService.query(kb_id=req["kb_id"]): - if not DocumentService.remove_document(doc, kbs[0].tenant_id): - return get_data_error_result( - retmsg="Database error (Document removal)!") - f2d = File2DocumentService.get_by_document_id(doc.id) - FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id]) - File2DocumentService.delete_by_document_id(doc.id) - - if not KnowledgebaseService.delete_by_id(req["kb_id"]): - return get_data_error_result( - retmsg="Database error (Knowledgebase removal)!") - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from elasticsearch_dsl import Q +from flask import request +from flask_login import login_required, current_user + +from api.db.services import duplicate_name +from api.db.services.document_service import DocumentService +from api.db.services.file2document_service import File2DocumentService +from api.db.services.file_service import FileService +from api.db.services.user_service import TenantService, UserTenantService +from api.utils.api_utils import server_error_response, get_data_error_result, validate_request +from api.utils import get_uuid, get_format_time +from api.db import StatusEnum, UserTenantRole, FileSource +from api.db.services.knowledgebase_service import KnowledgebaseService +from api.db.db_models import Knowledgebase, File +from api.settings import stat_logger, RetCode +from api.utils.api_utils import get_json_result +from rag.nlp import search +from rag.utils.es_conn import ELASTICSEARCH + + +@manager.route('/create', methods=['post']) +@login_required +@validate_request("name") +def create(): + req = request.json + req["name"] = req["name"].strip() + req["name"] = duplicate_name( + KnowledgebaseService.query, + name=req["name"], + tenant_id=current_user.id, + status=StatusEnum.VALID.value) + try: + req["id"] = get_uuid() + req["tenant_id"] = current_user.id + req["created_by"] = current_user.id + e, t = TenantService.get_by_id(current_user.id) + if not e: + return get_data_error_result(retmsg="Tenant not found.") + req["embd_id"] = t.embd_id + if not KnowledgebaseService.save(**req): + return get_data_error_result() + return get_json_result(data={"kb_id": req["id"]}) + except Exception as e: + return server_error_response(e) + + +@manager.route('/update', methods=['post']) +@login_required +@validate_request("kb_id", "name", "description", "permission", "parser_id") +def update(): + req = request.json + req["name"] = req["name"].strip() + try: + if not KnowledgebaseService.query( + created_by=current_user.id, id=req["kb_id"]): + return get_json_result( + data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR) + + e, kb = KnowledgebaseService.get_by_id(req["kb_id"]) + if not e: + return get_data_error_result( + retmsg="Can't find this knowledgebase!") + + if req["name"].lower() != kb.name.lower() \ + and len(KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) > 1: + return get_data_error_result( + retmsg="Duplicated knowledgebase name.") + + del req["kb_id"] + if not KnowledgebaseService.update_by_id(kb.id, req): + return get_data_error_result() + + e, kb = KnowledgebaseService.get_by_id(kb.id) + if not e: + return get_data_error_result( + retmsg="Database error (Knowledgebase rename)!") + + return get_json_result(data=kb.to_json()) + except Exception as e: + return server_error_response(e) + + +@manager.route('/detail', methods=['GET']) +@login_required +def detail(): + kb_id = request.args["kb_id"] + try: + kb = KnowledgebaseService.get_detail(kb_id) + if not kb: + return get_data_error_result( + retmsg="Can't find this knowledgebase!") + return get_json_result(data=kb) + except Exception as e: + return server_error_response(e) + + +@manager.route('/list', methods=['GET']) +@login_required +def list_kbs(): + page_number = request.args.get("page", 1) + items_per_page = request.args.get("page_size", 150) + orderby = request.args.get("orderby", "create_time") + desc = request.args.get("desc", True) + try: + tenants = 
TenantService.get_joined_tenants_by_user_id(current_user.id) + kbs = KnowledgebaseService.get_by_tenant_ids( + [m["tenant_id"] for m in tenants], current_user.id, page_number, items_per_page, orderby, desc) + return get_json_result(data=kbs) + except Exception as e: + return server_error_response(e) + + +@manager.route('/rm', methods=['post']) +@login_required +@validate_request("kb_id") +def rm(): + req = request.json + try: + kbs = KnowledgebaseService.query( + created_by=current_user.id, id=req["kb_id"]) + if not kbs: + return get_json_result( + data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR) + + for doc in DocumentService.query(kb_id=req["kb_id"]): + if not DocumentService.remove_document(doc, kbs[0].tenant_id): + return get_data_error_result( + retmsg="Database error (Document removal)!") + f2d = File2DocumentService.get_by_document_id(doc.id) + FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id]) + File2DocumentService.delete_by_document_id(doc.id) + + if not KnowledgebaseService.delete_by_id(req["kb_id"]): + return get_data_error_result( + retmsg="Database error (Knowledgebase removal)!") + return get_json_result(data=True) + except Exception as e: + return server_error_response(e) diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py index 26609f635e9c15bc736d92c84dfc90f544d68169..467ea878d3caeaf4b9f0d67a17d7df27fb2fe09e 100644 --- a/api/apps/llm_app.py +++ b/api/apps/llm_app.py @@ -1,279 +1,279 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -from flask import request -from flask_login import login_required, current_user -from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService -from api.utils.api_utils import server_error_response, get_data_error_result, validate_request -from api.db import StatusEnum, LLMType -from api.db.db_models import TenantLLM -from api.utils.api_utils import get_json_result -from rag.llm import EmbeddingModel, ChatModel, RerankModel,CvModel -import requests -import ast - -@manager.route('/factories', methods=['GET']) -@login_required -def factories(): - try: - fac = LLMFactoriesService.get_all() - return get_json_result(data=[f.to_dict() for f in fac if f.name not in ["Youdao", "FastEmbed", "BAAI"]]) - except Exception as e: - return server_error_response(e) - - -@manager.route('/set_api_key', methods=['POST']) -@login_required -@validate_request("llm_factory", "api_key") -def set_api_key(): - req = request.json - # test if api key works - chat_passed, embd_passed, rerank_passed = False, False, False - factory = req["llm_factory"] - msg = "" - for llm in LLMService.query(fid=factory): - if not embd_passed and llm.model_type == LLMType.EMBEDDING.value: - mdl = EmbeddingModel[factory]( - req["api_key"], llm.llm_name, base_url=req.get("base_url")) - try: - arr, tc = mdl.encode(["Test if the api key is available"]) - if len(arr[0]) == 0: - raise Exception("Fail") - embd_passed = True - except Exception as e: - msg += f"\nFail to access embedding model({llm.llm_name}) using this api key." + str(e) - elif not chat_passed and llm.model_type == LLMType.CHAT.value: - mdl = ChatModel[factory]( - req["api_key"], llm.llm_name, base_url=req.get("base_url")) - try: - m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], - {"temperature": 0.9,'max_tokens':50}) - if m.find("**ERROR**") >=0: - raise Exception(m) - except Exception as e: - msg += f"\nFail to access model({llm.llm_name}) using this api key." + str( - e) - chat_passed = True - elif not rerank_passed and llm.model_type == LLMType.RERANK: - mdl = RerankModel[factory]( - req["api_key"], llm.llm_name, base_url=req.get("base_url")) - try: - arr, tc = mdl.similarity("What's the weather?", ["Is it sunny today?"]) - if len(arr) == 0 or tc == 0: - raise Exception("Fail") - except Exception as e: - msg += f"\nFail to access model({llm.llm_name}) using this api key." 
+ str( - e) - rerank_passed = True - - if msg: - return get_data_error_result(retmsg=msg) - - llm = { - "api_key": req["api_key"], - "api_base": req.get("base_url", "") - } - for n in ["model_type", "llm_name"]: - if n in req: - llm[n] = req[n] - - if not TenantLLMService.filter_update( - [TenantLLM.tenant_id == current_user.id, TenantLLM.llm_factory == factory], llm): - for llm in LLMService.query(fid=factory): - TenantLLMService.save( - tenant_id=current_user.id, - llm_factory=factory, - llm_name=llm.llm_name, - model_type=llm.model_type, - api_key=req["api_key"], - api_base=req.get("base_url", "") - ) - - return get_json_result(data=True) - - -@manager.route('/add_llm', methods=['POST']) -@login_required -@validate_request("llm_factory", "llm_name", "model_type") -def add_llm(): - req = request.json - factory = req["llm_factory"] - - if factory == "VolcEngine": - # For VolcEngine, due to its special authentication method - # Assemble volc_ak, volc_sk, endpoint_id into api_key - temp = list(ast.literal_eval(req["llm_name"]).items())[0] - llm_name = temp[0] - endpoint_id = temp[1] - api_key = '{' + f'"volc_ak": "{req.get("volc_ak", "")}", ' \ - f'"volc_sk": "{req.get("volc_sk", "")}", ' \ - f'"ep_id": "{endpoint_id}", ' + '}' - elif factory == "Bedrock": - # For Bedrock, due to its special authentication method - # Assemble bedrock_ak, bedrock_sk, bedrock_region - llm_name = req["llm_name"] - api_key = '{' + f'"bedrock_ak": "{req.get("bedrock_ak", "")}", ' \ - f'"bedrock_sk": "{req.get("bedrock_sk", "")}", ' \ - f'"bedrock_region": "{req.get("bedrock_region", "")}", ' + '}' - elif factory == "LocalAI": - llm_name = req["llm_name"]+"___LocalAI" - api_key = "xxxxxxxxxxxxxxx" - elif factory == "OpenAI-API-Compatible": - llm_name = req["llm_name"]+"___OpenAI-API" - api_key = req.get("api_key","xxxxxxxxxxxxxxx") - else: - llm_name = req["llm_name"] - api_key = req.get("api_key","xxxxxxxxxxxxxxx") - - llm = { - "tenant_id": current_user.id, - "llm_factory": factory, - "model_type": req["model_type"], - "llm_name": llm_name, - "api_base": req.get("api_base", ""), - "api_key": api_key - } - - msg = "" - if llm["model_type"] == LLMType.EMBEDDING.value: - mdl = EmbeddingModel[factory]( - key=llm['api_key'] if factory in ["VolcEngine", "Bedrock","OpenAI-API-Compatible"] else None, - model_name=llm["llm_name"], - base_url=llm["api_base"]) - try: - arr, tc = mdl.encode(["Test if the api key is available"]) - if len(arr[0]) == 0 or tc == 0: - raise Exception("Fail") - except Exception as e: - msg += f"\nFail to access embedding model({llm['llm_name']})." + str(e) - elif llm["model_type"] == LLMType.CHAT.value: - mdl = ChatModel[factory]( - key=llm['api_key'] if factory in ["VolcEngine", "Bedrock","OpenAI-API-Compatible"] else None, - model_name=llm["llm_name"], - base_url=llm["api_base"] - ) - try: - m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], { - "temperature": 0.9}) - if not tc: - raise Exception(m) - except Exception as e: - msg += f"\nFail to access model({llm['llm_name']})." + str( - e) - elif llm["model_type"] == LLMType.RERANK: - mdl = RerankModel[factory]( - key=None, model_name=llm["llm_name"], base_url=llm["api_base"] - ) - try: - arr, tc = mdl.similarity("Hello~ Ragflower!", ["Hi, there!"]) - if len(arr) == 0 or tc == 0: - raise Exception("Not known.") - except Exception as e: - msg += f"\nFail to access model({llm['llm_name']})." 
+ str( - e) - elif llm["model_type"] == LLMType.IMAGE2TEXT.value: - mdl = CvModel[factory]( - key=llm["api_key"] if factory in ["OpenAI-API-Compatible"] else None, model_name=llm["llm_name"], base_url=llm["api_base"] - ) - try: - img_url = ( - "https://upload.wikimedia.org/wikipedia/comm" - "ons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/256" - "0px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" - ) - res = requests.get(img_url) - if res.status_code == 200: - m, tc = mdl.describe(res.content) - if not tc: - raise Exception(m) - else: - pass - except Exception as e: - msg += f"\nFail to access model({llm['llm_name']})." + str(e) - else: - # TODO: check other type of models - pass - - if msg: - return get_data_error_result(retmsg=msg) - - if not TenantLLMService.filter_update( - [TenantLLM.tenant_id == current_user.id, TenantLLM.llm_factory == factory, TenantLLM.llm_name == llm["llm_name"]], llm): - TenantLLMService.save(**llm) - - return get_json_result(data=True) - - -@manager.route('/delete_llm', methods=['POST']) -@login_required -@validate_request("llm_factory", "llm_name") -def delete_llm(): - req = request.json - TenantLLMService.filter_delete( - [TenantLLM.tenant_id == current_user.id, TenantLLM.llm_factory == req["llm_factory"], TenantLLM.llm_name == req["llm_name"]]) - return get_json_result(data=True) - - -@manager.route('/my_llms', methods=['GET']) -@login_required -def my_llms(): - try: - res = {} - for o in TenantLLMService.get_my_llms(current_user.id): - if o["llm_factory"] not in res: - res[o["llm_factory"]] = { - "tags": o["tags"], - "llm": [] - } - res[o["llm_factory"]]["llm"].append({ - "type": o["model_type"], - "name": o["llm_name"], - "used_token": o["used_tokens"] - }) - return get_json_result(data=res) - except Exception as e: - return server_error_response(e) - - -@manager.route('/list', methods=['GET']) -@login_required -def list_app(): - model_type = request.args.get("model_type") - try: - objs = TenantLLMService.query(tenant_id=current_user.id) - facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key]) - llms = LLMService.get_all() - llms = [m.to_dict() - for m in llms if m.status == StatusEnum.VALID.value] - for m in llms: - m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in ["Youdao","FastEmbed", "BAAI"] - - llm_set = set([m["llm_name"] for m in llms]) - for o in objs: - if not o.api_key:continue - if o.llm_name in llm_set:continue - llms.append({"llm_name": o.llm_name, "model_type": o.model_type, "fid": o.llm_factory, "available": True}) - - res = {} - for m in llms: - if model_type and m["model_type"].find(model_type)<0: - continue - if m["fid"] not in res: - res[m["fid"]] = [] - res[m["fid"]].append(m) - - return get_json_result(data=res) - except Exception as e: - return server_error_response(e) +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from flask import request +from flask_login import login_required, current_user +from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService +from api.utils.api_utils import server_error_response, get_data_error_result, validate_request +from api.db import StatusEnum, LLMType +from api.db.db_models import TenantLLM +from api.utils.api_utils import get_json_result +from rag.llm import EmbeddingModel, ChatModel, RerankModel,CvModel +import requests +import ast + +@manager.route('/factories', methods=['GET']) +@login_required +def factories(): + try: + fac = LLMFactoriesService.get_all() + return get_json_result(data=[f.to_dict() for f in fac if f.name not in ["Youdao", "FastEmbed", "BAAI"]]) + except Exception as e: + return server_error_response(e) + + +@manager.route('/set_api_key', methods=['POST']) +@login_required +@validate_request("llm_factory", "api_key") +def set_api_key(): + req = request.json + # test if api key works + chat_passed, embd_passed, rerank_passed = False, False, False + factory = req["llm_factory"] + msg = "" + for llm in LLMService.query(fid=factory): + if not embd_passed and llm.model_type == LLMType.EMBEDDING.value: + mdl = EmbeddingModel[factory]( + req["api_key"], llm.llm_name, base_url=req.get("base_url")) + try: + arr, tc = mdl.encode(["Test if the api key is available"]) + if len(arr[0]) == 0: + raise Exception("Fail") + embd_passed = True + except Exception as e: + msg += f"\nFail to access embedding model({llm.llm_name}) using this api key." + str(e) + elif not chat_passed and llm.model_type == LLMType.CHAT.value: + mdl = ChatModel[factory]( + req["api_key"], llm.llm_name, base_url=req.get("base_url")) + try: + m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], + {"temperature": 0.9,'max_tokens':50}) + if m.find("**ERROR**") >=0: + raise Exception(m) + except Exception as e: + msg += f"\nFail to access model({llm.llm_name}) using this api key." + str( + e) + chat_passed = True + elif not rerank_passed and llm.model_type == LLMType.RERANK: + mdl = RerankModel[factory]( + req["api_key"], llm.llm_name, base_url=req.get("base_url")) + try: + arr, tc = mdl.similarity("What's the weather?", ["Is it sunny today?"]) + if len(arr) == 0 or tc == 0: + raise Exception("Fail") + except Exception as e: + msg += f"\nFail to access model({llm.llm_name}) using this api key." 
+ str( + e) + rerank_passed = True + + if msg: + return get_data_error_result(retmsg=msg) + + llm = { + "api_key": req["api_key"], + "api_base": req.get("base_url", "") + } + for n in ["model_type", "llm_name"]: + if n in req: + llm[n] = req[n] + + if not TenantLLMService.filter_update( + [TenantLLM.tenant_id == current_user.id, TenantLLM.llm_factory == factory], llm): + for llm in LLMService.query(fid=factory): + TenantLLMService.save( + tenant_id=current_user.id, + llm_factory=factory, + llm_name=llm.llm_name, + model_type=llm.model_type, + api_key=req["api_key"], + api_base=req.get("base_url", "") + ) + + return get_json_result(data=True) + + +@manager.route('/add_llm', methods=['POST']) +@login_required +@validate_request("llm_factory", "llm_name", "model_type") +def add_llm(): + req = request.json + factory = req["llm_factory"] + + if factory == "VolcEngine": + # For VolcEngine, due to its special authentication method + # Assemble volc_ak, volc_sk, endpoint_id into api_key + temp = list(ast.literal_eval(req["llm_name"]).items())[0] + llm_name = temp[0] + endpoint_id = temp[1] + api_key = '{' + f'"volc_ak": "{req.get("volc_ak", "")}", ' \ + f'"volc_sk": "{req.get("volc_sk", "")}", ' \ + f'"ep_id": "{endpoint_id}", ' + '}' + elif factory == "Bedrock": + # For Bedrock, due to its special authentication method + # Assemble bedrock_ak, bedrock_sk, bedrock_region + llm_name = req["llm_name"] + api_key = '{' + f'"bedrock_ak": "{req.get("bedrock_ak", "")}", ' \ + f'"bedrock_sk": "{req.get("bedrock_sk", "")}", ' \ + f'"bedrock_region": "{req.get("bedrock_region", "")}", ' + '}' + elif factory == "LocalAI": + llm_name = req["llm_name"]+"___LocalAI" + api_key = "xxxxxxxxxxxxxxx" + elif factory == "OpenAI-API-Compatible": + llm_name = req["llm_name"]+"___OpenAI-API" + api_key = req.get("api_key","xxxxxxxxxxxxxxx") + else: + llm_name = req["llm_name"] + api_key = req.get("api_key","xxxxxxxxxxxxxxx") + + llm = { + "tenant_id": current_user.id, + "llm_factory": factory, + "model_type": req["model_type"], + "llm_name": llm_name, + "api_base": req.get("api_base", ""), + "api_key": api_key + } + + msg = "" + if llm["model_type"] == LLMType.EMBEDDING.value: + mdl = EmbeddingModel[factory]( + key=llm['api_key'] if factory in ["VolcEngine", "Bedrock","OpenAI-API-Compatible"] else None, + model_name=llm["llm_name"], + base_url=llm["api_base"]) + try: + arr, tc = mdl.encode(["Test if the api key is available"]) + if len(arr[0]) == 0 or tc == 0: + raise Exception("Fail") + except Exception as e: + msg += f"\nFail to access embedding model({llm['llm_name']})." + str(e) + elif llm["model_type"] == LLMType.CHAT.value: + mdl = ChatModel[factory]( + key=llm['api_key'] if factory in ["VolcEngine", "Bedrock","OpenAI-API-Compatible"] else None, + model_name=llm["llm_name"], + base_url=llm["api_base"] + ) + try: + m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], { + "temperature": 0.9}) + if not tc: + raise Exception(m) + except Exception as e: + msg += f"\nFail to access model({llm['llm_name']})." + str( + e) + elif llm["model_type"] == LLMType.RERANK: + mdl = RerankModel[factory]( + key=None, model_name=llm["llm_name"], base_url=llm["api_base"] + ) + try: + arr, tc = mdl.similarity("Hello~ Ragflower!", ["Hi, there!"]) + if len(arr) == 0 or tc == 0: + raise Exception("Not known.") + except Exception as e: + msg += f"\nFail to access model({llm['llm_name']})." 
+ str( + e) + elif llm["model_type"] == LLMType.IMAGE2TEXT.value: + mdl = CvModel[factory]( + key=llm["api_key"] if factory in ["OpenAI-API-Compatible"] else None, model_name=llm["llm_name"], base_url=llm["api_base"] + ) + try: + img_url = ( + "https://upload.wikimedia.org/wikipedia/comm" + "ons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/256" + "0px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + ) + res = requests.get(img_url) + if res.status_code == 200: + m, tc = mdl.describe(res.content) + if not tc: + raise Exception(m) + else: + pass + except Exception as e: + msg += f"\nFail to access model({llm['llm_name']})." + str(e) + else: + # TODO: check other type of models + pass + + if msg: + return get_data_error_result(retmsg=msg) + + if not TenantLLMService.filter_update( + [TenantLLM.tenant_id == current_user.id, TenantLLM.llm_factory == factory, TenantLLM.llm_name == llm["llm_name"]], llm): + TenantLLMService.save(**llm) + + return get_json_result(data=True) + + +@manager.route('/delete_llm', methods=['POST']) +@login_required +@validate_request("llm_factory", "llm_name") +def delete_llm(): + req = request.json + TenantLLMService.filter_delete( + [TenantLLM.tenant_id == current_user.id, TenantLLM.llm_factory == req["llm_factory"], TenantLLM.llm_name == req["llm_name"]]) + return get_json_result(data=True) + + +@manager.route('/my_llms', methods=['GET']) +@login_required +def my_llms(): + try: + res = {} + for o in TenantLLMService.get_my_llms(current_user.id): + if o["llm_factory"] not in res: + res[o["llm_factory"]] = { + "tags": o["tags"], + "llm": [] + } + res[o["llm_factory"]]["llm"].append({ + "type": o["model_type"], + "name": o["llm_name"], + "used_token": o["used_tokens"] + }) + return get_json_result(data=res) + except Exception as e: + return server_error_response(e) + + +@manager.route('/list', methods=['GET']) +@login_required +def list_app(): + model_type = request.args.get("model_type") + try: + objs = TenantLLMService.query(tenant_id=current_user.id) + facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key]) + llms = LLMService.get_all() + llms = [m.to_dict() + for m in llms if m.status == StatusEnum.VALID.value] + for m in llms: + m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in ["Youdao","FastEmbed", "BAAI"] + + llm_set = set([m["llm_name"] for m in llms]) + for o in objs: + if not o.api_key:continue + if o.llm_name in llm_set:continue + llms.append({"llm_name": o.llm_name, "model_type": o.model_type, "fid": o.llm_factory, "available": True}) + + res = {} + for m in llms: + if model_type and m["model_type"].find(model_type)<0: + continue + if m["fid"] not in res: + res[m["fid"]] = [] + res[m["fid"]].append(m) + + return get_json_result(data=res) + except Exception as e: + return server_error_response(e) diff --git a/api/apps/user_app.py b/api/apps/user_app.py index 48e02612d0a92db05f98c6c356f8f04fb7e8b2e7..04f425db3831b0132efa303f9a1b3640ae565a6c 100644 --- a/api/apps/user_app.py +++ b/api/apps/user_app.py @@ -1,391 +1,391 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import json -import re -from datetime import datetime - -from flask import request, session, redirect -from werkzeug.security import generate_password_hash, check_password_hash -from flask_login import login_required, current_user, login_user, logout_user - -from api.db.db_models import TenantLLM -from api.db.services.llm_service import TenantLLMService, LLMService -from api.utils.api_utils import server_error_response, validate_request -from api.utils import get_uuid, get_format_time, decrypt, download_img, current_timestamp, datetime_format -from api.db import UserTenantRole, LLMType, FileType -from api.settings import RetCode, GITHUB_OAUTH, FEISHU_OAUTH, CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, \ - API_KEY, \ - LLM_FACTORY, LLM_BASE_URL, RERANK_MDL -from api.db.services.user_service import UserService, TenantService, UserTenantService -from api.db.services.file_service import FileService -from api.settings import stat_logger -from api.utils.api_utils import get_json_result, cors_reponse - - -@manager.route('/login', methods=['POST', 'GET']) -def login(): - login_channel = "password" - if not request.json: - return get_json_result(data=False, retcode=RetCode.AUTHENTICATION_ERROR, - retmsg='Unautherized!') - - email = request.json.get('email', "") - users = UserService.query(email=email) - if not users: - return get_json_result( - data=False, retcode=RetCode.AUTHENTICATION_ERROR, retmsg=f'This Email is not registered!') - - password = request.json.get('password') - try: - password = decrypt(password) - except BaseException: - return get_json_result( - data=False, retcode=RetCode.SERVER_ERROR, retmsg='Fail to crypt password') - - user = UserService.query_user(email, password) - if user: - response_data = user.to_json() - user.access_token = get_uuid() - login_user(user) - user.update_time = current_timestamp(), - user.update_date = datetime_format(datetime.now()), - user.save() - msg = "Welcome back!" 
- return cors_reponse(data=response_data, auth=user.get_id(), retmsg=msg) - else: - return get_json_result(data=False, retcode=RetCode.AUTHENTICATION_ERROR, - retmsg='Email and Password do not match!') - - -@manager.route('/github_callback', methods=['GET']) -def github_callback(): - import requests - res = requests.post(GITHUB_OAUTH.get("url"), data={ - "client_id": GITHUB_OAUTH.get("client_id"), - "client_secret": GITHUB_OAUTH.get("secret_key"), - "code": request.args.get('code') - }, headers={"Accept": "application/json"}) - res = res.json() - if "error" in res: - return redirect("/?error=%s" % res["error_description"]) - - if "user:email" not in res["scope"].split(","): - return redirect("/?error=user:email not in scope") - - session["access_token"] = res["access_token"] - session["access_token_from"] = "github" - userinfo = user_info_from_github(session["access_token"]) - users = UserService.query(email=userinfo["email"]) - user_id = get_uuid() - if not users: - try: - try: - avatar = download_img(userinfo["avatar_url"]) - except Exception as e: - stat_logger.exception(e) - avatar = "" - users = user_register(user_id, { - "access_token": session["access_token"], - "email": userinfo["email"], - "avatar": avatar, - "nickname": userinfo["login"], - "login_channel": "github", - "last_login_time": get_format_time(), - "is_superuser": False, - }) - if not users: - raise Exception('Register user failure.') - if len(users) > 1: - raise Exception('Same E-mail exist!') - user = users[0] - login_user(user) - return redirect("/?auth=%s" % user.get_id()) - except Exception as e: - rollback_user_registration(user_id) - stat_logger.exception(e) - return redirect("/?error=%s" % str(e)) - user = users[0] - user.access_token = get_uuid() - login_user(user) - user.save() - return redirect("/?auth=%s" % user.get_id()) - - -@manager.route('/feishu_callback', methods=['GET']) -def feishu_callback(): - import requests - app_access_token_res = requests.post(FEISHU_OAUTH.get("app_access_token_url"), data=json.dumps({ - "app_id": FEISHU_OAUTH.get("app_id"), - "app_secret": FEISHU_OAUTH.get("app_secret") - }), headers={"Content-Type": "application/json; charset=utf-8"}) - app_access_token_res = app_access_token_res.json() - if app_access_token_res['code'] != 0: - return redirect("/?error=%s" % app_access_token_res) - - res = requests.post(FEISHU_OAUTH.get("user_access_token_url"), data=json.dumps({ - "grant_type": FEISHU_OAUTH.get("grant_type"), - "code": request.args.get('code') - }), headers={"Content-Type": "application/json; charset=utf-8", - 'Authorization': f"Bearer {app_access_token_res['app_access_token']}"}) - res = res.json() - if res['code'] != 0: - return redirect("/?error=%s" % res["message"]) - - if "contact:user.email:readonly" not in res["data"]["scope"].split(" "): - return redirect("/?error=contact:user.email:readonly not in scope") - session["access_token"] = res["data"]["access_token"] - session["access_token_from"] = "feishu" - userinfo = user_info_from_feishu(session["access_token"]) - users = UserService.query(email=userinfo["email"]) - user_id = get_uuid() - if not users: - try: - try: - avatar = download_img(userinfo["avatar_url"]) - except Exception as e: - stat_logger.exception(e) - avatar = "" - users = user_register(user_id, { - "access_token": session["access_token"], - "email": userinfo["email"], - "avatar": avatar, - "nickname": userinfo["en_name"], - "login_channel": "feishu", - "last_login_time": get_format_time(), - "is_superuser": False, - }) - if not users: - raise 
Exception('Register user failure.') - if len(users) > 1: - raise Exception('Same E-mail exist!') - user = users[0] - login_user(user) - return redirect("/?auth=%s" % user.get_id()) - except Exception as e: - rollback_user_registration(user_id) - stat_logger.exception(e) - return redirect("/?error=%s" % str(e)) - user = users[0] - user.access_token = get_uuid() - login_user(user) - user.save() - return redirect("/?auth=%s" % user.get_id()) - - -def user_info_from_feishu(access_token): - import requests - headers = {"Content-Type": "application/json; charset=utf-8", - 'Authorization': f"Bearer {access_token}"} - res = requests.get( - f"https://open.feishu.cn/open-apis/authen/v1/user_info", - headers=headers) - user_info = res.json()["data"] - user_info["email"] = None if user_info.get("email") == "" else user_info["email"] - return user_info - - -def user_info_from_github(access_token): - import requests - headers = {"Accept": "application/json", - 'Authorization': f"token {access_token}"} - res = requests.get( - f"https://api.github.com/user?access_token={access_token}", - headers=headers) - user_info = res.json() - email_info = requests.get( - f"https://api.github.com/user/emails?access_token={access_token}", - headers=headers).json() - user_info["email"] = next( - (email for email in email_info if email['primary'] == True), - None)["email"] - return user_info - - -@manager.route("/logout", methods=['GET']) -@login_required -def log_out(): - current_user.access_token = "" - current_user.save() - logout_user() - return get_json_result(data=True) - - -@manager.route("/setting", methods=["POST"]) -@login_required -def setting_user(): - update_dict = {} - request_data = request.json - if request_data.get("password"): - new_password = request_data.get("new_password") - if not check_password_hash( - current_user.password, decrypt(request_data["password"])): - return get_json_result( - data=False, retcode=RetCode.AUTHENTICATION_ERROR, retmsg='Password error!') - - if new_password: - update_dict["password"] = generate_password_hash( - decrypt(new_password)) - - for k in request_data.keys(): - if k in ["password", "new_password"]: - continue - update_dict[k] = request_data[k] - - try: - UserService.update_by_id(current_user.id, update_dict) - return get_json_result(data=True) - except Exception as e: - stat_logger.exception(e) - return get_json_result( - data=False, retmsg='Update failure!', retcode=RetCode.EXCEPTION_ERROR) - - -@manager.route("/info", methods=["GET"]) -@login_required -def user_info(): - return get_json_result(data=current_user.to_dict()) - - -def rollback_user_registration(user_id): - try: - UserService.delete_by_id(user_id) - except Exception as e: - pass - try: - TenantService.delete_by_id(user_id) - except Exception as e: - pass - try: - u = UserTenantService.query(tenant_id=user_id) - if u: - UserTenantService.delete_by_id(u[0].id) - except Exception as e: - pass - try: - TenantLLM.delete().where(TenantLLM.tenant_id == user_id).execute() - except Exception as e: - pass - - -def user_register(user_id, user): - user["id"] = user_id - tenant = { - "id": user_id, - "name": user["nickname"] + "‘s Kingdom", - "llm_id": CHAT_MDL, - "embd_id": EMBEDDING_MDL, - "asr_id": ASR_MDL, - "parser_ids": PARSERS, - "img2txt_id": IMAGE2TEXT_MDL, - "rerank_id": RERANK_MDL - } - usr_tenant = { - "tenant_id": user_id, - "user_id": user_id, - "invited_by": user_id, - "role": UserTenantRole.OWNER - } - file_id = get_uuid() - file = { - "id": file_id, - "parent_id": file_id, - "tenant_id": user_id, - 
"created_by": user_id, - "name": "/", - "type": FileType.FOLDER.value, - "size": 0, - "location": "", - } - tenant_llm = [] - for llm in LLMService.query(fid=LLM_FACTORY): - tenant_llm.append({"tenant_id": user_id, - "llm_factory": LLM_FACTORY, - "llm_name": llm.llm_name, - "model_type": llm.model_type, - "api_key": API_KEY, - "api_base": LLM_BASE_URL - }) - - if not UserService.save(**user): - return - TenantService.insert(**tenant) - UserTenantService.insert(**usr_tenant) - TenantLLMService.insert_many(tenant_llm) - FileService.insert(file) - return UserService.query(email=user["email"]) - - -@manager.route("/register", methods=["POST"]) -@validate_request("nickname", "email", "password") -def user_add(): - req = request.json - if UserService.query(email=req["email"]): - return get_json_result( - data=False, retmsg=f'Email: {req["email"]} has already registered!', retcode=RetCode.OPERATING_ERROR) - if not re.match(r"^[\w\._-]+@([\w_-]+\.)+[\w-]{2,4}$", req["email"]): - return get_json_result(data=False, retmsg=f'Invaliad e-mail: {req["email"]}!', - retcode=RetCode.OPERATING_ERROR) - - user_dict = { - "access_token": get_uuid(), - "email": req["email"], - "nickname": req["nickname"], - "password": decrypt(req["password"]), - "login_channel": "password", - "last_login_time": get_format_time(), - "is_superuser": False, - } - - user_id = get_uuid() - try: - users = user_register(user_id, user_dict) - if not users: - raise Exception('Register user failure.') - if len(users) > 1: - raise Exception('Same E-mail exist!') - user = users[0] - login_user(user) - return cors_reponse(data=user.to_json(), - auth=user.get_id(), retmsg="Welcome aboard!") - except Exception as e: - rollback_user_registration(user_id) - stat_logger.exception(e) - return get_json_result( - data=False, retmsg='User registration failure!', retcode=RetCode.EXCEPTION_ERROR) - - -@manager.route("/tenant_info", methods=["GET"]) -@login_required -def tenant_info(): - try: - tenants = TenantService.get_by_user_id(current_user.id)[0] - return get_json_result(data=tenants) - except Exception as e: - return server_error_response(e) - - -@manager.route("/set_tenant_info", methods=["POST"]) -@login_required -@validate_request("tenant_id", "asr_id", "embd_id", "img2txt_id", "llm_id") -def set_tenant_info(): - req = request.json - try: - tid = req["tenant_id"] - del req["tenant_id"] - TenantService.update_by_id(tid, req) - return get_json_result(data=True) - except Exception as e: - return server_error_response(e) +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+import json
+import re
+from datetime import datetime
+
+from flask import request, session, redirect
+from werkzeug.security import generate_password_hash, check_password_hash
+from flask_login import login_required, current_user, login_user, logout_user
+
+from api.db.db_models import TenantLLM
+from api.db.services.llm_service import TenantLLMService, LLMService
+from api.utils.api_utils import server_error_response, validate_request
+from api.utils import get_uuid, get_format_time, decrypt, download_img, current_timestamp, datetime_format
+from api.db import UserTenantRole, LLMType, FileType
+from api.settings import RetCode, GITHUB_OAUTH, FEISHU_OAUTH, CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, \
+    API_KEY, \
+    LLM_FACTORY, LLM_BASE_URL, RERANK_MDL
+from api.db.services.user_service import UserService, TenantService, UserTenantService
+from api.db.services.file_service import FileService
+from api.settings import stat_logger
+from api.utils.api_utils import get_json_result, cors_reponse
+
+
+@manager.route('/login', methods=['POST', 'GET'])
+def login():
+    login_channel = "password"
+    if not request.json:
+        return get_json_result(data=False, retcode=RetCode.AUTHENTICATION_ERROR,
+                               retmsg='Unauthorized!')
+
+    email = request.json.get('email', "")
+    users = UserService.query(email=email)
+    if not users:
+        return get_json_result(
+            data=False, retcode=RetCode.AUTHENTICATION_ERROR, retmsg='This Email is not registered!')
+
+    password = request.json.get('password')
+    try:
+        password = decrypt(password)
+    except BaseException:
+        return get_json_result(
+            data=False, retcode=RetCode.SERVER_ERROR, retmsg='Fail to decrypt password')
+
+    user = UserService.query_user(email, password)
+    if user:
+        response_data = user.to_json()
+        user.access_token = get_uuid()
+        login_user(user)
+        user.update_time = current_timestamp()
+        user.update_date = datetime_format(datetime.now())
+        user.save()
+        msg = "Welcome back!"
+ return cors_reponse(data=response_data, auth=user.get_id(), retmsg=msg) + else: + return get_json_result(data=False, retcode=RetCode.AUTHENTICATION_ERROR, + retmsg='Email and Password do not match!') + + +@manager.route('/github_callback', methods=['GET']) +def github_callback(): + import requests + res = requests.post(GITHUB_OAUTH.get("url"), data={ + "client_id": GITHUB_OAUTH.get("client_id"), + "client_secret": GITHUB_OAUTH.get("secret_key"), + "code": request.args.get('code') + }, headers={"Accept": "application/json"}) + res = res.json() + if "error" in res: + return redirect("/?error=%s" % res["error_description"]) + + if "user:email" not in res["scope"].split(","): + return redirect("/?error=user:email not in scope") + + session["access_token"] = res["access_token"] + session["access_token_from"] = "github" + userinfo = user_info_from_github(session["access_token"]) + users = UserService.query(email=userinfo["email"]) + user_id = get_uuid() + if not users: + try: + try: + avatar = download_img(userinfo["avatar_url"]) + except Exception as e: + stat_logger.exception(e) + avatar = "" + users = user_register(user_id, { + "access_token": session["access_token"], + "email": userinfo["email"], + "avatar": avatar, + "nickname": userinfo["login"], + "login_channel": "github", + "last_login_time": get_format_time(), + "is_superuser": False, + }) + if not users: + raise Exception('Register user failure.') + if len(users) > 1: + raise Exception('Same E-mail exist!') + user = users[0] + login_user(user) + return redirect("/?auth=%s" % user.get_id()) + except Exception as e: + rollback_user_registration(user_id) + stat_logger.exception(e) + return redirect("/?error=%s" % str(e)) + user = users[0] + user.access_token = get_uuid() + login_user(user) + user.save() + return redirect("/?auth=%s" % user.get_id()) + + +@manager.route('/feishu_callback', methods=['GET']) +def feishu_callback(): + import requests + app_access_token_res = requests.post(FEISHU_OAUTH.get("app_access_token_url"), data=json.dumps({ + "app_id": FEISHU_OAUTH.get("app_id"), + "app_secret": FEISHU_OAUTH.get("app_secret") + }), headers={"Content-Type": "application/json; charset=utf-8"}) + app_access_token_res = app_access_token_res.json() + if app_access_token_res['code'] != 0: + return redirect("/?error=%s" % app_access_token_res) + + res = requests.post(FEISHU_OAUTH.get("user_access_token_url"), data=json.dumps({ + "grant_type": FEISHU_OAUTH.get("grant_type"), + "code": request.args.get('code') + }), headers={"Content-Type": "application/json; charset=utf-8", + 'Authorization': f"Bearer {app_access_token_res['app_access_token']}"}) + res = res.json() + if res['code'] != 0: + return redirect("/?error=%s" % res["message"]) + + if "contact:user.email:readonly" not in res["data"]["scope"].split(" "): + return redirect("/?error=contact:user.email:readonly not in scope") + session["access_token"] = res["data"]["access_token"] + session["access_token_from"] = "feishu" + userinfo = user_info_from_feishu(session["access_token"]) + users = UserService.query(email=userinfo["email"]) + user_id = get_uuid() + if not users: + try: + try: + avatar = download_img(userinfo["avatar_url"]) + except Exception as e: + stat_logger.exception(e) + avatar = "" + users = user_register(user_id, { + "access_token": session["access_token"], + "email": userinfo["email"], + "avatar": avatar, + "nickname": userinfo["en_name"], + "login_channel": "feishu", + "last_login_time": get_format_time(), + "is_superuser": False, + }) + if not users: + raise 
+@manager.route('/feishu_callback', methods=['GET'])
+def feishu_callback():
+    import requests
+    app_access_token_res = requests.post(FEISHU_OAUTH.get("app_access_token_url"), data=json.dumps({
+        "app_id": FEISHU_OAUTH.get("app_id"),
+        "app_secret": FEISHU_OAUTH.get("app_secret")
+    }), headers={"Content-Type": "application/json; charset=utf-8"})
+    app_access_token_res = app_access_token_res.json()
+    if app_access_token_res['code'] != 0:
+        return redirect("/?error=%s" % app_access_token_res)
+
+    res = requests.post(FEISHU_OAUTH.get("user_access_token_url"), data=json.dumps({
+        "grant_type": FEISHU_OAUTH.get("grant_type"),
+        "code": request.args.get('code')
+    }), headers={"Content-Type": "application/json; charset=utf-8",
+                 'Authorization': f"Bearer {app_access_token_res['app_access_token']}"})
+    res = res.json()
+    if res['code'] != 0:
+        return redirect("/?error=%s" % res["message"])
+
+    if "contact:user.email:readonly" not in res["data"]["scope"].split(" "):
+        return redirect("/?error=contact:user.email:readonly not in scope")
+    session["access_token"] = res["data"]["access_token"]
+    session["access_token_from"] = "feishu"
+    userinfo = user_info_from_feishu(session["access_token"])
+    users = UserService.query(email=userinfo["email"])
+    user_id = get_uuid()
+    if not users:
+        try:
+            try:
+                avatar = download_img(userinfo["avatar_url"])
+            except Exception as e:
+                stat_logger.exception(e)
+                avatar = ""
+            users = user_register(user_id, {
+                "access_token": session["access_token"],
+                "email": userinfo["email"],
+                "avatar": avatar,
+                "nickname": userinfo["en_name"],
+                "login_channel": "feishu",
+                "last_login_time": get_format_time(),
+                "is_superuser": False,
+            })
+            if not users:
+                raise Exception('User registration failed.')
+            if len(users) > 1:
+                raise Exception('Same e-mail already exists!')
+            user = users[0]
+            login_user(user)
+            return redirect("/?auth=%s" % user.get_id())
+        except Exception as e:
+            rollback_user_registration(user_id)
+            stat_logger.exception(e)
+            return redirect("/?error=%s" % str(e))
+    user = users[0]
+    user.access_token = get_uuid()
+    login_user(user)
+    user.save()
+    return redirect("/?auth=%s" % user.get_id())
+
+
+def user_info_from_feishu(access_token):
+    import requests
+    headers = {"Content-Type": "application/json; charset=utf-8",
+               'Authorization': f"Bearer {access_token}"}
+    res = requests.get(
+        "https://open.feishu.cn/open-apis/authen/v1/user_info",
+        headers=headers)
+    user_info = res.json()["data"]
+    user_info["email"] = None if user_info.get("email") == "" else user_info["email"]
+    return user_info
+
+
+def user_info_from_github(access_token):
+    import requests
+    headers = {"Accept": "application/json",
+               'Authorization': f"token {access_token}"}
+    res = requests.get(
+        f"https://api.github.com/user?access_token={access_token}",
+        headers=headers)
+    user_info = res.json()
+    email_info = requests.get(
+        f"https://api.github.com/user/emails?access_token={access_token}",
+        headers=headers).json()
+    user_info["email"] = next(
+        (email for email in email_info if email['primary']),
+        None)["email"]
+    return user_info
+
+
+@manager.route("/logout", methods=['GET'])
+@login_required
+def log_out():
+    current_user.access_token = ""
+    current_user.save()
+    logout_user()
+    return get_json_result(data=True)
+
+
+@manager.route("/setting", methods=["POST"])
+@login_required
+def setting_user():
+    update_dict = {}
+    request_data = request.json
+    if request_data.get("password"):
+        new_password = request_data.get("new_password")
+        if not check_password_hash(
+                current_user.password, decrypt(request_data["password"])):
+            return get_json_result(
+                data=False, retcode=RetCode.AUTHENTICATION_ERROR, retmsg='Incorrect password!')
+
+        if new_password:
+            update_dict["password"] = generate_password_hash(
+                decrypt(new_password))
+
+    for k in request_data.keys():
+        if k in ["password", "new_password"]:
+            continue
+        update_dict[k] = request_data[k]
+
+    try:
+        UserService.update_by_id(current_user.id, update_dict)
+        return get_json_result(data=True)
+    except Exception as e:
+        stat_logger.exception(e)
+        return get_json_result(
+            data=False, retmsg='Update failed!', retcode=RetCode.EXCEPTION_ERROR)
+
+
+@manager.route("/info", methods=["GET"])
+@login_required
+def user_info():
+    return get_json_result(data=current_user.to_dict())
+
+
+def rollback_user_registration(user_id):
+    try:
+        UserService.delete_by_id(user_id)
+    except Exception:
+        pass
+    try:
+        TenantService.delete_by_id(user_id)
+    except Exception:
+        pass
+    try:
+        u = UserTenantService.query(tenant_id=user_id)
+        if u:
+            UserTenantService.delete_by_id(u[0].id)
+    except Exception:
+        pass
+    try:
+        TenantLLM.delete().where(TenantLLM.tenant_id == user_id).execute()
+    except Exception:
+        pass
+
+
+# user_register provisions a complete single-user workspace in one pass: the
+# user row, a tenant keyed by the same id, the owner link, a root "/" folder,
+# and one tenant_llm row per model offered by the default factory; on failure
+# the caller runs rollback_user_registration above to undo the partial state.
+def user_register(user_id, user):
+    user["id"] = user_id
+    tenant = {
+        "id": user_id,
+        "name": user["nickname"] + "'s Kingdom",
+        "llm_id": CHAT_MDL,
+        "embd_id": EMBEDDING_MDL,
+        "asr_id": ASR_MDL,
+        "parser_ids": PARSERS,
+        "img2txt_id": IMAGE2TEXT_MDL,
+        "rerank_id": RERANK_MDL
+    }
+    usr_tenant = {
+        "tenant_id": user_id,
+        "user_id": user_id,
+        "invited_by": user_id,
+        "role": UserTenantRole.OWNER
+    }
+    file_id = get_uuid()
+    file = {
+        "id": file_id,
+        "parent_id": file_id,
+        "tenant_id": user_id,
+        "created_by": user_id,
+        "name": "/",
+        "type": FileType.FOLDER.value,
+        "size": 0,
+        "location": "",
+    }
+    tenant_llm = []
+    for llm in LLMService.query(fid=LLM_FACTORY):
+        tenant_llm.append({"tenant_id": user_id,
+                           "llm_factory": LLM_FACTORY,
+                           "llm_name": llm.llm_name,
+                           "model_type": llm.model_type,
+                           "api_key": API_KEY,
+                           "api_base": LLM_BASE_URL
+                           })
+
+    if not UserService.save(**user):
+        return
+    TenantService.insert(**tenant)
+    UserTenantService.insert(**usr_tenant)
+    TenantLLMService.insert_many(tenant_llm)
+    FileService.insert(file)
+    return UserService.query(email=user["email"])
+
+
+@manager.route("/register", methods=["POST"])
+@validate_request("nickname", "email", "password")
+def user_add():
+    req = request.json
+    if UserService.query(email=req["email"]):
+        return get_json_result(
+            data=False, retmsg=f'Email: {req["email"]} is already registered!', retcode=RetCode.OPERATING_ERROR)
+    if not re.match(r"^[\w\._-]+@([\w_-]+\.)+[\w-]{2,4}$", req["email"]):
+        return get_json_result(data=False, retmsg=f'Invalid e-mail: {req["email"]}!',
+                               retcode=RetCode.OPERATING_ERROR)
+
+    user_dict = {
+        "access_token": get_uuid(),
+        "email": req["email"],
+        "nickname": req["nickname"],
+        "password": decrypt(req["password"]),
+        "login_channel": "password",
+        "last_login_time": get_format_time(),
+        "is_superuser": False,
+    }
+
+    user_id = get_uuid()
+    try:
+        users = user_register(user_id, user_dict)
+        if not users:
+            raise Exception('User registration failed.')
+        if len(users) > 1:
+            raise Exception('Same e-mail already exists!')
+        user = users[0]
+        login_user(user)
+        return cors_reponse(data=user.to_json(),
+                            auth=user.get_id(), retmsg="Welcome aboard!")
+    except Exception as e:
+        rollback_user_registration(user_id)
+        stat_logger.exception(e)
+        return get_json_result(
+            data=False, retmsg='User registration failed!', retcode=RetCode.EXCEPTION_ERROR)
+
+
+@manager.route("/tenant_info", methods=["GET"])
+@login_required
+def tenant_info():
+    try:
+        tenants = TenantService.get_by_user_id(current_user.id)[0]
+        return get_json_result(data=tenants)
+    except Exception as e:
+        return server_error_response(e)
+
+
+@manager.route("/set_tenant_info", methods=["POST"])
+@login_required
+@validate_request("tenant_id", "asr_id", "embd_id", "img2txt_id", "llm_id")
+def set_tenant_info():
+    req = request.json
+    try:
+        tid = req["tenant_id"]
+        del req["tenant_id"]
+        TenantService.update_by_id(tid, req)
+        return get_json_result(data=True)
+    except Exception as e:
+        return server_error_response(e)
diff --git a/api/db/__init__.py b/api/db/__init__.py
index 7be445cd9f2ae851e065ae0df4869129a8ea52ea..03bf00fec0c241d67ef755a6a64289a8317da16d 100644
--- a/api/db/__init__.py
+++ b/api/db/__init__.py
@@ -1,102 +1,102 @@
-#
-# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# -from enum import Enum -from enum import IntEnum -from strenum import StrEnum - - -class StatusEnum(Enum): - VALID = "1" - INVALID = "0" - - -class UserTenantRole(StrEnum): - OWNER = 'owner' - ADMIN = 'admin' - NORMAL = 'normal' - - -class TenantPermission(StrEnum): - ME = 'me' - TEAM = 'team' - - -class SerializedType(IntEnum): - PICKLE = 1 - JSON = 2 - - -class FileType(StrEnum): - PDF = 'pdf' - DOC = 'doc' - VISUAL = 'visual' - AURAL = 'aural' - VIRTUAL = 'virtual' - FOLDER = 'folder' - OTHER = "other" - - -class LLMType(StrEnum): - CHAT = 'chat' - EMBEDDING = 'embedding' - SPEECH2TEXT = 'speech2text' - IMAGE2TEXT = 'image2text' - RERANK = 'rerank' - - -class ChatStyle(StrEnum): - CREATIVE = 'Creative' - PRECISE = 'Precise' - EVENLY = 'Evenly' - CUSTOM = 'Custom' - - -class TaskStatus(StrEnum): - UNSTART = "0" - RUNNING = "1" - CANCEL = "2" - DONE = "3" - FAIL = "4" - - -class ParserType(StrEnum): - PRESENTATION = "presentation" - LAWS = "laws" - MANUAL = "manual" - PAPER = "paper" - RESUME = "resume" - BOOK = "book" - QA = "qa" - TABLE = "table" - NAIVE = "naive" - PICTURE = "picture" - ONE = "one" - AUDIO = "audio" - EMAIL = "email" - KG = "knowledge_graph" - - -class FileSource(StrEnum): - LOCAL = "" - KNOWLEDGEBASE = "knowledgebase" - S3 = "s3" - - -class CanvasType(StrEnum): - ChatBot = "chatbot" - DocBot = "docbot" - -KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase" +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from enum import Enum +from enum import IntEnum +from strenum import StrEnum + + +class StatusEnum(Enum): + VALID = "1" + INVALID = "0" + + +class UserTenantRole(StrEnum): + OWNER = 'owner' + ADMIN = 'admin' + NORMAL = 'normal' + + +class TenantPermission(StrEnum): + ME = 'me' + TEAM = 'team' + + +class SerializedType(IntEnum): + PICKLE = 1 + JSON = 2 + + +class FileType(StrEnum): + PDF = 'pdf' + DOC = 'doc' + VISUAL = 'visual' + AURAL = 'aural' + VIRTUAL = 'virtual' + FOLDER = 'folder' + OTHER = "other" + + +class LLMType(StrEnum): + CHAT = 'chat' + EMBEDDING = 'embedding' + SPEECH2TEXT = 'speech2text' + IMAGE2TEXT = 'image2text' + RERANK = 'rerank' + + +class ChatStyle(StrEnum): + CREATIVE = 'Creative' + PRECISE = 'Precise' + EVENLY = 'Evenly' + CUSTOM = 'Custom' + + +class TaskStatus(StrEnum): + UNSTART = "0" + RUNNING = "1" + CANCEL = "2" + DONE = "3" + FAIL = "4" + + +class ParserType(StrEnum): + PRESENTATION = "presentation" + LAWS = "laws" + MANUAL = "manual" + PAPER = "paper" + RESUME = "resume" + BOOK = "book" + QA = "qa" + TABLE = "table" + NAIVE = "naive" + PICTURE = "picture" + ONE = "one" + AUDIO = "audio" + EMAIL = "email" + KG = "knowledge_graph" + + +class FileSource(StrEnum): + LOCAL = "" + KNOWLEDGEBASE = "knowledgebase" + S3 = "s3" + + +class CanvasType(StrEnum): + ChatBot = "chatbot" + DocBot = "docbot" + +KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase" diff --git a/api/db/db_models.py b/api/db/db_models.py index bb6c395efdc9e7d3d273d81dcb64ec072dd304d5..c8c1dd0ad19bf231f6533e949d784a9f36e5b5c9 100644 --- a/api/db/db_models.py +++ b/api/db/db_models.py @@ -1,972 +1,972 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import inspect -import os -import sys -import typing -import operator -from functools import wraps -from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer -from flask_login import UserMixin -from playhouse.migrate import MySQLMigrator, migrate -from peewee import ( - BigIntegerField, BooleanField, CharField, - CompositeKey, IntegerField, TextField, FloatField, DateTimeField, - Field, Model, Metadata -) -from playhouse.pool import PooledMySQLDatabase -from api.db import SerializedType, ParserType -from api.settings import DATABASE, stat_logger, SECRET_KEY -from api.utils.log_utils import getLogger -from api import utils - -LOGGER = getLogger() - - -def singleton(cls, *args, **kw): - instances = {} - - def _singleton(): - key = str(cls) + str(os.getpid()) - if key not in instances: - instances[key] = cls(*args, **kw) - return instances[key] - - return _singleton - - -CONTINUOUS_FIELD_TYPE = {IntegerField, FloatField, DateTimeField} -AUTO_DATE_TIMESTAMP_FIELD_PREFIX = { - "create", - "start", - "end", - "update", - "read_access", - "write_access"} - - -class LongTextField(TextField): - field_type = 'LONGTEXT' - - -class JSONField(LongTextField): - default_value = {} - - def __init__(self, object_hook=None, object_pairs_hook=None, **kwargs): - self._object_hook = object_hook - self._object_pairs_hook = object_pairs_hook - super().__init__(**kwargs) - - def db_value(self, value): - if value is None: - value = self.default_value - return utils.json_dumps(value) - - def python_value(self, value): - if not value: - return self.default_value - return utils.json_loads( - value, object_hook=self._object_hook, object_pairs_hook=self._object_pairs_hook) - - -class ListField(JSONField): - default_value = [] - - -class SerializedField(LongTextField): - def __init__(self, serialized_type=SerializedType.PICKLE, - object_hook=None, object_pairs_hook=None, **kwargs): - self._serialized_type = serialized_type - self._object_hook = object_hook - self._object_pairs_hook = object_pairs_hook - super().__init__(**kwargs) - - def db_value(self, value): - if self._serialized_type == SerializedType.PICKLE: - return utils.serialize_b64(value, to_str=True) - elif self._serialized_type == SerializedType.JSON: - if value is None: - return None - return utils.json_dumps(value, with_type=True) - else: - raise ValueError( - f"the serialized type {self._serialized_type} is not supported") - - def python_value(self, value): - if self._serialized_type == SerializedType.PICKLE: - return utils.deserialize_b64(value) - elif self._serialized_type == SerializedType.JSON: - if value is None: - return {} - return utils.json_loads( - value, object_hook=self._object_hook, object_pairs_hook=self._object_pairs_hook) - else: - raise ValueError( - f"the serialized type {self._serialized_type} is not supported") - - -def is_continuous_field(cls: typing.Type) -> bool: - if cls in CONTINUOUS_FIELD_TYPE: - return True - for p in cls.__bases__: - if p in CONTINUOUS_FIELD_TYPE: - return True - elif p != Field and p != object: - if is_continuous_field(p): - return True - else: - return False - - -def auto_date_timestamp_field(): - return {f"{f}_time" for f in AUTO_DATE_TIMESTAMP_FIELD_PREFIX} - - -def auto_date_timestamp_db_field(): - return {f"f_{f}_time" for f in AUTO_DATE_TIMESTAMP_FIELD_PREFIX} - - -def remove_field_name_prefix(field_name): - return field_name[2:] if field_name.startswith('f_') else field_name - - -class BaseModel(Model): - create_time = BigIntegerField(null=True, index=True) - create_date = 
DateTimeField(null=True, index=True) - update_time = BigIntegerField(null=True, index=True) - update_date = DateTimeField(null=True, index=True) - - def to_json(self): - # This function is obsolete - return self.to_dict() - - def to_dict(self): - return self.__dict__['__data__'] - - def to_human_model_dict(self, only_primary_with: list = None): - model_dict = self.__dict__['__data__'] - - if not only_primary_with: - return {remove_field_name_prefix( - k): v for k, v in model_dict.items()} - - human_model_dict = {} - for k in self._meta.primary_key.field_names: - human_model_dict[remove_field_name_prefix(k)] = model_dict[k] - for k in only_primary_with: - human_model_dict[k] = model_dict[f'f_{k}'] - return human_model_dict - - @property - def meta(self) -> Metadata: - return self._meta - - @classmethod - def get_primary_keys_name(cls): - return cls._meta.primary_key.field_names if isinstance(cls._meta.primary_key, CompositeKey) else [ - cls._meta.primary_key.name] - - @classmethod - def getter_by(cls, attr): - return operator.attrgetter(attr)(cls) - - @classmethod - def query(cls, reverse=None, order_by=None, **kwargs): - filters = [] - for f_n, f_v in kwargs.items(): - attr_name = '%s' % f_n - if not hasattr(cls, attr_name) or f_v is None: - continue - if type(f_v) in {list, set}: - f_v = list(f_v) - if is_continuous_field(type(getattr(cls, attr_name))): - if len(f_v) == 2: - for i, v in enumerate(f_v): - if isinstance( - v, str) and f_n in auto_date_timestamp_field(): - # time type: %Y-%m-%d %H:%M:%S - f_v[i] = utils.date_string_to_timestamp(v) - lt_value = f_v[0] - gt_value = f_v[1] - if lt_value is not None and gt_value is not None: - filters.append( - cls.getter_by(attr_name).between( - lt_value, gt_value)) - elif lt_value is not None: - filters.append( - operator.attrgetter(attr_name)(cls) >= lt_value) - elif gt_value is not None: - filters.append( - operator.attrgetter(attr_name)(cls) <= gt_value) - else: - filters.append(operator.attrgetter(attr_name)(cls) << f_v) - else: - filters.append(operator.attrgetter(attr_name)(cls) == f_v) - if filters: - query_records = cls.select().where(*filters) - if reverse is not None: - if not order_by or not hasattr(cls, f"{order_by}"): - order_by = "create_time" - if reverse is True: - query_records = query_records.order_by( - cls.getter_by(f"{order_by}").desc()) - elif reverse is False: - query_records = query_records.order_by( - cls.getter_by(f"{order_by}").asc()) - return [query_record for query_record in query_records] - else: - return [] - - @classmethod - def insert(cls, __data=None, **insert): - if isinstance(__data, dict) and __data: - __data[cls._meta.combined["create_time"] - ] = utils.current_timestamp() - if insert: - insert["create_time"] = utils.current_timestamp() - - return super().insert(__data, **insert) - - # update and insert will call this method - @classmethod - def _normalize_data(cls, data, kwargs): - normalized = super()._normalize_data(data, kwargs) - if not normalized: - return {} - - normalized[cls._meta.combined["update_time"] - ] = utils.current_timestamp() - - for f_n in AUTO_DATE_TIMESTAMP_FIELD_PREFIX: - if {f"{f_n}_time", f"{f_n}_date"}.issubset(cls._meta.combined.keys()) and \ - cls._meta.combined[f"{f_n}_time"] in normalized and \ - normalized[cls._meta.combined[f"{f_n}_time"]] is not None: - normalized[cls._meta.combined[f"{f_n}_date"]] = utils.timestamp_to_date( - normalized[cls._meta.combined[f"{f_n}_time"]]) - - return normalized - - -class JsonSerializedField(SerializedField): - def __init__(self, 
object_hook=utils.from_dict_hook, - object_pairs_hook=None, **kwargs): - super(JsonSerializedField, self).__init__(serialized_type=SerializedType.JSON, object_hook=object_hook, - object_pairs_hook=object_pairs_hook, **kwargs) - - -@singleton -class BaseDataBase: - def __init__(self): - database_config = DATABASE.copy() - db_name = database_config.pop("name") - self.database_connection = PooledMySQLDatabase( - db_name, **database_config) - stat_logger.info('init mysql database on cluster mode successfully') - - -class DatabaseLock: - def __init__(self, lock_name, timeout=10, db=None): - self.lock_name = lock_name - self.timeout = int(timeout) - self.db = db if db else DB - - def lock(self): - # SQL parameters only support %s format placeholders - cursor = self.db.execute_sql( - "SELECT GET_LOCK(%s, %s)", (self.lock_name, self.timeout)) - ret = cursor.fetchone() - if ret[0] == 0: - raise Exception(f'acquire mysql lock {self.lock_name} timeout') - elif ret[0] == 1: - return True - else: - raise Exception(f'failed to acquire lock {self.lock_name}') - - def unlock(self): - cursor = self.db.execute_sql( - "SELECT RELEASE_LOCK(%s)", (self.lock_name,)) - ret = cursor.fetchone() - if ret[0] == 0: - raise Exception( - f'mysql lock {self.lock_name} was not established by this thread') - elif ret[0] == 1: - return True - else: - raise Exception(f'mysql lock {self.lock_name} does not exist') - - def __enter__(self): - if isinstance(self.db, PooledMySQLDatabase): - self.lock() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if isinstance(self.db, PooledMySQLDatabase): - self.unlock() - - def __call__(self, func): - @wraps(func) - def magic(*args, **kwargs): - with self: - return func(*args, **kwargs) - - return magic - - -DB = BaseDataBase().database_connection -DB.lock = DatabaseLock - - -def close_connection(): - try: - if DB: - DB.close_stale(age=30) - except Exception as e: - LOGGER.exception(e) - - -class DataBaseModel(BaseModel): - class Meta: - database = DB - - -@DB.connection_context() -def init_database_tables(alter_fields=[]): - members = inspect.getmembers(sys.modules[__name__], inspect.isclass) - table_objs = [] - create_failed_list = [] - for name, obj in members: - if obj != DataBaseModel and issubclass(obj, DataBaseModel): - table_objs.append(obj) - LOGGER.info(f"start create table {obj.__name__}") - try: - obj.create_table() - LOGGER.info(f"create table success: {obj.__name__}") - except Exception as e: - LOGGER.exception(e) - create_failed_list.append(obj.__name__) - if create_failed_list: - LOGGER.info(f"create tables failed: {create_failed_list}") - raise Exception(f"create tables failed: {create_failed_list}") - migrate_db() - - -def fill_db_model_object(model_object, human_model_dict): - for k, v in human_model_dict.items(): - attr_name = '%s' % k - if hasattr(model_object.__class__, attr_name): - setattr(model_object, attr_name, v) - return model_object - - -class User(DataBaseModel, UserMixin): - id = CharField(max_length=32, primary_key=True) - access_token = CharField(max_length=255, null=True, index=True) - nickname = CharField(max_length=100, null=False, help_text="nicky name", index=True) - password = CharField(max_length=255, null=True, help_text="password", index=True) - email = CharField( - max_length=255, - null=False, - help_text="email", - index=True) - avatar = TextField(null=True, help_text="avatar base64 string") - language = CharField( - max_length=32, - null=True, - help_text="English|Chinese", - default="Chinese" if "zh_CN" in os.getenv("LANG", 
"") else "English", - index=True) - color_schema = CharField( - max_length=32, - null=True, - help_text="Bright|Dark", - default="Bright", - index=True) - timezone = CharField( - max_length=64, - null=True, - help_text="Timezone", - default="UTC+8\tAsia/Shanghai", - index=True) - last_login_time = DateTimeField(null=True, index=True) - is_authenticated = CharField(max_length=1, null=False, default="1", index=True) - is_active = CharField(max_length=1, null=False, default="1", index=True) - is_anonymous = CharField(max_length=1, null=False, default="0", index=True) - login_channel = CharField(null=True, help_text="from which user login", index=True) - status = CharField( - max_length=1, - null=True, - help_text="is it validate(0: wasted,1: validate)", - default="1", - index=True) - is_superuser = BooleanField(null=True, help_text="is root", default=False, index=True) - - def __str__(self): - return self.email - - def get_id(self): - jwt = Serializer(secret_key=SECRET_KEY) - return jwt.dumps(str(self.access_token)) - - class Meta: - db_table = "user" - - -class Tenant(DataBaseModel): - id = CharField(max_length=32, primary_key=True) - name = CharField(max_length=100, null=True, help_text="Tenant name", index=True) - public_key = CharField(max_length=255, null=True, index=True) - llm_id = CharField(max_length=128, null=False, help_text="default llm ID", index=True) - embd_id = CharField( - max_length=128, - null=False, - help_text="default embedding model ID", - index=True) - asr_id = CharField( - max_length=128, - null=False, - help_text="default ASR model ID", - index=True) - img2txt_id = CharField( - max_length=128, - null=False, - help_text="default image to text model ID", - index=True) - rerank_id = CharField( - max_length=128, - null=False, - help_text="default rerank model ID", - index=True) - parser_ids = CharField( - max_length=256, - null=False, - help_text="document processors", - index=True) - credit = IntegerField(default=512, index=True) - status = CharField( - max_length=1, - null=True, - help_text="is it validate(0: wasted,1: validate)", - default="1", - index=True) - - class Meta: - db_table = "tenant" - - -class UserTenant(DataBaseModel): - id = CharField(max_length=32, primary_key=True) - user_id = CharField(max_length=32, null=False, index=True) - tenant_id = CharField(max_length=32, null=False, index=True) - role = CharField(max_length=32, null=False, help_text="UserTenantRole", index=True) - invited_by = CharField(max_length=32, null=False, index=True) - status = CharField( - max_length=1, - null=True, - help_text="is it validate(0: wasted,1: validate)", - default="1", - index=True) - - class Meta: - db_table = "user_tenant" - - -class InvitationCode(DataBaseModel): - id = CharField(max_length=32, primary_key=True) - code = CharField(max_length=32, null=False, index=True) - visit_time = DateTimeField(null=True, index=True) - user_id = CharField(max_length=32, null=True, index=True) - tenant_id = CharField(max_length=32, null=True, index=True) - status = CharField( - max_length=1, - null=True, - help_text="is it validate(0: wasted,1: validate)", - default="1", - index=True) - - class Meta: - db_table = "invitation_code" - - -class LLMFactories(DataBaseModel): - name = CharField( - max_length=128, - null=False, - help_text="LLM factory name", - primary_key=True) - logo = TextField(null=True, help_text="llm logo base64") - tags = CharField( - max_length=255, - null=False, - help_text="LLM, Text Embedding, Image2Text, ASR", - index=True) - status = CharField( - 
max_length=1, - null=True, - help_text="is it validate(0: wasted,1: validate)", - default="1", - index=True) - - def __str__(self): - return self.name - - class Meta: - db_table = "llm_factories" - - -class LLM(DataBaseModel): - # LLMs dictionary - llm_name = CharField( - max_length=128, - null=False, - help_text="LLM name", - index=True) - model_type = CharField( - max_length=128, - null=False, - help_text="LLM, Text Embedding, Image2Text, ASR", - index=True) - fid = CharField(max_length=128, null=False, help_text="LLM factory id", index=True) - max_tokens = IntegerField(default=0) - - tags = CharField( - max_length=255, - null=False, - help_text="LLM, Text Embedding, Image2Text, Chat, 32k...", - index=True) - status = CharField( - max_length=1, - null=True, - help_text="is it validate(0: wasted,1: validate)", - default="1", - index=True) - - def __str__(self): - return self.llm_name - - class Meta: - primary_key = CompositeKey('fid', 'llm_name') - db_table = "llm" - - -class TenantLLM(DataBaseModel): - tenant_id = CharField(max_length=32, null=False, index=True) - llm_factory = CharField( - max_length=128, - null=False, - help_text="LLM factory name", - index=True) - model_type = CharField( - max_length=128, - null=True, - help_text="LLM, Text Embedding, Image2Text, ASR", - index=True) - llm_name = CharField( - max_length=128, - null=True, - help_text="LLM name", - default="", - index=True) - api_key = CharField(max_length=1024, null=True, help_text="API KEY", index=True) - api_base = CharField(max_length=255, null=True, help_text="API Base") - - used_tokens = IntegerField(default=0, index=True) - - def __str__(self): - return self.llm_name - - class Meta: - db_table = "tenant_llm" - primary_key = CompositeKey('tenant_id', 'llm_factory', 'llm_name') - - -class Knowledgebase(DataBaseModel): - id = CharField(max_length=32, primary_key=True) - avatar = TextField(null=True, help_text="avatar base64 string") - tenant_id = CharField(max_length=32, null=False, index=True) - name = CharField( - max_length=128, - null=False, - help_text="KB name", - index=True) - language = CharField( - max_length=32, - null=True, - default="Chinese" if "zh_CN" in os.getenv("LANG", "") else "English", - help_text="English|Chinese", - index=True) - description = TextField(null=True, help_text="KB description") - embd_id = CharField( - max_length=128, - null=False, - help_text="default embedding model ID", - index=True) - permission = CharField( - max_length=16, - null=False, - help_text="me|team", - default="me", - index=True) - created_by = CharField(max_length=32, null=False, index=True) - doc_num = IntegerField(default=0, index=True) - token_num = IntegerField(default=0, index=True) - chunk_num = IntegerField(default=0, index=True) - similarity_threshold = FloatField(default=0.2, index=True) - vector_similarity_weight = FloatField(default=0.3, index=True) - - parser_id = CharField( - max_length=32, - null=False, - help_text="default parser ID", - default=ParserType.NAIVE.value, - index=True) - parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]}) - status = CharField( - max_length=1, - null=True, - help_text="is it validate(0: wasted,1: validate)", - default="1", - index=True) - - def __str__(self): - return self.name - - class Meta: - db_table = "knowledgebase" - - -class Document(DataBaseModel): - id = CharField(max_length=32, primary_key=True) - thumbnail = TextField(null=True, help_text="thumbnail base64 string") - kb_id = CharField(max_length=256, null=False, index=True) - parser_id = 
CharField( - max_length=32, - null=False, - help_text="default parser ID", - index=True) - parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]}) - source_type = CharField( - max_length=128, - null=False, - default="local", - help_text="where dose this document come from", - index=True) - type = CharField(max_length=32, null=False, help_text="file extension", - index=True) - created_by = CharField( - max_length=32, - null=False, - help_text="who created it", - index=True) - name = CharField( - max_length=255, - null=True, - help_text="file name", - index=True) - location = CharField( - max_length=255, - null=True, - help_text="where dose it store", - index=True) - size = IntegerField(default=0, index=True) - token_num = IntegerField(default=0, index=True) - chunk_num = IntegerField(default=0, index=True) - progress = FloatField(default=0, index=True) - progress_msg = TextField( - null=True, - help_text="process message", - default="") - process_begin_at = DateTimeField(null=True, index=True) - process_duation = FloatField(default=0) - - run = CharField( - max_length=1, - null=True, - help_text="start to run processing or cancel.(1: run it; 2: cancel)", - default="0", - index=True) - status = CharField( - max_length=1, - null=True, - help_text="is it validate(0: wasted,1: validate)", - default="1", - index=True) - - class Meta: - db_table = "document" - - -class File(DataBaseModel): - id = CharField( - max_length=32, - primary_key=True) - parent_id = CharField( - max_length=32, - null=False, - help_text="parent folder id", - index=True) - tenant_id = CharField( - max_length=32, - null=False, - help_text="tenant id", - index=True) - created_by = CharField( - max_length=32, - null=False, - help_text="who created it", - index=True) - name = CharField( - max_length=255, - null=False, - help_text="file name or folder name", - index=True) - location = CharField( - max_length=255, - null=True, - help_text="where dose it store", - index=True) - size = IntegerField(default=0, index=True) - type = CharField(max_length=32, null=False, help_text="file extension", index=True) - source_type = CharField( - max_length=128, - null=False, - default="", - help_text="where dose this document come from", index=True) - - class Meta: - db_table = "file" - - -class File2Document(DataBaseModel): - id = CharField( - max_length=32, - primary_key=True) - file_id = CharField( - max_length=32, - null=True, - help_text="file id", - index=True) - document_id = CharField( - max_length=32, - null=True, - help_text="document id", - index=True) - - class Meta: - db_table = "file2document" - - -class Task(DataBaseModel): - id = CharField(max_length=32, primary_key=True) - doc_id = CharField(max_length=32, null=False, index=True) - from_page = IntegerField(default=0) - - to_page = IntegerField(default=-1) - - begin_at = DateTimeField(null=True, index=True) - process_duation = FloatField(default=0) - - progress = FloatField(default=0, index=True) - progress_msg = TextField( - null=True, - help_text="process message", - default="") - - -class Dialog(DataBaseModel): - id = CharField(max_length=32, primary_key=True) - tenant_id = CharField(max_length=32, null=False, index=True) - name = CharField( - max_length=255, - null=True, - help_text="dialog application name", - index=True) - description = TextField(null=True, help_text="Dialog description") - icon = TextField(null=True, help_text="icon base64 string") - language = CharField( - max_length=32, - null=True, - default="Chinese" if "zh_CN" in os.getenv("LANG", 
"") else "English", - help_text="English|Chinese", - index=True) - llm_id = CharField(max_length=128, null=False, help_text="default llm ID") - - llm_setting = JSONField(null=False, default={"temperature": 0.1, "top_p": 0.3, "frequency_penalty": 0.7, - "presence_penalty": 0.4, "max_tokens": 512}) - prompt_type = CharField( - max_length=16, - null=False, - default="simple", - help_text="simple|advanced", - index=True) - prompt_config = JSONField(null=False, default={"system": "", "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?", - "parameters": [], "empty_response": "Sorry! 知识库中未找到相关内容!"}) - - similarity_threshold = FloatField(default=0.2) - vector_similarity_weight = FloatField(default=0.3) - - top_n = IntegerField(default=6) - - top_k = IntegerField(default=1024) - - do_refer = CharField( - max_length=1, - null=False, - help_text="it needs to insert reference index into answer or not") - - rerank_id = CharField( - max_length=128, - null=False, - help_text="default rerank model ID") - - kb_ids = JSONField(null=False, default=[]) - status = CharField( - max_length=1, - null=True, - help_text="is it validate(0: wasted,1: validate)", - default="1", - index=True) - - class Meta: - db_table = "dialog" - - -class Conversation(DataBaseModel): - id = CharField(max_length=32, primary_key=True) - dialog_id = CharField(max_length=32, null=False, index=True) - name = CharField(max_length=255, null=True, help_text="converastion name", index=True) - message = JSONField(null=True) - reference = JSONField(null=True, default=[]) - - class Meta: - db_table = "conversation" - - -class APIToken(DataBaseModel): - tenant_id = CharField(max_length=32, null=False, index=True) - token = CharField(max_length=255, null=False, index=True) - dialog_id = CharField(max_length=32, null=False, index=True) - source = CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True) - - class Meta: - db_table = "api_token" - primary_key = CompositeKey('tenant_id', 'token') - - -class API4Conversation(DataBaseModel): - id = CharField(max_length=32, primary_key=True) - dialog_id = CharField(max_length=32, null=False, index=True) - user_id = CharField(max_length=255, null=False, help_text="user_id", index=True) - message = JSONField(null=True) - reference = JSONField(null=True, default=[]) - tokens = IntegerField(default=0) - source = CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True) - - duration = FloatField(default=0, index=True) - round = IntegerField(default=0, index=True) - thumb_up = IntegerField(default=0, index=True) - - class Meta: - db_table = "api_4_conversation" - - -class UserCanvas(DataBaseModel): - id = CharField(max_length=32, primary_key=True) - avatar = TextField(null=True, help_text="avatar base64 string") - user_id = CharField(max_length=255, null=False, help_text="user_id", index=True) - title = CharField(max_length=255, null=True, help_text="Canvas title") - - description = TextField(null=True, help_text="Canvas description") - canvas_type = CharField(max_length=32, null=True, help_text="Canvas type", index=True) - dsl = JSONField(null=True, default={}) - - class Meta: - db_table = "user_canvas" - - -class CanvasTemplate(DataBaseModel): - id = CharField(max_length=32, primary_key=True) - avatar = TextField(null=True, help_text="avatar base64 string") - title = CharField(max_length=255, null=True, help_text="Canvas title") - - description = TextField(null=True, help_text="Canvas description") - canvas_type = CharField(max_length=32, null=True, help_text="Canvas 
type", index=True) - dsl = JSONField(null=True, default={}) - - class Meta: - db_table = "canvas_template" - - -def migrate_db(): - with DB.transaction(): - migrator = MySQLMigrator(DB) - try: - migrate( - migrator.add_column('file', 'source_type', CharField(max_length=128, null=False, default="", - help_text="where dose this document come from", - index=True)) - ) - except Exception as e: - pass - try: - migrate( - migrator.add_column('tenant', 'rerank_id', - CharField(max_length=128, null=False, default="BAAI/bge-reranker-v2-m3", - help_text="default rerank model ID")) - - ) - except Exception as e: - pass - try: - migrate( - migrator.add_column('dialog', 'rerank_id', CharField(max_length=128, null=False, default="", - help_text="default rerank model ID")) - - ) - except Exception as e: - pass - try: - migrate( - migrator.add_column('dialog', 'top_k', IntegerField(default=1024)) - - ) - except Exception as e: - pass - try: - migrate( - migrator.alter_column_type('tenant_llm', 'api_key', - CharField(max_length=1024, null=True, help_text="API KEY", index=True)) - ) - except Exception as e: - pass - try: - migrate( - migrator.add_column('api_token', 'source', - CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True)) - ) - except Exception as e: - pass - try: - migrate( - migrator.add_column('api_4_conversation', 'source', - CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True)) - ) - except Exception as e: - pass - try: - DB.execute_sql('ALTER TABLE llm DROP PRIMARY KEY;') - DB.execute_sql('ALTER TABLE llm ADD PRIMARY KEY (llm_name,fid);') - except Exception as e: - pass +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import inspect +import os +import sys +import typing +import operator +from functools import wraps +from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer +from flask_login import UserMixin +from playhouse.migrate import MySQLMigrator, migrate +from peewee import ( + BigIntegerField, BooleanField, CharField, + CompositeKey, IntegerField, TextField, FloatField, DateTimeField, + Field, Model, Metadata +) +from playhouse.pool import PooledMySQLDatabase +from api.db import SerializedType, ParserType +from api.settings import DATABASE, stat_logger, SECRET_KEY +from api.utils.log_utils import getLogger +from api import utils + +LOGGER = getLogger() + + +def singleton(cls, *args, **kw): + instances = {} + + def _singleton(): + key = str(cls) + str(os.getpid()) + if key not in instances: + instances[key] = cls(*args, **kw) + return instances[key] + + return _singleton + + +CONTINUOUS_FIELD_TYPE = {IntegerField, FloatField, DateTimeField} +AUTO_DATE_TIMESTAMP_FIELD_PREFIX = { + "create", + "start", + "end", + "update", + "read_access", + "write_access"} + + +class LongTextField(TextField): + field_type = 'LONGTEXT' + + +class JSONField(LongTextField): + default_value = {} + + def __init__(self, object_hook=None, object_pairs_hook=None, **kwargs): + self._object_hook = object_hook + self._object_pairs_hook = object_pairs_hook + super().__init__(**kwargs) + + def db_value(self, value): + if value is None: + value = self.default_value + return utils.json_dumps(value) + + def python_value(self, value): + if not value: + return self.default_value + return utils.json_loads( + value, object_hook=self._object_hook, object_pairs_hook=self._object_pairs_hook) + + +class ListField(JSONField): + default_value = [] + + +class SerializedField(LongTextField): + def __init__(self, serialized_type=SerializedType.PICKLE, + object_hook=None, object_pairs_hook=None, **kwargs): + self._serialized_type = serialized_type + self._object_hook = object_hook + self._object_pairs_hook = object_pairs_hook + super().__init__(**kwargs) + + def db_value(self, value): + if self._serialized_type == SerializedType.PICKLE: + return utils.serialize_b64(value, to_str=True) + elif self._serialized_type == SerializedType.JSON: + if value is None: + return None + return utils.json_dumps(value, with_type=True) + else: + raise ValueError( + f"the serialized type {self._serialized_type} is not supported") + + def python_value(self, value): + if self._serialized_type == SerializedType.PICKLE: + return utils.deserialize_b64(value) + elif self._serialized_type == SerializedType.JSON: + if value is None: + return {} + return utils.json_loads( + value, object_hook=self._object_hook, object_pairs_hook=self._object_pairs_hook) + else: + raise ValueError( + f"the serialized type {self._serialized_type} is not supported") + + +def is_continuous_field(cls: typing.Type) -> bool: + if cls in CONTINUOUS_FIELD_TYPE: + return True + for p in cls.__bases__: + if p in CONTINUOUS_FIELD_TYPE: + return True + elif p != Field and p != object: + if is_continuous_field(p): + return True + else: + return False + + +def auto_date_timestamp_field(): + return {f"{f}_time" for f in AUTO_DATE_TIMESTAMP_FIELD_PREFIX} + + +def auto_date_timestamp_db_field(): + return {f"f_{f}_time" for f in AUTO_DATE_TIMESTAMP_FIELD_PREFIX} + + +def remove_field_name_prefix(field_name): + return field_name[2:] if field_name.startswith('f_') else field_name + + +class BaseModel(Model): + create_time = BigIntegerField(null=True, index=True) + create_date = 
DateTimeField(null=True, index=True)
+    update_time = BigIntegerField(null=True, index=True)
+    update_date = DateTimeField(null=True, index=True)
+
+    def to_json(self):
+        # This function is obsolete
+        return self.to_dict()
+
+    def to_dict(self):
+        return self.__dict__['__data__']
+
+    def to_human_model_dict(self, only_primary_with: list = None):
+        model_dict = self.__dict__['__data__']
+
+        if not only_primary_with:
+            return {remove_field_name_prefix(
+                k): v for k, v in model_dict.items()}
+
+        human_model_dict = {}
+        for k in self._meta.primary_key.field_names:
+            human_model_dict[remove_field_name_prefix(k)] = model_dict[k]
+        for k in only_primary_with:
+            human_model_dict[k] = model_dict[f'f_{k}']
+        return human_model_dict
+
+    @property
+    def meta(self) -> Metadata:
+        return self._meta
+
+    @classmethod
+    def get_primary_keys_name(cls):
+        return cls._meta.primary_key.field_names if isinstance(cls._meta.primary_key, CompositeKey) else [
+            cls._meta.primary_key.name]
+
+    @classmethod
+    def getter_by(cls, attr):
+        return operator.attrgetter(attr)(cls)
+
+    @classmethod
+    def query(cls, reverse=None, order_by=None, **kwargs):
+        filters = []
+        for f_n, f_v in kwargs.items():
+            attr_name = '%s' % f_n
+            if not hasattr(cls, attr_name) or f_v is None:
+                continue
+            if type(f_v) in {list, set}:
+                f_v = list(f_v)
+                if is_continuous_field(type(getattr(cls, attr_name))):
+                    if len(f_v) == 2:
+                        for i, v in enumerate(f_v):
+                            if isinstance(
+                                    v, str) and f_n in auto_date_timestamp_field():
+                                # time type: %Y-%m-%d %H:%M:%S
+                                f_v[i] = utils.date_string_to_timestamp(v)
+                        lt_value = f_v[0]
+                        gt_value = f_v[1]
+                        if lt_value is not None and gt_value is not None:
+                            filters.append(
+                                cls.getter_by(attr_name).between(
+                                    lt_value, gt_value))
+                        elif lt_value is not None:
+                            filters.append(
+                                operator.attrgetter(attr_name)(cls) >= lt_value)
+                        elif gt_value is not None:
+                            filters.append(
+                                operator.attrgetter(attr_name)(cls) <= gt_value)
+                else:
+                    filters.append(operator.attrgetter(attr_name)(cls) << f_v)
+            else:
+                filters.append(operator.attrgetter(attr_name)(cls) == f_v)
+        if filters:
+            query_records = cls.select().where(*filters)
+            if reverse is not None:
+                if not order_by or not hasattr(cls, f"{order_by}"):
+                    order_by = "create_time"
+                if reverse is True:
+                    query_records = query_records.order_by(
+                        cls.getter_by(f"{order_by}").desc())
+                elif reverse is False:
+                    query_records = query_records.order_by(
+                        cls.getter_by(f"{order_by}").asc())
+            return [query_record for query_record in query_records]
+        else:
+            return []
+
+    @classmethod
+    def insert(cls, __data=None, **insert):
+        if isinstance(__data, dict) and __data:
+            __data[cls._meta.combined["create_time"]
+                   ] = utils.current_timestamp()
+        if insert:
+            insert["create_time"] = utils.current_timestamp()
+
+        return super().insert(__data, **insert)
+
+    # update and insert will call this method
+    @classmethod
+    def _normalize_data(cls, data, kwargs):
+        normalized = super()._normalize_data(data, kwargs)
+        if not normalized:
+            return {}
+
+        normalized[cls._meta.combined["update_time"]
+                   ] = utils.current_timestamp()
+
+        for f_n in AUTO_DATE_TIMESTAMP_FIELD_PREFIX:
+            if {f"{f_n}_time", f"{f_n}_date"}.issubset(cls._meta.combined.keys()) and \
+                    cls._meta.combined[f"{f_n}_time"] in normalized and \
+                    normalized[cls._meta.combined[f"{f_n}_time"]] is not None:
+                normalized[cls._meta.combined[f"{f_n}_date"]] = utils.timestamp_to_date(
+                    normalized[cls._meta.combined[f"{f_n}_time"]])
+
+        return normalized
+
+
+class JsonSerializedField(SerializedField):
+    def __init__(self, object_hook=utils.from_dict_hook,
+                 object_pairs_hook=None, **kwargs):
+        super(JsonSerializedField, self).__init__(serialized_type=SerializedType.JSON, object_hook=object_hook,
+                                                  object_pairs_hook=object_pairs_hook, **kwargs)
+
+
+@singleton
+class BaseDataBase:
+    def __init__(self):
+        database_config = DATABASE.copy()
+        db_name = database_config.pop("name")
+        self.database_connection = PooledMySQLDatabase(
+            db_name, **database_config)
+        stat_logger.info('init mysql database on cluster mode successfully')
+
+
+class DatabaseLock:
+    def __init__(self, lock_name, timeout=10, db=None):
+        self.lock_name = lock_name
+        self.timeout = int(timeout)
+        self.db = db if db else DB
+
+    def lock(self):
+        # SQL parameters only support %s format placeholders
+        cursor = self.db.execute_sql(
+            "SELECT GET_LOCK(%s, %s)", (self.lock_name, self.timeout))
+        ret = cursor.fetchone()
+        if ret[0] == 0:
+            raise Exception(f'acquire mysql lock {self.lock_name} timeout')
+        elif ret[0] == 1:
+            return True
+        else:
+            raise Exception(f'failed to acquire lock {self.lock_name}')
+
+    def unlock(self):
+        cursor = self.db.execute_sql(
+            "SELECT RELEASE_LOCK(%s)", (self.lock_name,))
+        ret = cursor.fetchone()
+        if ret[0] == 0:
+            raise Exception(
+                f'mysql lock {self.lock_name} was not established by this thread')
+        elif ret[0] == 1:
+            return True
+        else:
+            raise Exception(f'mysql lock {self.lock_name} does not exist')
+
+    def __enter__(self):
+        if isinstance(self.db, PooledMySQLDatabase):
+            self.lock()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if isinstance(self.db, PooledMySQLDatabase):
+            self.unlock()
+
+    def __call__(self, func):
+        @wraps(func)
+        def magic(*args, **kwargs):
+            with self:
+                return func(*args, **kwargs)
+
+        return magic
+
+
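+# DatabaseLock (above) works both as a context manager and as a decorator:
+# the assignment below exposes it as DB.lock, so call sites can write
+# `with DB.lock("name", timeout):` to serialize a critical section across
+# processes via MySQL GET_LOCK/RELEASE_LOCK, or apply it to a function so
+# every call runs under the lock.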
+DB = BaseDataBase().database_connection
+DB.lock = DatabaseLock
+
+
+def close_connection():
+    try:
+        if DB:
+            DB.close_stale(age=30)
+    except Exception as e:
+        LOGGER.exception(e)
+
+
+class DataBaseModel(BaseModel):
+    class Meta:
+        database = DB
+
+
+@DB.connection_context()
+def init_database_tables(alter_fields=[]):
+    members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
+    table_objs = []
+    create_failed_list = []
+    for name, obj in members:
+        if obj != DataBaseModel and issubclass(obj, DataBaseModel):
+            table_objs.append(obj)
+            LOGGER.info(f"start create table {obj.__name__}")
+            try:
+                obj.create_table()
+                LOGGER.info(f"create table success: {obj.__name__}")
+            except Exception as e:
+                LOGGER.exception(e)
+                create_failed_list.append(obj.__name__)
+    if create_failed_list:
+        LOGGER.info(f"create tables failed: {create_failed_list}")
+        raise Exception(f"create tables failed: {create_failed_list}")
+    migrate_db()
+
+
+def fill_db_model_object(model_object, human_model_dict):
+    for k, v in human_model_dict.items():
+        attr_name = '%s' % k
+        if hasattr(model_object.__class__, attr_name):
+            setattr(model_object, attr_name, v)
+    return model_object
+
+
+class User(DataBaseModel, UserMixin):
+    id = CharField(max_length=32, primary_key=True)
+    access_token = CharField(max_length=255, null=True, index=True)
+    nickname = CharField(max_length=100, null=False, help_text="nickname", index=True)
+    password = CharField(max_length=255, null=True, help_text="password", index=True)
+    email = CharField(
+        max_length=255,
+        null=False,
+        help_text="email",
+        index=True)
+    avatar = TextField(null=True, help_text="avatar base64 string")
+    language = CharField(
+        max_length=32,
+        null=True,
+        help_text="English|Chinese",
+        default="Chinese" if "zh_CN" in os.getenv("LANG",
"") else "English", + index=True) + color_schema = CharField( + max_length=32, + null=True, + help_text="Bright|Dark", + default="Bright", + index=True) + timezone = CharField( + max_length=64, + null=True, + help_text="Timezone", + default="UTC+8\tAsia/Shanghai", + index=True) + last_login_time = DateTimeField(null=True, index=True) + is_authenticated = CharField(max_length=1, null=False, default="1", index=True) + is_active = CharField(max_length=1, null=False, default="1", index=True) + is_anonymous = CharField(max_length=1, null=False, default="0", index=True) + login_channel = CharField(null=True, help_text="from which user login", index=True) + status = CharField( + max_length=1, + null=True, + help_text="is it validate(0: wasted,1: validate)", + default="1", + index=True) + is_superuser = BooleanField(null=True, help_text="is root", default=False, index=True) + + def __str__(self): + return self.email + + def get_id(self): + jwt = Serializer(secret_key=SECRET_KEY) + return jwt.dumps(str(self.access_token)) + + class Meta: + db_table = "user" + + +class Tenant(DataBaseModel): + id = CharField(max_length=32, primary_key=True) + name = CharField(max_length=100, null=True, help_text="Tenant name", index=True) + public_key = CharField(max_length=255, null=True, index=True) + llm_id = CharField(max_length=128, null=False, help_text="default llm ID", index=True) + embd_id = CharField( + max_length=128, + null=False, + help_text="default embedding model ID", + index=True) + asr_id = CharField( + max_length=128, + null=False, + help_text="default ASR model ID", + index=True) + img2txt_id = CharField( + max_length=128, + null=False, + help_text="default image to text model ID", + index=True) + rerank_id = CharField( + max_length=128, + null=False, + help_text="default rerank model ID", + index=True) + parser_ids = CharField( + max_length=256, + null=False, + help_text="document processors", + index=True) + credit = IntegerField(default=512, index=True) + status = CharField( + max_length=1, + null=True, + help_text="is it validate(0: wasted,1: validate)", + default="1", + index=True) + + class Meta: + db_table = "tenant" + + +class UserTenant(DataBaseModel): + id = CharField(max_length=32, primary_key=True) + user_id = CharField(max_length=32, null=False, index=True) + tenant_id = CharField(max_length=32, null=False, index=True) + role = CharField(max_length=32, null=False, help_text="UserTenantRole", index=True) + invited_by = CharField(max_length=32, null=False, index=True) + status = CharField( + max_length=1, + null=True, + help_text="is it validate(0: wasted,1: validate)", + default="1", + index=True) + + class Meta: + db_table = "user_tenant" + + +class InvitationCode(DataBaseModel): + id = CharField(max_length=32, primary_key=True) + code = CharField(max_length=32, null=False, index=True) + visit_time = DateTimeField(null=True, index=True) + user_id = CharField(max_length=32, null=True, index=True) + tenant_id = CharField(max_length=32, null=True, index=True) + status = CharField( + max_length=1, + null=True, + help_text="is it validate(0: wasted,1: validate)", + default="1", + index=True) + + class Meta: + db_table = "invitation_code" + + +class LLMFactories(DataBaseModel): + name = CharField( + max_length=128, + null=False, + help_text="LLM factory name", + primary_key=True) + logo = TextField(null=True, help_text="llm logo base64") + tags = CharField( + max_length=255, + null=False, + help_text="LLM, Text Embedding, Image2Text, ASR", + index=True) + status = CharField( + 
max_length=1,
+        null=True,
+        help_text="is it valid (0: invalid, 1: valid)",
+        default="1",
+        index=True)
+
+    def __str__(self):
+        return self.name
+
+    class Meta:
+        db_table = "llm_factories"
+
+
+class LLM(DataBaseModel):
+    # LLMs dictionary
+    llm_name = CharField(
+        max_length=128,
+        null=False,
+        help_text="LLM name",
+        index=True)
+    model_type = CharField(
+        max_length=128,
+        null=False,
+        help_text="LLM, Text Embedding, Image2Text, ASR",
+        index=True)
+    fid = CharField(max_length=128, null=False, help_text="LLM factory id", index=True)
+    max_tokens = IntegerField(default=0)
+
+    tags = CharField(
+        max_length=255,
+        null=False,
+        help_text="LLM, Text Embedding, Image2Text, Chat, 32k...",
+        index=True)
+    status = CharField(
+        max_length=1,
+        null=True,
+        help_text="is it valid (0: invalid, 1: valid)",
+        default="1",
+        index=True)
+
+    def __str__(self):
+        return self.llm_name
+
+    class Meta:
+        primary_key = CompositeKey('fid', 'llm_name')
+        db_table = "llm"
+
+
+class TenantLLM(DataBaseModel):
+    tenant_id = CharField(max_length=32, null=False, index=True)
+    llm_factory = CharField(
+        max_length=128,
+        null=False,
+        help_text="LLM factory name",
+        index=True)
+    model_type = CharField(
+        max_length=128,
+        null=True,
+        help_text="LLM, Text Embedding, Image2Text, ASR",
+        index=True)
+    llm_name = CharField(
+        max_length=128,
+        null=True,
+        help_text="LLM name",
+        default="",
+        index=True)
+    api_key = CharField(max_length=1024, null=True, help_text="API KEY", index=True)
+    api_base = CharField(max_length=255, null=True, help_text="API Base")
+
+    used_tokens = IntegerField(default=0, index=True)
+
+    def __str__(self):
+        return self.llm_name
+
+    class Meta:
+        db_table = "tenant_llm"
+        primary_key = CompositeKey('tenant_id', 'llm_factory', 'llm_name')
+
+
+class Knowledgebase(DataBaseModel):
+    id = CharField(max_length=32, primary_key=True)
+    avatar = TextField(null=True, help_text="avatar base64 string")
+    tenant_id = CharField(max_length=32, null=False, index=True)
+    name = CharField(
+        max_length=128,
+        null=False,
+        help_text="KB name",
+        index=True)
+    language = CharField(
+        max_length=32,
+        null=True,
+        default="Chinese" if "zh_CN" in os.getenv("LANG", "") else "English",
+        help_text="English|Chinese",
+        index=True)
+    description = TextField(null=True, help_text="KB description")
+    embd_id = CharField(
+        max_length=128,
+        null=False,
+        help_text="default embedding model ID",
+        index=True)
+    permission = CharField(
+        max_length=16,
+        null=False,
+        help_text="me|team",
+        default="me",
+        index=True)
+    created_by = CharField(max_length=32, null=False, index=True)
+    doc_num = IntegerField(default=0, index=True)
+    token_num = IntegerField(default=0, index=True)
+    chunk_num = IntegerField(default=0, index=True)
+    similarity_threshold = FloatField(default=0.2, index=True)
+    vector_similarity_weight = FloatField(default=0.3, index=True)
+
+    parser_id = CharField(
+        max_length=32,
+        null=False,
+        help_text="default parser ID",
+        default=ParserType.NAIVE.value,
+        index=True)
+    parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
+    status = CharField(
+        max_length=1,
+        null=True,
+        help_text="is it valid (0: invalid, 1: valid)",
+        default="1",
+        index=True)
+
+    def __str__(self):
+        return self.name
+
+    class Meta:
+        db_table = "knowledgebase"
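+
+
+# Both Knowledgebase and Document (below) define parser_id / parser_config:
+# the knowledge base holds the defaults and each document carries its own
+# copy, so parsing settings can differ per file without mutating the
+# knowledge base.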
+class Document(DataBaseModel):
+    id = CharField(max_length=32, primary_key=True)
+    thumbnail = TextField(null=True, help_text="thumbnail base64 string")
+    kb_id = CharField(max_length=256, null=False, index=True)
+    parser_id = CharField(
+        max_length=32,
+        null=False,
+        help_text="default parser ID",
+        index=True)
+    parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
+    source_type = CharField(
+        max_length=128,
+        null=False,
+        default="local",
+        help_text="where does this document come from",
+        index=True)
+    type = CharField(max_length=32, null=False, help_text="file extension",
+                     index=True)
+    created_by = CharField(
+        max_length=32,
+        null=False,
+        help_text="who created it",
+        index=True)
+    name = CharField(
+        max_length=255,
+        null=True,
+        help_text="file name",
+        index=True)
+    location = CharField(
+        max_length=255,
+        null=True,
+        help_text="where is it stored",
+        index=True)
+    size = IntegerField(default=0, index=True)
+    token_num = IntegerField(default=0, index=True)
+    chunk_num = IntegerField(default=0, index=True)
+    progress = FloatField(default=0, index=True)
+    progress_msg = TextField(
+        null=True,
+        help_text="process message",
+        default="")
+    process_begin_at = DateTimeField(null=True, index=True)
+    process_duation = FloatField(default=0)
+
+    run = CharField(
+        max_length=1,
+        null=True,
+        help_text="start to run processing or cancel.(1: run it; 2: cancel)",
+        default="0",
+        index=True)
+    status = CharField(
+        max_length=1,
+        null=True,
+        help_text="is it valid (0: invalid, 1: valid)",
+        default="1",
+        index=True)
+
+    class Meta:
+        db_table = "document"
+
+
+class File(DataBaseModel):
+    id = CharField(
+        max_length=32,
+        primary_key=True)
+    parent_id = CharField(
+        max_length=32,
+        null=False,
+        help_text="parent folder id",
+        index=True)
+    tenant_id = CharField(
+        max_length=32,
+        null=False,
+        help_text="tenant id",
+        index=True)
+    created_by = CharField(
+        max_length=32,
+        null=False,
+        help_text="who created it",
+        index=True)
+    name = CharField(
+        max_length=255,
+        null=False,
+        help_text="file name or folder name",
+        index=True)
+    location = CharField(
+        max_length=255,
+        null=True,
+        help_text="where is it stored",
+        index=True)
+    size = IntegerField(default=0, index=True)
+    type = CharField(max_length=32, null=False, help_text="file extension", index=True)
+    source_type = CharField(
+        max_length=128,
+        null=False,
+        default="",
+        help_text="where does this document come from", index=True)
+
+    class Meta:
+        db_table = "file"
+
+
+class File2Document(DataBaseModel):
+    id = CharField(
+        max_length=32,
+        primary_key=True)
+    file_id = CharField(
+        max_length=32,
+        null=True,
+        help_text="file id",
+        index=True)
+    document_id = CharField(
+        max_length=32,
+        null=True,
+        help_text="document id",
+        index=True)
+
+    class Meta:
+        db_table = "file2document"
+
+
+class Task(DataBaseModel):
+    id = CharField(max_length=32, primary_key=True)
+    doc_id = CharField(max_length=32, null=False, index=True)
+    from_page = IntegerField(default=0)
+
+    to_page = IntegerField(default=-1)
+
+    begin_at = DateTimeField(null=True, index=True)
+    process_duation = FloatField(default=0)
+
+    progress = FloatField(default=0, index=True)
+    progress_msg = TextField(
+        null=True,
+        help_text="process message",
+        default="")
+
+
+class Dialog(DataBaseModel):
+    id = CharField(max_length=32, primary_key=True)
+    tenant_id = CharField(max_length=32, null=False, index=True)
+    name = CharField(
+        max_length=255,
+        null=True,
+        help_text="dialog application name",
+        index=True)
+    description = TextField(null=True, help_text="Dialog description")
+    icon = TextField(null=True, help_text="icon base64 string")
+    language = CharField(
+        max_length=32,
+        null=True,
+        default="Chinese" if "zh_CN" in os.getenv("LANG", "") else "English",
+        help_text="English|Chinese",
+        index=True)
+    llm_id = CharField(max_length=128, null=False, help_text="default llm ID")
+
+    llm_setting = JSONField(null=False, default={"temperature": 0.1, "top_p": 0.3, "frequency_penalty": 0.7,
+                                                 "presence_penalty": 0.4, "max_tokens": 512})
+    prompt_type = CharField(
+        max_length=16,
+        null=False,
+        default="simple",
+        help_text="simple|advanced",
+        index=True)
+    prompt_config = JSONField(null=False, default={"system": "", "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
+                                                   "parameters": [], "empty_response": "Sorry! 知识库中未找到相关内容!"})
+
+    similarity_threshold = FloatField(default=0.2)
+    vector_similarity_weight = FloatField(default=0.3)
+
+    top_n = IntegerField(default=6)
+
+    top_k = IntegerField(default=1024)
+
+    do_refer = CharField(
+        max_length=1,
+        null=False,
+        help_text="whether to insert reference indices into the answer")
+
+    rerank_id = CharField(
+        max_length=128,
+        null=False,
+        help_text="default rerank model ID")
+
+    kb_ids = JSONField(null=False, default=[])
+    status = CharField(
+        max_length=1,
+        null=True,
+        help_text="is it valid (0: invalid, 1: valid)",
+        default="1",
+        index=True)
+
+    class Meta:
+        db_table = "dialog"
+
+
+class Conversation(DataBaseModel):
+    id = CharField(max_length=32, primary_key=True)
+    dialog_id = CharField(max_length=32, null=False, index=True)
+    name = CharField(max_length=255, null=True, help_text="conversation name", index=True)
+    message = JSONField(null=True)
+    reference = JSONField(null=True, default=[])
+
+    class Meta:
+        db_table = "conversation"
+
+
+class APIToken(DataBaseModel):
+    tenant_id = CharField(max_length=32, null=False, index=True)
+    token = CharField(max_length=255, null=False, index=True)
+    dialog_id = CharField(max_length=32, null=False, index=True)
+    source = CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True)
+
+    class Meta:
+        db_table = "api_token"
+        primary_key = CompositeKey('tenant_id', 'token')
+
+
+class API4Conversation(DataBaseModel):
+    id = CharField(max_length=32, primary_key=True)
+    dialog_id = CharField(max_length=32, null=False, index=True)
+    user_id = CharField(max_length=255, null=False, help_text="user_id", index=True)
+    message = JSONField(null=True)
+    reference = JSONField(null=True, default=[])
+    tokens = IntegerField(default=0)
+    source = CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True)
+
+    duration = FloatField(default=0, index=True)
+    round = IntegerField(default=0, index=True)
+    thumb_up = IntegerField(default=0, index=True)
+
+    class Meta:
+        db_table = "api_4_conversation"
+
+
+class UserCanvas(DataBaseModel):
+    id = CharField(max_length=32, primary_key=True)
+    avatar = TextField(null=True, help_text="avatar base64 string")
+    user_id = CharField(max_length=255, null=False, help_text="user_id", index=True)
+    title = CharField(max_length=255, null=True, help_text="Canvas title")
+
+    description = TextField(null=True, help_text="Canvas description")
+    canvas_type = CharField(max_length=32, null=True, help_text="Canvas type", index=True)
+    dsl = JSONField(null=True, default={})
+
+    class Meta:
+        db_table = "user_canvas"
+
+
+class CanvasTemplate(DataBaseModel):
+    id = CharField(max_length=32, primary_key=True)
+    avatar = TextField(null=True, help_text="avatar base64 string")
+    title = CharField(max_length=255, null=True, help_text="Canvas title")
+
+    description = TextField(null=True, help_text="Canvas description")
+    canvas_type = CharField(max_length=32, null=True, help_text="Canvas type", index=True)
+    dsl = JSONField(null=True, default={})
+
+    class Meta:
+        db_table = "canvas_template"
+
+
+# Idempotent, additive migrations: each change below is wrapped in its own
+# try/except so a column that already exists (or a primary key already
+# rebuilt) is skipped instead of aborting startup; init_database_tables()
+# calls migrate_db() after creating any missing tables.
+def migrate_db():
+    with DB.transaction():
+        migrator = MySQLMigrator(DB)
+        try:
+            migrate(
+                migrator.add_column('file', 'source_type', CharField(max_length=128, null=False, default="",
+                                                                     help_text="where does this document come from",
+                                                                     index=True))
+            )
+        except Exception:
+            pass
+        try:
+            migrate(
+                migrator.add_column('tenant', 'rerank_id',
+                                    CharField(max_length=128, null=False, default="BAAI/bge-reranker-v2-m3",
+                                              help_text="default rerank model ID"))
+            )
+        except Exception:
+            pass
+        try:
+            migrate(
+                migrator.add_column('dialog', 'rerank_id', CharField(max_length=128, null=False, default="",
+                                                                     help_text="default rerank model ID"))
+            )
+        except Exception:
+            pass
+        try:
+            migrate(
+                migrator.add_column('dialog', 'top_k', IntegerField(default=1024))
+            )
+        except Exception:
+            pass
+        try:
+            migrate(
+                migrator.alter_column_type('tenant_llm', 'api_key',
+                                           CharField(max_length=1024, null=True, help_text="API KEY", index=True))
+            )
+        except Exception:
+            pass
+        try:
+            migrate(
+                migrator.add_column('api_token', 'source',
+                                    CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True))
+            )
+        except Exception:
+            pass
+        try:
+            migrate(
+                migrator.add_column('api_4_conversation', 'source',
+                                    CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True))
+            )
+        except Exception:
+            pass
+        try:
+            DB.execute_sql('ALTER TABLE llm DROP PRIMARY KEY;')
+            DB.execute_sql('ALTER TABLE llm ADD PRIMARY KEY (llm_name,fid);')
+        except Exception:
+            pass
diff --git a/api/db/db_utils.py b/api/db/db_utils.py
index 144cc1f02e5f5d15304e5db695b07698d21dd8dd..7e156b4a784510ae7f3f229ec05d1d67605d3709 100644
--- a/api/db/db_utils.py
+++ b/api/db/db_utils.py
@@ -1,130 +1,130 @@
-#
-# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# -import operator -from functools import reduce -from typing import Dict, Type, Union - -from api.utils import current_timestamp, timestamp_to_date - -from api.db.db_models import DB, DataBaseModel -from api.db.runtime_config import RuntimeConfig -from api.utils.log_utils import getLogger -from enum import Enum - - -LOGGER = getLogger() - - -@DB.connection_context() -def bulk_insert_into_db(model, data_source, replace_on_conflict=False): - DB.create_tables([model]) - - for i, data in enumerate(data_source): - current_time = current_timestamp() + i - current_date = timestamp_to_date(current_time) - if 'create_time' not in data: - data['create_time'] = current_time - data['create_date'] = timestamp_to_date(data['create_time']) - data['update_time'] = current_time - data['update_date'] = current_date - - preserve = tuple(data_source[0].keys() - {'create_time', 'create_date'}) - - batch_size = 1000 - - for i in range(0, len(data_source), batch_size): - with DB.atomic(): - query = model.insert_many(data_source[i:i + batch_size]) - if replace_on_conflict: - query = query.on_conflict(preserve=preserve) - query.execute() - - -def get_dynamic_db_model(base, job_id): - return type(base.model( - table_index=get_dynamic_tracking_table_index(job_id=job_id))) - - -def get_dynamic_tracking_table_index(job_id): - return job_id[:8] - - -def fill_db_model_object(model_object, human_model_dict): - for k, v in human_model_dict.items(): - attr_name = 'f_%s' % k - if hasattr(model_object.__class__, attr_name): - setattr(model_object, attr_name, v) - return model_object - - -# https://docs.peewee-orm.com/en/latest/peewee/query_operators.html -supported_operators = { - '==': operator.eq, - '<': operator.lt, - '<=': operator.le, - '>': operator.gt, - '>=': operator.ge, - '!=': operator.ne, - '<<': operator.lshift, - '>>': operator.rshift, - '%': operator.mod, - '**': operator.pow, - '^': operator.xor, - '~': operator.inv, -} - - -def query_dict2expression( - model: Type[DataBaseModel], query: Dict[str, Union[bool, int, str, list, tuple]]): - expression = [] - - for field, value in query.items(): - if not isinstance(value, (list, tuple)): - value = ('==', value) - op, *val = value - - field = getattr(model, f'f_{field}') - value = supported_operators[op]( - field, val[0]) if op in supported_operators else getattr( - field, op)( - *val) - expression.append(value) - - return reduce(operator.iand, expression) - - -def query_db(model: Type[DataBaseModel], limit: int = 0, offset: int = 0, - query: dict = None, order_by: Union[str, list, tuple] = None): - data = model.select() - if query: - data = data.where(query_dict2expression(model, query)) - count = data.count() - - if not order_by: - order_by = 'create_time' - if not isinstance(order_by, (list, tuple)): - order_by = (order_by, 'asc') - order_by, order = order_by - order_by = getattr(model, f'f_{order_by}') - order_by = getattr(order_by, order)() - data = data.order_by(order_by) - - if limit > 0: - data = data.limit(limit) - if offset > 0: - data = data.offset(offset) - - return list(data), count +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
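The db_utils.py helpers above pair query_dict2expression() with query_db(): each dict key becomes a peewee expression (a bare scalar means equality, a tuple is looked up in supported_operators), and fields are resolved as f_<name>, so these helpers only fit models whose columns carry that FATE-style prefix. A usage sketch with a hypothetical JobModel:

# JobModel is hypothetical, with columns f_status and f_create_time.
query = {
    "status": "running",               # -> JobModel.f_status == "running"
    "create_time": ('>', 1700000000),  # -> JobModel.f_create_time > 1700000000
}
rows, total = query_db(JobModel, limit=20, offset=0,
                       query=query, order_by=("create_time", "desc"))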
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import operator +from functools import reduce +from typing import Dict, Type, Union + +from api.utils import current_timestamp, timestamp_to_date + +from api.db.db_models import DB, DataBaseModel +from api.db.runtime_config import RuntimeConfig +from api.utils.log_utils import getLogger +from enum import Enum + + +LOGGER = getLogger() + + +@DB.connection_context() +def bulk_insert_into_db(model, data_source, replace_on_conflict=False): + DB.create_tables([model]) + + for i, data in enumerate(data_source): + current_time = current_timestamp() + i + current_date = timestamp_to_date(current_time) + if 'create_time' not in data: + data['create_time'] = current_time + data['create_date'] = timestamp_to_date(data['create_time']) + data['update_time'] = current_time + data['update_date'] = current_date + + preserve = tuple(data_source[0].keys() - {'create_time', 'create_date'}) + + batch_size = 1000 + + for i in range(0, len(data_source), batch_size): + with DB.atomic(): + query = model.insert_many(data_source[i:i + batch_size]) + if replace_on_conflict: + query = query.on_conflict(preserve=preserve) + query.execute() + + +def get_dynamic_db_model(base, job_id): + return type(base.model( + table_index=get_dynamic_tracking_table_index(job_id=job_id))) + + +def get_dynamic_tracking_table_index(job_id): + return job_id[:8] + + +def fill_db_model_object(model_object, human_model_dict): + for k, v in human_model_dict.items(): + attr_name = 'f_%s' % k + if hasattr(model_object.__class__, attr_name): + setattr(model_object, attr_name, v) + return model_object + + +# https://docs.peewee-orm.com/en/latest/peewee/query_operators.html +supported_operators = { + '==': operator.eq, + '<': operator.lt, + '<=': operator.le, + '>': operator.gt, + '>=': operator.ge, + '!=': operator.ne, + '<<': operator.lshift, + '>>': operator.rshift, + '%': operator.mod, + '**': operator.pow, + '^': operator.xor, + '~': operator.inv, +} + + +def query_dict2expression( + model: Type[DataBaseModel], query: Dict[str, Union[bool, int, str, list, tuple]]): + expression = [] + + for field, value in query.items(): + if not isinstance(value, (list, tuple)): + value = ('==', value) + op, *val = value + + field = getattr(model, f'f_{field}') + value = supported_operators[op]( + field, val[0]) if op in supported_operators else getattr( + field, op)( + *val) + expression.append(value) + + return reduce(operator.iand, expression) + + +def query_db(model: Type[DataBaseModel], limit: int = 0, offset: int = 0, + query: dict = None, order_by: Union[str, list, tuple] = None): + data = model.select() + if query: + data = data.where(query_dict2expression(model, query)) + count = data.count() + + if not order_by: + order_by = 'create_time' + if not isinstance(order_by, (list, tuple)): + order_by = (order_by, 'asc') + order_by, order = order_by + order_by = getattr(model, f'f_{order_by}') + order_by = getattr(order_by, order)() + data = data.order_by(order_by) + + if limit > 0: + data = data.limit(limit) + if offset > 0: + data = data.offset(offset) + + return list(data), count diff --git a/api/db/init_data.py b/api/db/init_data.py index 
5095c3462ad5dc5030656bda2fedc73659751460..e4f61093b1e53d3c23cfa70882dd796a4609d1b8 100644 --- a/api/db/init_data.py +++ b/api/db/init_data.py @@ -1,184 +1,184 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import json -import os -import time -import uuid -from copy import deepcopy - -from api.db import LLMType, UserTenantRole -from api.db.db_models import init_database_tables as init_web_db, LLMFactories, LLM, TenantLLM -from api.db.services import UserService -from api.db.services.canvas_service import CanvasTemplateService -from api.db.services.document_service import DocumentService -from api.db.services.knowledgebase_service import KnowledgebaseService -from api.db.services.llm_service import LLMFactoriesService, LLMService, TenantLLMService, LLMBundle -from api.db.services.user_service import TenantService, UserTenantService -from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY, LLM_BASE_URL -from api.utils.file_utils import get_project_base_directory - - -def init_superuser(): - user_info = { - "id": uuid.uuid1().hex, - "password": "admin", - "nickname": "admin", - "is_superuser": True, - "email": "admin@ragflow.io", - "creator": "system", - "status": "1", - } - tenant = { - "id": user_info["id"], - "name": user_info["nickname"] + "‘s Kingdom", - "llm_id": CHAT_MDL, - "embd_id": EMBEDDING_MDL, - "asr_id": ASR_MDL, - "parser_ids": PARSERS, - "img2txt_id": IMAGE2TEXT_MDL - } - usr_tenant = { - "tenant_id": user_info["id"], - "user_id": user_info["id"], - "invited_by": user_info["id"], - "role": UserTenantRole.OWNER - } - tenant_llm = [] - for llm in LLMService.query(fid=LLM_FACTORY): - tenant_llm.append( - {"tenant_id": user_info["id"], "llm_factory": LLM_FACTORY, "llm_name": llm.llm_name, "model_type": llm.model_type, - "api_key": API_KEY, "api_base": LLM_BASE_URL}) - - if not UserService.save(**user_info): - print("\033[93m【ERROR】\033[0mcan't init admin.") - return - TenantService.insert(**tenant) - UserTenantService.insert(**usr_tenant) - TenantLLMService.insert_many(tenant_llm) - print( - "【INFO】Super user initialized. \033[93memail: admin@ragflow.io, password: admin\033[0m. Changing the password after logining is strongly recomanded.") - - chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"]) - msg = chat_mdl.chat(system="", history=[ - {"role": "user", "content": "Hello!"}], gen_conf={}) - if msg.find("ERROR: ") == 0: - print( - "\33[91m【ERROR】\33[0m: ", - "'{}' dosen't work. 
{}".format( - tenant["llm_id"], - msg)) - embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"]) - v, c = embd_mdl.encode(["Hello!"]) - if c == 0: - print( - "\33[91m【ERROR】\33[0m:", - " '{}' dosen't work!".format( - tenant["embd_id"])) - - -def init_llm_factory(): - try: - LLMService.filter_delete([(LLM.fid == "MiniMax" or LLM.fid == "Minimax")]) - except Exception as e: - pass - - factory_llm_infos = json.load( - open( - os.path.join(get_project_base_directory(), "conf", "llm_factories.json"), - "r", - ) - ) - for factory_llm_info in factory_llm_infos["factory_llm_infos"]: - llm_infos = factory_llm_info.pop("llm") - try: - LLMFactoriesService.save(**factory_llm_info) - except Exception as e: - pass - LLMService.filter_delete([LLM.fid == factory_llm_info["name"]]) - for llm_info in llm_infos: - llm_info["fid"] = factory_llm_info["name"] - try: - LLMService.save(**llm_info) - except Exception as e: - pass - - LLMFactoriesService.filter_delete([LLMFactories.name == "Local"]) - LLMService.filter_delete([LLM.fid == "Local"]) - LLMService.filter_delete([LLM.llm_name == "qwen-vl-max"]) - LLMService.filter_delete([LLM.fid == "Moonshot", LLM.llm_name == "flag-embedding"]) - TenantLLMService.filter_delete([TenantLLM.llm_factory == "Moonshot", TenantLLM.llm_name == "flag-embedding"]) - LLMFactoriesService.filter_delete([LLMFactoriesService.model.name == "QAnything"]) - LLMService.filter_delete([LLMService.model.fid == "QAnything"]) - TenantLLMService.filter_update([TenantLLMService.model.llm_factory == "QAnything"], {"llm_factory": "Youdao"}) - TenantService.filter_update([1 == 1], { - "parser_ids": "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email"}) - ## insert openai two embedding models to the current openai user. 
- print("Start to insert 2 OpenAI embedding models...") - tenant_ids = set([row["tenant_id"] for row in TenantLLMService.get_openai_models()]) - for tid in tenant_ids: - for row in TenantLLMService.query(llm_factory="OpenAI", tenant_id=tid): - row = row.to_dict() - row["model_type"] = LLMType.EMBEDDING.value - row["llm_name"] = "text-embedding-3-small" - row["used_tokens"] = 0 - try: - TenantLLMService.save(**row) - row = deepcopy(row) - row["llm_name"] = "text-embedding-3-large" - TenantLLMService.save(**row) - except Exception as e: - pass - break - for kb_id in KnowledgebaseService.get_all_ids(): - KnowledgebaseService.update_by_id(kb_id, {"doc_num": DocumentService.get_kb_doc_count(kb_id)}) - """ - drop table llm; - drop table llm_factories; - update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph'; - alter table knowledgebase modify avatar longtext; - alter table user modify avatar longtext; - alter table dialog modify icon longtext; - """ - - -def add_graph_templates(): - dir = os.path.join(get_project_base_directory(), "agent", "templates") - for fnm in os.listdir(dir): - try: - cnvs = json.load(open(os.path.join(dir, fnm), "r")) - try: - CanvasTemplateService.save(**cnvs) - except: - CanvasTemplateService.update_by_id(cnvs["id"], cnvs) - except Exception as e: - print("Add graph templates error: ", e) - print("------------", flush=True) - - -def init_web_data(): - start_time = time.time() - - init_llm_factory() - if not UserService.get_all().count(): - init_superuser() - - add_graph_templates() - print("init web data success:{}".format(time.time() - start_time)) - - -if __name__ == '__main__': - init_web_db() - init_web_data() +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import json +import os +import time +import uuid +from copy import deepcopy + +from api.db import LLMType, UserTenantRole +from api.db.db_models import init_database_tables as init_web_db, LLMFactories, LLM, TenantLLM +from api.db.services import UserService +from api.db.services.canvas_service import CanvasTemplateService +from api.db.services.document_service import DocumentService +from api.db.services.knowledgebase_service import KnowledgebaseService +from api.db.services.llm_service import LLMFactoriesService, LLMService, TenantLLMService, LLMBundle +from api.db.services.user_service import TenantService, UserTenantService +from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY, LLM_BASE_URL +from api.utils.file_utils import get_project_base_directory + + +def init_superuser(): + user_info = { + "id": uuid.uuid1().hex, + "password": "admin", + "nickname": "admin", + "is_superuser": True, + "email": "admin@ragflow.io", + "creator": "system", + "status": "1", + } + tenant = { + "id": user_info["id"], + "name": user_info["nickname"] + "'s Kingdom", + "llm_id": CHAT_MDL, + "embd_id": EMBEDDING_MDL, + "asr_id": ASR_MDL, + "parser_ids": PARSERS, + "img2txt_id": IMAGE2TEXT_MDL + } + usr_tenant = { + "tenant_id": user_info["id"], + "user_id": user_info["id"], + "invited_by": user_info["id"], + "role": UserTenantRole.OWNER + } + tenant_llm = [] + for llm in LLMService.query(fid=LLM_FACTORY): + tenant_llm.append( + {"tenant_id": user_info["id"], "llm_factory": LLM_FACTORY, "llm_name": llm.llm_name, "model_type": llm.model_type, + "api_key": API_KEY, "api_base": LLM_BASE_URL}) + + if not UserService.save(**user_info): + print("\033[93m【ERROR】\033[0m can't init admin.") + return + TenantService.insert(**tenant) + UserTenantService.insert(**usr_tenant) + TenantLLMService.insert_many(tenant_llm) + print( + "【INFO】Super user initialized. \033[93memail: admin@ragflow.io, password: admin\033[0m. Changing the password after logging in is strongly recommended.") + + chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"]) + msg = chat_mdl.chat(system="", history=[ + {"role": "user", "content": "Hello!"}], gen_conf={}) + if msg.find("ERROR: ") == 0: + print( + "\33[91m【ERROR】\33[0m: ", + "'{}' doesn't work. 
{}".format( + tenant["llm_id"], + msg)) + embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"]) + v, c = embd_mdl.encode(["Hello!"]) + if c == 0: + print( + "\33[91m【ERROR】\33[0m:", + " '{}' dosen't work!".format( + tenant["embd_id"])) + + +def init_llm_factory(): + try: + LLMService.filter_delete([(LLM.fid == "MiniMax" or LLM.fid == "Minimax")]) + except Exception as e: + pass + + factory_llm_infos = json.load( + open( + os.path.join(get_project_base_directory(), "conf", "llm_factories.json"), + "r", + ) + ) + for factory_llm_info in factory_llm_infos["factory_llm_infos"]: + llm_infos = factory_llm_info.pop("llm") + try: + LLMFactoriesService.save(**factory_llm_info) + except Exception as e: + pass + LLMService.filter_delete([LLM.fid == factory_llm_info["name"]]) + for llm_info in llm_infos: + llm_info["fid"] = factory_llm_info["name"] + try: + LLMService.save(**llm_info) + except Exception as e: + pass + + LLMFactoriesService.filter_delete([LLMFactories.name == "Local"]) + LLMService.filter_delete([LLM.fid == "Local"]) + LLMService.filter_delete([LLM.llm_name == "qwen-vl-max"]) + LLMService.filter_delete([LLM.fid == "Moonshot", LLM.llm_name == "flag-embedding"]) + TenantLLMService.filter_delete([TenantLLM.llm_factory == "Moonshot", TenantLLM.llm_name == "flag-embedding"]) + LLMFactoriesService.filter_delete([LLMFactoriesService.model.name == "QAnything"]) + LLMService.filter_delete([LLMService.model.fid == "QAnything"]) + TenantLLMService.filter_update([TenantLLMService.model.llm_factory == "QAnything"], {"llm_factory": "Youdao"}) + TenantService.filter_update([1 == 1], { + "parser_ids": "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email"}) + ## insert openai two embedding models to the current openai user. 
+ print("Start to insert 2 OpenAI embedding models...") + tenant_ids = set([row["tenant_id"] for row in TenantLLMService.get_openai_models()]) + for tid in tenant_ids: + for row in TenantLLMService.query(llm_factory="OpenAI", tenant_id=tid): + row = row.to_dict() + row["model_type"] = LLMType.EMBEDDING.value + row["llm_name"] = "text-embedding-3-small" + row["used_tokens"] = 0 + try: + TenantLLMService.save(**row) + row = deepcopy(row) + row["llm_name"] = "text-embedding-3-large" + TenantLLMService.save(**row) + except Exception as e: + pass + break + for kb_id in KnowledgebaseService.get_all_ids(): + KnowledgebaseService.update_by_id(kb_id, {"doc_num": DocumentService.get_kb_doc_count(kb_id)}) + """ + drop table llm; + drop table llm_factories; + update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph'; + alter table knowledgebase modify avatar longtext; + alter table user modify avatar longtext; + alter table dialog modify icon longtext; + """ + + +def add_graph_templates(): + dir = os.path.join(get_project_base_directory(), "agent", "templates") + for fnm in os.listdir(dir): + try: + cnvs = json.load(open(os.path.join(dir, fnm), "r")) + try: + CanvasTemplateService.save(**cnvs) + except: + CanvasTemplateService.update_by_id(cnvs["id"], cnvs) + except Exception as e: + print("Add graph templates error: ", e) + print("------------", flush=True) + + +def init_web_data(): + start_time = time.time() + + init_llm_factory() + if not UserService.get_all().count(): + init_superuser() + + add_graph_templates() + print("init web data success:{}".format(time.time() - start_time)) + + +if __name__ == '__main__': + init_web_db() + init_web_data() diff --git a/api/db/operatioins.py b/api/db/operatioins.py index cc13a42939453afd5b32e864aed91602696666ab..30220d8f89c092980c14ed8143c1b5621af4626f 100644 --- a/api/db/operatioins.py +++ b/api/db/operatioins.py @@ -1,21 +1,21 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import operator -import time -import typing -from api.utils.log_utils import sql_logger -import peewee +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import operator +import time +import typing +from api.utils.log_utils import sql_logger +import peewee diff --git a/api/db/reload_config_base.py b/api/db/reload_config_base.py index fff9b598b7e21684526e0a7d3e3cff6f50e09a74..be37afc6bcd9556f02cf767033e483ee7d2d9c18 100644 --- a/api/db/reload_config_base.py +++ b/api/db/reload_config_base.py @@ -1,28 +1,28 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -class ReloadConfigBase: - @classmethod - def get_all(cls): - configs = {} - for k, v in cls.__dict__.items(): - if not callable(getattr(cls, k)) and not k.startswith( - "__") and not k.startswith("_"): - configs[k] = v - return configs - - @classmethod - def get(cls, config_name): - return getattr(cls, config_name) if hasattr(cls, config_name) else None +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +class ReloadConfigBase: + @classmethod + def get_all(cls): + configs = {} + for k, v in cls.__dict__.items(): + if not callable(getattr(cls, k)) and not k.startswith( + "__") and not k.startswith("_"): + configs[k] = v + return configs + + @classmethod + def get(cls, config_name): + return getattr(cls, config_name) if hasattr(cls, config_name) else None diff --git a/api/db/runtime_config.py b/api/db/runtime_config.py index ad488dc206a84134695cfcedad001bcbf3141d41..2ab484cb00b7206b9d5251c39419c874ba812bc5 100644 --- a/api/db/runtime_config.py +++ b/api/db/runtime_config.py @@ -1,54 +1,54 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
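ReloadConfigBase.get_all() above reflects over the subclass's __dict__, keeping only non-callable attributes whose names don't start with an underscore, so plain class constants double as the config dictionary. A usage sketch with a hypothetical subclass:

class ServiceConfig(ReloadConfigBase):
    HTTP_PORT = 9380
    DEBUG = False

ServiceConfig.get_all()          # {'HTTP_PORT': 9380, 'DEBUG': False}
ServiceConfig.get("HTTP_PORT")   # 9380
ServiceConfig.get("MISSING")     # None, via the hasattr guard in get()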
-# -from api.versions import get_versions -from .reload_config_base import ReloadConfigBase - - -class RuntimeConfig(ReloadConfigBase): - DEBUG = None - WORK_MODE = None - HTTP_PORT = None - JOB_SERVER_HOST = None - JOB_SERVER_VIP = None - ENV = dict() - SERVICE_DB = None - LOAD_CONFIG_MANAGER = False - - @classmethod - def init_config(cls, **kwargs): - for k, v in kwargs.items(): - if hasattr(cls, k): - setattr(cls, k, v) - - @classmethod - def init_env(cls): - cls.ENV.update(get_versions()) - - @classmethod - def load_config_manager(cls): - cls.LOAD_CONFIG_MANAGER = True - - @classmethod - def get_env(cls, key): - return cls.ENV.get(key, None) - - @classmethod - def get_all_env(cls): - return cls.ENV - - @classmethod - def set_service_db(cls, service_db): - cls.SERVICE_DB = service_db +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from api.versions import get_versions +from .reload_config_base import ReloadConfigBase + + +class RuntimeConfig(ReloadConfigBase): + DEBUG = None + WORK_MODE = None + HTTP_PORT = None + JOB_SERVER_HOST = None + JOB_SERVER_VIP = None + ENV = dict() + SERVICE_DB = None + LOAD_CONFIG_MANAGER = False + + @classmethod + def init_config(cls, **kwargs): + for k, v in kwargs.items(): + if hasattr(cls, k): + setattr(cls, k, v) + + @classmethod + def init_env(cls): + cls.ENV.update(get_versions()) + + @classmethod + def load_config_manager(cls): + cls.LOAD_CONFIG_MANAGER = True + + @classmethod + def get_env(cls, key): + return cls.ENV.get(key, None) + + @classmethod + def get_all_env(cls): + return cls.ENV + + @classmethod + def set_service_db(cls, service_db): + cls.SERVICE_DB = service_db diff --git a/api/db/services/__init__.py b/api/db/services/__init__.py index e324030aa9523888763c1e9394d7caefc582681f..2363e65df817919802e0c7595f319203eaae0fb3 100644 --- a/api/db/services/__init__.py +++ b/api/db/services/__init__.py @@ -1,38 +1,38 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
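RuntimeConfig is a process-wide settings holder: init_config() copies only keyword arguments whose names already exist as class attributes, and init_env() merges get_versions() into ENV once at startup. Typical wiring, as a sketch (the env key is illustrative, not taken from the source):

from api.db.runtime_config import RuntimeConfig

RuntimeConfig.init_config(DEBUG=True, HTTP_PORT=9380)  # unknown keys are silently ignored
RuntimeConfig.init_env()                               # pulls version info into ENV
RuntimeConfig.get_env("RAGFLOW_VERSION")               # hypothetical key; None if absent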
-# -import pathlib -import re -from .user_service import UserService - - -def duplicate_name(query_func, **kwargs): - fnm = kwargs["name"] - objs = query_func(**kwargs) - if not objs: return fnm - ext = pathlib.Path(fnm).suffix #.jpg - nm = re.sub(r"%s$"%ext, "", fnm) - r = re.search(r"\(([0-9]+)\)$", nm) - c = 0 - if r: - c = int(r.group(1)) - nm = re.sub(r"\([0-9]+\)$", "", nm) - c += 1 - nm = f"{nm}({c})" - if ext: nm += f"{ext}" - - kwargs["name"] = nm - return duplicate_name(query_func, **kwargs) - +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import pathlib +import re +from .user_service import UserService + + +def duplicate_name(query_func, **kwargs): + fnm = kwargs["name"] + objs = query_func(**kwargs) + if not objs: return fnm + ext = pathlib.Path(fnm).suffix #.jpg + nm = re.sub(r"%s$"%ext, "", fnm) + r = re.search(r"\(([0-9]+)\)$", nm) + c = 0 + if r: + c = int(r.group(1)) + nm = re.sub(r"\([0-9]+\)$", "", nm) + c += 1 + nm = f"{nm}({c})" + if ext: nm += f"{ext}" + + kwargs["name"] = nm + return duplicate_name(query_func, **kwargs) + diff --git a/api/db/services/api_service.py b/api/db/services/api_service.py index d65bc3d54cc77a069a05cfdb2ecd8269ffa3b306..cb47ea917fdd1eee677c97dba2b82556e3455f0a 100644 --- a/api/db/services/api_service.py +++ b/api/db/services/api_service.py @@ -1,68 +1,68 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
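duplicate_name() above recurses until the candidate name is free, incrementing a "(n)" suffix placed before the extension; given existing "report.pdf" and "report(1).pdf" it settles on "report(2).pdf". A self-contained sketch with an in-memory stand-in for the service query:

existing = {"report.pdf", "report(1).pdf"}

def taken(**kwargs):
    # Stand-in for a service query such as DocumentService.query;
    # a truthy result means the name is already in use.
    return kwargs["name"] in existing

duplicate_name(taken, name="report.pdf")   # -> "report(2).pdf"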
-# -from datetime import datetime -import peewee -from api.db.db_models import DB, API4Conversation, APIToken, Dialog -from api.db.services.common_service import CommonService -from api.utils import current_timestamp, datetime_format - - -class APITokenService(CommonService): - model = APIToken - - @classmethod - @DB.connection_context() - def used(cls, token): - return cls.model.update({ - "update_time": current_timestamp(), - "update_date": datetime_format(datetime.now()), - }).where( - cls.model.token == token - ) - - -class API4ConversationService(CommonService): - model = API4Conversation - - @classmethod - @DB.connection_context() - def append_message(cls, id, conversation): - cls.update_by_id(id, conversation) - return cls.model.update(round=cls.model.round + 1).where(cls.model.id==id).execute() - - @classmethod - @DB.connection_context() - def stats(cls, tenant_id, from_date, to_date, source=None): - if len(to_date) == 10: to_date += " 23:59:59" - return cls.model.select( - cls.model.create_date.truncate("day").alias("dt"), - peewee.fn.COUNT( - cls.model.id).alias("pv"), - peewee.fn.COUNT( - cls.model.user_id.distinct()).alias("uv"), - peewee.fn.SUM( - cls.model.tokens).alias("tokens"), - peewee.fn.SUM( - cls.model.duration).alias("duration"), - peewee.fn.AVG( - cls.model.round).alias("round"), - peewee.fn.SUM( - cls.model.thumb_up).alias("thumb_up") - ).join(Dialog, on=(cls.model.dialog_id == Dialog.id & Dialog.tenant_id == tenant_id)).where( - cls.model.create_date >= from_date, - cls.model.create_date <= to_date, - cls.model.source == source - ).group_by(cls.model.create_date.truncate("day")).dicts() +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from datetime import datetime +import peewee +from api.db.db_models import DB, API4Conversation, APIToken, Dialog +from api.db.services.common_service import CommonService +from api.utils import current_timestamp, datetime_format + + +class APITokenService(CommonService): + model = APIToken + + @classmethod + @DB.connection_context() + def used(cls, token): + return cls.model.update({ + "update_time": current_timestamp(), + "update_date": datetime_format(datetime.now()), + }).where( + cls.model.token == token + ) + + +class API4ConversationService(CommonService): + model = API4Conversation + + @classmethod + @DB.connection_context() + def append_message(cls, id, conversation): + cls.update_by_id(id, conversation) + return cls.model.update(round=cls.model.round + 1).where(cls.model.id == id).execute() + + @classmethod + @DB.connection_context() + def stats(cls, tenant_id, from_date, to_date, source=None): + if len(to_date) == 10: to_date += " 23:59:59" + return cls.model.select( + cls.model.create_date.truncate("day").alias("dt"), + peewee.fn.COUNT( + cls.model.id).alias("pv"), + peewee.fn.COUNT( + cls.model.user_id.distinct()).alias("uv"), + peewee.fn.SUM( + cls.model.tokens).alias("tokens"), + peewee.fn.SUM( + cls.model.duration).alias("duration"), + peewee.fn.AVG( + cls.model.round).alias("round"), + peewee.fn.SUM( + cls.model.thumb_up).alias("thumb_up") + ).join(Dialog, on=((cls.model.dialog_id == Dialog.id) & (Dialog.tenant_id == tenant_id))).where( + cls.model.create_date >= from_date, + cls.model.create_date <= to_date, + cls.model.source == source + ).group_by(cls.model.create_date.truncate("day")).dicts() diff --git a/api/db/services/common_service.py b/api/db/services/common_service.py index 43e7c734ed07c73a284a33af425721fd7edafba5..f0f52930c7fd848a7e8e052c7c2338dad62014aa 100644 --- a/api/db/services/common_service.py +++ b/api/db/services/common_service.py @@ -1,183 +1,183 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
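Note the ON clause in the re-added stats() now parenthesizes both comparisons: Python's `&` binds tighter than `==`, so the unparenthesized original parsed as a chained comparison against `Dialog.id & Dialog.tenant_id` rather than two joined predicates. stats() itself returns one aggregate row per day (page views, unique users, token and duration sums, average rounds, thumb-ups) scoped to the tenant's dialogs. A hedged call sketch:

rows = API4ConversationService.stats(
    tenant_id="tnt_0001",        # illustrative id
    from_date="2024-06-01",
    to_date="2024-06-30",        # a date-only string gets " 23:59:59" appended
    source=None,                 # peewee renders == None as IS NULL
)
for r in rows:                   # .dicts() yields plain dictionaries
    print(r["dt"], r["pv"], r["uv"], r["tokens"])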
-# -from datetime import datetime - -import peewee - -from api.db.db_models import DB -from api.utils import datetime_format, current_timestamp, get_uuid - - -class CommonService: - model = None - - @classmethod - @DB.connection_context() - def query(cls, cols=None, reverse=None, order_by=None, **kwargs): - return cls.model.query(cols=cols, reverse=reverse, - order_by=order_by, **kwargs) - - @classmethod - @DB.connection_context() - def get_all(cls, cols=None, reverse=None, order_by=None): - if cols: - query_records = cls.model.select(*cols) - else: - query_records = cls.model.select() - if reverse is not None: - if not order_by or not hasattr(cls, order_by): - order_by = "create_time" - if reverse is True: - query_records = query_records.order_by( - cls.model.getter_by(order_by).desc()) - elif reverse is False: - query_records = query_records.order_by( - cls.model.getter_by(order_by).asc()) - return query_records - - @classmethod - @DB.connection_context() - def get(cls, **kwargs): - return cls.model.get(**kwargs) - - @classmethod - @DB.connection_context() - def get_or_none(cls, **kwargs): - try: - return cls.model.get(**kwargs) - except peewee.DoesNotExist: - return None - - @classmethod - @DB.connection_context() - def save(cls, **kwargs): - # if "id" not in kwargs: - # kwargs["id"] = get_uuid() - sample_obj = cls.model(**kwargs).save(force_insert=True) - return sample_obj - - @classmethod - @DB.connection_context() - def insert(cls, **kwargs): - if "id" not in kwargs: - kwargs["id"] = get_uuid() - kwargs["create_time"] = current_timestamp() - kwargs["create_date"] = datetime_format(datetime.now()) - kwargs["update_time"] = current_timestamp() - kwargs["update_date"] = datetime_format(datetime.now()) - sample_obj = cls.model(**kwargs).save(force_insert=True) - return sample_obj - - @classmethod - @DB.connection_context() - def insert_many(cls, data_list, batch_size=100): - with DB.atomic(): - for d in data_list: - d["create_time"] = current_timestamp() - d["create_date"] = datetime_format(datetime.now()) - for i in range(0, len(data_list), batch_size): - cls.model.insert_many(data_list[i:i + batch_size]).execute() - - @classmethod - @DB.connection_context() - def update_many_by_id(cls, data_list): - with DB.atomic(): - for data in data_list: - data["update_time"] = current_timestamp() - data["update_date"] = datetime_format(datetime.now()) - cls.model.update(data).where( - cls.model.id == data["id"]).execute() - - @classmethod - @DB.connection_context() - def update_by_id(cls, pid, data): - data["update_time"] = current_timestamp() - data["update_date"] = datetime_format(datetime.now()) - num = cls.model.update(data).where(cls.model.id == pid).execute() - return num - - @classmethod - @DB.connection_context() - def get_by_id(cls, pid): - try: - obj = cls.model.query(id=pid)[0] - return True, obj - except Exception as e: - return False, None - - @classmethod - @DB.connection_context() - def get_by_ids(cls, pids, cols=None): - if cols: - objs = cls.model.select(*cols) - else: - objs = cls.model.select() - return objs.where(cls.model.id.in_(pids)) - - @classmethod - @DB.connection_context() - def delete_by_id(cls, pid): - return cls.model.delete().where(cls.model.id == pid).execute() - - @classmethod - @DB.connection_context() - def filter_delete(cls, filters): - with DB.atomic(): - num = cls.model.delete().where(*filters).execute() - return num - - @classmethod - @DB.connection_context() - def filter_update(cls, filters, update_data): - with DB.atomic(): - return 
cls.model.update(update_data).where(*filters).execute() - - @staticmethod - def cut_list(tar_list, n): - length = len(tar_list) - arr = range(length) - result = [tuple(tar_list[x:(x + n)]) for x in arr[::n]] - return result - - @classmethod - @DB.connection_context() - def filter_scope_list(cls, in_key, in_filters_list, - filters=None, cols=None): - in_filters_tuple_list = cls.cut_list(in_filters_list, 20) - if not filters: - filters = [] - res_list = [] - if cols: - for i in in_filters_tuple_list: - query_records = cls.model.select( - * - cols).where( - getattr( - cls.model, - in_key).in_(i), - * - filters) - if query_records: - res_list.extend( - [query_record for query_record in query_records]) - else: - for i in in_filters_tuple_list: - query_records = cls.model.select().where( - getattr(cls.model, in_key).in_(i), *filters) - if query_records: - res_list.extend( - [query_record for query_record in query_records]) - return res_list +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from datetime import datetime + +import peewee + +from api.db.db_models import DB +from api.utils import datetime_format, current_timestamp, get_uuid + + +class CommonService: + model = None + + @classmethod + @DB.connection_context() + def query(cls, cols=None, reverse=None, order_by=None, **kwargs): + return cls.model.query(cols=cols, reverse=reverse, + order_by=order_by, **kwargs) + + @classmethod + @DB.connection_context() + def get_all(cls, cols=None, reverse=None, order_by=None): + if cols: + query_records = cls.model.select(*cols) + else: + query_records = cls.model.select() + if reverse is not None: + if not order_by or not hasattr(cls, order_by): + order_by = "create_time" + if reverse is True: + query_records = query_records.order_by( + cls.model.getter_by(order_by).desc()) + elif reverse is False: + query_records = query_records.order_by( + cls.model.getter_by(order_by).asc()) + return query_records + + @classmethod + @DB.connection_context() + def get(cls, **kwargs): + return cls.model.get(**kwargs) + + @classmethod + @DB.connection_context() + def get_or_none(cls, **kwargs): + try: + return cls.model.get(**kwargs) + except peewee.DoesNotExist: + return None + + @classmethod + @DB.connection_context() + def save(cls, **kwargs): + # if "id" not in kwargs: + # kwargs["id"] = get_uuid() + sample_obj = cls.model(**kwargs).save(force_insert=True) + return sample_obj + + @classmethod + @DB.connection_context() + def insert(cls, **kwargs): + if "id" not in kwargs: + kwargs["id"] = get_uuid() + kwargs["create_time"] = current_timestamp() + kwargs["create_date"] = datetime_format(datetime.now()) + kwargs["update_time"] = current_timestamp() + kwargs["update_date"] = datetime_format(datetime.now()) + sample_obj = cls.model(**kwargs).save(force_insert=True) + return sample_obj + + @classmethod + @DB.connection_context() + def insert_many(cls, data_list, batch_size=100): + with DB.atomic(): + for d in data_list: + d["create_time"] = 
current_timestamp() + d["create_date"] = datetime_format(datetime.now()) + for i in range(0, len(data_list), batch_size): + cls.model.insert_many(data_list[i:i + batch_size]).execute() + + @classmethod + @DB.connection_context() + def update_many_by_id(cls, data_list): + with DB.atomic(): + for data in data_list: + data["update_time"] = current_timestamp() + data["update_date"] = datetime_format(datetime.now()) + cls.model.update(data).where( + cls.model.id == data["id"]).execute() + + @classmethod + @DB.connection_context() + def update_by_id(cls, pid, data): + data["update_time"] = current_timestamp() + data["update_date"] = datetime_format(datetime.now()) + num = cls.model.update(data).where(cls.model.id == pid).execute() + return num + + @classmethod + @DB.connection_context() + def get_by_id(cls, pid): + try: + obj = cls.model.query(id=pid)[0] + return True, obj + except Exception as e: + return False, None + + @classmethod + @DB.connection_context() + def get_by_ids(cls, pids, cols=None): + if cols: + objs = cls.model.select(*cols) + else: + objs = cls.model.select() + return objs.where(cls.model.id.in_(pids)) + + @classmethod + @DB.connection_context() + def delete_by_id(cls, pid): + return cls.model.delete().where(cls.model.id == pid).execute() + + @classmethod + @DB.connection_context() + def filter_delete(cls, filters): + with DB.atomic(): + num = cls.model.delete().where(*filters).execute() + return num + + @classmethod + @DB.connection_context() + def filter_update(cls, filters, update_data): + with DB.atomic(): + return cls.model.update(update_data).where(*filters).execute() + + @staticmethod + def cut_list(tar_list, n): + length = len(tar_list) + arr = range(length) + result = [tuple(tar_list[x:(x + n)]) for x in arr[::n]] + return result + + @classmethod + @DB.connection_context() + def filter_scope_list(cls, in_key, in_filters_list, + filters=None, cols=None): + in_filters_tuple_list = cls.cut_list(in_filters_list, 20) + if not filters: + filters = [] + res_list = [] + if cols: + for i in in_filters_tuple_list: + query_records = cls.model.select( + * + cols).where( + getattr( + cls.model, + in_key).in_(i), + * + filters) + if query_records: + res_list.extend( + [query_record for query_record in query_records]) + else: + for i in in_filters_tuple_list: + query_records = cls.model.select().where( + getattr(cls.model, in_key).in_(i), *filters) + if query_records: + res_list.extend( + [query_record for query_record in query_records]) + return res_list diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py index 849b931eaf89e261f0573f7e5c8e91510632ba77..417d6106febe323629b07691df39b512a185504d 100644 --- a/api/db/services/dialog_service.py +++ b/api/db/services/dialog_service.py @@ -1,392 +1,392 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
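Every service in this layer is a thin CommonService subclass that just sets `model`; the base class contributes connection-scoped CRUD and stamps the create_/update_ time and date columns automatically. Declaring and driving one, as a sketch (ids and values are illustrative):

from api.db.db_models import Dialog
from api.db.services.common_service import CommonService

class DialogService(CommonService):
    model = Dialog

DialogService.insert(tenant_id="tnt_0001", name="demo")      # generates id, fills timestamps
ok, dia = DialogService.get_by_id("dlg_0001")                # (False, None) when missing
DialogService.update_by_id("dlg_0001", {"name": "renamed"})  # refreshes update_time/date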
-# -import os -import json -import re -from copy import deepcopy - -from api.db import LLMType, ParserType -from api.db.db_models import Dialog, Conversation -from api.db.services.common_service import CommonService -from api.db.services.knowledgebase_service import KnowledgebaseService -from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle -from api.settings import chat_logger, retrievaler, kg_retrievaler -from rag.app.resume import forbidden_select_fields4resume -from rag.nlp import keyword_extraction -from rag.nlp.search import index_name -from rag.utils import rmSpace, num_tokens_from_string, encoder -from api.utils.file_utils import get_project_base_directory - - -class DialogService(CommonService): - model = Dialog - - -class ConversationService(CommonService): - model = Conversation - - -def message_fit_in(msg, max_length=4000): - def count(): - nonlocal msg - tks_cnts = [] - for m in msg: - tks_cnts.append( - {"role": m["role"], "count": num_tokens_from_string(m["content"])}) - total = 0 - for m in tks_cnts: - total += m["count"] - return total - - c = count() - if c < max_length: - return c, msg - - msg_ = [m for m in msg[:-1] if m["role"] == "system"] - msg_.append(msg[-1]) - msg = msg_ - c = count() - if c < max_length: - return c, msg - - ll = num_tokens_from_string(msg_[0]["content"]) - l = num_tokens_from_string(msg_[-1]["content"]) - if ll / (ll + l) > 0.8: - m = msg_[0]["content"] - m = encoder.decode(encoder.encode(m)[:max_length - l]) - msg[0]["content"] = m - return max_length, msg - - m = msg_[1]["content"] - m = encoder.decode(encoder.encode(m)[:max_length - l]) - msg[1]["content"] = m - return max_length, msg - - -def llm_id2llm_type(llm_id): - fnm = os.path.join(get_project_base_directory(), "conf") - llm_factories = json.load(open(os.path.join(fnm, "llm_factories.json"), "r")) - for llm_factory in llm_factories["factory_llm_infos"]: - for llm in llm_factory["llm"]: - if llm_id == llm["llm_name"]: - return llm["model_type"].strip(",")[-1] - - -def chat(dialog, messages, stream=True, **kwargs): - assert messages[-1]["role"] == "user", "The last content of this conversation is not from user." 
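One thing worth flagging in llm_id2llm_type() above: `llm["model_type"].strip(",")[-1]` strips commas from the ends of the string and then indexes its last character, so "image2text" comes back as "t" and the image2text branch in chat() can never match. The intent is presumably the last entry of a comma-separated type list:

# Presumed intent (assumption): last item of a comma-separated model_type.
"image2text,chat".split(",")[-1]   # -> "chat"
"image2text".split(",")[-1]        # -> "image2text"
"image2text".strip(",")[-1]        # -> "t"  (what the current expression computes)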
- llm = LLMService.query(llm_name=dialog.llm_id) - if not llm: - llm = TenantLLMService.query(tenant_id=dialog.tenant_id, llm_name=dialog.llm_id) - if not llm: - raise LookupError("LLM(%s) not found" % dialog.llm_id) - max_tokens = 8192 - else: - max_tokens = llm[0].max_tokens - kbs = KnowledgebaseService.get_by_ids(dialog.kb_ids) - embd_nms = list(set([kb.embd_id for kb in kbs])) - if len(embd_nms) != 1: - yield {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []} - return {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []} - - is_kg = all([kb.parser_id == ParserType.KG for kb in kbs]) - retr = retrievaler if not is_kg else kg_retrievaler - - questions = [m["content"] for m in messages if m["role"] == "user"][-3:] - attachments = kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else None - if "doc_ids" in messages[-1]: - attachments = messages[-1]["doc_ids"] - for m in messages[:-1]: - if "doc_ids" in m: - attachments.extend(m["doc_ids"]) - - embd_mdl = LLMBundle(dialog.tenant_id, LLMType.EMBEDDING, embd_nms[0]) - if llm_id2llm_type(dialog.llm_id) == "image2text": - chat_mdl = LLMBundle(dialog.tenant_id, LLMType.IMAGE2TEXT, dialog.llm_id) - else: - chat_mdl = LLMBundle(dialog.tenant_id, LLMType.CHAT, dialog.llm_id) - - prompt_config = dialog.prompt_config - field_map = KnowledgebaseService.get_field_map(dialog.kb_ids) - # try to use sql if field mapping is good to go - if field_map: - chat_logger.info("Use SQL to retrieval:{}".format(questions[-1])) - ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True)) - if ans: - yield ans - return - - for p in prompt_config["parameters"]: - if p["key"] == "knowledge": - continue - if p["key"] not in kwargs and not p["optional"]: - raise KeyError("Miss parameter: " + p["key"]) - if p["key"] not in kwargs: - prompt_config["system"] = prompt_config["system"].replace( - "{%s}" % p["key"], " ") - - rerank_mdl = None - if dialog.rerank_id: - rerank_mdl = LLMBundle(dialog.tenant_id, LLMType.RERANK, dialog.rerank_id) - - for _ in range(len(questions) // 2): - questions.append(questions[-1]) - if "knowledge" not in [p["key"] for p in prompt_config["parameters"]]: - kbinfos = {"total": 0, "chunks": [], "doc_aggs": []} - else: - if prompt_config.get("keyword", False): - questions[-1] += keyword_extraction(chat_mdl, questions[-1]) - kbinfos = retr.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n, - dialog.similarity_threshold, - dialog.vector_similarity_weight, - doc_ids=attachments, - top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl) - knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]] - #self-rag - if dialog.prompt_config.get("self_rag") and not relevant(dialog.tenant_id, dialog.llm_id, questions[-1], knowledges): - questions[-1] = rewrite(dialog.tenant_id, dialog.llm_id, questions[-1]) - kbinfos = retr.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n, - dialog.similarity_threshold, - dialog.vector_similarity_weight, - doc_ids=attachments, - top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl) - knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]] - - chat_logger.info( - "{}->{}".format(" ".join(questions), "\n->".join(knowledges))) - - if not knowledges and prompt_config.get("empty_response"): - yield {"answer": prompt_config["empty_response"], "reference": kbinfos} - return {"answer": prompt_config["empty_response"], 
"reference": kbinfos} - - kwargs["knowledge"] = "\n".join(knowledges) - gen_conf = dialog.llm_setting - - msg = [{"role": "system", "content": prompt_config["system"].format(**kwargs)}] - msg.extend([{"role": m["role"], "content": re.sub(r"##\d+\$\$", "", m["content"])} - for m in messages if m["role"] != "system"]) - used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.97)) - assert len(msg) >= 2, f"message_fit_in has bug: {msg}" - - if "max_tokens" in gen_conf: - gen_conf["max_tokens"] = min( - gen_conf["max_tokens"], - max_tokens - used_token_count) - - def decorate_answer(answer): - nonlocal prompt_config, knowledges, kwargs, kbinfos - refs = [] - if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)): - answer, idx = retr.insert_citations(answer, - [ck["content_ltks"] - for ck in kbinfos["chunks"]], - [ck["vector"] - for ck in kbinfos["chunks"]], - embd_mdl, - tkweight=1 - dialog.vector_similarity_weight, - vtweight=dialog.vector_similarity_weight) - idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx]) - recall_docs = [ - d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx] - if not recall_docs: recall_docs = kbinfos["doc_aggs"] - kbinfos["doc_aggs"] = recall_docs - - refs = deepcopy(kbinfos) - for c in refs["chunks"]: - if c.get("vector"): - del c["vector"] - - if answer.lower().find("invalid key") >= 0 or answer.lower().find("invalid api") >= 0: - answer += " Please set LLM API-Key in 'User Setting -> Model Providers -> API-Key'" - return {"answer": answer, "reference": refs} - - if stream: - answer = "" - for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], gen_conf): - answer = ans - yield {"answer": answer, "reference": {}} - yield decorate_answer(answer) - else: - answer = chat_mdl.chat( - msg[0]["content"], msg[1:], gen_conf) - chat_logger.info("User: {}|Assistant: {}".format( - msg[-1]["content"], answer)) - yield decorate_answer(answer) - - -def use_sql(question, field_map, tenant_id, chat_mdl, quota=True): - sys_prompt = "你是一个DBA。你需要这对以下表的字段结构,根据用户的问题列表,写出最后一个问题对应的SQL。" - user_promt = """ -表名:{}; -数据库表字段说明如下: -{} - -问题如下: -{} -请写出SQL, 且只要SQL,不要有其他说明及文字。 -""".format( - index_name(tenant_id), - "\n".join([f"{k}: {v}" for k, v in field_map.items()]), - question - ) - tried_times = 0 - - def get_table(): - nonlocal sys_prompt, user_promt, question, tried_times - sql = chat_mdl.chat(sys_prompt, [{"role": "user", "content": user_promt}], { - "temperature": 0.06}) - print(user_promt, sql) - chat_logger.info(f"“{question}”==>{user_promt} get SQL: {sql}") - sql = re.sub(r"[\r\n]+", " ", sql.lower()) - sql = re.sub(r".*select ", "select ", sql.lower()) - sql = re.sub(r" +", " ", sql) - sql = re.sub(r"([;;]|```).*", "", sql) - if sql[:len("select ")] != "select ": - return None, None - if not re.search(r"((sum|avg|max|min)\(|group by )", sql.lower()): - if sql[:len("select *")] != "select *": - sql = "select doc_id,docnm_kwd," + sql[6:] - else: - flds = [] - for k in field_map.keys(): - if k in forbidden_select_fields4resume: - continue - if len(flds) > 11: - break - flds.append(k) - sql = "select doc_id,docnm_kwd," + ",".join(flds) + sql[8:] - - print(f"“{question}” get SQL(refined): {sql}") - - chat_logger.info(f"“{question}” get SQL(refined): {sql}") - tried_times += 1 - return retrievaler.sql_retrieval(sql, format="json"), sql - - tbl, sql = get_table() - if tbl is None: - return None - if tbl.get("error") and tried_times <= 2: - user_promt = """ - 表名:{}; - 数据库表字段说明如下: - {} - - 问题如下: - {} - - 你上一次给出的错误SQL如下: - {} - - 后台报错如下: 
- {} - - 请纠正SQL中的错误再写一遍,且只要SQL,不要有其他说明及文字。 - """.format( - index_name(tenant_id), - "\n".join([f"{k}: {v}" for k, v in field_map.items()]), - question, sql, tbl["error"] - ) - tbl, sql = get_table() - chat_logger.info("TRY it again: {}".format(sql)) - - chat_logger.info("GET table: {}".format(tbl)) - print(tbl) - if tbl.get("error") or len(tbl["rows"]) == 0: - return None - - docid_idx = set([ii for ii, c in enumerate( - tbl["columns"]) if c["name"] == "doc_id"]) - docnm_idx = set([ii for ii, c in enumerate( - tbl["columns"]) if c["name"] == "docnm_kwd"]) - clmn_idx = [ii for ii in range( - len(tbl["columns"])) if ii not in (docid_idx | docnm_idx)] - - # compose markdown table - clmns = "|" + "|".join([re.sub(r"(/.*|([^()]+))", "", field_map.get(tbl["columns"][i]["name"], - tbl["columns"][i]["name"])) for i in - clmn_idx]) + ("|Source|" if docid_idx and docid_idx else "|") - - line = "|" + "|".join(["------" for _ in range(len(clmn_idx))]) + \ - ("|------|" if docid_idx and docid_idx else "") - - rows = ["|" + - "|".join([rmSpace(str(r[i])) for i in clmn_idx]).replace("None", " ") + - "|" for r in tbl["rows"]] - if quota: - rows = "\n".join([r + f" ##{ii}$$ |" for ii, r in enumerate(rows)]) - else: - rows = "\n".join([r + f" ##{ii}$$ |" for ii, r in enumerate(rows)]) - rows = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+Z)?\|", "|", rows) - - if not docid_idx or not docnm_idx: - chat_logger.warning("SQL missing field: " + sql) - return { - "answer": "\n".join([clmns, line, rows]), - "reference": {"chunks": [], "doc_aggs": []} - } - - docid_idx = list(docid_idx)[0] - docnm_idx = list(docnm_idx)[0] - doc_aggs = {} - for r in tbl["rows"]: - if r[docid_idx] not in doc_aggs: - doc_aggs[r[docid_idx]] = {"doc_name": r[docnm_idx], "count": 0} - doc_aggs[r[docid_idx]]["count"] += 1 - return { - "answer": "\n".join([clmns, line, rows]), - "reference": {"chunks": [{"doc_id": r[docid_idx], "docnm_kwd": r[docnm_idx]} for r in tbl["rows"]], - "doc_aggs": [{"doc_id": did, "doc_name": d["doc_name"], "count": d["count"]} for did, d in - doc_aggs.items()]} - } - - -def relevant(tenant_id, llm_id, question, contents: list): - if llm_id2llm_type(llm_id) == "image2text": - chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id) - else: - chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id) - prompt = """ - You are a grader assessing relevance of a retrieved document to a user question. - It does not need to be a stringent test. The goal is to filter out erroneous retrievals. - If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. - Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. - No other words needed except 'yes' or 'no'. - """ - if not contents:return False - contents = "Documents: \n" + " - ".join(contents) - contents = f"Question: {question}\n" + contents - if num_tokens_from_string(contents) >= chat_mdl.max_length - 4: - contents = encoder.decode(encoder.encode(contents)[:chat_mdl.max_length - 4]) - ans = chat_mdl.chat(prompt, [{"role": "user", "content": contents}], {"temperature": 0.01}) - if ans.lower().find("yes") >= 0: return True - return False - - -def rewrite(tenant_id, llm_id, question): - if llm_id2llm_type(llm_id) == "image2text": - chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id) - else: - chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id) - prompt = """ - You are an expert at query expansion to generate a paraphrasing of a question. 
-    I can't retrieval relevant information from the knowledge base by using user's question directly.
-    You need to expand or paraphrase user's question by multiple ways such as using synonyms words/phrase,
-    writing the abbreviation in its entirety, adding some extra descriptions or explanations,
-    changing the way of expression, translating the original question into another language (English/Chinese), etc.
-    And return 5 versions of question and one is from translation.
-    Just list the question. No other words are needed.
-    """
-    ans = chat_mdl.chat(prompt, [{"role": "user", "content": question}], {"temperature": 0.8})
-    return ans
+#
+# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import json
+import re
+from copy import deepcopy
+
+from api.db import LLMType, ParserType
+from api.db.db_models import Dialog, Conversation
+from api.db.services.common_service import CommonService
+from api.db.services.knowledgebase_service import KnowledgebaseService
+from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
+from api.settings import chat_logger, retrievaler, kg_retrievaler
+from rag.app.resume import forbidden_select_fields4resume
+from rag.nlp import keyword_extraction
+from rag.nlp.search import index_name
+from rag.utils import rmSpace, num_tokens_from_string, encoder
+from api.utils.file_utils import get_project_base_directory
+
+
+class DialogService(CommonService):
+    model = Dialog
+
+
+class ConversationService(CommonService):
+    model = Conversation
+
+
+def message_fit_in(msg, max_length=4000):
+    def count():
+        nonlocal msg
+        tks_cnts = []
+        for m in msg:
+            tks_cnts.append(
+                {"role": m["role"], "count": num_tokens_from_string(m["content"])})
+        total = 0
+        for m in tks_cnts:
+            total += m["count"]
+        return total
+
+    c = count()
+    if c < max_length:
+        return c, msg
+
+    msg_ = [m for m in msg[:-1] if m["role"] == "system"]
+    msg_.append(msg[-1])
+    msg = msg_
+    c = count()
+    if c < max_length:
+        return c, msg
+
+    ll = num_tokens_from_string(msg_[0]["content"])
+    l = num_tokens_from_string(msg_[-1]["content"])
+    if ll / (ll + l) > 0.8:
+        # the system prompt dominates: truncate it and keep the user turn
+        m = msg_[0]["content"]
+        m = encoder.decode(encoder.encode(m)[:max_length - l])
+        msg[0]["content"] = m
+        return max_length, msg
+
+    m = msg_[1]["content"]
+    m = encoder.decode(encoder.encode(m)[:max_length - l])
+    msg[1]["content"] = m
+    return max_length, msg
+
+
+def llm_id2llm_type(llm_id):
+    fnm = os.path.join(get_project_base_directory(), "conf")
+    llm_factories = json.load(open(os.path.join(fnm, "llm_factories.json"), "r"))
+    for llm_factory in llm_factories["factory_llm_infos"]:
+        for llm in llm_factory["llm"]:
+            if llm_id == llm["llm_name"]:
+                # "model_type" may hold a comma-separated list (e.g. "chat,image2text");
+                # split(",") keeps the last entry, where strip(",")[-1] would have
+                # returned only the last character.
+                return llm["model_type"].split(",")[-1]
+
+
+def chat(dialog, messages, stream=True, **kwargs):
+    assert messages[-1]["role"] == "user", "The last content of this conversation is not from user."
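Before the body of `chat` continues, a minimal usage sketch of `message_fit_in` as defined above (the message list is made up; token counts come from `num_tokens_from_string` and `encoder` imported from `rag.utils`):

```python
# Illustrative only: when the history exceeds the budget, message_fit_in
# keeps the system message(s) plus the newest message and truncates the
# dominant side via an encode/decode round-trip.
history = [
    {"role": "system", "content": "You are a RAG assistant."},
    {"role": "user", "content": "First question"},
    {"role": "assistant", "content": "First answer"},
    {"role": "user", "content": "Follow-up question"},
]
used, fitted = message_fit_in(history, max_length=4000)
assert fitted[-1]["role"] == "user"  # the newest user turn always survives
```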
+    llm = LLMService.query(llm_name=dialog.llm_id)
+    if not llm:
+        llm = TenantLLMService.query(tenant_id=dialog.tenant_id, llm_name=dialog.llm_id)
+        if not llm:
+            raise LookupError("LLM(%s) not found" % dialog.llm_id)
+        max_tokens = 8192
+    else:
+        max_tokens = llm[0].max_tokens
+    kbs = KnowledgebaseService.get_by_ids(dialog.kb_ids)
+    embd_nms = list(set([kb.embd_id for kb in kbs]))
+    if len(embd_nms) != 1:
+        yield {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
+        return {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
+
+    is_kg = all([kb.parser_id == ParserType.KG for kb in kbs])
+    retr = retrievaler if not is_kg else kg_retrievaler
+
+    questions = [m["content"] for m in messages if m["role"] == "user"][-3:]
+    attachments = kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else None
+    if "doc_ids" in messages[-1]:
+        attachments = messages[-1]["doc_ids"]
+        for m in messages[:-1]:
+            if "doc_ids" in m:
+                attachments.extend(m["doc_ids"])
+
+    embd_mdl = LLMBundle(dialog.tenant_id, LLMType.EMBEDDING, embd_nms[0])
+    if llm_id2llm_type(dialog.llm_id) == "image2text":
+        chat_mdl = LLMBundle(dialog.tenant_id, LLMType.IMAGE2TEXT, dialog.llm_id)
+    else:
+        chat_mdl = LLMBundle(dialog.tenant_id, LLMType.CHAT, dialog.llm_id)
+
+    prompt_config = dialog.prompt_config
+    field_map = KnowledgebaseService.get_field_map(dialog.kb_ids)
+    # try to use sql if field mapping is good to go
+    if field_map:
+        chat_logger.info("Use SQL to retrieve: {}".format(questions[-1]))
+        ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True))
+        if ans:
+            yield ans
+            return
+
+    for p in prompt_config["parameters"]:
+        if p["key"] == "knowledge":
+            continue
+        if p["key"] not in kwargs and not p["optional"]:
+            raise KeyError("Missing parameter: " + p["key"])
+        if p["key"] not in kwargs:
+            prompt_config["system"] = prompt_config["system"].replace(
+                "{%s}" % p["key"], " ")
+
+    rerank_mdl = None
+    if dialog.rerank_id:
+        rerank_mdl = LLMBundle(dialog.tenant_id, LLMType.RERANK, dialog.rerank_id)
+
+    for _ in range(len(questions) // 2):
+        questions.append(questions[-1])
+    if "knowledge" not in [p["key"] for p in prompt_config["parameters"]]:
+        kbinfos = {"total": 0, "chunks": [], "doc_aggs": []}
+        knowledges = []  # keep downstream logging/prompting safe when retrieval is skipped
+    else:
+        if prompt_config.get("keyword", False):
+            questions[-1] += keyword_extraction(chat_mdl, questions[-1])
+        kbinfos = retr.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
+                                 dialog.similarity_threshold,
+                                 dialog.vector_similarity_weight,
+                                 doc_ids=attachments,
+                                 top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
+        knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
+        # self-RAG: if the first retrieval looks irrelevant, rewrite the query and retry once
+        if dialog.prompt_config.get("self_rag") and not relevant(dialog.tenant_id, dialog.llm_id, questions[-1], knowledges):
+            questions[-1] = rewrite(dialog.tenant_id, dialog.llm_id, questions[-1])
+            kbinfos = retr.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
+                                     dialog.similarity_threshold,
+                                     dialog.vector_similarity_weight,
+                                     doc_ids=attachments,
+                                     top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
+            knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
+
+    chat_logger.info(
+        "{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
+
+    if not knowledges and prompt_config.get("empty_response"):
+        yield {"answer": prompt_config["empty_response"], "reference": kbinfos}
+        return {"answer": prompt_config["empty_response"], "reference": kbinfos}
"reference": kbinfos} + + kwargs["knowledge"] = "\n".join(knowledges) + gen_conf = dialog.llm_setting + + msg = [{"role": "system", "content": prompt_config["system"].format(**kwargs)}] + msg.extend([{"role": m["role"], "content": re.sub(r"##\d+\$\$", "", m["content"])} + for m in messages if m["role"] != "system"]) + used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.97)) + assert len(msg) >= 2, f"message_fit_in has bug: {msg}" + + if "max_tokens" in gen_conf: + gen_conf["max_tokens"] = min( + gen_conf["max_tokens"], + max_tokens - used_token_count) + + def decorate_answer(answer): + nonlocal prompt_config, knowledges, kwargs, kbinfos + refs = [] + if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)): + answer, idx = retr.insert_citations(answer, + [ck["content_ltks"] + for ck in kbinfos["chunks"]], + [ck["vector"] + for ck in kbinfos["chunks"]], + embd_mdl, + tkweight=1 - dialog.vector_similarity_weight, + vtweight=dialog.vector_similarity_weight) + idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx]) + recall_docs = [ + d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx] + if not recall_docs: recall_docs = kbinfos["doc_aggs"] + kbinfos["doc_aggs"] = recall_docs + + refs = deepcopy(kbinfos) + for c in refs["chunks"]: + if c.get("vector"): + del c["vector"] + + if answer.lower().find("invalid key") >= 0 or answer.lower().find("invalid api") >= 0: + answer += " Please set LLM API-Key in 'User Setting -> Model Providers -> API-Key'" + return {"answer": answer, "reference": refs} + + if stream: + answer = "" + for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], gen_conf): + answer = ans + yield {"answer": answer, "reference": {}} + yield decorate_answer(answer) + else: + answer = chat_mdl.chat( + msg[0]["content"], msg[1:], gen_conf) + chat_logger.info("User: {}|Assistant: {}".format( + msg[-1]["content"], answer)) + yield decorate_answer(answer) + + +def use_sql(question, field_map, tenant_id, chat_mdl, quota=True): + sys_prompt = "你是一个DBA。你需要这对以下表的字段结构,根据用户的问题列表,写出最后一个问题对应的SQL。" + user_promt = """ +表名:{}; +数据库表字段说明如下: +{} + +问题如下: +{} +请写出SQL, 且只要SQL,不要有其他说明及文字。 +""".format( + index_name(tenant_id), + "\n".join([f"{k}: {v}" for k, v in field_map.items()]), + question + ) + tried_times = 0 + + def get_table(): + nonlocal sys_prompt, user_promt, question, tried_times + sql = chat_mdl.chat(sys_prompt, [{"role": "user", "content": user_promt}], { + "temperature": 0.06}) + print(user_promt, sql) + chat_logger.info(f"“{question}”==>{user_promt} get SQL: {sql}") + sql = re.sub(r"[\r\n]+", " ", sql.lower()) + sql = re.sub(r".*select ", "select ", sql.lower()) + sql = re.sub(r" +", " ", sql) + sql = re.sub(r"([;;]|```).*", "", sql) + if sql[:len("select ")] != "select ": + return None, None + if not re.search(r"((sum|avg|max|min)\(|group by )", sql.lower()): + if sql[:len("select *")] != "select *": + sql = "select doc_id,docnm_kwd," + sql[6:] + else: + flds = [] + for k in field_map.keys(): + if k in forbidden_select_fields4resume: + continue + if len(flds) > 11: + break + flds.append(k) + sql = "select doc_id,docnm_kwd," + ",".join(flds) + sql[8:] + + print(f"“{question}” get SQL(refined): {sql}") + + chat_logger.info(f"“{question}” get SQL(refined): {sql}") + tried_times += 1 + return retrievaler.sql_retrieval(sql, format="json"), sql + + tbl, sql = get_table() + if tbl is None: + return None + if tbl.get("error") and tried_times <= 2: + user_promt = """ + 表名:{}; + 数据库表字段说明如下: + {} + + 问题如下: + {} + + 你上一次给出的错误SQL如下: + {} + + 后台报错如下: 
+ {} + + 请纠正SQL中的错误再写一遍,且只要SQL,不要有其他说明及文字。 + """.format( + index_name(tenant_id), + "\n".join([f"{k}: {v}" for k, v in field_map.items()]), + question, sql, tbl["error"] + ) + tbl, sql = get_table() + chat_logger.info("TRY it again: {}".format(sql)) + + chat_logger.info("GET table: {}".format(tbl)) + print(tbl) + if tbl.get("error") or len(tbl["rows"]) == 0: + return None + + docid_idx = set([ii for ii, c in enumerate( + tbl["columns"]) if c["name"] == "doc_id"]) + docnm_idx = set([ii for ii, c in enumerate( + tbl["columns"]) if c["name"] == "docnm_kwd"]) + clmn_idx = [ii for ii in range( + len(tbl["columns"])) if ii not in (docid_idx | docnm_idx)] + + # compose markdown table + clmns = "|" + "|".join([re.sub(r"(/.*|([^()]+))", "", field_map.get(tbl["columns"][i]["name"], + tbl["columns"][i]["name"])) for i in + clmn_idx]) + ("|Source|" if docid_idx and docid_idx else "|") + + line = "|" + "|".join(["------" for _ in range(len(clmn_idx))]) + \ + ("|------|" if docid_idx and docid_idx else "") + + rows = ["|" + + "|".join([rmSpace(str(r[i])) for i in clmn_idx]).replace("None", " ") + + "|" for r in tbl["rows"]] + if quota: + rows = "\n".join([r + f" ##{ii}$$ |" for ii, r in enumerate(rows)]) + else: + rows = "\n".join([r + f" ##{ii}$$ |" for ii, r in enumerate(rows)]) + rows = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+Z)?\|", "|", rows) + + if not docid_idx or not docnm_idx: + chat_logger.warning("SQL missing field: " + sql) + return { + "answer": "\n".join([clmns, line, rows]), + "reference": {"chunks": [], "doc_aggs": []} + } + + docid_idx = list(docid_idx)[0] + docnm_idx = list(docnm_idx)[0] + doc_aggs = {} + for r in tbl["rows"]: + if r[docid_idx] not in doc_aggs: + doc_aggs[r[docid_idx]] = {"doc_name": r[docnm_idx], "count": 0} + doc_aggs[r[docid_idx]]["count"] += 1 + return { + "answer": "\n".join([clmns, line, rows]), + "reference": {"chunks": [{"doc_id": r[docid_idx], "docnm_kwd": r[docnm_idx]} for r in tbl["rows"]], + "doc_aggs": [{"doc_id": did, "doc_name": d["doc_name"], "count": d["count"]} for did, d in + doc_aggs.items()]} + } + + +def relevant(tenant_id, llm_id, question, contents: list): + if llm_id2llm_type(llm_id) == "image2text": + chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id) + else: + chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id) + prompt = """ + You are a grader assessing relevance of a retrieved document to a user question. + It does not need to be a stringent test. The goal is to filter out erroneous retrievals. + If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. + Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. + No other words needed except 'yes' or 'no'. + """ + if not contents:return False + contents = "Documents: \n" + " - ".join(contents) + contents = f"Question: {question}\n" + contents + if num_tokens_from_string(contents) >= chat_mdl.max_length - 4: + contents = encoder.decode(encoder.encode(contents)[:chat_mdl.max_length - 4]) + ans = chat_mdl.chat(prompt, [{"role": "user", "content": contents}], {"temperature": 0.01}) + if ans.lower().find("yes") >= 0: return True + return False + + +def rewrite(tenant_id, llm_id, question): + if llm_id2llm_type(llm_id) == "image2text": + chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id) + else: + chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id) + prompt = """ + You are an expert at query expansion to generate a paraphrasing of a question. 
+    I can't retrieve relevant information from the knowledge base using the user's question directly.
+    You need to expand or paraphrase the user's question in multiple ways, such as using synonyms/phrases,
+    writing abbreviations out in full, adding extra descriptions or explanations,
+    changing the way of expression, translating the original question into another language (English/Chinese), etc.
+    Return 5 versions of the question, one of which is a translation.
+    Just list the questions. No other words are needed.
+    """
+    ans = chat_mdl.chat(prompt, [{"role": "user", "content": question}], {"temperature": 0.8})
+    return ans
diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index f87d8f10d4da77d875d36b37284a78744ce51188..0eb2b8c9479359acd8d18de9c5e1ebadce51d144 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -1,382 +1,382 @@
-#
-# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-import random
-from datetime import datetime
-from elasticsearch_dsl import Q
-from peewee import fn
-
-from api.db.db_utils import bulk_insert_into_db
-from api.settings import stat_logger
-from api.utils import current_timestamp, get_format_time, get_uuid
-from rag.settings import SVR_QUEUE_NAME
-from rag.utils.es_conn import ELASTICSEARCH
-from rag.utils.minio_conn import MINIO
-from rag.nlp import search
-
-from api.db import FileType, TaskStatus, ParserType
-from api.db.db_models import DB, Knowledgebase, Tenant, Task
-from api.db.db_models import Document
-from api.db.services.common_service import CommonService
-from api.db.services.knowledgebase_service import KnowledgebaseService
-from api.db import StatusEnum
-from rag.utils.redis_conn import REDIS_CONN
-
-
-class DocumentService(CommonService):
-    model = Document
-
-    @classmethod
-    @DB.connection_context()
-    def get_by_kb_id(cls, kb_id, page_number, items_per_page,
-                     orderby, desc, keywords):
-        if keywords:
-            docs = cls.model.select().where(
-                (cls.model.kb_id == kb_id),
-                (fn.LOWER(cls.model.name).contains(keywords.lower()))
-            )
-        else:
-            docs = cls.model.select().where(cls.model.kb_id == kb_id)
-        count = docs.count()
-        if desc:
-            docs = docs.order_by(cls.model.getter_by(orderby).desc())
-        else:
-            docs = docs.order_by(cls.model.getter_by(orderby).asc())
-
-        docs = docs.paginate(page_number, items_per_page)
-
-        return list(docs.dicts()), count
-
-    @classmethod
-    @DB.connection_context()
-    def list_documents_in_dataset(cls, dataset_id, offset, count, order_by, descend, keywords):
-        if keywords:
-            docs = cls.model.select().where(
-                (cls.model.kb_id == dataset_id),
-                (fn.LOWER(cls.model.name).contains(keywords.lower()))
-            )
-        else:
-            docs = cls.model.select().where(cls.model.kb_id == dataset_id)
-
-        total = docs.count()
-
-        if descend == 'True':
-            docs = docs.order_by(cls.model.getter_by(order_by).desc())
-        if descend == 'False':
-            docs = docs.order_by(cls.model.getter_by(order_by).asc())
-
-        docs =
list(docs.dicts()) - docs_length = len(docs) - - if offset < 0 or offset > docs_length: - raise IndexError("Offset is out of the valid range.") - - if count == -1: - return docs[offset:], total - - return docs[offset:offset + count], total - - @classmethod - @DB.connection_context() - def insert(cls, doc): - if not cls.save(**doc): - raise RuntimeError("Database error (Document)!") - e, doc = cls.get_by_id(doc["id"]) - if not e: - raise RuntimeError("Database error (Document retrieval)!") - e, kb = KnowledgebaseService.get_by_id(doc.kb_id) - if not KnowledgebaseService.update_by_id( - kb.id, {"doc_num": kb.doc_num + 1}): - raise RuntimeError("Database error (Knowledgebase)!") - return doc - - @classmethod - @DB.connection_context() - def remove_document(cls, doc, tenant_id): - ELASTICSEARCH.deleteByQuery( - Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id)) - cls.clear_chunk_num(doc.id) - return cls.delete_by_id(doc.id) - - @classmethod - @DB.connection_context() - def get_newly_uploaded(cls): - fields = [ - cls.model.id, - cls.model.kb_id, - cls.model.parser_id, - cls.model.parser_config, - cls.model.name, - cls.model.type, - cls.model.location, - cls.model.size, - Knowledgebase.tenant_id, - Tenant.embd_id, - Tenant.img2txt_id, - Tenant.asr_id, - cls.model.update_time] - docs = cls.model.select(*fields) \ - .join(Knowledgebase, on=(cls.model.kb_id == Knowledgebase.id)) \ - .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))\ - .where( - cls.model.status == StatusEnum.VALID.value, - ~(cls.model.type == FileType.VIRTUAL.value), - cls.model.progress == 0, - cls.model.update_time >= current_timestamp() - 1000 * 600, - cls.model.run == TaskStatus.RUNNING.value)\ - .order_by(cls.model.update_time.asc()) - return list(docs.dicts()) - - @classmethod - @DB.connection_context() - def get_unfinished_docs(cls): - fields = [cls.model.id, cls.model.process_begin_at, cls.model.parser_config, cls.model.progress_msg, cls.model.run] - docs = cls.model.select(*fields) \ - .where( - cls.model.status == StatusEnum.VALID.value, - ~(cls.model.type == FileType.VIRTUAL.value), - cls.model.progress < 1, - cls.model.progress > 0) - return list(docs.dicts()) - - @classmethod - @DB.connection_context() - def increment_chunk_num(cls, doc_id, kb_id, token_num, chunk_num, duation): - num = cls.model.update(token_num=cls.model.token_num + token_num, - chunk_num=cls.model.chunk_num + chunk_num, - process_duation=cls.model.process_duation + duation).where( - cls.model.id == doc_id).execute() - if num == 0: - raise LookupError( - "Document not found which is supposed to be there") - num = Knowledgebase.update( - token_num=Knowledgebase.token_num + - token_num, - chunk_num=Knowledgebase.chunk_num + - chunk_num).where( - Knowledgebase.id == kb_id).execute() - return num - - @classmethod - @DB.connection_context() - def decrement_chunk_num(cls, doc_id, kb_id, token_num, chunk_num, duation): - num = cls.model.update(token_num=cls.model.token_num - token_num, - chunk_num=cls.model.chunk_num - chunk_num, - process_duation=cls.model.process_duation + duation).where( - cls.model.id == doc_id).execute() - if num == 0: - raise LookupError( - "Document not found which is supposed to be there") - num = Knowledgebase.update( - token_num=Knowledgebase.token_num - - token_num, - chunk_num=Knowledgebase.chunk_num - - chunk_num - ).where( - Knowledgebase.id == kb_id).execute() - return num - - @classmethod - @DB.connection_context() - def clear_chunk_num(cls, doc_id): - doc = cls.model.get_by_id(doc_id) - assert doc, "Can't 
fine document in database." - - num = Knowledgebase.update( - token_num=Knowledgebase.token_num - - doc.token_num, - chunk_num=Knowledgebase.chunk_num - - doc.chunk_num, - doc_num=Knowledgebase.doc_num-1 - ).where( - Knowledgebase.id == doc.kb_id).execute() - return num - - @classmethod - @DB.connection_context() - def get_tenant_id(cls, doc_id): - docs = cls.model.select( - Knowledgebase.tenant_id).join( - Knowledgebase, on=( - Knowledgebase.id == cls.model.kb_id)).where( - cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value) - docs = docs.dicts() - if not docs: - return - return docs[0]["tenant_id"] - - @classmethod - @DB.connection_context() - def get_tenant_id_by_name(cls, name): - docs = cls.model.select( - Knowledgebase.tenant_id).join( - Knowledgebase, on=( - Knowledgebase.id == cls.model.kb_id)).where( - cls.model.name == name, Knowledgebase.status == StatusEnum.VALID.value) - docs = docs.dicts() - if not docs: - return - return docs[0]["tenant_id"] - - @classmethod - @DB.connection_context() - def get_embd_id(cls, doc_id): - docs = cls.model.select( - Knowledgebase.embd_id).join( - Knowledgebase, on=( - Knowledgebase.id == cls.model.kb_id)).where( - cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value) - docs = docs.dicts() - if not docs: - return - return docs[0]["embd_id"] - - @classmethod - @DB.connection_context() - def get_doc_id_by_doc_name(cls, doc_name): - fields = [cls.model.id] - doc_id = cls.model.select(*fields) \ - .where(cls.model.name == doc_name) - doc_id = doc_id.dicts() - if not doc_id: - return - return doc_id[0]["id"] - - @classmethod - @DB.connection_context() - def get_thumbnails(cls, docids): - fields = [cls.model.id, cls.model.thumbnail] - return list(cls.model.select( - *fields).where(cls.model.id.in_(docids)).dicts()) - - @classmethod - @DB.connection_context() - def update_parser_config(cls, id, config): - e, d = cls.get_by_id(id) - if not e: - raise LookupError(f"Document({id}) not found.") - - def dfs_update(old, new): - for k, v in new.items(): - if k not in old: - old[k] = v - continue - if isinstance(v, dict): - assert isinstance(old[k], dict) - dfs_update(old[k], v) - else: - old[k] = v - dfs_update(d.parser_config, config) - cls.update_by_id(id, {"parser_config": d.parser_config}) - - @classmethod - @DB.connection_context() - def get_doc_count(cls, tenant_id): - docs = cls.model.select(cls.model.id).join(Knowledgebase, - on=(Knowledgebase.id == cls.model.kb_id)).where( - Knowledgebase.tenant_id == tenant_id) - return len(docs) - - @classmethod - @DB.connection_context() - def begin2parse(cls, docid): - cls.update_by_id( - docid, {"progress": random.random() * 1 / 100., - "progress_msg": "Task dispatched...", - "process_begin_at": get_format_time() - }) - - @classmethod - @DB.connection_context() - def update_progress(cls): - docs = cls.get_unfinished_docs() - for d in docs: - try: - tsks = Task.query(doc_id=d["id"], order_by=Task.create_time) - if not tsks: - continue - msg = [] - prg = 0 - finished = True - bad = 0 - e, doc = DocumentService.get_by_id(d["id"]) - status = doc.run#TaskStatus.RUNNING.value - for t in tsks: - if 0 <= t.progress < 1: - finished = False - prg += t.progress if t.progress >= 0 else 0 - if t.progress_msg not in msg: - msg.append(t.progress_msg) - if t.progress == -1: - bad += 1 - prg /= len(tsks) - if finished and bad: - prg = -1 - status = TaskStatus.FAIL.value - elif finished: - if d["parser_config"].get("raptor", {}).get("use_raptor") and d["progress_msg"].lower().find(" raptor")<0: 
- queue_raptor_tasks(d) - prg *= 0.98 - msg.append("------ RAPTOR -------") - else: - status = TaskStatus.DONE.value - - msg = "\n".join(msg) - info = { - "process_duation": datetime.timestamp( - datetime.now()) - - d["process_begin_at"].timestamp(), - "run": status} - if prg != 0: - info["progress"] = prg - if msg: - info["progress_msg"] = msg - cls.update_by_id(d["id"], info) - except Exception as e: - stat_logger.error("fetch task exception:" + str(e)) - - @classmethod - @DB.connection_context() - def get_kb_doc_count(cls, kb_id): - return len(cls.model.select(cls.model.id).where( - cls.model.kb_id == kb_id).dicts()) - - - @classmethod - @DB.connection_context() - def do_cancel(cls, doc_id): - try: - _, doc = DocumentService.get_by_id(doc_id) - return doc.run == TaskStatus.CANCEL.value or doc.progress < 0 - except Exception as e: - pass - return False - - -def queue_raptor_tasks(doc): - def new_task(): - nonlocal doc - return { - "id": get_uuid(), - "doc_id": doc["id"], - "from_page": 0, - "to_page": -1, - "progress_msg": "Start to do RAPTOR (Recursive Abstractive Processing For Tree-Organized Retrieval)." - } - - task = new_task() - bulk_insert_into_db(Task, [task], True) - task["type"] = "raptor" - assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=task), "Can't access Redis. Please check the Redis' status." +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
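For context on `queue_raptor_tasks` just above (re-added unchanged further below): it inserts a Task row, then publishes the same dict to Redis with an extra `type` field that only the queue consumer sees. The payload shape, with illustrative values:

```python
# Shape of the message queue_raptor_tasks publishes (values illustrative):
task = {
    "id": "<uuid from get_uuid()>",
    "doc_id": "<document id>",
    "from_page": 0,
    "to_page": -1,   # -1 means "the whole document"
    "progress_msg": "Start to do RAPTOR (Recursive Abstractive Processing "
                    "For Tree-Organized Retrieval).",
}
task["type"] = "raptor"  # added after the DB insert; never stored in the Task table
```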
+# +import random +from datetime import datetime +from elasticsearch_dsl import Q +from peewee import fn + +from api.db.db_utils import bulk_insert_into_db +from api.settings import stat_logger +from api.utils import current_timestamp, get_format_time, get_uuid +from rag.settings import SVR_QUEUE_NAME +from rag.utils.es_conn import ELASTICSEARCH +from rag.utils.minio_conn import MINIO +from rag.nlp import search + +from api.db import FileType, TaskStatus, ParserType +from api.db.db_models import DB, Knowledgebase, Tenant, Task +from api.db.db_models import Document +from api.db.services.common_service import CommonService +from api.db.services.knowledgebase_service import KnowledgebaseService +from api.db import StatusEnum +from rag.utils.redis_conn import REDIS_CONN + + +class DocumentService(CommonService): + model = Document + + @classmethod + @DB.connection_context() + def get_by_kb_id(cls, kb_id, page_number, items_per_page, + orderby, desc, keywords): + if keywords: + docs = cls.model.select().where( + (cls.model.kb_id == kb_id), + (fn.LOWER(cls.model.name).contains(keywords.lower())) + ) + else: + docs = cls.model.select().where(cls.model.kb_id == kb_id) + count = docs.count() + if desc: + docs = docs.order_by(cls.model.getter_by(orderby).desc()) + else: + docs = docs.order_by(cls.model.getter_by(orderby).asc()) + + docs = docs.paginate(page_number, items_per_page) + + return list(docs.dicts()), count + + @classmethod + @DB.connection_context() + def list_documents_in_dataset(cls, dataset_id, offset, count, order_by, descend, keywords): + if keywords: + docs = cls.model.select().where( + (cls.model.kb_id == dataset_id), + (fn.LOWER(cls.model.name).contains(keywords.lower())) + ) + else: + docs = cls.model.select().where(cls.model.kb_id == dataset_id) + + total = docs.count() + + if descend == 'True': + docs = docs.order_by(cls.model.getter_by(order_by).desc()) + if descend == 'False': + docs = docs.order_by(cls.model.getter_by(order_by).asc()) + + docs = list(docs.dicts()) + docs_length = len(docs) + + if offset < 0 or offset > docs_length: + raise IndexError("Offset is out of the valid range.") + + if count == -1: + return docs[offset:], total + + return docs[offset:offset + count], total + + @classmethod + @DB.connection_context() + def insert(cls, doc): + if not cls.save(**doc): + raise RuntimeError("Database error (Document)!") + e, doc = cls.get_by_id(doc["id"]) + if not e: + raise RuntimeError("Database error (Document retrieval)!") + e, kb = KnowledgebaseService.get_by_id(doc.kb_id) + if not KnowledgebaseService.update_by_id( + kb.id, {"doc_num": kb.doc_num + 1}): + raise RuntimeError("Database error (Knowledgebase)!") + return doc + + @classmethod + @DB.connection_context() + def remove_document(cls, doc, tenant_id): + ELASTICSEARCH.deleteByQuery( + Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id)) + cls.clear_chunk_num(doc.id) + return cls.delete_by_id(doc.id) + + @classmethod + @DB.connection_context() + def get_newly_uploaded(cls): + fields = [ + cls.model.id, + cls.model.kb_id, + cls.model.parser_id, + cls.model.parser_config, + cls.model.name, + cls.model.type, + cls.model.location, + cls.model.size, + Knowledgebase.tenant_id, + Tenant.embd_id, + Tenant.img2txt_id, + Tenant.asr_id, + cls.model.update_time] + docs = cls.model.select(*fields) \ + .join(Knowledgebase, on=(cls.model.kb_id == Knowledgebase.id)) \ + .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))\ + .where( + cls.model.status == StatusEnum.VALID.value, + ~(cls.model.type == 
FileType.VIRTUAL.value),
+                cls.model.progress == 0,
+                cls.model.update_time >= current_timestamp() - 1000 * 600,
+                cls.model.run == TaskStatus.RUNNING.value)\
+            .order_by(cls.model.update_time.asc())
+        return list(docs.dicts())
+
+    @classmethod
+    @DB.connection_context()
+    def get_unfinished_docs(cls):
+        fields = [cls.model.id, cls.model.process_begin_at, cls.model.parser_config, cls.model.progress_msg, cls.model.run]
+        docs = cls.model.select(*fields) \
+            .where(
+                cls.model.status == StatusEnum.VALID.value,
+                ~(cls.model.type == FileType.VIRTUAL.value),
+                cls.model.progress < 1,
+                cls.model.progress > 0)
+        return list(docs.dicts())
+
+    @classmethod
+    @DB.connection_context()
+    def increment_chunk_num(cls, doc_id, kb_id, token_num, chunk_num, duration):
+        num = cls.model.update(token_num=cls.model.token_num + token_num,
+                               chunk_num=cls.model.chunk_num + chunk_num,
+                               process_duation=cls.model.process_duation + duration).where(
+            cls.model.id == doc_id).execute()
+        if num == 0:
+            raise LookupError(
+                "Document not found which is supposed to be there")
+        num = Knowledgebase.update(
+            token_num=Knowledgebase.token_num +
+            token_num,
+            chunk_num=Knowledgebase.chunk_num +
+            chunk_num).where(
+            Knowledgebase.id == kb_id).execute()
+        return num
+
+    @classmethod
+    @DB.connection_context()
+    def decrement_chunk_num(cls, doc_id, kb_id, token_num, chunk_num, duration):
+        num = cls.model.update(token_num=cls.model.token_num - token_num,
+                               chunk_num=cls.model.chunk_num - chunk_num,
+                               process_duation=cls.model.process_duation + duration).where(
+            cls.model.id == doc_id).execute()
+        if num == 0:
+            raise LookupError(
+                "Document not found which is supposed to be there")
+        num = Knowledgebase.update(
+            token_num=Knowledgebase.token_num -
+            token_num,
+            chunk_num=Knowledgebase.chunk_num -
+            chunk_num
+        ).where(
+            Knowledgebase.id == kb_id).execute()
+        return num
+
+    @classmethod
+    @DB.connection_context()
+    def clear_chunk_num(cls, doc_id):
+        doc = cls.model.get_by_id(doc_id)
+        assert doc, "Can't find document in database."
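`increment_chunk_num`/`decrement_chunk_num` above apply the same token/chunk delta at both the Document and the Knowledgebase level; a toy model of that invariant (plain dicts stand in for the two rows, no peewee, no DB):

```python
# Toy model of the two-level counter bookkeeping:
doc = {"token_num": 0, "chunk_num": 0}
kb = {"token_num": 0, "chunk_num": 0, "doc_num": 1}

def apply_delta(tokens, chunks):
    for row in (doc, kb):
        row["token_num"] += tokens
        row["chunk_num"] += chunks

apply_delta(1200, 8)     # a parsing task finished
apply_delta(-1200, -8)   # the same document was removed again
assert doc == {"token_num": 0, "chunk_num": 0}
```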
+ + num = Knowledgebase.update( + token_num=Knowledgebase.token_num - + doc.token_num, + chunk_num=Knowledgebase.chunk_num - + doc.chunk_num, + doc_num=Knowledgebase.doc_num-1 + ).where( + Knowledgebase.id == doc.kb_id).execute() + return num + + @classmethod + @DB.connection_context() + def get_tenant_id(cls, doc_id): + docs = cls.model.select( + Knowledgebase.tenant_id).join( + Knowledgebase, on=( + Knowledgebase.id == cls.model.kb_id)).where( + cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value) + docs = docs.dicts() + if not docs: + return + return docs[0]["tenant_id"] + + @classmethod + @DB.connection_context() + def get_tenant_id_by_name(cls, name): + docs = cls.model.select( + Knowledgebase.tenant_id).join( + Knowledgebase, on=( + Knowledgebase.id == cls.model.kb_id)).where( + cls.model.name == name, Knowledgebase.status == StatusEnum.VALID.value) + docs = docs.dicts() + if not docs: + return + return docs[0]["tenant_id"] + + @classmethod + @DB.connection_context() + def get_embd_id(cls, doc_id): + docs = cls.model.select( + Knowledgebase.embd_id).join( + Knowledgebase, on=( + Knowledgebase.id == cls.model.kb_id)).where( + cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value) + docs = docs.dicts() + if not docs: + return + return docs[0]["embd_id"] + + @classmethod + @DB.connection_context() + def get_doc_id_by_doc_name(cls, doc_name): + fields = [cls.model.id] + doc_id = cls.model.select(*fields) \ + .where(cls.model.name == doc_name) + doc_id = doc_id.dicts() + if not doc_id: + return + return doc_id[0]["id"] + + @classmethod + @DB.connection_context() + def get_thumbnails(cls, docids): + fields = [cls.model.id, cls.model.thumbnail] + return list(cls.model.select( + *fields).where(cls.model.id.in_(docids)).dicts()) + + @classmethod + @DB.connection_context() + def update_parser_config(cls, id, config): + e, d = cls.get_by_id(id) + if not e: + raise LookupError(f"Document({id}) not found.") + + def dfs_update(old, new): + for k, v in new.items(): + if k not in old: + old[k] = v + continue + if isinstance(v, dict): + assert isinstance(old[k], dict) + dfs_update(old[k], v) + else: + old[k] = v + dfs_update(d.parser_config, config) + cls.update_by_id(id, {"parser_config": d.parser_config}) + + @classmethod + @DB.connection_context() + def get_doc_count(cls, tenant_id): + docs = cls.model.select(cls.model.id).join(Knowledgebase, + on=(Knowledgebase.id == cls.model.kb_id)).where( + Knowledgebase.tenant_id == tenant_id) + return len(docs) + + @classmethod + @DB.connection_context() + def begin2parse(cls, docid): + cls.update_by_id( + docid, {"progress": random.random() * 1 / 100., + "progress_msg": "Task dispatched...", + "process_begin_at": get_format_time() + }) + + @classmethod + @DB.connection_context() + def update_progress(cls): + docs = cls.get_unfinished_docs() + for d in docs: + try: + tsks = Task.query(doc_id=d["id"], order_by=Task.create_time) + if not tsks: + continue + msg = [] + prg = 0 + finished = True + bad = 0 + e, doc = DocumentService.get_by_id(d["id"]) + status = doc.run#TaskStatus.RUNNING.value + for t in tsks: + if 0 <= t.progress < 1: + finished = False + prg += t.progress if t.progress >= 0 else 0 + if t.progress_msg not in msg: + msg.append(t.progress_msg) + if t.progress == -1: + bad += 1 + prg /= len(tsks) + if finished and bad: + prg = -1 + status = TaskStatus.FAIL.value + elif finished: + if d["parser_config"].get("raptor", {}).get("use_raptor") and d["progress_msg"].lower().find(" raptor")<0: + queue_raptor_tasks(d) + 
prg *= 0.98 + msg.append("------ RAPTOR -------") + else: + status = TaskStatus.DONE.value + + msg = "\n".join(msg) + info = { + "process_duation": datetime.timestamp( + datetime.now()) - + d["process_begin_at"].timestamp(), + "run": status} + if prg != 0: + info["progress"] = prg + if msg: + info["progress_msg"] = msg + cls.update_by_id(d["id"], info) + except Exception as e: + stat_logger.error("fetch task exception:" + str(e)) + + @classmethod + @DB.connection_context() + def get_kb_doc_count(cls, kb_id): + return len(cls.model.select(cls.model.id).where( + cls.model.kb_id == kb_id).dicts()) + + + @classmethod + @DB.connection_context() + def do_cancel(cls, doc_id): + try: + _, doc = DocumentService.get_by_id(doc_id) + return doc.run == TaskStatus.CANCEL.value or doc.progress < 0 + except Exception as e: + pass + return False + + +def queue_raptor_tasks(doc): + def new_task(): + nonlocal doc + return { + "id": get_uuid(), + "doc_id": doc["id"], + "from_page": 0, + "to_page": -1, + "progress_msg": "Start to do RAPTOR (Recursive Abstractive Processing For Tree-Organized Retrieval)." + } + + task = new_task() + bulk_insert_into_db(Task, [task], True) + task["type"] = "raptor" + assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=task), "Can't access Redis. Please check the Redis' status." diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py index b9c12ef71cee77556af0e053a8a8b1ad7b91b5be..2874ee261c6e849957c8a2fcabeefb2eadb93c68 100644 --- a/api/db/services/knowledgebase_service.py +++ b/api/db/services/knowledgebase_service.py @@ -1,144 +1,144 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
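A condensed model of the progress aggregation in `DocumentService.update_progress` above (toy numbers, no database access):

```python
# Condensed model of update_progress's per-document aggregation:
tasks = [1.0, 1.0, -1]           # two finished tasks, one failed (-1)
prg, finished, bad = 0.0, True, 0
for t in tasks:
    if 0 <= t < 1:
        finished = False         # anything mid-flight keeps the doc running
    prg += t if t >= 0 else 0
    bad += t == -1
prg /= len(tasks)
if finished and bad:
    prg = -1                     # any failed task poisons the whole document
print(prg)                       # -> -1
```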
-# -from api.db import StatusEnum, TenantPermission -from api.db.db_models import Knowledgebase, DB, Tenant -from api.db.services.common_service import CommonService - - -class KnowledgebaseService(CommonService): - model = Knowledgebase - - @classmethod - @DB.connection_context() - def get_by_tenant_ids(cls, joined_tenant_ids, user_id, - page_number, items_per_page, orderby, desc): - kbs = cls.model.select().where( - ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == - TenantPermission.TEAM.value)) | ( - cls.model.tenant_id == user_id)) - & (cls.model.status == StatusEnum.VALID.value) - ) - if desc: - kbs = kbs.order_by(cls.model.getter_by(orderby).desc()) - else: - kbs = kbs.order_by(cls.model.getter_by(orderby).asc()) - - kbs = kbs.paginate(page_number, items_per_page) - - return list(kbs.dicts()) - - @classmethod - @DB.connection_context() - def get_by_tenant_ids_by_offset(cls, joined_tenant_ids, user_id, offset, count, orderby, desc): - kbs = cls.model.select().where( - ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == - TenantPermission.TEAM.value)) | ( - cls.model.tenant_id == user_id)) - & (cls.model.status == StatusEnum.VALID.value) - ) - if desc: - kbs = kbs.order_by(cls.model.getter_by(orderby).desc()) - else: - kbs = kbs.order_by(cls.model.getter_by(orderby).asc()) - - kbs = list(kbs.dicts()) - - kbs_length = len(kbs) - if offset < 0 or offset > kbs_length: - raise IndexError("Offset is out of the valid range.") - - if count == -1: - return kbs[offset:] - - return kbs[offset:offset+count] - - @classmethod - @DB.connection_context() - def get_detail(cls, kb_id): - fields = [ - cls.model.id, - #Tenant.embd_id, - cls.model.embd_id, - cls.model.avatar, - cls.model.name, - cls.model.language, - cls.model.description, - cls.model.permission, - cls.model.doc_num, - cls.model.token_num, - cls.model.chunk_num, - cls.model.parser_id, - cls.model.parser_config] - kbs = cls.model.select(*fields).join(Tenant, on=( - (Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where( - (cls.model.id == kb_id), - (cls.model.status == StatusEnum.VALID.value) - ) - if not kbs: - return - d = kbs[0].to_dict() - #d["embd_id"] = kbs[0].tenant.embd_id - return d - - @classmethod - @DB.connection_context() - def update_parser_config(cls, id, config): - e, m = cls.get_by_id(id) - if not e: - raise LookupError(f"knowledgebase({id}) not found.") - - def dfs_update(old, new): - for k, v in new.items(): - if k not in old: - old[k] = v - continue - if isinstance(v, dict): - assert isinstance(old[k], dict) - dfs_update(old[k], v) - elif isinstance(v, list): - assert isinstance(old[k], list) - old[k] = list(set(old[k] + v)) - else: - old[k] = v - - dfs_update(m.parser_config, config) - cls.update_by_id(id, {"parser_config": m.parser_config}) - - @classmethod - @DB.connection_context() - def get_field_map(cls, ids): - conf = {} - for k in cls.get_by_ids(ids): - if k.parser_config and "field_map" in k.parser_config: - conf.update(k.parser_config["field_map"]) - return conf - - @classmethod - @DB.connection_context() - def get_by_name(cls, kb_name, tenant_id): - kb = cls.model.select().where( - (cls.model.name == kb_name) - & (cls.model.tenant_id == tenant_id) - & (cls.model.status == StatusEnum.VALID.value) - ) - if kb: - return True, kb[0] - return False, None - - @classmethod - @DB.connection_context() - def get_all_ids(cls): - return [m["id"] for m in cls.model.select(cls.model.id).dicts()] +# +# Copyright 2024 The InfiniFlow Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from api.db import StatusEnum, TenantPermission +from api.db.db_models import Knowledgebase, DB, Tenant +from api.db.services.common_service import CommonService + + +class KnowledgebaseService(CommonService): + model = Knowledgebase + + @classmethod + @DB.connection_context() + def get_by_tenant_ids(cls, joined_tenant_ids, user_id, + page_number, items_per_page, orderby, desc): + kbs = cls.model.select().where( + ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == + TenantPermission.TEAM.value)) | ( + cls.model.tenant_id == user_id)) + & (cls.model.status == StatusEnum.VALID.value) + ) + if desc: + kbs = kbs.order_by(cls.model.getter_by(orderby).desc()) + else: + kbs = kbs.order_by(cls.model.getter_by(orderby).asc()) + + kbs = kbs.paginate(page_number, items_per_page) + + return list(kbs.dicts()) + + @classmethod + @DB.connection_context() + def get_by_tenant_ids_by_offset(cls, joined_tenant_ids, user_id, offset, count, orderby, desc): + kbs = cls.model.select().where( + ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == + TenantPermission.TEAM.value)) | ( + cls.model.tenant_id == user_id)) + & (cls.model.status == StatusEnum.VALID.value) + ) + if desc: + kbs = kbs.order_by(cls.model.getter_by(orderby).desc()) + else: + kbs = kbs.order_by(cls.model.getter_by(orderby).asc()) + + kbs = list(kbs.dicts()) + + kbs_length = len(kbs) + if offset < 0 or offset > kbs_length: + raise IndexError("Offset is out of the valid range.") + + if count == -1: + return kbs[offset:] + + return kbs[offset:offset+count] + + @classmethod + @DB.connection_context() + def get_detail(cls, kb_id): + fields = [ + cls.model.id, + #Tenant.embd_id, + cls.model.embd_id, + cls.model.avatar, + cls.model.name, + cls.model.language, + cls.model.description, + cls.model.permission, + cls.model.doc_num, + cls.model.token_num, + cls.model.chunk_num, + cls.model.parser_id, + cls.model.parser_config] + kbs = cls.model.select(*fields).join(Tenant, on=( + (Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where( + (cls.model.id == kb_id), + (cls.model.status == StatusEnum.VALID.value) + ) + if not kbs: + return + d = kbs[0].to_dict() + #d["embd_id"] = kbs[0].tenant.embd_id + return d + + @classmethod + @DB.connection_context() + def update_parser_config(cls, id, config): + e, m = cls.get_by_id(id) + if not e: + raise LookupError(f"knowledgebase({id}) not found.") + + def dfs_update(old, new): + for k, v in new.items(): + if k not in old: + old[k] = v + continue + if isinstance(v, dict): + assert isinstance(old[k], dict) + dfs_update(old[k], v) + elif isinstance(v, list): + assert isinstance(old[k], list) + old[k] = list(set(old[k] + v)) + else: + old[k] = v + + dfs_update(m.parser_config, config) + cls.update_by_id(id, {"parser_config": m.parser_config}) + + @classmethod + @DB.connection_context() + def get_field_map(cls, ids): + conf = {} + for k in cls.get_by_ids(ids): + if k.parser_config 
and "field_map" in k.parser_config: + conf.update(k.parser_config["field_map"]) + return conf + + @classmethod + @DB.connection_context() + def get_by_name(cls, kb_name, tenant_id): + kb = cls.model.select().where( + (cls.model.name == kb_name) + & (cls.model.tenant_id == tenant_id) + & (cls.model.status == StatusEnum.VALID.value) + ) + if kb: + return True, kb[0] + return False, None + + @classmethod + @DB.connection_context() + def get_all_ids(cls): + return [m["id"] for m in cls.model.select(cls.model.id).dicts()] diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py index 4c34b7e1ff55ea4cad11f632a3a929f76a59aacd..15cf545c0744d6a467d98cd1d0302a169b258aee 100644 --- a/api/db/services/llm_service.py +++ b/api/db/services/llm_service.py @@ -1,242 +1,242 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from api.db.services.user_service import TenantService -from api.settings import database_logger -from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel -from api.db import LLMType -from api.db.db_models import DB, UserTenant -from api.db.db_models import LLMFactories, LLM, TenantLLM -from api.db.services.common_service import CommonService - - -class LLMFactoriesService(CommonService): - model = LLMFactories - - -class LLMService(CommonService): - model = LLM - - -class TenantLLMService(CommonService): - model = TenantLLM - - @classmethod - @DB.connection_context() - def get_api_key(cls, tenant_id, model_name): - objs = cls.query(tenant_id=tenant_id, llm_name=model_name) - if not objs: - return - return objs[0] - - @classmethod - @DB.connection_context() - def get_my_llms(cls, tenant_id): - fields = [ - cls.model.llm_factory, - LLMFactories.logo, - LLMFactories.tags, - cls.model.model_type, - cls.model.llm_name, - cls.model.used_tokens - ] - objs = cls.model.select(*fields).join(LLMFactories, on=(cls.model.llm_factory == LLMFactories.name)).where( - cls.model.tenant_id == tenant_id, ~cls.model.api_key.is_null()).dicts() - - return list(objs) - - @classmethod - @DB.connection_context() - def model_instance(cls, tenant_id, llm_type, - llm_name=None, lang="Chinese"): - e, tenant = TenantService.get_by_id(tenant_id) - if not e: - raise LookupError("Tenant not found") - - if llm_type == LLMType.EMBEDDING.value: - mdlnm = tenant.embd_id if not llm_name else llm_name - elif llm_type == LLMType.SPEECH2TEXT.value: - mdlnm = tenant.asr_id - elif llm_type == LLMType.IMAGE2TEXT.value: - mdlnm = tenant.img2txt_id if not llm_name else llm_name - elif llm_type == LLMType.CHAT.value: - mdlnm = tenant.llm_id if not llm_name else llm_name - elif llm_type == LLMType.RERANK: - mdlnm = tenant.rerank_id if not llm_name else llm_name - else: - assert False, "LLM type error" - - model_config = cls.get_api_key(tenant_id, mdlnm) - if model_config: model_config = model_config.to_dict() - if not model_config: - if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]: - llm = 
LLMService.query(llm_name=llm_name if llm_name else mdlnm) - if llm and llm[0].fid in ["Youdao", "FastEmbed", "BAAI"]: - model_config = {"llm_factory": llm[0].fid, "api_key":"", "llm_name": llm_name if llm_name else mdlnm, "api_base": ""} - if not model_config: - if llm_name == "flag-embedding": - model_config = {"llm_factory": "Tongyi-Qianwen", "api_key": "", - "llm_name": llm_name, "api_base": ""} - else: - if not mdlnm: - raise LookupError(f"Type of {llm_type} model is not set.") - raise LookupError("Model({}) not authorized".format(mdlnm)) - - if llm_type == LLMType.EMBEDDING.value: - if model_config["llm_factory"] not in EmbeddingModel: - return - return EmbeddingModel[model_config["llm_factory"]]( - model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"]) - - if llm_type == LLMType.RERANK: - if model_config["llm_factory"] not in RerankModel: - return - return RerankModel[model_config["llm_factory"]]( - model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"]) - - if llm_type == LLMType.IMAGE2TEXT.value: - if model_config["llm_factory"] not in CvModel: - return - return CvModel[model_config["llm_factory"]]( - model_config["api_key"], model_config["llm_name"], lang, - base_url=model_config["api_base"] - ) - - if llm_type == LLMType.CHAT.value: - if model_config["llm_factory"] not in ChatModel: - return - return ChatModel[model_config["llm_factory"]]( - model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"]) - - if llm_type == LLMType.SPEECH2TEXT: - if model_config["llm_factory"] not in Seq2txtModel: - return - return Seq2txtModel[model_config["llm_factory"]]( - model_config["api_key"], model_config["llm_name"], lang, - base_url=model_config["api_base"] - ) - - @classmethod - @DB.connection_context() - def increase_usage(cls, tenant_id, llm_type, used_tokens, llm_name=None): - e, tenant = TenantService.get_by_id(tenant_id) - if not e: - raise LookupError("Tenant not found") - - if llm_type == LLMType.EMBEDDING.value: - mdlnm = tenant.embd_id - elif llm_type == LLMType.SPEECH2TEXT.value: - mdlnm = tenant.asr_id - elif llm_type == LLMType.IMAGE2TEXT.value: - mdlnm = tenant.img2txt_id - elif llm_type == LLMType.CHAT.value: - mdlnm = tenant.llm_id if not llm_name else llm_name - elif llm_type == LLMType.RERANK: - mdlnm = tenant.llm_id if not llm_name else llm_name - else: - assert False, "LLM type error" - - num = 0 - try: - for u in cls.query(tenant_id = tenant_id, llm_name=mdlnm): - num += cls.model.update(used_tokens = u.used_tokens + used_tokens)\ - .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\ - .execute() - except Exception as e: - pass - return num - - @classmethod - @DB.connection_context() - def get_openai_models(cls): - objs = cls.model.select().where( - (cls.model.llm_factory == "OpenAI"), - ~(cls.model.llm_name == "text-embedding-3-small"), - ~(cls.model.llm_name == "text-embedding-3-large") - ).dicts() - return list(objs) - - -class LLMBundle(object): - def __init__(self, tenant_id, llm_type, llm_name=None, lang="Chinese"): - self.tenant_id = tenant_id - self.llm_type = llm_type - self.llm_name = llm_name - self.mdl = TenantLLMService.model_instance( - tenant_id, llm_type, llm_name, lang=lang) - assert self.mdl, "Can't find mole for {}/{}/{}".format( - tenant_id, llm_type, llm_name) - self.max_length = 512 - for lm in LLMService.query(llm_name=llm_name): - self.max_length = lm.max_tokens - break - - def encode(self, texts: list, batch_size=32): - emd, used_tokens = 
self.mdl.encode(texts, batch_size) - if not TenantLLMService.increase_usage( - self.tenant_id, self.llm_type, used_tokens): - database_logger.error( - "Can't update token usage for {}/EMBEDDING".format(self.tenant_id)) - return emd, used_tokens - - def encode_queries(self, query: str): - emd, used_tokens = self.mdl.encode_queries(query) - if not TenantLLMService.increase_usage( - self.tenant_id, self.llm_type, used_tokens): - database_logger.error( - "Can't update token usage for {}/EMBEDDING".format(self.tenant_id)) - return emd, used_tokens - - def similarity(self, query: str, texts: list): - sim, used_tokens = self.mdl.similarity(query, texts) - if not TenantLLMService.increase_usage( - self.tenant_id, self.llm_type, used_tokens): - database_logger.error( - "Can't update token usage for {}/RERANK".format(self.tenant_id)) - return sim, used_tokens - - def describe(self, image, max_tokens=300): - txt, used_tokens = self.mdl.describe(image, max_tokens) - if not TenantLLMService.increase_usage( - self.tenant_id, self.llm_type, used_tokens): - database_logger.error( - "Can't update token usage for {}/IMAGE2TEXT".format(self.tenant_id)) - return txt - - def transcription(self, audio): - txt, used_tokens = self.mdl.transcription(audio) - if not TenantLLMService.increase_usage( - self.tenant_id, self.llm_type, used_tokens): - database_logger.error( - "Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id)) - return txt - - def chat(self, system, history, gen_conf): - txt, used_tokens = self.mdl.chat(system, history, gen_conf) - if not TenantLLMService.increase_usage( - self.tenant_id, self.llm_type, used_tokens, self.llm_name): - database_logger.error( - "Can't update token usage for {}/CHAT".format(self.tenant_id)) - return txt - - def chat_streamly(self, system, history, gen_conf): - for txt in self.mdl.chat_streamly(system, history, gen_conf): - if isinstance(txt, int): - if not TenantLLMService.increase_usage( - self.tenant_id, self.llm_type, txt, self.llm_name): - database_logger.error( - "Can't update token usage for {}/CHAT".format(self.tenant_id)) - return - yield txt +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+from api.db.services.user_service import TenantService
+from api.settings import database_logger
+from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel
+from api.db import LLMType
+from api.db.db_models import DB, UserTenant
+from api.db.db_models import LLMFactories, LLM, TenantLLM
+from api.db.services.common_service import CommonService
+
+
+class LLMFactoriesService(CommonService):
+    model = LLMFactories
+
+
+class LLMService(CommonService):
+    model = LLM
+
+
+class TenantLLMService(CommonService):
+    model = TenantLLM
+
+    @classmethod
+    @DB.connection_context()
+    def get_api_key(cls, tenant_id, model_name):
+        objs = cls.query(tenant_id=tenant_id, llm_name=model_name)
+        if not objs:
+            return
+        return objs[0]
+
+    @classmethod
+    @DB.connection_context()
+    def get_my_llms(cls, tenant_id):
+        fields = [
+            cls.model.llm_factory,
+            LLMFactories.logo,
+            LLMFactories.tags,
+            cls.model.model_type,
+            cls.model.llm_name,
+            cls.model.used_tokens
+        ]
+        objs = cls.model.select(*fields).join(LLMFactories, on=(cls.model.llm_factory == LLMFactories.name)).where(
+            cls.model.tenant_id == tenant_id, ~cls.model.api_key.is_null()).dicts()
+
+        return list(objs)
+
+    @classmethod
+    @DB.connection_context()
+    def model_instance(cls, tenant_id, llm_type,
+                       llm_name=None, lang="Chinese"):
+        e, tenant = TenantService.get_by_id(tenant_id)
+        if not e:
+            raise LookupError("Tenant not found")
+
+        if llm_type == LLMType.EMBEDDING.value:
+            mdlnm = tenant.embd_id if not llm_name else llm_name
+        elif llm_type == LLMType.SPEECH2TEXT.value:
+            mdlnm = tenant.asr_id
+        elif llm_type == LLMType.IMAGE2TEXT.value:
+            mdlnm = tenant.img2txt_id if not llm_name else llm_name
+        elif llm_type == LLMType.CHAT.value:
+            mdlnm = tenant.llm_id if not llm_name else llm_name
+        elif llm_type == LLMType.RERANK:
+            mdlnm = tenant.rerank_id if not llm_name else llm_name
+        else:
+            assert False, "LLM type error"
+
+        model_config = cls.get_api_key(tenant_id, mdlnm)
+        if model_config: model_config = model_config.to_dict()
+        if not model_config:
+            if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]:
+                llm = LLMService.query(llm_name=llm_name if llm_name else mdlnm)
+                if llm and llm[0].fid in ["Youdao", "FastEmbed", "BAAI"]:
+                    model_config = {"llm_factory": llm[0].fid, "api_key":"", "llm_name": llm_name if llm_name else mdlnm, "api_base": ""}
+            if not model_config:
+                if llm_name == "flag-embedding":
+                    model_config = {"llm_factory": "Tongyi-Qianwen", "api_key": "",
+                                    "llm_name": llm_name, "api_base": ""}
+                else:
+                    if not mdlnm:
+                        raise LookupError(f"Type of {llm_type} model is not set.")
+                    raise LookupError("Model({}) not authorized".format(mdlnm))
+
+        if llm_type == LLMType.EMBEDDING.value:
+            if model_config["llm_factory"] not in EmbeddingModel:
+                return
+            return EmbeddingModel[model_config["llm_factory"]](
+                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+
+        if llm_type == LLMType.RERANK:
+            if model_config["llm_factory"] not in RerankModel:
+                return
+            return RerankModel[model_config["llm_factory"]](
+                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+
+        if llm_type == LLMType.IMAGE2TEXT.value:
+            if model_config["llm_factory"] not in CvModel:
+                return
+            return CvModel[model_config["llm_factory"]](
+                model_config["api_key"], model_config["llm_name"], lang,
+                base_url=model_config["api_base"]
+            )
+
+        if llm_type == LLMType.CHAT.value:
+            if model_config["llm_factory"] not in ChatModel:
+                return
+            return ChatModel[model_config["llm_factory"]](
+                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+
+        if llm_type == LLMType.SPEECH2TEXT:
+            if model_config["llm_factory"] not in Seq2txtModel:
+                return
+            return Seq2txtModel[model_config["llm_factory"]](
+                model_config["api_key"], model_config["llm_name"], lang,
+                base_url=model_config["api_base"]
+            )
+
+    @classmethod
+    @DB.connection_context()
+    def increase_usage(cls, tenant_id, llm_type, used_tokens, llm_name=None):
+        e, tenant = TenantService.get_by_id(tenant_id)
+        if not e:
+            raise LookupError("Tenant not found")
+
+        if llm_type == LLMType.EMBEDDING.value:
+            mdlnm = tenant.embd_id
+        elif llm_type == LLMType.SPEECH2TEXT.value:
+            mdlnm = tenant.asr_id
+        elif llm_type == LLMType.IMAGE2TEXT.value:
+            mdlnm = tenant.img2txt_id
+        elif llm_type == LLMType.CHAT.value:
+            mdlnm = tenant.llm_id if not llm_name else llm_name
+        elif llm_type == LLMType.RERANK:
+            mdlnm = tenant.llm_id if not llm_name else llm_name
+        else:
+            assert False, "LLM type error"
+
+        num = 0
+        try:
+            for u in cls.query(tenant_id = tenant_id, llm_name=mdlnm):
+                num += cls.model.update(used_tokens = u.used_tokens + used_tokens)\
+                    .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\
+                    .execute()
+        except Exception as e:
+            pass
+        return num
+
+    @classmethod
+    @DB.connection_context()
+    def get_openai_models(cls):
+        objs = cls.model.select().where(
+            (cls.model.llm_factory == "OpenAI"),
+            ~(cls.model.llm_name == "text-embedding-3-small"),
+            ~(cls.model.llm_name == "text-embedding-3-large")
+        ).dicts()
+        return list(objs)
+
+
+class LLMBundle(object):
+    def __init__(self, tenant_id, llm_type, llm_name=None, lang="Chinese"):
+        self.tenant_id = tenant_id
+        self.llm_type = llm_type
+        self.llm_name = llm_name
+        self.mdl = TenantLLMService.model_instance(
+            tenant_id, llm_type, llm_name, lang=lang)
+        assert self.mdl, "Can't find model for {}/{}/{}".format(
+            tenant_id, llm_type, llm_name)
+        self.max_length = 512
+        for lm in LLMService.query(llm_name=llm_name):
+            self.max_length = lm.max_tokens
+            break
+
+    def encode(self, texts: list, batch_size=32):
+        emd, used_tokens = self.mdl.encode(texts, batch_size)
+        if not TenantLLMService.increase_usage(
+                self.tenant_id, self.llm_type, used_tokens):
+            database_logger.error(
+                "Can't update token usage for {}/EMBEDDING".format(self.tenant_id))
+        return emd, used_tokens
+
+    def encode_queries(self, query: str):
+        emd, used_tokens = self.mdl.encode_queries(query)
+        if not TenantLLMService.increase_usage(
+                self.tenant_id, self.llm_type, used_tokens):
+            database_logger.error(
+                "Can't update token usage for {}/EMBEDDING".format(self.tenant_id))
+        return emd, used_tokens
+
+    def similarity(self, query: str, texts: list):
+        sim, used_tokens = self.mdl.similarity(query, texts)
+        if not TenantLLMService.increase_usage(
+                self.tenant_id, self.llm_type, used_tokens):
+            database_logger.error(
+                "Can't update token usage for {}/RERANK".format(self.tenant_id))
+        return sim, used_tokens
+
+    def describe(self, image, max_tokens=300):
+        txt, used_tokens = self.mdl.describe(image, max_tokens)
+        if not TenantLLMService.increase_usage(
+                self.tenant_id, self.llm_type, used_tokens):
+            database_logger.error(
+                "Can't update token usage for {}/IMAGE2TEXT".format(self.tenant_id))
+        return txt
+
+    def transcription(self, audio):
+        txt, used_tokens = self.mdl.transcription(audio)
+        if not TenantLLMService.increase_usage(
+                self.tenant_id, self.llm_type, used_tokens):
+            database_logger.error(
+                "Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id))
+ return txt + + def chat(self, system, history, gen_conf): + txt, used_tokens = self.mdl.chat(system, history, gen_conf) + if not TenantLLMService.increase_usage( + self.tenant_id, self.llm_type, used_tokens, self.llm_name): + database_logger.error( + "Can't update token usage for {}/CHAT".format(self.tenant_id)) + return txt + + def chat_streamly(self, system, history, gen_conf): + for txt in self.mdl.chat_streamly(system, history, gen_conf): + if isinstance(txt, int): + if not TenantLLMService.increase_usage( + self.tenant_id, self.llm_type, txt, self.llm_name): + database_logger.error( + "Can't update token usage for {}/CHAT".format(self.tenant_id)) + return + yield txt diff --git a/api/db/services/task_service.py b/api/db/services/task_service.py index 33565bcb87a61e37eef8280adfc407fe61246a7e..545bd81be519e919382b25a5454c6583c956429a 100644 --- a/api/db/services/task_service.py +++ b/api/db/services/task_service.py @@ -1,175 +1,175 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import os -import random - -from api.db.db_utils import bulk_insert_into_db -from deepdoc.parser import PdfParser -from peewee import JOIN -from api.db.db_models import DB, File2Document, File -from api.db import StatusEnum, FileType, TaskStatus -from api.db.db_models import Task, Document, Knowledgebase, Tenant -from api.db.services.common_service import CommonService -from api.db.services.document_service import DocumentService -from api.utils import current_timestamp, get_uuid -from deepdoc.parser.excel_parser import RAGFlowExcelParser -from rag.settings import SVR_QUEUE_NAME -from rag.utils.minio_conn import MINIO -from rag.utils.redis_conn import REDIS_CONN - - -class TaskService(CommonService): - model = Task - - @classmethod - @DB.connection_context() - def get_tasks(cls, task_id): - fields = [ - cls.model.id, - cls.model.doc_id, - cls.model.from_page, - cls.model.to_page, - Document.kb_id, - Document.parser_id, - Document.parser_config, - Document.name, - Document.type, - Document.location, - Document.size, - Knowledgebase.tenant_id, - Knowledgebase.language, - Knowledgebase.embd_id, - Tenant.img2txt_id, - Tenant.asr_id, - Tenant.llm_id, - cls.model.update_time] - docs = cls.model.select(*fields) \ - .join(Document, on=(cls.model.doc_id == Document.id)) \ - .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \ - .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id)) \ - .where(cls.model.id == task_id) - docs = list(docs.dicts()) - if not docs: return [] - - cls.model.update(progress_msg=cls.model.progress_msg + "\n" + "Task has been received.", - progress=random.random() / 10.).where( - cls.model.id == docs[0]["id"]).execute() - return docs - - @classmethod - @DB.connection_context() - def get_ongoing_doc_name(cls): - with DB.lock("get_task", -1): - docs = cls.model.select(*[Document.id, Document.kb_id, Document.location, File.parent_id]) \ - .join(Document, on=(cls.model.doc_id == Document.id)) \ - .join(File2Document, 
on=(File2Document.document_id == Document.id), join_type=JOIN.LEFT_OUTER) \ - .join(File, on=(File2Document.file_id == File.id), join_type=JOIN.LEFT_OUTER) \ - .where( - Document.status == StatusEnum.VALID.value, - Document.run == TaskStatus.RUNNING.value, - ~(Document.type == FileType.VIRTUAL.value), - cls.model.progress < 1, - cls.model.create_time >= current_timestamp() - 1000 * 600 - ) - docs = list(docs.dicts()) - if not docs: return [] - - return list(set([(d["parent_id"] if d["parent_id"] else d["kb_id"], d["location"]) for d in docs])) - - @classmethod - @DB.connection_context() - def do_cancel(cls, id): - try: - task = cls.model.get_by_id(id) - _, doc = DocumentService.get_by_id(task.doc_id) - return doc.run == TaskStatus.CANCEL.value or doc.progress < 0 - except Exception as e: - pass - return False - - @classmethod - @DB.connection_context() - def update_progress(cls, id, info): - if os.environ.get("MACOS"): - if info["progress_msg"]: - cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where( - cls.model.id == id).execute() - if "progress" in info: - cls.model.update(progress=info["progress"]).where( - cls.model.id == id).execute() - return - - with DB.lock("update_progress", -1): - if info["progress_msg"]: - cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where( - cls.model.id == id).execute() - if "progress" in info: - cls.model.update(progress=info["progress"]).where( - cls.model.id == id).execute() - - -def queue_tasks(doc, bucket, name): - def new_task(): - nonlocal doc - return { - "id": get_uuid(), - "doc_id": doc["id"] - } - tsks = [] - - if doc["type"] == FileType.PDF.value: - file_bin = MINIO.get(bucket, name) - do_layout = doc["parser_config"].get("layout_recognize", True) - pages = PdfParser.total_page_number(doc["name"], file_bin) - page_size = doc["parser_config"].get("task_page_size", 12) - if doc["parser_id"] == "paper": - page_size = doc["parser_config"].get("task_page_size", 22) - if doc["parser_id"] == "one": - page_size = 1000000000 - if doc["parser_id"] == "knowledge_graph": - page_size = 1000000000 - if not do_layout: - page_size = 1000000000 - page_ranges = doc["parser_config"].get("pages") - if not page_ranges: - page_ranges = [(1, 100000)] - for s, e in page_ranges: - s -= 1 - s = max(0, s) - e = min(e - 1, pages) - for p in range(s, e, page_size): - task = new_task() - task["from_page"] = p - task["to_page"] = min(p + page_size, e) - tsks.append(task) - - elif doc["parser_id"] == "table": - file_bin = MINIO.get(bucket, name) - rn = RAGFlowExcelParser.row_number( - doc["name"], file_bin) - for i in range(0, rn, 3000): - task = new_task() - task["from_page"] = i - task["to_page"] = min(i + 3000, rn) - tsks.append(task) - else: - tsks.append(new_task()) - - bulk_insert_into_db(Task, tsks, True) - DocumentService.begin2parse(doc["id"]) - - for t in tsks: - assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=t), "Can't access Redis. Please check the Redis' status." +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import random + +from api.db.db_utils import bulk_insert_into_db +from deepdoc.parser import PdfParser +from peewee import JOIN +from api.db.db_models import DB, File2Document, File +from api.db import StatusEnum, FileType, TaskStatus +from api.db.db_models import Task, Document, Knowledgebase, Tenant +from api.db.services.common_service import CommonService +from api.db.services.document_service import DocumentService +from api.utils import current_timestamp, get_uuid +from deepdoc.parser.excel_parser import RAGFlowExcelParser +from rag.settings import SVR_QUEUE_NAME +from rag.utils.minio_conn import MINIO +from rag.utils.redis_conn import REDIS_CONN + + +class TaskService(CommonService): + model = Task + + @classmethod + @DB.connection_context() + def get_tasks(cls, task_id): + fields = [ + cls.model.id, + cls.model.doc_id, + cls.model.from_page, + cls.model.to_page, + Document.kb_id, + Document.parser_id, + Document.parser_config, + Document.name, + Document.type, + Document.location, + Document.size, + Knowledgebase.tenant_id, + Knowledgebase.language, + Knowledgebase.embd_id, + Tenant.img2txt_id, + Tenant.asr_id, + Tenant.llm_id, + cls.model.update_time] + docs = cls.model.select(*fields) \ + .join(Document, on=(cls.model.doc_id == Document.id)) \ + .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \ + .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id)) \ + .where(cls.model.id == task_id) + docs = list(docs.dicts()) + if not docs: return [] + + cls.model.update(progress_msg=cls.model.progress_msg + "\n" + "Task has been received.", + progress=random.random() / 10.).where( + cls.model.id == docs[0]["id"]).execute() + return docs + + @classmethod + @DB.connection_context() + def get_ongoing_doc_name(cls): + with DB.lock("get_task", -1): + docs = cls.model.select(*[Document.id, Document.kb_id, Document.location, File.parent_id]) \ + .join(Document, on=(cls.model.doc_id == Document.id)) \ + .join(File2Document, on=(File2Document.document_id == Document.id), join_type=JOIN.LEFT_OUTER) \ + .join(File, on=(File2Document.file_id == File.id), join_type=JOIN.LEFT_OUTER) \ + .where( + Document.status == StatusEnum.VALID.value, + Document.run == TaskStatus.RUNNING.value, + ~(Document.type == FileType.VIRTUAL.value), + cls.model.progress < 1, + cls.model.create_time >= current_timestamp() - 1000 * 600 + ) + docs = list(docs.dicts()) + if not docs: return [] + + return list(set([(d["parent_id"] if d["parent_id"] else d["kb_id"], d["location"]) for d in docs])) + + @classmethod + @DB.connection_context() + def do_cancel(cls, id): + try: + task = cls.model.get_by_id(id) + _, doc = DocumentService.get_by_id(task.doc_id) + return doc.run == TaskStatus.CANCEL.value or doc.progress < 0 + except Exception as e: + pass + return False + + @classmethod + @DB.connection_context() + def update_progress(cls, id, info): + if os.environ.get("MACOS"): + if info["progress_msg"]: + cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where( + cls.model.id == id).execute() + if "progress" in info: + cls.model.update(progress=info["progress"]).where( + cls.model.id == id).execute() + return + + with DB.lock("update_progress", -1): + if info["progress_msg"]: + cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where( + cls.model.id == id).execute() + if "progress" in info: + 
cls.model.update(progress=info["progress"]).where( + cls.model.id == id).execute() + + +def queue_tasks(doc, bucket, name): + def new_task(): + nonlocal doc + return { + "id": get_uuid(), + "doc_id": doc["id"] + } + tsks = [] + + if doc["type"] == FileType.PDF.value: + file_bin = MINIO.get(bucket, name) + do_layout = doc["parser_config"].get("layout_recognize", True) + pages = PdfParser.total_page_number(doc["name"], file_bin) + page_size = doc["parser_config"].get("task_page_size", 12) + if doc["parser_id"] == "paper": + page_size = doc["parser_config"].get("task_page_size", 22) + if doc["parser_id"] == "one": + page_size = 1000000000 + if doc["parser_id"] == "knowledge_graph": + page_size = 1000000000 + if not do_layout: + page_size = 1000000000 + page_ranges = doc["parser_config"].get("pages") + if not page_ranges: + page_ranges = [(1, 100000)] + for s, e in page_ranges: + s -= 1 + s = max(0, s) + e = min(e - 1, pages) + for p in range(s, e, page_size): + task = new_task() + task["from_page"] = p + task["to_page"] = min(p + page_size, e) + tsks.append(task) + + elif doc["parser_id"] == "table": + file_bin = MINIO.get(bucket, name) + rn = RAGFlowExcelParser.row_number( + doc["name"], file_bin) + for i in range(0, rn, 3000): + task = new_task() + task["from_page"] = i + task["to_page"] = min(i + 3000, rn) + tsks.append(task) + else: + tsks.append(new_task()) + + bulk_insert_into_db(Task, tsks, True) + DocumentService.begin2parse(doc["id"]) + + for t in tsks: + assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=t), "Can't access Redis. Please check the Redis' status." diff --git a/api/ragflow_server.py b/api/ragflow_server.py index 8878817a345aec9d1ca2e3f3eeec4c454f45348c..b0c0ee1dea988e570a47ff09c6018fbaa8cb3604 100644 --- a/api/ragflow_server.py +++ b/api/ragflow_server.py @@ -1,100 +1,100 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import logging -import os -import signal -import sys -import time -import traceback -from concurrent.futures import ThreadPoolExecutor - -from werkzeug.serving import run_simple -from api.apps import app -from api.db.runtime_config import RuntimeConfig -from api.db.services.document_service import DocumentService -from api.settings import ( - HOST, HTTP_PORT, access_logger, database_logger, stat_logger, -) -from api import utils - -from api.db.db_models import init_database_tables as init_web_db -from api.db.init_data import init_web_data -from api.versions import get_versions - - -def update_progress(): - while True: - time.sleep(1) - try: - DocumentService.update_progress() - except Exception as e: - stat_logger.error("update_progress exception:" + str(e)) - - -if __name__ == '__main__': - print(""" - ____ ______ __ - / __ \ ____ _ ____ _ / ____// /____ _ __ - / /_/ // __ `// __ `// /_ / // __ \| | /| / / - / _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ / -/_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/ - /____/ - - """, flush=True) - stat_logger.info( - f'project base: {utils.file_utils.get_project_base_directory()}' - ) - - # init db - init_web_db() - init_web_data() - # init runtime config - import argparse - parser = argparse.ArgumentParser() - parser.add_argument('--version', default=False, help="rag flow version", action='store_true') - parser.add_argument('--debug', default=False, help="debug mode", action='store_true') - args = parser.parse_args() - if args.version: - print(get_versions()) - sys.exit(0) - - RuntimeConfig.DEBUG = args.debug - if RuntimeConfig.DEBUG: - stat_logger.info("run on debug mode") - - RuntimeConfig.init_env() - RuntimeConfig.init_config(JOB_SERVER_HOST=HOST, HTTP_PORT=HTTP_PORT) - - peewee_logger = logging.getLogger('peewee') - peewee_logger.propagate = False - # rag_arch.common.log.ROpenHandler - peewee_logger.addHandler(database_logger.handlers[0]) - peewee_logger.setLevel(database_logger.level) - - thr = ThreadPoolExecutor(max_workers=1) - thr.submit(update_progress) - - # start http server - try: - stat_logger.info("RAG Flow http server start...") - werkzeug_logger = logging.getLogger("werkzeug") - for h in access_logger.handlers: - werkzeug_logger.addHandler(h) - run_simple(hostname=HOST, port=HTTP_PORT, application=app, threaded=True, use_reloader=RuntimeConfig.DEBUG, use_debugger=RuntimeConfig.DEBUG) - except Exception: - traceback.print_exc() +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import logging +import os +import signal +import sys +import time +import traceback +from concurrent.futures import ThreadPoolExecutor + +from werkzeug.serving import run_simple +from api.apps import app +from api.db.runtime_config import RuntimeConfig +from api.db.services.document_service import DocumentService +from api.settings import ( + HOST, HTTP_PORT, access_logger, database_logger, stat_logger, +) +from api import utils + +from api.db.db_models import init_database_tables as init_web_db +from api.db.init_data import init_web_data +from api.versions import get_versions + + +def update_progress(): + while True: + time.sleep(1) + try: + DocumentService.update_progress() + except Exception as e: + stat_logger.error("update_progress exception:" + str(e)) + + +if __name__ == '__main__': + print(""" + ____ ______ __ + / __ \ ____ _ ____ _ / ____// /____ _ __ + / /_/ // __ `// __ `// /_ / // __ \| | /| / / + / _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ / +/_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/ + /____/ + + """, flush=True) + stat_logger.info( + f'project base: {utils.file_utils.get_project_base_directory()}' + ) + + # init db + init_web_db() + init_web_data() + # init runtime config + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--version', default=False, help="rag flow version", action='store_true') + parser.add_argument('--debug', default=False, help="debug mode", action='store_true') + args = parser.parse_args() + if args.version: + print(get_versions()) + sys.exit(0) + + RuntimeConfig.DEBUG = args.debug + if RuntimeConfig.DEBUG: + stat_logger.info("run on debug mode") + + RuntimeConfig.init_env() + RuntimeConfig.init_config(JOB_SERVER_HOST=HOST, HTTP_PORT=HTTP_PORT) + + peewee_logger = logging.getLogger('peewee') + peewee_logger.propagate = False + # rag_arch.common.log.ROpenHandler + peewee_logger.addHandler(database_logger.handlers[0]) + peewee_logger.setLevel(database_logger.level) + + thr = ThreadPoolExecutor(max_workers=1) + thr.submit(update_progress) + + # start http server + try: + stat_logger.info("RAG Flow http server start...") + werkzeug_logger = logging.getLogger("werkzeug") + for h in access_logger.handlers: + werkzeug_logger.addHandler(h) + run_simple(hostname=HOST, port=HTTP_PORT, application=app, threaded=True, use_reloader=RuntimeConfig.DEBUG, use_debugger=RuntimeConfig.DEBUG) + except Exception: + traceback.print_exc() os.kill(os.getpid(), signal.SIGKILL) \ No newline at end of file diff --git a/api/settings.py b/api/settings.py index 6efc46ab7cc35186cb22e2d71673caa3083e67d2..95bf1961383135276e4657ae4b9c9dc65a113d1c 100644 --- a/api/settings.py +++ b/api/settings.py @@ -1,251 +1,251 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import os -from enum import IntEnum, Enum -from api.utils.file_utils import get_project_base_directory -from api.utils.log_utils import LoggerFactory, getLogger - -# Logger -LoggerFactory.set_directory( - os.path.join( - get_project_base_directory(), - "logs", - "api")) -# {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0} -LoggerFactory.LEVEL = 30 - -stat_logger = getLogger("stat") -access_logger = getLogger("access") -database_logger = getLogger("database") -chat_logger = getLogger("chat") - -from rag.utils.es_conn import ELASTICSEARCH -from rag.nlp import search -from graphrag import search as kg_search -from api.utils import get_base_config, decrypt_database_config - -API_VERSION = "v1" -RAG_FLOW_SERVICE_NAME = "ragflow" -SERVER_MODULE = "rag_flow_server.py" -TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp") -RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf") - -SUBPROCESS_STD_LOG_NAME = "std.log" - -ERROR_REPORT = True -ERROR_REPORT_WITH_PATH = False - -MAX_TIMESTAMP_INTERVAL = 60 -SESSION_VALID_PERIOD = 7 * 24 * 60 * 60 - -REQUEST_TRY_TIMES = 3 -REQUEST_WAIT_SEC = 2 -REQUEST_MAX_WAIT_SEC = 300 - -USE_REGISTRY = get_base_config("use_registry") - -default_llm = { - "Tongyi-Qianwen": { - "chat_model": "qwen-plus", - "embedding_model": "text-embedding-v2", - "image2text_model": "qwen-vl-max", - "asr_model": "paraformer-realtime-8k-v1", - }, - "OpenAI": { - "chat_model": "gpt-3.5-turbo", - "embedding_model": "text-embedding-ada-002", - "image2text_model": "gpt-4-vision-preview", - "asr_model": "whisper-1", - }, - "Azure-OpenAI": { - "chat_model": "azure-gpt-35-turbo", - "embedding_model": "azure-text-embedding-ada-002", - "image2text_model": "azure-gpt-4-vision-preview", - "asr_model": "azure-whisper-1", - }, - "ZHIPU-AI": { - "chat_model": "glm-3-turbo", - "embedding_model": "embedding-2", - "image2text_model": "glm-4v", - "asr_model": "", - }, - "Ollama": { - "chat_model": "qwen-14B-chat", - "embedding_model": "flag-embedding", - "image2text_model": "", - "asr_model": "", - }, - "Moonshot": { - "chat_model": "moonshot-v1-8k", - "embedding_model": "", - "image2text_model": "", - "asr_model": "", - }, - "DeepSeek": { - "chat_model": "deepseek-chat", - "embedding_model": "", - "image2text_model": "", - "asr_model": "", - }, - "VolcEngine": { - "chat_model": "", - "embedding_model": "", - "image2text_model": "", - "asr_model": "", - }, - "BAAI": { - "chat_model": "", - "embedding_model": "BAAI/bge-large-zh-v1.5", - "image2text_model": "", - "asr_model": "", - "rerank_model": "BAAI/bge-reranker-v2-m3", - } -} -LLM = get_base_config("user_default_llm", {}) -LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen") -LLM_BASE_URL = LLM.get("base_url") - -if LLM_FACTORY not in default_llm: - print( - "\33[91m【ERROR】\33[0m:", - f"LLM factory {LLM_FACTORY} has not supported yet, switch to 'Tongyi-Qianwen/QWen' automatically, and please check the API_KEY in service_conf.yaml.") - LLM_FACTORY = "Tongyi-Qianwen" -CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"] -EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"] -RERANK_MDL = default_llm["BAAI"]["rerank_model"] -ASR_MDL = default_llm[LLM_FACTORY]["asr_model"] -IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"] - -API_KEY = LLM.get("api_key", "") -PARSERS = LLM.get( - "parsers", - "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge 
Graph,email:Email") - -# distribution -DEPENDENT_DISTRIBUTION = get_base_config("dependent_distribution", False) -RAG_FLOW_UPDATE_CHECK = False - -HOST = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1") -HTTP_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port") - -SECRET_KEY = get_base_config( - RAG_FLOW_SERVICE_NAME, - {}).get( - "secret_key", - "infiniflow") -TOKEN_EXPIRE_IN = get_base_config( - RAG_FLOW_SERVICE_NAME, {}).get( - "token_expires_in", 3600) - -NGINX_HOST = get_base_config( - RAG_FLOW_SERVICE_NAME, {}).get( - "nginx", {}).get("host") or HOST -NGINX_HTTP_PORT = get_base_config( - RAG_FLOW_SERVICE_NAME, {}).get( - "nginx", {}).get("http_port") or HTTP_PORT - -RANDOM_INSTANCE_ID = get_base_config( - RAG_FLOW_SERVICE_NAME, {}).get( - "random_instance_id", False) - -PROXY = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("proxy") -PROXY_PROTOCOL = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("protocol") - -DATABASE = decrypt_database_config(name="mysql") - -# Switch -# upload -UPLOAD_DATA_FROM_CLIENT = True - -# authentication -AUTHENTICATION_CONF = get_base_config("authentication", {}) - -# client -CLIENT_AUTHENTICATION = AUTHENTICATION_CONF.get( - "client", {}).get( - "switch", False) -HTTP_APP_KEY = AUTHENTICATION_CONF.get("client", {}).get("http_app_key") -GITHUB_OAUTH = get_base_config("oauth", {}).get("github") -FEISHU_OAUTH = get_base_config("oauth", {}).get("feishu") -WECHAT_OAUTH = get_base_config("oauth", {}).get("wechat") - -# site -SITE_AUTHENTICATION = AUTHENTICATION_CONF.get("site", {}).get("switch", False) - -# permission -PERMISSION_CONF = get_base_config("permission", {}) -PERMISSION_SWITCH = PERMISSION_CONF.get("switch") -COMPONENT_PERMISSION = PERMISSION_CONF.get("component") -DATASET_PERMISSION = PERMISSION_CONF.get("dataset") - -HOOK_MODULE = get_base_config("hook_module") -HOOK_SERVER_NAME = get_base_config("hook_server_name") - -ENABLE_MODEL_STORE = get_base_config('enable_model_store', False) -# authentication -USE_AUTHENTICATION = False -USE_DATA_AUTHENTICATION = False -AUTOMATIC_AUTHORIZATION_OUTPUT_DATA = True -USE_DEFAULT_TIMEOUT = False -AUTHENTICATION_DEFAULT_TIMEOUT = 7 * 24 * 60 * 60 # s -PRIVILEGE_COMMAND_WHITELIST = [] -CHECK_NODES_IDENTITY = False - -retrievaler = search.Dealer(ELASTICSEARCH) -kg_retrievaler = kg_search.KGSearch(ELASTICSEARCH) - - -class CustomEnum(Enum): - @classmethod - def valid(cls, value): - try: - cls(value) - return True - except BaseException: - return False - - @classmethod - def values(cls): - return [member.value for member in cls.__members__.values()] - - @classmethod - def names(cls): - return [member.name for member in cls.__members__.values()] - - -class PythonDependenceName(CustomEnum): - Rag_Source_Code = "python" - Python_Env = "miniconda" - - -class ModelStorage(CustomEnum): - REDIS = "redis" - MYSQL = "mysql" - - -class RetCode(IntEnum, CustomEnum): - SUCCESS = 0 - NOT_EFFECTIVE = 10 - EXCEPTION_ERROR = 100 - ARGUMENT_ERROR = 101 - DATA_ERROR = 102 - OPERATING_ERROR = 103 - CONNECTION_ERROR = 105 - RUNNING = 106 - PERMISSION_ERROR = 108 - AUTHENTICATION_ERROR = 109 - UNAUTHORIZED = 401 - SERVER_ERROR = 500 +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +from enum import IntEnum, Enum +from api.utils.file_utils import get_project_base_directory +from api.utils.log_utils import LoggerFactory, getLogger + +# Logger +LoggerFactory.set_directory( + os.path.join( + get_project_base_directory(), + "logs", + "api")) +# {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0} +LoggerFactory.LEVEL = 30 + +stat_logger = getLogger("stat") +access_logger = getLogger("access") +database_logger = getLogger("database") +chat_logger = getLogger("chat") + +from rag.utils.es_conn import ELASTICSEARCH +from rag.nlp import search +from graphrag import search as kg_search +from api.utils import get_base_config, decrypt_database_config + +API_VERSION = "v1" +RAG_FLOW_SERVICE_NAME = "ragflow" +SERVER_MODULE = "rag_flow_server.py" +TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp") +RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf") + +SUBPROCESS_STD_LOG_NAME = "std.log" + +ERROR_REPORT = True +ERROR_REPORT_WITH_PATH = False + +MAX_TIMESTAMP_INTERVAL = 60 +SESSION_VALID_PERIOD = 7 * 24 * 60 * 60 + +REQUEST_TRY_TIMES = 3 +REQUEST_WAIT_SEC = 2 +REQUEST_MAX_WAIT_SEC = 300 + +USE_REGISTRY = get_base_config("use_registry") + +default_llm = { + "Tongyi-Qianwen": { + "chat_model": "qwen-plus", + "embedding_model": "text-embedding-v2", + "image2text_model": "qwen-vl-max", + "asr_model": "paraformer-realtime-8k-v1", + }, + "OpenAI": { + "chat_model": "gpt-3.5-turbo", + "embedding_model": "text-embedding-ada-002", + "image2text_model": "gpt-4-vision-preview", + "asr_model": "whisper-1", + }, + "Azure-OpenAI": { + "chat_model": "azure-gpt-35-turbo", + "embedding_model": "azure-text-embedding-ada-002", + "image2text_model": "azure-gpt-4-vision-preview", + "asr_model": "azure-whisper-1", + }, + "ZHIPU-AI": { + "chat_model": "glm-3-turbo", + "embedding_model": "embedding-2", + "image2text_model": "glm-4v", + "asr_model": "", + }, + "Ollama": { + "chat_model": "qwen-14B-chat", + "embedding_model": "flag-embedding", + "image2text_model": "", + "asr_model": "", + }, + "Moonshot": { + "chat_model": "moonshot-v1-8k", + "embedding_model": "", + "image2text_model": "", + "asr_model": "", + }, + "DeepSeek": { + "chat_model": "deepseek-chat", + "embedding_model": "", + "image2text_model": "", + "asr_model": "", + }, + "VolcEngine": { + "chat_model": "", + "embedding_model": "", + "image2text_model": "", + "asr_model": "", + }, + "BAAI": { + "chat_model": "", + "embedding_model": "BAAI/bge-large-zh-v1.5", + "image2text_model": "", + "asr_model": "", + "rerank_model": "BAAI/bge-reranker-v2-m3", + } +} +LLM = get_base_config("user_default_llm", {}) +LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen") +LLM_BASE_URL = LLM.get("base_url") + +if LLM_FACTORY not in default_llm: + print( + "\33[91m【ERROR】\33[0m:", + f"LLM factory {LLM_FACTORY} has not supported yet, switch to 'Tongyi-Qianwen/QWen' automatically, and please check the API_KEY in service_conf.yaml.") + LLM_FACTORY = "Tongyi-Qianwen" +CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"] +EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"] 
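+# EMBEDDING_MDL above and RERANK_MDL below are pinned to the BAAI entry of
+# default_llm rather than to LLM_FACTORY, so both stay populated even when
+# the configured chat factory ships no embedding or rerank model.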
+RERANK_MDL = default_llm["BAAI"]["rerank_model"] +ASR_MDL = default_llm[LLM_FACTORY]["asr_model"] +IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"] + +API_KEY = LLM.get("api_key", "") +PARSERS = LLM.get( + "parsers", + "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email") + +# distribution +DEPENDENT_DISTRIBUTION = get_base_config("dependent_distribution", False) +RAG_FLOW_UPDATE_CHECK = False + +HOST = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1") +HTTP_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port") + +SECRET_KEY = get_base_config( + RAG_FLOW_SERVICE_NAME, + {}).get( + "secret_key", + "infiniflow") +TOKEN_EXPIRE_IN = get_base_config( + RAG_FLOW_SERVICE_NAME, {}).get( + "token_expires_in", 3600) + +NGINX_HOST = get_base_config( + RAG_FLOW_SERVICE_NAME, {}).get( + "nginx", {}).get("host") or HOST +NGINX_HTTP_PORT = get_base_config( + RAG_FLOW_SERVICE_NAME, {}).get( + "nginx", {}).get("http_port") or HTTP_PORT + +RANDOM_INSTANCE_ID = get_base_config( + RAG_FLOW_SERVICE_NAME, {}).get( + "random_instance_id", False) + +PROXY = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("proxy") +PROXY_PROTOCOL = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("protocol") + +DATABASE = decrypt_database_config(name="mysql") + +# Switch +# upload +UPLOAD_DATA_FROM_CLIENT = True + +# authentication +AUTHENTICATION_CONF = get_base_config("authentication", {}) + +# client +CLIENT_AUTHENTICATION = AUTHENTICATION_CONF.get( + "client", {}).get( + "switch", False) +HTTP_APP_KEY = AUTHENTICATION_CONF.get("client", {}).get("http_app_key") +GITHUB_OAUTH = get_base_config("oauth", {}).get("github") +FEISHU_OAUTH = get_base_config("oauth", {}).get("feishu") +WECHAT_OAUTH = get_base_config("oauth", {}).get("wechat") + +# site +SITE_AUTHENTICATION = AUTHENTICATION_CONF.get("site", {}).get("switch", False) + +# permission +PERMISSION_CONF = get_base_config("permission", {}) +PERMISSION_SWITCH = PERMISSION_CONF.get("switch") +COMPONENT_PERMISSION = PERMISSION_CONF.get("component") +DATASET_PERMISSION = PERMISSION_CONF.get("dataset") + +HOOK_MODULE = get_base_config("hook_module") +HOOK_SERVER_NAME = get_base_config("hook_server_name") + +ENABLE_MODEL_STORE = get_base_config('enable_model_store', False) +# authentication +USE_AUTHENTICATION = False +USE_DATA_AUTHENTICATION = False +AUTOMATIC_AUTHORIZATION_OUTPUT_DATA = True +USE_DEFAULT_TIMEOUT = False +AUTHENTICATION_DEFAULT_TIMEOUT = 7 * 24 * 60 * 60 # s +PRIVILEGE_COMMAND_WHITELIST = [] +CHECK_NODES_IDENTITY = False + +retrievaler = search.Dealer(ELASTICSEARCH) +kg_retrievaler = kg_search.KGSearch(ELASTICSEARCH) + + +class CustomEnum(Enum): + @classmethod + def valid(cls, value): + try: + cls(value) + return True + except BaseException: + return False + + @classmethod + def values(cls): + return [member.value for member in cls.__members__.values()] + + @classmethod + def names(cls): + return [member.name for member in cls.__members__.values()] + + +class PythonDependenceName(CustomEnum): + Rag_Source_Code = "python" + Python_Env = "miniconda" + + +class ModelStorage(CustomEnum): + REDIS = "redis" + MYSQL = "mysql" + + +class RetCode(IntEnum, CustomEnum): + SUCCESS = 0 + NOT_EFFECTIVE = 10 + EXCEPTION_ERROR = 100 + ARGUMENT_ERROR = 101 + DATA_ERROR = 102 + OPERATING_ERROR = 103 + CONNECTION_ERROR = 105 + RUNNING = 106 + PERMISSION_ERROR = 108 + AUTHENTICATION_ERROR = 109 + 
UNAUTHORIZED = 401 + SERVER_ERROR = 500 diff --git a/api/utils/__init__.py b/api/utils/__init__.py index 65c6b31e9788191cd104f07af51ac30c8eed3649..96b085c2074fd9918e8735de8aedca94fdaf978e 100644 --- a/api/utils/__init__.py +++ b/api/utils/__init__.py @@ -1,346 +1,346 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import base64 -import datetime -import io -import json -import os -import pickle -import socket -import time -import uuid -import requests -from enum import Enum, IntEnum -import importlib -from Cryptodome.PublicKey import RSA -from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5 - -from filelock import FileLock - -from . import file_utils - -SERVICE_CONF = "service_conf.yaml" - - -def conf_realpath(conf_name): - conf_path = f"conf/{conf_name}" - return os.path.join(file_utils.get_project_base_directory(), conf_path) - - -def get_base_config(key, default=None, conf_name=SERVICE_CONF) -> dict: - local_config = {} - local_path = conf_realpath(f'local.{conf_name}') - if default is None: - default = os.environ.get(key.upper()) - - if os.path.exists(local_path): - local_config = file_utils.load_yaml_conf(local_path) - if not isinstance(local_config, dict): - raise ValueError(f'Invalid config file: "{local_path}".') - - if key is not None and key in local_config: - return local_config[key] - - config_path = conf_realpath(conf_name) - config = file_utils.load_yaml_conf(config_path) - - if not isinstance(config, dict): - raise ValueError(f'Invalid config file: "{config_path}".') - - config.update(local_config) - return config.get(key, default) if key is not None else config - - -use_deserialize_safe_module = get_base_config( - 'use_deserialize_safe_module', False) - - -class CoordinationCommunicationProtocol(object): - HTTP = "http" - GRPC = "grpc" - - -class BaseType: - def to_dict(self): - return dict([(k.lstrip("_"), v) for k, v in self.__dict__.items()]) - - def to_dict_with_type(self): - def _dict(obj): - module = None - if issubclass(obj.__class__, BaseType): - data = {} - for attr, v in obj.__dict__.items(): - k = attr.lstrip("_") - data[k] = _dict(v) - module = obj.__module__ - elif isinstance(obj, (list, tuple)): - data = [] - for i, vv in enumerate(obj): - data.append(_dict(vv)) - elif isinstance(obj, dict): - data = {} - for _k, vv in obj.items(): - data[_k] = _dict(vv) - else: - data = obj - return {"type": obj.__class__.__name__, - "data": data, "module": module} - return _dict(self) - - -class CustomJSONEncoder(json.JSONEncoder): - def __init__(self, **kwargs): - self._with_type = kwargs.pop("with_type", False) - super().__init__(**kwargs) - - def default(self, obj): - if isinstance(obj, datetime.datetime): - return obj.strftime('%Y-%m-%d %H:%M:%S') - elif isinstance(obj, datetime.date): - return obj.strftime('%Y-%m-%d') - elif isinstance(obj, datetime.timedelta): - return str(obj) - elif issubclass(type(obj), Enum) or issubclass(type(obj), IntEnum): - return obj.value - elif isinstance(obj, set): 
- return list(obj) - elif issubclass(type(obj), BaseType): - if not self._with_type: - return obj.to_dict() - else: - return obj.to_dict_with_type() - elif isinstance(obj, type): - return obj.__name__ - else: - return json.JSONEncoder.default(self, obj) - - -def rag_uuid(): - return uuid.uuid1().hex - - -def string_to_bytes(string): - return string if isinstance( - string, bytes) else string.encode(encoding="utf-8") - - -def bytes_to_string(byte): - return byte.decode(encoding="utf-8") - - -def json_dumps(src, byte=False, indent=None, with_type=False): - dest = json.dumps( - src, - indent=indent, - cls=CustomJSONEncoder, - with_type=with_type) - if byte: - dest = string_to_bytes(dest) - return dest - - -def json_loads(src, object_hook=None, object_pairs_hook=None): - if isinstance(src, bytes): - src = bytes_to_string(src) - return json.loads(src, object_hook=object_hook, - object_pairs_hook=object_pairs_hook) - - -def current_timestamp(): - return int(time.time() * 1000) - - -def timestamp_to_date(timestamp, format_string="%Y-%m-%d %H:%M:%S"): - if not timestamp: - timestamp = time.time() - timestamp = int(timestamp) / 1000 - time_array = time.localtime(timestamp) - str_date = time.strftime(format_string, time_array) - return str_date - - -def date_string_to_timestamp(time_str, format_string="%Y-%m-%d %H:%M:%S"): - time_array = time.strptime(time_str, format_string) - time_stamp = int(time.mktime(time_array) * 1000) - return time_stamp - - -def serialize_b64(src, to_str=False): - dest = base64.b64encode(pickle.dumps(src)) - if not to_str: - return dest - else: - return bytes_to_string(dest) - - -def deserialize_b64(src): - src = base64.b64decode( - string_to_bytes(src) if isinstance( - src, str) else src) - if use_deserialize_safe_module: - return restricted_loads(src) - return pickle.loads(src) - - -safe_module = { - 'numpy', - 'rag_flow' -} - - -class RestrictedUnpickler(pickle.Unpickler): - def find_class(self, module, name): - import importlib - if module.split('.')[0] in safe_module: - _module = importlib.import_module(module) - return getattr(_module, name) - # Forbid everything else. 
- raise pickle.UnpicklingError("global '%s.%s' is forbidden" % - (module, name)) - - -def restricted_loads(src): - """Helper function analogous to pickle.loads().""" - return RestrictedUnpickler(io.BytesIO(src)).load() - - -def get_lan_ip(): - if os.name != "nt": - import fcntl - import struct - - def get_interface_ip(ifname): - s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - return socket.inet_ntoa( - fcntl.ioctl(s.fileno(), 0x8915, struct.pack('256s', string_to_bytes(ifname[:15])))[20:24]) - - ip = socket.gethostbyname(socket.getfqdn()) - if ip.startswith("127.") and os.name != "nt": - interfaces = [ - "bond1", - "eth0", - "eth1", - "eth2", - "wlan0", - "wlan1", - "wifi0", - "ath0", - "ath1", - "ppp0", - ] - for ifname in interfaces: - try: - ip = get_interface_ip(ifname) - break - except IOError as e: - pass - return ip or '' - - -def from_dict_hook(in_dict: dict): - if "type" in in_dict and "data" in in_dict: - if in_dict["module"] is None: - return in_dict["data"] - else: - return getattr(importlib.import_module( - in_dict["module"]), in_dict["type"])(**in_dict["data"]) - else: - return in_dict - - -def decrypt_database_password(password): - encrypt_password = get_base_config("encrypt_password", False) - encrypt_module = get_base_config("encrypt_module", False) - private_key = get_base_config("private_key", None) - - if not password or not encrypt_password: - return password - - if not private_key: - raise ValueError("No private key") - - module_fun = encrypt_module.split("#") - pwdecrypt_fun = getattr( - importlib.import_module( - module_fun[0]), - module_fun[1]) - - return pwdecrypt_fun(private_key, password) - - -def decrypt_database_config( - database=None, passwd_key="password", name="database"): - if not database: - database = get_base_config(name, {}) - - database[passwd_key] = decrypt_database_password(database[passwd_key]) - return database - - -def update_config(key, value, conf_name=SERVICE_CONF): - conf_path = conf_realpath(conf_name=conf_name) - if not os.path.isabs(conf_path): - conf_path = os.path.join( - file_utils.get_project_base_directory(), conf_path) - - with FileLock(os.path.join(os.path.dirname(conf_path), ".lock")): - config = file_utils.load_yaml_conf(conf_path=conf_path) or {} - config[key] = value - file_utils.rewrite_yaml_conf(conf_path=conf_path, config=config) - - -def get_uuid(): - return uuid.uuid1().hex - - -def datetime_format(date_time: datetime.datetime) -> datetime.datetime: - return datetime.datetime(date_time.year, date_time.month, date_time.day, - date_time.hour, date_time.minute, date_time.second) - - -def get_format_time() -> datetime.datetime: - return datetime_format(datetime.datetime.now()) - - -def str2date(date_time: str): - return datetime.datetime.strptime(date_time, '%Y-%m-%d') - - -def elapsed2time(elapsed): - seconds = elapsed / 1000 - minuter, second = divmod(seconds, 60) - hour, minuter = divmod(minuter, 60) - return '%02d:%02d:%02d' % (hour, minuter, second) - - -def decrypt(line): - file_path = os.path.join( - file_utils.get_project_base_directory(), - "conf", - "private.pem") - rsa_key = RSA.importKey(open(file_path).read(), "Welcome") - cipher = Cipher_pkcs1_v1_5.new(rsa_key) - return cipher.decrypt(base64.b64decode( - line), "Fail to decrypt password!").decode('utf-8') - - -def download_img(url): - if not url: - return "" - response = requests.get(url) - return "data:" + \ - response.headers.get('Content-Type', 'image/jpg') + ";" + \ - "base64," + base64.b64encode(response.content).decode("utf-8") +# +# Copyright 2024 
The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import base64 +import datetime +import io +import json +import os +import pickle +import socket +import time +import uuid +import requests +from enum import Enum, IntEnum +import importlib +from Cryptodome.PublicKey import RSA +from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5 + +from filelock import FileLock + +from . import file_utils + +SERVICE_CONF = "service_conf.yaml" + + +def conf_realpath(conf_name): + conf_path = f"conf/{conf_name}" + return os.path.join(file_utils.get_project_base_directory(), conf_path) + + +def get_base_config(key, default=None, conf_name=SERVICE_CONF) -> dict: + local_config = {} + local_path = conf_realpath(f'local.{conf_name}') + if default is None: + default = os.environ.get(key.upper()) + + if os.path.exists(local_path): + local_config = file_utils.load_yaml_conf(local_path) + if not isinstance(local_config, dict): + raise ValueError(f'Invalid config file: "{local_path}".') + + if key is not None and key in local_config: + return local_config[key] + + config_path = conf_realpath(conf_name) + config = file_utils.load_yaml_conf(config_path) + + if not isinstance(config, dict): + raise ValueError(f'Invalid config file: "{config_path}".') + + config.update(local_config) + return config.get(key, default) if key is not None else config + + +use_deserialize_safe_module = get_base_config( + 'use_deserialize_safe_module', False) + + +class CoordinationCommunicationProtocol(object): + HTTP = "http" + GRPC = "grpc" + + +class BaseType: + def to_dict(self): + return dict([(k.lstrip("_"), v) for k, v in self.__dict__.items()]) + + def to_dict_with_type(self): + def _dict(obj): + module = None + if issubclass(obj.__class__, BaseType): + data = {} + for attr, v in obj.__dict__.items(): + k = attr.lstrip("_") + data[k] = _dict(v) + module = obj.__module__ + elif isinstance(obj, (list, tuple)): + data = [] + for i, vv in enumerate(obj): + data.append(_dict(vv)) + elif isinstance(obj, dict): + data = {} + for _k, vv in obj.items(): + data[_k] = _dict(vv) + else: + data = obj + return {"type": obj.__class__.__name__, + "data": data, "module": module} + return _dict(self) + + +class CustomJSONEncoder(json.JSONEncoder): + def __init__(self, **kwargs): + self._with_type = kwargs.pop("with_type", False) + super().__init__(**kwargs) + + def default(self, obj): + if isinstance(obj, datetime.datetime): + return obj.strftime('%Y-%m-%d %H:%M:%S') + elif isinstance(obj, datetime.date): + return obj.strftime('%Y-%m-%d') + elif isinstance(obj, datetime.timedelta): + return str(obj) + elif issubclass(type(obj), Enum) or issubclass(type(obj), IntEnum): + return obj.value + elif isinstance(obj, set): + return list(obj) + elif issubclass(type(obj), BaseType): + if not self._with_type: + return obj.to_dict() + else: + return obj.to_dict_with_type() + elif isinstance(obj, type): + return obj.__name__ + else: + return json.JSONEncoder.default(self, obj) + + +def rag_uuid(): + return 
uuid.uuid1().hex + + +def string_to_bytes(string): + return string if isinstance( + string, bytes) else string.encode(encoding="utf-8") + + +def bytes_to_string(byte): + return byte.decode(encoding="utf-8") + + +def json_dumps(src, byte=False, indent=None, with_type=False): + dest = json.dumps( + src, + indent=indent, + cls=CustomJSONEncoder, + with_type=with_type) + if byte: + dest = string_to_bytes(dest) + return dest + + +def json_loads(src, object_hook=None, object_pairs_hook=None): + if isinstance(src, bytes): + src = bytes_to_string(src) + return json.loads(src, object_hook=object_hook, + object_pairs_hook=object_pairs_hook) + + +def current_timestamp(): + return int(time.time() * 1000) + + +def timestamp_to_date(timestamp, format_string="%Y-%m-%d %H:%M:%S"): + if not timestamp: + timestamp = time.time() + timestamp = int(timestamp) / 1000 + time_array = time.localtime(timestamp) + str_date = time.strftime(format_string, time_array) + return str_date + + +def date_string_to_timestamp(time_str, format_string="%Y-%m-%d %H:%M:%S"): + time_array = time.strptime(time_str, format_string) + time_stamp = int(time.mktime(time_array) * 1000) + return time_stamp + + +def serialize_b64(src, to_str=False): + dest = base64.b64encode(pickle.dumps(src)) + if not to_str: + return dest + else: + return bytes_to_string(dest) + + +def deserialize_b64(src): + src = base64.b64decode( + string_to_bytes(src) if isinstance( + src, str) else src) + if use_deserialize_safe_module: + return restricted_loads(src) + return pickle.loads(src) + + +safe_module = { + 'numpy', + 'rag_flow' +} + + +class RestrictedUnpickler(pickle.Unpickler): + def find_class(self, module, name): + import importlib + if module.split('.')[0] in safe_module: + _module = importlib.import_module(module) + return getattr(_module, name) + # Forbid everything else. 
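+        # (a hostile pickle can execute arbitrary code via __reduce__, so any
+        # module outside the safe_module whitelist is rejected)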
+ raise pickle.UnpicklingError("global '%s.%s' is forbidden" % + (module, name)) + + +def restricted_loads(src): + """Helper function analogous to pickle.loads().""" + return RestrictedUnpickler(io.BytesIO(src)).load() + + +def get_lan_ip(): + if os.name != "nt": + import fcntl + import struct + + def get_interface_ip(ifname): + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + return socket.inet_ntoa( + fcntl.ioctl(s.fileno(), 0x8915, struct.pack('256s', string_to_bytes(ifname[:15])))[20:24]) + + ip = socket.gethostbyname(socket.getfqdn()) + if ip.startswith("127.") and os.name != "nt": + interfaces = [ + "bond1", + "eth0", + "eth1", + "eth2", + "wlan0", + "wlan1", + "wifi0", + "ath0", + "ath1", + "ppp0", + ] + for ifname in interfaces: + try: + ip = get_interface_ip(ifname) + break + except IOError as e: + pass + return ip or '' + + +def from_dict_hook(in_dict: dict): + if "type" in in_dict and "data" in in_dict: + if in_dict["module"] is None: + return in_dict["data"] + else: + return getattr(importlib.import_module( + in_dict["module"]), in_dict["type"])(**in_dict["data"]) + else: + return in_dict + + +def decrypt_database_password(password): + encrypt_password = get_base_config("encrypt_password", False) + encrypt_module = get_base_config("encrypt_module", False) + private_key = get_base_config("private_key", None) + + if not password or not encrypt_password: + return password + + if not private_key: + raise ValueError("No private key") + + module_fun = encrypt_module.split("#") + pwdecrypt_fun = getattr( + importlib.import_module( + module_fun[0]), + module_fun[1]) + + return pwdecrypt_fun(private_key, password) + + +def decrypt_database_config( + database=None, passwd_key="password", name="database"): + if not database: + database = get_base_config(name, {}) + + database[passwd_key] = decrypt_database_password(database[passwd_key]) + return database + + +def update_config(key, value, conf_name=SERVICE_CONF): + conf_path = conf_realpath(conf_name=conf_name) + if not os.path.isabs(conf_path): + conf_path = os.path.join( + file_utils.get_project_base_directory(), conf_path) + + with FileLock(os.path.join(os.path.dirname(conf_path), ".lock")): + config = file_utils.load_yaml_conf(conf_path=conf_path) or {} + config[key] = value + file_utils.rewrite_yaml_conf(conf_path=conf_path, config=config) + + +def get_uuid(): + return uuid.uuid1().hex + + +def datetime_format(date_time: datetime.datetime) -> datetime.datetime: + return datetime.datetime(date_time.year, date_time.month, date_time.day, + date_time.hour, date_time.minute, date_time.second) + + +def get_format_time() -> datetime.datetime: + return datetime_format(datetime.datetime.now()) + + +def str2date(date_time: str): + return datetime.datetime.strptime(date_time, '%Y-%m-%d') + + +def elapsed2time(elapsed): + seconds = elapsed / 1000 + minuter, second = divmod(seconds, 60) + hour, minuter = divmod(minuter, 60) + return '%02d:%02d:%02d' % (hour, minuter, second) + + +def decrypt(line): + file_path = os.path.join( + file_utils.get_project_base_directory(), + "conf", + "private.pem") + rsa_key = RSA.importKey(open(file_path).read(), "Welcome") + cipher = Cipher_pkcs1_v1_5.new(rsa_key) + return cipher.decrypt(base64.b64decode( + line), "Fail to decrypt password!").decode('utf-8') + + +def download_img(url): + if not url: + return "" + response = requests.get(url) + return "data:" + \ + response.headers.get('Content-Type', 'image/jpg') + ";" + \ + "base64," + base64.b64encode(response.content).decode("utf-8") diff --git 
a/api/utils/api_utils.py b/api/utils/api_utils.py index 30ba84c85447bcfe4668da4dacb78af467127480..6d1d7c6a337695795e139ee3f1d54c4442656353 100644 --- a/api/utils/api_utils.py +++ b/api/utils/api_utils.py @@ -1,269 +1,269 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import json -import random -import time -from functools import wraps -from io import BytesIO -from flask import ( - Response, jsonify, send_file, make_response, - request as flask_request, -) -from werkzeug.http import HTTP_STATUS_CODES - -from api.utils import json_dumps -from api.settings import RetCode -from api.settings import ( - REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC, - stat_logger, CLIENT_AUTHENTICATION, HTTP_APP_KEY, SECRET_KEY -) -import requests -import functools -from api.utils import CustomJSONEncoder -from uuid import uuid1 -from base64 import b64encode -from hmac import HMAC -from urllib.parse import quote, urlencode - -requests.models.complexjson.dumps = functools.partial( - json.dumps, cls=CustomJSONEncoder) - - -def request(**kwargs): - sess = requests.Session() - stream = kwargs.pop('stream', sess.stream) - timeout = kwargs.pop('timeout', None) - kwargs['headers'] = { - k.replace( - '_', - '-').upper(): v for k, - v in kwargs.get( - 'headers', - {}).items()} - prepped = requests.Request(**kwargs).prepare() - - if CLIENT_AUTHENTICATION and HTTP_APP_KEY and SECRET_KEY: - timestamp = str(round(time() * 1000)) - nonce = str(uuid1()) - signature = b64encode(HMAC(SECRET_KEY.encode('ascii'), b'\n'.join([ - timestamp.encode('ascii'), - nonce.encode('ascii'), - HTTP_APP_KEY.encode('ascii'), - prepped.path_url.encode('ascii'), - prepped.body if kwargs.get('json') else b'', - urlencode( - sorted( - kwargs['data'].items()), - quote_via=quote, - safe='-._~').encode('ascii') - if kwargs.get('data') and isinstance(kwargs['data'], dict) else b'', - ]), 'sha1').digest()).decode('ascii') - - prepped.headers.update({ - 'TIMESTAMP': timestamp, - 'NONCE': nonce, - 'APP-KEY': HTTP_APP_KEY, - 'SIGNATURE': signature, - }) - - return sess.send(prepped, stream=stream, timeout=timeout) - - -def get_exponential_backoff_interval(retries, full_jitter=False): - """Calculate the exponential backoff wait time.""" - # Will be zero if factor equals 0 - countdown = min(REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC * (2 ** retries)) - # Full jitter according to - # https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ - if full_jitter: - countdown = random.randrange(countdown + 1) - # Adjust according to maximum wait time and account for negative values. 
- return max(0, countdown) - - -def get_json_result(retcode=RetCode.SUCCESS, retmsg='success', - data=None, job_id=None, meta=None): - import re - result_dict = { - "retcode": retcode, - "retmsg": retmsg, - # "retmsg": re.sub(r"rag", "seceum", retmsg, flags=re.IGNORECASE), - "data": data, - "jobId": job_id, - "meta": meta, - } - - response = {} - for key, value in result_dict.items(): - if value is None and key != "retcode": - continue - else: - response[key] = value - return jsonify(response) - - -def get_data_error_result(retcode=RetCode.DATA_ERROR, - retmsg='Sorry! Data missing!'): - import re - result_dict = { - "retcode": retcode, - "retmsg": re.sub( - r"rag", - "seceum", - retmsg, - flags=re.IGNORECASE)} - response = {} - for key, value in result_dict.items(): - if value is None and key != "retcode": - continue - else: - response[key] = value - return jsonify(response) - - -def server_error_response(e): - stat_logger.exception(e) - try: - if e.code == 401: - return get_json_result(retcode=401, retmsg=repr(e)) - except BaseException: - pass - if len(e.args) > 1: - return get_json_result( - retcode=RetCode.EXCEPTION_ERROR, retmsg=repr(e.args[0]), data=e.args[1]) - if repr(e).find("index_not_found_exception") >= 0: - return get_json_result(retcode=RetCode.EXCEPTION_ERROR, retmsg="No chunk found, please upload file and parse it.") - - return get_json_result(retcode=RetCode.EXCEPTION_ERROR, retmsg=repr(e)) - - -def error_response(response_code, retmsg=None): - if retmsg is None: - retmsg = HTTP_STATUS_CODES.get(response_code, 'Unknown Error') - - return Response(json.dumps({ - 'retmsg': retmsg, - 'retcode': response_code, - }), status=response_code, mimetype='application/json') - - -def validate_request(*args, **kwargs): - def wrapper(func): - @wraps(func) - def decorated_function(*_args, **_kwargs): - input_arguments = flask_request.json or flask_request.form.to_dict() - no_arguments = [] - error_arguments = [] - for arg in args: - if arg not in input_arguments: - no_arguments.append(arg) - for k, v in kwargs.items(): - config_value = input_arguments.get(k, None) - if config_value is None: - no_arguments.append(k) - elif isinstance(v, (tuple, list)): - if config_value not in v: - error_arguments.append((k, set(v))) - elif config_value != v: - error_arguments.append((k, v)) - if no_arguments or error_arguments: - error_string = "" - if no_arguments: - error_string += "required argument are missing: {}; ".format( - ",".join(no_arguments)) - if error_arguments: - error_string += "required argument values: {}".format( - ",".join(["{}={}".format(a[0], a[1]) for a in error_arguments])) - return get_json_result( - retcode=RetCode.ARGUMENT_ERROR, retmsg=error_string) - return func(*_args, **_kwargs) - return decorated_function - return wrapper - - -def is_localhost(ip): - return ip in {'127.0.0.1', '::1', '[::1]', 'localhost'} - - -def send_file_in_mem(data, filename): - if not isinstance(data, (str, bytes)): - data = json_dumps(data) - if isinstance(data, str): - data = data.encode('utf-8') - - f = BytesIO() - f.write(data) - f.seek(0) - - return send_file(f, as_attachment=True, attachment_filename=filename) - - -def get_json_result(retcode=RetCode.SUCCESS, retmsg='success', data=None): - response = {"retcode": retcode, "retmsg": retmsg, "data": data} - return jsonify(response) - - -def cors_reponse(retcode=RetCode.SUCCESS, - retmsg='success', data=None, auth=None): - result_dict = {"retcode": retcode, "retmsg": retmsg, "data": data} - response_dict = {} - for key, value in result_dict.items(): 
- if value is None and key != "retcode": - continue - else: - response_dict[key] = value - response = make_response(jsonify(response_dict)) - if auth: - response.headers["Authorization"] = auth - response.headers["Access-Control-Allow-Origin"] = "*" - response.headers["Access-Control-Allow-Method"] = "*" - response.headers["Access-Control-Allow-Headers"] = "*" - response.headers["Access-Control-Allow-Headers"] = "*" - response.headers["Access-Control-Expose-Headers"] = "Authorization" - return response - -def construct_result(code=RetCode.DATA_ERROR, message='data is missing'): - import re - result_dict = {"code": code, "message": re.sub(r"rag", "seceum", message, flags=re.IGNORECASE)} - response = {} - for key, value in result_dict.items(): - if value is None and key != "code": - continue - else: - response[key] = value - return jsonify(response) - - -def construct_json_result(code=RetCode.SUCCESS, message='success', data=None): - if data is None: - return jsonify({"code": code, "message": message}) - else: - return jsonify({"code": code, "message": message, "data": data}) - - -def construct_error_response(e): - stat_logger.exception(e) - try: - if e.code == 401: - return construct_json_result(code=RetCode.UNAUTHORIZED, message=repr(e)) - except BaseException: - pass - if len(e.args) > 1: - return construct_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=e.args[1]) - if repr(e).find("index_not_found_exception") >=0: - return construct_json_result(code=RetCode.EXCEPTION_ERROR, message="No chunk found, please upload file and parse it.") - - return construct_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e)) +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+import json
+import random
+import time
+from functools import wraps
+from io import BytesIO
+from flask import (
+    Response, jsonify, send_file, make_response,
+    request as flask_request,
+)
+from werkzeug.http import HTTP_STATUS_CODES
+
+from api.utils import json_dumps
+from api.settings import RetCode
+from api.settings import (
+    REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC,
+    stat_logger, CLIENT_AUTHENTICATION, HTTP_APP_KEY, SECRET_KEY
+)
+import requests
+import functools
+from api.utils import CustomJSONEncoder
+from uuid import uuid1
+from base64 import b64encode
+from hmac import HMAC
+from urllib.parse import quote, urlencode
+
+requests.models.complexjson.dumps = functools.partial(
+    json.dumps, cls=CustomJSONEncoder)
+
+
+def request(**kwargs):
+    sess = requests.Session()
+    stream = kwargs.pop('stream', sess.stream)
+    timeout = kwargs.pop('timeout', None)
+    kwargs['headers'] = {
+        k.replace(
+            '_',
+            '-').upper(): v for k,
+        v in kwargs.get(
+            'headers',
+            {}).items()}
+    prepped = requests.Request(**kwargs).prepare()
+
+    if CLIENT_AUTHENTICATION and HTTP_APP_KEY and SECRET_KEY:
+        # `time` is the module imported above, so it must be called as
+        # time.time(); multiplying by 1000 yields a millisecond timestamp.
+        timestamp = str(round(time.time() * 1000))
+        nonce = str(uuid1())
+        signature = b64encode(HMAC(SECRET_KEY.encode('ascii'), b'\n'.join([
+            timestamp.encode('ascii'),
+            nonce.encode('ascii'),
+            HTTP_APP_KEY.encode('ascii'),
+            prepped.path_url.encode('ascii'),
+            prepped.body if kwargs.get('json') else b'',
+            urlencode(
+                sorted(
+                    kwargs['data'].items()),
+                quote_via=quote,
+                safe='-._~').encode('ascii')
+            if kwargs.get('data') and isinstance(kwargs['data'], dict) else b'',
+        ]), 'sha1').digest()).decode('ascii')
+
+        prepped.headers.update({
+            'TIMESTAMP': timestamp,
+            'NONCE': nonce,
+            'APP-KEY': HTTP_APP_KEY,
+            'SIGNATURE': signature,
+        })
+
+    return sess.send(prepped, stream=stream, timeout=timeout)
+
+
+def get_exponential_backoff_interval(retries, full_jitter=False):
+    """Calculate the exponential backoff wait time."""
+    # Will be zero if factor equals 0
+    countdown = min(REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC * (2 ** retries))
+    # Full jitter according to
+    # https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
+    if full_jitter:
+        countdown = random.randrange(countdown + 1)
+    # Adjust according to maximum wait time and account for negative values.
+    return max(0, countdown)
+
+
+def get_json_result(retcode=RetCode.SUCCESS, retmsg='success',
+                    data=None, job_id=None, meta=None):
+    import re
+    result_dict = {
+        "retcode": retcode,
+        "retmsg": retmsg,
+        # "retmsg": re.sub(r"rag", "seceum", retmsg, flags=re.IGNORECASE),
+        "data": data,
+        "jobId": job_id,
+        "meta": meta,
+    }
+
+    response = {}
+    for key, value in result_dict.items():
+        if value is None and key != "retcode":
+            continue
+        else:
+            response[key] = value
+    return jsonify(response)
+
+
+def get_data_error_result(retcode=RetCode.DATA_ERROR,
+                          retmsg='Sorry!
Data missing!'):
+    import re
+    result_dict = {
+        "retcode": retcode,
+        "retmsg": re.sub(
+            r"rag",
+            "seceum",
+            retmsg,
+            flags=re.IGNORECASE)}
+    response = {}
+    for key, value in result_dict.items():
+        if value is None and key != "retcode":
+            continue
+        else:
+            response[key] = value
+    return jsonify(response)
+
+
+def server_error_response(e):
+    stat_logger.exception(e)
+    try:
+        if e.code == 401:
+            return get_json_result(retcode=401, retmsg=repr(e))
+    except BaseException:
+        pass
+    if len(e.args) > 1:
+        return get_json_result(
+            retcode=RetCode.EXCEPTION_ERROR, retmsg=repr(e.args[0]), data=e.args[1])
+    if repr(e).find("index_not_found_exception") >= 0:
+        return get_json_result(retcode=RetCode.EXCEPTION_ERROR, retmsg="No chunk found, please upload file and parse it.")
+
+    return get_json_result(retcode=RetCode.EXCEPTION_ERROR, retmsg=repr(e))
+
+
+def error_response(response_code, retmsg=None):
+    if retmsg is None:
+        retmsg = HTTP_STATUS_CODES.get(response_code, 'Unknown Error')
+
+    return Response(json.dumps({
+        'retmsg': retmsg,
+        'retcode': response_code,
+    }), status=response_code, mimetype='application/json')
+
+
+def validate_request(*args, **kwargs):
+    def wrapper(func):
+        @wraps(func)
+        def decorated_function(*_args, **_kwargs):
+            input_arguments = flask_request.json or flask_request.form.to_dict()
+            no_arguments = []
+            error_arguments = []
+            for arg in args:
+                if arg not in input_arguments:
+                    no_arguments.append(arg)
+            for k, v in kwargs.items():
+                config_value = input_arguments.get(k, None)
+                if config_value is None:
+                    no_arguments.append(k)
+                elif isinstance(v, (tuple, list)):
+                    if config_value not in v:
+                        error_arguments.append((k, set(v)))
+                elif config_value != v:
+                    error_arguments.append((k, v))
+            if no_arguments or error_arguments:
+                error_string = ""
+                if no_arguments:
+                    error_string += "required arguments are missing: {}; ".format(
+                        ",".join(no_arguments))
+                if error_arguments:
+                    error_string += "required argument values: {}".format(
+                        ",".join(["{}={}".format(a[0], a[1]) for a in error_arguments]))
+                return get_json_result(
+                    retcode=RetCode.ARGUMENT_ERROR, retmsg=error_string)
+            return func(*_args, **_kwargs)
+        return decorated_function
+    return wrapper
+
+
+def is_localhost(ip):
+    return ip in {'127.0.0.1', '::1', '[::1]', 'localhost'}
+
+
+def send_file_in_mem(data, filename):
+    if not isinstance(data, (str, bytes)):
+        data = json_dumps(data)
+    if isinstance(data, str):
+        data = data.encode('utf-8')
+
+    f = BytesIO()
+    f.write(data)
+    f.seek(0)
+
+    # NOTE: `attachment_filename` only exists on Flask < 2.2; newer Flask
+    # versions rename this parameter to `download_name`.
+    return send_file(f, as_attachment=True, attachment_filename=filename)
+
+
+# NOTE: this narrower signature shadows the get_json_result defined above,
+# so the job_id/meta parameters of the first definition are never used.
+def get_json_result(retcode=RetCode.SUCCESS, retmsg='success', data=None):
+    response = {"retcode": retcode, "retmsg": retmsg, "data": data}
+    return jsonify(response)
+
+
+def cors_reponse(retcode=RetCode.SUCCESS,
+                 retmsg='success', data=None, auth=None):
+    result_dict = {"retcode": retcode, "retmsg": retmsg, "data": data}
+    response_dict = {}
+    for key, value in result_dict.items():
+        if value is None and key != "retcode":
+            continue
+        else:
+            response_dict[key] = value
+    response = make_response(jsonify(response_dict))
+    if auth:
+        response.headers["Authorization"] = auth
+    response.headers["Access-Control-Allow-Origin"] = "*"
+    response.headers["Access-Control-Allow-Methods"] = "*"
+    response.headers["Access-Control-Allow-Headers"] = "*"
+    response.headers["Access-Control-Expose-Headers"] = "Authorization"
+    return response
+
+def construct_result(code=RetCode.DATA_ERROR, message='data is
missing'): + import re + result_dict = {"code": code, "message": re.sub(r"rag", "seceum", message, flags=re.IGNORECASE)} + response = {} + for key, value in result_dict.items(): + if value is None and key != "code": + continue + else: + response[key] = value + return jsonify(response) + + +def construct_json_result(code=RetCode.SUCCESS, message='success', data=None): + if data is None: + return jsonify({"code": code, "message": message}) + else: + return jsonify({"code": code, "message": message, "data": data}) + + +def construct_error_response(e): + stat_logger.exception(e) + try: + if e.code == 401: + return construct_json_result(code=RetCode.UNAUTHORIZED, message=repr(e)) + except BaseException: + pass + if len(e.args) > 1: + return construct_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=e.args[1]) + if repr(e).find("index_not_found_exception") >=0: + return construct_json_result(code=RetCode.EXCEPTION_ERROR, message="No chunk found, please upload file and parse it.") + + return construct_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e)) diff --git a/api/utils/commands.py b/api/utils/commands.py index c1c41ab0dedb9f6e36359dddda52fdcccf310a90..a1a8d025aca035b2c17873d873ba67f5b3cd7fcc 100644 --- a/api/utils/commands.py +++ b/api/utils/commands.py @@ -1,78 +1,78 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import base64 -import click -import re - -from flask import Flask -from werkzeug.security import generate_password_hash - -from api.db.services import UserService - - -@click.command('reset-password', help='Reset the account password.') -@click.option('--email', prompt=True, help='The email address of the account whose password you need to reset') -@click.option('--new-password', prompt=True, help='the new password.') -@click.option('--password-confirm', prompt=True, help='the new password confirm.') -def reset_password(email, new_password, password_confirm): - if str(new_password).strip() != str(password_confirm).strip(): - click.echo(click.style('sorry. The two passwords do not match.', fg='red')) - return - user = UserService.query(email=email) - if not user: - click.echo(click.style('sorry. The Email is not registered!.', fg='red')) - return - encode_password = base64.b64encode(new_password.encode('utf-8')).decode('utf-8') - password_hash = generate_password_hash(encode_password) - user_dict = { - 'password': password_hash - } - UserService.update_user(user[0].id,user_dict) - click.echo(click.style('Congratulations! 
Password has been reset.', fg='green'))
-
-
-@click.command('reset-email', help='Reset the account email.')
-@click.option('--email', prompt=True, help='The old email address of the account whose email you need to reset')
-@click.option('--new-email', prompt=True, help='the new email.')
-@click.option('--email-confirm', prompt=True, help='the new email confirm.')
-def reset_email(email, new_email, email_confirm):
-    if str(new_email).strip() != str(email_confirm).strip():
-        click.echo(click.style('Sorry, new email and confirm email do not match.', fg='red'))
-        return
-    if str(new_email).strip() == str(email).strip():
-        click.echo(click.style('Sorry, new email and old email are the same.', fg='red'))
-        return
-    user = UserService.query(email=email)
-    if not user:
-        click.echo(click.style('sorry. the account: [{}] not exist .'.format(email), fg='red'))
-        return
-    if not re.match(r"^[\w\._-]+@([\w_-]+\.)+[\w-]{2,4}$", new_email):
-        click.echo(click.style('sorry. {} is not a valid email. '.format(new_email), fg='red'))
-        return
-    new_user = UserService.query(email=new_email)
-    if new_user:
-        click.echo(click.style('sorry. the account: [{}] is exist .'.format(new_email), fg='red'))
-        return
-    user_dict = {
-        'email': new_email
-    }
-    UserService.update_user(user[0].id,user_dict)
-    click.echo(click.style('Congratulations!, email has been reset.', fg='green'))
-
-def register_commands(app: Flask):
-    app.cli.add_command(reset_password)
-    app.cli.add_command(reset_email)
+#
+# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import base64
+import click
+import re
+
+from flask import Flask
+from werkzeug.security import generate_password_hash
+
+from api.db.services import UserService
+
+
+@click.command('reset-password', help='Reset the account password.')
+@click.option('--email', prompt=True, help='The email address of the account whose password you need to reset')
+@click.option('--new-password', prompt=True, help='The new password.')
+@click.option('--password-confirm', prompt=True, help='Confirmation of the new password.')
+def reset_password(email, new_password, password_confirm):
+    if str(new_password).strip() != str(password_confirm).strip():
+        click.echo(click.style('Sorry, the two passwords do not match.', fg='red'))
+        return
+    user = UserService.query(email=email)
+    if not user:
+        click.echo(click.style('Sorry, this email is not registered.', fg='red'))
+        return
+    encode_password = base64.b64encode(new_password.encode('utf-8')).decode('utf-8')
+    password_hash = generate_password_hash(encode_password)
+    user_dict = {
+        'password': password_hash
+    }
+    UserService.update_user(user[0].id, user_dict)
+    click.echo(click.style('Congratulations!
Password has been reset.', fg='green'))
+
+
+@click.command('reset-email', help='Reset the account email.')
+@click.option('--email', prompt=True, help='The old email address of the account whose email you need to reset')
+@click.option('--new-email', prompt=True, help='The new email.')
+@click.option('--email-confirm', prompt=True, help='Confirmation of the new email.')
+def reset_email(email, new_email, email_confirm):
+    if str(new_email).strip() != str(email_confirm).strip():
+        click.echo(click.style('Sorry, new email and confirm email do not match.', fg='red'))
+        return
+    if str(new_email).strip() == str(email).strip():
+        click.echo(click.style('Sorry, new email and old email are the same.', fg='red'))
+        return
+    user = UserService.query(email=email)
+    if not user:
+        click.echo(click.style('Sorry, the account [{}] does not exist.'.format(email), fg='red'))
+        return
+    if not re.match(r"^[\w\._-]+@([\w_-]+\.)+[\w-]{2,4}$", new_email):
+        click.echo(click.style('Sorry, {} is not a valid email.'.format(new_email), fg='red'))
+        return
+    new_user = UserService.query(email=new_email)
+    if new_user:
+        click.echo(click.style('Sorry, the account [{}] already exists.'.format(new_email), fg='red'))
+        return
+    user_dict = {
+        'email': new_email
+    }
+    UserService.update_user(user[0].id, user_dict)
+    click.echo(click.style('Congratulations! Email has been reset.', fg='green'))
+
+def register_commands(app: Flask):
+    app.cli.add_command(reset_password)
+    app.cli.add_command(reset_email)
diff --git a/api/utils/file_utils.py b/api/utils/file_utils.py
index b3af462cab1498ced54048059e145289f25365bc..807d52741203d501554eee1759b89e940cd17987 100644
--- a/api/utils/file_utils.py
+++ b/api/utils/file_utils.py
@@ -1,207 +1,207 @@
-#
-# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# -import base64 -import json -import os -import re -from io import BytesIO - -import pdfplumber -from PIL import Image -from cachetools import LRUCache, cached -from ruamel.yaml import YAML - -from api.db import FileType - -PROJECT_BASE = os.getenv("RAG_PROJECT_BASE") or os.getenv("RAG_DEPLOY_BASE") -RAG_BASE = os.getenv("RAG_BASE") - - -def get_project_base_directory(*args): - global PROJECT_BASE - if PROJECT_BASE is None: - PROJECT_BASE = os.path.abspath( - os.path.join( - os.path.dirname(os.path.realpath(__file__)), - os.pardir, - os.pardir, - ) - ) - - if args: - return os.path.join(PROJECT_BASE, *args) - return PROJECT_BASE - - -def get_rag_directory(*args): - global RAG_BASE - if RAG_BASE is None: - RAG_BASE = os.path.abspath( - os.path.join( - os.path.dirname(os.path.realpath(__file__)), - os.pardir, - os.pardir, - os.pardir, - ) - ) - if args: - return os.path.join(RAG_BASE, *args) - return RAG_BASE - - -def get_rag_python_directory(*args): - return get_rag_directory("python", *args) - - -def get_home_cache_dir(): - dir = os.path.join(os.path.expanduser('~'), ".ragflow") - try: - os.mkdir(dir) - except OSError as error: - pass - return dir - - -@cached(cache=LRUCache(maxsize=10)) -def load_json_conf(conf_path): - if os.path.isabs(conf_path): - json_conf_path = conf_path - else: - json_conf_path = os.path.join(get_project_base_directory(), conf_path) - try: - with open(json_conf_path) as f: - return json.load(f) - except BaseException: - raise EnvironmentError( - "loading json file config from '{}' failed!".format(json_conf_path) - ) - - -def dump_json_conf(config_data, conf_path): - if os.path.isabs(conf_path): - json_conf_path = conf_path - else: - json_conf_path = os.path.join(get_project_base_directory(), conf_path) - try: - with open(json_conf_path, "w") as f: - json.dump(config_data, f, indent=4) - except BaseException: - raise EnvironmentError( - "loading json file config from '{}' failed!".format(json_conf_path) - ) - - -def load_json_conf_real_time(conf_path): - if os.path.isabs(conf_path): - json_conf_path = conf_path - else: - json_conf_path = os.path.join(get_project_base_directory(), conf_path) - try: - with open(json_conf_path) as f: - return json.load(f) - except BaseException: - raise EnvironmentError( - "loading json file config from '{}' failed!".format(json_conf_path) - ) - - -def load_yaml_conf(conf_path): - if not os.path.isabs(conf_path): - conf_path = os.path.join(get_project_base_directory(), conf_path) - try: - with open(conf_path) as f: - yaml = YAML(typ='safe', pure=True) - return yaml.load(f) - except Exception as e: - raise EnvironmentError( - "loading yaml file config from {} failed:".format(conf_path), e - ) - - -def rewrite_yaml_conf(conf_path, config): - if not os.path.isabs(conf_path): - conf_path = os.path.join(get_project_base_directory(), conf_path) - try: - with open(conf_path, "w") as f: - yaml = YAML(typ="safe") - yaml.dump(config, f) - except Exception as e: - raise EnvironmentError( - "rewrite yaml file config {} failed:".format(conf_path), e - ) - - -def rewrite_json_file(filepath, json_data): - with open(filepath, "w") as f: - json.dump(json_data, f, indent=4, separators=(",", ": ")) - f.close() - - -def filename_type(filename): - filename = filename.lower() - if re.match(r".*\.pdf$", filename): - return FileType.PDF.value - - if re.match( - r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename): - return FileType.DOC.value - - if 
re.match( - r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus|mp3)$", filename): - return FileType.AURAL.value - - if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4)$", filename): - return FileType.VISUAL.value - - return FileType.OTHER.value - - -def thumbnail(filename, blob): - filename = filename.lower() - if re.match(r".*\.pdf$", filename): - pdf = pdfplumber.open(BytesIO(blob)) - buffered = BytesIO() - pdf.pages[0].to_image(resolution=32).annotated.save(buffered, format="png") - return "data:image/png;base64," + \ - base64.b64encode(buffered.getvalue()).decode("utf-8") - - if re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename): - image = Image.open(BytesIO(blob)) - image.thumbnail((30, 30)) - buffered = BytesIO() - image.save(buffered, format="png") - return "data:image/png;base64," + \ - base64.b64encode(buffered.getvalue()).decode("utf-8") - - if re.match(r".*\.(ppt|pptx)$", filename): - import aspose.slides as slides - import aspose.pydrawing as drawing - try: - with slides.Presentation(BytesIO(blob)) as presentation: - buffered = BytesIO() - presentation.slides[0].get_thumbnail(0.03, 0.03).save( - buffered, drawing.imaging.ImageFormat.png) - return "data:image/png;base64," + \ - base64.b64encode(buffered.getvalue()).decode("utf-8") - except Exception as e: - pass - - -def traversal_files(base): - for root, ds, fs in os.walk(base): - for f in fs: - fullname = os.path.join(root, f) - yield fullname +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import base64 +import json +import os +import re +from io import BytesIO + +import pdfplumber +from PIL import Image +from cachetools import LRUCache, cached +from ruamel.yaml import YAML + +from api.db import FileType + +PROJECT_BASE = os.getenv("RAG_PROJECT_BASE") or os.getenv("RAG_DEPLOY_BASE") +RAG_BASE = os.getenv("RAG_BASE") + + +def get_project_base_directory(*args): + global PROJECT_BASE + if PROJECT_BASE is None: + PROJECT_BASE = os.path.abspath( + os.path.join( + os.path.dirname(os.path.realpath(__file__)), + os.pardir, + os.pardir, + ) + ) + + if args: + return os.path.join(PROJECT_BASE, *args) + return PROJECT_BASE + + +def get_rag_directory(*args): + global RAG_BASE + if RAG_BASE is None: + RAG_BASE = os.path.abspath( + os.path.join( + os.path.dirname(os.path.realpath(__file__)), + os.pardir, + os.pardir, + os.pardir, + ) + ) + if args: + return os.path.join(RAG_BASE, *args) + return RAG_BASE + + +def get_rag_python_directory(*args): + return get_rag_directory("python", *args) + + +def get_home_cache_dir(): + dir = os.path.join(os.path.expanduser('~'), ".ragflow") + try: + os.mkdir(dir) + except OSError as error: + pass + return dir + + +@cached(cache=LRUCache(maxsize=10)) +def load_json_conf(conf_path): + if os.path.isabs(conf_path): + json_conf_path = conf_path + else: + json_conf_path = os.path.join(get_project_base_directory(), conf_path) + try: + with open(json_conf_path) as f: + return json.load(f) + except BaseException: + raise EnvironmentError( + "loading json file config from '{}' failed!".format(json_conf_path) + ) + + +def dump_json_conf(config_data, conf_path): + if os.path.isabs(conf_path): + json_conf_path = conf_path + else: + json_conf_path = os.path.join(get_project_base_directory(), conf_path) + try: + with open(json_conf_path, "w") as f: + json.dump(config_data, f, indent=4) + except BaseException: + raise EnvironmentError( + "loading json file config from '{}' failed!".format(json_conf_path) + ) + + +def load_json_conf_real_time(conf_path): + if os.path.isabs(conf_path): + json_conf_path = conf_path + else: + json_conf_path = os.path.join(get_project_base_directory(), conf_path) + try: + with open(json_conf_path) as f: + return json.load(f) + except BaseException: + raise EnvironmentError( + "loading json file config from '{}' failed!".format(json_conf_path) + ) + + +def load_yaml_conf(conf_path): + if not os.path.isabs(conf_path): + conf_path = os.path.join(get_project_base_directory(), conf_path) + try: + with open(conf_path) as f: + yaml = YAML(typ='safe', pure=True) + return yaml.load(f) + except Exception as e: + raise EnvironmentError( + "loading yaml file config from {} failed:".format(conf_path), e + ) + + +def rewrite_yaml_conf(conf_path, config): + if not os.path.isabs(conf_path): + conf_path = os.path.join(get_project_base_directory(), conf_path) + try: + with open(conf_path, "w") as f: + yaml = YAML(typ="safe") + yaml.dump(config, f) + except Exception as e: + raise EnvironmentError( + "rewrite yaml file config {} failed:".format(conf_path), e + ) + + +def rewrite_json_file(filepath, json_data): + with open(filepath, "w") as f: + json.dump(json_data, f, indent=4, separators=(",", ": ")) + f.close() + + +def filename_type(filename): + filename = filename.lower() + if re.match(r".*\.pdf$", filename): + return FileType.PDF.value + + if re.match( + r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename): + return FileType.DOC.value + + if 
re.match( + r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus|mp3)$", filename): + return FileType.AURAL.value + + if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4)$", filename): + return FileType.VISUAL.value + + return FileType.OTHER.value + + +def thumbnail(filename, blob): + filename = filename.lower() + if re.match(r".*\.pdf$", filename): + pdf = pdfplumber.open(BytesIO(blob)) + buffered = BytesIO() + pdf.pages[0].to_image(resolution=32).annotated.save(buffered, format="png") + return "data:image/png;base64," + \ + base64.b64encode(buffered.getvalue()).decode("utf-8") + + if re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename): + image = Image.open(BytesIO(blob)) + image.thumbnail((30, 30)) + buffered = BytesIO() + image.save(buffered, format="png") + return "data:image/png;base64," + \ + base64.b64encode(buffered.getvalue()).decode("utf-8") + + if re.match(r".*\.(ppt|pptx)$", filename): + import aspose.slides as slides + import aspose.pydrawing as drawing + try: + with slides.Presentation(BytesIO(blob)) as presentation: + buffered = BytesIO() + presentation.slides[0].get_thumbnail(0.03, 0.03).save( + buffered, drawing.imaging.ImageFormat.png) + return "data:image/png;base64," + \ + base64.b64encode(buffered.getvalue()).decode("utf-8") + except Exception as e: + pass + + +def traversal_files(base): + for root, ds, fs in os.walk(base): + for f in fs: + fullname = os.path.join(root, f) + yield fullname diff --git a/api/utils/log_utils.py b/api/utils/log_utils.py index e74060d4b54ad3ad0b5276e570836b1f930829b3..5640ffe71289dabe9683053e94be9da190d9d587 100644 --- a/api/utils/log_utils.py +++ b/api/utils/log_utils.py @@ -1,313 +1,313 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import os -import typing -import traceback -import logging -import inspect -from logging.handlers import TimedRotatingFileHandler -from threading import RLock - -from api.utils import file_utils - - -class LoggerFactory(object): - TYPE = "FILE" - LOG_FORMAT = "[%(levelname)s] [%(asctime)s] [%(module)s.%(funcName)s] [line:%(lineno)d]: %(message)s" - logging.basicConfig(format=LOG_FORMAT) - LEVEL = logging.DEBUG - logger_dict = {} - global_handler_dict = {} - - LOG_DIR = None - PARENT_LOG_DIR = None - log_share = True - - append_to_parent_log = None - - lock = RLock() - # CRITICAL = 50 - # FATAL = CRITICAL - # ERROR = 40 - # WARNING = 30 - # WARN = WARNING - # INFO = 20 - # DEBUG = 10 - # NOTSET = 0 - levels = (10, 20, 30, 40) - schedule_logger_dict = {} - - @staticmethod - def set_directory(directory=None, parent_log_dir=None, - append_to_parent_log=None, force=False): - if parent_log_dir: - LoggerFactory.PARENT_LOG_DIR = parent_log_dir - if append_to_parent_log: - LoggerFactory.append_to_parent_log = append_to_parent_log - with LoggerFactory.lock: - if not directory: - directory = file_utils.get_project_base_directory("logs") - if not LoggerFactory.LOG_DIR or force: - LoggerFactory.LOG_DIR = directory - if LoggerFactory.log_share: - oldmask = os.umask(000) - os.makedirs(LoggerFactory.LOG_DIR, exist_ok=True) - os.umask(oldmask) - else: - os.makedirs(LoggerFactory.LOG_DIR, exist_ok=True) - for loggerName, ghandler in LoggerFactory.global_handler_dict.items(): - for className, (logger, - handler) in LoggerFactory.logger_dict.items(): - logger.removeHandler(ghandler) - ghandler.close() - LoggerFactory.global_handler_dict = {} - for className, (logger, - handler) in LoggerFactory.logger_dict.items(): - logger.removeHandler(handler) - _handler = None - if handler: - handler.close() - if className != "default": - _handler = LoggerFactory.get_handler(className) - logger.addHandler(_handler) - LoggerFactory.assemble_global_handler(logger) - LoggerFactory.logger_dict[className] = logger, _handler - - @staticmethod - def new_logger(name): - logger = logging.getLogger(name) - logger.propagate = False - logger.setLevel(LoggerFactory.LEVEL) - return logger - - @staticmethod - def get_logger(class_name=None): - with LoggerFactory.lock: - if class_name in LoggerFactory.logger_dict.keys(): - logger, handler = LoggerFactory.logger_dict[class_name] - if not logger: - logger, handler = LoggerFactory.init_logger(class_name) - else: - logger, handler = LoggerFactory.init_logger(class_name) - return logger - - @staticmethod - def get_global_handler(logger_name, level=None, log_dir=None): - if not LoggerFactory.LOG_DIR: - return logging.StreamHandler() - if log_dir: - logger_name_key = logger_name + "_" + log_dir - else: - logger_name_key = logger_name + "_" + LoggerFactory.LOG_DIR - # if loggerName not in LoggerFactory.globalHandlerDict: - if logger_name_key not in LoggerFactory.global_handler_dict: - with LoggerFactory.lock: - if logger_name_key not in LoggerFactory.global_handler_dict: - handler = LoggerFactory.get_handler( - logger_name, level, log_dir) - LoggerFactory.global_handler_dict[logger_name_key] = handler - return LoggerFactory.global_handler_dict[logger_name_key] - - @staticmethod - def get_handler(class_name, level=None, log_dir=None, - log_type=None, job_id=None): - if not log_type: - if not LoggerFactory.LOG_DIR or not class_name: - return logging.StreamHandler() - # return Diy_StreamHandler() - - if not log_dir: - log_file = os.path.join( - LoggerFactory.LOG_DIR, - 
"{}.log".format(class_name)) - else: - log_file = os.path.join(log_dir, "{}.log".format(class_name)) - else: - log_file = os.path.join(log_dir, "rag_flow_{}.log".format( - log_type) if level == LoggerFactory.LEVEL else 'rag_flow_{}_error.log'.format(log_type)) - - os.makedirs(os.path.dirname(log_file), exist_ok=True) - if LoggerFactory.log_share: - handler = ROpenHandler(log_file, - when='D', - interval=1, - backupCount=14, - delay=True) - else: - handler = TimedRotatingFileHandler(log_file, - when='D', - interval=1, - backupCount=14, - delay=True) - if level: - handler.level = level - - return handler - - @staticmethod - def init_logger(class_name): - with LoggerFactory.lock: - logger = LoggerFactory.new_logger(class_name) - handler = None - if class_name: - handler = LoggerFactory.get_handler(class_name) - logger.addHandler(handler) - LoggerFactory.logger_dict[class_name] = logger, handler - - else: - LoggerFactory.logger_dict["default"] = logger, handler - - LoggerFactory.assemble_global_handler(logger) - return logger, handler - - @staticmethod - def assemble_global_handler(logger): - if LoggerFactory.LOG_DIR: - for level in LoggerFactory.levels: - if level >= LoggerFactory.LEVEL: - level_logger_name = logging._levelToName[level] - logger.addHandler( - LoggerFactory.get_global_handler( - level_logger_name, level)) - if LoggerFactory.append_to_parent_log and LoggerFactory.PARENT_LOG_DIR: - for level in LoggerFactory.levels: - if level >= LoggerFactory.LEVEL: - level_logger_name = logging._levelToName[level] - logger.addHandler( - LoggerFactory.get_global_handler(level_logger_name, level, LoggerFactory.PARENT_LOG_DIR)) - - -def setDirectory(directory=None): - LoggerFactory.set_directory(directory) - - -def setLevel(level): - LoggerFactory.LEVEL = level - - -def getLogger(className=None, useLevelFile=False): - if className is None: - frame = inspect.stack()[1] - module = inspect.getmodule(frame[0]) - className = 'stat' - return LoggerFactory.get_logger(className) - - -def exception_to_trace_string(ex): - return "".join(traceback.TracebackException.from_exception(ex).format()) - - -class ROpenHandler(TimedRotatingFileHandler): - def _open(self): - prevumask = os.umask(000) - rtv = TimedRotatingFileHandler._open(self) - os.umask(prevumask) - return rtv - - -def sql_logger(job_id='', log_type='sql'): - key = job_id + log_type - if key in LoggerFactory.schedule_logger_dict.keys(): - return LoggerFactory.schedule_logger_dict[key] - return get_job_logger(job_id=job_id, log_type=log_type) - - -def ready_log(msg, job=None, task=None, role=None, party_id=None, detail=None): - prefix, suffix = base_msg(job, task, role, party_id, detail) - return f"{prefix}{msg} ready{suffix}" - - -def start_log(msg, job=None, task=None, role=None, party_id=None, detail=None): - prefix, suffix = base_msg(job, task, role, party_id, detail) - return f"{prefix}start to {msg}{suffix}" - - -def successful_log(msg, job=None, task=None, role=None, - party_id=None, detail=None): - prefix, suffix = base_msg(job, task, role, party_id, detail) - return f"{prefix}{msg} successfully{suffix}" - - -def warning_log(msg, job=None, task=None, role=None, - party_id=None, detail=None): - prefix, suffix = base_msg(job, task, role, party_id, detail) - return f"{prefix}{msg} is not effective{suffix}" - - -def failed_log(msg, job=None, task=None, role=None, - party_id=None, detail=None): - prefix, suffix = base_msg(job, task, role, party_id, detail) - return f"{prefix}failed to {msg}{suffix}" - - -def base_msg(job=None, task=None, role: 
str = None, - party_id: typing.Union[str, int] = None, detail=None): - if detail: - detail_msg = f" detail: \n{detail}" - else: - detail_msg = "" - if task is not None: - return f"task {task.f_task_id} {task.f_task_version} ", f" on {task.f_role} {task.f_party_id}{detail_msg}" - elif job is not None: - return "", f" on {job.f_role} {job.f_party_id}{detail_msg}" - elif role and party_id: - return "", f" on {role} {party_id}{detail_msg}" - else: - return "", f"{detail_msg}" - - -def exception_to_trace_string(ex): - return "".join(traceback.TracebackException.from_exception(ex).format()) - - -def get_logger_base_dir(): - job_log_dir = file_utils.get_rag_flow_directory('logs') - return job_log_dir - - -def get_job_logger(job_id, log_type): - rag_flow_log_dir = file_utils.get_rag_flow_directory('logs', 'rag_flow') - job_log_dir = file_utils.get_rag_flow_directory('logs', job_id) - if not job_id: - log_dirs = [rag_flow_log_dir] - else: - if log_type == 'audit': - log_dirs = [job_log_dir, rag_flow_log_dir] - else: - log_dirs = [job_log_dir] - if LoggerFactory.log_share: - oldmask = os.umask(000) - os.makedirs(job_log_dir, exist_ok=True) - os.makedirs(rag_flow_log_dir, exist_ok=True) - os.umask(oldmask) - else: - os.makedirs(job_log_dir, exist_ok=True) - os.makedirs(rag_flow_log_dir, exist_ok=True) - logger = LoggerFactory.new_logger(f"{job_id}_{log_type}") - for job_log_dir in log_dirs: - handler = LoggerFactory.get_handler(class_name=None, level=LoggerFactory.LEVEL, - log_dir=job_log_dir, log_type=log_type, job_id=job_id) - error_handler = LoggerFactory.get_handler( - class_name=None, - level=logging.ERROR, - log_dir=job_log_dir, - log_type=log_type, - job_id=job_id) - logger.addHandler(handler) - logger.addHandler(error_handler) - with LoggerFactory.lock: - LoggerFactory.schedule_logger_dict[job_id + log_type] = logger - return logger +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import os +import typing +import traceback +import logging +import inspect +from logging.handlers import TimedRotatingFileHandler +from threading import RLock + +from api.utils import file_utils + + +class LoggerFactory(object): + TYPE = "FILE" + LOG_FORMAT = "[%(levelname)s] [%(asctime)s] [%(module)s.%(funcName)s] [line:%(lineno)d]: %(message)s" + logging.basicConfig(format=LOG_FORMAT) + LEVEL = logging.DEBUG + logger_dict = {} + global_handler_dict = {} + + LOG_DIR = None + PARENT_LOG_DIR = None + log_share = True + + append_to_parent_log = None + + lock = RLock() + # CRITICAL = 50 + # FATAL = CRITICAL + # ERROR = 40 + # WARNING = 30 + # WARN = WARNING + # INFO = 20 + # DEBUG = 10 + # NOTSET = 0 + levels = (10, 20, 30, 40) + schedule_logger_dict = {} + + @staticmethod + def set_directory(directory=None, parent_log_dir=None, + append_to_parent_log=None, force=False): + if parent_log_dir: + LoggerFactory.PARENT_LOG_DIR = parent_log_dir + if append_to_parent_log: + LoggerFactory.append_to_parent_log = append_to_parent_log + with LoggerFactory.lock: + if not directory: + directory = file_utils.get_project_base_directory("logs") + if not LoggerFactory.LOG_DIR or force: + LoggerFactory.LOG_DIR = directory + if LoggerFactory.log_share: + oldmask = os.umask(000) + os.makedirs(LoggerFactory.LOG_DIR, exist_ok=True) + os.umask(oldmask) + else: + os.makedirs(LoggerFactory.LOG_DIR, exist_ok=True) + for loggerName, ghandler in LoggerFactory.global_handler_dict.items(): + for className, (logger, + handler) in LoggerFactory.logger_dict.items(): + logger.removeHandler(ghandler) + ghandler.close() + LoggerFactory.global_handler_dict = {} + for className, (logger, + handler) in LoggerFactory.logger_dict.items(): + logger.removeHandler(handler) + _handler = None + if handler: + handler.close() + if className != "default": + _handler = LoggerFactory.get_handler(className) + logger.addHandler(_handler) + LoggerFactory.assemble_global_handler(logger) + LoggerFactory.logger_dict[className] = logger, _handler + + @staticmethod + def new_logger(name): + logger = logging.getLogger(name) + logger.propagate = False + logger.setLevel(LoggerFactory.LEVEL) + return logger + + @staticmethod + def get_logger(class_name=None): + with LoggerFactory.lock: + if class_name in LoggerFactory.logger_dict.keys(): + logger, handler = LoggerFactory.logger_dict[class_name] + if not logger: + logger, handler = LoggerFactory.init_logger(class_name) + else: + logger, handler = LoggerFactory.init_logger(class_name) + return logger + + @staticmethod + def get_global_handler(logger_name, level=None, log_dir=None): + if not LoggerFactory.LOG_DIR: + return logging.StreamHandler() + if log_dir: + logger_name_key = logger_name + "_" + log_dir + else: + logger_name_key = logger_name + "_" + LoggerFactory.LOG_DIR + # if loggerName not in LoggerFactory.globalHandlerDict: + if logger_name_key not in LoggerFactory.global_handler_dict: + with LoggerFactory.lock: + if logger_name_key not in LoggerFactory.global_handler_dict: + handler = LoggerFactory.get_handler( + logger_name, level, log_dir) + LoggerFactory.global_handler_dict[logger_name_key] = handler + return LoggerFactory.global_handler_dict[logger_name_key] + + @staticmethod + def get_handler(class_name, level=None, log_dir=None, + log_type=None, job_id=None): + if not log_type: + if not LoggerFactory.LOG_DIR or not class_name: + return logging.StreamHandler() + # return Diy_StreamHandler() + + if not log_dir: + log_file = os.path.join( + LoggerFactory.LOG_DIR, + 
"{}.log".format(class_name)) + else: + log_file = os.path.join(log_dir, "{}.log".format(class_name)) + else: + log_file = os.path.join(log_dir, "rag_flow_{}.log".format( + log_type) if level == LoggerFactory.LEVEL else 'rag_flow_{}_error.log'.format(log_type)) + + os.makedirs(os.path.dirname(log_file), exist_ok=True) + if LoggerFactory.log_share: + handler = ROpenHandler(log_file, + when='D', + interval=1, + backupCount=14, + delay=True) + else: + handler = TimedRotatingFileHandler(log_file, + when='D', + interval=1, + backupCount=14, + delay=True) + if level: + handler.level = level + + return handler + + @staticmethod + def init_logger(class_name): + with LoggerFactory.lock: + logger = LoggerFactory.new_logger(class_name) + handler = None + if class_name: + handler = LoggerFactory.get_handler(class_name) + logger.addHandler(handler) + LoggerFactory.logger_dict[class_name] = logger, handler + + else: + LoggerFactory.logger_dict["default"] = logger, handler + + LoggerFactory.assemble_global_handler(logger) + return logger, handler + + @staticmethod + def assemble_global_handler(logger): + if LoggerFactory.LOG_DIR: + for level in LoggerFactory.levels: + if level >= LoggerFactory.LEVEL: + level_logger_name = logging._levelToName[level] + logger.addHandler( + LoggerFactory.get_global_handler( + level_logger_name, level)) + if LoggerFactory.append_to_parent_log and LoggerFactory.PARENT_LOG_DIR: + for level in LoggerFactory.levels: + if level >= LoggerFactory.LEVEL: + level_logger_name = logging._levelToName[level] + logger.addHandler( + LoggerFactory.get_global_handler(level_logger_name, level, LoggerFactory.PARENT_LOG_DIR)) + + +def setDirectory(directory=None): + LoggerFactory.set_directory(directory) + + +def setLevel(level): + LoggerFactory.LEVEL = level + + +def getLogger(className=None, useLevelFile=False): + if className is None: + frame = inspect.stack()[1] + module = inspect.getmodule(frame[0]) + className = 'stat' + return LoggerFactory.get_logger(className) + + +def exception_to_trace_string(ex): + return "".join(traceback.TracebackException.from_exception(ex).format()) + + +class ROpenHandler(TimedRotatingFileHandler): + def _open(self): + prevumask = os.umask(000) + rtv = TimedRotatingFileHandler._open(self) + os.umask(prevumask) + return rtv + + +def sql_logger(job_id='', log_type='sql'): + key = job_id + log_type + if key in LoggerFactory.schedule_logger_dict.keys(): + return LoggerFactory.schedule_logger_dict[key] + return get_job_logger(job_id=job_id, log_type=log_type) + + +def ready_log(msg, job=None, task=None, role=None, party_id=None, detail=None): + prefix, suffix = base_msg(job, task, role, party_id, detail) + return f"{prefix}{msg} ready{suffix}" + + +def start_log(msg, job=None, task=None, role=None, party_id=None, detail=None): + prefix, suffix = base_msg(job, task, role, party_id, detail) + return f"{prefix}start to {msg}{suffix}" + + +def successful_log(msg, job=None, task=None, role=None, + party_id=None, detail=None): + prefix, suffix = base_msg(job, task, role, party_id, detail) + return f"{prefix}{msg} successfully{suffix}" + + +def warning_log(msg, job=None, task=None, role=None, + party_id=None, detail=None): + prefix, suffix = base_msg(job, task, role, party_id, detail) + return f"{prefix}{msg} is not effective{suffix}" + + +def failed_log(msg, job=None, task=None, role=None, + party_id=None, detail=None): + prefix, suffix = base_msg(job, task, role, party_id, detail) + return f"{prefix}failed to {msg}{suffix}" + + +def base_msg(job=None, task=None, role: 
str = None,
+             party_id: typing.Union[str, int] = None, detail=None):
+    if detail:
+        detail_msg = f" detail: \n{detail}"
+    else:
+        detail_msg = ""
+    if task is not None:
+        return f"task {task.f_task_id} {task.f_task_version} ", f" on {task.f_role} {task.f_party_id}{detail_msg}"
+    elif job is not None:
+        return "", f" on {job.f_role} {job.f_party_id}{detail_msg}"
+    elif role and party_id:
+        return "", f" on {role} {party_id}{detail_msg}"
+    else:
+        return "", f"{detail_msg}"
+
+
+def get_logger_base_dir():
+    job_log_dir = file_utils.get_rag_flow_directory('logs')
+    return job_log_dir
+
+
+def get_job_logger(job_id, log_type):
+    rag_flow_log_dir = file_utils.get_rag_flow_directory('logs', 'rag_flow')
+    job_log_dir = file_utils.get_rag_flow_directory('logs', job_id)
+    if not job_id:
+        log_dirs = [rag_flow_log_dir]
+    else:
+        if log_type == 'audit':
+            log_dirs = [job_log_dir, rag_flow_log_dir]
+        else:
+            log_dirs = [job_log_dir]
+    if LoggerFactory.log_share:
+        oldmask = os.umask(000)
+        os.makedirs(job_log_dir, exist_ok=True)
+        os.makedirs(rag_flow_log_dir, exist_ok=True)
+        os.umask(oldmask)
+    else:
+        os.makedirs(job_log_dir, exist_ok=True)
+        os.makedirs(rag_flow_log_dir, exist_ok=True)
+    logger = LoggerFactory.new_logger(f"{job_id}_{log_type}")
+    for job_log_dir in log_dirs:
+        handler = LoggerFactory.get_handler(class_name=None, level=LoggerFactory.LEVEL,
+                                            log_dir=job_log_dir, log_type=log_type, job_id=job_id)
+        error_handler = LoggerFactory.get_handler(
+            class_name=None,
+            level=logging.ERROR,
+            log_dir=job_log_dir,
+            log_type=log_type,
+            job_id=job_id)
+        logger.addHandler(handler)
+        logger.addHandler(error_handler)
+    with LoggerFactory.lock:
+        LoggerFactory.schedule_logger_dict[job_id + log_type] = logger
+    return logger
diff --git a/api/utils/t_crypt.py b/api/utils/t_crypt.py
index 99f70a3b6a019540b2077d989f0998cc64b91f70..67fd69c627a1ea6e39ff1b49bb6ea44c095a31a2 100644
--- a/api/utils/t_crypt.py
+++ b/api/utils/t_crypt.py
@@ -1,24 +1,24 @@
-import base64
-import os
-import sys
-from Cryptodome.PublicKey import RSA
-from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
-from api.utils import decrypt, file_utils
-
-
-def crypt(line):
-    file_path = os.path.join(
-        file_utils.get_project_base_directory(),
-        "conf",
-        "public.pem")
-    rsa_key = RSA.importKey(open(file_path).read(),"Welcome")
-    cipher = Cipher_pkcs1_v1_5.new(rsa_key)
-    password_base64 = base64.b64encode(line.encode('utf-8')).decode("utf-8")
-    encrypted_password = cipher.encrypt(password_base64.encode())
-    return base64.b64encode(encrypted_password).decode('utf-8')
-
-
-if __name__ == "__main__":
-    pswd = crypt(sys.argv[1])
-    print(pswd)
-    print(decrypt(pswd))
+import base64
+import os
+import sys
+from Cryptodome.PublicKey import RSA
+from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
+from api.utils import decrypt, file_utils
+
+
+def crypt(line):
+    file_path = os.path.join(
+        file_utils.get_project_base_directory(),
+        "conf",
+        "public.pem")
+    rsa_key = RSA.importKey(open(file_path).read(), "Welcome")
+    cipher = Cipher_pkcs1_v1_5.new(rsa_key)
+    password_base64 = base64.b64encode(line.encode('utf-8')).decode("utf-8")
+    encrypted_password = cipher.encrypt(password_base64.encode())
+    return base64.b64encode(encrypted_password).decode('utf-8')
+
+
+if __name__ == "__main__":
+    pswd = crypt(sys.argv[1])
+    print(pswd)
+    print(decrypt(pswd))
diff --git a/api/versions.py b/api/versions.py
index
5fde4e796e41aa6cc3bfeb5d90e519d3c4e79795..331a5ced137df027c8840e889e08b7819e5e2cde 100644 --- a/api/versions.py +++ b/api/versions.py @@ -1,28 +1,28 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import os -import dotenv -import typing -from api.utils.file_utils import get_project_base_directory - - -def get_versions() -> typing.Mapping[str, typing.Any]: - dotenv.load_dotenv(dotenv.find_dotenv()) - return dotenv.dotenv_values() - - -def get_rag_version() -> typing.Optional[str]: +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import dotenv +import typing +from api.utils.file_utils import get_project_base_directory + + +def get_versions() -> typing.Mapping[str, typing.Any]: + dotenv.load_dotenv(dotenv.find_dotenv()) + return dotenv.dotenv_values() + + +def get_rag_version() -> typing.Optional[str]: return get_versions().get("RAGFLOW_VERSION", "dev") \ No newline at end of file diff --git a/conf/service_conf.yaml b/conf/service_conf.yaml index 792404a6c0f297b704fd86b3a31ee23be7d4bdcd..880561d1315f329e25e32f33d1480a4a1b0d84df 100644 --- a/conf/service_conf.yaml +++ b/conf/service_conf.yaml @@ -1,49 +1,49 @@ -ragflow: - host: 0.0.0.0 - http_port: 9380 -mysql: - name: 'rag_flow' - user: 'root' - password: 'infini_rag_flow' - host: 'mysql' - port: 3306 - max_connections: 100 - stale_timeout: 30 -minio: - user: 'rag_flow' - password: 'infini_rag_flow' - host: 'minio:9000' -es: - hosts: 'http://es01:9200' - username: 'elastic' - password: 'infini_rag_flow' -redis: - db: 1 - password: 'infini_rag_flow' - host: 'redis:6379' -user_default_llm: - factory: 'Tongyi-Qianwen' - api_key: 'sk-xxxxxxxxxxxxx' - base_url: '' -oauth: - github: - client_id: xxxxxxxxxxxxxxxxxxxxxxxxx - secret_key: xxxxxxxxxxxxxxxxxxxxxxxxxxxx - url: https://github.com/login/oauth/access_token - feishu: - app_id: cli_xxxxxxxxxxxxxxxxxxx - app_secret: xxxxxxxxxxxxxxxxxxxxxxxxxxxx - app_access_token_url: https://open.feishu.cn/open-apis/auth/v3/app_access_token/internal - user_access_token_url: https://open.feishu.cn/open-apis/authen/v1/oidc/access_token - grant_type: 'authorization_code' -authentication: - client: - switch: false - http_app_key: - http_secret_key: - site: - switch: false -permission: - switch: false - component: false - dataset: false +ragflow: + host: 0.0.0.0 + http_port: 9380 +mysql: + name: 'rag_flow' + user: 'root' + password: 'infini_rag_flow' + host: 'mysql' + 
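
The `service_conf.yaml` being normalized in this hunk is plain YAML wiring the server to MySQL, MinIO, Elasticsearch and Redis, so any consumer can read it directly. A minimal sketch with PyYAML, assuming the file sits at `conf/service_conf.yaml` relative to the working directory (the project has its own loader in `api`, so this is illustrative only):

```python
import yaml  # PyYAML

with open("conf/service_conf.yaml", "r", encoding="utf-8") as f:
    conf = yaml.safe_load(f)

# Each top-level section becomes a plain dict, e.g. the MySQL block:
mysql = conf["mysql"]
print(mysql["host"], mysql["port"], mysql["max_connections"])
es_hosts = conf["es"]["hosts"]  # 'http://es01:9200'
```
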
port: 3306 + max_connections: 100 + stale_timeout: 30 +minio: + user: 'rag_flow' + password: 'infini_rag_flow' + host: 'minio:9000' +es: + hosts: 'http://es01:9200' + username: 'elastic' + password: 'infini_rag_flow' +redis: + db: 1 + password: 'infini_rag_flow' + host: 'redis:6379' +user_default_llm: + factory: 'Tongyi-Qianwen' + api_key: 'sk-xxxxxxxxxxxxx' + base_url: '' +oauth: + github: + client_id: xxxxxxxxxxxxxxxxxxxxxxxxx + secret_key: xxxxxxxxxxxxxxxxxxxxxxxxxxxx + url: https://github.com/login/oauth/access_token + feishu: + app_id: cli_xxxxxxxxxxxxxxxxxxx + app_secret: xxxxxxxxxxxxxxxxxxxxxxxxxxxx + app_access_token_url: https://open.feishu.cn/open-apis/auth/v3/app_access_token/internal + user_access_token_url: https://open.feishu.cn/open-apis/authen/v1/oidc/access_token + grant_type: 'authorization_code' +authentication: + client: + switch: false + http_app_key: + http_secret_key: + site: + switch: false +permission: + switch: false + component: false + dataset: false diff --git a/deepdoc/README.md b/deepdoc/README.md index 5c7235566f532e85e697f12d3a549022ac6ea4a9..14c7947bf5392a641c6928484b50327c23e0a8d6 100644 --- a/deepdoc/README.md +++ b/deepdoc/README.md @@ -1,122 +1,122 @@ -English | [简体中文](./README_zh.md) - -# *Deep*Doc - -- [1. Introduction](#1) -- [2. Vision](#2) -- [3. Parser](#3) - - -## 1. Introduction - -With a bunch of documents from various domains with various formats and along with diverse retrieval requirements, -an accurate analysis becomes a very challenge task. *Deep*Doc is born for that purpose. -There are 2 parts in *Deep*Doc so far: vision and parser. -You can run the flowing test programs if you're interested in our results of OCR, layout recognition and TSR. -```bash -python deepdoc/vision/t_ocr.py -h -usage: t_ocr.py [-h] --inputs INPUTS [--output_dir OUTPUT_DIR] - -options: - -h, --help show this help message and exit - --inputs INPUTS Directory where to store images or PDFs, or a file path to a single image or PDF - --output_dir OUTPUT_DIR - Directory where to store the output images. Default: './ocr_outputs' -``` -```bash -python deepdoc/vision/t_recognizer.py -h -usage: t_recognizer.py [-h] --inputs INPUTS [--output_dir OUTPUT_DIR] [--threshold THRESHOLD] [--mode {layout,tsr}] - -options: - -h, --help show this help message and exit - --inputs INPUTS Directory where to store images or PDFs, or a file path to a single image or PDF - --output_dir OUTPUT_DIR - Directory where to store the output images. Default: './layouts_outputs' - --threshold THRESHOLD - A threshold to filter out detections. Default: 0.5 - --mode {layout,tsr} Task mode: layout recognition or table structure recognition -``` - -Our models are served on HuggingFace. If you have trouble downloading HuggingFace models, this might help!! -```bash -export HF_ENDPOINT=https://hf-mirror.com -``` - - -## 2. Vision - -We use vision information to resolve problems as human being. - - OCR. Since a lot of documents presented as images or at least be able to transform to image, - OCR is a very essential and fundamental or even universal solution for text extraction. - ```bash - python deepdoc/vision/t_ocr.py --inputs=path_to_images_or_pdfs --output_dir=path_to_store_result - ``` - The inputs could be directory to images or PDF, or a image or PDF. - You can look into the folder 'path_to_store_result' where has images which demonstrate the positions of results, - txt files which contain the OCR text. -
- - - Layout recognition. Documents from different domain may have various layouts, - like, newspaper, magazine, book and résumé are distinct in terms of layout. - Only when machine have an accurate layout analysis, it can decide if these text parts are successive or not, - or this part needs Table Structure Recognition(TSR) to process, or this part is a figure and described with this caption. - We have 10 basic layout components which covers most cases: - - Text - - Title - - Figure - - Figure caption - - Table - - Table caption - - Header - - Footer - - Reference - - Equation - - Have a try on the following command to see the layout detection results. - ```bash - python deepdoc/vision/t_recognizer.py --inputs=path_to_images_or_pdfs --threshold=0.2 --mode=layout --output_dir=path_to_store_result - ``` - The inputs could be directory to images or PDF, or a image or PDF. - You can look into the folder 'path_to_store_result' where has images which demonstrate the detection results as following: -
- - - Table Structure Recognition(TSR). Data table is a frequently used structure to present data including numbers or text. - And the structure of a table might be very complex, like hierarchy headers, spanning cells and projected row headers. - Along with TSR, we also reassemble the content into sentences which could be well comprehended by LLM. - We have five labels for TSR task: - - Column - - Row - - Column header - - Projected row header - - Spanning cell - - Have a try on the following command to see the layout detection results. - ```bash - python deepdoc/vision/t_recognizer.py --inputs=path_to_images_or_pdfs --threshold=0.2 --mode=tsr --output_dir=path_to_store_result - ``` - The inputs could be directory to images or PDF, or a image or PDF. - You can look into the folder 'path_to_store_result' where has both images and html pages which demonstrate the detection results as following: -
-
-
-## 3. Parser
-
-Four kinds of document formats as PDF, DOCX, EXCEL and PPT have their corresponding parser.
-The most complex one is PDF parser since PDF's flexibility. The output of PDF parser includes:
- - Text chunks with their own positions in PDF(page number and rectangular positions).
- - Tables with cropped image from the PDF, and contents which has already translated into natural language sentences.
- - Figures with caption and text in the figures.
-
-### Résumé
-
-The résumé is a very complicated kind of document. A résumé which is composed of unstructured text
-with various layouts could be resolved into structured data composed of nearly a hundred of fields.
-We haven't opened the parser yet, as we open the processing method after parsing procedure.
-
+English | [简体中文](./README_zh.md)
+
+# *Deep*Doc
+
+- [1. Introduction](#1)
+- [2. Vision](#2)
+- [3. Parser](#3)
+
+
+## 1. Introduction
+
+With documents that come from various domains, in various formats and with diverse retrieval requirements,
+accurate analysis becomes a very challenging task. *Deep*Doc was born for that purpose.
+There are two parts in *Deep*Doc so far: vision and parser.
+You can run the following test programs if you are interested in our results for OCR, layout recognition and TSR.
+```bash
+python deepdoc/vision/t_ocr.py -h
+usage: t_ocr.py [-h] --inputs INPUTS [--output_dir OUTPUT_DIR]
+
+options:
+  -h, --help            show this help message and exit
+  --inputs INPUTS       Directory where to store images or PDFs, or a file path to a single image or PDF
+  --output_dir OUTPUT_DIR
+                        Directory where to store the output images. Default: './ocr_outputs'
+```
+```bash
+python deepdoc/vision/t_recognizer.py -h
+usage: t_recognizer.py [-h] --inputs INPUTS [--output_dir OUTPUT_DIR] [--threshold THRESHOLD] [--mode {layout,tsr}]
+
+options:
+  -h, --help            show this help message and exit
+  --inputs INPUTS       Directory where to store images or PDFs, or a file path to a single image or PDF
+  --output_dir OUTPUT_DIR
+                        Directory where to store the output images. Default: './layouts_outputs'
+  --threshold THRESHOLD
+                        A threshold to filter out detections. Default: 0.5
+  --mode {layout,tsr}   Task mode: layout recognition or table structure recognition
+```
+
+Our models are served on HuggingFace. If you have trouble downloading HuggingFace models, this might help:
+```bash
+export HF_ENDPOINT=https://hf-mirror.com
+```
+
+
+## 2. Vision
+
+We use vision information to resolve problems the way a human being does.
+ - OCR. Since many documents are presented as images, or can at least be converted to images,
+   OCR is an essential, fundamental and even universal solution for text extraction.
+   ```bash
+   python deepdoc/vision/t_ocr.py --inputs=path_to_images_or_pdfs --output_dir=path_to_store_result
+   ```
+   The inputs can be a directory of images or PDFs, or a single image or PDF file.
+   You can look into the folder 'path_to_store_result', which contains images that demonstrate the positions
+   of the results, along with txt files that contain the OCR text.
+
+ - Layout recognition. Documents from different domains may have various layouts;
+   newspapers, magazines, books and résumés, for example, are quite distinct in terms of layout.
+   Only when a machine has an accurate layout analysis can it decide whether text parts are consecutive,
+   whether a part needs Table Structure Recognition (TSR), or whether a part is a figure described by its caption.
+   We have 10 basic layout components, which cover most cases:
+   - Text
+   - Title
+   - Figure
+   - Figure caption
+   - Table
+   - Table caption
+   - Header
+   - Footer
+   - Reference
+   - Equation
+
+   Try the following command to see the layout detection results.
+   ```bash
+   python deepdoc/vision/t_recognizer.py --inputs=path_to_images_or_pdfs --threshold=0.2 --mode=layout --output_dir=path_to_store_result
+   ```
+   The inputs can be a directory of images or PDFs, or a single image or PDF file.
+   You can look into the folder 'path_to_store_result', which contains images that demonstrate the detection results.
+
+ - Table Structure Recognition (TSR). Data tables are a frequently used structure for presenting data, including numbers and text.
+   The structure of a table can be very complex, with hierarchical headers, spanning cells and projected row headers.
+   Along with TSR, we also reassemble the content into sentences that an LLM can comprehend well.
+   We have five labels for the TSR task:
+   - Column
+   - Row
+   - Column header
+   - Projected row header
+   - Spanning cell
+
+   Try the following command to see the table structure recognition results.
+   ```bash
+   python deepdoc/vision/t_recognizer.py --inputs=path_to_images_or_pdfs --threshold=0.2 --mode=tsr --output_dir=path_to_store_result
+   ```
+   The inputs can be a directory of images or PDFs, or a single image or PDF file.
+   You can look into the folder 'path_to_store_result', which contains both images and HTML pages that demonstrate the detection results.
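
Taken together, the commands above can be scripted. A small driver sketch that runs OCR, layout detection and TSR over one input folder, using only the documented flags (the paths are placeholders, exactly as in the examples):

```python
import subprocess

INPUTS = "path_to_images_or_pdfs"  # placeholder, as in the examples above

jobs = [
    ["python", "deepdoc/vision/t_ocr.py",
     "--inputs", INPUTS, "--output_dir", "ocr_outputs"],
    ["python", "deepdoc/vision/t_recognizer.py", "--inputs", INPUTS,
     "--threshold", "0.2", "--mode", "layout", "--output_dir", "layout_outputs"],
    ["python", "deepdoc/vision/t_recognizer.py", "--inputs", INPUTS,
     "--threshold", "0.2", "--mode", "tsr", "--output_dir", "tsr_outputs"],
]
for cmd in jobs:
    subprocess.run(cmd, check=True)  # stop on the first failing step
```
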
+
+
+## 3. Parser
+
+Four kinds of document formats, namely PDF, DOCX, EXCEL and PPT, have their corresponding parsers.
+The most complex one is the PDF parser, owing to PDF's flexibility. The output of the PDF parser includes:
+ - Text chunks with their own positions in the PDF (page number and rectangular positions).
+ - Tables with their cropped images from the PDF, and contents that have already been translated into natural language sentences.
+ - Figures with their captions and the text inside the figures.
+
+### Résumé
+
+The résumé is a very complicated kind of document. A résumé, which is composed of unstructured text
+with various layouts, can be resolved into structured data composed of nearly a hundred fields.
+We haven't open-sourced the parser itself yet; what we open-source is the processing method applied after the parsing procedure.
+
\ No newline at end of file
diff --git a/deepdoc/parser/ppt_parser.py b/deepdoc/parser/ppt_parser.py
index ce2451057f40f76f5e9baa494433841d5d2f8925..b2a08b11f379355b01173ddb3c02542c684ae2fd 100644
--- a/deepdoc/parser/ppt_parser.py
+++ b/deepdoc/parser/ppt_parser.py
@@ -1,61 +1,61 @@
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from io import BytesIO
-from pptx import Presentation
-
-
-class RAGFlowPptParser(object):
-    def __init__(self):
-        super().__init__()
-
-    def __extract(self, shape):
-        if shape.shape_type == 19:
-            tb = shape.table
-            rows = []
-            for i in range(1, len(tb.rows)):
-                rows.append("; ".join([tb.cell(
-                    0, j).text + ": " + tb.cell(i, j).text for j in range(len(tb.columns)) if tb.cell(i, j)]))
-            return "\n".join(rows)
-
-        if shape.has_text_frame:
-            return shape.text_frame.text
-
-        if shape.shape_type == 6:
-            texts = []
-            for p in sorted(shape.shapes, key=lambda x: (x.top // 10, x.left)):
-                t = self.__extract(p)
-                if t:
-                    texts.append(t)
-            return "\n".join(texts)
-
-    def __call__(self, fnm, from_page, to_page, callback=None):
-        ppt = Presentation(fnm) if isinstance(
-            fnm, str) else Presentation(
-            BytesIO(fnm))
-        txts = []
-        self.total_page = len(ppt.slides)
-        for i, slide in enumerate(ppt.slides):
-            if i < from_page:
-                continue
-            if i >= to_page:
-                break
-            texts = []
-            for shape in sorted(
-                    slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left)):
-                txt = self.__extract(shape)
-                if txt:
-                    texts.append(txt)
-            txts.append("\n".join(texts))
-
-        return txts
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# + +from io import BytesIO +from pptx import Presentation + + +class RAGFlowPptParser(object): + def __init__(self): + super().__init__() + + def __extract(self, shape): + if shape.shape_type == 19: + tb = shape.table + rows = [] + for i in range(1, len(tb.rows)): + rows.append("; ".join([tb.cell( + 0, j).text + ": " + tb.cell(i, j).text for j in range(len(tb.columns)) if tb.cell(i, j)])) + return "\n".join(rows) + + if shape.has_text_frame: + return shape.text_frame.text + + if shape.shape_type == 6: + texts = [] + for p in sorted(shape.shapes, key=lambda x: (x.top // 10, x.left)): + t = self.__extract(p) + if t: + texts.append(t) + return "\n".join(texts) + + def __call__(self, fnm, from_page, to_page, callback=None): + ppt = Presentation(fnm) if isinstance( + fnm, str) else Presentation( + BytesIO(fnm)) + txts = [] + self.total_page = len(ppt.slides) + for i, slide in enumerate(ppt.slides): + if i < from_page: + continue + if i >= to_page: + break + texts = [] + for shape in sorted( + slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left)): + txt = self.__extract(shape) + if txt: + texts.append(txt) + txts.append("\n".join(texts)) + + return txts diff --git a/deepdoc/parser/resume/__init__.py b/deepdoc/parser/resume/__init__.py index 8fe338dae89d9372a123af81e178f42c470f6481..fab6f7e716eb0d5ac2aebda5690c6f704f85da74 100644 --- a/deepdoc/parser/resume/__init__.py +++ b/deepdoc/parser/resume/__init__.py @@ -1,65 +1,65 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
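
Since `RAGFlowPptParser` above is callable, a minimal usage sketch looks as follows; `deck.pptx` is a placeholder file name, and per `__call__` the first argument may be either a path or the raw bytes of a presentation:

```python
from deepdoc.parser.ppt_parser import RAGFlowPptParser

parser = RAGFlowPptParser()
# from_page/to_page select a slide window; a large to_page takes every slide.
slide_texts = parser("deck.pptx", 0, 100000)
for i, text in enumerate(slide_texts):
    print(f"--- slide {i} ---")
    print(text)
```

Tables (shape type 19) come back as "header: cell" rows, and grouped shapes (type 6) are flattened in top-to-bottom, left-to-right order.
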
-# - -import datetime - - -def refactor(cv): - for n in ["raw_txt", "parser_name", "inference", "ori_text", "use_time", "time_stat"]: - if n in cv and cv[n] is not None: del cv[n] - cv["is_deleted"] = 0 - if "basic" not in cv: cv["basic"] = {} - if cv["basic"].get("photo2"): del cv["basic"]["photo2"] - - for n in ["education", "work", "certificate", "project", "language", "skill", "training"]: - if n not in cv or cv[n] is None: continue - if type(cv[n]) == type({}): cv[n] = [v for _, v in cv[n].items()] - if type(cv[n]) != type([]): - del cv[n] - continue - vv = [] - for v in cv[n]: - if "external" in v and v["external"] is not None: del v["external"] - vv.append(v) - cv[n] = {str(i): vv[i] for i in range(len(vv))} - - basics = [ - ("basic_salary_month", "salary_month"), - ("expect_annual_salary_from", "expect_annual_salary"), - ] - for n, t in basics: - if cv["basic"].get(n): - cv["basic"][t] = cv["basic"][n] - del cv["basic"][n] - - work = sorted([v for _, v in cv.get("work", {}).items()], key=lambda x: x.get("start_time", "")) - edu = sorted([v for _, v in cv.get("education", {}).items()], key=lambda x: x.get("start_time", "")) - - if work: - cv["basic"]["work_start_time"] = work[0].get("start_time", "") - cv["basic"]["management_experience"] = 'Y' if any( - [w.get("management_experience", '') == 'Y' for w in work]) else 'N' - cv["basic"]["annual_salary"] = work[-1].get("annual_salary_from", "0") - - for n in ["annual_salary_from", "annual_salary_to", "industry_name", "position_name", "responsibilities", - "corporation_type", "scale", "corporation_name"]: - cv["basic"][n] = work[-1].get(n, "") - - if edu: - for n in ["school_name", "discipline_name"]: - if n in edu[-1]: cv["basic"][n] = edu[-1][n] - - cv["basic"]["updated_at"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - if "contact" not in cv: cv["contact"] = {} - if not cv["contact"].get("name"): cv["contact"]["name"] = cv["basic"].get("name", "") +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import datetime + + +def refactor(cv): + for n in ["raw_txt", "parser_name", "inference", "ori_text", "use_time", "time_stat"]: + if n in cv and cv[n] is not None: del cv[n] + cv["is_deleted"] = 0 + if "basic" not in cv: cv["basic"] = {} + if cv["basic"].get("photo2"): del cv["basic"]["photo2"] + + for n in ["education", "work", "certificate", "project", "language", "skill", "training"]: + if n not in cv or cv[n] is None: continue + if type(cv[n]) == type({}): cv[n] = [v for _, v in cv[n].items()] + if type(cv[n]) != type([]): + del cv[n] + continue + vv = [] + for v in cv[n]: + if "external" in v and v["external"] is not None: del v["external"] + vv.append(v) + cv[n] = {str(i): vv[i] for i in range(len(vv))} + + basics = [ + ("basic_salary_month", "salary_month"), + ("expect_annual_salary_from", "expect_annual_salary"), + ] + for n, t in basics: + if cv["basic"].get(n): + cv["basic"][t] = cv["basic"][n] + del cv["basic"][n] + + work = sorted([v for _, v in cv.get("work", {}).items()], key=lambda x: x.get("start_time", "")) + edu = sorted([v for _, v in cv.get("education", {}).items()], key=lambda x: x.get("start_time", "")) + + if work: + cv["basic"]["work_start_time"] = work[0].get("start_time", "") + cv["basic"]["management_experience"] = 'Y' if any( + [w.get("management_experience", '') == 'Y' for w in work]) else 'N' + cv["basic"]["annual_salary"] = work[-1].get("annual_salary_from", "0") + + for n in ["annual_salary_from", "annual_salary_to", "industry_name", "position_name", "responsibilities", + "corporation_type", "scale", "corporation_name"]: + cv["basic"][n] = work[-1].get(n, "") + + if edu: + for n in ["school_name", "discipline_name"]: + if n in edu[-1]: cv["basic"][n] = edu[-1][n] + + cv["basic"]["updated_at"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + if "contact" not in cv: cv["contact"] = {} + if not cv["contact"].get("name"): cv["contact"]["name"] = cv["basic"].get("name", "") return cv \ No newline at end of file diff --git a/deepdoc/parser/resume/entities/res/school.rank.csv b/deepdoc/parser/resume/entities/res/school.rank.csv index 4dab9cb97ff3eaf94d5e59a3e435d635abd819a2..2207bf477ea1696dc6f1c973152c9a3b86b059f4 100644 --- a/deepdoc/parser/resume/entities/res/school.rank.csv +++ b/deepdoc/parser/resume/entities/res/school.rank.csv @@ -1,4 +1,4 @@ -清华大学,2,985,清华 +清华大学,2,985,清华 清华大学,2,985,Tsinghua University 清华大学,2,985,THU 北京大学,1,985,北大 diff --git a/deepdoc/parser/resume/step_one.py b/deepdoc/parser/resume/step_one.py index 34c474248a9674558c78132eca491e1a38da083e..90e52e45078f4a51c796a0a1a6146bb233fde6fd 100644 --- a/deepdoc/parser/resume/step_one.py +++ b/deepdoc/parser/resume/step_one.py @@ -1,186 +1,186 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
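
The `refactor(cv)` kept in `resume/__init__.py` above reshapes each section list into an index-keyed dict and backfills `basic`/`contact`. A small, hedged illustration (the field values are invented, and it assumes the `deepdoc` package imports cleanly):

```python
from deepdoc.parser.resume import refactor

cv = {
    "basic": {"name": "Jane Doe"},
    "education": [{"school_name": "清华大学", "start_time": "2015"}],
}
out = refactor(cv)
print(out["education"])             # {'0': {'school_name': '清华大学', 'start_time': '2015'}}
print(out["contact"]["name"])       # backfilled from basic.name -> 'Jane Doe'
print(out["basic"]["school_name"])  # copied from the latest education entry
```
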
-# - -import json -from deepdoc.parser.resume.entities import degrees, regions, industries - -FIELDS = [ -"address STRING", -"annual_salary int", -"annual_salary_from int", -"annual_salary_to int", -"birth STRING", -"card STRING", -"certificate_obj string", -"city STRING", -"corporation_id int", -"corporation_name STRING", -"corporation_type STRING", -"degree STRING", -"discipline_name STRING", -"education_obj string", -"email STRING", -"expect_annual_salary int", -"expect_city_names string", -"expect_industry_name STRING", -"expect_position_name STRING", -"expect_salary_from int", -"expect_salary_to int", -"expect_type STRING", -"gender STRING", -"industry_name STRING", -"industry_names STRING", -"is_deleted STRING", -"is_fertility STRING", -"is_house STRING", -"is_management_experience STRING", -"is_marital STRING", -"is_oversea STRING", -"language_obj string", -"name STRING", -"nation STRING", -"phone STRING", -"political_status STRING", -"position_name STRING", -"project_obj string", -"responsibilities string", -"salary_month int", -"scale STRING", -"school_name STRING", -"self_remark string", -"skill_obj string", -"title_name STRING", -"tob_resume_id STRING", -"updated_at Timestamp", -"wechat STRING", -"work_obj string", -"work_experience int", -"work_start_time BIGINT" -] - -def refactor(df): - def deal_obj(obj, k, kk): - if not isinstance(obj, type({})): - return "" - obj = obj.get(k, {}) - if not isinstance(obj, type({})): - return "" - return obj.get(kk, "") - - def loadjson(line): - try: - return json.loads(line) - except Exception as e: - pass - return {} - - df["obj"] = df["resume_content"].map(lambda x: loadjson(x)) - df.fillna("", inplace=True) - - clms = ["tob_resume_id", "updated_at"] - - def extract(nms, cc=None): - nonlocal clms - clms.extend(nms) - for c in nms: - if cc: - df[c] = df["obj"].map(lambda x: deal_obj(x, cc, c)) - else: - df[c] = df["obj"].map( - lambda x: json.dumps( - x.get( - c, - {}), - ensure_ascii=False) if isinstance( - x, - type( - {})) and ( - isinstance( - x.get(c), - type( - {})) or not x.get(c)) else str(x).replace( - "None", - "")) - - extract(["education", "work", "certificate", "project", "language", - "skill"]) - extract(["wechat", "phone", "is_deleted", - "name", "tel", "email"], "contact") - extract(["nation", "expect_industry_name", "salary_month", - "industry_ids", "is_house", "birth", "annual_salary_from", - "annual_salary_to", "card", - "expect_salary_to", "expect_salary_from", - "expect_position_name", "gender", "city", - "is_fertility", "expect_city_names", - "political_status", "title_name", "expect_annual_salary", - "industry_name", "address", "position_name", "school_name", - "corporation_id", - "is_oversea", "responsibilities", - "work_start_time", "degree", "management_experience", - "expect_type", "corporation_type", "scale", "corporation_name", - "self_remark", "annual_salary", "work_experience", - "discipline_name", "marital", "updated_at"], "basic") - - df["degree"] = df["degree"].map(lambda x: degrees.get_name(x)) - df["address"] = df["address"].map(lambda x: " ".join(regions.get_names(x))) - df["industry_names"] = df["industry_ids"].map(lambda x: " ".join([" ".join(industries.get_names(i)) for i in - str(x).split(",")])) - clms.append("industry_names") - - def arr2str(a): - if not a: - return "" - if isinstance(a, list): - a = " ".join([str(i) for i in a]) - return str(a).replace(",", " ") - - df["expect_industry_name"] = df["expect_industry_name"].map( - lambda x: arr2str(x)) - df["gender"] = df["gender"].map( - lambda x: 
"男" if x == 'M' else ( - "女" if x == 'F' else "")) - for c in ["is_fertility", "is_oversea", "is_house", - "management_experience", "marital"]: - df[c] = df[c].map( - lambda x: '是' if x == 'Y' else ( - '否' if x == 'N' else "")) - df["is_management_experience"] = df["management_experience"] - df["is_marital"] = df["marital"] - clms.extend(["is_management_experience", "is_marital"]) - - df.fillna("", inplace=True) - for i in range(len(df)): - if not df.loc[i, "phone"].strip() and df.loc[i, "tel"].strip(): - df.loc[i, "phone"] = df.loc[i, "tel"].strip() - - for n in ["industry_ids", "management_experience", "marital", "tel"]: - for i in range(len(clms)): - if clms[i] == n: - del clms[i] - break - - clms = list(set(clms)) - - df = df.reindex(sorted(clms), axis=1) - #print(json.dumps(list(df.columns.values)), "LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL") - for c in clms: - df[c] = df[c].map( - lambda s: str(s).replace( - "\t", - " ").replace( - "\n", - "\\n").replace( - "\r", - "\\n")) - # print(df.values.tolist()) - return dict(zip([n.split(" ")[0] for n in FIELDS], df.values.tolist()[0])) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import json +from deepdoc.parser.resume.entities import degrees, regions, industries + +FIELDS = [ +"address STRING", +"annual_salary int", +"annual_salary_from int", +"annual_salary_to int", +"birth STRING", +"card STRING", +"certificate_obj string", +"city STRING", +"corporation_id int", +"corporation_name STRING", +"corporation_type STRING", +"degree STRING", +"discipline_name STRING", +"education_obj string", +"email STRING", +"expect_annual_salary int", +"expect_city_names string", +"expect_industry_name STRING", +"expect_position_name STRING", +"expect_salary_from int", +"expect_salary_to int", +"expect_type STRING", +"gender STRING", +"industry_name STRING", +"industry_names STRING", +"is_deleted STRING", +"is_fertility STRING", +"is_house STRING", +"is_management_experience STRING", +"is_marital STRING", +"is_oversea STRING", +"language_obj string", +"name STRING", +"nation STRING", +"phone STRING", +"political_status STRING", +"position_name STRING", +"project_obj string", +"responsibilities string", +"salary_month int", +"scale STRING", +"school_name STRING", +"self_remark string", +"skill_obj string", +"title_name STRING", +"tob_resume_id STRING", +"updated_at Timestamp", +"wechat STRING", +"work_obj string", +"work_experience int", +"work_start_time BIGINT" +] + +def refactor(df): + def deal_obj(obj, k, kk): + if not isinstance(obj, type({})): + return "" + obj = obj.get(k, {}) + if not isinstance(obj, type({})): + return "" + return obj.get(kk, "") + + def loadjson(line): + try: + return json.loads(line) + except Exception as e: + pass + return {} + + df["obj"] = df["resume_content"].map(lambda x: loadjson(x)) + df.fillna("", inplace=True) + + clms = ["tob_resume_id", "updated_at"] + + def extract(nms, cc=None): + nonlocal clms + clms.extend(nms) + for c in nms: + if cc: + df[c] = df["obj"].map(lambda x: deal_obj(x, cc, c)) + else: + df[c] = 
df["obj"].map( + lambda x: json.dumps( + x.get( + c, + {}), + ensure_ascii=False) if isinstance( + x, + type( + {})) and ( + isinstance( + x.get(c), + type( + {})) or not x.get(c)) else str(x).replace( + "None", + "")) + + extract(["education", "work", "certificate", "project", "language", + "skill"]) + extract(["wechat", "phone", "is_deleted", + "name", "tel", "email"], "contact") + extract(["nation", "expect_industry_name", "salary_month", + "industry_ids", "is_house", "birth", "annual_salary_from", + "annual_salary_to", "card", + "expect_salary_to", "expect_salary_from", + "expect_position_name", "gender", "city", + "is_fertility", "expect_city_names", + "political_status", "title_name", "expect_annual_salary", + "industry_name", "address", "position_name", "school_name", + "corporation_id", + "is_oversea", "responsibilities", + "work_start_time", "degree", "management_experience", + "expect_type", "corporation_type", "scale", "corporation_name", + "self_remark", "annual_salary", "work_experience", + "discipline_name", "marital", "updated_at"], "basic") + + df["degree"] = df["degree"].map(lambda x: degrees.get_name(x)) + df["address"] = df["address"].map(lambda x: " ".join(regions.get_names(x))) + df["industry_names"] = df["industry_ids"].map(lambda x: " ".join([" ".join(industries.get_names(i)) for i in + str(x).split(",")])) + clms.append("industry_names") + + def arr2str(a): + if not a: + return "" + if isinstance(a, list): + a = " ".join([str(i) for i in a]) + return str(a).replace(",", " ") + + df["expect_industry_name"] = df["expect_industry_name"].map( + lambda x: arr2str(x)) + df["gender"] = df["gender"].map( + lambda x: "男" if x == 'M' else ( + "女" if x == 'F' else "")) + for c in ["is_fertility", "is_oversea", "is_house", + "management_experience", "marital"]: + df[c] = df[c].map( + lambda x: '是' if x == 'Y' else ( + '否' if x == 'N' else "")) + df["is_management_experience"] = df["management_experience"] + df["is_marital"] = df["marital"] + clms.extend(["is_management_experience", "is_marital"]) + + df.fillna("", inplace=True) + for i in range(len(df)): + if not df.loc[i, "phone"].strip() and df.loc[i, "tel"].strip(): + df.loc[i, "phone"] = df.loc[i, "tel"].strip() + + for n in ["industry_ids", "management_experience", "marital", "tel"]: + for i in range(len(clms)): + if clms[i] == n: + del clms[i] + break + + clms = list(set(clms)) + + df = df.reindex(sorted(clms), axis=1) + #print(json.dumps(list(df.columns.values)), "LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL") + for c in clms: + df[c] = df[c].map( + lambda s: str(s).replace( + "\t", + " ").replace( + "\n", + "\\n").replace( + "\r", + "\\n")) + # print(df.values.tolist()) + return dict(zip([n.split(" ")[0] for n in FIELDS], df.values.tolist()[0])) diff --git a/deepdoc/parser/resume/step_two.py b/deepdoc/parser/resume/step_two.py index 00282b7d6c929153245cae216ced8c9844c3e85a..4f8b79d740135f922b84a5b230d555d2306e836f 100644 --- a/deepdoc/parser/resume/step_two.py +++ b/deepdoc/parser/resume/step_two.py @@ -1,592 +1,592 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -import re, copy, time, datetime, demjson3, \ - traceback, signal -import numpy as np -from deepdoc.parser.resume.entities import degrees, schools, corporations -from rag.nlp import rag_tokenizer, surname -from xpinyin import Pinyin -from contextlib import contextmanager - - -class TimeoutException(Exception): pass - - -@contextmanager -def time_limit(seconds): - def signal_handler(signum, frame): - raise TimeoutException("Timed out!") - - signal.signal(signal.SIGALRM, signal_handler) - signal.alarm(seconds) - try: - yield - finally: - signal.alarm(0) - - -ENV = None -PY = Pinyin() - - -def rmHtmlTag(line): - return re.sub(r"<[a-z0-9.\"=';,:\+_/ -]+>", " ", line, 100000, re.IGNORECASE) - - -def highest_degree(dg): - if not dg: return "" - if type(dg) == type(""): dg = [dg] - m = {"初中": 0, "高中": 1, "中专": 2, "大专": 3, "专升本": 4, "本科": 5, "硕士": 6, "博士": 7, "博士后": 8} - return sorted([(d, m.get(d, -1)) for d in dg], key=lambda x: x[1] * -1)[0][0] - - -def forEdu(cv): - if not cv.get("education_obj"): - cv["integerity_flt"] *= 0.8 - return cv - - first_fea, fea, maj, fmaj, deg, fdeg, sch, fsch, st_dt, ed_dt = [], [], [], [], [], [], [], [], [], [] - edu_nst = [] - edu_end_dt = "" - cv["school_rank_int"] = 1000000 - for ii, n in enumerate(sorted(cv["education_obj"], key=lambda x: x.get("start_time", "3"))): - e = {} - if n.get("end_time"): - if n["end_time"] > edu_end_dt: edu_end_dt = n["end_time"] - try: - dt = n["end_time"] - if re.match(r"[0-9]{9,}", dt): dt = turnTm2Dt(dt) - y, m, d = getYMD(dt) - ed_dt.append(str(y)) - e["end_dt_kwd"] = str(y) - except Exception as e: - pass - if n.get("start_time"): - try: - dt = n["start_time"] - if re.match(r"[0-9]{9,}", dt): dt = turnTm2Dt(dt) - y, m, d = getYMD(dt) - st_dt.append(str(y)) - e["start_dt_kwd"] = str(y) - except Exception as e: - pass - - r = schools.select(n.get("school_name", "")) - if r: - if str(r.get("type", "")) == "1": fea.append("211") - if str(r.get("type", "")) == "2": fea.append("211") - if str(r.get("is_abroad", "")) == "1": fea.append("留学") - if str(r.get("is_double_first", "")) == "1": fea.append("双一流") - if str(r.get("is_985", "")) == "1": fea.append("985") - if str(r.get("is_world_known", "")) == "1": fea.append("海外知名") - if r.get("rank") and cv["school_rank_int"] > r["rank"]: cv["school_rank_int"] = r["rank"] - - if n.get("school_name") and isinstance(n["school_name"], str): - sch.append(re.sub(r"(211|985|重点大学|[,&;;-])", "", n["school_name"])) - e["sch_nm_kwd"] = sch[-1] - fea.append(rag_tokenizer.fine_grained_tokenize(rag_tokenizer.tokenize(n.get("school_name", ""))).split(" ")[-1]) - - if n.get("discipline_name") and isinstance(n["discipline_name"], str): - maj.append(n["discipline_name"]) - e["major_kwd"] = n["discipline_name"] - - if not n.get("degree") and "985" in fea and not first_fea: n["degree"] = "1" - - if n.get("degree"): - d = degrees.get_name(n["degree"]) - if d: e["degree_kwd"] = d - if d == "本科" and ("专科" in deg or "专升本" in deg or "中专" in deg or "大专" in deg or re.search(r"(成人|自考|自学考试)", - n.get( - "school_name", - ""))): d = "专升本" - if d: deg.append(d) - - # for first degree - if not fdeg and d in ["中专", "专升本", "专科", "本科", "大专"]: - fdeg = [d] - if n.get("school_name"): fsch = [n["school_name"]] - if n.get("discipline_name"): fmaj = [n["discipline_name"]] - first_fea = copy.deepcopy(fea) - - edu_nst.append(e) - - cv["sch_rank_kwd"] = [] - if cv["school_rank_int"] <= 20 \ - or ("海外名校" in fea and 
cv["school_rank_int"] <= 200): - cv["sch_rank_kwd"].append("顶尖学校") - elif cv["school_rank_int"] <= 50 and cv["school_rank_int"] > 20 \ - or ("海外名校" in fea and cv["school_rank_int"] <= 500 and \ - cv["school_rank_int"] > 200): - cv["sch_rank_kwd"].append("精英学校") - elif cv["school_rank_int"] > 50 and ("985" in fea or "211" in fea) \ - or ("海外名校" in fea and cv["school_rank_int"] > 500): - cv["sch_rank_kwd"].append("优质学校") - else: - cv["sch_rank_kwd"].append("一般学校") - - if edu_nst: cv["edu_nst"] = edu_nst - if fea: cv["edu_fea_kwd"] = list(set(fea)) - if first_fea: cv["edu_first_fea_kwd"] = list(set(first_fea)) - if maj: cv["major_kwd"] = maj - if fsch: cv["first_school_name_kwd"] = fsch - if fdeg: cv["first_degree_kwd"] = fdeg - if fmaj: cv["first_major_kwd"] = fmaj - if st_dt: cv["edu_start_kwd"] = st_dt - if ed_dt: cv["edu_end_kwd"] = ed_dt - if ed_dt: cv["edu_end_int"] = max([int(t) for t in ed_dt]) - if deg: - if "本科" in deg and "专科" in deg: - deg.append("专升本") - deg = [d for d in deg if d != '本科'] - cv["degree_kwd"] = deg - cv["highest_degree_kwd"] = highest_degree(deg) - if edu_end_dt: - try: - if re.match(r"[0-9]{9,}", edu_end_dt): edu_end_dt = turnTm2Dt(edu_end_dt) - if edu_end_dt.strip("\n") == "至今": edu_end_dt = cv.get("updated_at_dt", str(datetime.date.today())) - y, m, d = getYMD(edu_end_dt) - cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000)) - except Exception as e: - print("EXCEPTION: ", e, edu_end_dt, cv.get("work_exp_flt")) - if sch: - cv["school_name_kwd"] = sch - if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"]) \ - or all([c.lower() in ["硕士", "博士", "mba", "博士后"] for c in cv.get("degree_kwd", [])]) \ - or not cv.get("degree_kwd"): - for c in sch: - if schools.is_good(c): - if "tag_kwd" not in cv: cv["tag_kwd"] = [] - cv["tag_kwd"].append("好学校") - cv["tag_kwd"].append("好学历") - break - if (len(cv.get("degree_kwd", [])) >= 1 and \ - "本科" in cv["degree_kwd"] and \ - any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \ - or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \ - or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]): - if "tag_kwd" not in cv: cv["tag_kwd"] = [] - if "好学历" not in cv["tag_kwd"]: cv["tag_kwd"].append("好学历") - - if cv.get("major_kwd"): cv["major_tks"] = rag_tokenizer.tokenize(" ".join(maj)) - if cv.get("school_name_kwd"): cv["school_name_tks"] = rag_tokenizer.tokenize(" ".join(sch)) - if cv.get("first_school_name_kwd"): cv["first_school_name_tks"] = rag_tokenizer.tokenize(" ".join(fsch)) - if cv.get("first_major_kwd"): cv["first_major_tks"] = rag_tokenizer.tokenize(" ".join(fmaj)) - - return cv - - -def forProj(cv): - if not cv.get("project_obj"): return cv - - pro_nms, desc = [], [] - for i, n in enumerate( - sorted(cv.get("project_obj", []), key=lambda x: str(x.get("updated_at", "")) if type(x) == type({}) else "", - reverse=True)): - if n.get("name"): pro_nms.append(n["name"]) - if n.get("describe"): desc.append(str(n["describe"])) - if n.get("responsibilities"): desc.append(str(n["responsibilities"])) - if n.get("achivement"): desc.append(str(n["achivement"])) - - if pro_nms: - # cv["pro_nms_tks"] = rag_tokenizer.tokenize(" ".join(pro_nms)) - cv["project_name_tks"] = rag_tokenizer.tokenize(pro_nms[0]) - if desc: - cv["pro_desc_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(" ".join(desc))) - cv["project_desc_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(desc[0])) - - return cv - - -def json_loads(line): - 
return demjson3.decode(re.sub(r": *(True|False)", r": '\1'", line)) - - -def forWork(cv): - if not cv.get("work_obj"): - cv["integerity_flt"] *= 0.7 - return cv - - flds = ["position_name", "corporation_name", "corporation_id", "responsibilities", - "industry_name", "subordinates_count"] - duas = [] - scales = [] - fea = {c: [] for c in flds} - latest_job_tm = "" - goodcorp = False - goodcorp_ = False - work_st_tm = "" - corp_tags = [] - for i, n in enumerate( - sorted(cv.get("work_obj", []), key=lambda x: str(x.get("start_time", "")) if type(x) == type({}) else "", - reverse=True)): - if type(n) == type(""): - try: - n = json_loads(n) - except Exception as e: - continue - - if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm): work_st_tm = n["start_time"] - for c in flds: - if not n.get(c) or str(n[c]) == '0': - fea[c].append("") - continue - if c == "corporation_name": - n[c] = corporations.corpNorm(n[c], False) - if corporations.is_good(n[c]): - if i == 0: - goodcorp = True - else: - goodcorp_ = True - ct = corporations.corp_tag(n[c]) - if i == 0: - corp_tags.extend(ct) - elif ct and ct[0] != "软外": - corp_tags.extend([f"{t}(曾)" for t in ct]) - - fea[c].append(rmHtmlTag(str(n[c]).lower())) - - y, m, d = getYMD(n.get("start_time")) - if not y or not m: continue - st = "%s-%02d-%02d" % (y, int(m), int(d)) - latest_job_tm = st - - y, m, d = getYMD(n.get("end_time")) - if (not y or not m) and i > 0: continue - if not y or not m or int(y) > 2022: y, m, d = getYMD(str(n.get("updated_at", ""))) - if not y or not m: continue - ed = "%s-%02d-%02d" % (y, int(m), int(d)) - - try: - duas.append((datetime.datetime.strptime(ed, "%Y-%m-%d") - datetime.datetime.strptime(st, "%Y-%m-%d")).days) - except Exception as e: - print("kkkkkkkkkkkkkkkkkkkk", n.get("start_time"), n.get("end_time")) - - if n.get("scale"): - r = re.search(r"^([0-9]+)", str(n["scale"])) - if r: scales.append(int(r.group(1))) - - if goodcorp: - if "tag_kwd" not in cv: cv["tag_kwd"] = [] - cv["tag_kwd"].append("好公司") - if goodcorp_: - if "tag_kwd" not in cv: cv["tag_kwd"] = [] - cv["tag_kwd"].append("好公司(曾)") - - if corp_tags: - if "tag_kwd" not in cv: cv["tag_kwd"] = [] - cv["tag_kwd"].extend(corp_tags) - cv["corp_tag_kwd"] = [c for c in corp_tags if re.match(r"(综合|行业)", c)] - - if latest_job_tm: cv["latest_job_dt"] = latest_job_tm - if fea["corporation_id"]: cv["corporation_id"] = fea["corporation_id"] - - if fea["position_name"]: - cv["position_name_tks"] = rag_tokenizer.tokenize(fea["position_name"][0]) - cv["position_name_sm_tks"] = rag_tokenizer.fine_grained_tokenize(cv["position_name_tks"]) - cv["pos_nm_tks"] = rag_tokenizer.tokenize(" ".join(fea["position_name"][1:])) - - if fea["industry_name"]: - cv["industry_name_tks"] = rag_tokenizer.tokenize(fea["industry_name"][0]) - cv["industry_name_sm_tks"] = rag_tokenizer.fine_grained_tokenize(cv["industry_name_tks"]) - cv["indu_nm_tks"] = rag_tokenizer.tokenize(" ".join(fea["industry_name"][1:])) - - if fea["corporation_name"]: - cv["corporation_name_kwd"] = fea["corporation_name"][0] - cv["corp_nm_kwd"] = fea["corporation_name"] - cv["corporation_name_tks"] = rag_tokenizer.tokenize(fea["corporation_name"][0]) - cv["corporation_name_sm_tks"] = rag_tokenizer.fine_grained_tokenize(cv["corporation_name_tks"]) - cv["corp_nm_tks"] = rag_tokenizer.tokenize(" ".join(fea["corporation_name"][1:])) - - if fea["responsibilities"]: - cv["responsibilities_ltks"] = rag_tokenizer.tokenize(fea["responsibilities"][0]) - cv["resp_ltks"] = rag_tokenizer.tokenize(" 
".join(fea["responsibilities"][1:])) - - if fea["subordinates_count"]: fea["subordinates_count"] = [int(i) for i in fea["subordinates_count"] if - re.match(r"[^0-9]+$", str(i))] - if fea["subordinates_count"]: cv["max_sub_cnt_int"] = np.max(fea["subordinates_count"]) - - if type(cv.get("corporation_id")) == type(1): cv["corporation_id"] = [str(cv["corporation_id"])] - if not cv.get("corporation_id"): cv["corporation_id"] = [] - for i in cv.get("corporation_id", []): - cv["baike_flt"] = max(corporations.baike(i), cv["baike_flt"] if "baike_flt" in cv else 0) - - if work_st_tm: - try: - if re.match(r"[0-9]{9,}", work_st_tm): work_st_tm = turnTm2Dt(work_st_tm) - y, m, d = getYMD(work_st_tm) - cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000)) - except Exception as e: - print("EXCEPTION: ", e, work_st_tm, cv.get("work_exp_flt")) - - cv["job_num_int"] = 0 - if duas: - cv["dua_flt"] = np.mean(duas) - cv["cur_dua_int"] = duas[0] - cv["job_num_int"] = len(duas) - if scales: cv["scale_flt"] = np.max(scales) - return cv - - -def turnTm2Dt(b): - if not b: return - b = str(b).strip() - if re.match(r"[0-9]{10,}", b): b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10]))) - return b - - -def getYMD(b): - y, m, d = "", "", "01" - if not b: return (y, m, d) - b = turnTm2Dt(b) - if re.match(r"[0-9]{4}", b): y = int(b[:4]) - r = re.search(r"[0-9]{4}.?([0-9]{1,2})", b) - if r: m = r.group(1) - r = re.search(r"[0-9]{4}.?[0-9]{,2}.?([0-9]{1,2})", b) - if r: d = r.group(1) - if not d or int(d) == 0 or int(d) > 31: d = "1" - if not m or int(m) > 12 or int(m) < 1: m = "1" - return (y, m, d) - - -def birth(cv): - if not cv.get("birth"): - cv["integerity_flt"] *= 0.9 - return cv - y, m, d = getYMD(cv["birth"]) - if not m or not y: return cv - b = "%s-%02d-%02d" % (y, int(m), int(d)) - cv["birth_dt"] = b - cv["birthday_kwd"] = "%02d%02d" % (int(m), int(d)) - - cv["age_int"] = datetime.datetime.now().year - int(y) - return cv - - -def parse(cv): - for k in cv.keys(): - if cv[k] == '\\N': cv[k] = '' - # cv = cv.asDict() - tks_fld = ["address", "corporation_name", "discipline_name", "email", "expect_city_names", - "expect_industry_name", "expect_position_name", "industry_name", "industry_names", "name", - "position_name", "school_name", "self_remark", "title_name"] - small_tks_fld = ["corporation_name", "expect_position_name", "position_name", "school_name", "title_name"] - kwd_fld = ["address", "city", "corporation_type", "degree", "discipline_name", "expect_city_names", "email", - "expect_industry_name", "expect_position_name", "expect_type", "gender", "industry_name", - "industry_names", "political_status", "position_name", "scale", "school_name", "phone", "tel"] - num_fld = ["annual_salary", "annual_salary_from", "annual_salary_to", "expect_annual_salary", "expect_salary_from", - "expect_salary_to", "salary_month"] - - is_fld = [ - ("is_fertility", "已育", "未育"), - ("is_house", "有房", "没房"), - ("is_management_experience", "有管理经验", "无管理经验"), - ("is_marital", "已婚", "未婚"), - ("is_oversea", "有海外经验", "无海外经验") - ] - - rmkeys = [] - for k in cv.keys(): - if cv[k] is None: rmkeys.append(k) - if (type(cv[k]) == type([]) or type(cv[k]) == type("")) and len(cv[k]) == 0: rmkeys.append(k) - for k in rmkeys: del cv[k] - - integerity = 0. - flds_num = 0. 
- - def hasValues(flds): - nonlocal integerity, flds_num - flds_num += len(flds) - for f in flds: - v = str(cv.get(f, "")) - if len(v) > 0 and v != '0' and v != '[]': integerity += 1 - - hasValues(tks_fld) - hasValues(small_tks_fld) - hasValues(kwd_fld) - hasValues(num_fld) - cv["integerity_flt"] = integerity / flds_num - - if cv.get("corporation_type"): - for p, r in [(r"(公司|企业|其它|其他|Others*|\n|未填写|Enterprises|Company|companies)", ""), - (r"[//.· <\((]+.*", ""), - (r".*(合资|民企|股份制|中外|私营|个体|Private|创业|Owned|投资).*", "民营"), - (r".*(机关|事业).*", "机关"), - (r".*(非盈利|Non-profit).*", "非盈利"), - (r".*(外企|外商|欧美|foreign|Institution|Australia|港资).*", "外企"), - (r".*国有.*", "国企"), - (r"[ ()\(\)人/·0-9-]+", ""), - (r".*(元|规模|于|=|北京|上海|至今|中国|工资|州|shanghai|强|餐饮|融资|职).*", "")]: - cv["corporation_type"] = re.sub(p, r, cv["corporation_type"], 1000, re.IGNORECASE) - if len(cv["corporation_type"]) < 2: del cv["corporation_type"] - - if cv.get("political_status"): - for p, r in [ - (r".*党员.*", "党员"), - (r".*(无党派|公民).*", "群众"), - (r".*团员.*", "团员")]: - cv["political_status"] = re.sub(p, r, cv["political_status"]) - if not re.search(r"[党团群]", cv["political_status"]): del cv["political_status"] - - if cv.get("phone"): cv["phone"] = re.sub(r"^0*86([0-9]{11})", r"\1", re.sub(r"[^0-9]+", "", cv["phone"])) - - keys = list(cv.keys()) - for k in keys: - # deal with json objects - if k.find("_obj") > 0: - try: - cv[k] = json_loads(cv[k]) - cv[k] = [a for _, a in cv[k].items()] - nms = [] - for n in cv[k]: - if type(n) != type({}) or "name" not in n or not n.get("name"): continue - n["name"] = re.sub(r"((442)|\t )", "", n["name"]).strip().lower() - if not n["name"]: continue - nms.append(n["name"]) - if nms: - t = k[:-4] - cv[f"{t}_kwd"] = nms - cv[f"{t}_tks"] = rag_tokenizer.tokenize(" ".join(nms)) - except Exception as e: - print("【EXCEPTION】:", str(traceback.format_exc()), cv[k]) - cv[k] = [] - - # tokenize fields - if k in tks_fld: - cv[f"{k}_tks"] = rag_tokenizer.tokenize(cv[k]) - if k in small_tks_fld: cv[f"{k}_sm_tks"] = rag_tokenizer.tokenize(cv[f"{k}_tks"]) - - # keyword fields - if k in kwd_fld: cv[f"{k}_kwd"] = [n.lower() - for n in re.split(r"[\t,,;;. ]", - re.sub(r"([^a-zA-Z])[ ]+([^a-zA-Z ])", r"\1,\2", cv[k]) - ) if n] - - if k in num_fld and cv.get(k): cv[f"{k}_int"] = cv[k] - - cv["email_kwd"] = cv.get("email_tks", "").replace(" ", "") - # for name field - if cv.get("name"): - nm = re.sub(r"[\n——\-\((\+].*", "", cv["name"].strip()) - nm = re.sub(r"[ \t ]+", " ", nm) - if re.match(r"[a-zA-Z ]+$", nm): - if len(nm.split(" ")) > 1: - cv["name"] = nm - else: - nm = "" - elif nm and (surname.isit(nm[0]) or surname.isit(nm[:2])): - nm = re.sub(r"[a-zA-Z]+.*", "", nm[:5]) - else: - nm = "" - cv["name"] = nm.strip() - name = cv["name"] - - # name pingyin and its prefix - cv["name_py_tks"] = " ".join(PY.get_pinyins(nm[:20], '')) + " " + " ".join(PY.get_pinyins(nm[:20], ' ')) - cv["name_py_pref0_tks"] = "" - cv["name_py_pref_tks"] = "" - for py in PY.get_pinyins(nm[:20], ''): - for i in range(2, len(py) + 1): cv["name_py_pref_tks"] += " " + py[:i] - for py in PY.get_pinyins(nm[:20], ' '): - py = py.split(" ") - for i in range(1, len(py) + 1): cv["name_py_pref0_tks"] += " " + "".join(py[:i]) - - cv["name_kwd"] = name - cv["name_pinyin_kwd"] = PY.get_pinyins(nm[:20], ' ')[:3] - cv["name_tks"] = ( - rag_tokenizer.tokenize(name) + " " + (" ".join(list(name)) if not re.match(r"[a-zA-Z ]+$", name) else "") - ) if name else "" - else: - cv["integerity_flt"] /= 2. 
- - if cv.get("phone"): - r = re.search(r"(1[3456789][0-9]{9})", cv["phone"]) - if not r: - cv["phone"] = "" - else: - cv["phone"] = r.group(1) - - # deal with date fields - if cv.get("updated_at") and isinstance(cv["updated_at"], datetime.datetime): - cv["updated_at_dt"] = cv["updated_at"].strftime('%Y-%m-%d %H:%M:%S') - else: - y, m, d = getYMD(str(cv.get("updated_at", ""))) - if not y: y = "2012" - if not m: m = "01" - if not d: d = "01" - cv["updated_at_dt"] = f"%s-%02d-%02d 00:00:00" % (y, int(m), int(d)) - # long text tokenize - - if cv.get("responsibilities"): cv["responsibilities_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(cv["responsibilities"])) - - # for yes or no field - fea = [] - for f, y, n in is_fld: - if f not in cv: continue - if cv[f] == '是': fea.append(y) - if cv[f] == '否': fea.append(n) - - if fea: cv["tag_kwd"] = fea - - cv = forEdu(cv) - cv = forProj(cv) - cv = forWork(cv) - cv = birth(cv) - - cv["corp_proj_sch_deg_kwd"] = [c for c in cv.get("corp_tag_kwd", [])] - for i in range(len(cv["corp_proj_sch_deg_kwd"])): - for j in cv.get("sch_rank_kwd", []): cv["corp_proj_sch_deg_kwd"][i] += "+" + j - for i in range(len(cv["corp_proj_sch_deg_kwd"])): - if cv.get("highest_degree_kwd"): cv["corp_proj_sch_deg_kwd"][i] += "+" + cv["highest_degree_kwd"] - - try: - if not cv.get("work_exp_flt") and cv.get("work_start_time"): - if re.match(r"[0-9]{9,}", str(cv["work_start_time"])): - cv["work_start_dt"] = turnTm2Dt(cv["work_start_time"]) - cv["work_exp_flt"] = (time.time() - int(int(cv["work_start_time"]) / 1000)) / 3600. / 24. / 365. - elif re.match(r"[0-9]{4}[^0-9]", str(cv["work_start_time"])): - y, m, d = getYMD(str(cv["work_start_time"])) - cv["work_start_dt"] = f"%s-%02d-%02d 00:00:00" % (y, int(m), int(d)) - cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y) - except Exception as e: - print("【EXCEPTION】", e, "==>", cv.get("work_start_time")) - if "work_exp_flt" not in cv and cv.get("work_experience", 0): cv["work_exp_flt"] = int(cv["work_experience"]) / 12. - - keys = list(cv.keys()) - for k in keys: - if not re.search(r"_(fea|tks|nst|dt|int|flt|ltks|kwd|id)$", k): del cv[k] - for k in cv.keys(): - if not re.search("_(kwd|id)$", k) or type(cv[k]) != type([]): continue - cv[k] = list(set([re.sub("(市)$", "", str(n)) for n in cv[k] if n not in ['中国', '0']])) - keys = [k for k in cv.keys() if re.search(r"_feas*$", k)] - for k in keys: - if cv[k] <= 0: del cv[k] - - cv["tob_resume_id"] = str(cv["tob_resume_id"]) - cv["id"] = cv["tob_resume_id"] - print("CCCCCCCCCCCCCCC") - - return dealWithInt64(cv) - - -def dealWithInt64(d): - if isinstance(d, dict): - for n, v in d.items(): - d[n] = dealWithInt64(v) - - if isinstance(d, list): - d = [dealWithInt64(t) for t in d] - - if isinstance(d, np.integer): d = int(d) - return d - +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
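
`step_two.py` also carries a `time_limit` context manager built on `SIGALRM` for bounding slow steps. A minimal usage sketch (Unix-only, since it relies on `signal.alarm` in the main thread; `slow_parse` is a stand-in name, not a function from this diff):

```python
import time

from deepdoc.parser.resume.step_two import TimeoutException, time_limit


def slow_parse():  # stand-in for any long-running parsing step
    time.sleep(10)


try:
    with time_limit(3):  # raises TimeoutException after ~3 seconds
        slow_parse()
except TimeoutException:
    print("parse timed out, skipping this resume")
```
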
+# + +import re, copy, time, datetime, demjson3, \ + traceback, signal +import numpy as np +from deepdoc.parser.resume.entities import degrees, schools, corporations +from rag.nlp import rag_tokenizer, surname +from xpinyin import Pinyin +from contextlib import contextmanager + + +class TimeoutException(Exception): pass + + +@contextmanager +def time_limit(seconds): + def signal_handler(signum, frame): + raise TimeoutException("Timed out!") + + signal.signal(signal.SIGALRM, signal_handler) + signal.alarm(seconds) + try: + yield + finally: + signal.alarm(0) + + +ENV = None +PY = Pinyin() + + +def rmHtmlTag(line): + return re.sub(r"<[a-z0-9.\"=';,:\+_/ -]+>", " ", line, 100000, re.IGNORECASE) + + +def highest_degree(dg): + if not dg: return "" + if type(dg) == type(""): dg = [dg] + m = {"初中": 0, "高中": 1, "中专": 2, "大专": 3, "专升本": 4, "本科": 5, "硕士": 6, "博士": 7, "博士后": 8} + return sorted([(d, m.get(d, -1)) for d in dg], key=lambda x: x[1] * -1)[0][0] + + +def forEdu(cv): + if not cv.get("education_obj"): + cv["integerity_flt"] *= 0.8 + return cv + + first_fea, fea, maj, fmaj, deg, fdeg, sch, fsch, st_dt, ed_dt = [], [], [], [], [], [], [], [], [], [] + edu_nst = [] + edu_end_dt = "" + cv["school_rank_int"] = 1000000 + for ii, n in enumerate(sorted(cv["education_obj"], key=lambda x: x.get("start_time", "3"))): + e = {} + if n.get("end_time"): + if n["end_time"] > edu_end_dt: edu_end_dt = n["end_time"] + try: + dt = n["end_time"] + if re.match(r"[0-9]{9,}", dt): dt = turnTm2Dt(dt) + y, m, d = getYMD(dt) + ed_dt.append(str(y)) + e["end_dt_kwd"] = str(y) + except Exception as e: + pass + if n.get("start_time"): + try: + dt = n["start_time"] + if re.match(r"[0-9]{9,}", dt): dt = turnTm2Dt(dt) + y, m, d = getYMD(dt) + st_dt.append(str(y)) + e["start_dt_kwd"] = str(y) + except Exception as e: + pass + + r = schools.select(n.get("school_name", "")) + if r: + if str(r.get("type", "")) == "1": fea.append("211") + if str(r.get("type", "")) == "2": fea.append("211") + if str(r.get("is_abroad", "")) == "1": fea.append("留学") + if str(r.get("is_double_first", "")) == "1": fea.append("双一流") + if str(r.get("is_985", "")) == "1": fea.append("985") + if str(r.get("is_world_known", "")) == "1": fea.append("海外知名") + if r.get("rank") and cv["school_rank_int"] > r["rank"]: cv["school_rank_int"] = r["rank"] + + if n.get("school_name") and isinstance(n["school_name"], str): + sch.append(re.sub(r"(211|985|重点大学|[,&;;-])", "", n["school_name"])) + e["sch_nm_kwd"] = sch[-1] + fea.append(rag_tokenizer.fine_grained_tokenize(rag_tokenizer.tokenize(n.get("school_name", ""))).split(" ")[-1]) + + if n.get("discipline_name") and isinstance(n["discipline_name"], str): + maj.append(n["discipline_name"]) + e["major_kwd"] = n["discipline_name"] + + if not n.get("degree") and "985" in fea and not first_fea: n["degree"] = "1" + + if n.get("degree"): + d = degrees.get_name(n["degree"]) + if d: e["degree_kwd"] = d + if d == "本科" and ("专科" in deg or "专升本" in deg or "中专" in deg or "大专" in deg or re.search(r"(成人|自考|自学考试)", + n.get( + "school_name", + ""))): d = "专升本" + if d: deg.append(d) + + # for first degree + if not fdeg and d in ["中专", "专升本", "专科", "本科", "大专"]: + fdeg = [d] + if n.get("school_name"): fsch = [n["school_name"]] + if n.get("discipline_name"): fmaj = [n["discipline_name"]] + first_fea = copy.deepcopy(fea) + + edu_nst.append(e) + + cv["sch_rank_kwd"] = [] + if cv["school_rank_int"] <= 20 \ + or ("海外名校" in fea and cv["school_rank_int"] <= 200): + cv["sch_rank_kwd"].append("顶尖学校") + elif cv["school_rank_int"] <= 50 and 
cv["school_rank_int"] > 20 \ + or ("海外名校" in fea and cv["school_rank_int"] <= 500 and \ + cv["school_rank_int"] > 200): + cv["sch_rank_kwd"].append("精英学校") + elif cv["school_rank_int"] > 50 and ("985" in fea or "211" in fea) \ + or ("海外名校" in fea and cv["school_rank_int"] > 500): + cv["sch_rank_kwd"].append("优质学校") + else: + cv["sch_rank_kwd"].append("一般学校") + + if edu_nst: cv["edu_nst"] = edu_nst + if fea: cv["edu_fea_kwd"] = list(set(fea)) + if first_fea: cv["edu_first_fea_kwd"] = list(set(first_fea)) + if maj: cv["major_kwd"] = maj + if fsch: cv["first_school_name_kwd"] = fsch + if fdeg: cv["first_degree_kwd"] = fdeg + if fmaj: cv["first_major_kwd"] = fmaj + if st_dt: cv["edu_start_kwd"] = st_dt + if ed_dt: cv["edu_end_kwd"] = ed_dt + if ed_dt: cv["edu_end_int"] = max([int(t) for t in ed_dt]) + if deg: + if "本科" in deg and "专科" in deg: + deg.append("专升本") + deg = [d for d in deg if d != '本科'] + cv["degree_kwd"] = deg + cv["highest_degree_kwd"] = highest_degree(deg) + if edu_end_dt: + try: + if re.match(r"[0-9]{9,}", edu_end_dt): edu_end_dt = turnTm2Dt(edu_end_dt) + if edu_end_dt.strip("\n") == "至今": edu_end_dt = cv.get("updated_at_dt", str(datetime.date.today())) + y, m, d = getYMD(edu_end_dt) + cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000)) + except Exception as e: + print("EXCEPTION: ", e, edu_end_dt, cv.get("work_exp_flt")) + if sch: + cv["school_name_kwd"] = sch + if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"]) \ + or all([c.lower() in ["硕士", "博士", "mba", "博士后"] for c in cv.get("degree_kwd", [])]) \ + or not cv.get("degree_kwd"): + for c in sch: + if schools.is_good(c): + if "tag_kwd" not in cv: cv["tag_kwd"] = [] + cv["tag_kwd"].append("好学校") + cv["tag_kwd"].append("好学历") + break + if (len(cv.get("degree_kwd", [])) >= 1 and \ + "本科" in cv["degree_kwd"] and \ + any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \ + or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \ + or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]): + if "tag_kwd" not in cv: cv["tag_kwd"] = [] + if "好学历" not in cv["tag_kwd"]: cv["tag_kwd"].append("好学历") + + if cv.get("major_kwd"): cv["major_tks"] = rag_tokenizer.tokenize(" ".join(maj)) + if cv.get("school_name_kwd"): cv["school_name_tks"] = rag_tokenizer.tokenize(" ".join(sch)) + if cv.get("first_school_name_kwd"): cv["first_school_name_tks"] = rag_tokenizer.tokenize(" ".join(fsch)) + if cv.get("first_major_kwd"): cv["first_major_tks"] = rag_tokenizer.tokenize(" ".join(fmaj)) + + return cv + + +def forProj(cv): + if not cv.get("project_obj"): return cv + + pro_nms, desc = [], [] + for i, n in enumerate( + sorted(cv.get("project_obj", []), key=lambda x: str(x.get("updated_at", "")) if type(x) == type({}) else "", + reverse=True)): + if n.get("name"): pro_nms.append(n["name"]) + if n.get("describe"): desc.append(str(n["describe"])) + if n.get("responsibilities"): desc.append(str(n["responsibilities"])) + if n.get("achivement"): desc.append(str(n["achivement"])) + + if pro_nms: + # cv["pro_nms_tks"] = rag_tokenizer.tokenize(" ".join(pro_nms)) + cv["project_name_tks"] = rag_tokenizer.tokenize(pro_nms[0]) + if desc: + cv["pro_desc_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(" ".join(desc))) + cv["project_desc_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(desc[0])) + + return cv + + +def json_loads(line): + return demjson3.decode(re.sub(r": *(True|False)", r": '\1'", line)) + + +def forWork(cv): + if not 
cv.get("work_obj"): + cv["integerity_flt"] *= 0.7 + return cv + + flds = ["position_name", "corporation_name", "corporation_id", "responsibilities", + "industry_name", "subordinates_count"] + duas = [] + scales = [] + fea = {c: [] for c in flds} + latest_job_tm = "" + goodcorp = False + goodcorp_ = False + work_st_tm = "" + corp_tags = [] + for i, n in enumerate( + sorted(cv.get("work_obj", []), key=lambda x: str(x.get("start_time", "")) if type(x) == type({}) else "", + reverse=True)): + if type(n) == type(""): + try: + n = json_loads(n) + except Exception as e: + continue + + if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm): work_st_tm = n["start_time"] + for c in flds: + if not n.get(c) or str(n[c]) == '0': + fea[c].append("") + continue + if c == "corporation_name": + n[c] = corporations.corpNorm(n[c], False) + if corporations.is_good(n[c]): + if i == 0: + goodcorp = True + else: + goodcorp_ = True + ct = corporations.corp_tag(n[c]) + if i == 0: + corp_tags.extend(ct) + elif ct and ct[0] != "软外": + corp_tags.extend([f"{t}(曾)" for t in ct]) + + fea[c].append(rmHtmlTag(str(n[c]).lower())) + + y, m, d = getYMD(n.get("start_time")) + if not y or not m: continue + st = "%s-%02d-%02d" % (y, int(m), int(d)) + latest_job_tm = st + + y, m, d = getYMD(n.get("end_time")) + if (not y or not m) and i > 0: continue + if not y or not m or int(y) > 2022: y, m, d = getYMD(str(n.get("updated_at", ""))) + if not y or not m: continue + ed = "%s-%02d-%02d" % (y, int(m), int(d)) + + try: + duas.append((datetime.datetime.strptime(ed, "%Y-%m-%d") - datetime.datetime.strptime(st, "%Y-%m-%d")).days) + except Exception as e: + print("kkkkkkkkkkkkkkkkkkkk", n.get("start_time"), n.get("end_time")) + + if n.get("scale"): + r = re.search(r"^([0-9]+)", str(n["scale"])) + if r: scales.append(int(r.group(1))) + + if goodcorp: + if "tag_kwd" not in cv: cv["tag_kwd"] = [] + cv["tag_kwd"].append("好公司") + if goodcorp_: + if "tag_kwd" not in cv: cv["tag_kwd"] = [] + cv["tag_kwd"].append("好公司(曾)") + + if corp_tags: + if "tag_kwd" not in cv: cv["tag_kwd"] = [] + cv["tag_kwd"].extend(corp_tags) + cv["corp_tag_kwd"] = [c for c in corp_tags if re.match(r"(综合|行业)", c)] + + if latest_job_tm: cv["latest_job_dt"] = latest_job_tm + if fea["corporation_id"]: cv["corporation_id"] = fea["corporation_id"] + + if fea["position_name"]: + cv["position_name_tks"] = rag_tokenizer.tokenize(fea["position_name"][0]) + cv["position_name_sm_tks"] = rag_tokenizer.fine_grained_tokenize(cv["position_name_tks"]) + cv["pos_nm_tks"] = rag_tokenizer.tokenize(" ".join(fea["position_name"][1:])) + + if fea["industry_name"]: + cv["industry_name_tks"] = rag_tokenizer.tokenize(fea["industry_name"][0]) + cv["industry_name_sm_tks"] = rag_tokenizer.fine_grained_tokenize(cv["industry_name_tks"]) + cv["indu_nm_tks"] = rag_tokenizer.tokenize(" ".join(fea["industry_name"][1:])) + + if fea["corporation_name"]: + cv["corporation_name_kwd"] = fea["corporation_name"][0] + cv["corp_nm_kwd"] = fea["corporation_name"] + cv["corporation_name_tks"] = rag_tokenizer.tokenize(fea["corporation_name"][0]) + cv["corporation_name_sm_tks"] = rag_tokenizer.fine_grained_tokenize(cv["corporation_name_tks"]) + cv["corp_nm_tks"] = rag_tokenizer.tokenize(" ".join(fea["corporation_name"][1:])) + + if fea["responsibilities"]: + cv["responsibilities_ltks"] = rag_tokenizer.tokenize(fea["responsibilities"][0]) + cv["resp_ltks"] = rag_tokenizer.tokenize(" ".join(fea["responsibilities"][1:])) + + if fea["subordinates_count"]: fea["subordinates_count"] = [int(i) for i 
in fea["subordinates_count"] if + re.match(r"[^0-9]+$", str(i))] + if fea["subordinates_count"]: cv["max_sub_cnt_int"] = np.max(fea["subordinates_count"]) + + if type(cv.get("corporation_id")) == type(1): cv["corporation_id"] = [str(cv["corporation_id"])] + if not cv.get("corporation_id"): cv["corporation_id"] = [] + for i in cv.get("corporation_id", []): + cv["baike_flt"] = max(corporations.baike(i), cv["baike_flt"] if "baike_flt" in cv else 0) + + if work_st_tm: + try: + if re.match(r"[0-9]{9,}", work_st_tm): work_st_tm = turnTm2Dt(work_st_tm) + y, m, d = getYMD(work_st_tm) + cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000)) + except Exception as e: + print("EXCEPTION: ", e, work_st_tm, cv.get("work_exp_flt")) + + cv["job_num_int"] = 0 + if duas: + cv["dua_flt"] = np.mean(duas) + cv["cur_dua_int"] = duas[0] + cv["job_num_int"] = len(duas) + if scales: cv["scale_flt"] = np.max(scales) + return cv + + +def turnTm2Dt(b): + if not b: return + b = str(b).strip() + if re.match(r"[0-9]{10,}", b): b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10]))) + return b + + +def getYMD(b): + y, m, d = "", "", "01" + if not b: return (y, m, d) + b = turnTm2Dt(b) + if re.match(r"[0-9]{4}", b): y = int(b[:4]) + r = re.search(r"[0-9]{4}.?([0-9]{1,2})", b) + if r: m = r.group(1) + r = re.search(r"[0-9]{4}.?[0-9]{,2}.?([0-9]{1,2})", b) + if r: d = r.group(1) + if not d or int(d) == 0 or int(d) > 31: d = "1" + if not m or int(m) > 12 or int(m) < 1: m = "1" + return (y, m, d) + + +def birth(cv): + if not cv.get("birth"): + cv["integerity_flt"] *= 0.9 + return cv + y, m, d = getYMD(cv["birth"]) + if not m or not y: return cv + b = "%s-%02d-%02d" % (y, int(m), int(d)) + cv["birth_dt"] = b + cv["birthday_kwd"] = "%02d%02d" % (int(m), int(d)) + + cv["age_int"] = datetime.datetime.now().year - int(y) + return cv + + +def parse(cv): + for k in cv.keys(): + if cv[k] == '\\N': cv[k] = '' + # cv = cv.asDict() + tks_fld = ["address", "corporation_name", "discipline_name", "email", "expect_city_names", + "expect_industry_name", "expect_position_name", "industry_name", "industry_names", "name", + "position_name", "school_name", "self_remark", "title_name"] + small_tks_fld = ["corporation_name", "expect_position_name", "position_name", "school_name", "title_name"] + kwd_fld = ["address", "city", "corporation_type", "degree", "discipline_name", "expect_city_names", "email", + "expect_industry_name", "expect_position_name", "expect_type", "gender", "industry_name", + "industry_names", "political_status", "position_name", "scale", "school_name", "phone", "tel"] + num_fld = ["annual_salary", "annual_salary_from", "annual_salary_to", "expect_annual_salary", "expect_salary_from", + "expect_salary_to", "salary_month"] + + is_fld = [ + ("is_fertility", "已育", "未育"), + ("is_house", "有房", "没房"), + ("is_management_experience", "有管理经验", "无管理经验"), + ("is_marital", "已婚", "未婚"), + ("is_oversea", "有海外经验", "无海外经验") + ] + + rmkeys = [] + for k in cv.keys(): + if cv[k] is None: rmkeys.append(k) + if (type(cv[k]) == type([]) or type(cv[k]) == type("")) and len(cv[k]) == 0: rmkeys.append(k) + for k in rmkeys: del cv[k] + + integerity = 0. + flds_num = 0. 
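+
+    # integrity scoring: hasValues() below tallies, per field group, how many
+    # fields actually carry a usable value; the filled-in ratio is stored in integerity_flt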
+
+    def hasValues(flds):
+        nonlocal integerity, flds_num
+        flds_num += len(flds)
+        for f in flds:
+            v = str(cv.get(f, ""))
+            if len(v) > 0 and v != '0' and v != '[]': integerity += 1
+
+    hasValues(tks_fld)
+    hasValues(small_tks_fld)
+    hasValues(kwd_fld)
+    hasValues(num_fld)
+    cv["integerity_flt"] = integerity / flds_num
+
+    if cv.get("corporation_type"):
+        for p, r in [(r"(公司|企业|其它|其他|Others*|\n|未填写|Enterprises|Company|companies)", ""),
+                     (r"[//.· <\((]+.*", ""),
+                     (r".*(合资|民企|股份制|中外|私营|个体|Private|创业|Owned|投资).*", "民营"),
+                     (r".*(机关|事业).*", "机关"),
+                     (r".*(非盈利|Non-profit).*", "非盈利"),
+                     (r".*(外企|外商|欧美|foreign|Institution|Australia|港资).*", "外企"),
+                     (r".*国有.*", "国企"),
+                     (r"[ ()\(\)人/·0-9-]+", ""),
+                     (r".*(元|规模|于|=|北京|上海|至今|中国|工资|州|shanghai|强|餐饮|融资|职).*", "")]:
+            cv["corporation_type"] = re.sub(p, r, cv["corporation_type"], 1000, re.IGNORECASE)
+        if len(cv["corporation_type"]) < 2: del cv["corporation_type"]
+
+    if cv.get("political_status"):
+        for p, r in [
+            (r".*党员.*", "党员"),
+            (r".*(无党派|公民).*", "群众"),
+            (r".*团员.*", "团员")]:
+            cv["political_status"] = re.sub(p, r, cv["political_status"])
+        if not re.search(r"[党团群]", cv["political_status"]): del cv["political_status"]
+
+    if cv.get("phone"): cv["phone"] = re.sub(r"^0*86([0-9]{11})", r"\1", re.sub(r"[^0-9]+", "", cv["phone"]))
+
+    keys = list(cv.keys())
+    for k in keys:
+        # deal with json objects
+        if k.find("_obj") > 0:
+            try:
+                cv[k] = json_loads(cv[k])
+                cv[k] = [a for _, a in cv[k].items()]
+                nms = []
+                for n in cv[k]:
+                    if type(n) != type({}) or "name" not in n or not n.get("name"): continue
+                    n["name"] = re.sub(r"((442)|\t )", "", n["name"]).strip().lower()
+                    if not n["name"]: continue
+                    nms.append(n["name"])
+                if nms:
+                    t = k[:-4]
+                    cv[f"{t}_kwd"] = nms
+                    cv[f"{t}_tks"] = rag_tokenizer.tokenize(" ".join(nms))
+            except Exception as e:
+                print("EXCEPTION:", str(traceback.format_exc()), cv[k])
+                cv[k] = []
+
+        # tokenize fields
+        if k in tks_fld:
+            cv[f"{k}_tks"] = rag_tokenizer.tokenize(cv[k])
+            if k in small_tks_fld: cv[f"{k}_sm_tks"] = rag_tokenizer.tokenize(cv[f"{k}_tks"])
+
+        # keyword fields
+        if k in kwd_fld: cv[f"{k}_kwd"] = [n.lower()
+                                           for n in re.split(r"[\t,,;;. ]",
+                                                             re.sub(r"([^a-zA-Z])[ ]+([^a-zA-Z ])", r"\1,\2", cv[k])
+                                                             ) if n]
+
+        if k in num_fld and cv.get(k): cv[f"{k}_int"] = cv[k]
+
+    cv["email_kwd"] = cv.get("email_tks", "").replace(" ", "")
+    # for name field
+    if cv.get("name"):
+        nm = re.sub(r"[\n——\-\((\+].*", "", cv["name"].strip())
+        nm = re.sub(r"[ \t ]+", " ", nm)
+        if re.match(r"[a-zA-Z ]+$", nm):
+            if len(nm.split(" ")) > 1:
+                cv["name"] = nm
+            else:
+                nm = ""
+        elif nm and (surname.isit(nm[0]) or surname.isit(nm[:2])):
+            nm = re.sub(r"[a-zA-Z]+.*", "", nm[:5])
+        else:
+            nm = ""
+        cv["name"] = nm.strip()
+        name = cv["name"]
+
+        # name pinyin and its prefixes
+        cv["name_py_tks"] = " ".join(PY.get_pinyins(nm[:20], '')) + " " + " ".join(PY.get_pinyins(nm[:20], ' '))
+        cv["name_py_pref0_tks"] = ""
+        cv["name_py_pref_tks"] = ""
+        for py in PY.get_pinyins(nm[:20], ''):
+            for i in range(2, len(py) + 1): cv["name_py_pref_tks"] += " " + py[:i]
+        for py in PY.get_pinyins(nm[:20], ' '):
+            py = py.split(" ")
+            for i in range(1, len(py) + 1): cv["name_py_pref0_tks"] += " " + "".join(py[:i])
+
+        cv["name_kwd"] = name
+        cv["name_pinyin_kwd"] = PY.get_pinyins(nm[:20], ' ')[:3]
+        cv["name_tks"] = (
+            rag_tokenizer.tokenize(name) + " " + (" ".join(list(name)) if not re.match(r"[a-zA-Z ]+$", name) else "")
+        ) if name else ""
+    else:
+        cv["integerity_flt"] /= 2.
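+
+    # keep only a well-formed 11-digit mainland mobile number; anything else is blanked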
+    if cv.get("phone"):
+        r = re.search(r"(1[3456789][0-9]{9})", cv["phone"])
+        if not r:
+            cv["phone"] = ""
+        else:
+            cv["phone"] = r.group(1)
+
+    # deal with date fields
+    if cv.get("updated_at") and isinstance(cv["updated_at"], datetime.datetime):
+        cv["updated_at_dt"] = cv["updated_at"].strftime('%Y-%m-%d %H:%M:%S')
+    else:
+        y, m, d = getYMD(str(cv.get("updated_at", "")))
+        if not y: y = "2012"
+        if not m: m = "01"
+        if not d: d = "01"
+        cv["updated_at_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
+    # long text tokenize
+
+    if cv.get("responsibilities"): cv["responsibilities_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(cv["responsibilities"]))
+
+    # for yes or no field
+    fea = []
+    for f, y, n in is_fld:
+        if f not in cv: continue
+        if cv[f] == '是': fea.append(y)
+        if cv[f] == '否': fea.append(n)
+
+    if fea: cv["tag_kwd"] = fea
+
+    cv = forEdu(cv)
+    cv = forProj(cv)
+    cv = forWork(cv)
+    cv = birth(cv)
+
+    cv["corp_proj_sch_deg_kwd"] = [c for c in cv.get("corp_tag_kwd", [])]
+    for i in range(len(cv["corp_proj_sch_deg_kwd"])):
+        for j in cv.get("sch_rank_kwd", []): cv["corp_proj_sch_deg_kwd"][i] += "+" + j
+    for i in range(len(cv["corp_proj_sch_deg_kwd"])):
+        if cv.get("highest_degree_kwd"): cv["corp_proj_sch_deg_kwd"][i] += "+" + cv["highest_degree_kwd"]
+
+    try:
+        if not cv.get("work_exp_flt") and cv.get("work_start_time"):
+            if re.match(r"[0-9]{9,}", str(cv["work_start_time"])):
+                cv["work_start_dt"] = turnTm2Dt(cv["work_start_time"])
+                cv["work_exp_flt"] = (time.time() - int(int(cv["work_start_time"]) / 1000)) / 3600. / 24. / 365.
+            elif re.match(r"[0-9]{4}[^0-9]", str(cv["work_start_time"])):
+                y, m, d = getYMD(str(cv["work_start_time"]))
+                cv["work_start_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
+                cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
+    except Exception as e:
+        print("EXCEPTION: ", e, "==>", cv.get("work_start_time"))
+    if "work_exp_flt" not in cv and cv.get("work_experience", 0): cv["work_exp_flt"] = int(cv["work_experience"]) / 12.
+
+    keys = list(cv.keys())
+    for k in keys:
+        if not re.search(r"_(fea|tks|nst|dt|int|flt|ltks|kwd|id)$", k): del cv[k]
+    for k in cv.keys():
+        if not re.search("_(kwd|id)$", k) or type(cv[k]) != type([]): continue
+        cv[k] = list(set([re.sub("(市)$", "", str(n)) for n in cv[k] if n not in ['中国', '0']]))
+    keys = [k for k in cv.keys() if re.search(r"_feas*$", k)]
+    for k in keys:
+        if cv[k] <= 0: del cv[k]
+
+    cv["tob_resume_id"] = str(cv["tob_resume_id"])
+    cv["id"] = cv["tob_resume_id"]
+
+    return dealWithInt64(cv)
+
+
+def dealWithInt64(d):
+    if isinstance(d, dict):
+        for n, v in d.items():
+            d[n] = dealWithInt64(v)
+
+    if isinstance(d, list):
+        d = [dealWithInt64(t) for t in d]
+
+    if isinstance(d, np.integer): d = int(d)
+    return d
+
diff --git a/deepdoc/vision/__init__.py b/deepdoc/vision/__init__.py
index 46afe0127d7951b6484d4e492044e4b3d640b1d7..9f16fe3d8c758e30044c1e9b5356e75afce3a10c 100644
--- a/deepdoc/vision/__init__.py
+++ b/deepdoc/vision/__init__.py
@@ -1,61 +1,61 @@
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pdfplumber - -from .ocr import OCR -from .recognizer import Recognizer -from .layout_recognizer import LayoutRecognizer -from .table_structure_recognizer import TableStructureRecognizer - - -def init_in_out(args): - from PIL import Image - import os - import traceback - from api.utils.file_utils import traversal_files - images = [] - outputs = [] - - if not os.path.exists(args.output_dir): - os.mkdir(args.output_dir) - - def pdf_pages(fnm, zoomin=3): - nonlocal outputs, images - pdf = pdfplumber.open(fnm) - images = [p.to_image(resolution=72 * zoomin).annotated for i, p in - enumerate(pdf.pages)] - - for i, page in enumerate(images): - outputs.append(os.path.split(fnm)[-1] + f"_{i}.jpg") - - def images_and_outputs(fnm): - nonlocal outputs, images - if fnm.split(".")[-1].lower() == "pdf": - pdf_pages(fnm) - return - try: - images.append(Image.open(fnm)) - outputs.append(os.path.split(fnm)[-1]) - except Exception as e: - traceback.print_exc() - - if os.path.isdir(args.inputs): - for fnm in traversal_files(args.inputs): - images_and_outputs(fnm) - else: - images_and_outputs(args.inputs) - - for i in range(len(outputs)): outputs[i] = os.path.join(args.output_dir, outputs[i]) - +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pdfplumber + +from .ocr import OCR +from .recognizer import Recognizer +from .layout_recognizer import LayoutRecognizer +from .table_structure_recognizer import TableStructureRecognizer + + +def init_in_out(args): + from PIL import Image + import os + import traceback + from api.utils.file_utils import traversal_files + images = [] + outputs = [] + + if not os.path.exists(args.output_dir): + os.mkdir(args.output_dir) + + def pdf_pages(fnm, zoomin=3): + nonlocal outputs, images + pdf = pdfplumber.open(fnm) + images = [p.to_image(resolution=72 * zoomin).annotated for i, p in + enumerate(pdf.pages)] + + for i, page in enumerate(images): + outputs.append(os.path.split(fnm)[-1] + f"_{i}.jpg") + + def images_and_outputs(fnm): + nonlocal outputs, images + if fnm.split(".")[-1].lower() == "pdf": + pdf_pages(fnm) + return + try: + images.append(Image.open(fnm)) + outputs.append(os.path.split(fnm)[-1]) + except Exception as e: + traceback.print_exc() + + if os.path.isdir(args.inputs): + for fnm in traversal_files(args.inputs): + images_and_outputs(fnm) + else: + images_and_outputs(args.inputs) + + for i in range(len(outputs)): outputs[i] = os.path.join(args.output_dir, outputs[i]) + return images, outputs \ No newline at end of file diff --git a/deepdoc/vision/layout_recognizer.py b/deepdoc/vision/layout_recognizer.py index 2d6b295b32ab7d902dead414f5f88d323bb97556..88006f9af60a88ff7aba0abded64c1e701bc248e 100644 --- a/deepdoc/vision/layout_recognizer.py +++ b/deepdoc/vision/layout_recognizer.py @@ -1,151 +1,151 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import os -import re -from collections import Counter -from copy import deepcopy -import numpy as np -from huggingface_hub import snapshot_download - -from api.utils.file_utils import get_project_base_directory -from deepdoc.vision import Recognizer - - -class LayoutRecognizer(Recognizer): - labels = [ - "_background_", - "Text", - "Title", - "Figure", - "Figure caption", - "Table", - "Table caption", - "Header", - "Footer", - "Reference", - "Equation", - ] - - def __init__(self, domain): - try: - model_dir = os.path.join( - get_project_base_directory(), - "rag/res/deepdoc") - super().__init__(self.labels, domain, model_dir) - except Exception as e: - model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc", - local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"), - local_dir_use_symlinks=False) - super().__init__(self.labels, domain, model_dir) - - self.garbage_layouts = ["footer", "header", "reference"] - - def __call__(self, image_list, ocr_res, scale_factor=3, - thr=0.2, batch_size=16, drop=True): - def __is_garbage(b): - patt = [r"^•+$", r"(版权归©|免责条款|地址[::])", r"\.{3,}", "^[0-9]{1,2} / ?[0-9]{1,2}$", - r"^[0-9]{1,2} of [0-9]{1,2}$", "^http://[^ ]{12,}", - "(资料|数据)来源[::]", "[0-9a-z._-]+@[a-z0-9-]+\\.[a-z]{2,3}", - "\\(cid *: *[0-9]+ *\\)" - ] - return any([re.search(p, b["text"]) for p in patt]) - - layouts = super().__call__(image_list, thr, batch_size) - # save_results(image_list, layouts, self.labels, output_dir='output/', threshold=0.7) - assert len(image_list) == len(ocr_res) - # Tag layout type - boxes = [] - assert len(image_list) == len(layouts) - garbages = {} - page_layout = [] - for pn, lts in enumerate(layouts): - bxs = ocr_res[pn] - lts = [{"type": b["type"], - "score": float(b["score"]), - "x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor, - "top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor, - "page_number": pn, - } for b in lts if float(b["score"]) >= 0.8 or b["type"] not in self.garbage_layouts] - lts = self.sort_Y_firstly(lts, np.mean( - [l["bottom"] - l["top"] for l in lts]) / 2) - lts = self.layouts_cleanup(bxs, lts) - page_layout.append(lts) - - # Tag layout type, layouts are ready - def findLayout(ty): - nonlocal bxs, lts, self - lts_ = [lt for lt in lts if lt["type"] == ty] - i = 0 - while i < len(bxs): - if bxs[i].get("layout_type"): - i += 1 - continue - if __is_garbage(bxs[i]): - bxs.pop(i) - continue - - ii = self.find_overlapped_with_threashold(bxs[i], lts_, - thr=0.4) - if ii is None: # belong to nothing - bxs[i]["layout_type"] = "" - i += 1 - continue - lts_[ii]["visited"] = True - keep_feats = [ - lts_[ - ii]["type"] == "footer" and bxs[i]["bottom"] < image_list[pn].size[1] * 0.9 / scale_factor, - lts_[ - ii]["type"] == "header" and bxs[i]["top"] > image_list[pn].size[1] * 0.1 / scale_factor, - ] - if drop and lts_[ - ii]["type"] in self.garbage_layouts and not any(keep_feats): - if lts_[ii]["type"] not in garbages: - garbages[lts_[ii]["type"]] = [] - garbages[lts_[ii]["type"]].append(bxs[i]["text"]) - bxs.pop(i) - continue - - bxs[i]["layoutno"] = f"{ty}-{ii}" - 
bxs[i]["layout_type"] = lts_[ii]["type"] if lts_[ - ii]["type"] != "equation" else "figure" - i += 1 - - for lt in ["footer", "header", "reference", "figure caption", - "table caption", "title", "table", "text", "figure", "equation"]: - findLayout(lt) - - # add box to figure layouts which has not text box - for i, lt in enumerate( - [lt for lt in lts if lt["type"] in ["figure", "equation"]]): - if lt.get("visited"): - continue - lt = deepcopy(lt) - del lt["type"] - lt["text"] = "" - lt["layout_type"] = "figure" - lt["layoutno"] = f"figure-{i}" - bxs.append(lt) - - boxes.extend(bxs) - - ocr_res = boxes - - garbag_set = set() - for k in garbages.keys(): - garbages[k] = Counter(garbages[k]) - for g, c in garbages[k].items(): - if c > 1: - garbag_set.add(g) - - ocr_res = [b for b in ocr_res if b["text"].strip() not in garbag_set] - return ocr_res, page_layout +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import re +from collections import Counter +from copy import deepcopy +import numpy as np +from huggingface_hub import snapshot_download + +from api.utils.file_utils import get_project_base_directory +from deepdoc.vision import Recognizer + + +class LayoutRecognizer(Recognizer): + labels = [ + "_background_", + "Text", + "Title", + "Figure", + "Figure caption", + "Table", + "Table caption", + "Header", + "Footer", + "Reference", + "Equation", + ] + + def __init__(self, domain): + try: + model_dir = os.path.join( + get_project_base_directory(), + "rag/res/deepdoc") + super().__init__(self.labels, domain, model_dir) + except Exception as e: + model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc", + local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"), + local_dir_use_symlinks=False) + super().__init__(self.labels, domain, model_dir) + + self.garbage_layouts = ["footer", "header", "reference"] + + def __call__(self, image_list, ocr_res, scale_factor=3, + thr=0.2, batch_size=16, drop=True): + def __is_garbage(b): + patt = [r"^•+$", r"(版权归©|免责条款|地址[::])", r"\.{3,}", "^[0-9]{1,2} / ?[0-9]{1,2}$", + r"^[0-9]{1,2} of [0-9]{1,2}$", "^http://[^ ]{12,}", + "(资料|数据)来源[::]", "[0-9a-z._-]+@[a-z0-9-]+\\.[a-z]{2,3}", + "\\(cid *: *[0-9]+ *\\)" + ] + return any([re.search(p, b["text"]) for p in patt]) + + layouts = super().__call__(image_list, thr, batch_size) + # save_results(image_list, layouts, self.labels, output_dir='output/', threshold=0.7) + assert len(image_list) == len(ocr_res) + # Tag layout type + boxes = [] + assert len(image_list) == len(layouts) + garbages = {} + page_layout = [] + for pn, lts in enumerate(layouts): + bxs = ocr_res[pn] + lts = [{"type": b["type"], + "score": float(b["score"]), + "x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor, + "top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor, + "page_number": pn, + } for b in lts if float(b["score"]) >= 0.8 or b["type"] not in self.garbage_layouts] + lts = self.sort_Y_firstly(lts, np.mean( + [l["bottom"] - l["top"] for l in lts]) / 2) + lts = 
self.layouts_cleanup(bxs, lts) + page_layout.append(lts) + + # Tag layout type, layouts are ready + def findLayout(ty): + nonlocal bxs, lts, self + lts_ = [lt for lt in lts if lt["type"] == ty] + i = 0 + while i < len(bxs): + if bxs[i].get("layout_type"): + i += 1 + continue + if __is_garbage(bxs[i]): + bxs.pop(i) + continue + + ii = self.find_overlapped_with_threashold(bxs[i], lts_, + thr=0.4) + if ii is None: # belong to nothing + bxs[i]["layout_type"] = "" + i += 1 + continue + lts_[ii]["visited"] = True + keep_feats = [ + lts_[ + ii]["type"] == "footer" and bxs[i]["bottom"] < image_list[pn].size[1] * 0.9 / scale_factor, + lts_[ + ii]["type"] == "header" and bxs[i]["top"] > image_list[pn].size[1] * 0.1 / scale_factor, + ] + if drop and lts_[ + ii]["type"] in self.garbage_layouts and not any(keep_feats): + if lts_[ii]["type"] not in garbages: + garbages[lts_[ii]["type"]] = [] + garbages[lts_[ii]["type"]].append(bxs[i]["text"]) + bxs.pop(i) + continue + + bxs[i]["layoutno"] = f"{ty}-{ii}" + bxs[i]["layout_type"] = lts_[ii]["type"] if lts_[ + ii]["type"] != "equation" else "figure" + i += 1 + + for lt in ["footer", "header", "reference", "figure caption", + "table caption", "title", "table", "text", "figure", "equation"]: + findLayout(lt) + + # add box to figure layouts which has not text box + for i, lt in enumerate( + [lt for lt in lts if lt["type"] in ["figure", "equation"]]): + if lt.get("visited"): + continue + lt = deepcopy(lt) + del lt["type"] + lt["text"] = "" + lt["layout_type"] = "figure" + lt["layoutno"] = f"figure-{i}" + bxs.append(lt) + + boxes.extend(bxs) + + ocr_res = boxes + + garbag_set = set() + for k in garbages.keys(): + garbages[k] = Counter(garbages[k]) + for g, c in garbages[k].items(): + if c > 1: + garbag_set.add(g) + + ocr_res = [b for b in ocr_res if b["text"].strip() not in garbag_set] + return ocr_res, page_layout diff --git a/deepdoc/vision/ocr.res b/deepdoc/vision/ocr.res index b62de66190de02c68df57fb21de1e2da9bd92fea..84b885d8352226e49b1d5d791b8f43a663e246aa 100644 --- a/deepdoc/vision/ocr.res +++ b/deepdoc/vision/ocr.res @@ -1,6623 +1,6623 @@ -' -疗 -绚 -诚 -娇 -溜 -题 -贿 -者 -廖 -更 -纳 -加 -奉 -公 -一 -就 -汴 -计 -与 -路 -房 -原 -妇 -2 -0 -8 -- -7 -其 -> -: -] -, -, -骑 -刈 -全 -消 -昏 -傈 -安 -久 -钟 -嗅 -不 -影 -处 -驽 -蜿 -资 -关 -椤 -地 -瘸 -专 -问 -忖 -票 -嫉 -炎 -韵 -要 -月 -田 -节 -陂 -鄙 -捌 -备 -拳 -伺 -眼 -网 -盎 -大 -傍 -心 -东 -愉 -汇 -蹿 -科 -每 -业 -里 -航 -晏 -字 -平 -录 -先 -1 -3 -彤 -鲶 -产 -稍 -督 -腴 -有 -象 -岳 -注 -绍 -在 -泺 -文 -定 -核 -名 -水 -过 -理 -让 -偷 -率 -等 -这 -发 -” -为 -含 -肥 -酉 -相 -鄱 -七 -编 -猥 -锛 -日 -镀 -蒂 -掰 -倒 -辆 -栾 -栗 -综 -涩 -州 -雌 -滑 -馀 -了 -机 -块 -司 -宰 -甙 -兴 -矽 -抚 -保 -用 -沧 -秩 -如 -收 -息 -滥 -页 -疑 -埠 -! -! -姥 -异 -橹 -钇 -向 -下 -跄 -的 -椴 -沫 -国 -绥 -獠 -报 -开 -民 -蜇 -何 -分 -凇 -长 -讥 -藏 -掏 -施 -羽 -中 -讲 -派 -嘟 -人 -提 -浼 -间 -世 -而 -古 -多 -倪 -唇 -饯 -控 -庚 -首 -赛 -蜓 -味 -断 -制 -觉 -技 -替 -艰 -溢 -潮 -夕 -钺 -外 -摘 -枋 -动 -双 -单 -啮 -户 -枇 -确 -锦 -曜 -杜 -或 -能 -效 -霜 -盒 -然 -侗 -电 -晁 -放 -步 -鹃 -新 -杖 -蜂 -吒 -濂 -瞬 -评 -总 -隍 -对 -独 -合 -也 -是 -府 -青 -天 -诲 -墙 -组 -滴 -级 -邀 -帘 -示 -已 -时 -骸 -仄 -泅 -和 -遨 -店 -雇 -疫 -持 -巍 -踮 -境 -只 -亨 -目 -鉴 -崤 -闲 -体 -泄 -杂 -作 -般 -轰 -化 -解 -迂 -诿 -蛭 -璀 -腾 -告 -版 -服 -省 -师 -小 -规 -程 -线 -海 -办 -引 -二 -桧 -牌 -砺 -洄 -裴 -修 -图 -痫 -胡 -许 -犊 -事 -郛 -基 -柴 -呼 -食 -研 -奶 -律 -蛋 -因 -葆 -察 -戏 -褒 -戒 -再 -李 -骁 -工 -貂 -油 -鹅 -章 -啄 -休 -场 -给 -睡 -纷 -豆 -器 -捎 -说 -敏 -学 -会 -浒 -设 -诊 -格 -廓 -查 -来 -霓 -室 -溆 -¢ -诡 -寥 -焕 -舜 -柒 -狐 -回 -戟 -砾 -厄 -实 -翩 -尿 -五 -入 -径 -惭 -喹 -股 -宇 -篝 -| -; -美 -期 -云 -九 -祺 -扮 -靠 -锝 -槌 -系 -企 -酰 -阊 -暂 -蚕 -忻 -豁 -本 -羹 -执 -条 -钦 -H -獒 -限 -进 -季 -楦 -于 -芘 -玖 -铋 -茯 -未 -答 -粘 -括 -样 -精 -欠 -矢 -甥 -帷 -嵩 -扣 -令 -仔 -风 -皈 -行 -支 -部 -蓉 -刮 -站 -蜡 -救 -钊 -汗 -松 -嫌 -成 -可 -. 
-鹤 -院 -从 -交 -政 -怕 -活 -调 -球 -局 -验 -髌 -第 -韫 -谗 -串 -到 -圆 -年 -米 -/ -* -友 -忿 -检 -区 -看 -自 -敢 -刃 -个 -兹 -弄 -流 -留 -同 -没 -齿 -星 -聆 -轼 -湖 -什 -三 -建 -蛔 -儿 -椋 -汕 -震 -颧 -鲤 -跟 -力 -情 -璺 -铨 -陪 -务 -指 -族 -训 -滦 -鄣 -濮 -扒 -商 -箱 -十 -召 -慷 -辗 -所 -莞 -管 -护 -臭 -横 -硒 -嗓 -接 -侦 -六 -露 -党 -馋 -驾 -剖 -高 -侬 -妪 -幂 -猗 -绺 -骐 -央 -酐 -孝 -筝 -课 -徇 -缰 -门 -男 -西 -项 -句 -谙 -瞒 -秃 -篇 -教 -碲 -罚 -声 -呐 -景 -前 -富 -嘴 -鳌 -稀 -免 -朋 -啬 -睐 -去 -赈 -鱼 -住 -肩 -愕 -速 -旁 -波 -厅 -健 -茼 -厥 -鲟 -谅 -投 -攸 -炔 -数 -方 -击 -呋 -谈 -绩 -别 -愫 -僚 -躬 -鹧 -胪 -炳 -招 -喇 -膨 -泵 -蹦 -毛 -结 -5 -4 -谱 -识 -陕 -粽 -婚 -拟 -构 -且 -搜 -任 -潘 -比 -郢 -妨 -醪 -陀 -桔 -碘 -扎 -选 -哈 -骷 -楷 -亿 -明 -缆 -脯 -监 -睫 -逻 -婵 -共 -赴 -淝 -凡 -惦 -及 -达 -揖 -谩 -澹 -减 -焰 -蛹 -番 -祁 -柏 -员 -禄 -怡 -峤 -龙 -白 -叽 -生 -闯 -起 -细 -装 -谕 -竟 -聚 -钙 -上 -导 -渊 -按 -艾 -辘 -挡 -耒 -盹 -饪 -臀 -记 -邮 -蕙 -受 -各 -医 -搂 -普 -滇 -朗 -茸 -带 -翻 -酚 -( -光 -堤 -墟 -蔷 -万 -幻 -〓 -瑙 -辈 -昧 -盏 -亘 -蛀 -吉 -铰 -请 -子 -假 -闻 -税 -井 -诩 -哨 -嫂 -好 -面 -琐 -校 -馊 -鬣 -缂 -营 -访 -炖 -占 -农 -缀 -否 -经 -钚 -棵 -趟 -张 -亟 -吏 -茶 -谨 -捻 -论 -迸 -堂 -玉 -信 -吧 -瞠 -乡 -姬 -寺 -咬 -溏 -苄 -皿 -意 -赉 -宝 -尔 -钰 -艺 -特 -唳 -踉 -都 -荣 -倚 -登 -荐 -丧 -奇 -涵 -批 -炭 -近 -符 -傩 -感 -道 -着 -菊 -虹 -仲 -众 -懈 -濯 -颞 -眺 -南 -释 -北 -缝 -标 -既 -茗 -整 -撼 -迤 -贲 -挎 -耱 -拒 -某 -妍 -卫 -哇 -英 -矶 -藩 -治 -他 -元 -领 -膜 -遮 -穗 -蛾 -飞 -荒 -棺 -劫 -么 -市 -火 -温 -拈 -棚 -洼 -转 -果 -奕 -卸 -迪 -伸 -泳 -斗 -邡 -侄 -涨 -屯 -萋 -胭 -氡 -崮 -枞 -惧 -冒 -彩 -斜 -手 -豚 -随 -旭 -淑 -妞 -形 -菌 -吲 -沱 -争 -驯 -歹 -挟 -兆 -柱 -传 -至 -包 -内 -响 -临 -红 -功 -弩 -衡 -寂 -禁 -老 -棍 -耆 -渍 -织 -害 -氵 -渑 -布 -载 -靥 -嗬 -虽 -苹 -咨 -娄 -库 -雉 -榜 -帜 -嘲 -套 -瑚 -亲 -簸 -欧 -边 -6 -腿 -旮 -抛 -吹 -瞳 -得 -镓 -梗 -厨 -继 -漾 -愣 -憨 -士 -策 -窑 -抑 -躯 -襟 -脏 -参 -贸 -言 -干 -绸 -鳄 -穷 -藜 -音 -折 -详 -) -举 -悍 -甸 -癌 -黎 -谴 -死 -罩 -迁 -寒 -驷 -袖 -媒 -蒋 -掘 -模 -纠 -恣 -观 -祖 -蛆 -碍 -位 -稿 -主 -澧 -跌 -筏 -京 -锏 -帝 -贴 -证 -糠 -才 -黄 -鲸 -略 -炯 -饱 -四 -出 -园 -犀 -牧 -容 -汉 -杆 -浈 -汰 -瑷 -造 -虫 -瘩 -怪 -驴 -济 -应 -花 -沣 -谔 -夙 -旅 -价 -矿 -以 -考 -s -u -呦 -晒 -巡 -茅 -准 -肟 -瓴 -詹 -仟 -褂 -译 -桌 -混 -宁 -怦 -郑 -抿 -些 -余 -鄂 -饴 -攒 -珑 -群 -阖 -岔 -琨 -藓 -预 -环 -洮 -岌 -宀 -杲 -瀵 -最 -常 -囡 -周 -踊 -女 -鼓 -袭 -喉 -简 -范 -薯 -遐 -疏 -粱 -黜 -禧 -法 -箔 -斤 -遥 -汝 -奥 -直 -贞 -撑 -置 -绱 -集 -她 -馅 -逗 -钧 -橱 -魉 -[ -恙 -躁 -唤 -9 -旺 -膘 -待 -脾 -惫 -购 -吗 -依 -盲 -度 -瘿 -蠖 -俾 -之 -镗 -拇 -鲵 -厝 -簧 -续 -款 -展 -啃 -表 -剔 -品 -钻 -腭 -损 -清 -锶 -统 -涌 -寸 -滨 -贪 -链 -吠 -冈 -伎 -迥 -咏 -吁 -览 -防 -迅 -失 -汾 -阔 -逵 -绀 -蔑 -列 -川 -凭 -努 -熨 -揪 -利 -俱 -绉 -抢 -鸨 -我 -即 -责 -膦 -易 -毓 -鹊 -刹 -玷 -岿 -空 -嘞 -绊 -排 -术 -估 -锷 -违 -们 -苟 -铜 -播 -肘 -件 -烫 -审 -鲂 -广 -像 -铌 -惰 -铟 -巳 -胍 -鲍 -康 -憧 -色 -恢 -想 -拷 -尤 -疳 -知 -S -Y -F -D -A -峄 -裕 -帮 -握 -搔 -氐 -氘 -难 -墒 -沮 -雨 -叁 -缥 -悴 -藐 -湫 -娟 -苑 -稠 -颛 -簇 -后 -阕 -闭 -蕤 -缚 -怎 -佞 -码 -嘤 -蔡 -痊 -舱 -螯 -帕 -赫 -昵 -升 -烬 -岫 -、 -疵 -蜻 -髁 -蕨 -隶 -烛 -械 -丑 -盂 -梁 -强 -鲛 -由 -拘 -揉 -劭 -龟 -撤 -钩 -呕 -孛 -费 -妻 -漂 -求 -阑 -崖 -秤 -甘 -通 -深 -补 -赃 -坎 -床 -啪 -承 -吼 -量 -暇 -钼 -烨 -阂 -擎 -脱 -逮 -称 -P -神 -属 -矗 -华 -届 -狍 -葑 -汹 -育 -患 -窒 -蛰 -佼 -静 -槎 -运 -鳗 -庆 -逝 -曼 -疱 -克 -代 -官 -此 -麸 -耧 -蚌 -晟 -例 -础 -榛 -副 -测 -唰 -缢 -迹 -灬 -霁 -身 -岁 -赭 -扛 -又 -菡 -乜 -雾 -板 -读 -陷 -徉 -贯 -郁 -虑 -变 -钓 -菜 -圾 -现 -琢 -式 -乐 -维 -渔 -浜 -左 -吾 -脑 -钡 -警 -T -啵 -拴 -偌 -漱 -湿 -硕 -止 -骼 -魄 -积 -燥 -联 -踢 -玛 -则 -窿 -见 -振 -畿 -送 -班 -钽 -您 -赵 -刨 -印 -讨 -踝 -籍 -谡 -舌 -崧 -汽 -蔽 -沪 -酥 -绒 -怖 -财 -帖 -肱 -私 -莎 -勋 -羔 -霸 -励 -哼 -帐 -将 -帅 -渠 -纪 -婴 -娩 -岭 -厘 -滕 -吻 -伤 -坝 -冠 -戊 -隆 -瘁 -介 -涧 -物 -黍 -并 -姗 -奢 -蹑 -掣 -垸 -锴 -命 -箍 -捉 -病 -辖 -琰 -眭 -迩 -艘 -绌 -繁 -寅 -若 -毋 -思 -诉 -类 -诈 -燮 -轲 -酮 -狂 -重 -反 -职 -筱 -县 -委 -磕 -绣 -奖 -晋 -濉 -志 -徽 -肠 -呈 -獐 -坻 -口 -片 -碰 -几 -村 -柿 -劳 -料 -获 -亩 -惕 -晕 -厌 -号 -罢 -池 -正 -鏖 -煨 -家 -棕 -复 -尝 -懋 -蜥 -锅 -岛 -扰 -队 -坠 -瘾 -钬 -@ -卧 -疣 -镇 -譬 -冰 -彷 -频 -黯 -据 -垄 -采 -八 -缪 -瘫 -型 -熹 -砰 -楠 -襁 -箐 -但 -嘶 -绳 -啤 -拍 -盥 -穆 -傲 -洗 -盯 -塘 -怔 -筛 -丿 -台 -恒 -喂 -葛 -永 -¥ -烟 -酒 -桦 -书 -砂 -蚝 -缉 -态 -瀚 -袄 -圳 -轻 -蛛 -超 -榧 -遛 -姒 -奘 -铮 -右 -荽 -望 -偻 -卡 -丶 -氰 -附 -做 -革 -索 -戚 -坨 -桷 -唁 -垅 -榻 -岐 -偎 -坛 -莨 -山 -殊 -微 -骇 -陈 -爨 -推 -嗝 -驹 -澡 -藁 -呤 -卤 -嘻 -糅 -逛 -侵 -郓 -酌 -德 -摇 -※ -鬃 -被 -慨 -殡 -羸 -昌 -泡 -戛 -鞋 -河 -宪 -沿 -玲 -鲨 -翅 -哽 -源 -铅 -语 -照 
-邯 -址 -荃 -佬 -顺 -鸳 -町 -霭 -睾 -瓢 -夸 -椁 -晓 -酿 -痈 -咔 -侏 -券 -噎 -湍 -签 -嚷 -离 -午 -尚 -社 -锤 -背 -孟 -使 -浪 -缦 -潍 -鞅 -军 -姹 -驶 -笑 -鳟 -鲁 -》 -孽 -钜 -绿 -洱 -礴 -焯 -椰 -颖 -囔 -乌 -孔 -巴 -互 -性 -椽 -哞 -聘 -昨 -早 -暮 -胶 -炀 -隧 -低 -彗 -昝 -铁 -呓 -氽 -藉 -喔 -癖 -瑗 -姨 -权 -胱 -韦 -堑 -蜜 -酋 -楝 -砝 -毁 -靓 -歙 -锲 -究 -屋 -喳 -骨 -辨 -碑 -武 -鸠 -宫 -辜 -烊 -适 -坡 -殃 -培 -佩 -供 -走 -蜈 -迟 -翼 -况 -姣 -凛 -浔 -吃 -飘 -债 -犟 -金 -促 -苛 -崇 -坂 -莳 -畔 -绂 -兵 -蠕 -斋 -根 -砍 -亢 -欢 -恬 -崔 -剁 -餐 -榫 -快 -扶 -‖ -濒 -缠 -鳜 -当 -彭 -驭 -浦 -篮 -昀 -锆 -秸 -钳 -弋 -娣 -瞑 -夷 -龛 -苫 -拱 -致 -% -嵊 -障 -隐 -弑 -初 -娓 -抉 -汩 -累 -蓖 -" -唬 -助 -苓 -昙 -押 -毙 -破 -城 -郧 -逢 -嚏 -獭 -瞻 -溱 -婿 -赊 -跨 -恼 -璧 -萃 -姻 -貉 -灵 -炉 -密 -氛 -陶 -砸 -谬 -衔 -点 -琛 -沛 -枳 -层 -岱 -诺 -脍 -榈 -埂 -征 -冷 -裁 -打 -蹴 -素 -瘘 -逞 -蛐 -聊 -激 -腱 -萘 -踵 -飒 -蓟 -吆 -取 -咙 -簋 -涓 -矩 -曝 -挺 -揣 -座 -你 -史 -舵 -焱 -尘 -苏 -笈 -脚 -溉 -榨 -诵 -樊 -邓 -焊 -义 -庶 -儋 -蟋 -蒲 -赦 -呷 -杞 -诠 -豪 -还 -试 -颓 -茉 -太 -除 -紫 -逃 -痴 -草 -充 -鳕 -珉 -祗 -墨 -渭 -烩 -蘸 -慕 -璇 -镶 -穴 -嵘 -恶 -骂 -险 -绋 -幕 -碉 -肺 -戳 -刘 -潞 -秣 -纾 -潜 -銮 -洛 -须 -罘 -销 -瘪 -汞 -兮 -屉 -r -林 -厕 -质 -探 -划 -狸 -殚 -善 -煊 -烹 -〒 -锈 -逯 -宸 -辍 -泱 -柚 -袍 -远 -蹋 -嶙 -绝 -峥 -娥 -缍 -雀 -徵 -认 -镱 -谷 -= -贩 -勉 -撩 -鄯 -斐 -洋 -非 -祚 -泾 -诒 -饿 -撬 -威 -晷 -搭 -芍 -锥 -笺 -蓦 -候 -琊 -档 -礁 -沼 -卵 -荠 -忑 -朝 -凹 -瑞 -头 -仪 -弧 -孵 -畏 -铆 -突 -衲 -车 -浩 -气 -茂 -悖 -厢 -枕 -酝 -戴 -湾 -邹 -飚 -攘 -锂 -写 -宵 -翁 -岷 -无 -喜 -丈 -挑 -嗟 -绛 -殉 -议 -槽 -具 -醇 -淞 -笃 -郴 -阅 -饼 -底 -壕 -砚 -弈 -询 -缕 -庹 -翟 -零 -筷 -暨 -舟 -闺 -甯 -撞 -麂 -茌 -蔼 -很 -珲 -捕 -棠 -角 -阉 -媛 -娲 -诽 -剿 -尉 -爵 -睬 -韩 -诰 -匣 -危 -糍 -镯 -立 -浏 -阳 -少 -盆 -舔 -擘 -匪 -申 -尬 -铣 -旯 -抖 -赘 -瓯 -居 -ˇ -哮 -游 -锭 -茏 -歌 -坏 -甚 -秒 -舞 -沙 -仗 -劲 -潺 -阿 -燧 -郭 -嗖 -霏 -忠 -材 -奂 -耐 -跺 -砀 -输 -岖 -媳 -氟 -极 -摆 -灿 -今 -扔 -腻 -枝 -奎 -药 -熄 -吨 -话 -q -额 -慑 -嘌 -协 -喀 -壳 -埭 -视 -著 -於 -愧 -陲 -翌 -峁 -颅 -佛 -腹 -聋 -侯 -咎 -叟 -秀 -颇 -存 -较 -罪 -哄 -岗 -扫 -栏 -钾 -羌 -己 -璨 -枭 -霉 -煌 -涸 -衿 -键 -镝 -益 -岢 -奏 -连 -夯 -睿 -冥 -均 -糖 -狞 -蹊 -稻 -爸 -刿 -胥 -煜 -丽 -肿 -璃 -掸 -跚 -灾 -垂 -樾 -濑 -乎 -莲 -窄 -犹 -撮 -战 -馄 -软 -络 -显 -鸢 -胸 -宾 -妲 -恕 -埔 -蝌 -份 -遇 -巧 -瞟 -粒 -恰 -剥 -桡 -博 -讯 -凯 -堇 -阶 -滤 -卖 -斌 -骚 -彬 -兑 -磺 -樱 -舷 -两 -娱 -福 -仃 -差 -找 -桁 -÷ -净 -把 -阴 -污 -戬 -雷 -碓 -蕲 -楚 -罡 -焖 -抽 -妫 -咒 -仑 -闱 -尽 -邑 -菁 -爱 -贷 -沥 -鞑 -牡 -嗉 -崴 -骤 -塌 -嗦 -订 -拮 -滓 -捡 -锻 -次 -坪 -杩 -臃 -箬 -融 -珂 -鹗 -宗 -枚 -降 -鸬 -妯 -阄 -堰 -盐 -毅 -必 -杨 -崃 -俺 -甬 -状 -莘 -货 -耸 -菱 -腼 -铸 -唏 -痤 -孚 -澳 -懒 -溅 -翘 -疙 -杷 -淼 -缙 -骰 -喊 -悉 -砻 -坷 -艇 -赁 -界 -谤 -纣 -宴 -晃 -茹 -归 -饭 -梢 -铡 -街 -抄 -肼 -鬟 -苯 -颂 -撷 -戈 -炒 -咆 -茭 -瘙 -负 -仰 -客 -琉 -铢 -封 -卑 -珥 -椿 -镧 -窨 -鬲 -寿 -御 -袤 -铃 -萎 -砖 -餮 -脒 -裳 -肪 -孕 -嫣 -馗 -嵇 -恳 -氯 -江 -石 -褶 -冢 -祸 -阻 -狈 -羞 -银 -靳 -透 -咳 -叼 -敷 -芷 -啥 -它 -瓤 -兰 -痘 -懊 -逑 -肌 -往 -捺 -坊 -甩 -呻 -〃 -沦 -忘 -膻 -祟 -菅 -剧 -崆 -智 -坯 -臧 -霍 -墅 -攻 -眯 -倘 -拢 -骠 -铐 -庭 -岙 -瓠 -′ -缺 -泥 -迢 -捶 -? -? 
-郏 -喙 -掷 -沌 -纯 -秘 -种 -听 -绘 -固 -螨 -团 -香 -盗 -妒 -埚 -蓝 -拖 -旱 -荞 -铀 -血 -遏 -汲 -辰 -叩 -拽 -幅 -硬 -惶 -桀 -漠 -措 -泼 -唑 -齐 -肾 -念 -酱 -虚 -屁 -耶 -旗 -砦 -闵 -婉 -馆 -拭 -绅 -韧 -忏 -窝 -醋 -葺 -顾 -辞 -倜 -堆 -辋 -逆 -玟 -贱 -疾 -董 -惘 -倌 -锕 -淘 -嘀 -莽 -俭 -笏 -绑 -鲷 -杈 -择 -蟀 -粥 -嗯 -驰 -逾 -案 -谪 -褓 -胫 -哩 -昕 -颚 -鲢 -绠 -躺 -鹄 -崂 -儒 -俨 -丝 -尕 -泌 -啊 -萸 -彰 -幺 -吟 -骄 -苣 -弦 -脊 -瑰 -〈 -诛 -镁 -析 -闪 -剪 -侧 -哟 -框 -螃 -守 -嬗 -燕 -狭 -铈 -缮 -概 -迳 -痧 -鲲 -俯 -售 -笼 -痣 -扉 -挖 -满 -咋 -援 -邱 -扇 -歪 -便 -玑 -绦 -峡 -蛇 -叨 -〖 -泽 -胃 -斓 -喋 -怂 -坟 -猪 -该 -蚬 -炕 -弥 -赞 -棣 -晔 -娠 -挲 -狡 -创 -疖 -铕 -镭 -稷 -挫 -弭 -啾 -翔 -粉 -履 -苘 -哦 -楼 -秕 -铂 -土 -锣 -瘟 -挣 -栉 -习 -享 -桢 -袅 -磨 -桂 -谦 -延 -坚 -蔚 -噗 -署 -谟 -猬 -钎 -恐 -嬉 -雒 -倦 -衅 -亏 -璩 -睹 -刻 -殿 -王 -算 -雕 -麻 -丘 -柯 -骆 -丸 -塍 -谚 -添 -鲈 -垓 -桎 -蚯 -芥 -予 -飕 -镦 -谌 -窗 -醚 -菀 -亮 -搪 -莺 -蒿 -羁 -足 -J -真 -轶 -悬 -衷 -靛 -翊 -掩 -哒 -炅 -掐 -冼 -妮 -l -谐 -稚 -荆 -擒 -犯 -陵 -虏 -浓 -崽 -刍 -陌 -傻 -孜 -千 -靖 -演 -矜 -钕 -煽 -杰 -酗 -渗 -伞 -栋 -俗 -泫 -戍 -罕 -沾 -疽 -灏 -煦 -芬 -磴 -叱 -阱 -榉 -湃 -蜀 -叉 -醒 -彪 -租 -郡 -篷 -屎 -良 -垢 -隗 -弱 -陨 -峪 -砷 -掴 -颁 -胎 -雯 -绵 -贬 -沐 -撵 -隘 -篙 -暖 -曹 -陡 -栓 -填 -臼 -彦 -瓶 -琪 -潼 -哪 -鸡 -摩 -啦 -俟 -锋 -域 -耻 -蔫 -疯 -纹 -撇 -毒 -绶 -痛 -酯 -忍 -爪 -赳 -歆 -嘹 -辕 -烈 -册 -朴 -钱 -吮 -毯 -癜 -娃 -谀 -邵 -厮 -炽 -璞 -邃 -丐 -追 -词 -瓒 -忆 -轧 -芫 -谯 -喷 -弟 -半 -冕 -裙 -掖 -墉 -绮 -寝 -苔 -势 -顷 -褥 -切 -衮 -君 -佳 -嫒 -蚩 -霞 -佚 -洙 -逊 -镖 -暹 -唛 -& -殒 -顶 -碗 -獗 -轭 -铺 -蛊 -废 -恹 -汨 -崩 -珍 -那 -杵 -曲 -纺 -夏 -薰 -傀 -闳 -淬 -姘 -舀 -拧 -卷 -楂 -恍 -讪 -厩 -寮 -篪 -赓 -乘 -灭 -盅 -鞣 -沟 -慎 -挂 -饺 -鼾 -杳 -树 -缨 -丛 -絮 -娌 -臻 -嗳 -篡 -侩 -述 -衰 -矛 -圈 -蚜 -匕 -筹 -匿 -濞 -晨 -叶 -骋 -郝 -挚 -蚴 -滞 -增 -侍 -描 -瓣 -吖 -嫦 -蟒 -匾 -圣 -赌 -毡 -癞 -恺 -百 -曳 -需 -篓 -肮 -庖 -帏 -卿 -驿 -遗 -蹬 -鬓 -骡 -歉 -芎 -胳 -屐 -禽 -烦 -晌 -寄 -媾 -狄 -翡 -苒 -船 -廉 -终 -痞 -殇 -々 -畦 -饶 -改 -拆 -悻 -萄 -£ -瓿 -乃 -訾 -桅 -匮 -溧 -拥 -纱 -铍 -骗 -蕃 -龋 -缬 -父 -佐 -疚 -栎 -醍 -掳 -蓄 -x -惆 -颜 -鲆 -榆 -〔 -猎 -敌 -暴 -谥 -鲫 -贾 -罗 -玻 -缄 -扦 -芪 -癣 -落 -徒 -臾 -恿 -猩 -托 -邴 -肄 -牵 -春 -陛 -耀 -刊 -拓 -蓓 -邳 -堕 -寇 -枉 -淌 -啡 -湄 -兽 -酷 -萼 -碚 -濠 -萤 -夹 -旬 -戮 -梭 -琥 -椭 -昔 -勺 -蜊 -绐 -晚 -孺 -僵 -宣 -摄 -冽 -旨 -萌 -忙 -蚤 -眉 -噼 -蟑 -付 -契 -瓜 -悼 -颡 -壁 -曾 -窕 -颢 -澎 -仿 -俑 -浑 -嵌 -浣 -乍 -碌 -褪 -乱 -蔟 -隙 -玩 -剐 -葫 -箫 -纲 -围 -伐 -决 -伙 -漩 -瑟 -刑 -肓 -镳 -缓 -蹭 -氨 -皓 -典 -畲 -坍 -铑 -檐 -塑 -洞 -倬 -储 -胴 -淳 -戾 -吐 -灼 -惺 -妙 -毕 -珐 -缈 -虱 -盖 -羰 -鸿 -磅 -谓 -髅 -娴 -苴 -唷 -蚣 -霹 -抨 -贤 -唠 -犬 -誓 -逍 -庠 -逼 -麓 -籼 -釉 -呜 -碧 -秧 -氩 -摔 -霄 -穸 -纨 -辟 -妈 -映 -完 -牛 -缴 -嗷 -炊 -恩 -荔 -茆 -掉 -紊 -慌 -莓 -羟 -阙 -萁 -磐 -另 -蕹 -辱 -鳐 -湮 -吡 -吩 -唐 -睦 -垠 -舒 -圜 -冗 -瞿 -溺 -芾 -囱 -匠 -僳 -汐 -菩 -饬 -漓 -黑 -霰 -浸 -濡 -窥 -毂 -蒡 -兢 -驻 -鹉 -芮 -诙 -迫 -雳 -厂 -忐 -臆 -猴 -鸣 -蚪 -栈 -箕 -羡 -渐 -莆 -捍 -眈 -哓 -趴 -蹼 -埕 -嚣 -骛 -宏 -淄 -斑 -噜 -严 -瑛 -垃 -椎 -诱 -压 -庾 -绞 -焘 -廿 -抡 -迄 -棘 -夫 -纬 -锹 -眨 -瞌 -侠 -脐 -竞 -瀑 -孳 -骧 -遁 -姜 -颦 -荪 -滚 -萦 -伪 -逸 -粳 -爬 -锁 -矣 -役 -趣 -洒 -颔 -诏 -逐 -奸 -甭 -惠 -攀 -蹄 -泛 -尼 -拼 -阮 -鹰 -亚 -颈 -惑 -勒 -〉 -际 -肛 -爷 -刚 -钨 -丰 -养 -冶 -鲽 -辉 -蔻 -画 -覆 -皴 -妊 -麦 -返 -醉 -皂 -擀 -〗 -酶 -凑 -粹 -悟 -诀 -硖 -港 -卜 -z -杀 -涕 -± -舍 -铠 -抵 -弛 -段 -敝 -镐 -奠 -拂 -轴 -跛 -袱 -e -t -沉 -菇 -俎 -薪 -峦 -秭 -蟹 -历 -盟 -菠 -寡 -液 -肢 -喻 -染 -裱 -悱 -抱 -氙 -赤 -捅 -猛 -跑 -氮 -谣 -仁 -尺 -辊 -窍 -烙 -衍 -架 -擦 -倏 -璐 -瑁 -币 -楞 -胖 -夔 -趸 -邛 -惴 -饕 -虔 -蝎 -§ -哉 -贝 -宽 -辫 -炮 -扩 -饲 -籽 -魏 -菟 -锰 -伍 -猝 -末 -琳 -哚 -蛎 -邂 -呀 -姿 -鄞 -却 -歧 -仙 -恸 -椐 -森 -牒 -寤 -袒 -婆 -虢 -雅 -钉 -朵 -贼 -欲 -苞 -寰 -故 -龚 -坭 -嘘 -咫 -礼 -硷 -兀 -睢 -汶 -’ -铲 -烧 -绕 -诃 -浃 -钿 -哺 -柜 -讼 -颊 -璁 -腔 -洽 -咐 -脲 -簌 -筠 -镣 -玮 -鞠 -谁 -兼 -姆 -挥 -梯 -蝴 -谘 -漕 -刷 -躏 -宦 -弼 -b -垌 -劈 -麟 -莉 -揭 -笙 -渎 -仕 -嗤 -仓 -配 -怏 -抬 -错 -泯 -镊 -孰 -猿 -邪 -仍 -秋 -鼬 -壹 -歇 -吵 -炼 -< -尧 -射 -柬 -廷 -胧 -霾 -凳 -隋 -肚 -浮 -梦 -祥 -株 -堵 -退 -L -鹫 -跎 -凶 -毽 -荟 -炫 -栩 -玳 -甜 -沂 -鹿 -顽 -伯 -爹 -赔 -蛴 -徐 -匡 -欣 -狰 -缸 -雹 -蟆 -疤 -默 -沤 -啜 -痂 -衣 -禅 -w -i -h -辽 -葳 -黝 -钗 -停 -沽 -棒 -馨 -颌 -肉 -吴 -硫 -悯 -劾 -娈 -马 -啧 -吊 -悌 -镑 -峭 -帆 -瀣 -涉 -咸 -疸 -滋 -泣 -翦 -拙 -癸 -钥 -蜒 -+ -尾 -庄 -凝 -泉 -婢 -渴 -谊 -乞 -陆 -锉 -糊 -鸦 -淮 -I -B -N -晦 -弗 -乔 -庥 -葡 -尻 -席 -橡 -傣 -渣 -拿 -惩 -麋 -斛 -缃 -矮 -蛏 -岘 -鸽 -姐 -膏 -催 -奔 -镒 -喱 -蠡 -摧 -钯 -胤 -柠 -拐 -璋 -鸥 -卢 -荡 -倾 -^ -_ -珀 -逄 -萧 -塾 -掇 -贮 -笆 -聂 -圃 -冲 -嵬 -M -滔 -笕 -值 
-炙 -偶 -蜱 -搐 -梆 -汪 -蔬 -腑 -鸯 -蹇 -敞 -绯 -仨 -祯 -谆 -梧 -糗 -鑫 -啸 -豺 -囹 -猾 -巢 -柄 -瀛 -筑 -踌 -沭 -暗 -苁 -鱿 -蹉 -脂 -蘖 -牢 -热 -木 -吸 -溃 -宠 -序 -泞 -偿 -拜 -檩 -厚 -朐 -毗 -螳 -吞 -媚 -朽 -担 -蝗 -橘 -畴 -祈 -糟 -盱 -隼 -郜 -惜 -珠 -裨 -铵 -焙 -琚 -唯 -咚 -噪 -骊 -丫 -滢 -勤 -棉 -呸 -咣 -淀 -隔 -蕾 -窈 -饨 -挨 -煅 -短 -匙 -粕 -镜 -赣 -撕 -墩 -酬 -馁 -豌 -颐 -抗 -酣 -氓 -佑 -搁 -哭 -递 -耷 -涡 -桃 -贻 -碣 -截 -瘦 -昭 -镌 -蔓 -氚 -甲 -猕 -蕴 -蓬 -散 -拾 -纛 -狼 -猷 -铎 -埋 -旖 -矾 -讳 -囊 -糜 -迈 -粟 -蚂 -紧 -鲳 -瘢 -栽 -稼 -羊 -锄 -斟 -睁 -桥 -瓮 -蹙 -祉 -醺 -鼻 -昱 -剃 -跳 -篱 -跷 -蒜 -翎 -宅 -晖 -嗑 -壑 -峻 -癫 -屏 -狠 -陋 -袜 -途 -憎 -祀 -莹 -滟 -佶 -溥 -臣 -约 -盛 -峰 -磁 -慵 -婪 -拦 -莅 -朕 -鹦 -粲 -裤 -哎 -疡 -嫖 -琵 -窟 -堪 -谛 -嘉 -儡 -鳝 -斩 -郾 -驸 -酊 -妄 -胜 -贺 -徙 -傅 -噌 -钢 -栅 -庇 -恋 -匝 -巯 -邈 -尸 -锚 -粗 -佟 -蛟 -薹 -纵 -蚊 -郅 -绢 -锐 -苗 -俞 -篆 -淆 -膀 -鲜 -煎 -诶 -秽 -寻 -涮 -刺 -怀 -噶 -巨 -褰 -魅 -灶 -灌 -桉 -藕 -谜 -舸 -薄 -搀 -恽 -借 -牯 -痉 -渥 -愿 -亓 -耘 -杠 -柩 -锔 -蚶 -钣 -珈 -喘 -蹒 -幽 -赐 -稗 -晤 -莱 -泔 -扯 -肯 -菪 -裆 -腩 -豉 -疆 -骜 -腐 -倭 -珏 -唔 -粮 -亡 -润 -慰 -伽 -橄 -玄 -誉 -醐 -胆 -龊 -粼 -塬 -陇 -彼 -削 -嗣 -绾 -芽 -妗 -垭 -瘴 -爽 -薏 -寨 -龈 -泠 -弹 -赢 -漪 -猫 -嘧 -涂 -恤 -圭 -茧 -烽 -屑 -痕 -巾 -赖 -荸 -凰 -腮 -畈 -亵 -蹲 -偃 -苇 -澜 -艮 -换 -骺 -烘 -苕 -梓 -颉 -肇 -哗 -悄 -氤 -涠 -葬 -屠 -鹭 -植 -竺 -佯 -诣 -鲇 -瘀 -鲅 -邦 -移 -滁 -冯 -耕 -癔 -戌 -茬 -沁 -巩 -悠 -湘 -洪 -痹 -锟 -循 -谋 -腕 -鳃 -钠 -捞 -焉 -迎 -碱 -伫 -急 -榷 -奈 -邝 -卯 -辄 -皲 -卟 -醛 -畹 -忧 -稳 -雄 -昼 -缩 -阈 -睑 -扌 -耗 -曦 -涅 -捏 -瞧 -邕 -淖 -漉 -铝 -耦 -禹 -湛 -喽 -莼 -琅 -诸 -苎 -纂 -硅 -始 -嗨 -傥 -燃 -臂 -赅 -嘈 -呆 -贵 -屹 -壮 -肋 -亍 -蚀 -卅 -豹 -腆 -邬 -迭 -浊 -} -童 -螂 -捐 -圩 -勐 -触 -寞 -汊 -壤 -荫 -膺 -渌 -芳 -懿 -遴 -螈 -泰 -蓼 -蛤 -茜 -舅 -枫 -朔 -膝 -眙 -避 -梅 -判 -鹜 -璜 -牍 -缅 -垫 -藻 -黔 -侥 -惚 -懂 -踩 -腰 -腈 -札 -丞 -唾 -慈 -顿 -摹 -荻 -琬 -~ -斧 -沈 -滂 -胁 -胀 -幄 -莜 -Z -匀 -鄄 -掌 -绰 -茎 -焚 -赋 -萱 -谑 -汁 -铒 -瞎 -夺 -蜗 -野 -娆 -冀 -弯 -篁 -懵 -灞 -隽 -芡 -脘 -俐 -辩 -芯 -掺 -喏 -膈 -蝈 -觐 -悚 -踹 -蔗 -熠 -鼠 -呵 -抓 -橼 -峨 -畜 -缔 -禾 -崭 -弃 -熊 -摒 -凸 -拗 -穹 -蒙 -抒 -祛 -劝 -闫 -扳 -阵 -醌 -踪 -喵 -侣 -搬 -仅 -荧 -赎 -蝾 -琦 -买 -婧 -瞄 -寓 -皎 -冻 -赝 -箩 -莫 -瞰 -郊 -笫 -姝 -筒 -枪 -遣 -煸 -袋 -舆 -痱 -涛 -母 -〇 -启 -践 -耙 -绲 -盘 -遂 -昊 -搞 -槿 -诬 -纰 -泓 -惨 -檬 -亻 -越 -C -o -憩 -熵 -祷 -钒 -暧 -塔 -阗 -胰 -咄 -娶 -魔 -琶 -钞 -邻 -扬 -杉 -殴 -咽 -弓 -〆 -髻 -】 -吭 -揽 -霆 -拄 -殖 -脆 -彻 -岩 -芝 -勃 -辣 -剌 -钝 -嘎 -甄 -佘 -皖 -伦 -授 -徕 -憔 -挪 -皇 -庞 -稔 -芜 -踏 -溴 -兖 -卒 -擢 -饥 -鳞 -煲 -‰ -账 -颗 -叻 -斯 -捧 -鳍 -琮 -讹 -蛙 -纽 -谭 -酸 -兔 -莒 -睇 -伟 -觑 -羲 -嗜 -宜 -褐 -旎 -辛 -卦 -诘 -筋 -鎏 -溪 -挛 -熔 -阜 -晰 -鳅 -丢 -奚 -灸 -呱 -献 -陉 -黛 -鸪 -甾 -萨 -疮 -拯 -洲 -疹 -辑 -叙 -恻 -谒 -允 -柔 -烂 -氏 -逅 -漆 -拎 -惋 -扈 -湟 -纭 -啕 -掬 -擞 -哥 -忽 -涤 -鸵 -靡 -郗 -瓷 -扁 -廊 -怨 -雏 -钮 -敦 -E -懦 -憋 -汀 -拚 -啉 -腌 -岸 -f -痼 -瞅 -尊 -咀 -眩 -飙 -忌 -仝 -迦 -熬 -毫 -胯 -篑 -茄 -腺 -凄 -舛 -碴 -锵 -诧 -羯 -後 -漏 -汤 -宓 -仞 -蚁 -壶 -谰 -皑 -铄 -棰 -罔 -辅 -晶 -苦 -牟 -闽 -\ -烃 -饮 -聿 -丙 -蛳 -朱 -煤 -涔 -鳖 -犁 -罐 -荼 -砒 -淦 -妤 -黏 -戎 -孑 -婕 -瑾 -戢 -钵 -枣 -捋 -砥 -衩 -狙 -桠 -稣 -阎 -肃 -梏 -诫 -孪 -昶 -婊 -衫 -嗔 -侃 -塞 -蜃 -樵 -峒 -貌 -屿 -欺 -缫 -阐 -栖 -诟 -珞 -荭 -吝 -萍 -嗽 -恂 -啻 -蜴 -磬 -峋 -俸 -豫 -谎 -徊 -镍 -韬 -魇 -晴 -U -囟 -猜 -蛮 -坐 -囿 -伴 -亭 -肝 -佗 -蝠 -妃 -胞 -滩 -榴 -氖 -垩 -苋 -砣 -扪 -馏 -姓 -轩 -厉 -夥 -侈 -禀 -垒 -岑 -赏 -钛 -辐 -痔 -披 -纸 -碳 -“ -坞 -蠓 -挤 -荥 -沅 -悔 -铧 -帼 -蒌 -蝇 -a -p -y -n -g -哀 -浆 -瑶 -凿 -桶 -馈 -皮 -奴 -苜 -佤 -伶 -晗 -铱 -炬 -优 -弊 -氢 -恃 -甫 -攥 -端 -锌 -灰 -稹 -炝 -曙 -邋 -亥 -眶 -碾 -拉 -萝 -绔 -捷 -浍 -腋 -姑 -菖 -凌 -涞 -麽 -锢 -桨 -潢 -绎 -镰 -殆 -锑 -渝 -铬 -困 -绽 -觎 -匈 -糙 -暑 -裹 -鸟 -盔 -肽 -迷 -綦 -『 -亳 -佝 -俘 -钴 -觇 -骥 -仆 -疝 -跪 -婶 -郯 -瀹 -唉 -脖 -踞 -针 -晾 -忒 -扼 -瞩 -叛 -椒 -疟 -嗡 -邗 -肆 -跆 -玫 -忡 -捣 -咧 -唆 -艄 -蘑 -潦 -笛 -阚 -沸 -泻 -掊 -菽 -贫 -斥 -髂 -孢 -镂 -赂 -麝 -鸾 -屡 -衬 -苷 -恪 -叠 -希 -粤 -爻 -喝 -茫 -惬 -郸 -绻 -庸 -撅 -碟 -宄 -妹 -膛 -叮 -饵 -崛 -嗲 -椅 -冤 -搅 -咕 -敛 -尹 -垦 -闷 -蝉 -霎 -勰 -败 -蓑 -泸 -肤 -鹌 -幌 -焦 -浠 -鞍 -刁 -舰 -乙 -竿 -裔 -。 -茵 -函 -伊 -兄 -丨 -娜 -匍 -謇 -莪 -宥 -似 -蝽 -翳 -酪 -翠 -粑 -薇 -祢 -骏 -赠 -叫 -Q -噤 -噻 -竖 -芗 -莠 -潭 -俊 -羿 -耜 -O -郫 -趁 -嗪 -囚 -蹶 -芒 -洁 -笋 -鹑 -敲 -硝 -啶 -堡 -渲 -揩 -』 -携 -宿 -遒 -颍 -扭 -棱 -割 -萜 -蔸 -葵 -琴 -捂 -饰 -衙 -耿 -掠 -募 -岂 -窖 -涟 -蔺 -瘤 -柞 -瞪 -怜 -匹 -距 -楔 -炜 -哆 -秦 -缎 -幼 -茁 -绪 -痨 -恨 -楸 -娅 -瓦 -桩 -雪 -嬴 -伏 -榔 -妥 -铿 -拌 -眠 -雍 -缇 -‘ -卓 -搓 -哌 -觞 -噩 -屈 -哧 -髓 -咦 -巅 -娑 -侑 -淫 -膳 -祝 -勾 -姊 -莴 
-胄 -疃 -薛 -蜷 -胛 -巷 -芙 -芋 -熙 -闰 -勿 -窃 -狱 -剩 -钏 -幢 -陟 -铛 -慧 -靴 -耍 -k -浙 -浇 -飨 -惟 -绗 -祜 -澈 -啼 -咪 -磷 -摞 -诅 -郦 -抹 -跃 -壬 -吕 -肖 -琏 -颤 -尴 -剡 -抠 -凋 -赚 -泊 -津 -宕 -殷 -倔 -氲 -漫 -邺 -涎 -怠 -$ -垮 -荬 -遵 -俏 -叹 -噢 -饽 -蜘 -孙 -筵 -疼 -鞭 -羧 -牦 -箭 -潴 -c -眸 -祭 -髯 -啖 -坳 -愁 -芩 -驮 -倡 -巽 -穰 -沃 -胚 -怒 -凤 -槛 -剂 -趵 -嫁 -v -邢 -灯 -鄢 -桐 -睽 -檗 -锯 -槟 -婷 -嵋 -圻 -诗 -蕈 -颠 -遭 -痢 -芸 -怯 -馥 -竭 -锗 -徜 -恭 -遍 -籁 -剑 -嘱 -苡 -龄 -僧 -桑 -潸 -弘 -澶 -楹 -悲 -讫 -愤 -腥 -悸 -谍 -椹 -呢 -桓 -葭 -攫 -阀 -翰 -躲 -敖 -柑 -郎 -笨 -橇 -呃 -魁 -燎 -脓 -葩 -磋 -垛 -玺 -狮 -沓 -砜 -蕊 -锺 -罹 -蕉 -翱 -虐 -闾 -巫 -旦 -茱 -嬷 -枯 -鹏 -贡 -芹 -汛 -矫 -绁 -拣 -禺 -佃 -讣 -舫 -惯 -乳 -趋 -疲 -挽 -岚 -虾 -衾 -蠹 -蹂 -飓 -氦 -铖 -孩 -稞 -瑜 -壅 -掀 -勘 -妓 -畅 -髋 -W -庐 -牲 -蓿 -榕 -练 -垣 -唱 -邸 -菲 -昆 -婺 -穿 -绡 -麒 -蚱 -掂 -愚 -泷 -涪 -漳 -妩 -娉 -榄 -讷 -觅 -旧 -藤 -煮 -呛 -柳 -腓 -叭 -庵 -烷 -阡 -罂 -蜕 -擂 -猖 -咿 -媲 -脉 -【 -沏 -貅 -黠 -熏 -哲 -烁 -坦 -酵 -兜 -× -潇 -撒 -剽 -珩 -圹 -乾 -摸 -樟 -帽 -嗒 -襄 -魂 -轿 -憬 -锡 -〕 -喃 -皆 -咖 -隅 -脸 -残 -泮 -袂 -鹂 -珊 -囤 -捆 -咤 -误 -徨 -闹 -淙 -芊 -淋 -怆 -囗 -拨 -梳 -渤 -R -G -绨 -蚓 -婀 -幡 -狩 -麾 -谢 -唢 -裸 -旌 -伉 -纶 -裂 -驳 -砼 -咛 -澄 -樨 -蹈 -宙 -澍 -倍 -貔 -操 -勇 -蟠 -摈 -砧 -虬 -够 -缁 -悦 -藿 -撸 -艹 -摁 -淹 -豇 -虎 -榭 -ˉ -吱 -d -° -喧 -荀 -踱 -侮 -奋 -偕 -饷 -犍 -惮 -坑 -璎 -徘 -宛 -妆 -袈 -倩 -窦 -昂 -荏 -乖 -K -怅 -撰 -鳙 -牙 -袁 -酞 -X -痿 -琼 -闸 -雁 -趾 -荚 -虻 -涝 -《 -杏 -韭 -偈 -烤 -绫 -鞘 -卉 -症 -遢 -蓥 -诋 -杭 -荨 -匆 -竣 -簪 -辙 -敕 -虞 -丹 -缭 -咩 -黟 -m -淤 -瑕 -咂 -铉 -硼 -茨 -嶂 -痒 -畸 -敬 -涿 -粪 -窘 -熟 -叔 -嫔 -盾 -忱 -裘 -憾 -梵 -赡 -珙 -咯 -娘 -庙 -溯 -胺 -葱 -痪 -摊 -荷 -卞 -乒 -髦 -寐 -铭 -坩 -胗 -枷 -爆 -溟 -嚼 -羚 -砬 -轨 -惊 -挠 -罄 -竽 -菏 -氧 -浅 -楣 -盼 -枢 -炸 -阆 -杯 -谏 -噬 -淇 -渺 -俪 -秆 -墓 -泪 -跻 -砌 -痰 -垡 -渡 -耽 -釜 -讶 -鳎 -煞 -呗 -韶 -舶 -绷 -鹳 -缜 -旷 -铊 -皱 -龌 -檀 -霖 -奄 -槐 -艳 -蝶 -旋 -哝 -赶 -骞 -蚧 -腊 -盈 -丁 -` -蜚 -矸 -蝙 -睨 -嚓 -僻 -鬼 -醴 -夜 -彝 -磊 -笔 -拔 -栀 -糕 -厦 -邰 -纫 -逭 -纤 -眦 -膊 -馍 -躇 -烯 -蘼 -冬 -诤 -暄 -骶 -哑 -瘠 -」 -臊 -丕 -愈 -咱 -螺 -擅 -跋 -搏 -硪 -谄 -笠 -淡 -嘿 -骅 -谧 -鼎 -皋 -姚 -歼 -蠢 -驼 -耳 -胬 -挝 -涯 -狗 -蒽 -孓 -犷 -凉 -芦 -箴 -铤 -孤 -嘛 -坤 -V -茴 -朦 -挞 -尖 -橙 -诞 -搴 -碇 -洵 -浚 -帚 -蜍 -漯 -柘 -嚎 -讽 -芭 -荤 -咻 -祠 -秉 -跖 -埃 -吓 -糯 -眷 -馒 -惹 -娼 -鲑 -嫩 -讴 -轮 -瞥 -靶 -褚 -乏 -缤 -宋 -帧 -删 -驱 -碎 -扑 -俩 -俄 -偏 -涣 -竹 -噱 -皙 -佰 -渚 -唧 -斡 -# -镉 -刀 -崎 -筐 -佣 -夭 -贰 -肴 -峙 -哔 -艿 -匐 -牺 -镛 -缘 -仡 -嫡 -劣 -枸 -堀 -梨 -簿 -鸭 -蒸 -亦 -稽 -浴 -{ -衢 -束 -槲 -j -阁 -揍 -疥 -棋 -潋 -聪 -窜 -乓 -睛 -插 -冉 -阪 -苍 -搽 -「 -蟾 -螟 -幸 -仇 -樽 -撂 -慢 -跤 -幔 -俚 -淅 -覃 -觊 -溶 -妖 -帛 -侨 -曰 -妾 -泗 -· -: -瀘 -風 -Ë -( -) -∶ -紅 -紗 -瑭 -雲 -頭 -鶏 -財 -許 -• -¥ -樂 -焗 -麗 -— -; -滙 -東 -榮 -繪 -興 -… -門 -業 -π -楊 -國 -顧 -é -盤 -寳 -Λ -龍 -鳳 -島 -誌 -緣 -結 -銭 -萬 -勝 -祎 -璟 -優 -歡 -臨 -時 -購 -= -★ -藍 -昇 -鐵 -觀 -勅 -農 -聲 -畫 -兿 -術 -發 -劉 -記 -專 -耑 -園 -書 -壴 -種 -Ο -● -褀 -號 -銀 -匯 -敟 -锘 -葉 -橪 -廣 -進 -蒄 -鑽 -阝 -祙 -貢 -鍋 -豊 -夬 -喆 -團 -閣 -開 -燁 -賓 -館 -酡 -沔 -順 -+ -硚 -劵 -饸 -陽 -車 -湓 -復 -萊 -氣 -軒 -華 -堃 -迮 -纟 -戶 -馬 -學 -裡 -電 -嶽 -獨 -マ -シ -サ -ジ -燘 -袪 -環 -❤ -臺 -灣 -専 -賣 -孖 -聖 -攝 -線 -▪ -α -傢 -俬 -夢 -達 -莊 -喬 -貝 -薩 -劍 -羅 -壓 -棛 -饦 -尃 -璈 -囍 -醫 -G -I -A -# -N -鷄 -髙 -嬰 -啓 -約 -隹 -潔 -賴 -藝 -~ -寶 -籣 -麺 -  -嶺 -√ -義 -網 -峩 -長 -∧ -魚 -機 -構 -② -鳯 -偉 -L -B -㙟 -畵 -鴿 -' -詩 -溝 -嚞 -屌 -藔 -佧 -玥 -蘭 -織 -1 -3 -9 -0 -7 -點 -砭 -鴨 -鋪 -銘 -廳 -弍 -‧ -創 -湯 -坶 -℃ -卩 -骝 -& -烜 -荘 -當 -潤 -扞 -係 -懷 -碶 -钅 -蚨 -讠 -☆ -叢 -爲 -埗 -涫 -塗 -→ -楽 -現 -鯨 -愛 -瑪 -鈺 -忄 -悶 -藥 -飾 -樓 -視 -孬 -ㆍ -燚 -苪 -師 -① -丼 -锽 -│ -韓 -標 -è -兒 -閏 -匋 -張 -漢 -Ü -髪 -會 -閑 -檔 -習 -裝 -の -峯 -菘 -輝 -И -雞 -釣 -億 -浐 -K -O -R -8 -H -E -P -T -W -D -S -C -M -F -姌 -饹 -» -晞 -廰 -ä -嵯 -鷹 -負 -飲 -絲 -冚 -楗 -澤 -綫 -區 -❋ -← -質 -靑 -揚 -③ -滬 -統 -産 -協 -﹑ -乸 -畐 -經 -運 -際 -洺 -岽 -為 -粵 -諾 -崋 -豐 -碁 -ɔ -V -2 -6 -齋 -誠 -訂 -´ -勑 -雙 -陳 -無 -í -泩 -媄 -夌 -刂 -i -c -t -o -r -a -嘢 -耄 -燴 -暃 -壽 -媽 -靈 -抻 -體 -唻 -É -冮 -甹 -鎮 -錦 -ʌ -蜛 -蠄 -尓 -駕 -戀 -飬 -逹 -倫 -貴 -極 -Я -Й -寬 -磚 -嶪 -郎 -職 -| -間 -n -d -剎 -伈 -課 -飛 -橋 -瘊 -№ -譜 -骓 -圗 -滘 -縣 -粿 -咅 -養 -濤 -彳 -® -% -Ⅱ -啰 -㴪 -見 -矞 -薬 -糁 -邨 -鲮 -顔 -罱 -З -選 -話 -贏 -氪 -俵 -競 -瑩 -繡 -枱 -β -綉 -á -獅 -爾 -™ -麵 -戋 -淩 -徳 -個 -劇 -場 -務 -簡 -寵 -h -實 -膠 -轱 -圖 -築 -嘣 -樹 -㸃 -營 -耵 -孫 -饃 -鄺 -飯 -麯 -遠 -輸 -坫 -孃 -乚 
-閃 -鏢 -㎡ -題 -廠 -關 -↑ -爺 -將 -軍 -連 -篦 -覌 -參 -箸 -- -窠 -棽 -寕 -夀 -爰 -歐 -呙 -閥 -頡 -熱 -雎 -垟 -裟 -凬 -勁 -帑 -馕 -夆 -疌 -枼 -馮 -貨 -蒤 -樸 -彧 -旸 -靜 -龢 -暢 -㐱 -鳥 -珺 -鏡 -灡 -爭 -堷 -廚 -Ó -騰 -診 -┅ -蘇 -褔 -凱 -頂 -豕 -亞 -帥 -嘬 -⊥ -仺 -桖 -複 -饣 -絡 -穂 -顏 -棟 -納 -▏ -濟 -親 -設 -計 -攵 -埌 -烺 -ò -頤 -燦 -蓮 -撻 -節 -講 -濱 -濃 -娽 -洳 -朿 -燈 -鈴 -護 -膚 -铔 -過 -補 -Z -U -5 -4 -坋 -闿 -䖝 -餘 -缐 -铞 -貿 -铪 -桼 -趙 -鍊 -[ -㐂 -垚 -菓 -揸 -捲 -鐘 -滏 -𣇉 -爍 -輪 -燜 -鴻 -鮮 -動 -鹞 -鷗 -丄 -慶 -鉌 -翥 -飮 -腸 -⇋ -漁 -覺 -來 -熘 -昴 -翏 -鲱 -圧 -鄉 -萭 -頔 -爐 -嫚 -г -貭 -類 -聯 -幛 -輕 -訓 -鑒 -夋 -锨 -芃 -珣 -䝉 -扙 -嵐 -銷 -處 -ㄱ -語 -誘 -苝 -歸 -儀 -燒 -楿 -內 -粢 -葒 -奧 -麥 -礻 -滿 -蠔 -穵 -瞭 -態 -鱬 -榞 -硂 -鄭 -黃 -煙 -祐 -奓 -逺 -* -瑄 -獲 -聞 -薦 -讀 -這 -樣 -決 -問 -啟 -們 -執 -説 -轉 -單 -隨 -唘 -帶 -倉 -庫 -還 -贈 -尙 -皺 -■ -餅 -產 -○ -∈ -報 -狀 -楓 -賠 -琯 -嗮 -禮 -` -傳 -> -≤ -嗞 -Φ -≥ -換 -咭 -∣ -↓ -曬 -ε -応 -寫 -″ -終 -様 -純 -費 -療 -聨 -凍 -壐 -郵 -ü -黒 -∫ -製 -塊 -調 -軽 -確 -撃 -級 -馴 -Ⅲ -涇 -繹 -數 -碼 -證 -狒 -処 -劑 -< -晧 -賀 -衆 -] -櫥 -兩 -陰 -絶 -對 -鯉 -憶 -◎ -p -e -Y -蕒 -煖 -頓 -測 -試 -鼽 -僑 -碩 -妝 -帯 -≈ -鐡 -舖 -權 -喫 -倆 -ˋ -該 -悅 -ā -俫 -. -f -s -b -m -k -g -u -j -貼 -淨 -濕 -針 -適 -備 -l -/ -給 -謢 -強 -觸 -衛 -與 -⊙ -$ -緯 -變 -⑴ -⑵ -⑶ -㎏ -殺 -∩ -幚 -─ -價 -▲ -離 -ú -ó -飄 -烏 -関 -閟 -﹝ -﹞ -邏 -輯 -鍵 -驗 -訣 -導 -歷 -屆 -層 -▼ -儱 -錄 -熳 -ē -艦 -吋 -錶 -辧 -飼 -顯 -④ -禦 -販 -気 -対 -枰 -閩 -紀 -幹 -瞓 -貊 -淚 -△ -眞 -墊 -Ω -獻 -褲 -縫 -緑 -亜 -鉅 -餠 -{ -} -◆ -蘆 -薈 -█ -◇ -溫 -彈 -晳 -粧 -犸 -穩 -訊 -崬 -凖 -熥 -П -舊 -條 -紋 -圍 -Ⅳ -筆 -尷 -難 -雜 -錯 -綁 -識 -頰 -鎖 -艶 -□ -殁 -殼 -⑧ -├ -▕ -鵬 -ǐ -ō -ǒ -糝 -綱 -▎ -μ -盜 -饅 -醬 -籤 -蓋 -釀 -鹽 -據 -à -ɡ -辦 -◥ -彐 -┌ -婦 -獸 -鲩 -伱 -ī -蒟 -蒻 -齊 -袆 -腦 -寧 -凈 -妳 -煥 -詢 -偽 -謹 -啫 -鯽 -騷 -鱸 -損 -傷 -鎻 -髮 -買 -冏 -儥 -両 -﹢ -∞ -載 -喰 -z -羙 -悵 -燙 -曉 -員 -組 -徹 -艷 -痠 -鋼 -鼙 -縮 -細 -嚒 -爯 -≠ -維 -" -鱻 -壇 -厍 -帰 -浥 -犇 -薡 -軎 -² -應 -醜 -刪 -緻 -鶴 -賜 -噁 -軌 -尨 -镔 -鷺 -槗 -彌 -葚 -濛 -請 -溇 -緹 -賢 -訪 -獴 -瑅 -資 -縤 -陣 -蕟 -栢 -韻 -祼 -恁 -伢 -謝 -劃 -涑 -總 -衖 -踺 -砋 -凉 -籃 -駿 -苼 -瘋 -昽 -紡 -驊 -腎 -﹗ -響 -杋 -剛 -嚴 -禪 -歓 -槍 -傘 -檸 -檫 -炣 -勢 -鏜 -鎢 -銑 -尐 -減 -奪 -惡 -θ -僮 -婭 -臘 -ū -ì -殻 -鉄 -∑ -蛲 -焼 -緖 -續 -紹 +' +疗 +绚 +诚 +娇 +溜 +题 +贿 +者 +廖 +更 +纳 +加 +奉 +公 +一 +就 +汴 +计 +与 +路 +房 +原 +妇 +2 +0 +8 +- +7 +其 +> +: +] +, +, +骑 +刈 +全 +消 +昏 +傈 +安 +久 +钟 +嗅 +不 +影 +处 +驽 +蜿 +资 +关 +椤 +地 +瘸 +专 +问 +忖 +票 +嫉 +炎 +韵 +要 +月 +田 +节 +陂 +鄙 +捌 +备 +拳 +伺 +眼 +网 +盎 +大 +傍 +心 +东 +愉 +汇 +蹿 +科 +每 +业 +里 +航 +晏 +字 +平 +录 +先 +1 +3 +彤 +鲶 +产 +稍 +督 +腴 +有 +象 +岳 +注 +绍 +在 +泺 +文 +定 +核 +名 +水 +过 +理 +让 +偷 +率 +等 +这 +发 +” +为 +含 +肥 +酉 +相 +鄱 +七 +编 +猥 +锛 +日 +镀 +蒂 +掰 +倒 +辆 +栾 +栗 +综 +涩 +州 +雌 +滑 +馀 +了 +机 +块 +司 +宰 +甙 +兴 +矽 +抚 +保 +用 +沧 +秩 +如 +收 +息 +滥 +页 +疑 +埠 +! +! +姥 +异 +橹 +钇 +向 +下 +跄 +的 +椴 +沫 +国 +绥 +獠 +报 +开 +民 +蜇 +何 +分 +凇 +长 +讥 +藏 +掏 +施 +羽 +中 +讲 +派 +嘟 +人 +提 +浼 +间 +世 +而 +古 +多 +倪 +唇 +饯 +控 +庚 +首 +赛 +蜓 +味 +断 +制 +觉 +技 +替 +艰 +溢 +潮 +夕 +钺 +外 +摘 +枋 +动 +双 +单 +啮 +户 +枇 +确 +锦 +曜 +杜 +或 +能 +效 +霜 +盒 +然 +侗 +电 +晁 +放 +步 +鹃 +新 +杖 +蜂 +吒 +濂 +瞬 +评 +总 +隍 +对 +独 +合 +也 +是 +府 +青 +天 +诲 +墙 +组 +滴 +级 +邀 +帘 +示 +已 +时 +骸 +仄 +泅 +和 +遨 +店 +雇 +疫 +持 +巍 +踮 +境 +只 +亨 +目 +鉴 +崤 +闲 +体 +泄 +杂 +作 +般 +轰 +化 +解 +迂 +诿 +蛭 +璀 +腾 +告 +版 +服 +省 +师 +小 +规 +程 +线 +海 +办 +引 +二 +桧 +牌 +砺 +洄 +裴 +修 +图 +痫 +胡 +许 +犊 +事 +郛 +基 +柴 +呼 +食 +研 +奶 +律 +蛋 +因 +葆 +察 +戏 +褒 +戒 +再 +李 +骁 +工 +貂 +油 +鹅 +章 +啄 +休 +场 +给 +睡 +纷 +豆 +器 +捎 +说 +敏 +学 +会 +浒 +设 +诊 +格 +廓 +查 +来 +霓 +室 +溆 +¢ +诡 +寥 +焕 +舜 +柒 +狐 +回 +戟 +砾 +厄 +实 +翩 +尿 +五 +入 +径 +惭 +喹 +股 +宇 +篝 +| +; +美 +期 +云 +九 +祺 +扮 +靠 +锝 +槌 +系 +企 +酰 +阊 +暂 +蚕 +忻 +豁 +本 +羹 +执 +条 +钦 +H +獒 +限 +进 +季 +楦 +于 +芘 +玖 +铋 +茯 +未 +答 +粘 +括 +样 +精 +欠 +矢 +甥 +帷 +嵩 +扣 +令 +仔 +风 +皈 +行 +支 +部 +蓉 +刮 +站 +蜡 +救 +钊 +汗 +松 +嫌 +成 +可 +. 
+鹤 +院 +从 +交 +政 +怕 +活 +调 +球 +局 +验 +髌 +第 +韫 +谗 +串 +到 +圆 +年 +米 +/ +* +友 +忿 +检 +区 +看 +自 +敢 +刃 +个 +兹 +弄 +流 +留 +同 +没 +齿 +星 +聆 +轼 +湖 +什 +三 +建 +蛔 +儿 +椋 +汕 +震 +颧 +鲤 +跟 +力 +情 +璺 +铨 +陪 +务 +指 +族 +训 +滦 +鄣 +濮 +扒 +商 +箱 +十 +召 +慷 +辗 +所 +莞 +管 +护 +臭 +横 +硒 +嗓 +接 +侦 +六 +露 +党 +馋 +驾 +剖 +高 +侬 +妪 +幂 +猗 +绺 +骐 +央 +酐 +孝 +筝 +课 +徇 +缰 +门 +男 +西 +项 +句 +谙 +瞒 +秃 +篇 +教 +碲 +罚 +声 +呐 +景 +前 +富 +嘴 +鳌 +稀 +免 +朋 +啬 +睐 +去 +赈 +鱼 +住 +肩 +愕 +速 +旁 +波 +厅 +健 +茼 +厥 +鲟 +谅 +投 +攸 +炔 +数 +方 +击 +呋 +谈 +绩 +别 +愫 +僚 +躬 +鹧 +胪 +炳 +招 +喇 +膨 +泵 +蹦 +毛 +结 +5 +4 +谱 +识 +陕 +粽 +婚 +拟 +构 +且 +搜 +任 +潘 +比 +郢 +妨 +醪 +陀 +桔 +碘 +扎 +选 +哈 +骷 +楷 +亿 +明 +缆 +脯 +监 +睫 +逻 +婵 +共 +赴 +淝 +凡 +惦 +及 +达 +揖 +谩 +澹 +减 +焰 +蛹 +番 +祁 +柏 +员 +禄 +怡 +峤 +龙 +白 +叽 +生 +闯 +起 +细 +装 +谕 +竟 +聚 +钙 +上 +导 +渊 +按 +艾 +辘 +挡 +耒 +盹 +饪 +臀 +记 +邮 +蕙 +受 +各 +医 +搂 +普 +滇 +朗 +茸 +带 +翻 +酚 +( +光 +堤 +墟 +蔷 +万 +幻 +〓 +瑙 +辈 +昧 +盏 +亘 +蛀 +吉 +铰 +请 +子 +假 +闻 +税 +井 +诩 +哨 +嫂 +好 +面 +琐 +校 +馊 +鬣 +缂 +营 +访 +炖 +占 +农 +缀 +否 +经 +钚 +棵 +趟 +张 +亟 +吏 +茶 +谨 +捻 +论 +迸 +堂 +玉 +信 +吧 +瞠 +乡 +姬 +寺 +咬 +溏 +苄 +皿 +意 +赉 +宝 +尔 +钰 +艺 +特 +唳 +踉 +都 +荣 +倚 +登 +荐 +丧 +奇 +涵 +批 +炭 +近 +符 +傩 +感 +道 +着 +菊 +虹 +仲 +众 +懈 +濯 +颞 +眺 +南 +释 +北 +缝 +标 +既 +茗 +整 +撼 +迤 +贲 +挎 +耱 +拒 +某 +妍 +卫 +哇 +英 +矶 +藩 +治 +他 +元 +领 +膜 +遮 +穗 +蛾 +飞 +荒 +棺 +劫 +么 +市 +火 +温 +拈 +棚 +洼 +转 +果 +奕 +卸 +迪 +伸 +泳 +斗 +邡 +侄 +涨 +屯 +萋 +胭 +氡 +崮 +枞 +惧 +冒 +彩 +斜 +手 +豚 +随 +旭 +淑 +妞 +形 +菌 +吲 +沱 +争 +驯 +歹 +挟 +兆 +柱 +传 +至 +包 +内 +响 +临 +红 +功 +弩 +衡 +寂 +禁 +老 +棍 +耆 +渍 +织 +害 +氵 +渑 +布 +载 +靥 +嗬 +虽 +苹 +咨 +娄 +库 +雉 +榜 +帜 +嘲 +套 +瑚 +亲 +簸 +欧 +边 +6 +腿 +旮 +抛 +吹 +瞳 +得 +镓 +梗 +厨 +继 +漾 +愣 +憨 +士 +策 +窑 +抑 +躯 +襟 +脏 +参 +贸 +言 +干 +绸 +鳄 +穷 +藜 +音 +折 +详 +) +举 +悍 +甸 +癌 +黎 +谴 +死 +罩 +迁 +寒 +驷 +袖 +媒 +蒋 +掘 +模 +纠 +恣 +观 +祖 +蛆 +碍 +位 +稿 +主 +澧 +跌 +筏 +京 +锏 +帝 +贴 +证 +糠 +才 +黄 +鲸 +略 +炯 +饱 +四 +出 +园 +犀 +牧 +容 +汉 +杆 +浈 +汰 +瑷 +造 +虫 +瘩 +怪 +驴 +济 +应 +花 +沣 +谔 +夙 +旅 +价 +矿 +以 +考 +s +u +呦 +晒 +巡 +茅 +准 +肟 +瓴 +詹 +仟 +褂 +译 +桌 +混 +宁 +怦 +郑 +抿 +些 +余 +鄂 +饴 +攒 +珑 +群 +阖 +岔 +琨 +藓 +预 +环 +洮 +岌 +宀 +杲 +瀵 +最 +常 +囡 +周 +踊 +女 +鼓 +袭 +喉 +简 +范 +薯 +遐 +疏 +粱 +黜 +禧 +法 +箔 +斤 +遥 +汝 +奥 +直 +贞 +撑 +置 +绱 +集 +她 +馅 +逗 +钧 +橱 +魉 +[ +恙 +躁 +唤 +9 +旺 +膘 +待 +脾 +惫 +购 +吗 +依 +盲 +度 +瘿 +蠖 +俾 +之 +镗 +拇 +鲵 +厝 +簧 +续 +款 +展 +啃 +表 +剔 +品 +钻 +腭 +损 +清 +锶 +统 +涌 +寸 +滨 +贪 +链 +吠 +冈 +伎 +迥 +咏 +吁 +览 +防 +迅 +失 +汾 +阔 +逵 +绀 +蔑 +列 +川 +凭 +努 +熨 +揪 +利 +俱 +绉 +抢 +鸨 +我 +即 +责 +膦 +易 +毓 +鹊 +刹 +玷 +岿 +空 +嘞 +绊 +排 +术 +估 +锷 +违 +们 +苟 +铜 +播 +肘 +件 +烫 +审 +鲂 +广 +像 +铌 +惰 +铟 +巳 +胍 +鲍 +康 +憧 +色 +恢 +想 +拷 +尤 +疳 +知 +S +Y +F +D +A +峄 +裕 +帮 +握 +搔 +氐 +氘 +难 +墒 +沮 +雨 +叁 +缥 +悴 +藐 +湫 +娟 +苑 +稠 +颛 +簇 +后 +阕 +闭 +蕤 +缚 +怎 +佞 +码 +嘤 +蔡 +痊 +舱 +螯 +帕 +赫 +昵 +升 +烬 +岫 +、 +疵 +蜻 +髁 +蕨 +隶 +烛 +械 +丑 +盂 +梁 +强 +鲛 +由 +拘 +揉 +劭 +龟 +撤 +钩 +呕 +孛 +费 +妻 +漂 +求 +阑 +崖 +秤 +甘 +通 +深 +补 +赃 +坎 +床 +啪 +承 +吼 +量 +暇 +钼 +烨 +阂 +擎 +脱 +逮 +称 +P +神 +属 +矗 +华 +届 +狍 +葑 +汹 +育 +患 +窒 +蛰 +佼 +静 +槎 +运 +鳗 +庆 +逝 +曼 +疱 +克 +代 +官 +此 +麸 +耧 +蚌 +晟 +例 +础 +榛 +副 +测 +唰 +缢 +迹 +灬 +霁 +身 +岁 +赭 +扛 +又 +菡 +乜 +雾 +板 +读 +陷 +徉 +贯 +郁 +虑 +变 +钓 +菜 +圾 +现 +琢 +式 +乐 +维 +渔 +浜 +左 +吾 +脑 +钡 +警 +T +啵 +拴 +偌 +漱 +湿 +硕 +止 +骼 +魄 +积 +燥 +联 +踢 +玛 +则 +窿 +见 +振 +畿 +送 +班 +钽 +您 +赵 +刨 +印 +讨 +踝 +籍 +谡 +舌 +崧 +汽 +蔽 +沪 +酥 +绒 +怖 +财 +帖 +肱 +私 +莎 +勋 +羔 +霸 +励 +哼 +帐 +将 +帅 +渠 +纪 +婴 +娩 +岭 +厘 +滕 +吻 +伤 +坝 +冠 +戊 +隆 +瘁 +介 +涧 +物 +黍 +并 +姗 +奢 +蹑 +掣 +垸 +锴 +命 +箍 +捉 +病 +辖 +琰 +眭 +迩 +艘 +绌 +繁 +寅 +若 +毋 +思 +诉 +类 +诈 +燮 +轲 +酮 +狂 +重 +反 +职 +筱 +县 +委 +磕 +绣 +奖 +晋 +濉 +志 +徽 +肠 +呈 +獐 +坻 +口 +片 +碰 +几 +村 +柿 +劳 +料 +获 +亩 +惕 +晕 +厌 +号 +罢 +池 +正 +鏖 +煨 +家 +棕 +复 +尝 +懋 +蜥 +锅 +岛 +扰 +队 +坠 +瘾 +钬 +@ +卧 +疣 +镇 +譬 +冰 +彷 +频 +黯 +据 +垄 +采 +八 +缪 +瘫 +型 +熹 +砰 +楠 +襁 +箐 +但 +嘶 +绳 +啤 +拍 +盥 +穆 +傲 +洗 +盯 +塘 +怔 +筛 +丿 +台 +恒 +喂 +葛 +永 +¥ +烟 +酒 +桦 +书 +砂 +蚝 +缉 +态 +瀚 +袄 +圳 +轻 +蛛 +超 +榧 +遛 +姒 +奘 +铮 +右 +荽 +望 +偻 +卡 +丶 +氰 +附 +做 +革 +索 +戚 +坨 +桷 +唁 +垅 +榻 +岐 +偎 +坛 +莨 +山 +殊 +微 +骇 +陈 +爨 +推 +嗝 +驹 +澡 +藁 +呤 +卤 +嘻 +糅 +逛 +侵 +郓 +酌 +德 +摇 +※ +鬃 +被 +慨 +殡 +羸 +昌 +泡 +戛 +鞋 +河 +宪 +沿 +玲 +鲨 +翅 +哽 +源 +铅 +语 +照 
+邯 +址 +荃 +佬 +顺 +鸳 +町 +霭 +睾 +瓢 +夸 +椁 +晓 +酿 +痈 +咔 +侏 +券 +噎 +湍 +签 +嚷 +离 +午 +尚 +社 +锤 +背 +孟 +使 +浪 +缦 +潍 +鞅 +军 +姹 +驶 +笑 +鳟 +鲁 +》 +孽 +钜 +绿 +洱 +礴 +焯 +椰 +颖 +囔 +乌 +孔 +巴 +互 +性 +椽 +哞 +聘 +昨 +早 +暮 +胶 +炀 +隧 +低 +彗 +昝 +铁 +呓 +氽 +藉 +喔 +癖 +瑗 +姨 +权 +胱 +韦 +堑 +蜜 +酋 +楝 +砝 +毁 +靓 +歙 +锲 +究 +屋 +喳 +骨 +辨 +碑 +武 +鸠 +宫 +辜 +烊 +适 +坡 +殃 +培 +佩 +供 +走 +蜈 +迟 +翼 +况 +姣 +凛 +浔 +吃 +飘 +债 +犟 +金 +促 +苛 +崇 +坂 +莳 +畔 +绂 +兵 +蠕 +斋 +根 +砍 +亢 +欢 +恬 +崔 +剁 +餐 +榫 +快 +扶 +‖ +濒 +缠 +鳜 +当 +彭 +驭 +浦 +篮 +昀 +锆 +秸 +钳 +弋 +娣 +瞑 +夷 +龛 +苫 +拱 +致 +% +嵊 +障 +隐 +弑 +初 +娓 +抉 +汩 +累 +蓖 +" +唬 +助 +苓 +昙 +押 +毙 +破 +城 +郧 +逢 +嚏 +獭 +瞻 +溱 +婿 +赊 +跨 +恼 +璧 +萃 +姻 +貉 +灵 +炉 +密 +氛 +陶 +砸 +谬 +衔 +点 +琛 +沛 +枳 +层 +岱 +诺 +脍 +榈 +埂 +征 +冷 +裁 +打 +蹴 +素 +瘘 +逞 +蛐 +聊 +激 +腱 +萘 +踵 +飒 +蓟 +吆 +取 +咙 +簋 +涓 +矩 +曝 +挺 +揣 +座 +你 +史 +舵 +焱 +尘 +苏 +笈 +脚 +溉 +榨 +诵 +樊 +邓 +焊 +义 +庶 +儋 +蟋 +蒲 +赦 +呷 +杞 +诠 +豪 +还 +试 +颓 +茉 +太 +除 +紫 +逃 +痴 +草 +充 +鳕 +珉 +祗 +墨 +渭 +烩 +蘸 +慕 +璇 +镶 +穴 +嵘 +恶 +骂 +险 +绋 +幕 +碉 +肺 +戳 +刘 +潞 +秣 +纾 +潜 +銮 +洛 +须 +罘 +销 +瘪 +汞 +兮 +屉 +r +林 +厕 +质 +探 +划 +狸 +殚 +善 +煊 +烹 +〒 +锈 +逯 +宸 +辍 +泱 +柚 +袍 +远 +蹋 +嶙 +绝 +峥 +娥 +缍 +雀 +徵 +认 +镱 +谷 += +贩 +勉 +撩 +鄯 +斐 +洋 +非 +祚 +泾 +诒 +饿 +撬 +威 +晷 +搭 +芍 +锥 +笺 +蓦 +候 +琊 +档 +礁 +沼 +卵 +荠 +忑 +朝 +凹 +瑞 +头 +仪 +弧 +孵 +畏 +铆 +突 +衲 +车 +浩 +气 +茂 +悖 +厢 +枕 +酝 +戴 +湾 +邹 +飚 +攘 +锂 +写 +宵 +翁 +岷 +无 +喜 +丈 +挑 +嗟 +绛 +殉 +议 +槽 +具 +醇 +淞 +笃 +郴 +阅 +饼 +底 +壕 +砚 +弈 +询 +缕 +庹 +翟 +零 +筷 +暨 +舟 +闺 +甯 +撞 +麂 +茌 +蔼 +很 +珲 +捕 +棠 +角 +阉 +媛 +娲 +诽 +剿 +尉 +爵 +睬 +韩 +诰 +匣 +危 +糍 +镯 +立 +浏 +阳 +少 +盆 +舔 +擘 +匪 +申 +尬 +铣 +旯 +抖 +赘 +瓯 +居 +ˇ +哮 +游 +锭 +茏 +歌 +坏 +甚 +秒 +舞 +沙 +仗 +劲 +潺 +阿 +燧 +郭 +嗖 +霏 +忠 +材 +奂 +耐 +跺 +砀 +输 +岖 +媳 +氟 +极 +摆 +灿 +今 +扔 +腻 +枝 +奎 +药 +熄 +吨 +话 +q +额 +慑 +嘌 +协 +喀 +壳 +埭 +视 +著 +於 +愧 +陲 +翌 +峁 +颅 +佛 +腹 +聋 +侯 +咎 +叟 +秀 +颇 +存 +较 +罪 +哄 +岗 +扫 +栏 +钾 +羌 +己 +璨 +枭 +霉 +煌 +涸 +衿 +键 +镝 +益 +岢 +奏 +连 +夯 +睿 +冥 +均 +糖 +狞 +蹊 +稻 +爸 +刿 +胥 +煜 +丽 +肿 +璃 +掸 +跚 +灾 +垂 +樾 +濑 +乎 +莲 +窄 +犹 +撮 +战 +馄 +软 +络 +显 +鸢 +胸 +宾 +妲 +恕 +埔 +蝌 +份 +遇 +巧 +瞟 +粒 +恰 +剥 +桡 +博 +讯 +凯 +堇 +阶 +滤 +卖 +斌 +骚 +彬 +兑 +磺 +樱 +舷 +两 +娱 +福 +仃 +差 +找 +桁 +÷ +净 +把 +阴 +污 +戬 +雷 +碓 +蕲 +楚 +罡 +焖 +抽 +妫 +咒 +仑 +闱 +尽 +邑 +菁 +爱 +贷 +沥 +鞑 +牡 +嗉 +崴 +骤 +塌 +嗦 +订 +拮 +滓 +捡 +锻 +次 +坪 +杩 +臃 +箬 +融 +珂 +鹗 +宗 +枚 +降 +鸬 +妯 +阄 +堰 +盐 +毅 +必 +杨 +崃 +俺 +甬 +状 +莘 +货 +耸 +菱 +腼 +铸 +唏 +痤 +孚 +澳 +懒 +溅 +翘 +疙 +杷 +淼 +缙 +骰 +喊 +悉 +砻 +坷 +艇 +赁 +界 +谤 +纣 +宴 +晃 +茹 +归 +饭 +梢 +铡 +街 +抄 +肼 +鬟 +苯 +颂 +撷 +戈 +炒 +咆 +茭 +瘙 +负 +仰 +客 +琉 +铢 +封 +卑 +珥 +椿 +镧 +窨 +鬲 +寿 +御 +袤 +铃 +萎 +砖 +餮 +脒 +裳 +肪 +孕 +嫣 +馗 +嵇 +恳 +氯 +江 +石 +褶 +冢 +祸 +阻 +狈 +羞 +银 +靳 +透 +咳 +叼 +敷 +芷 +啥 +它 +瓤 +兰 +痘 +懊 +逑 +肌 +往 +捺 +坊 +甩 +呻 +〃 +沦 +忘 +膻 +祟 +菅 +剧 +崆 +智 +坯 +臧 +霍 +墅 +攻 +眯 +倘 +拢 +骠 +铐 +庭 +岙 +瓠 +′ +缺 +泥 +迢 +捶 +? +? 
+郏 +喙 +掷 +沌 +纯 +秘 +种 +听 +绘 +固 +螨 +团 +香 +盗 +妒 +埚 +蓝 +拖 +旱 +荞 +铀 +血 +遏 +汲 +辰 +叩 +拽 +幅 +硬 +惶 +桀 +漠 +措 +泼 +唑 +齐 +肾 +念 +酱 +虚 +屁 +耶 +旗 +砦 +闵 +婉 +馆 +拭 +绅 +韧 +忏 +窝 +醋 +葺 +顾 +辞 +倜 +堆 +辋 +逆 +玟 +贱 +疾 +董 +惘 +倌 +锕 +淘 +嘀 +莽 +俭 +笏 +绑 +鲷 +杈 +择 +蟀 +粥 +嗯 +驰 +逾 +案 +谪 +褓 +胫 +哩 +昕 +颚 +鲢 +绠 +躺 +鹄 +崂 +儒 +俨 +丝 +尕 +泌 +啊 +萸 +彰 +幺 +吟 +骄 +苣 +弦 +脊 +瑰 +〈 +诛 +镁 +析 +闪 +剪 +侧 +哟 +框 +螃 +守 +嬗 +燕 +狭 +铈 +缮 +概 +迳 +痧 +鲲 +俯 +售 +笼 +痣 +扉 +挖 +满 +咋 +援 +邱 +扇 +歪 +便 +玑 +绦 +峡 +蛇 +叨 +〖 +泽 +胃 +斓 +喋 +怂 +坟 +猪 +该 +蚬 +炕 +弥 +赞 +棣 +晔 +娠 +挲 +狡 +创 +疖 +铕 +镭 +稷 +挫 +弭 +啾 +翔 +粉 +履 +苘 +哦 +楼 +秕 +铂 +土 +锣 +瘟 +挣 +栉 +习 +享 +桢 +袅 +磨 +桂 +谦 +延 +坚 +蔚 +噗 +署 +谟 +猬 +钎 +恐 +嬉 +雒 +倦 +衅 +亏 +璩 +睹 +刻 +殿 +王 +算 +雕 +麻 +丘 +柯 +骆 +丸 +塍 +谚 +添 +鲈 +垓 +桎 +蚯 +芥 +予 +飕 +镦 +谌 +窗 +醚 +菀 +亮 +搪 +莺 +蒿 +羁 +足 +J +真 +轶 +悬 +衷 +靛 +翊 +掩 +哒 +炅 +掐 +冼 +妮 +l +谐 +稚 +荆 +擒 +犯 +陵 +虏 +浓 +崽 +刍 +陌 +傻 +孜 +千 +靖 +演 +矜 +钕 +煽 +杰 +酗 +渗 +伞 +栋 +俗 +泫 +戍 +罕 +沾 +疽 +灏 +煦 +芬 +磴 +叱 +阱 +榉 +湃 +蜀 +叉 +醒 +彪 +租 +郡 +篷 +屎 +良 +垢 +隗 +弱 +陨 +峪 +砷 +掴 +颁 +胎 +雯 +绵 +贬 +沐 +撵 +隘 +篙 +暖 +曹 +陡 +栓 +填 +臼 +彦 +瓶 +琪 +潼 +哪 +鸡 +摩 +啦 +俟 +锋 +域 +耻 +蔫 +疯 +纹 +撇 +毒 +绶 +痛 +酯 +忍 +爪 +赳 +歆 +嘹 +辕 +烈 +册 +朴 +钱 +吮 +毯 +癜 +娃 +谀 +邵 +厮 +炽 +璞 +邃 +丐 +追 +词 +瓒 +忆 +轧 +芫 +谯 +喷 +弟 +半 +冕 +裙 +掖 +墉 +绮 +寝 +苔 +势 +顷 +褥 +切 +衮 +君 +佳 +嫒 +蚩 +霞 +佚 +洙 +逊 +镖 +暹 +唛 +& +殒 +顶 +碗 +獗 +轭 +铺 +蛊 +废 +恹 +汨 +崩 +珍 +那 +杵 +曲 +纺 +夏 +薰 +傀 +闳 +淬 +姘 +舀 +拧 +卷 +楂 +恍 +讪 +厩 +寮 +篪 +赓 +乘 +灭 +盅 +鞣 +沟 +慎 +挂 +饺 +鼾 +杳 +树 +缨 +丛 +絮 +娌 +臻 +嗳 +篡 +侩 +述 +衰 +矛 +圈 +蚜 +匕 +筹 +匿 +濞 +晨 +叶 +骋 +郝 +挚 +蚴 +滞 +增 +侍 +描 +瓣 +吖 +嫦 +蟒 +匾 +圣 +赌 +毡 +癞 +恺 +百 +曳 +需 +篓 +肮 +庖 +帏 +卿 +驿 +遗 +蹬 +鬓 +骡 +歉 +芎 +胳 +屐 +禽 +烦 +晌 +寄 +媾 +狄 +翡 +苒 +船 +廉 +终 +痞 +殇 +々 +畦 +饶 +改 +拆 +悻 +萄 +£ +瓿 +乃 +訾 +桅 +匮 +溧 +拥 +纱 +铍 +骗 +蕃 +龋 +缬 +父 +佐 +疚 +栎 +醍 +掳 +蓄 +x +惆 +颜 +鲆 +榆 +〔 +猎 +敌 +暴 +谥 +鲫 +贾 +罗 +玻 +缄 +扦 +芪 +癣 +落 +徒 +臾 +恿 +猩 +托 +邴 +肄 +牵 +春 +陛 +耀 +刊 +拓 +蓓 +邳 +堕 +寇 +枉 +淌 +啡 +湄 +兽 +酷 +萼 +碚 +濠 +萤 +夹 +旬 +戮 +梭 +琥 +椭 +昔 +勺 +蜊 +绐 +晚 +孺 +僵 +宣 +摄 +冽 +旨 +萌 +忙 +蚤 +眉 +噼 +蟑 +付 +契 +瓜 +悼 +颡 +壁 +曾 +窕 +颢 +澎 +仿 +俑 +浑 +嵌 +浣 +乍 +碌 +褪 +乱 +蔟 +隙 +玩 +剐 +葫 +箫 +纲 +围 +伐 +决 +伙 +漩 +瑟 +刑 +肓 +镳 +缓 +蹭 +氨 +皓 +典 +畲 +坍 +铑 +檐 +塑 +洞 +倬 +储 +胴 +淳 +戾 +吐 +灼 +惺 +妙 +毕 +珐 +缈 +虱 +盖 +羰 +鸿 +磅 +谓 +髅 +娴 +苴 +唷 +蚣 +霹 +抨 +贤 +唠 +犬 +誓 +逍 +庠 +逼 +麓 +籼 +釉 +呜 +碧 +秧 +氩 +摔 +霄 +穸 +纨 +辟 +妈 +映 +完 +牛 +缴 +嗷 +炊 +恩 +荔 +茆 +掉 +紊 +慌 +莓 +羟 +阙 +萁 +磐 +另 +蕹 +辱 +鳐 +湮 +吡 +吩 +唐 +睦 +垠 +舒 +圜 +冗 +瞿 +溺 +芾 +囱 +匠 +僳 +汐 +菩 +饬 +漓 +黑 +霰 +浸 +濡 +窥 +毂 +蒡 +兢 +驻 +鹉 +芮 +诙 +迫 +雳 +厂 +忐 +臆 +猴 +鸣 +蚪 +栈 +箕 +羡 +渐 +莆 +捍 +眈 +哓 +趴 +蹼 +埕 +嚣 +骛 +宏 +淄 +斑 +噜 +严 +瑛 +垃 +椎 +诱 +压 +庾 +绞 +焘 +廿 +抡 +迄 +棘 +夫 +纬 +锹 +眨 +瞌 +侠 +脐 +竞 +瀑 +孳 +骧 +遁 +姜 +颦 +荪 +滚 +萦 +伪 +逸 +粳 +爬 +锁 +矣 +役 +趣 +洒 +颔 +诏 +逐 +奸 +甭 +惠 +攀 +蹄 +泛 +尼 +拼 +阮 +鹰 +亚 +颈 +惑 +勒 +〉 +际 +肛 +爷 +刚 +钨 +丰 +养 +冶 +鲽 +辉 +蔻 +画 +覆 +皴 +妊 +麦 +返 +醉 +皂 +擀 +〗 +酶 +凑 +粹 +悟 +诀 +硖 +港 +卜 +z +杀 +涕 +± +舍 +铠 +抵 +弛 +段 +敝 +镐 +奠 +拂 +轴 +跛 +袱 +e +t +沉 +菇 +俎 +薪 +峦 +秭 +蟹 +历 +盟 +菠 +寡 +液 +肢 +喻 +染 +裱 +悱 +抱 +氙 +赤 +捅 +猛 +跑 +氮 +谣 +仁 +尺 +辊 +窍 +烙 +衍 +架 +擦 +倏 +璐 +瑁 +币 +楞 +胖 +夔 +趸 +邛 +惴 +饕 +虔 +蝎 +§ +哉 +贝 +宽 +辫 +炮 +扩 +饲 +籽 +魏 +菟 +锰 +伍 +猝 +末 +琳 +哚 +蛎 +邂 +呀 +姿 +鄞 +却 +歧 +仙 +恸 +椐 +森 +牒 +寤 +袒 +婆 +虢 +雅 +钉 +朵 +贼 +欲 +苞 +寰 +故 +龚 +坭 +嘘 +咫 +礼 +硷 +兀 +睢 +汶 +’ +铲 +烧 +绕 +诃 +浃 +钿 +哺 +柜 +讼 +颊 +璁 +腔 +洽 +咐 +脲 +簌 +筠 +镣 +玮 +鞠 +谁 +兼 +姆 +挥 +梯 +蝴 +谘 +漕 +刷 +躏 +宦 +弼 +b +垌 +劈 +麟 +莉 +揭 +笙 +渎 +仕 +嗤 +仓 +配 +怏 +抬 +错 +泯 +镊 +孰 +猿 +邪 +仍 +秋 +鼬 +壹 +歇 +吵 +炼 +< +尧 +射 +柬 +廷 +胧 +霾 +凳 +隋 +肚 +浮 +梦 +祥 +株 +堵 +退 +L +鹫 +跎 +凶 +毽 +荟 +炫 +栩 +玳 +甜 +沂 +鹿 +顽 +伯 +爹 +赔 +蛴 +徐 +匡 +欣 +狰 +缸 +雹 +蟆 +疤 +默 +沤 +啜 +痂 +衣 +禅 +w +i +h +辽 +葳 +黝 +钗 +停 +沽 +棒 +馨 +颌 +肉 +吴 +硫 +悯 +劾 +娈 +马 +啧 +吊 +悌 +镑 +峭 +帆 +瀣 +涉 +咸 +疸 +滋 +泣 +翦 +拙 +癸 +钥 +蜒 ++ +尾 +庄 +凝 +泉 +婢 +渴 +谊 +乞 +陆 +锉 +糊 +鸦 +淮 +I +B +N +晦 +弗 +乔 +庥 +葡 +尻 +席 +橡 +傣 +渣 +拿 +惩 +麋 +斛 +缃 +矮 +蛏 +岘 +鸽 +姐 +膏 +催 +奔 +镒 +喱 +蠡 +摧 +钯 +胤 +柠 +拐 +璋 +鸥 +卢 +荡 +倾 +^ +_ +珀 +逄 +萧 +塾 +掇 +贮 +笆 +聂 +圃 +冲 +嵬 +M +滔 +笕 +值 
+炙 +偶 +蜱 +搐 +梆 +汪 +蔬 +腑 +鸯 +蹇 +敞 +绯 +仨 +祯 +谆 +梧 +糗 +鑫 +啸 +豺 +囹 +猾 +巢 +柄 +瀛 +筑 +踌 +沭 +暗 +苁 +鱿 +蹉 +脂 +蘖 +牢 +热 +木 +吸 +溃 +宠 +序 +泞 +偿 +拜 +檩 +厚 +朐 +毗 +螳 +吞 +媚 +朽 +担 +蝗 +橘 +畴 +祈 +糟 +盱 +隼 +郜 +惜 +珠 +裨 +铵 +焙 +琚 +唯 +咚 +噪 +骊 +丫 +滢 +勤 +棉 +呸 +咣 +淀 +隔 +蕾 +窈 +饨 +挨 +煅 +短 +匙 +粕 +镜 +赣 +撕 +墩 +酬 +馁 +豌 +颐 +抗 +酣 +氓 +佑 +搁 +哭 +递 +耷 +涡 +桃 +贻 +碣 +截 +瘦 +昭 +镌 +蔓 +氚 +甲 +猕 +蕴 +蓬 +散 +拾 +纛 +狼 +猷 +铎 +埋 +旖 +矾 +讳 +囊 +糜 +迈 +粟 +蚂 +紧 +鲳 +瘢 +栽 +稼 +羊 +锄 +斟 +睁 +桥 +瓮 +蹙 +祉 +醺 +鼻 +昱 +剃 +跳 +篱 +跷 +蒜 +翎 +宅 +晖 +嗑 +壑 +峻 +癫 +屏 +狠 +陋 +袜 +途 +憎 +祀 +莹 +滟 +佶 +溥 +臣 +约 +盛 +峰 +磁 +慵 +婪 +拦 +莅 +朕 +鹦 +粲 +裤 +哎 +疡 +嫖 +琵 +窟 +堪 +谛 +嘉 +儡 +鳝 +斩 +郾 +驸 +酊 +妄 +胜 +贺 +徙 +傅 +噌 +钢 +栅 +庇 +恋 +匝 +巯 +邈 +尸 +锚 +粗 +佟 +蛟 +薹 +纵 +蚊 +郅 +绢 +锐 +苗 +俞 +篆 +淆 +膀 +鲜 +煎 +诶 +秽 +寻 +涮 +刺 +怀 +噶 +巨 +褰 +魅 +灶 +灌 +桉 +藕 +谜 +舸 +薄 +搀 +恽 +借 +牯 +痉 +渥 +愿 +亓 +耘 +杠 +柩 +锔 +蚶 +钣 +珈 +喘 +蹒 +幽 +赐 +稗 +晤 +莱 +泔 +扯 +肯 +菪 +裆 +腩 +豉 +疆 +骜 +腐 +倭 +珏 +唔 +粮 +亡 +润 +慰 +伽 +橄 +玄 +誉 +醐 +胆 +龊 +粼 +塬 +陇 +彼 +削 +嗣 +绾 +芽 +妗 +垭 +瘴 +爽 +薏 +寨 +龈 +泠 +弹 +赢 +漪 +猫 +嘧 +涂 +恤 +圭 +茧 +烽 +屑 +痕 +巾 +赖 +荸 +凰 +腮 +畈 +亵 +蹲 +偃 +苇 +澜 +艮 +换 +骺 +烘 +苕 +梓 +颉 +肇 +哗 +悄 +氤 +涠 +葬 +屠 +鹭 +植 +竺 +佯 +诣 +鲇 +瘀 +鲅 +邦 +移 +滁 +冯 +耕 +癔 +戌 +茬 +沁 +巩 +悠 +湘 +洪 +痹 +锟 +循 +谋 +腕 +鳃 +钠 +捞 +焉 +迎 +碱 +伫 +急 +榷 +奈 +邝 +卯 +辄 +皲 +卟 +醛 +畹 +忧 +稳 +雄 +昼 +缩 +阈 +睑 +扌 +耗 +曦 +涅 +捏 +瞧 +邕 +淖 +漉 +铝 +耦 +禹 +湛 +喽 +莼 +琅 +诸 +苎 +纂 +硅 +始 +嗨 +傥 +燃 +臂 +赅 +嘈 +呆 +贵 +屹 +壮 +肋 +亍 +蚀 +卅 +豹 +腆 +邬 +迭 +浊 +} +童 +螂 +捐 +圩 +勐 +触 +寞 +汊 +壤 +荫 +膺 +渌 +芳 +懿 +遴 +螈 +泰 +蓼 +蛤 +茜 +舅 +枫 +朔 +膝 +眙 +避 +梅 +判 +鹜 +璜 +牍 +缅 +垫 +藻 +黔 +侥 +惚 +懂 +踩 +腰 +腈 +札 +丞 +唾 +慈 +顿 +摹 +荻 +琬 +~ +斧 +沈 +滂 +胁 +胀 +幄 +莜 +Z +匀 +鄄 +掌 +绰 +茎 +焚 +赋 +萱 +谑 +汁 +铒 +瞎 +夺 +蜗 +野 +娆 +冀 +弯 +篁 +懵 +灞 +隽 +芡 +脘 +俐 +辩 +芯 +掺 +喏 +膈 +蝈 +觐 +悚 +踹 +蔗 +熠 +鼠 +呵 +抓 +橼 +峨 +畜 +缔 +禾 +崭 +弃 +熊 +摒 +凸 +拗 +穹 +蒙 +抒 +祛 +劝 +闫 +扳 +阵 +醌 +踪 +喵 +侣 +搬 +仅 +荧 +赎 +蝾 +琦 +买 +婧 +瞄 +寓 +皎 +冻 +赝 +箩 +莫 +瞰 +郊 +笫 +姝 +筒 +枪 +遣 +煸 +袋 +舆 +痱 +涛 +母 +〇 +启 +践 +耙 +绲 +盘 +遂 +昊 +搞 +槿 +诬 +纰 +泓 +惨 +檬 +亻 +越 +C +o +憩 +熵 +祷 +钒 +暧 +塔 +阗 +胰 +咄 +娶 +魔 +琶 +钞 +邻 +扬 +杉 +殴 +咽 +弓 +〆 +髻 +】 +吭 +揽 +霆 +拄 +殖 +脆 +彻 +岩 +芝 +勃 +辣 +剌 +钝 +嘎 +甄 +佘 +皖 +伦 +授 +徕 +憔 +挪 +皇 +庞 +稔 +芜 +踏 +溴 +兖 +卒 +擢 +饥 +鳞 +煲 +‰ +账 +颗 +叻 +斯 +捧 +鳍 +琮 +讹 +蛙 +纽 +谭 +酸 +兔 +莒 +睇 +伟 +觑 +羲 +嗜 +宜 +褐 +旎 +辛 +卦 +诘 +筋 +鎏 +溪 +挛 +熔 +阜 +晰 +鳅 +丢 +奚 +灸 +呱 +献 +陉 +黛 +鸪 +甾 +萨 +疮 +拯 +洲 +疹 +辑 +叙 +恻 +谒 +允 +柔 +烂 +氏 +逅 +漆 +拎 +惋 +扈 +湟 +纭 +啕 +掬 +擞 +哥 +忽 +涤 +鸵 +靡 +郗 +瓷 +扁 +廊 +怨 +雏 +钮 +敦 +E +懦 +憋 +汀 +拚 +啉 +腌 +岸 +f +痼 +瞅 +尊 +咀 +眩 +飙 +忌 +仝 +迦 +熬 +毫 +胯 +篑 +茄 +腺 +凄 +舛 +碴 +锵 +诧 +羯 +後 +漏 +汤 +宓 +仞 +蚁 +壶 +谰 +皑 +铄 +棰 +罔 +辅 +晶 +苦 +牟 +闽 +\ +烃 +饮 +聿 +丙 +蛳 +朱 +煤 +涔 +鳖 +犁 +罐 +荼 +砒 +淦 +妤 +黏 +戎 +孑 +婕 +瑾 +戢 +钵 +枣 +捋 +砥 +衩 +狙 +桠 +稣 +阎 +肃 +梏 +诫 +孪 +昶 +婊 +衫 +嗔 +侃 +塞 +蜃 +樵 +峒 +貌 +屿 +欺 +缫 +阐 +栖 +诟 +珞 +荭 +吝 +萍 +嗽 +恂 +啻 +蜴 +磬 +峋 +俸 +豫 +谎 +徊 +镍 +韬 +魇 +晴 +U +囟 +猜 +蛮 +坐 +囿 +伴 +亭 +肝 +佗 +蝠 +妃 +胞 +滩 +榴 +氖 +垩 +苋 +砣 +扪 +馏 +姓 +轩 +厉 +夥 +侈 +禀 +垒 +岑 +赏 +钛 +辐 +痔 +披 +纸 +碳 +“ +坞 +蠓 +挤 +荥 +沅 +悔 +铧 +帼 +蒌 +蝇 +a +p +y +n +g +哀 +浆 +瑶 +凿 +桶 +馈 +皮 +奴 +苜 +佤 +伶 +晗 +铱 +炬 +优 +弊 +氢 +恃 +甫 +攥 +端 +锌 +灰 +稹 +炝 +曙 +邋 +亥 +眶 +碾 +拉 +萝 +绔 +捷 +浍 +腋 +姑 +菖 +凌 +涞 +麽 +锢 +桨 +潢 +绎 +镰 +殆 +锑 +渝 +铬 +困 +绽 +觎 +匈 +糙 +暑 +裹 +鸟 +盔 +肽 +迷 +綦 +『 +亳 +佝 +俘 +钴 +觇 +骥 +仆 +疝 +跪 +婶 +郯 +瀹 +唉 +脖 +踞 +针 +晾 +忒 +扼 +瞩 +叛 +椒 +疟 +嗡 +邗 +肆 +跆 +玫 +忡 +捣 +咧 +唆 +艄 +蘑 +潦 +笛 +阚 +沸 +泻 +掊 +菽 +贫 +斥 +髂 +孢 +镂 +赂 +麝 +鸾 +屡 +衬 +苷 +恪 +叠 +希 +粤 +爻 +喝 +茫 +惬 +郸 +绻 +庸 +撅 +碟 +宄 +妹 +膛 +叮 +饵 +崛 +嗲 +椅 +冤 +搅 +咕 +敛 +尹 +垦 +闷 +蝉 +霎 +勰 +败 +蓑 +泸 +肤 +鹌 +幌 +焦 +浠 +鞍 +刁 +舰 +乙 +竿 +裔 +。 +茵 +函 +伊 +兄 +丨 +娜 +匍 +謇 +莪 +宥 +似 +蝽 +翳 +酪 +翠 +粑 +薇 +祢 +骏 +赠 +叫 +Q +噤 +噻 +竖 +芗 +莠 +潭 +俊 +羿 +耜 +O +郫 +趁 +嗪 +囚 +蹶 +芒 +洁 +笋 +鹑 +敲 +硝 +啶 +堡 +渲 +揩 +』 +携 +宿 +遒 +颍 +扭 +棱 +割 +萜 +蔸 +葵 +琴 +捂 +饰 +衙 +耿 +掠 +募 +岂 +窖 +涟 +蔺 +瘤 +柞 +瞪 +怜 +匹 +距 +楔 +炜 +哆 +秦 +缎 +幼 +茁 +绪 +痨 +恨 +楸 +娅 +瓦 +桩 +雪 +嬴 +伏 +榔 +妥 +铿 +拌 +眠 +雍 +缇 +‘ +卓 +搓 +哌 +觞 +噩 +屈 +哧 +髓 +咦 +巅 +娑 +侑 +淫 +膳 +祝 +勾 +姊 +莴 
+胄 +疃 +薛 +蜷 +胛 +巷 +芙 +芋 +熙 +闰 +勿 +窃 +狱 +剩 +钏 +幢 +陟 +铛 +慧 +靴 +耍 +k +浙 +浇 +飨 +惟 +绗 +祜 +澈 +啼 +咪 +磷 +摞 +诅 +郦 +抹 +跃 +壬 +吕 +肖 +琏 +颤 +尴 +剡 +抠 +凋 +赚 +泊 +津 +宕 +殷 +倔 +氲 +漫 +邺 +涎 +怠 +$ +垮 +荬 +遵 +俏 +叹 +噢 +饽 +蜘 +孙 +筵 +疼 +鞭 +羧 +牦 +箭 +潴 +c +眸 +祭 +髯 +啖 +坳 +愁 +芩 +驮 +倡 +巽 +穰 +沃 +胚 +怒 +凤 +槛 +剂 +趵 +嫁 +v +邢 +灯 +鄢 +桐 +睽 +檗 +锯 +槟 +婷 +嵋 +圻 +诗 +蕈 +颠 +遭 +痢 +芸 +怯 +馥 +竭 +锗 +徜 +恭 +遍 +籁 +剑 +嘱 +苡 +龄 +僧 +桑 +潸 +弘 +澶 +楹 +悲 +讫 +愤 +腥 +悸 +谍 +椹 +呢 +桓 +葭 +攫 +阀 +翰 +躲 +敖 +柑 +郎 +笨 +橇 +呃 +魁 +燎 +脓 +葩 +磋 +垛 +玺 +狮 +沓 +砜 +蕊 +锺 +罹 +蕉 +翱 +虐 +闾 +巫 +旦 +茱 +嬷 +枯 +鹏 +贡 +芹 +汛 +矫 +绁 +拣 +禺 +佃 +讣 +舫 +惯 +乳 +趋 +疲 +挽 +岚 +虾 +衾 +蠹 +蹂 +飓 +氦 +铖 +孩 +稞 +瑜 +壅 +掀 +勘 +妓 +畅 +髋 +W +庐 +牲 +蓿 +榕 +练 +垣 +唱 +邸 +菲 +昆 +婺 +穿 +绡 +麒 +蚱 +掂 +愚 +泷 +涪 +漳 +妩 +娉 +榄 +讷 +觅 +旧 +藤 +煮 +呛 +柳 +腓 +叭 +庵 +烷 +阡 +罂 +蜕 +擂 +猖 +咿 +媲 +脉 +【 +沏 +貅 +黠 +熏 +哲 +烁 +坦 +酵 +兜 +× +潇 +撒 +剽 +珩 +圹 +乾 +摸 +樟 +帽 +嗒 +襄 +魂 +轿 +憬 +锡 +〕 +喃 +皆 +咖 +隅 +脸 +残 +泮 +袂 +鹂 +珊 +囤 +捆 +咤 +误 +徨 +闹 +淙 +芊 +淋 +怆 +囗 +拨 +梳 +渤 +R +G +绨 +蚓 +婀 +幡 +狩 +麾 +谢 +唢 +裸 +旌 +伉 +纶 +裂 +驳 +砼 +咛 +澄 +樨 +蹈 +宙 +澍 +倍 +貔 +操 +勇 +蟠 +摈 +砧 +虬 +够 +缁 +悦 +藿 +撸 +艹 +摁 +淹 +豇 +虎 +榭 +ˉ +吱 +d +° +喧 +荀 +踱 +侮 +奋 +偕 +饷 +犍 +惮 +坑 +璎 +徘 +宛 +妆 +袈 +倩 +窦 +昂 +荏 +乖 +K +怅 +撰 +鳙 +牙 +袁 +酞 +X +痿 +琼 +闸 +雁 +趾 +荚 +虻 +涝 +《 +杏 +韭 +偈 +烤 +绫 +鞘 +卉 +症 +遢 +蓥 +诋 +杭 +荨 +匆 +竣 +簪 +辙 +敕 +虞 +丹 +缭 +咩 +黟 +m +淤 +瑕 +咂 +铉 +硼 +茨 +嶂 +痒 +畸 +敬 +涿 +粪 +窘 +熟 +叔 +嫔 +盾 +忱 +裘 +憾 +梵 +赡 +珙 +咯 +娘 +庙 +溯 +胺 +葱 +痪 +摊 +荷 +卞 +乒 +髦 +寐 +铭 +坩 +胗 +枷 +爆 +溟 +嚼 +羚 +砬 +轨 +惊 +挠 +罄 +竽 +菏 +氧 +浅 +楣 +盼 +枢 +炸 +阆 +杯 +谏 +噬 +淇 +渺 +俪 +秆 +墓 +泪 +跻 +砌 +痰 +垡 +渡 +耽 +釜 +讶 +鳎 +煞 +呗 +韶 +舶 +绷 +鹳 +缜 +旷 +铊 +皱 +龌 +檀 +霖 +奄 +槐 +艳 +蝶 +旋 +哝 +赶 +骞 +蚧 +腊 +盈 +丁 +` +蜚 +矸 +蝙 +睨 +嚓 +僻 +鬼 +醴 +夜 +彝 +磊 +笔 +拔 +栀 +糕 +厦 +邰 +纫 +逭 +纤 +眦 +膊 +馍 +躇 +烯 +蘼 +冬 +诤 +暄 +骶 +哑 +瘠 +」 +臊 +丕 +愈 +咱 +螺 +擅 +跋 +搏 +硪 +谄 +笠 +淡 +嘿 +骅 +谧 +鼎 +皋 +姚 +歼 +蠢 +驼 +耳 +胬 +挝 +涯 +狗 +蒽 +孓 +犷 +凉 +芦 +箴 +铤 +孤 +嘛 +坤 +V +茴 +朦 +挞 +尖 +橙 +诞 +搴 +碇 +洵 +浚 +帚 +蜍 +漯 +柘 +嚎 +讽 +芭 +荤 +咻 +祠 +秉 +跖 +埃 +吓 +糯 +眷 +馒 +惹 +娼 +鲑 +嫩 +讴 +轮 +瞥 +靶 +褚 +乏 +缤 +宋 +帧 +删 +驱 +碎 +扑 +俩 +俄 +偏 +涣 +竹 +噱 +皙 +佰 +渚 +唧 +斡 +# +镉 +刀 +崎 +筐 +佣 +夭 +贰 +肴 +峙 +哔 +艿 +匐 +牺 +镛 +缘 +仡 +嫡 +劣 +枸 +堀 +梨 +簿 +鸭 +蒸 +亦 +稽 +浴 +{ +衢 +束 +槲 +j +阁 +揍 +疥 +棋 +潋 +聪 +窜 +乓 +睛 +插 +冉 +阪 +苍 +搽 +「 +蟾 +螟 +幸 +仇 +樽 +撂 +慢 +跤 +幔 +俚 +淅 +覃 +觊 +溶 +妖 +帛 +侨 +曰 +妾 +泗 +· +: +瀘 +風 +Ë +( +) +∶ +紅 +紗 +瑭 +雲 +頭 +鶏 +財 +許 +• +¥ +樂 +焗 +麗 +— +; +滙 +東 +榮 +繪 +興 +… +門 +業 +π +楊 +國 +顧 +é +盤 +寳 +Λ +龍 +鳳 +島 +誌 +緣 +結 +銭 +萬 +勝 +祎 +璟 +優 +歡 +臨 +時 +購 += +★ +藍 +昇 +鐵 +觀 +勅 +農 +聲 +畫 +兿 +術 +發 +劉 +記 +專 +耑 +園 +書 +壴 +種 +Ο +● +褀 +號 +銀 +匯 +敟 +锘 +葉 +橪 +廣 +進 +蒄 +鑽 +阝 +祙 +貢 +鍋 +豊 +夬 +喆 +團 +閣 +開 +燁 +賓 +館 +酡 +沔 +順 ++ +硚 +劵 +饸 +陽 +車 +湓 +復 +萊 +氣 +軒 +華 +堃 +迮 +纟 +戶 +馬 +學 +裡 +電 +嶽 +獨 +マ +シ +サ +ジ +燘 +袪 +環 +❤ +臺 +灣 +専 +賣 +孖 +聖 +攝 +線 +▪ +α +傢 +俬 +夢 +達 +莊 +喬 +貝 +薩 +劍 +羅 +壓 +棛 +饦 +尃 +璈 +囍 +醫 +G +I +A +# +N +鷄 +髙 +嬰 +啓 +約 +隹 +潔 +賴 +藝 +~ +寶 +籣 +麺 +  +嶺 +√ +義 +網 +峩 +長 +∧ +魚 +機 +構 +② +鳯 +偉 +L +B +㙟 +畵 +鴿 +' +詩 +溝 +嚞 +屌 +藔 +佧 +玥 +蘭 +織 +1 +3 +9 +0 +7 +點 +砭 +鴨 +鋪 +銘 +廳 +弍 +‧ +創 +湯 +坶 +℃ +卩 +骝 +& +烜 +荘 +當 +潤 +扞 +係 +懷 +碶 +钅 +蚨 +讠 +☆ +叢 +爲 +埗 +涫 +塗 +→ +楽 +現 +鯨 +愛 +瑪 +鈺 +忄 +悶 +藥 +飾 +樓 +視 +孬 +ㆍ +燚 +苪 +師 +① +丼 +锽 +│ +韓 +標 +è +兒 +閏 +匋 +張 +漢 +Ü +髪 +會 +閑 +檔 +習 +裝 +の +峯 +菘 +輝 +И +雞 +釣 +億 +浐 +K +O +R +8 +H +E +P +T +W +D +S +C +M +F +姌 +饹 +» +晞 +廰 +ä +嵯 +鷹 +負 +飲 +絲 +冚 +楗 +澤 +綫 +區 +❋ +← +質 +靑 +揚 +③ +滬 +統 +産 +協 +﹑ +乸 +畐 +經 +運 +際 +洺 +岽 +為 +粵 +諾 +崋 +豐 +碁 +ɔ +V +2 +6 +齋 +誠 +訂 +´ +勑 +雙 +陳 +無 +í +泩 +媄 +夌 +刂 +i +c +t +o +r +a +嘢 +耄 +燴 +暃 +壽 +媽 +靈 +抻 +體 +唻 +É +冮 +甹 +鎮 +錦 +ʌ +蜛 +蠄 +尓 +駕 +戀 +飬 +逹 +倫 +貴 +極 +Я +Й +寬 +磚 +嶪 +郎 +職 +| +間 +n +d +剎 +伈 +課 +飛 +橋 +瘊 +№ +譜 +骓 +圗 +滘 +縣 +粿 +咅 +養 +濤 +彳 +® +% +Ⅱ +啰 +㴪 +見 +矞 +薬 +糁 +邨 +鲮 +顔 +罱 +З +選 +話 +贏 +氪 +俵 +競 +瑩 +繡 +枱 +β +綉 +á +獅 +爾 +™ +麵 +戋 +淩 +徳 +個 +劇 +場 +務 +簡 +寵 +h +實 +膠 +轱 +圖 +築 +嘣 +樹 +㸃 +營 +耵 +孫 +饃 +鄺 +飯 +麯 +遠 +輸 +坫 +孃 +乚 
+閃 +鏢 +㎡ +題 +廠 +關 +↑ +爺 +將 +軍 +連 +篦 +覌 +參 +箸 +- +窠 +棽 +寕 +夀 +爰 +歐 +呙 +閥 +頡 +熱 +雎 +垟 +裟 +凬 +勁 +帑 +馕 +夆 +疌 +枼 +馮 +貨 +蒤 +樸 +彧 +旸 +靜 +龢 +暢 +㐱 +鳥 +珺 +鏡 +灡 +爭 +堷 +廚 +Ó +騰 +診 +┅ +蘇 +褔 +凱 +頂 +豕 +亞 +帥 +嘬 +⊥ +仺 +桖 +複 +饣 +絡 +穂 +顏 +棟 +納 +▏ +濟 +親 +設 +計 +攵 +埌 +烺 +ò +頤 +燦 +蓮 +撻 +節 +講 +濱 +濃 +娽 +洳 +朿 +燈 +鈴 +護 +膚 +铔 +過 +補 +Z +U +5 +4 +坋 +闿 +䖝 +餘 +缐 +铞 +貿 +铪 +桼 +趙 +鍊 +[ +㐂 +垚 +菓 +揸 +捲 +鐘 +滏 +𣇉 +爍 +輪 +燜 +鴻 +鮮 +動 +鹞 +鷗 +丄 +慶 +鉌 +翥 +飮 +腸 +⇋ +漁 +覺 +來 +熘 +昴 +翏 +鲱 +圧 +鄉 +萭 +頔 +爐 +嫚 +г +貭 +類 +聯 +幛 +輕 +訓 +鑒 +夋 +锨 +芃 +珣 +䝉 +扙 +嵐 +銷 +處 +ㄱ +語 +誘 +苝 +歸 +儀 +燒 +楿 +內 +粢 +葒 +奧 +麥 +礻 +滿 +蠔 +穵 +瞭 +態 +鱬 +榞 +硂 +鄭 +黃 +煙 +祐 +奓 +逺 +* +瑄 +獲 +聞 +薦 +讀 +這 +樣 +決 +問 +啟 +們 +執 +説 +轉 +單 +隨 +唘 +帶 +倉 +庫 +還 +贈 +尙 +皺 +■ +餅 +產 +○ +∈ +報 +狀 +楓 +賠 +琯 +嗮 +禮 +` +傳 +> +≤ +嗞 +Φ +≥ +換 +咭 +∣ +↓ +曬 +ε +応 +寫 +″ +終 +様 +純 +費 +療 +聨 +凍 +壐 +郵 +ü +黒 +∫ +製 +塊 +調 +軽 +確 +撃 +級 +馴 +Ⅲ +涇 +繹 +數 +碼 +證 +狒 +処 +劑 +< +晧 +賀 +衆 +] +櫥 +兩 +陰 +絶 +對 +鯉 +憶 +◎ +p +e +Y +蕒 +煖 +頓 +測 +試 +鼽 +僑 +碩 +妝 +帯 +≈ +鐡 +舖 +權 +喫 +倆 +ˋ +該 +悅 +ā +俫 +. +f +s +b +m +k +g +u +j +貼 +淨 +濕 +針 +適 +備 +l +/ +給 +謢 +強 +觸 +衛 +與 +⊙ +$ +緯 +變 +⑴ +⑵ +⑶ +㎏ +殺 +∩ +幚 +─ +價 +▲ +離 +ú +ó +飄 +烏 +関 +閟 +﹝ +﹞ +邏 +輯 +鍵 +驗 +訣 +導 +歷 +屆 +層 +▼ +儱 +錄 +熳 +ē +艦 +吋 +錶 +辧 +飼 +顯 +④ +禦 +販 +気 +対 +枰 +閩 +紀 +幹 +瞓 +貊 +淚 +△ +眞 +墊 +Ω +獻 +褲 +縫 +緑 +亜 +鉅 +餠 +{ +} +◆ +蘆 +薈 +█ +◇ +溫 +彈 +晳 +粧 +犸 +穩 +訊 +崬 +凖 +熥 +П +舊 +條 +紋 +圍 +Ⅳ +筆 +尷 +難 +雜 +錯 +綁 +識 +頰 +鎖 +艶 +□ +殁 +殼 +⑧ +├ +▕ +鵬 +ǐ +ō +ǒ +糝 +綱 +▎ +μ +盜 +饅 +醬 +籤 +蓋 +釀 +鹽 +據 +à +ɡ +辦 +◥ +彐 +┌ +婦 +獸 +鲩 +伱 +ī +蒟 +蒻 +齊 +袆 +腦 +寧 +凈 +妳 +煥 +詢 +偽 +謹 +啫 +鯽 +騷 +鱸 +損 +傷 +鎻 +髮 +買 +冏 +儥 +両 +﹢ +∞ +載 +喰 +z +羙 +悵 +燙 +曉 +員 +組 +徹 +艷 +痠 +鋼 +鼙 +縮 +細 +嚒 +爯 +≠ +維 +" +鱻 +壇 +厍 +帰 +浥 +犇 +薡 +軎 +² +應 +醜 +刪 +緻 +鶴 +賜 +噁 +軌 +尨 +镔 +鷺 +槗 +彌 +葚 +濛 +請 +溇 +緹 +賢 +訪 +獴 +瑅 +資 +縤 +陣 +蕟 +栢 +韻 +祼 +恁 +伢 +謝 +劃 +涑 +總 +衖 +踺 +砋 +凉 +籃 +駿 +苼 +瘋 +昽 +紡 +驊 +腎 +﹗ +響 +杋 +剛 +嚴 +禪 +歓 +槍 +傘 +檸 +檫 +炣 +勢 +鏜 +鎢 +銑 +尐 +減 +奪 +惡 +θ +僮 +婭 +臘 +ū +ì +殻 +鉄 +∑ +蛲 +焼 +緖 +續 +紹 懮 \ No newline at end of file diff --git a/deepdoc/vision/operators.py b/deepdoc/vision/operators.py index 382fe3635ff827799f4907d77f150cca72f2560c..b72ba6d2a323d00a51d027be76cb0526f4e55ec2 100644 --- a/deepdoc/vision/operators.py +++ b/deepdoc/vision/operators.py @@ -1,711 +1,711 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import sys -import six -import cv2 -import numpy as np -import math -from PIL import Image - - -class DecodeImage(object): - """ decode image """ - - def __init__(self, - img_mode='RGB', - channel_first=False, - ignore_orientation=False, - **kwargs): - self.img_mode = img_mode - self.channel_first = channel_first - self.ignore_orientation = ignore_orientation - - def __call__(self, data): - img = data['image'] - if six.PY2: - assert isinstance(img, str) and len( - img) > 0, "invalid input 'img' in DecodeImage" - else: - assert isinstance(img, bytes) and len( - img) > 0, "invalid input 'img' in DecodeImage" - img = np.frombuffer(img, dtype='uint8') - if self.ignore_orientation: - img = cv2.imdecode(img, cv2.IMREAD_IGNORE_ORIENTATION | - cv2.IMREAD_COLOR) - else: - img = cv2.imdecode(img, 1) - if img is None: - return None - if self.img_mode == 'GRAY': - img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - elif self.img_mode == 'RGB': - assert img.shape[2] == 3, 'invalid shape of image[%s]' % ( - img.shape) - img = img[:, :, ::-1] - - if self.channel_first: - img = img.transpose((2, 0, 1)) - - data['image'] = img - return data - - -class StandardizeImage(object): - """normalize image - Args: - mean (list): im - mean - std (list): im / std - is_scale (bool): whether need im / 255 - norm_type (str): type in ['mean_std', 'none'] - """ - - def __init__(self, mean, std, is_scale=True, norm_type='mean_std'): - self.mean = mean - self.std = std - self.is_scale = is_scale - self.norm_type = norm_type - - def __call__(self, im, im_info): - """ - Args: - im (np.ndarray): image (np.ndarray) - im_info (dict): info of image - Returns: - im (np.ndarray): processed image (np.ndarray) - im_info (dict): info of processed image - """ - im = im.astype(np.float32, copy=False) - if self.is_scale: - scale = 1.0 / 255.0 - im *= scale - - if self.norm_type == 'mean_std': - mean = np.array(self.mean)[np.newaxis, np.newaxis, :] - std = np.array(self.std)[np.newaxis, np.newaxis, :] - im -= mean - im /= std - return im, im_info - - -class NormalizeImage(object): - """ normalize image such as substract mean, divide std - """ - - def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs): - if isinstance(scale, str): - scale = eval(scale) - self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) - mean = mean if mean is not None else [0.485, 0.456, 0.406] - std = std if std is not None else [0.229, 0.224, 0.225] - - shape = (3, 1, 1) if order == 'chw' else (1, 1, 3) - self.mean = np.array(mean).reshape(shape).astype('float32') - self.std = np.array(std).reshape(shape).astype('float32') - - def __call__(self, data): - img = data['image'] - from PIL import Image - if isinstance(img, Image.Image): - img = np.array(img) - assert isinstance(img, - np.ndarray), "invalid input 'img' in NormalizeImage" - data['image'] = ( - img.astype('float32') * self.scale - self.mean) / self.std - return data - - -class ToCHWImage(object): - """ convert hwc image to chw image - """ - - def __init__(self, **kwargs): - pass - - def __call__(self, data): - img = data['image'] - from PIL import Image - if isinstance(img, Image.Image): - img = np.array(img) - data['image'] = img.transpose((2, 0, 1)) - return data - - -class Fasttext(object): - def __init__(self, path="None", **kwargs): - import fasttext - self.fast_model = fasttext.load_model(path) - - def __call__(self, data): - label = data['label'] - fast_label = self.fast_model[label] - data['fast_label'] = fast_label - return data - - -class KeepKeys(object): - def 
__init__(self, keep_keys, **kwargs): - self.keep_keys = keep_keys - - def __call__(self, data): - data_list = [] - for key in self.keep_keys: - data_list.append(data[key]) - return data_list - - -class Pad(object): - def __init__(self, size=None, size_div=32, **kwargs): - if size is not None and not isinstance(size, (int, list, tuple)): - raise TypeError("Type of target_size is invalid. Now is {}".format( - type(size))) - if isinstance(size, int): - size = [size, size] - self.size = size - self.size_div = size_div - - def __call__(self, data): - - img = data['image'] - img_h, img_w = img.shape[0], img.shape[1] - if self.size: - resize_h2, resize_w2 = self.size - assert ( - img_h < resize_h2 and img_w < resize_w2 - ), '(h, w) of target size should be greater than (img_h, img_w)' - else: - resize_h2 = max( - int(math.ceil(img.shape[0] / self.size_div) * self.size_div), - self.size_div) - resize_w2 = max( - int(math.ceil(img.shape[1] / self.size_div) * self.size_div), - self.size_div) - img = cv2.copyMakeBorder( - img, - 0, - resize_h2 - img_h, - 0, - resize_w2 - img_w, - cv2.BORDER_CONSTANT, - value=0) - data['image'] = img - return data - - -class LinearResize(object): - """resize image by target_size and max_size - Args: - target_size (int): the target size of image - keep_ratio (bool): whether keep_ratio or not, default true - interp (int): method of resize - """ - - def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR): - if isinstance(target_size, int): - target_size = [target_size, target_size] - self.target_size = target_size - self.keep_ratio = keep_ratio - self.interp = interp - - def __call__(self, im, im_info): - """ - Args: - im (np.ndarray): image (np.ndarray) - im_info (dict): info of image - Returns: - im (np.ndarray): processed image (np.ndarray) - im_info (dict): info of processed image - """ - assert len(self.target_size) == 2 - assert self.target_size[0] > 0 and self.target_size[1] > 0 - im_channel = im.shape[2] - im_scale_y, im_scale_x = self.generate_scale(im) - im = cv2.resize( - im, - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=self.interp) - im_info['im_shape'] = np.array(im.shape[:2]).astype('float32') - im_info['scale_factor'] = np.array( - [im_scale_y, im_scale_x]).astype('float32') - return im, im_info - - def generate_scale(self, im): - """ - Args: - im (np.ndarray): image (np.ndarray) - Returns: - im_scale_x: the resize ratio of X - im_scale_y: the resize ratio of Y - """ - origin_shape = im.shape[:2] - im_c = im.shape[2] - if self.keep_ratio: - im_size_min = np.min(origin_shape) - im_size_max = np.max(origin_shape) - target_size_min = np.min(self.target_size) - target_size_max = np.max(self.target_size) - im_scale = float(target_size_min) / float(im_size_min) - if np.round(im_scale * im_size_max) > target_size_max: - im_scale = float(target_size_max) / float(im_size_max) - im_scale_x = im_scale - im_scale_y = im_scale - else: - resize_h, resize_w = self.target_size - im_scale_y = resize_h / float(origin_shape[0]) - im_scale_x = resize_w / float(origin_shape[1]) - return im_scale_y, im_scale_x - - -class Resize(object): - def __init__(self, size=(640, 640), **kwargs): - self.size = size - - def resize_image(self, img): - resize_h, resize_w = self.size - ori_h, ori_w = img.shape[:2] # (h, w, c) - ratio_h = float(resize_h) / ori_h - ratio_w = float(resize_w) / ori_w - img = cv2.resize(img, (int(resize_w), int(resize_h))) - return img, [ratio_h, ratio_w] - - def __call__(self, data): - img = data['image'] - if 'polys' in 
data: - text_polys = data['polys'] - - img_resize, [ratio_h, ratio_w] = self.resize_image(img) - if 'polys' in data: - new_boxes = [] - for box in text_polys: - new_box = [] - for cord in box: - new_box.append([cord[0] * ratio_w, cord[1] * ratio_h]) - new_boxes.append(new_box) - data['polys'] = np.array(new_boxes, dtype=np.float32) - data['image'] = img_resize - return data - - -class DetResizeForTest(object): - def __init__(self, **kwargs): - super(DetResizeForTest, self).__init__() - self.resize_type = 0 - self.keep_ratio = False - if 'image_shape' in kwargs: - self.image_shape = kwargs['image_shape'] - self.resize_type = 1 - if 'keep_ratio' in kwargs: - self.keep_ratio = kwargs['keep_ratio'] - elif 'limit_side_len' in kwargs: - self.limit_side_len = kwargs['limit_side_len'] - self.limit_type = kwargs.get('limit_type', 'min') - elif 'resize_long' in kwargs: - self.resize_type = 2 - self.resize_long = kwargs.get('resize_long', 960) - else: - self.limit_side_len = 736 - self.limit_type = 'min' - - def __call__(self, data): - img = data['image'] - src_h, src_w, _ = img.shape - if sum([src_h, src_w]) < 64: - img = self.image_padding(img) - - if self.resize_type == 0: - # img, shape = self.resize_image_type0(img) - img, [ratio_h, ratio_w] = self.resize_image_type0(img) - elif self.resize_type == 2: - img, [ratio_h, ratio_w] = self.resize_image_type2(img) - else: - # img, shape = self.resize_image_type1(img) - img, [ratio_h, ratio_w] = self.resize_image_type1(img) - data['image'] = img - data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) - return data - - def image_padding(self, im, value=0): - h, w, c = im.shape - im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value - im_pad[:h, :w, :] = im - return im_pad - - def resize_image_type1(self, img): - resize_h, resize_w = self.image_shape - ori_h, ori_w = img.shape[:2] # (h, w, c) - if self.keep_ratio is True: - resize_w = ori_w * resize_h / ori_h - N = math.ceil(resize_w / 32) - resize_w = N * 32 - ratio_h = float(resize_h) / ori_h - ratio_w = float(resize_w) / ori_w - img = cv2.resize(img, (int(resize_w), int(resize_h))) - # return img, np.array([ori_h, ori_w]) - return img, [ratio_h, ratio_w] - - def resize_image_type0(self, img): - """ - resize image to a size multiple of 32 which is required by the network - args: - img(array): array with shape [h, w, c] - return(tuple): - img, (ratio_h, ratio_w) - """ - limit_side_len = self.limit_side_len - h, w, c = img.shape - - # limit the max side - if self.limit_type == 'max': - if max(h, w) > limit_side_len: - if h > w: - ratio = float(limit_side_len) / h - else: - ratio = float(limit_side_len) / w - else: - ratio = 1. - elif self.limit_type == 'min': - if min(h, w) < limit_side_len: - if h < w: - ratio = float(limit_side_len) / h - else: - ratio = float(limit_side_len) / w - else: - ratio = 1. 
- elif self.limit_type == 'resize_long': - ratio = float(limit_side_len) / max(h, w) - else: - raise Exception('not support limit type, image ') - resize_h = int(h * ratio) - resize_w = int(w * ratio) - - resize_h = max(int(round(resize_h / 32) * 32), 32) - resize_w = max(int(round(resize_w / 32) * 32), 32) - - try: - if int(resize_w) <= 0 or int(resize_h) <= 0: - return None, (None, None) - img = cv2.resize(img, (int(resize_w), int(resize_h))) - except BaseException: - print(img.shape, resize_w, resize_h) - sys.exit(0) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return img, [ratio_h, ratio_w] - - def resize_image_type2(self, img): - h, w, _ = img.shape - - resize_w = w - resize_h = h - - if resize_h > resize_w: - ratio = float(self.resize_long) / resize_h - else: - ratio = float(self.resize_long) / resize_w - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - img = cv2.resize(img, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - - return img, [ratio_h, ratio_w] - - -class E2EResizeForTest(object): - def __init__(self, **kwargs): - super(E2EResizeForTest, self).__init__() - self.max_side_len = kwargs['max_side_len'] - self.valid_set = kwargs['valid_set'] - - def __call__(self, data): - img = data['image'] - src_h, src_w, _ = img.shape - if self.valid_set == 'totaltext': - im_resized, [ratio_h, ratio_w] = self.resize_image_for_totaltext( - img, max_side_len=self.max_side_len) - else: - im_resized, (ratio_h, ratio_w) = self.resize_image( - img, max_side_len=self.max_side_len) - data['image'] = im_resized - data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) - return data - - def resize_image_for_totaltext(self, im, max_side_len=512): - - h, w, _ = im.shape - resize_w = w - resize_h = h - ratio = 1.25 - if h * ratio > max_side_len: - ratio = float(max_side_len) / resize_h - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return im, (ratio_h, ratio_w) - - def resize_image(self, im, max_side_len=512): - """ - resize image to a size multiple of max_stride which is required by the network - :param im: the resized image - :param max_side_len: limit of max image size to avoid out of memory in gpu - :return: the resized image and the resize ratio - """ - h, w, _ = im.shape - - resize_w = w - resize_h = h - - # Fix the longer side - if resize_h > resize_w: - ratio = float(max_side_len) / resize_h - else: - ratio = float(max_side_len) / resize_w - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - - return im, (ratio_h, ratio_w) - - -class KieResize(object): - def __init__(self, **kwargs): - super(KieResize, self).__init__() - self.max_side, self.min_side = kwargs['img_scale'][0], kwargs[ - 'img_scale'][1] - - def __call__(self, data): - img = data['image'] - points = 
data['points'] - src_h, src_w, _ = img.shape - im_resized, scale_factor, [ratio_h, ratio_w - ], [new_h, new_w] = self.resize_image(img) - resize_points = self.resize_boxes(img, points, scale_factor) - data['ori_image'] = img - data['ori_boxes'] = points - data['points'] = resize_points - data['image'] = im_resized - data['shape'] = np.array([new_h, new_w]) - return data - - def resize_image(self, img): - norm_img = np.zeros([1024, 1024, 3], dtype='float32') - scale = [512, 1024] - h, w = img.shape[:2] - max_long_edge = max(scale) - max_short_edge = min(scale) - scale_factor = min(max_long_edge / max(h, w), - max_short_edge / min(h, w)) - resize_w, resize_h = int(w * float(scale_factor) + 0.5), int(h * float( - scale_factor) + 0.5) - max_stride = 32 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(img, (resize_w, resize_h)) - new_h, new_w = im.shape[:2] - w_scale = new_w / w - h_scale = new_h / h - scale_factor = np.array( - [w_scale, h_scale, w_scale, h_scale], dtype=np.float32) - norm_img[:new_h, :new_w, :] = im - return norm_img, scale_factor, [h_scale, w_scale], [new_h, new_w] - - def resize_boxes(self, im, points, scale_factor): - points = points * scale_factor - img_shape = im.shape[:2] - points[:, 0::2] = np.clip(points[:, 0::2], 0, img_shape[1]) - points[:, 1::2] = np.clip(points[:, 1::2], 0, img_shape[0]) - return points - - -class SRResize(object): - def __init__(self, - imgH=32, - imgW=128, - down_sample_scale=4, - keep_ratio=False, - min_ratio=1, - mask=False, - infer_mode=False, - **kwargs): - self.imgH = imgH - self.imgW = imgW - self.keep_ratio = keep_ratio - self.min_ratio = min_ratio - self.down_sample_scale = down_sample_scale - self.mask = mask - self.infer_mode = infer_mode - - def __call__(self, data): - imgH = self.imgH - imgW = self.imgW - images_lr = data["image_lr"] - transform2 = ResizeNormalize( - (imgW // self.down_sample_scale, imgH // self.down_sample_scale)) - images_lr = transform2(images_lr) - data["img_lr"] = images_lr - if self.infer_mode: - return data - - images_HR = data["image_hr"] - label_strs = data["label"] - transform = ResizeNormalize((imgW, imgH)) - images_HR = transform(images_HR) - data["img_hr"] = images_HR - return data - - -class ResizeNormalize(object): - def __init__(self, size, interpolation=Image.BICUBIC): - self.size = size - self.interpolation = interpolation - - def __call__(self, img): - img = img.resize(self.size, self.interpolation) - img_numpy = np.array(img).astype("float32") - img_numpy = img_numpy.transpose((2, 0, 1)) / 255 - return img_numpy - - -class GrayImageChannelFormat(object): - """ - format gray scale image's channel: (3,h,w) -> (1,h,w) - Args: - inverse: inverse gray image - """ - - def __init__(self, inverse=False, **kwargs): - self.inverse = inverse - - def __call__(self, data): - img = data['image'] - img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - img_expanded = np.expand_dims(img_single_channel, 0) - - if self.inverse: - data['image'] = np.abs(img_expanded - 1) - else: - data['image'] = img_expanded - - data['src_image'] = img - return data - - -class Permute(object): - """permute image - Args: - to_bgr (bool): whether convert RGB to BGR - channel_first (bool): whether convert HWC to CHW - """ - - def __init__(self, ): - super(Permute, self).__init__() - - def __call__(self, im, im_info): - """ - Args: - im (np.ndarray): image (np.ndarray) - im_info (dict): info of image - Returns: - im 
(np.ndarray): processed image (np.ndarray) - im_info (dict): info of processed image - """ - im = im.transpose((2, 0, 1)).copy() - return im, im_info - - -class PadStride(object): - """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config - Args: - stride (bool): model with FPN need image shape % stride == 0 - """ - - def __init__(self, stride=0): - self.coarsest_stride = stride - - def __call__(self, im, im_info): - """ - Args: - im (np.ndarray): image (np.ndarray) - im_info (dict): info of image - Returns: - im (np.ndarray): processed image (np.ndarray) - im_info (dict): info of processed image - """ - coarsest_stride = self.coarsest_stride - if coarsest_stride <= 0: - return im, im_info - im_c, im_h, im_w = im.shape - pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) - pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) - padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) - padding_im[:, :im_h, :im_w] = im - return padding_im, im_info - - -def decode_image(im_file, im_info): - """read rgb image - Args: - im_file (str|np.ndarray): input can be image path or np.ndarray - im_info (dict): info of image - Returns: - im (np.ndarray): processed image (np.ndarray) - im_info (dict): info of processed image - """ - if isinstance(im_file, str): - with open(im_file, 'rb') as f: - im_read = f.read() - data = np.frombuffer(im_read, dtype='uint8') - im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - else: - im = im_file - im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32) - im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32) - return im, im_info - - -def preprocess(im, preprocess_ops): - # process image by preprocess_ops - im_info = { - 'scale_factor': np.array( - [1., 1.], dtype=np.float32), - 'im_shape': None, - } - im, im_info = decode_image(im, im_info) - for operator in preprocess_ops: - im, im_info = operator(im, im_info) - return im, im_info +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import sys +import six +import cv2 +import numpy as np +import math +from PIL import Image + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, + img_mode='RGB', + channel_first=False, + ignore_orientation=False, + **kwargs): + self.img_mode = img_mode + self.channel_first = channel_first + self.ignore_orientation = ignore_orientation + + def __call__(self, data): + img = data['image'] + if six.PY2: + assert isinstance(img, str) and len( + img) > 0, "invalid input 'img' in DecodeImage" + else: + assert isinstance(img, bytes) and len( + img) > 0, "invalid input 'img' in DecodeImage" + img = np.frombuffer(img, dtype='uint8') + if self.ignore_orientation: + img = cv2.imdecode(img, cv2.IMREAD_IGNORE_ORIENTATION | + cv2.IMREAD_COLOR) + else: + img = cv2.imdecode(img, 1) + if img is None: + return None + if self.img_mode == 'GRAY': + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + elif self.img_mode == 'RGB': + assert img.shape[2] == 3, 'invalid shape of image[%s]' % ( + img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + data['image'] = img + return data + + +class StandardizeImage(object): + """normalize image + Args: + mean (list): im - mean + std (list): im / std + is_scale (bool): whether need im / 255 + norm_type (str): type in ['mean_std', 'none'] + """ + + def __init__(self, mean, std, is_scale=True, norm_type='mean_std'): + self.mean = mean + self.std = std + self.is_scale = is_scale + self.norm_type = norm_type + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + im = im.astype(np.float32, copy=False) + if self.is_scale: + scale = 1.0 / 255.0 + im *= scale + + if self.norm_type == 'mean_std': + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + im -= mean + im /= std + return im, im_info + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs): + if isinstance(scale, str): + scale = eval(scale) + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, data): + img = data['image'] + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + assert isinstance(img, + np.ndarray), "invalid input 'img' in NormalizeImage" + data['image'] = ( + img.astype('float32') * self.scale - self.mean) / self.std + return data + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self, **kwargs): + pass + + def __call__(self, data): + img = data['image'] + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + data['image'] = img.transpose((2, 0, 1)) + return data + + +class Fasttext(object): + def __init__(self, path="None", **kwargs): + import fasttext + self.fast_model = fasttext.load_model(path) + + def __call__(self, data): + label = data['label'] + fast_label = self.fast_model[label] + data['fast_label'] = fast_label + return data + + +class KeepKeys(object): + def 
__init__(self, keep_keys, **kwargs): + self.keep_keys = keep_keys + + def __call__(self, data): + data_list = [] + for key in self.keep_keys: + data_list.append(data[key]) + return data_list + + +class Pad(object): + def __init__(self, size=None, size_div=32, **kwargs): + if size is not None and not isinstance(size, (int, list, tuple)): + raise TypeError("Type of target_size is invalid. Now is {}".format( + type(size))) + if isinstance(size, int): + size = [size, size] + self.size = size + self.size_div = size_div + + def __call__(self, data): + + img = data['image'] + img_h, img_w = img.shape[0], img.shape[1] + if self.size: + resize_h2, resize_w2 = self.size + assert ( + img_h < resize_h2 and img_w < resize_w2 + ), '(h, w) of target size should be greater than (img_h, img_w)' + else: + resize_h2 = max( + int(math.ceil(img.shape[0] / self.size_div) * self.size_div), + self.size_div) + resize_w2 = max( + int(math.ceil(img.shape[1] / self.size_div) * self.size_div), + self.size_div) + img = cv2.copyMakeBorder( + img, + 0, + resize_h2 - img_h, + 0, + resize_w2 - img_w, + cv2.BORDER_CONSTANT, + value=0) + data['image'] = img + return data + + +class LinearResize(object): + """resize image by target_size and max_size + Args: + target_size (int): the target size of image + keep_ratio (bool): whether keep_ratio or not, default true + interp (int): method of resize + """ + + def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR): + if isinstance(target_size, int): + target_size = [target_size, target_size] + self.target_size = target_size + self.keep_ratio = keep_ratio + self.interp = interp + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + assert len(self.target_size) == 2 + assert self.target_size[0] > 0 and self.target_size[1] > 0 + im_channel = im.shape[2] + im_scale_y, im_scale_x = self.generate_scale(im) + im = cv2.resize( + im, + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=self.interp) + im_info['im_shape'] = np.array(im.shape[:2]).astype('float32') + im_info['scale_factor'] = np.array( + [im_scale_y, im_scale_x]).astype('float32') + return im, im_info + + def generate_scale(self, im): + """ + Args: + im (np.ndarray): image (np.ndarray) + Returns: + im_scale_x: the resize ratio of X + im_scale_y: the resize ratio of Y + """ + origin_shape = im.shape[:2] + im_c = im.shape[2] + if self.keep_ratio: + im_size_min = np.min(origin_shape) + im_size_max = np.max(origin_shape) + target_size_min = np.min(self.target_size) + target_size_max = np.max(self.target_size) + im_scale = float(target_size_min) / float(im_size_min) + if np.round(im_scale * im_size_max) > target_size_max: + im_scale = float(target_size_max) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + else: + resize_h, resize_w = self.target_size + im_scale_y = resize_h / float(origin_shape[0]) + im_scale_x = resize_w / float(origin_shape[1]) + return im_scale_y, im_scale_x + + +class Resize(object): + def __init__(self, size=(640, 640), **kwargs): + self.size = size + + def resize_image(self, img): + resize_h, resize_w = self.size + ori_h, ori_w = img.shape[:2] # (h, w, c) + ratio_h = float(resize_h) / ori_h + ratio_w = float(resize_w) / ori_w + img = cv2.resize(img, (int(resize_w), int(resize_h))) + return img, [ratio_h, ratio_w] + + def __call__(self, data): + img = data['image'] + if 'polys' in 
data: + text_polys = data['polys'] + + img_resize, [ratio_h, ratio_w] = self.resize_image(img) + if 'polys' in data: + new_boxes = [] + for box in text_polys: + new_box = [] + for cord in box: + new_box.append([cord[0] * ratio_w, cord[1] * ratio_h]) + new_boxes.append(new_box) + data['polys'] = np.array(new_boxes, dtype=np.float32) + data['image'] = img_resize + return data + + +class DetResizeForTest(object): + def __init__(self, **kwargs): + super(DetResizeForTest, self).__init__() + self.resize_type = 0 + self.keep_ratio = False + if 'image_shape' in kwargs: + self.image_shape = kwargs['image_shape'] + self.resize_type = 1 + if 'keep_ratio' in kwargs: + self.keep_ratio = kwargs['keep_ratio'] + elif 'limit_side_len' in kwargs: + self.limit_side_len = kwargs['limit_side_len'] + self.limit_type = kwargs.get('limit_type', 'min') + elif 'resize_long' in kwargs: + self.resize_type = 2 + self.resize_long = kwargs.get('resize_long', 960) + else: + self.limit_side_len = 736 + self.limit_type = 'min' + + def __call__(self, data): + img = data['image'] + src_h, src_w, _ = img.shape + if sum([src_h, src_w]) < 64: + img = self.image_padding(img) + + if self.resize_type == 0: + # img, shape = self.resize_image_type0(img) + img, [ratio_h, ratio_w] = self.resize_image_type0(img) + elif self.resize_type == 2: + img, [ratio_h, ratio_w] = self.resize_image_type2(img) + else: + # img, shape = self.resize_image_type1(img) + img, [ratio_h, ratio_w] = self.resize_image_type1(img) + data['image'] = img + data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) + return data + + def image_padding(self, im, value=0): + h, w, c = im.shape + im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value + im_pad[:h, :w, :] = im + return im_pad + + def resize_image_type1(self, img): + resize_h, resize_w = self.image_shape + ori_h, ori_w = img.shape[:2] # (h, w, c) + if self.keep_ratio is True: + resize_w = ori_w * resize_h / ori_h + N = math.ceil(resize_w / 32) + resize_w = N * 32 + ratio_h = float(resize_h) / ori_h + ratio_w = float(resize_w) / ori_w + img = cv2.resize(img, (int(resize_w), int(resize_h))) + # return img, np.array([ori_h, ori_w]) + return img, [ratio_h, ratio_w] + + def resize_image_type0(self, img): + """ + resize image to a size multiple of 32 which is required by the network + args: + img(array): array with shape [h, w, c] + return(tuple): + img, (ratio_h, ratio_w) + """ + limit_side_len = self.limit_side_len + h, w, c = img.shape + + # limit the max side + if self.limit_type == 'max': + if max(h, w) > limit_side_len: + if h > w: + ratio = float(limit_side_len) / h + else: + ratio = float(limit_side_len) / w + else: + ratio = 1. + elif self.limit_type == 'min': + if min(h, w) < limit_side_len: + if h < w: + ratio = float(limit_side_len) / h + else: + ratio = float(limit_side_len) / w + else: + ratio = 1. 
+ elif self.limit_type == 'resize_long': + ratio = float(limit_side_len) / max(h, w) + else: + raise Exception('not support limit type, image ') + resize_h = int(h * ratio) + resize_w = int(w * ratio) + + resize_h = max(int(round(resize_h / 32) * 32), 32) + resize_w = max(int(round(resize_w / 32) * 32), 32) + + try: + if int(resize_w) <= 0 or int(resize_h) <= 0: + return None, (None, None) + img = cv2.resize(img, (int(resize_w), int(resize_h))) + except BaseException: + print(img.shape, resize_w, resize_h) + sys.exit(0) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + return img, [ratio_h, ratio_w] + + def resize_image_type2(self, img): + h, w, _ = img.shape + + resize_w = w + resize_h = h + + if resize_h > resize_w: + ratio = float(self.resize_long) / resize_h + else: + ratio = float(self.resize_long) / resize_w + + resize_h = int(resize_h * ratio) + resize_w = int(resize_w * ratio) + + max_stride = 128 + resize_h = (resize_h + max_stride - 1) // max_stride * max_stride + resize_w = (resize_w + max_stride - 1) // max_stride * max_stride + img = cv2.resize(img, (int(resize_w), int(resize_h))) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + + return img, [ratio_h, ratio_w] + + +class E2EResizeForTest(object): + def __init__(self, **kwargs): + super(E2EResizeForTest, self).__init__() + self.max_side_len = kwargs['max_side_len'] + self.valid_set = kwargs['valid_set'] + + def __call__(self, data): + img = data['image'] + src_h, src_w, _ = img.shape + if self.valid_set == 'totaltext': + im_resized, [ratio_h, ratio_w] = self.resize_image_for_totaltext( + img, max_side_len=self.max_side_len) + else: + im_resized, (ratio_h, ratio_w) = self.resize_image( + img, max_side_len=self.max_side_len) + data['image'] = im_resized + data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) + return data + + def resize_image_for_totaltext(self, im, max_side_len=512): + + h, w, _ = im.shape + resize_w = w + resize_h = h + ratio = 1.25 + if h * ratio > max_side_len: + ratio = float(max_side_len) / resize_h + resize_h = int(resize_h * ratio) + resize_w = int(resize_w * ratio) + + max_stride = 128 + resize_h = (resize_h + max_stride - 1) // max_stride * max_stride + resize_w = (resize_w + max_stride - 1) // max_stride * max_stride + im = cv2.resize(im, (int(resize_w), int(resize_h))) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + return im, (ratio_h, ratio_w) + + def resize_image(self, im, max_side_len=512): + """ + resize image to a size multiple of max_stride which is required by the network + :param im: the resized image + :param max_side_len: limit of max image size to avoid out of memory in gpu + :return: the resized image and the resize ratio + """ + h, w, _ = im.shape + + resize_w = w + resize_h = h + + # Fix the longer side + if resize_h > resize_w: + ratio = float(max_side_len) / resize_h + else: + ratio = float(max_side_len) / resize_w + + resize_h = int(resize_h * ratio) + resize_w = int(resize_w * ratio) + + max_stride = 128 + resize_h = (resize_h + max_stride - 1) // max_stride * max_stride + resize_w = (resize_w + max_stride - 1) // max_stride * max_stride + im = cv2.resize(im, (int(resize_w), int(resize_h))) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + + return im, (ratio_h, ratio_w) + + +class KieResize(object): + def __init__(self, **kwargs): + super(KieResize, self).__init__() + self.max_side, self.min_side = kwargs['img_scale'][0], kwargs[ + 'img_scale'][1] + + def __call__(self, data): + img = data['image'] + points = 
data['points'] + src_h, src_w, _ = img.shape + im_resized, scale_factor, [ratio_h, ratio_w + ], [new_h, new_w] = self.resize_image(img) + resize_points = self.resize_boxes(img, points, scale_factor) + data['ori_image'] = img + data['ori_boxes'] = points + data['points'] = resize_points + data['image'] = im_resized + data['shape'] = np.array([new_h, new_w]) + return data + + def resize_image(self, img): + norm_img = np.zeros([1024, 1024, 3], dtype='float32') + scale = [512, 1024] + h, w = img.shape[:2] + max_long_edge = max(scale) + max_short_edge = min(scale) + scale_factor = min(max_long_edge / max(h, w), + max_short_edge / min(h, w)) + resize_w, resize_h = int(w * float(scale_factor) + 0.5), int(h * float( + scale_factor) + 0.5) + max_stride = 32 + resize_h = (resize_h + max_stride - 1) // max_stride * max_stride + resize_w = (resize_w + max_stride - 1) // max_stride * max_stride + im = cv2.resize(img, (resize_w, resize_h)) + new_h, new_w = im.shape[:2] + w_scale = new_w / w + h_scale = new_h / h + scale_factor = np.array( + [w_scale, h_scale, w_scale, h_scale], dtype=np.float32) + norm_img[:new_h, :new_w, :] = im + return norm_img, scale_factor, [h_scale, w_scale], [new_h, new_w] + + def resize_boxes(self, im, points, scale_factor): + points = points * scale_factor + img_shape = im.shape[:2] + points[:, 0::2] = np.clip(points[:, 0::2], 0, img_shape[1]) + points[:, 1::2] = np.clip(points[:, 1::2], 0, img_shape[0]) + return points + + +class SRResize(object): + def __init__(self, + imgH=32, + imgW=128, + down_sample_scale=4, + keep_ratio=False, + min_ratio=1, + mask=False, + infer_mode=False, + **kwargs): + self.imgH = imgH + self.imgW = imgW + self.keep_ratio = keep_ratio + self.min_ratio = min_ratio + self.down_sample_scale = down_sample_scale + self.mask = mask + self.infer_mode = infer_mode + + def __call__(self, data): + imgH = self.imgH + imgW = self.imgW + images_lr = data["image_lr"] + transform2 = ResizeNormalize( + (imgW // self.down_sample_scale, imgH // self.down_sample_scale)) + images_lr = transform2(images_lr) + data["img_lr"] = images_lr + if self.infer_mode: + return data + + images_HR = data["image_hr"] + label_strs = data["label"] + transform = ResizeNormalize((imgW, imgH)) + images_HR = transform(images_HR) + data["img_hr"] = images_HR + return data + + +class ResizeNormalize(object): + def __init__(self, size, interpolation=Image.BICUBIC): + self.size = size + self.interpolation = interpolation + + def __call__(self, img): + img = img.resize(self.size, self.interpolation) + img_numpy = np.array(img).astype("float32") + img_numpy = img_numpy.transpose((2, 0, 1)) / 255 + return img_numpy + + +class GrayImageChannelFormat(object): + """ + format gray scale image's channel: (3,h,w) -> (1,h,w) + Args: + inverse: inverse gray image + """ + + def __init__(self, inverse=False, **kwargs): + self.inverse = inverse + + def __call__(self, data): + img = data['image'] + img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + img_expanded = np.expand_dims(img_single_channel, 0) + + if self.inverse: + data['image'] = np.abs(img_expanded - 1) + else: + data['image'] = img_expanded + + data['src_image'] = img + return data + + +class Permute(object): + """permute image + Args: + to_bgr (bool): whether convert RGB to BGR + channel_first (bool): whether convert HWC to CHW + """ + + def __init__(self, ): + super(Permute, self).__init__() + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im 
(np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + im = im.transpose((2, 0, 1)).copy() + return im, im_info + + +class PadStride(object): + """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config + Args: + stride (bool): model with FPN need image shape % stride == 0 + """ + + def __init__(self, stride=0): + self.coarsest_stride = stride + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + coarsest_stride = self.coarsest_stride + if coarsest_stride <= 0: + return im, im_info + im_c, im_h, im_w = im.shape + pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) + pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) + padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) + padding_im[:, :im_h, :im_w] = im + return padding_im, im_info + + +def decode_image(im_file, im_info): + """read rgb image + Args: + im_file (str|np.ndarray): input can be image path or np.ndarray + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + if isinstance(im_file, str): + with open(im_file, 'rb') as f: + im_read = f.read() + data = np.frombuffer(im_read, dtype='uint8') + im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + else: + im = im_file + im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32) + im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32) + return im, im_info + + +def preprocess(im, preprocess_ops): + # process image by preprocess_ops + im_info = { + 'scale_factor': np.array( + [1., 1.], dtype=np.float32), + 'im_shape': None, + } + im, im_info = decode_image(im, im_info) + for operator in preprocess_ops: + im, im_info = operator(im, im_info) + return im, im_info diff --git a/deepdoc/vision/postprocess.py b/deepdoc/vision/postprocess.py index 198089c814842759d01740a2cda22d7b4d0490c9..9ab08e4ffa699c7403ce174b12bf81727a09a96f 100644 --- a/deepdoc/vision/postprocess.py +++ b/deepdoc/vision/postprocess.py @@ -1,366 +1,366 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import copy -import re -import numpy as np -import cv2 -from shapely.geometry import Polygon -import pyclipper - - -def build_post_process(config, global_config=None): - support_dict = ['DBPostProcess', 'CTCLabelDecode'] - - config = copy.deepcopy(config) - module_name = config.pop('name') - if module_name == "None": - return - if global_config is not None: - config.update(global_config) - assert module_name in support_dict, Exception( - 'post process only support {}'.format(support_dict)) - module_class = eval(module_name)(**config) - return module_class - - -class DBPostProcess(object): - """ - The post process for Differentiable Binarization (DB). 
- """ - - def __init__(self, - thresh=0.3, - box_thresh=0.7, - max_candidates=1000, - unclip_ratio=2.0, - use_dilation=False, - score_mode="fast", - box_type='quad', - **kwargs): - self.thresh = thresh - self.box_thresh = box_thresh - self.max_candidates = max_candidates - self.unclip_ratio = unclip_ratio - self.min_size = 3 - self.score_mode = score_mode - self.box_type = box_type - assert score_mode in [ - "slow", "fast" - ], "Score mode must be in [slow, fast] but got: {}".format(score_mode) - - self.dilation_kernel = None if not use_dilation else np.array( - [[1, 1], [1, 1]]) - - def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height): - ''' - _bitmap: single map with shape (1, H, W), - whose values are binarized as {0, 1} - ''' - - bitmap = _bitmap - height, width = bitmap.shape - - boxes = [] - scores = [] - - contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), - cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) - - for contour in contours[:self.max_candidates]: - epsilon = 0.002 * cv2.arcLength(contour, True) - approx = cv2.approxPolyDP(contour, epsilon, True) - points = approx.reshape((-1, 2)) - if points.shape[0] < 4: - continue - - score = self.box_score_fast(pred, points.reshape(-1, 2)) - if self.box_thresh > score: - continue - - if points.shape[0] > 2: - box = self.unclip(points, self.unclip_ratio) - if len(box) > 1: - continue - else: - continue - box = box.reshape(-1, 2) - - _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2))) - if sside < self.min_size + 2: - continue - - box = np.array(box) - box[:, 0] = np.clip( - np.round(box[:, 0] / width * dest_width), 0, dest_width) - box[:, 1] = np.clip( - np.round(box[:, 1] / height * dest_height), 0, dest_height) - boxes.append(box.tolist()) - scores.append(score) - return boxes, scores - - def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): - ''' - _bitmap: single map with shape (1, H, W), - whose values are binarized as {0, 1} - ''' - - bitmap = _bitmap - height, width = bitmap.shape - - outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, - cv2.CHAIN_APPROX_SIMPLE) - if len(outs) == 3: - img, contours, _ = outs[0], outs[1], outs[2] - elif len(outs) == 2: - contours, _ = outs[0], outs[1] - - num_contours = min(len(contours), self.max_candidates) - - boxes = [] - scores = [] - for index in range(num_contours): - contour = contours[index] - points, sside = self.get_mini_boxes(contour) - if sside < self.min_size: - continue - points = np.array(points) - if self.score_mode == "fast": - score = self.box_score_fast(pred, points.reshape(-1, 2)) - else: - score = self.box_score_slow(pred, contour) - if self.box_thresh > score: - continue - - box = self.unclip(points, self.unclip_ratio).reshape(-1, 1, 2) - box, sside = self.get_mini_boxes(box) - if sside < self.min_size + 2: - continue - box = np.array(box) - - box[:, 0] = np.clip( - np.round(box[:, 0] / width * dest_width), 0, dest_width) - box[:, 1] = np.clip( - np.round(box[:, 1] / height * dest_height), 0, dest_height) - boxes.append(box.astype("int32")) - scores.append(score) - return np.array(boxes, dtype="int32"), scores - - def unclip(self, box, unclip_ratio): - poly = Polygon(box) - distance = poly.area * unclip_ratio / poly.length - offset = pyclipper.PyclipperOffset() - offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) - expanded = np.array(offset.Execute(distance)) - return expanded - - def get_mini_boxes(self, contour): - bounding_box = cv2.minAreaRect(contour) - points = 
sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) - - index_1, index_2, index_3, index_4 = 0, 1, 2, 3 - if points[1][1] > points[0][1]: - index_1 = 0 - index_4 = 1 - else: - index_1 = 1 - index_4 = 0 - if points[3][1] > points[2][1]: - index_2 = 2 - index_3 = 3 - else: - index_2 = 3 - index_3 = 2 - - box = [ - points[index_1], points[index_2], points[index_3], points[index_4] - ] - return box, min(bounding_box[1]) - - def box_score_fast(self, bitmap, _box): - ''' - box_score_fast: use bbox mean score as the mean score - ''' - h, w = bitmap.shape[:2] - box = _box.copy() - xmin = np.clip(np.floor(box[:, 0].min()).astype("int32"), 0, w - 1) - xmax = np.clip(np.ceil(box[:, 0].max()).astype("int32"), 0, w - 1) - ymin = np.clip(np.floor(box[:, 1].min()).astype("int32"), 0, h - 1) - ymax = np.clip(np.ceil(box[:, 1].max()).astype("int32"), 0, h - 1) - - mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) - box[:, 0] = box[:, 0] - xmin - box[:, 1] = box[:, 1] - ymin - cv2.fillPoly(mask, box.reshape(1, -1, 2).astype("int32"), 1) - return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] - - def box_score_slow(self, bitmap, contour): - ''' - box_score_slow: use polyon mean score as the mean score - ''' - h, w = bitmap.shape[:2] - contour = contour.copy() - contour = np.reshape(contour, (-1, 2)) - - xmin = np.clip(np.min(contour[:, 0]), 0, w - 1) - xmax = np.clip(np.max(contour[:, 0]), 0, w - 1) - ymin = np.clip(np.min(contour[:, 1]), 0, h - 1) - ymax = np.clip(np.max(contour[:, 1]), 0, h - 1) - - mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) - - contour[:, 0] = contour[:, 0] - xmin - contour[:, 1] = contour[:, 1] - ymin - - cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1) - return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] - - def __call__(self, outs_dict, shape_list): - pred = outs_dict['maps'] - if not isinstance(pred, np.ndarray): - pred = pred.numpy() - pred = pred[:, 0, :, :] - segmentation = pred > self.thresh - - boxes_batch = [] - for batch_index in range(pred.shape[0]): - src_h, src_w, ratio_h, ratio_w = shape_list[batch_index] - if self.dilation_kernel is not None: - mask = cv2.dilate( - np.array(segmentation[batch_index]).astype(np.uint8), - self.dilation_kernel) - else: - mask = segmentation[batch_index] - if self.box_type == 'poly': - boxes, scores = self.polygons_from_bitmap(pred[batch_index], - mask, src_w, src_h) - elif self.box_type == 'quad': - boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask, - src_w, src_h) - else: - raise ValueError( - "box_type can only be one of ['quad', 'poly']") - - boxes_batch.append({'points': boxes}) - return boxes_batch - - -class BaseRecLabelDecode(object): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False): - self.beg_str = "sos" - self.end_str = "eos" - self.reverse = False - self.character_str = [] - - if character_dict_path is None: - self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" - dict_character = list(self.character_str) - else: - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - self.character_str.append(line) - if use_space_char: - self.character_str.append(" ") - dict_character = list(self.character_str) - if 'arabic' in character_dict_path: - self.reverse = True - - dict_character = self.add_special_char(dict_character) - self.dict = {} - for i, char in 
enumerate(dict_character): - self.dict[char] = i - self.character = dict_character - - def pred_reverse(self, pred): - pred_re = [] - c_current = '' - for c in pred: - if not bool(re.search('[a-zA-Z0-9 :*./%+-]', c)): - if c_current != '': - pred_re.append(c_current) - pred_re.append(c) - c_current = '' - else: - c_current += c - if c_current != '': - pred_re.append(c_current) - - return ''.join(pred_re[::-1]) - - def add_special_char(self, dict_character): - return dict_character - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. """ - result_list = [] - ignored_tokens = self.get_ignored_tokens() - batch_size = len(text_index) - for batch_idx in range(batch_size): - selection = np.ones(len(text_index[batch_idx]), dtype=bool) - if is_remove_duplicate: - selection[1:] = text_index[batch_idx][1:] != text_index[ - batch_idx][:-1] - for ignored_token in ignored_tokens: - selection &= text_index[batch_idx] != ignored_token - - char_list = [ - self.character[text_id] - for text_id in text_index[batch_idx][selection] - ] - if text_prob is not None: - conf_list = text_prob[batch_idx][selection] - else: - conf_list = [1] * len(selection) - if len(conf_list) == 0: - conf_list = [0] - - text = ''.join(char_list) - - if self.reverse: # for arabic rec - text = self.pred_reverse(text) - - result_list.append((text, np.mean(conf_list).tolist())) - return result_list - - def get_ignored_tokens(self): - return [0] # for ctc blank - - -class CTCLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(CTCLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def __call__(self, preds, label=None, *args, **kwargs): - if isinstance(preds, tuple) or isinstance(preds, list): - preds = preds[-1] - if not isinstance(preds, np.ndarray): - preds = preds.numpy() - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True) - if label is None: - return text - label = self.decode(label) - return text, label - - def add_special_char(self, dict_character): - dict_character = ['blank'] + dict_character - return dict_character +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import copy +import re +import numpy as np +import cv2 +from shapely.geometry import Polygon +import pyclipper + + +def build_post_process(config, global_config=None): + support_dict = ['DBPostProcess', 'CTCLabelDecode'] + + config = copy.deepcopy(config) + module_name = config.pop('name') + if module_name == "None": + return + if global_config is not None: + config.update(global_config) + assert module_name in support_dict, Exception( + 'post process only support {}'.format(support_dict)) + module_class = eval(module_name)(**config) + return module_class + + +class DBPostProcess(object): + """ + The post process for Differentiable Binarization (DB). 
+ """ + + def __init__(self, + thresh=0.3, + box_thresh=0.7, + max_candidates=1000, + unclip_ratio=2.0, + use_dilation=False, + score_mode="fast", + box_type='quad', + **kwargs): + self.thresh = thresh + self.box_thresh = box_thresh + self.max_candidates = max_candidates + self.unclip_ratio = unclip_ratio + self.min_size = 3 + self.score_mode = score_mode + self.box_type = box_type + assert score_mode in [ + "slow", "fast" + ], "Score mode must be in [slow, fast] but got: {}".format(score_mode) + + self.dilation_kernel = None if not use_dilation else np.array( + [[1, 1], [1, 1]]) + + def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height): + ''' + _bitmap: single map with shape (1, H, W), + whose values are binarized as {0, 1} + ''' + + bitmap = _bitmap + height, width = bitmap.shape + + boxes = [] + scores = [] + + contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), + cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) + + for contour in contours[:self.max_candidates]: + epsilon = 0.002 * cv2.arcLength(contour, True) + approx = cv2.approxPolyDP(contour, epsilon, True) + points = approx.reshape((-1, 2)) + if points.shape[0] < 4: + continue + + score = self.box_score_fast(pred, points.reshape(-1, 2)) + if self.box_thresh > score: + continue + + if points.shape[0] > 2: + box = self.unclip(points, self.unclip_ratio) + if len(box) > 1: + continue + else: + continue + box = box.reshape(-1, 2) + + _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2))) + if sside < self.min_size + 2: + continue + + box = np.array(box) + box[:, 0] = np.clip( + np.round(box[:, 0] / width * dest_width), 0, dest_width) + box[:, 1] = np.clip( + np.round(box[:, 1] / height * dest_height), 0, dest_height) + boxes.append(box.tolist()) + scores.append(score) + return boxes, scores + + def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): + ''' + _bitmap: single map with shape (1, H, W), + whose values are binarized as {0, 1} + ''' + + bitmap = _bitmap + height, width = bitmap.shape + + outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, + cv2.CHAIN_APPROX_SIMPLE) + if len(outs) == 3: + img, contours, _ = outs[0], outs[1], outs[2] + elif len(outs) == 2: + contours, _ = outs[0], outs[1] + + num_contours = min(len(contours), self.max_candidates) + + boxes = [] + scores = [] + for index in range(num_contours): + contour = contours[index] + points, sside = self.get_mini_boxes(contour) + if sside < self.min_size: + continue + points = np.array(points) + if self.score_mode == "fast": + score = self.box_score_fast(pred, points.reshape(-1, 2)) + else: + score = self.box_score_slow(pred, contour) + if self.box_thresh > score: + continue + + box = self.unclip(points, self.unclip_ratio).reshape(-1, 1, 2) + box, sside = self.get_mini_boxes(box) + if sside < self.min_size + 2: + continue + box = np.array(box) + + box[:, 0] = np.clip( + np.round(box[:, 0] / width * dest_width), 0, dest_width) + box[:, 1] = np.clip( + np.round(box[:, 1] / height * dest_height), 0, dest_height) + boxes.append(box.astype("int32")) + scores.append(score) + return np.array(boxes, dtype="int32"), scores + + def unclip(self, box, unclip_ratio): + poly = Polygon(box) + distance = poly.area * unclip_ratio / poly.length + offset = pyclipper.PyclipperOffset() + offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) + expanded = np.array(offset.Execute(distance)) + return expanded + + def get_mini_boxes(self, contour): + bounding_box = cv2.minAreaRect(contour) + points = 
sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + + index_1, index_2, index_3, index_4 = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_1 = 0 + index_4 = 1 + else: + index_1 = 1 + index_4 = 0 + if points[3][1] > points[2][1]: + index_2 = 2 + index_3 = 3 + else: + index_2 = 3 + index_3 = 2 + + box = [ + points[index_1], points[index_2], points[index_3], points[index_4] + ] + return box, min(bounding_box[1]) + + def box_score_fast(self, bitmap, _box): + ''' + box_score_fast: use bbox mean score as the mean score + ''' + h, w = bitmap.shape[:2] + box = _box.copy() + xmin = np.clip(np.floor(box[:, 0].min()).astype("int32"), 0, w - 1) + xmax = np.clip(np.ceil(box[:, 0].max()).astype("int32"), 0, w - 1) + ymin = np.clip(np.floor(box[:, 1].min()).astype("int32"), 0, h - 1) + ymax = np.clip(np.ceil(box[:, 1].max()).astype("int32"), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + box[:, 0] = box[:, 0] - xmin + box[:, 1] = box[:, 1] - ymin + cv2.fillPoly(mask, box.reshape(1, -1, 2).astype("int32"), 1) + return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] + + def box_score_slow(self, bitmap, contour): + ''' + box_score_slow: use polyon mean score as the mean score + ''' + h, w = bitmap.shape[:2] + contour = contour.copy() + contour = np.reshape(contour, (-1, 2)) + + xmin = np.clip(np.min(contour[:, 0]), 0, w - 1) + xmax = np.clip(np.max(contour[:, 0]), 0, w - 1) + ymin = np.clip(np.min(contour[:, 1]), 0, h - 1) + ymax = np.clip(np.max(contour[:, 1]), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + + contour[:, 0] = contour[:, 0] - xmin + contour[:, 1] = contour[:, 1] - ymin + + cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1) + return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] + + def __call__(self, outs_dict, shape_list): + pred = outs_dict['maps'] + if not isinstance(pred, np.ndarray): + pred = pred.numpy() + pred = pred[:, 0, :, :] + segmentation = pred > self.thresh + + boxes_batch = [] + for batch_index in range(pred.shape[0]): + src_h, src_w, ratio_h, ratio_w = shape_list[batch_index] + if self.dilation_kernel is not None: + mask = cv2.dilate( + np.array(segmentation[batch_index]).astype(np.uint8), + self.dilation_kernel) + else: + mask = segmentation[batch_index] + if self.box_type == 'poly': + boxes, scores = self.polygons_from_bitmap(pred[batch_index], + mask, src_w, src_h) + elif self.box_type == 'quad': + boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask, + src_w, src_h) + else: + raise ValueError( + "box_type can only be one of ['quad', 'poly']") + + boxes_batch.append({'points': boxes}) + return boxes_batch + + +class BaseRecLabelDecode(object): + """ Convert between text-label and text-index """ + + def __init__(self, character_dict_path=None, use_space_char=False): + self.beg_str = "sos" + self.end_str = "eos" + self.reverse = False + self.character_str = [] + + if character_dict_path is None: + self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" + dict_character = list(self.character_str) + else: + with open(character_dict_path, "rb") as fin: + lines = fin.readlines() + for line in lines: + line = line.decode('utf-8').strip("\n").strip("\r\n") + self.character_str.append(line) + if use_space_char: + self.character_str.append(" ") + dict_character = list(self.character_str) + if 'arabic' in character_dict_path: + self.reverse = True + + dict_character = self.add_special_char(dict_character) + self.dict = {} + for i, char in 
enumerate(dict_character): + self.dict[char] = i + self.character = dict_character + + def pred_reverse(self, pred): + pred_re = [] + c_current = '' + for c in pred: + if not bool(re.search('[a-zA-Z0-9 :*./%+-]', c)): + if c_current != '': + pred_re.append(c_current) + pred_re.append(c) + c_current = '' + else: + c_current += c + if c_current != '': + pred_re.append(c_current) + + return ''.join(pred_re[::-1]) + + def add_special_char(self, dict_character): + return dict_character + + def decode(self, text_index, text_prob=None, is_remove_duplicate=False): + """ convert text-index into text-label. """ + result_list = [] + ignored_tokens = self.get_ignored_tokens() + batch_size = len(text_index) + for batch_idx in range(batch_size): + selection = np.ones(len(text_index[batch_idx]), dtype=bool) + if is_remove_duplicate: + selection[1:] = text_index[batch_idx][1:] != text_index[ + batch_idx][:-1] + for ignored_token in ignored_tokens: + selection &= text_index[batch_idx] != ignored_token + + char_list = [ + self.character[text_id] + for text_id in text_index[batch_idx][selection] + ] + if text_prob is not None: + conf_list = text_prob[batch_idx][selection] + else: + conf_list = [1] * len(selection) + if len(conf_list) == 0: + conf_list = [0] + + text = ''.join(char_list) + + if self.reverse: # for arabic rec + text = self.pred_reverse(text) + + result_list.append((text, np.mean(conf_list).tolist())) + return result_list + + def get_ignored_tokens(self): + return [0] # for ctc blank + + +class CTCLabelDecode(BaseRecLabelDecode): + """ Convert between text-label and text-index """ + + def __init__(self, character_dict_path=None, use_space_char=False, + **kwargs): + super(CTCLabelDecode, self).__init__(character_dict_path, + use_space_char) + + def __call__(self, preds, label=None, *args, **kwargs): + if isinstance(preds, tuple) or isinstance(preds, list): + preds = preds[-1] + if not isinstance(preds, np.ndarray): + preds = preds.numpy() + preds_idx = preds.argmax(axis=2) + preds_prob = preds.max(axis=2) + text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True) + if label is None: + return text + label = self.decode(label) + return text, label + + def add_special_char(self, dict_character): + dict_character = ['blank'] + dict_character + return dict_character diff --git a/deepdoc/vision/recognizer.py b/deepdoc/vision/recognizer.py index 7fd624e2d3b725346bbdf44f7c8c148dc0113ece..a7fe5047c53db3848777fa1564a705a636a61924 100644 --- a/deepdoc/vision/recognizer.py +++ b/deepdoc/vision/recognizer.py @@ -1,452 +1,452 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os -from copy import deepcopy - -import onnxruntime as ort -from huggingface_hub import snapshot_download - -from api.utils.file_utils import get_project_base_directory -from .operators import * - - -class Recognizer(object): - def __init__(self, label_list, task_name, model_dir=None): - """ - If you have trouble downloading HuggingFace models, -_^ this might help!! 
- - For Linux: - export HF_ENDPOINT=https://hf-mirror.com - - For Windows: - Good luck - ^_- - - """ - if not model_dir: - model_dir = os.path.join( - get_project_base_directory(), - "rag/res/deepdoc") - model_file_path = os.path.join(model_dir, task_name + ".onnx") - if not os.path.exists(model_file_path): - model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc", - local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"), - local_dir_use_symlinks=False) - model_file_path = os.path.join(model_dir, task_name + ".onnx") - else: - model_file_path = os.path.join(model_dir, task_name + ".onnx") - - if not os.path.exists(model_file_path): - raise ValueError("not find model file path {}".format( - model_file_path)) - if False and ort.get_device() == "GPU": - options = ort.SessionOptions() - options.enable_cpu_mem_arena = False - self.ort_sess = ort.InferenceSession(model_file_path, options=options, providers=[('CUDAExecutionProvider')]) - else: - self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider']) - self.input_names = [node.name for node in self.ort_sess.get_inputs()] - self.output_names = [node.name for node in self.ort_sess.get_outputs()] - self.input_shape = self.ort_sess.get_inputs()[0].shape[2:4] - self.label_list = label_list - - @staticmethod - def sort_Y_firstly(arr, threashold): - # sort using y1 first and then x1 - arr = sorted(arr, key=lambda r: (r["top"], r["x0"])) - for i in range(len(arr) - 1): - for j in range(i, -1, -1): - # restore the order using th - if abs(arr[j + 1]["top"] - arr[j]["top"]) < threashold \ - and arr[j + 1]["x0"] < arr[j]["x0"]: - tmp = deepcopy(arr[j]) - arr[j] = deepcopy(arr[j + 1]) - arr[j + 1] = deepcopy(tmp) - return arr - - @staticmethod - def sort_X_firstly(arr, threashold, copy=True): - # sort using y1 first and then x1 - arr = sorted(arr, key=lambda r: (r["x0"], r["top"])) - for i in range(len(arr) - 1): - for j in range(i, -1, -1): - # restore the order using th - if abs(arr[j + 1]["x0"] - arr[j]["x0"]) < threashold \ - and arr[j + 1]["top"] < arr[j]["top"]: - tmp = deepcopy(arr[j]) if copy else arr[j] - arr[j] = deepcopy(arr[j + 1]) if copy else arr[j + 1] - arr[j + 1] = deepcopy(tmp) if copy else tmp - return arr - - @staticmethod - def sort_C_firstly(arr, thr=0): - # sort using y1 first and then x1 - # sorted(arr, key=lambda r: (r["x0"], r["top"])) - arr = Recognizer.sort_X_firstly(arr, thr) - for i in range(len(arr) - 1): - for j in range(i, -1, -1): - # restore the order using th - if "C" not in arr[j] or "C" not in arr[j + 1]: - continue - if arr[j + 1]["C"] < arr[j]["C"] \ - or ( - arr[j + 1]["C"] == arr[j]["C"] - and arr[j + 1]["top"] < arr[j]["top"] - ): - tmp = arr[j] - arr[j] = arr[j + 1] - arr[j + 1] = tmp - return arr - - return sorted(arr, key=lambda r: (r.get("C", r["x0"]), r["top"])) - - @staticmethod - def sort_R_firstly(arr, thr=0): - # sort using y1 first and then x1 - # sorted(arr, key=lambda r: (r["top"], r["x0"])) - arr = Recognizer.sort_Y_firstly(arr, thr) - for i in range(len(arr) - 1): - for j in range(i, -1, -1): - if "R" not in arr[j] or "R" not in arr[j + 1]: - continue - if arr[j + 1]["R"] < arr[j]["R"] \ - or ( - arr[j + 1]["R"] == arr[j]["R"] - and arr[j + 1]["x0"] < arr[j]["x0"] - ): - tmp = arr[j] - arr[j] = arr[j + 1] - arr[j + 1] = tmp - return arr - - @staticmethod - def overlapped_area(a, b, ratio=True): - tp, btm, x0, x1 = a["top"], a["bottom"], a["x0"], a["x1"] - if b["x0"] > x1 or b["x1"] < x0: - return 0 - if b["bottom"] < tp or b["top"] > btm: - return 0 
- x0_ = max(b["x0"], x0) - x1_ = min(b["x1"], x1) - assert x0_ <= x1_, "Fuckedup! T:{},B:{},X0:{},X1:{} ==> {}".format( - tp, btm, x0, x1, b) - tp_ = max(b["top"], tp) - btm_ = min(b["bottom"], btm) - assert tp_ <= btm_, "Fuckedup! T:{},B:{},X0:{},X1:{} => {}".format( - tp, btm, x0, x1, b) - ov = (btm_ - tp_) * (x1_ - x0_) if x1 - \ - x0 != 0 and btm - tp != 0 else 0 - if ov > 0 and ratio: - ov /= (x1 - x0) * (btm - tp) - return ov - - @staticmethod - def layouts_cleanup(boxes, layouts, far=2, thr=0.7): - def notOverlapped(a, b): - return any([a["x1"] < b["x0"], - a["x0"] > b["x1"], - a["bottom"] < b["top"], - a["top"] > b["bottom"]]) - - i = 0 - while i + 1 < len(layouts): - j = i + 1 - while j < min(i + far, len(layouts)) \ - and (layouts[i].get("type", "") != layouts[j].get("type", "") - or notOverlapped(layouts[i], layouts[j])): - j += 1 - if j >= min(i + far, len(layouts)): - i += 1 - continue - if Recognizer.overlapped_area(layouts[i], layouts[j]) < thr \ - and Recognizer.overlapped_area(layouts[j], layouts[i]) < thr: - i += 1 - continue - - if layouts[i].get("score") and layouts[j].get("score"): - if layouts[i]["score"] > layouts[j]["score"]: - layouts.pop(j) - else: - layouts.pop(i) - continue - - area_i, area_i_1 = 0, 0 - for b in boxes: - if not notOverlapped(b, layouts[i]): - area_i += Recognizer.overlapped_area(b, layouts[i], False) - if not notOverlapped(b, layouts[j]): - area_i_1 += Recognizer.overlapped_area(b, layouts[j], False) - - if area_i > area_i_1: - layouts.pop(j) - else: - layouts.pop(i) - - return layouts - - def create_inputs(self, imgs, im_info): - """generate input for different model type - Args: - imgs (list(numpy)): list of images (np.ndarray) - im_info (list(dict)): list of image info - Returns: - inputs (dict): input of model - """ - inputs = {} - - im_shape = [] - scale_factor = [] - if len(imgs) == 1: - inputs['image'] = np.array((imgs[0],)).astype('float32') - inputs['im_shape'] = np.array( - (im_info[0]['im_shape'],)).astype('float32') - inputs['scale_factor'] = np.array( - (im_info[0]['scale_factor'],)).astype('float32') - return inputs - - for e in im_info: - im_shape.append(np.array((e['im_shape'],)).astype('float32')) - scale_factor.append(np.array((e['scale_factor'],)).astype('float32')) - - inputs['im_shape'] = np.concatenate(im_shape, axis=0) - inputs['scale_factor'] = np.concatenate(scale_factor, axis=0) - - imgs_shape = [[e.shape[1], e.shape[2]] for e in imgs] - max_shape_h = max([e[0] for e in imgs_shape]) - max_shape_w = max([e[1] for e in imgs_shape]) - padding_imgs = [] - for img in imgs: - im_c, im_h, im_w = img.shape[:] - padding_im = np.zeros( - (im_c, max_shape_h, max_shape_w), dtype=np.float32) - padding_im[:, :im_h, :im_w] = img - padding_imgs.append(padding_im) - inputs['image'] = np.stack(padding_imgs, axis=0) - return inputs - - @staticmethod - def find_overlapped(box, boxes_sorted_by_y, naive=False): - if not boxes_sorted_by_y: - return - bxs = boxes_sorted_by_y - s, e, ii = 0, len(bxs), 0 - while s < e and not naive: - ii = (e + s) // 2 - pv = bxs[ii] - if box["bottom"] < pv["top"]: - e = ii - continue - if box["top"] > pv["bottom"]: - s = ii + 1 - continue - break - while s < ii: - if box["top"] > bxs[s]["bottom"]: - s += 1 - break - while e - 1 > ii: - if box["bottom"] < bxs[e - 1]["top"]: - e -= 1 - break - - max_overlaped_i, max_overlaped = None, 0 - for i in range(s, e): - ov = Recognizer.overlapped_area(bxs[i], box) - if ov <= max_overlaped: - continue - max_overlaped_i = i - max_overlaped = ov - - return max_overlaped_i - 
- @staticmethod - def find_horizontally_tightest_fit(box, boxes): - if not boxes: - return - min_dis, min_i = 1000000, None - for i,b in enumerate(boxes): - if box.get("layoutno", "0") != b.get("layoutno", "0"): continue - dis = min(abs(box["x0"] - b["x0"]), abs(box["x1"] - b["x1"]), abs(box["x0"]+box["x1"] - b["x1"] - b["x0"])/2) - if dis < min_dis: - min_i = i - min_dis = dis - return min_i - - @staticmethod - def find_overlapped_with_threashold(box, boxes, thr=0.3): - if not boxes: - return - max_overlapped_i, max_overlapped, _max_overlapped = None, thr, 0 - s, e = 0, len(boxes) - for i in range(s, e): - ov = Recognizer.overlapped_area(box, boxes[i]) - _ov = Recognizer.overlapped_area(boxes[i], box) - if (ov, _ov) < (max_overlapped, _max_overlapped): - continue - max_overlapped_i = i - max_overlapped = ov - _max_overlapped = _ov - - return max_overlapped_i - - def preprocess(self, image_list): - inputs = [] - if "scale_factor" in self.input_names: - preprocess_ops = [] - for op_info in [ - {'interp': 2, 'keep_ratio': False, 'target_size': [800, 608], 'type': 'LinearResize'}, - {'is_scale': True, 'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225], 'type': 'StandardizeImage'}, - {'type': 'Permute'}, - {'stride': 32, 'type': 'PadStride'} - ]: - new_op_info = op_info.copy() - op_type = new_op_info.pop('type') - preprocess_ops.append(eval(op_type)(**new_op_info)) - - for im_path in image_list: - im, im_info = preprocess(im_path, preprocess_ops) - inputs.append({"image": np.array((im,)).astype('float32'), - "scale_factor": np.array((im_info["scale_factor"],)).astype('float32')}) - else: - hh, ww = self.input_shape - for img in image_list: - h, w = img.shape[:2] - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img = cv2.resize(np.array(img).astype('float32'), (ww, hh)) - # Scale input pixel values to 0 to 1 - img /= 255.0 - img = img.transpose(2, 0, 1) - img = img[np.newaxis, :, :, :].astype(np.float32) - inputs.append({self.input_names[0]: img, "scale_factor": [w/ww, h/hh]}) - return inputs - - def postprocess(self, boxes, inputs, thr): - if "scale_factor" in self.input_names: - bb = [] - for b in boxes: - clsid, bbox, score = int(b[0]), b[2:], b[1] - if score < thr: - continue - if clsid >= len(self.label_list): - continue - bb.append({ - "type": self.label_list[clsid].lower(), - "bbox": [float(t) for t in bbox.tolist()], - "score": float(score) - }) - return bb - - def xywh2xyxy(x): - # [x, y, w, h] to [x1, y1, x2, y2] - y = np.copy(x) - y[:, 0] = x[:, 0] - x[:, 2] / 2 - y[:, 1] = x[:, 1] - x[:, 3] / 2 - y[:, 2] = x[:, 0] + x[:, 2] / 2 - y[:, 3] = x[:, 1] + x[:, 3] / 2 - return y - - def compute_iou(box, boxes): - # Compute xmin, ymin, xmax, ymax for both boxes - xmin = np.maximum(box[0], boxes[:, 0]) - ymin = np.maximum(box[1], boxes[:, 1]) - xmax = np.minimum(box[2], boxes[:, 2]) - ymax = np.minimum(box[3], boxes[:, 3]) - - # Compute intersection area - intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin) - - # Compute union area - box_area = (box[2] - box[0]) * (box[3] - box[1]) - boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) - union_area = box_area + boxes_area - intersection_area - - # Compute IoU - iou = intersection_area / union_area - - return iou - - def iou_filter(boxes, scores, iou_threshold): - sorted_indices = np.argsort(scores)[::-1] - - keep_boxes = [] - while sorted_indices.size > 0: - # Pick the last box - box_id = sorted_indices[0] - keep_boxes.append(box_id) - - # Compute IoU of the picked box with the rest - ious = 
compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :]) - - # Remove boxes with IoU over the threshold - keep_indices = np.where(ious < iou_threshold)[0] - - # print(keep_indices.shape, sorted_indices.shape) - sorted_indices = sorted_indices[keep_indices + 1] - - return keep_boxes - - boxes = np.squeeze(boxes).T - # Filter out object confidence scores below threshold - scores = np.max(boxes[:, 4:], axis=1) - boxes = boxes[scores > thr, :] - scores = scores[scores > thr] - if len(boxes) == 0: return [] - - # Get the class with the highest confidence - class_ids = np.argmax(boxes[:, 4:], axis=1) - boxes = boxes[:, :4] - input_shape = np.array([inputs["scale_factor"][0], inputs["scale_factor"][1], inputs["scale_factor"][0], inputs["scale_factor"][1]]) - boxes = np.multiply(boxes, input_shape, dtype=np.float32) - boxes = xywh2xyxy(boxes) - - unique_class_ids = np.unique(class_ids) - indices = [] - for class_id in unique_class_ids: - class_indices = np.where(class_ids == class_id)[0] - class_boxes = boxes[class_indices, :] - class_scores = scores[class_indices] - class_keep_boxes = iou_filter(class_boxes, class_scores, 0.2) - indices.extend(class_indices[class_keep_boxes]) - - return [{ - "type": self.label_list[class_ids[i]].lower(), - "bbox": [float(t) for t in boxes[i].tolist()], - "score": float(scores[i]) - } for i in indices] - - def __call__(self, image_list, thr=0.7, batch_size=16): - res = [] - imgs = [] - for i in range(len(image_list)): - if not isinstance(image_list[i], np.ndarray): - imgs.append(np.array(image_list[i])) - else: imgs.append(image_list[i]) - - batch_loop_cnt = math.ceil(float(len(imgs)) / batch_size) - for i in range(batch_loop_cnt): - start_index = i * batch_size - end_index = min((i + 1) * batch_size, len(imgs)) - batch_image_list = imgs[start_index:end_index] - inputs = self.preprocess(batch_image_list) - print("preprocess") - for ins in inputs: - bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names})[0], ins, thr) - res.append(bb) - - #seeit.save_results(image_list, res, self.label_list, threshold=thr) - - return res - - - +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +from copy import deepcopy + +import onnxruntime as ort +from huggingface_hub import snapshot_download + +from api.utils.file_utils import get_project_base_directory +from .operators import * + + +class Recognizer(object): + def __init__(self, label_list, task_name, model_dir=None): + """ + If you have trouble downloading HuggingFace models, -_^ this might help!! 
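+
+        Set the variable in your shell before starting the service: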
+ + For Linux: + export HF_ENDPOINT=https://hf-mirror.com + + For Windows: + Good luck + ^_- + + """ + if not model_dir: + model_dir = os.path.join( + get_project_base_directory(), + "rag/res/deepdoc") + model_file_path = os.path.join(model_dir, task_name + ".onnx") + if not os.path.exists(model_file_path): + model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc", + local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"), + local_dir_use_symlinks=False) + model_file_path = os.path.join(model_dir, task_name + ".onnx") + else: + model_file_path = os.path.join(model_dir, task_name + ".onnx") + + if not os.path.exists(model_file_path): + raise ValueError("not find model file path {}".format( + model_file_path)) + if False and ort.get_device() == "GPU": + options = ort.SessionOptions() + options.enable_cpu_mem_arena = False + self.ort_sess = ort.InferenceSession(model_file_path, options=options, providers=[('CUDAExecutionProvider')]) + else: + self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider']) + self.input_names = [node.name for node in self.ort_sess.get_inputs()] + self.output_names = [node.name for node in self.ort_sess.get_outputs()] + self.input_shape = self.ort_sess.get_inputs()[0].shape[2:4] + self.label_list = label_list + + @staticmethod + def sort_Y_firstly(arr, threashold): + # sort using y1 first and then x1 + arr = sorted(arr, key=lambda r: (r["top"], r["x0"])) + for i in range(len(arr) - 1): + for j in range(i, -1, -1): + # restore the order using th + if abs(arr[j + 1]["top"] - arr[j]["top"]) < threashold \ + and arr[j + 1]["x0"] < arr[j]["x0"]: + tmp = deepcopy(arr[j]) + arr[j] = deepcopy(arr[j + 1]) + arr[j + 1] = deepcopy(tmp) + return arr + + @staticmethod + def sort_X_firstly(arr, threashold, copy=True): + # sort using y1 first and then x1 + arr = sorted(arr, key=lambda r: (r["x0"], r["top"])) + for i in range(len(arr) - 1): + for j in range(i, -1, -1): + # restore the order using th + if abs(arr[j + 1]["x0"] - arr[j]["x0"]) < threashold \ + and arr[j + 1]["top"] < arr[j]["top"]: + tmp = deepcopy(arr[j]) if copy else arr[j] + arr[j] = deepcopy(arr[j + 1]) if copy else arr[j + 1] + arr[j + 1] = deepcopy(tmp) if copy else tmp + return arr + + @staticmethod + def sort_C_firstly(arr, thr=0): + # sort using y1 first and then x1 + # sorted(arr, key=lambda r: (r["x0"], r["top"])) + arr = Recognizer.sort_X_firstly(arr, thr) + for i in range(len(arr) - 1): + for j in range(i, -1, -1): + # restore the order using th + if "C" not in arr[j] or "C" not in arr[j + 1]: + continue + if arr[j + 1]["C"] < arr[j]["C"] \ + or ( + arr[j + 1]["C"] == arr[j]["C"] + and arr[j + 1]["top"] < arr[j]["top"] + ): + tmp = arr[j] + arr[j] = arr[j + 1] + arr[j + 1] = tmp + return arr + + return sorted(arr, key=lambda r: (r.get("C", r["x0"]), r["top"])) + + @staticmethod + def sort_R_firstly(arr, thr=0): + # sort using y1 first and then x1 + # sorted(arr, key=lambda r: (r["top"], r["x0"])) + arr = Recognizer.sort_Y_firstly(arr, thr) + for i in range(len(arr) - 1): + for j in range(i, -1, -1): + if "R" not in arr[j] or "R" not in arr[j + 1]: + continue + if arr[j + 1]["R"] < arr[j]["R"] \ + or ( + arr[j + 1]["R"] == arr[j]["R"] + and arr[j + 1]["x0"] < arr[j]["x0"] + ): + tmp = arr[j] + arr[j] = arr[j + 1] + arr[j + 1] = tmp + return arr + + @staticmethod + def overlapped_area(a, b, ratio=True): + tp, btm, x0, x1 = a["top"], a["bottom"], a["x0"], a["x1"] + if b["x0"] > x1 or b["x1"] < x0: + return 0 + if b["bottom"] < tp or b["top"] > btm: + return 0 
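+        # The early returns above have already rejected disjoint boxes, so
+        # the clipped rectangle computed below is well-formed; the asserts
+        # only catch inputs whose coordinates are inverted.
+        # With ratio=True (the default) the overlap is normalized by the
+        # area of `a`, i.e. the fraction of `a` covered by `b`, not IoU.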
+ x0_ = max(b["x0"], x0) + x1_ = min(b["x1"], x1) + assert x0_ <= x1_, "Fuckedup! T:{},B:{},X0:{},X1:{} ==> {}".format( + tp, btm, x0, x1, b) + tp_ = max(b["top"], tp) + btm_ = min(b["bottom"], btm) + assert tp_ <= btm_, "Fuckedup! T:{},B:{},X0:{},X1:{} => {}".format( + tp, btm, x0, x1, b) + ov = (btm_ - tp_) * (x1_ - x0_) if x1 - \ + x0 != 0 and btm - tp != 0 else 0 + if ov > 0 and ratio: + ov /= (x1 - x0) * (btm - tp) + return ov + + @staticmethod + def layouts_cleanup(boxes, layouts, far=2, thr=0.7): + def notOverlapped(a, b): + return any([a["x1"] < b["x0"], + a["x0"] > b["x1"], + a["bottom"] < b["top"], + a["top"] > b["bottom"]]) + + i = 0 + while i + 1 < len(layouts): + j = i + 1 + while j < min(i + far, len(layouts)) \ + and (layouts[i].get("type", "") != layouts[j].get("type", "") + or notOverlapped(layouts[i], layouts[j])): + j += 1 + if j >= min(i + far, len(layouts)): + i += 1 + continue + if Recognizer.overlapped_area(layouts[i], layouts[j]) < thr \ + and Recognizer.overlapped_area(layouts[j], layouts[i]) < thr: + i += 1 + continue + + if layouts[i].get("score") and layouts[j].get("score"): + if layouts[i]["score"] > layouts[j]["score"]: + layouts.pop(j) + else: + layouts.pop(i) + continue + + area_i, area_i_1 = 0, 0 + for b in boxes: + if not notOverlapped(b, layouts[i]): + area_i += Recognizer.overlapped_area(b, layouts[i], False) + if not notOverlapped(b, layouts[j]): + area_i_1 += Recognizer.overlapped_area(b, layouts[j], False) + + if area_i > area_i_1: + layouts.pop(j) + else: + layouts.pop(i) + + return layouts + + def create_inputs(self, imgs, im_info): + """generate input for different model type + Args: + imgs (list(numpy)): list of images (np.ndarray) + im_info (list(dict)): list of image info + Returns: + inputs (dict): input of model + """ + inputs = {} + + im_shape = [] + scale_factor = [] + if len(imgs) == 1: + inputs['image'] = np.array((imgs[0],)).astype('float32') + inputs['im_shape'] = np.array( + (im_info[0]['im_shape'],)).astype('float32') + inputs['scale_factor'] = np.array( + (im_info[0]['scale_factor'],)).astype('float32') + return inputs + + for e in im_info: + im_shape.append(np.array((e['im_shape'],)).astype('float32')) + scale_factor.append(np.array((e['scale_factor'],)).astype('float32')) + + inputs['im_shape'] = np.concatenate(im_shape, axis=0) + inputs['scale_factor'] = np.concatenate(scale_factor, axis=0) + + imgs_shape = [[e.shape[1], e.shape[2]] for e in imgs] + max_shape_h = max([e[0] for e in imgs_shape]) + max_shape_w = max([e[1] for e in imgs_shape]) + padding_imgs = [] + for img in imgs: + im_c, im_h, im_w = img.shape[:] + padding_im = np.zeros( + (im_c, max_shape_h, max_shape_w), dtype=np.float32) + padding_im[:, :im_h, :im_w] = img + padding_imgs.append(padding_im) + inputs['image'] = np.stack(padding_imgs, axis=0) + return inputs + + @staticmethod + def find_overlapped(box, boxes_sorted_by_y, naive=False): + if not boxes_sorted_by_y: + return + bxs = boxes_sorted_by_y + s, e, ii = 0, len(bxs), 0 + while s < e and not naive: + ii = (e + s) // 2 + pv = bxs[ii] + if box["bottom"] < pv["top"]: + e = ii + continue + if box["top"] > pv["bottom"]: + s = ii + 1 + continue + break + while s < ii: + if box["top"] > bxs[s]["bottom"]: + s += 1 + break + while e - 1 > ii: + if box["bottom"] < bxs[e - 1]["top"]: + e -= 1 + break + + max_overlaped_i, max_overlaped = None, 0 + for i in range(s, e): + ov = Recognizer.overlapped_area(bxs[i], box) + if ov <= max_overlaped: + continue + max_overlaped_i = i + max_overlaped = ov + + return max_overlaped_i + 
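+    # Illustrative usage sketch (not part of the original API):
+    # find_overlapped binary-searches a y-sorted list, so callers are
+    # expected to sort first. `page_boxes` is a hypothetical list of
+    # {"top", "bottom", "x0", "x1"} dicts:
+    #
+    #   boxes = Recognizer.sort_Y_firstly(page_boxes, 10)
+    #   i = Recognizer.find_overlapped(
+    #       {"top": 100, "bottom": 120, "x0": 50, "x1": 200}, boxes)
+    #   if i is not None:
+    #       hit = boxes[i]  # largest overlap, as a fraction of hit's area
+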
+ @staticmethod + def find_horizontally_tightest_fit(box, boxes): + if not boxes: + return + min_dis, min_i = 1000000, None + for i,b in enumerate(boxes): + if box.get("layoutno", "0") != b.get("layoutno", "0"): continue + dis = min(abs(box["x0"] - b["x0"]), abs(box["x1"] - b["x1"]), abs(box["x0"]+box["x1"] - b["x1"] - b["x0"])/2) + if dis < min_dis: + min_i = i + min_dis = dis + return min_i + + @staticmethod + def find_overlapped_with_threashold(box, boxes, thr=0.3): + if not boxes: + return + max_overlapped_i, max_overlapped, _max_overlapped = None, thr, 0 + s, e = 0, len(boxes) + for i in range(s, e): + ov = Recognizer.overlapped_area(box, boxes[i]) + _ov = Recognizer.overlapped_area(boxes[i], box) + if (ov, _ov) < (max_overlapped, _max_overlapped): + continue + max_overlapped_i = i + max_overlapped = ov + _max_overlapped = _ov + + return max_overlapped_i + + def preprocess(self, image_list): + inputs = [] + if "scale_factor" in self.input_names: + preprocess_ops = [] + for op_info in [ + {'interp': 2, 'keep_ratio': False, 'target_size': [800, 608], 'type': 'LinearResize'}, + {'is_scale': True, 'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225], 'type': 'StandardizeImage'}, + {'type': 'Permute'}, + {'stride': 32, 'type': 'PadStride'} + ]: + new_op_info = op_info.copy() + op_type = new_op_info.pop('type') + preprocess_ops.append(eval(op_type)(**new_op_info)) + + for im_path in image_list: + im, im_info = preprocess(im_path, preprocess_ops) + inputs.append({"image": np.array((im,)).astype('float32'), + "scale_factor": np.array((im_info["scale_factor"],)).astype('float32')}) + else: + hh, ww = self.input_shape + for img in image_list: + h, w = img.shape[:2] + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = cv2.resize(np.array(img).astype('float32'), (ww, hh)) + # Scale input pixel values to 0 to 1 + img /= 255.0 + img = img.transpose(2, 0, 1) + img = img[np.newaxis, :, :, :].astype(np.float32) + inputs.append({self.input_names[0]: img, "scale_factor": [w/ww, h/hh]}) + return inputs + + def postprocess(self, boxes, inputs, thr): + if "scale_factor" in self.input_names: + bb = [] + for b in boxes: + clsid, bbox, score = int(b[0]), b[2:], b[1] + if score < thr: + continue + if clsid >= len(self.label_list): + continue + bb.append({ + "type": self.label_list[clsid].lower(), + "bbox": [float(t) for t in bbox.tolist()], + "score": float(score) + }) + return bb + + def xywh2xyxy(x): + # [x, y, w, h] to [x1, y1, x2, y2] + y = np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 + y[:, 1] = x[:, 1] - x[:, 3] / 2 + y[:, 2] = x[:, 0] + x[:, 2] / 2 + y[:, 3] = x[:, 1] + x[:, 3] / 2 + return y + + def compute_iou(box, boxes): + # Compute xmin, ymin, xmax, ymax for both boxes + xmin = np.maximum(box[0], boxes[:, 0]) + ymin = np.maximum(box[1], boxes[:, 1]) + xmax = np.minimum(box[2], boxes[:, 2]) + ymax = np.minimum(box[3], boxes[:, 3]) + + # Compute intersection area + intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin) + + # Compute union area + box_area = (box[2] - box[0]) * (box[3] - box[1]) + boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + union_area = box_area + boxes_area - intersection_area + + # Compute IoU + iou = intersection_area / union_area + + return iou + + def iou_filter(boxes, scores, iou_threshold): + sorted_indices = np.argsort(scores)[::-1] + + keep_boxes = [] + while sorted_indices.size > 0: + # Pick the last box + box_id = sorted_indices[0] + keep_boxes.append(box_id) + + # Compute IoU of the picked box with the rest + ious = 
compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :]) + + # Remove boxes with IoU over the threshold + keep_indices = np.where(ious < iou_threshold)[0] + + # print(keep_indices.shape, sorted_indices.shape) + sorted_indices = sorted_indices[keep_indices + 1] + + return keep_boxes + + boxes = np.squeeze(boxes).T + # Filter out object confidence scores below threshold + scores = np.max(boxes[:, 4:], axis=1) + boxes = boxes[scores > thr, :] + scores = scores[scores > thr] + if len(boxes) == 0: return [] + + # Get the class with the highest confidence + class_ids = np.argmax(boxes[:, 4:], axis=1) + boxes = boxes[:, :4] + input_shape = np.array([inputs["scale_factor"][0], inputs["scale_factor"][1], inputs["scale_factor"][0], inputs["scale_factor"][1]]) + boxes = np.multiply(boxes, input_shape, dtype=np.float32) + boxes = xywh2xyxy(boxes) + + unique_class_ids = np.unique(class_ids) + indices = [] + for class_id in unique_class_ids: + class_indices = np.where(class_ids == class_id)[0] + class_boxes = boxes[class_indices, :] + class_scores = scores[class_indices] + class_keep_boxes = iou_filter(class_boxes, class_scores, 0.2) + indices.extend(class_indices[class_keep_boxes]) + + return [{ + "type": self.label_list[class_ids[i]].lower(), + "bbox": [float(t) for t in boxes[i].tolist()], + "score": float(scores[i]) + } for i in indices] + + def __call__(self, image_list, thr=0.7, batch_size=16): + res = [] + imgs = [] + for i in range(len(image_list)): + if not isinstance(image_list[i], np.ndarray): + imgs.append(np.array(image_list[i])) + else: imgs.append(image_list[i]) + + batch_loop_cnt = math.ceil(float(len(imgs)) / batch_size) + for i in range(batch_loop_cnt): + start_index = i * batch_size + end_index = min((i + 1) * batch_size, len(imgs)) + batch_image_list = imgs[start_index:end_index] + inputs = self.preprocess(batch_image_list) + print("preprocess") + for ins in inputs: + bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names})[0], ins, thr) + res.append(bb) + + #seeit.save_results(image_list, res, self.label_list, threshold=thr) + + return res + + + diff --git a/deepdoc/vision/seeit.py b/deepdoc/vision/seeit.py index 70e547f0cc786f558d33b2b2bb7dc2122571355d..96046d76e5ec9a0fbb96149e086a4e87977dc78c 100644 --- a/deepdoc/vision/seeit.py +++ b/deepdoc/vision/seeit.py @@ -1,83 +1,83 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import os -import PIL -from PIL import ImageDraw - - -def save_results(image_list, results, labels, output_dir='output/', threshold=0.5): - if not os.path.exists(output_dir): - os.makedirs(output_dir) - for idx, im in enumerate(image_list): - im = draw_box(im, results[idx], labels, threshold=threshold) - - out_path = os.path.join(output_dir, f"{idx}.jpg") - im.save(out_path, quality=95) - print("save result to: " + out_path) - - -def draw_box(im, result, lables, threshold=0.5): - draw_thickness = min(im.size) // 320 - draw = ImageDraw.Draw(im) - color_list = get_color_map_list(len(lables)) - clsid2color = {n.lower():color_list[i] for i,n in enumerate(lables)} - result = [r for r in result if r["score"] >= threshold] - - for dt in result: - color = tuple(clsid2color[dt["type"]]) - xmin, ymin, xmax, ymax = dt["bbox"] - draw.line( - [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), - (xmin, ymin)], - width=draw_thickness, - fill=color) - - # draw label - text = "{} {:.4f}".format(dt["type"], dt["score"]) - tw, th = imagedraw_textsize_c(draw, text) - draw.rectangle( - [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color) - draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255)) - return im - - -def get_color_map_list(num_classes): - """ - Args: - num_classes (int): number of class - Returns: - color_map (list): RGB color list - """ - color_map = num_classes * [0, 0, 0] - for i in range(0, num_classes): - j = 0 - lab = i - while lab: - color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)) - color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)) - color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)) - j += 1 - lab >>= 3 - color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)] - return color_map - - -def imagedraw_textsize_c(draw, text): - if int(PIL.__version__.split('.')[0]) < 10: - tw, th = draw.textsize(text) - else: - left, top, right, bottom = draw.textbbox((0, 0), text) - tw, th = right - left, bottom - top - - return tw, th +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import PIL +from PIL import ImageDraw + + +def save_results(image_list, results, labels, output_dir='output/', threshold=0.5): + if not os.path.exists(output_dir): + os.makedirs(output_dir) + for idx, im in enumerate(image_list): + im = draw_box(im, results[idx], labels, threshold=threshold) + + out_path = os.path.join(output_dir, f"{idx}.jpg") + im.save(out_path, quality=95) + print("save result to: " + out_path) + + +def draw_box(im, result, lables, threshold=0.5): + draw_thickness = min(im.size) // 320 + draw = ImageDraw.Draw(im) + color_list = get_color_map_list(len(lables)) + clsid2color = {n.lower():color_list[i] for i,n in enumerate(lables)} + result = [r for r in result if r["score"] >= threshold] + + for dt in result: + color = tuple(clsid2color[dt["type"]]) + xmin, ymin, xmax, ymax = dt["bbox"] + draw.line( + [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), + (xmin, ymin)], + width=draw_thickness, + fill=color) + + # draw label + text = "{} {:.4f}".format(dt["type"], dt["score"]) + tw, th = imagedraw_textsize_c(draw, text) + draw.rectangle( + [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color) + draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255)) + return im + + +def get_color_map_list(num_classes): + """ + Args: + num_classes (int): number of class + Returns: + color_map (list): RGB color list + """ + color_map = num_classes * [0, 0, 0] + for i in range(0, num_classes): + j = 0 + lab = i + while lab: + color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)) + color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)) + color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)) + j += 1 + lab >>= 3 + color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)] + return color_map + + +def imagedraw_textsize_c(draw, text): + if int(PIL.__version__.split('.')[0]) < 10: + tw, th = draw.textsize(text) + else: + left, top, right, bottom = draw.textbbox((0, 0), text) + tw, th = right - left, bottom - top + + return tw, th diff --git a/deepdoc/vision/t_ocr.py b/deepdoc/vision/t_ocr.py index 37a87fd3f569023cde689828cb1ca85188e5011b..910b91be1a1567ce11a403003fbaef1b44f4505c 100644 --- a/deepdoc/vision/t_ocr.py +++ b/deepdoc/vision/t_ocr.py @@ -1,56 +1,56 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import os -import sys -sys.path.insert( - 0, - os.path.abspath( - os.path.join( - os.path.dirname( - os.path.abspath(__file__)), - '../../'))) - -from deepdoc.vision.seeit import draw_box -from deepdoc.vision import OCR, init_in_out -import argparse -import numpy as np - - -def main(args): - ocr = OCR() - images, outputs = init_in_out(args) - - for i, img in enumerate(images): - bxs = ocr(np.array(img)) - bxs = [(line[0], line[1][0]) for line in bxs] - bxs = [{ - "text": t, - "bbox": [b[0][0], b[0][1], b[1][0], b[-1][1]], - "type": "ocr", - "score": 1} for b, t in bxs if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]] - img = draw_box(images[i], bxs, ["ocr"], 1.) 
- img.save(outputs[i], quality=95) - with open(outputs[i] + ".txt", "w+") as f: - f.write("\n".join([o["text"] for o in bxs])) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--inputs', - help="Directory where to store images or PDFs, or a file path to a single image or PDF", - required=True) - parser.add_argument('--output_dir', help="Directory where to store the output images. Default: './ocr_outputs'", - default="./ocr_outputs") - args = parser.parse_args() - main(args) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import sys +sys.path.insert( + 0, + os.path.abspath( + os.path.join( + os.path.dirname( + os.path.abspath(__file__)), + '../../'))) + +from deepdoc.vision.seeit import draw_box +from deepdoc.vision import OCR, init_in_out +import argparse +import numpy as np + + +def main(args): + ocr = OCR() + images, outputs = init_in_out(args) + + for i, img in enumerate(images): + bxs = ocr(np.array(img)) + bxs = [(line[0], line[1][0]) for line in bxs] + bxs = [{ + "text": t, + "bbox": [b[0][0], b[0][1], b[1][0], b[-1][1]], + "type": "ocr", + "score": 1} for b, t in bxs if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]] + img = draw_box(images[i], bxs, ["ocr"], 1.) + img.save(outputs[i], quality=95) + with open(outputs[i] + ".txt", "w+") as f: + f.write("\n".join([o["text"] for o in bxs])) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--inputs', + help="Directory where to store images or PDFs, or a file path to a single image or PDF", + required=True) + parser.add_argument('--output_dir', help="Directory where to store the output images. Default: './ocr_outputs'", + default="./ocr_outputs") + args = parser.parse_args() + main(args) diff --git a/deepdoc/vision/t_recognizer.py b/deepdoc/vision/t_recognizer.py index 5a1c8592db8e06df9adf0a78cd1f725be842730d..7f9ff8e031aef24765bee9e21c10dba3274b7ca3 100644 --- a/deepdoc/vision/t_recognizer.py +++ b/deepdoc/vision/t_recognizer.py @@ -1,187 +1,187 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import os, sys -sys.path.insert( - 0, - os.path.abspath( - os.path.join( - os.path.dirname( - os.path.abspath(__file__)), - '../../'))) - -from deepdoc.vision.seeit import draw_box -from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out -from api.utils.file_utils import get_project_base_directory -import argparse -import re -import numpy as np - - -def main(args): - images, outputs = init_in_out(args) - if args.mode.lower() == "layout": - labels = LayoutRecognizer.labels - detr = Recognizer( - labels, - "layout", - os.path.join( - get_project_base_directory(), - "rag/res/deepdoc/")) - if args.mode.lower() == "tsr": - labels = TableStructureRecognizer.labels - detr = TableStructureRecognizer() - ocr = OCR() - - layouts = detr(images, float(args.threshold)) - for i, lyt in enumerate(layouts): - if args.mode.lower() == "tsr": - #lyt = [t for t in lyt if t["type"] == "table column"] - html = get_table_html(images[i], lyt, ocr) - with open(outputs[i] + ".html", "w+") as f: - f.write(html) - lyt = [{ - "type": t["label"], - "bbox": [t["x0"], t["top"], t["x1"], t["bottom"]], - "score": t["score"] - } for t in lyt] - img = draw_box(images[i], lyt, labels, float(args.threshold)) - img.save(outputs[i], quality=95) - print("save result to: " + outputs[i]) - - -def get_table_html(img, tb_cpns, ocr): - boxes = ocr(np.array(img)) - boxes = Recognizer.sort_Y_firstly( - [{"x0": b[0][0], "x1": b[1][0], - "top": b[0][1], "text": t[0], - "bottom": b[-1][1], - "layout_type": "table", - "page_number": 0} for b, t in boxes if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]], - np.mean([b[-1][1] - b[0][1] for b, _ in boxes]) / 3 - ) - - def gather(kwd, fzy=10, ption=0.6): - nonlocal boxes - eles = Recognizer.sort_Y_firstly( - [r for r in tb_cpns if re.match(kwd, r["label"])], fzy) - eles = Recognizer.layouts_cleanup(boxes, eles, 5, ption) - return Recognizer.sort_Y_firstly(eles, 0) - - headers = gather(r".*header$") - rows = gather(r".* (row|header)") - spans = gather(r".*spanning") - clmns = sorted([r for r in tb_cpns if re.match( - r"table column$", r["label"])], key=lambda x: x["x0"]) - clmns = Recognizer.layouts_cleanup(boxes, clmns, 5, 0.5) - - for b in boxes: - ii = Recognizer.find_overlapped_with_threashold(b, rows, thr=0.3) - if ii is not None: - b["R"] = ii - b["R_top"] = rows[ii]["top"] - b["R_bott"] = rows[ii]["bottom"] - - ii = Recognizer.find_overlapped_with_threashold(b, headers, thr=0.3) - if ii is not None: - b["H_top"] = headers[ii]["top"] - b["H_bott"] = headers[ii]["bottom"] - b["H_left"] = headers[ii]["x0"] - b["H_right"] = headers[ii]["x1"] - b["H"] = ii - - ii = Recognizer.find_horizontally_tightest_fit(b, clmns) - if ii is not None: - b["C"] = ii - b["C_left"] = clmns[ii]["x0"] - b["C_right"] = clmns[ii]["x1"] - - ii = Recognizer.find_overlapped_with_threashold(b, spans, thr=0.3) - if ii is not None: - b["H_top"] = spans[ii]["top"] - b["H_bott"] = spans[ii]["bottom"] - b["H_left"] = spans[ii]["x0"] - b["H_right"] = spans[ii]["x1"] - b["SP"] = ii - - html = """ - - - - - - %s - - -""" % TableStructureRecognizer.construct_table(boxes, html=True) - return html - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--inputs', - help="Directory where to store images or PDFs, or a file path to a single image or PDF", - required=True) - parser.add_argument('--output_dir', help="Directory where to store the output images. 
Default: './layouts_outputs'", - default="./layouts_outputs") - parser.add_argument( - '--threshold', - help="A threshold to filter out detections. Default: 0.5", - default=0.5) - parser.add_argument('--mode', help="Task mode: layout recognition or table structure recognition", choices=["layout", "tsr"], - default="layout") - args = parser.parse_args() - main(args) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os, sys +sys.path.insert( + 0, + os.path.abspath( + os.path.join( + os.path.dirname( + os.path.abspath(__file__)), + '../../'))) + +from deepdoc.vision.seeit import draw_box +from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out +from api.utils.file_utils import get_project_base_directory +import argparse +import re +import numpy as np + + +def main(args): + images, outputs = init_in_out(args) + if args.mode.lower() == "layout": + labels = LayoutRecognizer.labels + detr = Recognizer( + labels, + "layout", + os.path.join( + get_project_base_directory(), + "rag/res/deepdoc/")) + if args.mode.lower() == "tsr": + labels = TableStructureRecognizer.labels + detr = TableStructureRecognizer() + ocr = OCR() + + layouts = detr(images, float(args.threshold)) + for i, lyt in enumerate(layouts): + if args.mode.lower() == "tsr": + #lyt = [t for t in lyt if t["type"] == "table column"] + html = get_table_html(images[i], lyt, ocr) + with open(outputs[i] + ".html", "w+") as f: + f.write(html) + lyt = [{ + "type": t["label"], + "bbox": [t["x0"], t["top"], t["x1"], t["bottom"]], + "score": t["score"] + } for t in lyt] + img = draw_box(images[i], lyt, labels, float(args.threshold)) + img.save(outputs[i], quality=95) + print("save result to: " + outputs[i]) + + +def get_table_html(img, tb_cpns, ocr): + boxes = ocr(np.array(img)) + boxes = Recognizer.sort_Y_firstly( + [{"x0": b[0][0], "x1": b[1][0], + "top": b[0][1], "text": t[0], + "bottom": b[-1][1], + "layout_type": "table", + "page_number": 0} for b, t in boxes if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]], + np.mean([b[-1][1] - b[0][1] for b, _ in boxes]) / 3 + ) + + def gather(kwd, fzy=10, ption=0.6): + nonlocal boxes + eles = Recognizer.sort_Y_firstly( + [r for r in tb_cpns if re.match(kwd, r["label"])], fzy) + eles = Recognizer.layouts_cleanup(boxes, eles, 5, ption) + return Recognizer.sort_Y_firstly(eles, 0) + + headers = gather(r".*header$") + rows = gather(r".* (row|header)") + spans = gather(r".*spanning") + clmns = sorted([r for r in tb_cpns if re.match( + r"table column$", r["label"])], key=lambda x: x["x0"]) + clmns = Recognizer.layouts_cleanup(boxes, clmns, 5, 0.5) + + for b in boxes: + ii = Recognizer.find_overlapped_with_threashold(b, rows, thr=0.3) + if ii is not None: + b["R"] = ii + b["R_top"] = rows[ii]["top"] + b["R_bott"] = rows[ii]["bottom"] + + ii = Recognizer.find_overlapped_with_threashold(b, headers, thr=0.3) + if ii is not None: + b["H_top"] = headers[ii]["top"] + b["H_bott"] = headers[ii]["bottom"] + b["H_left"] = headers[ii]["x0"] + b["H_right"] = headers[ii]["x1"] + 
b["H"] = ii + + ii = Recognizer.find_horizontally_tightest_fit(b, clmns) + if ii is not None: + b["C"] = ii + b["C_left"] = clmns[ii]["x0"] + b["C_right"] = clmns[ii]["x1"] + + ii = Recognizer.find_overlapped_with_threashold(b, spans, thr=0.3) + if ii is not None: + b["H_top"] = spans[ii]["top"] + b["H_bott"] = spans[ii]["bottom"] + b["H_left"] = spans[ii]["x0"] + b["H_right"] = spans[ii]["x1"] + b["SP"] = ii + + html = """ + + + + + + %s + + +""" % TableStructureRecognizer.construct_table(boxes, html=True) + return html + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--inputs', + help="Directory where to store images or PDFs, or a file path to a single image or PDF", + required=True) + parser.add_argument('--output_dir', help="Directory where to store the output images. Default: './layouts_outputs'", + default="./layouts_outputs") + parser.add_argument( + '--threshold', + help="A threshold to filter out detections. Default: 0.5", + default=0.5) + parser.add_argument('--mode', help="Task mode: layout recognition or table structure recognition", choices=["layout", "tsr"], + default="layout") + args = parser.parse_args() + main(args) diff --git a/deepdoc/vision/table_structure_recognizer.py b/deepdoc/vision/table_structure_recognizer.py index 70d750bd94019639309aa41425c64e76ed4ea589..7ec6b1a62dd70fc6b46e58ca2c206802a7b5625a 100644 --- a/deepdoc/vision/table_structure_recognizer.py +++ b/deepdoc/vision/table_structure_recognizer.py @@ -1,584 +1,584 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import logging -import os -import re -from collections import Counter - -import numpy as np -from huggingface_hub import snapshot_download - -from api.utils.file_utils import get_project_base_directory -from rag.nlp import rag_tokenizer -from .recognizer import Recognizer - - -class TableStructureRecognizer(Recognizer): - labels = [ - "table", - "table column", - "table row", - "table column header", - "table projected row header", - "table spanning cell", - ] - - def __init__(self): - try: - super().__init__(self.labels, "tsr", os.path.join( - get_project_base_directory(), - "rag/res/deepdoc")) - except Exception as e: - super().__init__(self.labels, "tsr", snapshot_download(repo_id="InfiniFlow/deepdoc", - local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"), - local_dir_use_symlinks=False)) - - def __call__(self, images, thr=0.2): - tbls = super().__call__(images, thr) - res = [] - # align left&right for rows, align top&bottom for columns - for tbl in tbls: - lts = [{"label": b["type"], - "score": b["score"], - "x0": b["bbox"][0], "x1": b["bbox"][2], - "top": b["bbox"][1], "bottom": b["bbox"][-1] - } for b in tbl] - if not lts: - continue - - left = [b["x0"] for b in lts if b["label"].find( - "row") > 0 or b["label"].find("header") > 0] - right = [b["x1"] for b in lts if b["label"].find( - "row") > 0 or b["label"].find("header") > 0] - if not left: - continue - left = np.mean(left) if len(left) > 4 else np.min(left) - right = np.mean(right) if len(right) > 4 else np.max(right) - for b in lts: - if b["label"].find("row") > 0 or b["label"].find("header") > 0: - if b["x0"] > left: - b["x0"] = left - if b["x1"] < right: - b["x1"] = right - - top = [b["top"] for b in lts if b["label"] == "table column"] - bottom = [b["bottom"] for b in lts if b["label"] == "table column"] - if not top: - res.append(lts) - continue - top = np.median(top) if len(top) > 4 else np.min(top) - bottom = np.median(bottom) if len(bottom) > 4 else np.max(bottom) - for b in lts: - if b["label"] == "table column": - if b["top"] > top: - b["top"] = top - if b["bottom"] < bottom: - b["bottom"] = bottom - - res.append(lts) - return res - - @staticmethod - def is_caption(bx): - patt = [ - r"[图表]+[ 0-9::]{2,}" - ] - if any([re.match(p, bx["text"].strip()) for p in patt]) \ - or bx["layout_type"].find("caption") >= 0: - return True - return False - - @staticmethod - def blockType(b): - patt = [ - ("^(20|19)[0-9]{2}[年/-][0-9]{1,2}[月/-][0-9]{1,2}日*$", "Dt"), - (r"^(20|19)[0-9]{2}年$", "Dt"), - (r"^(20|19)[0-9]{2}[年-][0-9]{1,2}月*$", "Dt"), - ("^[0-9]{1,2}[月-][0-9]{1,2}日*$", "Dt"), - (r"^第*[一二三四1-4]季度$", "Dt"), - (r"^(20|19)[0-9]{2}年*[一二三四1-4]季度$", "Dt"), - (r"^(20|19)[0-9]{2}[ABCDE]$", "Dt"), - ("^[0-9.,+%/ -]+$", "Nu"), - (r"^[0-9A-Z/\._~-]+$", "Ca"), - (r"^[A-Z]*[a-z' -]+$", "En"), - (r"^[0-9.,+-]+[0-9A-Za-z/$¥%<>()()' -]+$", "NE"), - (r"^.{1}$", "Sg") - ] - for p, n in patt: - if re.search(p, b["text"].strip()): - return n - tks = [t for t in rag_tokenizer.tokenize(b["text"]).split(" ") if len(t) > 1] - if len(tks) > 3: - if len(tks) < 12: - return "Tx" - else: - return "Lx" - - if len(tks) == 1 and rag_tokenizer.tag(tks[0]) == "nr": - return "Nr" - - return "Ot" - - @staticmethod - def construct_table(boxes, is_english=False, html=False): - cap = "" - i = 0 - while i < len(boxes): - if TableStructureRecognizer.is_caption(boxes[i]): - if is_english: - cap + " " - cap += boxes[i]["text"] - boxes.pop(i) - i -= 1 - i += 1 - - if not boxes: - return [] - for b in boxes: - b["btype"] = 
TableStructureRecognizer.blockType(b) - max_type = Counter([b["btype"] for b in boxes]).items() - max_type = max(max_type, key=lambda x: x[1])[0] if max_type else "" - logging.debug("MAXTYPE: " + max_type) - - rowh = [b["R_bott"] - b["R_top"] for b in boxes if "R" in b] - rowh = np.min(rowh) if rowh else 0 - boxes = Recognizer.sort_R_firstly(boxes, rowh / 2) - #for b in boxes:print(b) - boxes[0]["rn"] = 0 - rows = [[boxes[0]]] - btm = boxes[0]["bottom"] - for b in boxes[1:]: - b["rn"] = len(rows) - 1 - lst_r = rows[-1] - if lst_r[-1].get("R", "") != b.get("R", "") \ - or (b["top"] >= btm - 3 and lst_r[-1].get("R", "-1") != b.get("R", "-2") - ): # new row - btm = b["bottom"] - b["rn"] += 1 - rows.append([b]) - continue - btm = (btm + b["bottom"]) / 2. - rows[-1].append(b) - - colwm = [b["C_right"] - b["C_left"] for b in boxes if "C" in b] - colwm = np.min(colwm) if colwm else 0 - crosspage = len(set([b["page_number"] for b in boxes])) > 1 - if crosspage: - boxes = Recognizer.sort_X_firstly(boxes, colwm / 2, False) - else: - boxes = Recognizer.sort_C_firstly(boxes, colwm / 2) - boxes[0]["cn"] = 0 - cols = [[boxes[0]]] - right = boxes[0]["x1"] - for b in boxes[1:]: - b["cn"] = len(cols) - 1 - lst_c = cols[-1] - if (int(b.get("C", "1")) - int(lst_c[-1].get("C", "1")) == 1 and b["page_number"] == lst_c[-1][ - "page_number"]) \ - or (b["x0"] >= right and lst_c[-1].get("C", "-1") != b.get("C", "-2")): # new col - right = b["x1"] - b["cn"] += 1 - cols.append([b]) - continue - right = (right + b["x1"]) / 2. - cols[-1].append(b) - - tbl = [[[] for _ in range(len(cols))] for _ in range(len(rows))] - for b in boxes: - tbl[b["rn"]][b["cn"]].append(b) - - if len(rows) >= 4: - # remove single in column - j = 0 - while j < len(tbl[0]): - e, ii = 0, 0 - for i in range(len(tbl)): - if tbl[i][j]: - e += 1 - ii = i - if e > 1: - break - if e > 1: - j += 1 - continue - f = (j > 0 and tbl[ii][j - 1] and tbl[ii] - [j - 1][0].get("text")) or j == 0 - ff = (j + 1 < len(tbl[ii]) and tbl[ii][j + 1] and tbl[ii] - [j + 1][0].get("text")) or j + 1 >= len(tbl[ii]) - if f and ff: - j += 1 - continue - bx = tbl[ii][j][0] - logging.debug("Relocate column single: " + bx["text"]) - # j column only has one value - left, right = 100000, 100000 - if j > 0 and not f: - for i in range(len(tbl)): - if tbl[i][j - 1]: - left = min(left, np.min( - [bx["x0"] - a["x1"] for a in tbl[i][j - 1]])) - if j + 1 < len(tbl[0]) and not ff: - for i in range(len(tbl)): - if tbl[i][j + 1]: - right = min(right, np.min( - [a["x0"] - bx["x1"] for a in tbl[i][j + 1]])) - assert left < 100000 or right < 100000 - if left < right: - for jj in range(j, len(tbl[0])): - for i in range(len(tbl)): - for a in tbl[i][jj]: - a["cn"] -= 1 - if tbl[ii][j - 1]: - tbl[ii][j - 1].extend(tbl[ii][j]) - else: - tbl[ii][j - 1] = tbl[ii][j] - for i in range(len(tbl)): - tbl[i].pop(j) - - else: - for jj in range(j + 1, len(tbl[0])): - for i in range(len(tbl)): - for a in tbl[i][jj]: - a["cn"] -= 1 - if tbl[ii][j + 1]: - tbl[ii][j + 1].extend(tbl[ii][j]) - else: - tbl[ii][j + 1] = tbl[ii][j] - for i in range(len(tbl)): - tbl[i].pop(j) - cols.pop(j) - assert len(cols) == len(tbl[0]), "Column NO. 
miss matched: %d vs %d" % ( - len(cols), len(tbl[0])) - - if len(cols) >= 4: - # remove single in row - i = 0 - while i < len(tbl): - e, jj = 0, 0 - for j in range(len(tbl[i])): - if tbl[i][j]: - e += 1 - jj = j - if e > 1: - break - if e > 1: - i += 1 - continue - f = (i > 0 and tbl[i - 1][jj] and tbl[i - 1] - [jj][0].get("text")) or i == 0 - ff = (i + 1 < len(tbl) and tbl[i + 1][jj] and tbl[i + 1] - [jj][0].get("text")) or i + 1 >= len(tbl) - if f and ff: - i += 1 - continue - - bx = tbl[i][jj][0] - logging.debug("Relocate row single: " + bx["text"]) - # i row only has one value - up, down = 100000, 100000 - if i > 0 and not f: - for j in range(len(tbl[i - 1])): - if tbl[i - 1][j]: - up = min(up, np.min( - [bx["top"] - a["bottom"] for a in tbl[i - 1][j]])) - if i + 1 < len(tbl) and not ff: - for j in range(len(tbl[i + 1])): - if tbl[i + 1][j]: - down = min(down, np.min( - [a["top"] - bx["bottom"] for a in tbl[i + 1][j]])) - assert up < 100000 or down < 100000 - if up < down: - for ii in range(i, len(tbl)): - for j in range(len(tbl[ii])): - for a in tbl[ii][j]: - a["rn"] -= 1 - if tbl[i - 1][jj]: - tbl[i - 1][jj].extend(tbl[i][jj]) - else: - tbl[i - 1][jj] = tbl[i][jj] - tbl.pop(i) - - else: - for ii in range(i + 1, len(tbl)): - for j in range(len(tbl[ii])): - for a in tbl[ii][j]: - a["rn"] -= 1 - if tbl[i + 1][jj]: - tbl[i + 1][jj].extend(tbl[i][jj]) - else: - tbl[i + 1][jj] = tbl[i][jj] - tbl.pop(i) - rows.pop(i) - - # which rows are headers - hdset = set([]) - for i in range(len(tbl)): - cnt, h = 0, 0 - for j, arr in enumerate(tbl[i]): - if not arr: - continue - cnt += 1 - if max_type == "Nu" and arr[0]["btype"] == "Nu": - continue - if any([a.get("H") for a in arr]) \ - or (max_type == "Nu" and arr[0]["btype"] != "Nu"): - h += 1 - if h / cnt > 0.5: - hdset.add(i) - - if html: - return TableStructureRecognizer.__html_table(cap, hdset, - TableStructureRecognizer.__cal_spans(boxes, rows, - cols, tbl, True) - ) - - return TableStructureRecognizer.__desc_table(cap, hdset, - TableStructureRecognizer.__cal_spans(boxes, rows, cols, tbl, - False), - is_english) - - @staticmethod - def __html_table(cap, hdset, tbl): - # constrcut HTML - html = "" - if cap: - html += f"" - for i in range(len(tbl)): - row = "" - txts = [] - for j, arr in enumerate(tbl[i]): - if arr is None: - continue - if not arr: - row += "" if i not in hdset else "" - continue - txt = "" - if arr: - h = min(np.min([c["bottom"] - c["top"] - for c in arr]) / 2, 10) - txt = " ".join([c["text"] - for c in Recognizer.sort_Y_firstly(arr, h)]) - txts.append(txt) - sp = "" - if arr[0].get("colspan"): - sp = "colspan={}".format(arr[0]["colspan"]) - if arr[0].get("rowspan"): - sp += " rowspan={}".format(arr[0]["rowspan"]) - if i in hdset: - row += f"" - else: - row += f"" - - if i in hdset: - if all([t in hdset for t in txts]): - continue - for t in txts: - hdset.add(t) - - if row != "": - row += "" - else: - row = "" - html += "\n" + row - html += "\n
{cap}
" + txt + "" + txt + "
" - return html - - @staticmethod - def __desc_table(cap, hdr_rowno, tbl, is_english): - # get text of every colomn in header row to become header text - clmno = len(tbl[0]) - rowno = len(tbl) - headers = {} - hdrset = set() - lst_hdr = [] - de = "的" if not is_english else " for " - for r in sorted(list(hdr_rowno)): - headers[r] = ["" for _ in range(clmno)] - for i in range(clmno): - if not tbl[r][i]: - continue - txt = " ".join([a["text"].strip() for a in tbl[r][i]]) - headers[r][i] = txt - hdrset.add(txt) - if all([not t for t in headers[r]]): - del headers[r] - hdr_rowno.remove(r) - continue - for j in range(clmno): - if headers[r][j]: - continue - if j >= len(lst_hdr): - break - headers[r][j] = lst_hdr[j] - lst_hdr = headers[r] - for i in range(rowno): - if i not in hdr_rowno: - continue - for j in range(i + 1, rowno): - if j not in hdr_rowno: - break - for k in range(clmno): - if not headers[j - 1][k]: - continue - if headers[j][k].find(headers[j - 1][k]) >= 0: - continue - if len(headers[j][k]) > len(headers[j - 1][k]): - headers[j][k] += (de if headers[j][k] - else "") + headers[j - 1][k] - else: - headers[j][k] = headers[j - 1][k] \ - + (de if headers[j - 1][k] else "") \ - + headers[j][k] - - logging.debug( - f">>>>>>>>>>>>>>>>>{cap}:SIZE:{rowno}X{clmno} Header: {hdr_rowno}") - row_txt = [] - for i in range(rowno): - if i in hdr_rowno: - continue - rtxt = [] - - def append(delimer): - nonlocal rtxt, row_txt - rtxt = delimer.join(rtxt) - if row_txt and len(row_txt[-1]) + len(rtxt) < 64: - row_txt[-1] += "\n" + rtxt - else: - row_txt.append(rtxt) - - r = 0 - if len(headers.items()): - _arr = [(i - r, r) for r, _ in headers.items() if r < i] - if _arr: - _, r = min(_arr, key=lambda x: x[0]) - - if r not in headers and clmno <= 2: - for j in range(clmno): - if not tbl[i][j]: - continue - txt = "".join([a["text"].strip() for a in tbl[i][j]]) - if txt: - rtxt.append(txt) - if rtxt: - append(":") - continue - - for j in range(clmno): - if not tbl[i][j]: - continue - txt = "".join([a["text"].strip() for a in tbl[i][j]]) - if not txt: - continue - ctt = headers[r][j] if r in headers else "" - if ctt: - ctt += ":" - ctt += txt - if ctt: - rtxt.append(ctt) - - if rtxt: - row_txt.append("; ".join(rtxt)) - - if cap: - if is_english: - from_ = " in " - else: - from_ = "来自" - row_txt = [t + f"\t——{from_}“{cap}”" for t in row_txt] - return row_txt - - @staticmethod - def __cal_spans(boxes, rows, cols, tbl, html=True): - # caculate span - clft = [np.mean([c.get("C_left", c["x0"]) for c in cln]) - for cln in cols] - crgt = [np.mean([c.get("C_right", c["x1"]) for c in cln]) - for cln in cols] - rtop = [np.mean([c.get("R_top", c["top"]) for c in row]) - for row in rows] - rbtm = [np.mean([c.get("R_btm", c["bottom"]) - for c in row]) for row in rows] - for b in boxes: - if "SP" not in b: - continue - b["colspan"] = [b["cn"]] - b["rowspan"] = [b["rn"]] - # col span - for j in range(0, len(clft)): - if j == b["cn"]: - continue - if clft[j] + (crgt[j] - clft[j]) / 2 < b["H_left"]: - continue - if crgt[j] - (crgt[j] - clft[j]) / 2 > b["H_right"]: - continue - b["colspan"].append(j) - # row span - for j in range(0, len(rtop)): - if j == b["rn"]: - continue - if rtop[j] + (rbtm[j] - rtop[j]) / 2 < b["H_top"]: - continue - if rbtm[j] - (rbtm[j] - rtop[j]) / 2 > b["H_bott"]: - continue - b["rowspan"].append(j) - - def join(arr): - if not arr: - return "" - return "".join([t["text"] for t in arr]) - - # rm the spaning cells - for i in range(len(tbl)): - for j, arr in enumerate(tbl[i]): - if not arr: - continue 
- if all(["rowspan" not in a and "colspan" not in a for a in arr]): - continue - rowspan, colspan = [], [] - for a in arr: - if isinstance(a.get("rowspan", 0), list): - rowspan.extend(a["rowspan"]) - if isinstance(a.get("colspan", 0), list): - colspan.extend(a["colspan"]) - rowspan, colspan = set(rowspan), set(colspan) - if len(rowspan) < 2 and len(colspan) < 2: - for a in arr: - if "rowspan" in a: - del a["rowspan"] - if "colspan" in a: - del a["colspan"] - continue - rowspan, colspan = sorted(rowspan), sorted(colspan) - rowspan = list(range(rowspan[0], rowspan[-1] + 1)) - colspan = list(range(colspan[0], colspan[-1] + 1)) - assert i in rowspan, rowspan - assert j in colspan, colspan - arr = [] - for r in rowspan: - for c in colspan: - arr_txt = join(arr) - if tbl[r][c] and join(tbl[r][c]) != arr_txt: - arr.extend(tbl[r][c]) - tbl[r][c] = None if html else arr - for a in arr: - if len(rowspan) > 1: - a["rowspan"] = len(rowspan) - elif "rowspan" in a: - del a["rowspan"] - if len(colspan) > 1: - a["colspan"] = len(colspan) - elif "colspan" in a: - del a["colspan"] - tbl[rowspan[0]][colspan[0]] = arr - - return tbl +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import logging +import os +import re +from collections import Counter + +import numpy as np +from huggingface_hub import snapshot_download + +from api.utils.file_utils import get_project_base_directory +from rag.nlp import rag_tokenizer +from .recognizer import Recognizer + + +class TableStructureRecognizer(Recognizer): + labels = [ + "table", + "table column", + "table row", + "table column header", + "table projected row header", + "table spanning cell", + ] + + def __init__(self): + try: + super().__init__(self.labels, "tsr", os.path.join( + get_project_base_directory(), + "rag/res/deepdoc")) + except Exception as e: + super().__init__(self.labels, "tsr", snapshot_download(repo_id="InfiniFlow/deepdoc", + local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"), + local_dir_use_symlinks=False)) + + def __call__(self, images, thr=0.2): + tbls = super().__call__(images, thr) + res = [] + # align left&right for rows, align top&bottom for columns + for tbl in tbls: + lts = [{"label": b["type"], + "score": b["score"], + "x0": b["bbox"][0], "x1": b["bbox"][2], + "top": b["bbox"][1], "bottom": b["bbox"][-1] + } for b in tbl] + if not lts: + continue + + left = [b["x0"] for b in lts if b["label"].find( + "row") > 0 or b["label"].find("header") > 0] + right = [b["x1"] for b in lts if b["label"].find( + "row") > 0 or b["label"].find("header") > 0] + if not left: + continue + left = np.mean(left) if len(left) > 4 else np.min(left) + right = np.mean(right) if len(right) > 4 else np.max(right) + for b in lts: + if b["label"].find("row") > 0 or b["label"].find("header") > 0: + if b["x0"] > left: + b["x0"] = left + if b["x1"] < right: + b["x1"] = right + + top = [b["top"] for b in lts if b["label"] == "table column"] + bottom = [b["bottom"] for b in lts if b["label"] == "table column"] + if not top: + res.append(lts) + continue + top 
= np.median(top) if len(top) > 4 else np.min(top) + bottom = np.median(bottom) if len(bottom) > 4 else np.max(bottom) + for b in lts: + if b["label"] == "table column": + if b["top"] > top: + b["top"] = top + if b["bottom"] < bottom: + b["bottom"] = bottom + + res.append(lts) + return res + + @staticmethod + def is_caption(bx): + patt = [ + r"[图表]+[ 0-9::]{2,}" + ] + if any([re.match(p, bx["text"].strip()) for p in patt]) \ + or bx["layout_type"].find("caption") >= 0: + return True + return False + + @staticmethod + def blockType(b): + patt = [ + ("^(20|19)[0-9]{2}[年/-][0-9]{1,2}[月/-][0-9]{1,2}日*$", "Dt"), + (r"^(20|19)[0-9]{2}年$", "Dt"), + (r"^(20|19)[0-9]{2}[年-][0-9]{1,2}月*$", "Dt"), + ("^[0-9]{1,2}[月-][0-9]{1,2}日*$", "Dt"), + (r"^第*[一二三四1-4]季度$", "Dt"), + (r"^(20|19)[0-9]{2}年*[一二三四1-4]季度$", "Dt"), + (r"^(20|19)[0-9]{2}[ABCDE]$", "Dt"), + ("^[0-9.,+%/ -]+$", "Nu"), + (r"^[0-9A-Z/\._~-]+$", "Ca"), + (r"^[A-Z]*[a-z' -]+$", "En"), + (r"^[0-9.,+-]+[0-9A-Za-z/$¥%<>()()' -]+$", "NE"), + (r"^.{1}$", "Sg") + ] + for p, n in patt: + if re.search(p, b["text"].strip()): + return n + tks = [t for t in rag_tokenizer.tokenize(b["text"]).split(" ") if len(t) > 1] + if len(tks) > 3: + if len(tks) < 12: + return "Tx" + else: + return "Lx" + + if len(tks) == 1 and rag_tokenizer.tag(tks[0]) == "nr": + return "Nr" + + return "Ot" + + @staticmethod + def construct_table(boxes, is_english=False, html=False): + cap = "" + i = 0 + while i < len(boxes): + if TableStructureRecognizer.is_caption(boxes[i]): + if is_english: + cap + " " + cap += boxes[i]["text"] + boxes.pop(i) + i -= 1 + i += 1 + + if not boxes: + return [] + for b in boxes: + b["btype"] = TableStructureRecognizer.blockType(b) + max_type = Counter([b["btype"] for b in boxes]).items() + max_type = max(max_type, key=lambda x: x[1])[0] if max_type else "" + logging.debug("MAXTYPE: " + max_type) + + rowh = [b["R_bott"] - b["R_top"] for b in boxes if "R" in b] + rowh = np.min(rowh) if rowh else 0 + boxes = Recognizer.sort_R_firstly(boxes, rowh / 2) + #for b in boxes:print(b) + boxes[0]["rn"] = 0 + rows = [[boxes[0]]] + btm = boxes[0]["bottom"] + for b in boxes[1:]: + b["rn"] = len(rows) - 1 + lst_r = rows[-1] + if lst_r[-1].get("R", "") != b.get("R", "") \ + or (b["top"] >= btm - 3 and lst_r[-1].get("R", "-1") != b.get("R", "-2") + ): # new row + btm = b["bottom"] + b["rn"] += 1 + rows.append([b]) + continue + btm = (btm + b["bottom"]) / 2. + rows[-1].append(b) + + colwm = [b["C_right"] - b["C_left"] for b in boxes if "C" in b] + colwm = np.min(colwm) if colwm else 0 + crosspage = len(set([b["page_number"] for b in boxes])) > 1 + if crosspage: + boxes = Recognizer.sort_X_firstly(boxes, colwm / 2, False) + else: + boxes = Recognizer.sort_C_firstly(boxes, colwm / 2) + boxes[0]["cn"] = 0 + cols = [[boxes[0]]] + right = boxes[0]["x1"] + for b in boxes[1:]: + b["cn"] = len(cols) - 1 + lst_c = cols[-1] + if (int(b.get("C", "1")) - int(lst_c[-1].get("C", "1")) == 1 and b["page_number"] == lst_c[-1][ + "page_number"]) \ + or (b["x0"] >= right and lst_c[-1].get("C", "-1") != b.get("C", "-2")): # new col + right = b["x1"] + b["cn"] += 1 + cols.append([b]) + continue + right = (right + b["x1"]) / 2. 
+ cols[-1].append(b) + + tbl = [[[] for _ in range(len(cols))] for _ in range(len(rows))] + for b in boxes: + tbl[b["rn"]][b["cn"]].append(b) + + if len(rows) >= 4: + # remove single in column + j = 0 + while j < len(tbl[0]): + e, ii = 0, 0 + for i in range(len(tbl)): + if tbl[i][j]: + e += 1 + ii = i + if e > 1: + break + if e > 1: + j += 1 + continue + f = (j > 0 and tbl[ii][j - 1] and tbl[ii] + [j - 1][0].get("text")) or j == 0 + ff = (j + 1 < len(tbl[ii]) and tbl[ii][j + 1] and tbl[ii] + [j + 1][0].get("text")) or j + 1 >= len(tbl[ii]) + if f and ff: + j += 1 + continue + bx = tbl[ii][j][0] + logging.debug("Relocate column single: " + bx["text"]) + # j column only has one value + left, right = 100000, 100000 + if j > 0 and not f: + for i in range(len(tbl)): + if tbl[i][j - 1]: + left = min(left, np.min( + [bx["x0"] - a["x1"] for a in tbl[i][j - 1]])) + if j + 1 < len(tbl[0]) and not ff: + for i in range(len(tbl)): + if tbl[i][j + 1]: + right = min(right, np.min( + [a["x0"] - bx["x1"] for a in tbl[i][j + 1]])) + assert left < 100000 or right < 100000 + if left < right: + for jj in range(j, len(tbl[0])): + for i in range(len(tbl)): + for a in tbl[i][jj]: + a["cn"] -= 1 + if tbl[ii][j - 1]: + tbl[ii][j - 1].extend(tbl[ii][j]) + else: + tbl[ii][j - 1] = tbl[ii][j] + for i in range(len(tbl)): + tbl[i].pop(j) + + else: + for jj in range(j + 1, len(tbl[0])): + for i in range(len(tbl)): + for a in tbl[i][jj]: + a["cn"] -= 1 + if tbl[ii][j + 1]: + tbl[ii][j + 1].extend(tbl[ii][j]) + else: + tbl[ii][j + 1] = tbl[ii][j] + for i in range(len(tbl)): + tbl[i].pop(j) + cols.pop(j) + assert len(cols) == len(tbl[0]), "Column NO. miss matched: %d vs %d" % ( + len(cols), len(tbl[0])) + + if len(cols) >= 4: + # remove single in row + i = 0 + while i < len(tbl): + e, jj = 0, 0 + for j in range(len(tbl[i])): + if tbl[i][j]: + e += 1 + jj = j + if e > 1: + break + if e > 1: + i += 1 + continue + f = (i > 0 and tbl[i - 1][jj] and tbl[i - 1] + [jj][0].get("text")) or i == 0 + ff = (i + 1 < len(tbl) and tbl[i + 1][jj] and tbl[i + 1] + [jj][0].get("text")) or i + 1 >= len(tbl) + if f and ff: + i += 1 + continue + + bx = tbl[i][jj][0] + logging.debug("Relocate row single: " + bx["text"]) + # i row only has one value + up, down = 100000, 100000 + if i > 0 and not f: + for j in range(len(tbl[i - 1])): + if tbl[i - 1][j]: + up = min(up, np.min( + [bx["top"] - a["bottom"] for a in tbl[i - 1][j]])) + if i + 1 < len(tbl) and not ff: + for j in range(len(tbl[i + 1])): + if tbl[i + 1][j]: + down = min(down, np.min( + [a["top"] - bx["bottom"] for a in tbl[i + 1][j]])) + assert up < 100000 or down < 100000 + if up < down: + for ii in range(i, len(tbl)): + for j in range(len(tbl[ii])): + for a in tbl[ii][j]: + a["rn"] -= 1 + if tbl[i - 1][jj]: + tbl[i - 1][jj].extend(tbl[i][jj]) + else: + tbl[i - 1][jj] = tbl[i][jj] + tbl.pop(i) + + else: + for ii in range(i + 1, len(tbl)): + for j in range(len(tbl[ii])): + for a in tbl[ii][j]: + a["rn"] -= 1 + if tbl[i + 1][jj]: + tbl[i + 1][jj].extend(tbl[i][jj]) + else: + tbl[i + 1][jj] = tbl[i][jj] + tbl.pop(i) + rows.pop(i) + + # which rows are headers + hdset = set([]) + for i in range(len(tbl)): + cnt, h = 0, 0 + for j, arr in enumerate(tbl[i]): + if not arr: + continue + cnt += 1 + if max_type == "Nu" and arr[0]["btype"] == "Nu": + continue + if any([a.get("H") for a in arr]) \ + or (max_type == "Nu" and arr[0]["btype"] != "Nu"): + h += 1 + if h / cnt > 0.5: + hdset.add(i) + + if html: + return TableStructureRecognizer.__html_table(cap, hdset, + 
TableStructureRecognizer.__cal_spans(boxes, rows, + cols, tbl, True) + ) + + return TableStructureRecognizer.__desc_table(cap, hdset, + TableStructureRecognizer.__cal_spans(boxes, rows, cols, tbl, + False), + is_english) + + @staticmethod + def __html_table(cap, hdset, tbl): + # constrcut HTML + html = "" + if cap: + html += f"" + for i in range(len(tbl)): + row = "" + txts = [] + for j, arr in enumerate(tbl[i]): + if arr is None: + continue + if not arr: + row += "" if i not in hdset else "" + continue + txt = "" + if arr: + h = min(np.min([c["bottom"] - c["top"] + for c in arr]) / 2, 10) + txt = " ".join([c["text"] + for c in Recognizer.sort_Y_firstly(arr, h)]) + txts.append(txt) + sp = "" + if arr[0].get("colspan"): + sp = "colspan={}".format(arr[0]["colspan"]) + if arr[0].get("rowspan"): + sp += " rowspan={}".format(arr[0]["rowspan"]) + if i in hdset: + row += f"" + else: + row += f"" + + if i in hdset: + if all([t in hdset for t in txts]): + continue + for t in txts: + hdset.add(t) + + if row != "": + row += "" + else: + row = "" + html += "\n" + row + html += "\n
{cap}
" + txt + "" + txt + "
" + return html + + @staticmethod + def __desc_table(cap, hdr_rowno, tbl, is_english): + # get text of every colomn in header row to become header text + clmno = len(tbl[0]) + rowno = len(tbl) + headers = {} + hdrset = set() + lst_hdr = [] + de = "的" if not is_english else " for " + for r in sorted(list(hdr_rowno)): + headers[r] = ["" for _ in range(clmno)] + for i in range(clmno): + if not tbl[r][i]: + continue + txt = " ".join([a["text"].strip() for a in tbl[r][i]]) + headers[r][i] = txt + hdrset.add(txt) + if all([not t for t in headers[r]]): + del headers[r] + hdr_rowno.remove(r) + continue + for j in range(clmno): + if headers[r][j]: + continue + if j >= len(lst_hdr): + break + headers[r][j] = lst_hdr[j] + lst_hdr = headers[r] + for i in range(rowno): + if i not in hdr_rowno: + continue + for j in range(i + 1, rowno): + if j not in hdr_rowno: + break + for k in range(clmno): + if not headers[j - 1][k]: + continue + if headers[j][k].find(headers[j - 1][k]) >= 0: + continue + if len(headers[j][k]) > len(headers[j - 1][k]): + headers[j][k] += (de if headers[j][k] + else "") + headers[j - 1][k] + else: + headers[j][k] = headers[j - 1][k] \ + + (de if headers[j - 1][k] else "") \ + + headers[j][k] + + logging.debug( + f">>>>>>>>>>>>>>>>>{cap}:SIZE:{rowno}X{clmno} Header: {hdr_rowno}") + row_txt = [] + for i in range(rowno): + if i in hdr_rowno: + continue + rtxt = [] + + def append(delimer): + nonlocal rtxt, row_txt + rtxt = delimer.join(rtxt) + if row_txt and len(row_txt[-1]) + len(rtxt) < 64: + row_txt[-1] += "\n" + rtxt + else: + row_txt.append(rtxt) + + r = 0 + if len(headers.items()): + _arr = [(i - r, r) for r, _ in headers.items() if r < i] + if _arr: + _, r = min(_arr, key=lambda x: x[0]) + + if r not in headers and clmno <= 2: + for j in range(clmno): + if not tbl[i][j]: + continue + txt = "".join([a["text"].strip() for a in tbl[i][j]]) + if txt: + rtxt.append(txt) + if rtxt: + append(":") + continue + + for j in range(clmno): + if not tbl[i][j]: + continue + txt = "".join([a["text"].strip() for a in tbl[i][j]]) + if not txt: + continue + ctt = headers[r][j] if r in headers else "" + if ctt: + ctt += ":" + ctt += txt + if ctt: + rtxt.append(ctt) + + if rtxt: + row_txt.append("; ".join(rtxt)) + + if cap: + if is_english: + from_ = " in " + else: + from_ = "来自" + row_txt = [t + f"\t——{from_}“{cap}”" for t in row_txt] + return row_txt + + @staticmethod + def __cal_spans(boxes, rows, cols, tbl, html=True): + # caculate span + clft = [np.mean([c.get("C_left", c["x0"]) for c in cln]) + for cln in cols] + crgt = [np.mean([c.get("C_right", c["x1"]) for c in cln]) + for cln in cols] + rtop = [np.mean([c.get("R_top", c["top"]) for c in row]) + for row in rows] + rbtm = [np.mean([c.get("R_btm", c["bottom"]) + for c in row]) for row in rows] + for b in boxes: + if "SP" not in b: + continue + b["colspan"] = [b["cn"]] + b["rowspan"] = [b["rn"]] + # col span + for j in range(0, len(clft)): + if j == b["cn"]: + continue + if clft[j] + (crgt[j] - clft[j]) / 2 < b["H_left"]: + continue + if crgt[j] - (crgt[j] - clft[j]) / 2 > b["H_right"]: + continue + b["colspan"].append(j) + # row span + for j in range(0, len(rtop)): + if j == b["rn"]: + continue + if rtop[j] + (rbtm[j] - rtop[j]) / 2 < b["H_top"]: + continue + if rbtm[j] - (rbtm[j] - rtop[j]) / 2 > b["H_bott"]: + continue + b["rowspan"].append(j) + + def join(arr): + if not arr: + return "" + return "".join([t["text"] for t in arr]) + + # rm the spaning cells + for i in range(len(tbl)): + for j, arr in enumerate(tbl[i]): + if not arr: + continue 
+ if all(["rowspan" not in a and "colspan" not in a for a in arr]): + continue + rowspan, colspan = [], [] + for a in arr: + if isinstance(a.get("rowspan", 0), list): + rowspan.extend(a["rowspan"]) + if isinstance(a.get("colspan", 0), list): + colspan.extend(a["colspan"]) + rowspan, colspan = set(rowspan), set(colspan) + if len(rowspan) < 2 and len(colspan) < 2: + for a in arr: + if "rowspan" in a: + del a["rowspan"] + if "colspan" in a: + del a["colspan"] + continue + rowspan, colspan = sorted(rowspan), sorted(colspan) + rowspan = list(range(rowspan[0], rowspan[-1] + 1)) + colspan = list(range(colspan[0], colspan[-1] + 1)) + assert i in rowspan, rowspan + assert j in colspan, colspan + arr = [] + for r in rowspan: + for c in colspan: + arr_txt = join(arr) + if tbl[r][c] and join(tbl[r][c]) != arr_txt: + arr.extend(tbl[r][c]) + tbl[r][c] = None if html else arr + for a in arr: + if len(rowspan) > 1: + a["rowspan"] = len(rowspan) + elif "rowspan" in a: + del a["rowspan"] + if len(colspan) > 1: + a["colspan"] = len(colspan) + elif "colspan" in a: + del a["colspan"] + tbl[rowspan[0]][colspan[0]] = arr + + return tbl diff --git a/docker/README.md b/docker/README.md index dc7e05bded1afd6cd89db555c322198e78224527..256fd3a7110aba924653082931fe788faf5ae45f 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,80 +1,80 @@ - -# Docker Environment Variable - -Look into [.env](./.env), there're some important variables. - -## MYSQL_PASSWORD -The mysql password could be changed by this variable. But you need to change *mysql.password* in [service_conf.yaml](./service_conf.yaml) at the same time. - - -## MYSQL_PORT -It refers to exported port number of mysql docker container, it's useful if you want to access the database outside the docker containers. - -## MINIO_USER -It refers to user name of [Mino](https://github.com/minio/minio). The modification should be synchronous updating at minio.user of [service_conf.yaml](./service_conf.yaml). - -## MINIO_PASSWORD -It refers to user password of [Mino](https://github.com/minio/minio). The modification should be synchronous updating at minio.password of [service_conf.yaml](./service_conf.yaml). - - -## SVR_HTTP_PORT -It refers to The API server serving port. - - -# Service Configuration -[service_conf.yaml](./service_conf.yaml) is used by the *API server* and *task executor*. It's the most important configuration of the system. - -## ragflow - -### host -The IP address used by the API server. - -### port -The serving port of API server. - -## mysql - -### name -The database name in mysql used by this system. - -### user -The database user name. - -### password -The database password. The modification should be synchronous updating at *MYSQL_PASSWORD* in [.env](./.env). - -### port -The serving port of mysql inside the container. The modification should be synchronous updating at [docker-compose.yml](./docker-compose.yml) - -### max_connections -The max database connection. - -### stale_timeout -The timeout duration in seconds. - -## minio - -### user -The username of minio. The modification should be synchronous updating at *MINIO_USER* in [.env](./.env). - -### password -The password of minio. The modification should be synchronous updating at *MINIO_PASSWORD* in [.env](./.env). - -### host -The serving IP and port inside the docker container. This is not updating until changing the minio part in [docker-compose.yml](./docker-compose.yml) - -## user_default_llm -Newly signed-up users use LLM configured by this part. 
Otherwise, user need to configure his own LLM in *setting*.
-
-### factory
-The LLM suppliers. "OpenAI", "Tongyi-Qianwen", "ZHIPU-AI", "Moonshot", "DeepSeek", "Baichuan", and "VolcEngine" are supported.
-
-### api_key
-The corresponding API key of your assigned LLM vendor.
-
-## oauth
-This is OAuth configuration which allows your system using the third-party account to sign-up and sign-in to the system.
-
-### github
-Got to [Github](https://github.com/settings/developers), register new application, the *client_id* and *secret_key* will be given.
-
+
+# Docker Environment Variables
+
+Look into [.env](./.env); it holds several important variables.
+
+## MYSQL_PASSWORD
+The MySQL password can be changed with this variable. If you change it, you must also update *mysql.password* in [service_conf.yaml](./service_conf.yaml).
+
+
+## MYSQL_PORT
+The port the MySQL Docker container exposes on the host. It is useful if you want to access the database from outside the Docker containers.
+
+## MINIO_USER
+The user name for [MinIO](https://github.com/minio/minio). If you change it, you must also update *minio.user* in [service_conf.yaml](./service_conf.yaml).
+
+## MINIO_PASSWORD
+The password for [MinIO](https://github.com/minio/minio). If you change it, you must also update *minio.password* in [service_conf.yaml](./service_conf.yaml).
+
+
+## SVR_HTTP_PORT
+The port the API server listens on.
+
+
+# Service Configuration
+[service_conf.yaml](./service_conf.yaml) is used by the *API server* and the *task executor*. It is the most important configuration of the system.
+
+## ragflow
+
+### host
+The IP address used by the API server.
+
+### port
+The serving port of the API server.
+
+## mysql
+
+### name
+The name of the MySQL database used by this system.
+
+### user
+The database user name.
+
+### password
+The database password. Any change must be mirrored in *MYSQL_PASSWORD* in [.env](./.env).
+
+### port
+The serving port of MySQL inside the container. Any change must be mirrored in [docker-compose.yml](./docker-compose.yml).
+
+### max_connections
+The maximum number of database connections.
+
+### stale_timeout
+The timeout duration in seconds.
+
+## minio
+
+### user
+The MinIO user name. Any change must be mirrored in *MINIO_USER* in [.env](./.env).
+
+### password
+The MinIO password. Any change must be mirrored in *MINIO_PASSWORD* in [.env](./.env).
+
+### host
+The serving IP and port inside the Docker network. Do not change this without also updating the minio section of [docker-compose.yml](./docker-compose.yml).
+
+## user_default_llm
+Newly signed-up users use the LLM configured here; otherwise, each user needs to configure their own LLM in *Settings*.
+
+### factory
+The LLM suppliers. "OpenAI", "Tongyi-Qianwen", "ZHIPU-AI", "Moonshot", "DeepSeek", "Baichuan", and "VolcEngine" are supported.
+
+### api_key
+The corresponding API key of your assigned LLM vendor.
+
+## oauth
+OAuth configuration that allows users to sign up and sign in to the system with a third-party account.
+
+### github
+Go to [GitHub](https://github.com/settings/developers), register a new application, and you will be given a *client_id* and *secret_key*.
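Since several of the values above must be kept in sync by hand between [.env](./.env) and [service_conf.yaml](./service_conf.yaml), a tiny cross-check script can catch drift. A sketch only, assuming PyYAML and python-dotenv are installed and the script runs from the repo root:

```python
# Sanity check (illustrative, not shipped with the repo): verify that the
# credentials the README says must match actually do.
import yaml
from dotenv import dotenv_values

env = dotenv_values("docker/.env")
with open("docker/service_conf.yaml") as f:
    conf = yaml.safe_load(f)

assert conf["mysql"]["password"] == env.get("MYSQL_PASSWORD"), "mysql password out of sync"
assert conf["minio"]["user"] == env.get("MINIO_USER"), "minio user out of sync"
assert conf["minio"]["password"] == env.get("MINIO_PASSWORD"), "minio password out of sync"
print("docker/.env and docker/service_conf.yaml agree")
```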
+ diff --git a/docker/docker-compose-gpu-CN-oc9.yml b/docker/docker-compose-gpu-CN-oc9.yml index 1950c62cd00e97bc3088cfb8fb151e6190e43772..293beb5f3e7966fc81700f9f09b1f3b4934b9cc1 100644 --- a/docker/docker-compose-gpu-CN-oc9.yml +++ b/docker/docker-compose-gpu-CN-oc9.yml @@ -1,37 +1,37 @@ -include: - - path: ./docker-compose-base.yml - env_file: ./.env - -services: - ragflow: - depends_on: - mysql: - condition: service_healthy - es01: - condition: service_healthy - image: edwardelric233/ragflow:oc9 - container_name: ragflow-server - ports: - - ${SVR_HTTP_PORT}:9380 - - 80:80 - - 443:443 - volumes: - - ./service_conf.yaml:/ragflow/conf/service_conf.yaml - - ./ragflow-logs:/ragflow/logs - - ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf - - ./nginx/proxy.conf:/etc/nginx/proxy.conf - - ./nginx/nginx.conf:/etc/nginx/nginx.conf - environment: - - TZ=${TIMEZONE} - - HF_ENDPOINT=https://hf-mirror.com - - MACOS=${MACOS} - networks: - - ragflow - restart: always - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [gpu] +include: + - path: ./docker-compose-base.yml + env_file: ./.env + +services: + ragflow: + depends_on: + mysql: + condition: service_healthy + es01: + condition: service_healthy + image: edwardelric233/ragflow:oc9 + container_name: ragflow-server + ports: + - ${SVR_HTTP_PORT}:9380 + - 80:80 + - 443:443 + volumes: + - ./service_conf.yaml:/ragflow/conf/service_conf.yaml + - ./ragflow-logs:/ragflow/logs + - ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf + - ./nginx/proxy.conf:/etc/nginx/proxy.conf + - ./nginx/nginx.conf:/etc/nginx/nginx.conf + environment: + - TZ=${TIMEZONE} + - HF_ENDPOINT=https://hf-mirror.com + - MACOS=${MACOS} + networks: + - ragflow + restart: always + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] diff --git a/docker/docker-compose-gpu-CN.yml b/docker/docker-compose-gpu-CN.yml index e061a6897247893aa636482e4e01f5c936f99adb..39672b05d57f73a4797133989d4f044d553c11c1 100644 --- a/docker/docker-compose-gpu-CN.yml +++ b/docker/docker-compose-gpu-CN.yml @@ -1,37 +1,37 @@ -include: - - path: ./docker-compose-base.yml - env_file: ./.env - -services: - ragflow: - depends_on: - mysql: - condition: service_healthy - es01: - condition: service_healthy - image: swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow:${RAGFLOW_VERSION} - container_name: ragflow-server - ports: - - ${SVR_HTTP_PORT}:9380 - - 80:80 - - 443:443 - volumes: - - ./service_conf.yaml:/ragflow/conf/service_conf.yaml - - ./ragflow-logs:/ragflow/logs - - ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf - - ./nginx/proxy.conf:/etc/nginx/proxy.conf - - ./nginx/nginx.conf:/etc/nginx/nginx.conf - environment: - - TZ=${TIMEZONE} - - HF_ENDPOINT=https://hf-mirror.com - - MACOS=${MACOS} - networks: - - ragflow - restart: always - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [gpu] +include: + - path: ./docker-compose-base.yml + env_file: ./.env + +services: + ragflow: + depends_on: + mysql: + condition: service_healthy + es01: + condition: service_healthy + image: swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow:${RAGFLOW_VERSION} + container_name: ragflow-server + ports: + - ${SVR_HTTP_PORT}:9380 + - 80:80 + - 443:443 + volumes: + - ./service_conf.yaml:/ragflow/conf/service_conf.yaml + - ./ragflow-logs:/ragflow/logs + - ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf + - ./nginx/proxy.conf:/etc/nginx/proxy.conf + - 
./nginx/nginx.conf:/etc/nginx/nginx.conf + environment: + - TZ=${TIMEZONE} + - HF_ENDPOINT=https://hf-mirror.com + - MACOS=${MACOS} + networks: + - ragflow + restart: always + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] diff --git a/docker/init.sql b/docker/init.sql index b368583dfaa0e0810232a812f816ddd80bad22ec..7a2e0da3596a43cf33ebe0701286b47982028029 100644 --- a/docker/init.sql +++ b/docker/init.sql @@ -1,2 +1,2 @@ -CREATE DATABASE IF NOT EXISTS rag_flow; +CREATE DATABASE IF NOT EXISTS rag_flow; USE rag_flow; \ No newline at end of file diff --git a/docker/nginx/nginx.conf b/docker/nginx/nginx.conf index 8933a8db531d977ccd4da5b3be76b7d67931bfd7..c614bf5d9eed8e5569898b4300c5d6368ed5acfb 100644 --- a/docker/nginx/nginx.conf +++ b/docker/nginx/nginx.conf @@ -1,33 +1,33 @@ -user root; -worker_processes auto; - -error_log /var/log/nginx/error.log notice; -pid /var/run/nginx.pid; - - -events { - worker_connections 1024; -} - - -http { - include /etc/nginx/mime.types; - default_type application/octet-stream; - - log_format main '$remote_addr - $remote_user [$time_local] "$request" ' - '$status $body_bytes_sent "$http_referer" ' - '"$http_user_agent" "$http_x_forwarded_for"'; - - access_log /var/log/nginx/access.log main; - - sendfile on; - #tcp_nopush on; - - keepalive_timeout 65; - - #gzip on; - client_max_body_size 128M; - - include /etc/nginx/conf.d/ragflow.conf; -} - +user root; +worker_processes auto; + +error_log /var/log/nginx/error.log notice; +pid /var/run/nginx.pid; + + +events { + worker_connections 1024; +} + + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + + sendfile on; + #tcp_nopush on; + + keepalive_timeout 65; + + #gzip on; + client_max_body_size 128M; + + include /etc/nginx/conf.d/ragflow.conf; +} + diff --git a/docker/nginx/proxy.conf b/docker/nginx/proxy.conf index 75f7087dc407e4491ec099a809efb8938bd470a6..185db8cc3ccf930d3bd7350daf37a2727adb3773 100644 --- a/docker/nginx/proxy.conf +++ b/docker/nginx/proxy.conf @@ -1,8 +1,8 @@ -proxy_set_header Host $host; -proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; -proxy_set_header X-Forwarded-Proto $scheme; -proxy_http_version 1.1; -proxy_set_header Connection ""; -proxy_buffering off; -proxy_read_timeout 3600s; -proxy_send_timeout 3600s; +proxy_set_header Host $host; +proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; +proxy_set_header X-Forwarded-Proto $scheme; +proxy_http_version 1.1; +proxy_set_header Connection ""; +proxy_buffering off; +proxy_read_timeout 3600s; +proxy_send_timeout 3600s; diff --git a/docker/nginx/ragflow.conf b/docker/nginx/ragflow.conf index 453b7245233d72cc0d3e8f946e4b0794c4c4dd25..09e74221d63a9354ba3c3e2dd4b45c81a5a5bf9f 100644 --- a/docker/nginx/ragflow.conf +++ b/docker/nginx/ragflow.conf @@ -1,28 +1,28 @@ -server { - listen 80; - server_name _; - root /ragflow/web/dist; - - gzip on; - gzip_min_length 1k; - gzip_comp_level 9; - gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript application/x-httpd-php image/jpeg image/gif image/png; - gzip_vary on; - gzip_disable "MSIE [1-6]\."; - - location /v1 { - proxy_pass http://ragflow:9380; - include proxy.conf; - } - - location / { - index index.html; - try_files $uri $uri/ 
/index.html; - } - - # Cache-Control: max-age~@~AExpires - location ~ ^/static/(css|js|media)/ { - expires 10y; - access_log off; - } -} +server { + listen 80; + server_name _; + root /ragflow/web/dist; + + gzip on; + gzip_min_length 1k; + gzip_comp_level 9; + gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript application/x-httpd-php image/jpeg image/gif image/png; + gzip_vary on; + gzip_disable "MSIE [1-6]\."; + + location /v1 { + proxy_pass http://ragflow:9380; + include proxy.conf; + } + + location / { + index index.html; + try_files $uri $uri/ /index.html; + } + + # Cache-Control: max-age~@~AExpires + location ~ ^/static/(css|js|media)/ { + expires 10y; + access_log off; + } +} diff --git a/docker/service_conf.yaml b/docker/service_conf.yaml index 778f0f0c16d605d1e639bb81495c5af232cd5b7c..fcd565385189412c92c636ae3eac167033e84f6c 100644 --- a/docker/service_conf.yaml +++ b/docker/service_conf.yaml @@ -1,43 +1,43 @@ -ragflow: - host: 0.0.0.0 - http_port: 9380 -mysql: - name: 'rag_flow' - user: 'root' - password: 'infini_rag_flow' - host: 'mysql' - port: 3306 - max_connections: 100 - stale_timeout: 30 -minio: - user: 'rag_flow' - password: 'infini_rag_flow' - host: 'minio:9000' -es: - hosts: 'http://es01:9200' - username: 'elastic' - password: 'infini_rag_flow' -redis: - db: 1 - password: 'infini_rag_flow' - host: 'redis:6379' -user_default_llm: - factory: 'Tongyi-Qianwen' - api_key: 'sk-xxxxxxxxxxxxx' - base_url: '' -oauth: - github: - client_id: xxxxxxxxxxxxxxxxxxxxxxxxx - secret_key: xxxxxxxxxxxxxxxxxxxxxxxxxxxx - url: https://github.com/login/oauth/access_token -authentication: - client: - switch: false - http_app_key: - http_secret_key: - site: - switch: false -permission: - switch: false - component: false - dataset: false +ragflow: + host: 0.0.0.0 + http_port: 9380 +mysql: + name: 'rag_flow' + user: 'root' + password: 'infini_rag_flow' + host: 'mysql' + port: 3306 + max_connections: 100 + stale_timeout: 30 +minio: + user: 'rag_flow' + password: 'infini_rag_flow' + host: 'minio:9000' +es: + hosts: 'http://es01:9200' + username: 'elastic' + password: 'infini_rag_flow' +redis: + db: 1 + password: 'infini_rag_flow' + host: 'redis:6379' +user_default_llm: + factory: 'Tongyi-Qianwen' + api_key: 'sk-xxxxxxxxxxxxx' + base_url: '' +oauth: + github: + client_id: xxxxxxxxxxxxxxxxxxxxxxxxx + secret_key: xxxxxxxxxxxxxxxxxxxxxxxxxxxx + url: https://github.com/login/oauth/access_token +authentication: + client: + switch: false + http_app_key: + http_secret_key: + site: + switch: false +permission: + switch: false + component: false + dataset: false diff --git a/rag/app/book.py b/rag/app/book.py index d969ccf920c0bf3c50ff390e62c4d6b7edf7ff8c..e165070b488513c27e2bd27fffa6240e0969375a 100644 --- a/rag/app/book.py +++ b/rag/app/book.py @@ -1,159 +1,159 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
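The nginx files above split traffic two ways: static SPA assets from /ragflow/web/dist are served at /, with try_files falling back to /index.html, while anything under /v1 is proxied to the ragflow service on port 9380. A rough smoke test of that routing, assuming the compose stack is up on localhost and requests is installed; the /v1 path below is a placeholder, not a documented endpoint:

```python
# Illustrative smoke test of the nginx routing; not part of the repo.
import requests

base = "http://localhost"                    # nginx listens on port 80
spa = requests.get(f"{base}/missing-route")  # try_files falls back to /index.html
api = requests.get(f"{base}/v1/ping")        # placeholder path, proxied to ragflow:9380

print(spa.status_code, spa.headers.get("Content-Type"))  # expect 200, text/html
print(api.status_code)  # a non-502 answer means proxy_pass reached the backend
```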
-# -import copy -from tika import parser -import re -from io import BytesIO - -from rag.nlp import bullets_category, is_english, tokenize, remove_contents_table, \ - hierarchical_merge, make_colon_as_title, naive_merge, random_choices, tokenize_table, add_positions, \ - tokenize_chunks, find_codec -from rag.nlp import rag_tokenizer -from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser - - -class Pdf(PdfParser): - def __call__(self, filename, binary=None, from_page=0, - to_page=100000, zoomin=3, callback=None): - callback(msg="OCR is running...") - self.__images__( - filename if not binary else binary, - zoomin, - from_page, - to_page, - callback) - callback(msg="OCR finished") - - from timeit import default_timer as timer - start = timer() - self._layouts_rec(zoomin) - callback(0.67, "Layout analysis finished") - print("layouts:", timer() - start) - self._table_transformer_job(zoomin) - callback(0.68, "Table analysis finished") - self._text_merge() - tbls = self._extract_table_figure(True, zoomin, True, True) - self._naive_vertical_merge() - self._filter_forpages() - self._merge_with_same_bullet() - callback(0.75, "Text merging finished.") - - callback(0.8, "Text extraction finished") - - return [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) - for b in self.boxes], tbls - - -def chunk(filename, binary=None, from_page=0, to_page=100000, - lang="Chinese", callback=None, **kwargs): - """ - Supported file formats are docx, pdf, txt. - Since a book is long and not all the parts are useful, if it's a PDF, - please setup the page ranges for every book in order eliminate negative effects and save elapsed computing time. - """ - doc = { - "docnm_kwd": filename, - "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)) - } - doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) - pdf_parser = None - sections, tbls = [], [] - if re.search(r"\.docx$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - doc_parser = DocxParser() - # TODO: table of contents need to be removed - sections, tbls = doc_parser( - binary if binary else filename, from_page=from_page, to_page=to_page) - remove_contents_table(sections, eng=is_english( - random_choices([t for t, _ in sections], k=200))) - tbls = [((None, lns), None) for lns in tbls] - callback(0.8, "Finish parsing.") - - elif re.search(r"\.pdf$", filename, re.IGNORECASE): - pdf_parser = Pdf() if kwargs.get( - "parser_config", {}).get( - "layout_recognize", True) else PlainParser() - sections, tbls = pdf_parser(filename if not binary else binary, - from_page=from_page, to_page=to_page, callback=callback) - - elif re.search(r"\.txt$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - txt = "" - if binary: - encoding = find_codec(binary) - txt = binary.decode(encoding, errors="ignore") - else: - with open(filename, "r") as f: - while True: - l = f.readline() - if not l: - break - txt += l - sections = txt.split("\n") - sections = [(l, "") for l in sections if l] - remove_contents_table(sections, eng=is_english( - random_choices([t for t, _ in sections], k=200))) - callback(0.8, "Finish parsing.") - - elif re.search(r"\.(htm|html)$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - sections = HtmlParser()(filename, binary) - sections = [(l, "") for l in sections if l] - remove_contents_table(sections, eng=is_english( - random_choices([t for t, _ in sections], k=200))) - callback(0.8, "Finish parsing.") - - elif re.search(r"\.doc$", filename, 
re.IGNORECASE): - callback(0.1, "Start to parse.") - binary = BytesIO(binary) - doc_parsed = parser.from_buffer(binary) - sections = doc_parsed['content'].split('\n') - sections = [(l, "") for l in sections if l] - remove_contents_table(sections, eng=is_english( - random_choices([t for t, _ in sections], k=200))) - callback(0.8, "Finish parsing.") - - else: - raise NotImplementedError( - "file type not supported yet(doc, docx, pdf, txt supported)") - - make_colon_as_title(sections) - bull = bullets_category( - [t for t in random_choices([t for t, _ in sections], k=100)]) - if bull >= 0: - chunks = ["\n".join(ck) - for ck in hierarchical_merge(bull, sections, 5)] - else: - sections = [s.split("@") for s, _ in sections] - sections = [(pr[0], "@" + pr[1]) if len(pr) == 2 else (pr[0], '') for pr in sections ] - chunks = naive_merge( - sections, kwargs.get( - "chunk_token_num", 256), kwargs.get( - "delimer", "\n。;!?")) - - # is it English - # is_english(random_choices([t for t, _ in sections], k=218)) - eng = lang.lower() == "english" - - res = tokenize_table(tbls, doc, eng) - res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser)) - - return res - - -if __name__ == "__main__": - import sys - - def dummy(prog=None, msg=""): - pass - chunk(sys.argv[1], from_page=1, to_page=10, callback=dummy) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import copy +from tika import parser +import re +from io import BytesIO + +from rag.nlp import bullets_category, is_english, tokenize, remove_contents_table, \ + hierarchical_merge, make_colon_as_title, naive_merge, random_choices, tokenize_table, add_positions, \ + tokenize_chunks, find_codec +from rag.nlp import rag_tokenizer +from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser + + +class Pdf(PdfParser): + def __call__(self, filename, binary=None, from_page=0, + to_page=100000, zoomin=3, callback=None): + callback(msg="OCR is running...") + self.__images__( + filename if not binary else binary, + zoomin, + from_page, + to_page, + callback) + callback(msg="OCR finished") + + from timeit import default_timer as timer + start = timer() + self._layouts_rec(zoomin) + callback(0.67, "Layout analysis finished") + print("layouts:", timer() - start) + self._table_transformer_job(zoomin) + callback(0.68, "Table analysis finished") + self._text_merge() + tbls = self._extract_table_figure(True, zoomin, True, True) + self._naive_vertical_merge() + self._filter_forpages() + self._merge_with_same_bullet() + callback(0.75, "Text merging finished.") + + callback(0.8, "Text extraction finished") + + return [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) + for b in self.boxes], tbls + + +def chunk(filename, binary=None, from_page=0, to_page=100000, + lang="Chinese", callback=None, **kwargs): + """ + Supported file formats are docx, pdf, txt. + Since a book is long and not all the parts are useful, if it's a PDF, + please setup the page ranges for every book in order eliminate negative effects and save elapsed computing time. 
+ """ + doc = { + "docnm_kwd": filename, + "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)) + } + doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) + pdf_parser = None + sections, tbls = [], [] + if re.search(r"\.docx$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + doc_parser = DocxParser() + # TODO: table of contents need to be removed + sections, tbls = doc_parser( + binary if binary else filename, from_page=from_page, to_page=to_page) + remove_contents_table(sections, eng=is_english( + random_choices([t for t, _ in sections], k=200))) + tbls = [((None, lns), None) for lns in tbls] + callback(0.8, "Finish parsing.") + + elif re.search(r"\.pdf$", filename, re.IGNORECASE): + pdf_parser = Pdf() if kwargs.get( + "parser_config", {}).get( + "layout_recognize", True) else PlainParser() + sections, tbls = pdf_parser(filename if not binary else binary, + from_page=from_page, to_page=to_page, callback=callback) + + elif re.search(r"\.txt$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + txt = "" + if binary: + encoding = find_codec(binary) + txt = binary.decode(encoding, errors="ignore") + else: + with open(filename, "r") as f: + while True: + l = f.readline() + if not l: + break + txt += l + sections = txt.split("\n") + sections = [(l, "") for l in sections if l] + remove_contents_table(sections, eng=is_english( + random_choices([t for t, _ in sections], k=200))) + callback(0.8, "Finish parsing.") + + elif re.search(r"\.(htm|html)$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + sections = HtmlParser()(filename, binary) + sections = [(l, "") for l in sections if l] + remove_contents_table(sections, eng=is_english( + random_choices([t for t, _ in sections], k=200))) + callback(0.8, "Finish parsing.") + + elif re.search(r"\.doc$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + binary = BytesIO(binary) + doc_parsed = parser.from_buffer(binary) + sections = doc_parsed['content'].split('\n') + sections = [(l, "") for l in sections if l] + remove_contents_table(sections, eng=is_english( + random_choices([t for t, _ in sections], k=200))) + callback(0.8, "Finish parsing.") + + else: + raise NotImplementedError( + "file type not supported yet(doc, docx, pdf, txt supported)") + + make_colon_as_title(sections) + bull = bullets_category( + [t for t in random_choices([t for t, _ in sections], k=100)]) + if bull >= 0: + chunks = ["\n".join(ck) + for ck in hierarchical_merge(bull, sections, 5)] + else: + sections = [s.split("@") for s, _ in sections] + sections = [(pr[0], "@" + pr[1]) if len(pr) == 2 else (pr[0], '') for pr in sections ] + chunks = naive_merge( + sections, kwargs.get( + "chunk_token_num", 256), kwargs.get( + "delimer", "\n。;!?")) + + # is it English + # is_english(random_choices([t for t, _ in sections], k=218)) + eng = lang.lower() == "english" + + res = tokenize_table(tbls, doc, eng) + res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser)) + + return res + + +if __name__ == "__main__": + import sys + + def dummy(prog=None, msg=""): + pass + chunk(sys.argv[1], from_page=1, to_page=10, callback=dummy) diff --git a/rag/app/laws.py b/rag/app/laws.py index 3465d5938d800c61a693ac3af4d6acbeb26b76c0..9d6d5c73c29c065856efbfad8e1625c9360bf4a0 100644 --- a/rag/app/laws.py +++ b/rag/app/laws.py @@ -1,220 +1,220 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import copy -from tika import parser -import re -from io import BytesIO -from docx import Document - -from api.db import ParserType -from rag.nlp import bullets_category, is_english, tokenize, remove_contents_table, hierarchical_merge, \ - make_colon_as_title, add_positions, tokenize_chunks, find_codec, docx_question_level -from rag.nlp import rag_tokenizer -from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser -from rag.settings import cron_logger - - -class Docx(DocxParser): - def __init__(self): - pass - - def __clean(self, line): - line = re.sub(r"\u3000", " ", line).strip() - return line - - def old_call(self, filename, binary=None, from_page=0, to_page=100000): - self.doc = Document( - filename) if not binary else Document(BytesIO(binary)) - pn = 0 - lines = [] - for p in self.doc.paragraphs: - if pn > to_page: - break - if from_page <= pn < to_page and p.text.strip(): - lines.append(self.__clean(p.text)) - for run in p.runs: - if 'lastRenderedPageBreak' in run._element.xml: - pn += 1 - continue - if 'w:br' in run._element.xml and 'type="page"' in run._element.xml: - pn += 1 - return [l for l in lines if l] - - def __call__(self, filename, binary=None, from_page=0, to_page=100000): - self.doc = Document( - filename) if not binary else Document(BytesIO(binary)) - pn = 0 - lines = [] - bull = bullets_category([p.text for p in self.doc.paragraphs]) - for p in self.doc.paragraphs: - if pn > to_page: - break - question_level, p_text = docx_question_level(p, bull) - if not p_text.strip("\n"):continue - lines.append((question_level, p_text)) - - for run in p.runs: - if 'lastRenderedPageBreak' in run._element.xml: - pn += 1 - continue - if 'w:br' in run._element.xml and 'type="page"' in run._element.xml: - pn += 1 - - visit = [False for _ in range(len(lines))] - sections = [] - for s in range(len(lines)): - e = s + 1 - while e < len(lines): - if lines[e][0] <= lines[s][0]: - break - e += 1 - if e - s == 1 and visit[s]: continue - sec = [] - next_level = lines[s][0] + 1 - while not sec and next_level < 22: - for i in range(s+1, e): - if lines[i][0] != next_level: continue - sec.append(lines[i][1]) - visit[i] = True - next_level += 1 - sec.insert(0, lines[s][1]) - - sections.append("\n".join(sec)) - return [l for l in sections if l] - - def __str__(self) -> str: - return f''' - question:{self.question}, - answer:{self.answer}, - level:{self.level}, - childs:{self.childs} - ''' - - -class Pdf(PdfParser): - def __init__(self): - self.model_speciess = ParserType.LAWS.value - super().__init__() - - def __call__(self, filename, binary=None, from_page=0, - to_page=100000, zoomin=3, callback=None): - callback(msg="OCR is running...") - self.__images__( - filename if not binary else binary, - zoomin, - from_page, - to_page, - callback - ) - callback(msg="OCR finished") - - from timeit import default_timer as timer - start = timer() - self._layouts_rec(zoomin) - callback(0.67, "Layout analysis finished") - cron_logger.info("layouts:".format( - (timer() - start) / (self.total_page + 0.1))) - self._naive_vertical_merge() - - callback(0.8, "Text extraction finished") - - 
return [(b["text"], self._line_tag(b, zoomin)) - for b in self.boxes], None - - -def chunk(filename, binary=None, from_page=0, to_page=100000, - lang="Chinese", callback=None, **kwargs): - """ - Supported file formats are docx, pdf, txt. - """ - doc = { - "docnm_kwd": filename, - "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)) - } - doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) - pdf_parser = None - sections = [] - # is it English - eng = lang.lower() == "english" # is_english(sections) - - if re.search(r"\.docx$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - for txt in Docx()(filename, binary): - sections.append(txt) - callback(0.8, "Finish parsing.") - chunks = sections - return tokenize_chunks(chunks, doc, eng, pdf_parser) - - elif re.search(r"\.pdf$", filename, re.IGNORECASE): - pdf_parser = Pdf() if kwargs.get( - "parser_config", {}).get( - "layout_recognize", True) else PlainParser() - for txt, poss in pdf_parser(filename if not binary else binary, - from_page=from_page, to_page=to_page, callback=callback)[0]: - sections.append(txt + poss) - - elif re.search(r"\.txt$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - txt = "" - if binary: - encoding = find_codec(binary) - txt = binary.decode(encoding, errors="ignore") - else: - with open(filename, "r") as f: - while True: - l = f.readline() - if not l: - break - txt += l - sections = txt.split("\n") - sections = [l for l in sections if l] - callback(0.8, "Finish parsing.") - - elif re.search(r"\.(htm|html)$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - sections = HtmlParser()(filename, binary) - sections = [l for l in sections if l] - callback(0.8, "Finish parsing.") - - elif re.search(r"\.doc$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - binary = BytesIO(binary) - doc_parsed = parser.from_buffer(binary) - sections = doc_parsed['content'].split('\n') - sections = [l for l in sections if l] - callback(0.8, "Finish parsing.") - - else: - raise NotImplementedError( - "file type not supported yet(doc, docx, pdf, txt supported)") - - - # Remove 'Contents' part - remove_contents_table(sections, eng) - - make_colon_as_title(sections) - bull = bullets_category(sections) - chunks = hierarchical_merge(bull, sections, 5) - if not chunks: - callback(0.99, "No chunk parsed out.") - - return tokenize_chunks(["\n".join(ck) - for ck in chunks], doc, eng, pdf_parser) - - -if __name__ == "__main__": - import sys - - def dummy(prog=None, msg=""): - pass - chunk(sys.argv[1], callback=dummy) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+import copy
+from tika import parser
+import re
+from io import BytesIO
+from docx import Document
+
+from api.db import ParserType
+from rag.nlp import bullets_category, is_english, tokenize, remove_contents_table, hierarchical_merge, \
+    make_colon_as_title, add_positions, tokenize_chunks, find_codec, docx_question_level
+from rag.nlp import rag_tokenizer
+from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
+from rag.settings import cron_logger
+
+
+class Docx(DocxParser):
+    def __init__(self):
+        pass
+
+    def __clean(self, line):
+        line = re.sub(r"\u3000", " ", line).strip()
+        return line
+
+    def old_call(self, filename, binary=None, from_page=0, to_page=100000):
+        self.doc = Document(
+            filename) if not binary else Document(BytesIO(binary))
+        pn = 0
+        lines = []
+        for p in self.doc.paragraphs:
+            if pn > to_page:
+                break
+            if from_page <= pn < to_page and p.text.strip():
+                lines.append(self.__clean(p.text))
+            for run in p.runs:
+                if 'lastRenderedPageBreak' in run._element.xml:
+                    pn += 1
+                    continue
+                if 'w:br' in run._element.xml and 'type="page"' in run._element.xml:
+                    pn += 1
+        return [l for l in lines if l]
+
+    def __call__(self, filename, binary=None, from_page=0, to_page=100000):
+        self.doc = Document(
+            filename) if not binary else Document(BytesIO(binary))
+        pn = 0
+        lines = []
+        bull = bullets_category([p.text for p in self.doc.paragraphs])
+        for p in self.doc.paragraphs:
+            if pn > to_page:
+                break
+            question_level, p_text = docx_question_level(p, bull)
+            if not p_text.strip("\n"): continue
+            lines.append((question_level, p_text))
+
+            for run in p.runs:
+                if 'lastRenderedPageBreak' in run._element.xml:
+                    pn += 1
+                    continue
+                if 'w:br' in run._element.xml and 'type="page"' in run._element.xml:
+                    pn += 1
+
+        visit = [False for _ in range(len(lines))]
+        sections = []
+        for s in range(len(lines)):
+            e = s + 1
+            while e < len(lines):
+                if lines[e][0] <= lines[s][0]:
+                    break
+                e += 1
+            if e - s == 1 and visit[s]: continue
+            sec = []
+            next_level = lines[s][0] + 1
+            while not sec and next_level < 22:
+                for i in range(s+1, e):
+                    if lines[i][0] != next_level: continue
+                    sec.append(lines[i][1])
+                    visit[i] = True
+                next_level += 1
+            sec.insert(0, lines[s][1])
+
+            sections.append("\n".join(sec))
+        return [l for l in sections if l]
+
+    def __str__(self) -> str:
+        return f'''
+            question:{self.question},
+            answer:{self.answer},
+            level:{self.level},
+            childs:{self.childs}
+        '''
+
+
+class Pdf(PdfParser):
+    def __init__(self):
+        self.model_speciess = ParserType.LAWS.value
+        super().__init__()
+
+    def __call__(self, filename, binary=None, from_page=0,
+                 to_page=100000, zoomin=3, callback=None):
+        callback(msg="OCR is running...")
+        self.__images__(
+            filename if not binary else binary,
+            zoomin,
+            from_page,
+            to_page,
+            callback
+        )
+        callback(msg="OCR finished")
+
+        from timeit import default_timer as timer
+        start = timer()
+        self._layouts_rec(zoomin)
+        callback(0.67, "Layout analysis finished")
+        cron_logger.info("layouts: {}".format(
+            (timer() - start) / (self.total_page + 0.1)))
+        self._naive_vertical_merge()
+
+        callback(0.8, "Text extraction finished")
+
+        return [(b["text"], self._line_tag(b, zoomin))
+                for b in self.boxes], None
+
+
+def chunk(filename, binary=None, from_page=0, to_page=100000,
+          lang="Chinese", callback=None, **kwargs):
+    """
+    Supported file formats are docx, pdf, txt, html and doc.
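+
+    A minimal invocation sketch (the file name is hypothetical; "parser_config"
+    and its "layout_recognize" key are the kwargs this function reads below):
+
+        def progress(prog=None, msg=""):
+            print(prog, msg)
+
+        # layout_recognize=False falls back to PlainParser and skips the OCR layout models
+        chunks = chunk("some_statute.pdf", callback=progress,
+                       parser_config={"layout_recognize": False})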
+ """ + doc = { + "docnm_kwd": filename, + "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)) + } + doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) + pdf_parser = None + sections = [] + # is it English + eng = lang.lower() == "english" # is_english(sections) + + if re.search(r"\.docx$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + for txt in Docx()(filename, binary): + sections.append(txt) + callback(0.8, "Finish parsing.") + chunks = sections + return tokenize_chunks(chunks, doc, eng, pdf_parser) + + elif re.search(r"\.pdf$", filename, re.IGNORECASE): + pdf_parser = Pdf() if kwargs.get( + "parser_config", {}).get( + "layout_recognize", True) else PlainParser() + for txt, poss in pdf_parser(filename if not binary else binary, + from_page=from_page, to_page=to_page, callback=callback)[0]: + sections.append(txt + poss) + + elif re.search(r"\.txt$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + txt = "" + if binary: + encoding = find_codec(binary) + txt = binary.decode(encoding, errors="ignore") + else: + with open(filename, "r") as f: + while True: + l = f.readline() + if not l: + break + txt += l + sections = txt.split("\n") + sections = [l for l in sections if l] + callback(0.8, "Finish parsing.") + + elif re.search(r"\.(htm|html)$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + sections = HtmlParser()(filename, binary) + sections = [l for l in sections if l] + callback(0.8, "Finish parsing.") + + elif re.search(r"\.doc$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + binary = BytesIO(binary) + doc_parsed = parser.from_buffer(binary) + sections = doc_parsed['content'].split('\n') + sections = [l for l in sections if l] + callback(0.8, "Finish parsing.") + + else: + raise NotImplementedError( + "file type not supported yet(doc, docx, pdf, txt supported)") + + + # Remove 'Contents' part + remove_contents_table(sections, eng) + + make_colon_as_title(sections) + bull = bullets_category(sections) + chunks = hierarchical_merge(bull, sections, 5) + if not chunks: + callback(0.99, "No chunk parsed out.") + + return tokenize_chunks(["\n".join(ck) + for ck in chunks], doc, eng, pdf_parser) + + +if __name__ == "__main__": + import sys + + def dummy(prog=None, msg=""): + pass + chunk(sys.argv[1], callback=dummy) diff --git a/rag/app/manual.py b/rag/app/manual.py index f3a5fa41e9ae57e3e8ca309eb32fac0f6303d8ef..29c7cd7f6e5300ae5771d4228a7ff74239181476 100644 --- a/rag/app/manual.py +++ b/rag/app/manual.py @@ -1,272 +1,272 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import copy -import re - -from api.db import ParserType -from io import BytesIO -from rag.nlp import rag_tokenizer, tokenize, tokenize_table, add_positions, bullets_category, title_frequency, tokenize_chunks, docx_question_level -from deepdoc.parser import PdfParser, PlainParser -from rag.utils import num_tokens_from_string -from deepdoc.parser import PdfParser, ExcelParser, DocxParser -from docx import Document -from PIL import Image - -class Pdf(PdfParser): - def __init__(self): - self.model_speciess = ParserType.MANUAL.value - super().__init__() - - def __call__(self, filename, binary=None, from_page=0, - to_page=100000, zoomin=3, callback=None): - from timeit import default_timer as timer - start = timer() - callback(msg="OCR is running...") - self.__images__( - filename if not binary else binary, - zoomin, - from_page, - to_page, - callback - ) - callback(msg="OCR finished.") - # for bb in self.boxes: - # for b in bb: - # print(b) - print("OCR:", timer() - start) - - self._layouts_rec(zoomin) - callback(0.65, "Layout analysis finished.") - print("layouts:", timer() - start) - self._table_transformer_job(zoomin) - callback(0.67, "Table analysis finished.") - self._text_merge() - tbls = self._extract_table_figure(True, zoomin, True, True) - self._concat_downward() - self._filter_forpages() - callback(0.68, "Text merging finished") - - # clean mess - for b in self.boxes: - b["text"] = re.sub(r"([\t  ]|\u3000){2,}", " ", b["text"].strip()) - - return [(b["text"], b.get("layout_no", ""), self.get_position(b, zoomin)) - for i, b in enumerate(self.boxes)], tbls - -class Docx(DocxParser): - def __init__(self): - pass - def get_picture(self, document, paragraph): - img = paragraph._element.xpath('.//pic:pic') - if not img: - return None - img = img[0] - embed = img.xpath('.//a:blip/@r:embed')[0] - related_part = document.part.related_parts[embed] - image = related_part.image - image = Image.open(BytesIO(image.blob)) - return image - def concat_img(self, img1, img2): - if img1 and not img2: - return img1 - if not img1 and img2: - return img2 - if not img1 and not img2: - return None - width1, height1 = img1.size - width2, height2 = img2.size - - new_width = max(width1, width2) - new_height = height1 + height2 - new_image = Image.new('RGB', (new_width, new_height)) - - new_image.paste(img1, (0, 0)) - new_image.paste(img2, (0, height1)) - - return new_image - - def __call__(self, filename, binary=None, from_page=0, to_page=100000, callback=None): - self.doc = Document( - filename) if not binary else Document(BytesIO(binary)) - pn = 0 - last_answer, last_image = "", None - question_stack, level_stack = [], [] - ti_list = [] - for p in self.doc.paragraphs: - if pn > to_page: - break - question_level, p_text = 0, '' - if from_page <= pn < to_page and p.text.strip(): - question_level, p_text = docx_question_level(p) - if not question_level or question_level > 6: # not a question - last_answer = f'{last_answer}\n{p_text}' - current_image = self.get_picture(self.doc, p) - last_image = self.concat_img(last_image, current_image) - else: # is a question - if last_answer or last_image: - sum_question = '\n'.join(question_stack) - if sum_question: - ti_list.append((f'{sum_question}\n{last_answer}', last_image)) - last_answer, last_image = '', None - - i = question_level - while question_stack and i <= level_stack[-1]: - question_stack.pop() - level_stack.pop() - question_stack.append(p_text) - level_stack.append(question_level) - for run in p.runs: - if 'lastRenderedPageBreak' in run._element.xml: - 
pn += 1 - continue - if 'w:br' in run._element.xml and 'type="page"' in run._element.xml: - pn += 1 - if last_answer: - sum_question = '\n'.join(question_stack) - if sum_question: - ti_list.append((f'{sum_question}\n{last_answer}', last_image)) - - tbls = [] - for tb in self.doc.tables: - html= "" - for r in tb.rows: - html += "" - i = 0 - while i < len(r.cells): - span = 1 - c = r.cells[i] - for j in range(i+1, len(r.cells)): - if c.text == r.cells[j].text: - span += 1 - i = j - i += 1 - html += f"" if span == 1 else f"" - html += "" - html += "
{c.text}{c.text}
" - tbls.append(((None, html), "")) - return ti_list, tbls - -def chunk(filename, binary=None, from_page=0, to_page=100000, - lang="Chinese", callback=None, **kwargs): - """ - Only pdf is supported. - """ - pdf_parser = None - doc = { - "docnm_kwd": filename - } - doc["title_tks"] = rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", doc["docnm_kwd"])) - doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) - # is it English - eng = lang.lower() == "english" # pdf_parser.is_english - if re.search(r"\.pdf$", filename, re.IGNORECASE): - pdf_parser = Pdf() if kwargs.get( - "parser_config", {}).get( - "layout_recognize", True) else PlainParser() - sections, tbls = pdf_parser(filename if not binary else binary, - from_page=from_page, to_page=to_page, callback=callback) - if sections and len(sections[0]) < 3: - sections = [(t, l, [[0] * 5]) for t, l in sections] - # set pivot using the most frequent type of title, - # then merge between 2 pivot - if len(sections) > 0 and len(pdf_parser.outlines) / len(sections) > 0.1: - max_lvl = max([lvl for _, lvl in pdf_parser.outlines]) - most_level = max(0, max_lvl - 1) - levels = [] - for txt, _, _ in sections: - for t, lvl in pdf_parser.outlines: - tks = set([t[i] + t[i + 1] for i in range(len(t) - 1)]) - tks_ = set([txt[i] + txt[i + 1] - for i in range(min(len(t), len(txt) - 1))]) - if len(set(tks & tks_)) / max([len(tks), len(tks_), 1]) > 0.8: - levels.append(lvl) - break - else: - levels.append(max_lvl + 1) - - else: - bull = bullets_category([txt for txt, _, _ in sections]) - most_level, levels = title_frequency( - bull, [(txt, l) for txt, l, poss in sections]) - - assert len(sections) == len(levels) - sec_ids = [] - sid = 0 - for i, lvl in enumerate(levels): - if lvl <= most_level and i > 0 and lvl != levels[i - 1]: - sid += 1 - sec_ids.append(sid) - # print(lvl, self.boxes[i]["text"], most_level, sid) - - sections = [(txt, sec_ids[i], poss) - for i, (txt, _, poss) in enumerate(sections)] - for (img, rows), poss in tbls: - if not rows: continue - sections.append((rows if isinstance(rows, str) else rows[0], -1, - [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss])) - - def tag(pn, left, right, top, bottom): - if pn + left + right + top + bottom == 0: - return "" - return "@@{}\t{:.1f}\t{:.1f}\t{:.1f}\t{:.1f}##" \ - .format(pn, left, right, top, bottom) - - chunks = [] - last_sid = -2 - tk_cnt = 0 - for txt, sec_id, poss in sorted(sections, key=lambda x: ( - x[-1][0][0], x[-1][0][3], x[-1][0][1])): - poss = "\t".join([tag(*pos) for pos in poss]) - if tk_cnt < 32 or (tk_cnt < 1024 and (sec_id == last_sid or sec_id == -1)): - if chunks: - chunks[-1] += "\n" + txt + poss - tk_cnt += num_tokens_from_string(txt) - continue - chunks.append(txt + poss) - tk_cnt = num_tokens_from_string(txt) - if sec_id > -1: - last_sid = sec_id - - res = tokenize_table(tbls, doc, eng) - res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser)) - return res - if re.search(r"\.docx$", filename, re.IGNORECASE): - docx_parser = Docx() - ti_list, tbls = docx_parser(filename, binary, - from_page=0, to_page=10000, callback=callback) - res = tokenize_table(tbls, doc, eng) - for text, image in ti_list: - d = copy.deepcopy(doc) - d['image'] = image - tokenize(d, text, eng) - res.append(d) - return res - else: - raise NotImplementedError("file type not supported yet(pdf and docx supported)") - - - - -if __name__ == "__main__": - import sys - - - def dummy(prog=None, msg=""): - pass - - +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import copy +import re + +from api.db import ParserType +from io import BytesIO +from rag.nlp import rag_tokenizer, tokenize, tokenize_table, add_positions, bullets_category, title_frequency, tokenize_chunks, docx_question_level +from deepdoc.parser import PdfParser, PlainParser +from rag.utils import num_tokens_from_string +from deepdoc.parser import PdfParser, ExcelParser, DocxParser +from docx import Document +from PIL import Image + +class Pdf(PdfParser): + def __init__(self): + self.model_speciess = ParserType.MANUAL.value + super().__init__() + + def __call__(self, filename, binary=None, from_page=0, + to_page=100000, zoomin=3, callback=None): + from timeit import default_timer as timer + start = timer() + callback(msg="OCR is running...") + self.__images__( + filename if not binary else binary, + zoomin, + from_page, + to_page, + callback + ) + callback(msg="OCR finished.") + # for bb in self.boxes: + # for b in bb: + # print(b) + print("OCR:", timer() - start) + + self._layouts_rec(zoomin) + callback(0.65, "Layout analysis finished.") + print("layouts:", timer() - start) + self._table_transformer_job(zoomin) + callback(0.67, "Table analysis finished.") + self._text_merge() + tbls = self._extract_table_figure(True, zoomin, True, True) + self._concat_downward() + self._filter_forpages() + callback(0.68, "Text merging finished") + + # clean mess + for b in self.boxes: + b["text"] = re.sub(r"([\t  ]|\u3000){2,}", " ", b["text"].strip()) + + return [(b["text"], b.get("layout_no", ""), self.get_position(b, zoomin)) + for i, b in enumerate(self.boxes)], tbls + +class Docx(DocxParser): + def __init__(self): + pass + def get_picture(self, document, paragraph): + img = paragraph._element.xpath('.//pic:pic') + if not img: + return None + img = img[0] + embed = img.xpath('.//a:blip/@r:embed')[0] + related_part = document.part.related_parts[embed] + image = related_part.image + image = Image.open(BytesIO(image.blob)) + return image + def concat_img(self, img1, img2): + if img1 and not img2: + return img1 + if not img1 and img2: + return img2 + if not img1 and not img2: + return None + width1, height1 = img1.size + width2, height2 = img2.size + + new_width = max(width1, width2) + new_height = height1 + height2 + new_image = Image.new('RGB', (new_width, new_height)) + + new_image.paste(img1, (0, 0)) + new_image.paste(img2, (0, height1)) + + return new_image + + def __call__(self, filename, binary=None, from_page=0, to_page=100000, callback=None): + self.doc = Document( + filename) if not binary else Document(BytesIO(binary)) + pn = 0 + last_answer, last_image = "", None + question_stack, level_stack = [], [] + ti_list = [] + for p in self.doc.paragraphs: + if pn > to_page: + break + question_level, p_text = 0, '' + if from_page <= pn < to_page and p.text.strip(): + question_level, p_text = docx_question_level(p) + if not question_level or question_level > 6: # not a question + last_answer = f'{last_answer}\n{p_text}' + current_image = 
self.get_picture(self.doc, p) + last_image = self.concat_img(last_image, current_image) + else: # is a question + if last_answer or last_image: + sum_question = '\n'.join(question_stack) + if sum_question: + ti_list.append((f'{sum_question}\n{last_answer}', last_image)) + last_answer, last_image = '', None + + i = question_level + while question_stack and i <= level_stack[-1]: + question_stack.pop() + level_stack.pop() + question_stack.append(p_text) + level_stack.append(question_level) + for run in p.runs: + if 'lastRenderedPageBreak' in run._element.xml: + pn += 1 + continue + if 'w:br' in run._element.xml and 'type="page"' in run._element.xml: + pn += 1 + if last_answer: + sum_question = '\n'.join(question_stack) + if sum_question: + ti_list.append((f'{sum_question}\n{last_answer}', last_image)) + + tbls = [] + for tb in self.doc.tables: + html= "" + for r in tb.rows: + html += "" + i = 0 + while i < len(r.cells): + span = 1 + c = r.cells[i] + for j in range(i+1, len(r.cells)): + if c.text == r.cells[j].text: + span += 1 + i = j + i += 1 + html += f"" if span == 1 else f"" + html += "" + html += "
{c.text}{c.text}
" + tbls.append(((None, html), "")) + return ti_list, tbls + +def chunk(filename, binary=None, from_page=0, to_page=100000, + lang="Chinese", callback=None, **kwargs): + """ + Only pdf is supported. + """ + pdf_parser = None + doc = { + "docnm_kwd": filename + } + doc["title_tks"] = rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", doc["docnm_kwd"])) + doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) + # is it English + eng = lang.lower() == "english" # pdf_parser.is_english + if re.search(r"\.pdf$", filename, re.IGNORECASE): + pdf_parser = Pdf() if kwargs.get( + "parser_config", {}).get( + "layout_recognize", True) else PlainParser() + sections, tbls = pdf_parser(filename if not binary else binary, + from_page=from_page, to_page=to_page, callback=callback) + if sections and len(sections[0]) < 3: + sections = [(t, l, [[0] * 5]) for t, l in sections] + # set pivot using the most frequent type of title, + # then merge between 2 pivot + if len(sections) > 0 and len(pdf_parser.outlines) / len(sections) > 0.1: + max_lvl = max([lvl for _, lvl in pdf_parser.outlines]) + most_level = max(0, max_lvl - 1) + levels = [] + for txt, _, _ in sections: + for t, lvl in pdf_parser.outlines: + tks = set([t[i] + t[i + 1] for i in range(len(t) - 1)]) + tks_ = set([txt[i] + txt[i + 1] + for i in range(min(len(t), len(txt) - 1))]) + if len(set(tks & tks_)) / max([len(tks), len(tks_), 1]) > 0.8: + levels.append(lvl) + break + else: + levels.append(max_lvl + 1) + + else: + bull = bullets_category([txt for txt, _, _ in sections]) + most_level, levels = title_frequency( + bull, [(txt, l) for txt, l, poss in sections]) + + assert len(sections) == len(levels) + sec_ids = [] + sid = 0 + for i, lvl in enumerate(levels): + if lvl <= most_level and i > 0 and lvl != levels[i - 1]: + sid += 1 + sec_ids.append(sid) + # print(lvl, self.boxes[i]["text"], most_level, sid) + + sections = [(txt, sec_ids[i], poss) + for i, (txt, _, poss) in enumerate(sections)] + for (img, rows), poss in tbls: + if not rows: continue + sections.append((rows if isinstance(rows, str) else rows[0], -1, + [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss])) + + def tag(pn, left, right, top, bottom): + if pn + left + right + top + bottom == 0: + return "" + return "@@{}\t{:.1f}\t{:.1f}\t{:.1f}\t{:.1f}##" \ + .format(pn, left, right, top, bottom) + + chunks = [] + last_sid = -2 + tk_cnt = 0 + for txt, sec_id, poss in sorted(sections, key=lambda x: ( + x[-1][0][0], x[-1][0][3], x[-1][0][1])): + poss = "\t".join([tag(*pos) for pos in poss]) + if tk_cnt < 32 or (tk_cnt < 1024 and (sec_id == last_sid or sec_id == -1)): + if chunks: + chunks[-1] += "\n" + txt + poss + tk_cnt += num_tokens_from_string(txt) + continue + chunks.append(txt + poss) + tk_cnt = num_tokens_from_string(txt) + if sec_id > -1: + last_sid = sec_id + + res = tokenize_table(tbls, doc, eng) + res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser)) + return res + if re.search(r"\.docx$", filename, re.IGNORECASE): + docx_parser = Docx() + ti_list, tbls = docx_parser(filename, binary, + from_page=0, to_page=10000, callback=callback) + res = tokenize_table(tbls, doc, eng) + for text, image in ti_list: + d = copy.deepcopy(doc) + d['image'] = image + tokenize(d, text, eng) + res.append(d) + return res + else: + raise NotImplementedError("file type not supported yet(pdf and docx supported)") + + + + +if __name__ == "__main__": + import sys + + + def dummy(prog=None, msg=""): + pass + + chunk(sys.argv[1], callback=dummy) \ No newline at end of file diff 
--git a/rag/app/naive.py b/rag/app/naive.py index 73d92523d181778c049c39c3d79512a36fea928b..54bfd77c34051c4c8356e5a059d42bbed91e1b7b 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -1,282 +1,282 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from tika import parser -from io import BytesIO -from docx import Document -from timeit import default_timer as timer -import re -from deepdoc.parser.pdf_parser import PlainParser -from rag.nlp import rag_tokenizer, naive_merge, tokenize_table, tokenize_chunks, find_codec, concat_img, naive_merge_docx, tokenize_chunks_docx -from deepdoc.parser import PdfParser, ExcelParser, DocxParser, HtmlParser, JsonParser, MarkdownParser, TxtParser -from rag.settings import cron_logger -from rag.utils import num_tokens_from_string -from PIL import Image -from functools import reduce -from markdown import markdown -from docx.image.exceptions import UnrecognizedImageError - -class Docx(DocxParser): - def __init__(self): - pass - - def get_picture(self, document, paragraph): - img = paragraph._element.xpath('.//pic:pic') - if not img: - return None - img = img[0] - embed = img.xpath('.//a:blip/@r:embed')[0] - related_part = document.part.related_parts[embed] - try: - image_blob = related_part.image.blob - except UnrecognizedImageError: - print("Unrecognized image format. 
Skipping image.") - return None - try: - image = Image.open(BytesIO(image_blob)).convert('RGB') - return image - except Exception as e: - return None - - def __clean(self, line): - line = re.sub(r"\u3000", " ", line).strip() - return line - - def __call__(self, filename, binary=None, from_page=0, to_page=100000): - self.doc = Document( - filename) if not binary else Document(BytesIO(binary)) - pn = 0 - lines = [] - last_image = None - for p in self.doc.paragraphs: - if pn > to_page: - break - if from_page <= pn < to_page: - if p.text.strip(): - if p.style and p.style.name == 'Caption': - former_image = None - if lines and lines[-1][1] and lines[-1][2] != 'Caption': - former_image = lines[-1][1].pop() - elif last_image: - former_image = last_image - last_image = None - lines.append((self.__clean(p.text), [former_image], p.style.name)) - else: - current_image = self.get_picture(self.doc, p) - image_list = [current_image] - if last_image: - image_list.insert(0, last_image) - last_image = None - lines.append((self.__clean(p.text), image_list, p.style.name)) - else: - if current_image := self.get_picture(self.doc, p): - if lines: - lines[-1][1].append(current_image) - else: - last_image = current_image - for run in p.runs: - if 'lastRenderedPageBreak' in run._element.xml: - pn += 1 - continue - if 'w:br' in run._element.xml and 'type="page"' in run._element.xml: - pn += 1 - new_line = [(line[0], reduce(concat_img, line[1]) if line[1] else None) for line in lines] - - tbls = [] - for tb in self.doc.tables: - html= "" - for r in tb.rows: - html += "" - i = 0 - while i < len(r.cells): - span = 1 - c = r.cells[i] - for j in range(i+1, len(r.cells)): - if c.text == r.cells[j].text: - span += 1 - i = j - i += 1 - html += f"" if span == 1 else f"" - html += "" - html += "
{c.text}{c.text}
" - tbls.append(((None, html), "")) - return new_line, tbls - - -class Pdf(PdfParser): - def __call__(self, filename, binary=None, from_page=0, - to_page=100000, zoomin=3, callback=None): - start = timer() - callback(msg="OCR is running...") - self.__images__( - filename if not binary else binary, - zoomin, - from_page, - to_page, - callback - ) - callback(msg="OCR finished") - cron_logger.info("OCR({}~{}): {}".format(from_page, to_page, timer() - start)) - - start = timer() - self._layouts_rec(zoomin) - callback(0.63, "Layout analysis finished.") - self._table_transformer_job(zoomin) - callback(0.65, "Table analysis finished.") - self._text_merge() - callback(0.67, "Text merging finished") - tbls = self._extract_table_figure(True, zoomin, True, True) - #self._naive_vertical_merge() - self._concat_downward() - #self._filter_forpages() - - cron_logger.info("layouts: {}".format(timer() - start)) - return [(b["text"], self._line_tag(b, zoomin)) - for b in self.boxes], tbls - - -class Markdown(MarkdownParser): - def __call__(self, filename, binary=None): - txt = "" - tbls = [] - if binary: - encoding = find_codec(binary) - txt = binary.decode(encoding, errors="ignore") - else: - with open(filename, "r") as f: - txt = f.read() - remainder, tables = self.extract_tables_and_remainder(f'{txt}\n') - sections = [] - tbls = [] - for sec in remainder.split("\n"): - if num_tokens_from_string(sec) > 10 * self.chunk_token_num: - sections.append((sec[:int(len(sec)/2)], "")) - sections.append((sec[int(len(sec)/2):], "")) - else: - sections.append((sec, "")) - print(tables) - for table in tables: - tbls.append(((None, markdown(table, extensions=['markdown.extensions.tables'])), "")) - return sections, tbls - - - -def chunk(filename, binary=None, from_page=0, to_page=100000, - lang="Chinese", callback=None, **kwargs): - """ - Supported file formats are docx, pdf, excel, txt. - This method apply the naive ways to chunk files. - Successive text will be sliced into pieces using 'delimiter'. - Next, these successive pieces are merge into chunks whose token number is no more than 'Max token number'. 
- """ - - eng = lang.lower() == "english" # is_english(cks) - parser_config = kwargs.get( - "parser_config", { - "chunk_token_num": 128, "delimiter": "\n!?。;!?", "layout_recognize": True}) - doc = { - "docnm_kwd": filename, - "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)) - } - doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) - res = [] - pdf_parser = None - sections = [] - if re.search(r"\.docx$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - sections, tbls = Docx()(filename, binary) - res = tokenize_table(tbls, doc, eng) # just for table - - callback(0.8, "Finish parsing.") - st = timer() - - chunks, images = naive_merge_docx( - sections, int(parser_config.get( - "chunk_token_num", 128)), parser_config.get( - "delimiter", "\n!?。;!?")) - - if kwargs.get("section_only", False): - return chunks - - res.extend(tokenize_chunks_docx(chunks, doc, eng, images)) - cron_logger.info("naive_merge({}): {}".format(filename, timer() - st)) - return res - - elif re.search(r"\.pdf$", filename, re.IGNORECASE): - pdf_parser = Pdf( - ) if parser_config.get("layout_recognize", True) else PlainParser() - sections, tbls = pdf_parser(filename if not binary else binary, - from_page=from_page, to_page=to_page, callback=callback) - res = tokenize_table(tbls, doc, eng) - - elif re.search(r"\.xlsx?$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - excel_parser = ExcelParser() - sections = [(l, "") for l in excel_parser.html(binary) if l] - - elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - sections = TxtParser()(filename,binary, - parser_config.get("chunk_token_num", 128), - parser_config.get("delimiter", "\n!?;。;!?")) - callback(0.8, "Finish parsing.") - - elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - sections, tbls = Markdown(int(parser_config.get("chunk_token_num", 128)))(filename, binary) - res = tokenize_table(tbls, doc, eng) - callback(0.8, "Finish parsing.") - - elif re.search(r"\.(htm|html)$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - sections = HtmlParser()(filename, binary) - sections = [(l, "") for l in sections if l] - callback(0.8, "Finish parsing.") - - elif re.search(r"\.json$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - sections = JsonParser(int(parser_config.get("chunk_token_num", 128)))(binary) - sections = [(l, "") for l in sections if l] - callback(0.8, "Finish parsing.") - - elif re.search(r"\.doc$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - binary = BytesIO(binary) - doc_parsed = parser.from_buffer(binary) - sections = doc_parsed['content'].split('\n') - sections = [(l, "") for l in sections if l] - callback(0.8, "Finish parsing.") - - else: - raise NotImplementedError( - "file type not supported yet(pdf, xlsx, doc, docx, txt supported)") - - st = timer() - chunks = naive_merge( - sections, int(parser_config.get( - "chunk_token_num", 128)), parser_config.get( - "delimiter", "\n!?。;!?")) - if kwargs.get("section_only", False): - return chunks - - res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser)) - cron_logger.info("naive_merge({}): {}".format(filename, timer() - st)) - return res - - -if __name__ == "__main__": - import sys - - def dummy(prog=None, msg=""): - pass - - chunk(sys.argv[1], from_page=0, to_page=10, callback=dummy) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may 
not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from tika import parser +from io import BytesIO +from docx import Document +from timeit import default_timer as timer +import re +from deepdoc.parser.pdf_parser import PlainParser +from rag.nlp import rag_tokenizer, naive_merge, tokenize_table, tokenize_chunks, find_codec, concat_img, naive_merge_docx, tokenize_chunks_docx +from deepdoc.parser import PdfParser, ExcelParser, DocxParser, HtmlParser, JsonParser, MarkdownParser, TxtParser +from rag.settings import cron_logger +from rag.utils import num_tokens_from_string +from PIL import Image +from functools import reduce +from markdown import markdown +from docx.image.exceptions import UnrecognizedImageError + +class Docx(DocxParser): + def __init__(self): + pass + + def get_picture(self, document, paragraph): + img = paragraph._element.xpath('.//pic:pic') + if not img: + return None + img = img[0] + embed = img.xpath('.//a:blip/@r:embed')[0] + related_part = document.part.related_parts[embed] + try: + image_blob = related_part.image.blob + except UnrecognizedImageError: + print("Unrecognized image format. Skipping image.") + return None + try: + image = Image.open(BytesIO(image_blob)).convert('RGB') + return image + except Exception as e: + return None + + def __clean(self, line): + line = re.sub(r"\u3000", " ", line).strip() + return line + + def __call__(self, filename, binary=None, from_page=0, to_page=100000): + self.doc = Document( + filename) if not binary else Document(BytesIO(binary)) + pn = 0 + lines = [] + last_image = None + for p in self.doc.paragraphs: + if pn > to_page: + break + if from_page <= pn < to_page: + if p.text.strip(): + if p.style and p.style.name == 'Caption': + former_image = None + if lines and lines[-1][1] and lines[-1][2] != 'Caption': + former_image = lines[-1][1].pop() + elif last_image: + former_image = last_image + last_image = None + lines.append((self.__clean(p.text), [former_image], p.style.name)) + else: + current_image = self.get_picture(self.doc, p) + image_list = [current_image] + if last_image: + image_list.insert(0, last_image) + last_image = None + lines.append((self.__clean(p.text), image_list, p.style.name)) + else: + if current_image := self.get_picture(self.doc, p): + if lines: + lines[-1][1].append(current_image) + else: + last_image = current_image + for run in p.runs: + if 'lastRenderedPageBreak' in run._element.xml: + pn += 1 + continue + if 'w:br' in run._element.xml and 'type="page"' in run._element.xml: + pn += 1 + new_line = [(line[0], reduce(concat_img, line[1]) if line[1] else None) for line in lines] + + tbls = [] + for tb in self.doc.tables: + html= "" + for r in tb.rows: + html += "" + i = 0 + while i < len(r.cells): + span = 1 + c = r.cells[i] + for j in range(i+1, len(r.cells)): + if c.text == r.cells[j].text: + span += 1 + i = j + i += 1 + html += f"" if span == 1 else f"" + html += "" + html += "
{c.text}{c.text}
" + tbls.append(((None, html), "")) + return new_line, tbls + + +class Pdf(PdfParser): + def __call__(self, filename, binary=None, from_page=0, + to_page=100000, zoomin=3, callback=None): + start = timer() + callback(msg="OCR is running...") + self.__images__( + filename if not binary else binary, + zoomin, + from_page, + to_page, + callback + ) + callback(msg="OCR finished") + cron_logger.info("OCR({}~{}): {}".format(from_page, to_page, timer() - start)) + + start = timer() + self._layouts_rec(zoomin) + callback(0.63, "Layout analysis finished.") + self._table_transformer_job(zoomin) + callback(0.65, "Table analysis finished.") + self._text_merge() + callback(0.67, "Text merging finished") + tbls = self._extract_table_figure(True, zoomin, True, True) + #self._naive_vertical_merge() + self._concat_downward() + #self._filter_forpages() + + cron_logger.info("layouts: {}".format(timer() - start)) + return [(b["text"], self._line_tag(b, zoomin)) + for b in self.boxes], tbls + + +class Markdown(MarkdownParser): + def __call__(self, filename, binary=None): + txt = "" + tbls = [] + if binary: + encoding = find_codec(binary) + txt = binary.decode(encoding, errors="ignore") + else: + with open(filename, "r") as f: + txt = f.read() + remainder, tables = self.extract_tables_and_remainder(f'{txt}\n') + sections = [] + tbls = [] + for sec in remainder.split("\n"): + if num_tokens_from_string(sec) > 10 * self.chunk_token_num: + sections.append((sec[:int(len(sec)/2)], "")) + sections.append((sec[int(len(sec)/2):], "")) + else: + sections.append((sec, "")) + print(tables) + for table in tables: + tbls.append(((None, markdown(table, extensions=['markdown.extensions.tables'])), "")) + return sections, tbls + + + +def chunk(filename, binary=None, from_page=0, to_page=100000, + lang="Chinese", callback=None, **kwargs): + """ + Supported file formats are docx, pdf, excel, txt. + This method apply the naive ways to chunk files. + Successive text will be sliced into pieces using 'delimiter'. + Next, these successive pieces are merge into chunks whose token number is no more than 'Max token number'. 
+ """ + + eng = lang.lower() == "english" # is_english(cks) + parser_config = kwargs.get( + "parser_config", { + "chunk_token_num": 128, "delimiter": "\n!?。;!?", "layout_recognize": True}) + doc = { + "docnm_kwd": filename, + "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)) + } + doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) + res = [] + pdf_parser = None + sections = [] + if re.search(r"\.docx$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + sections, tbls = Docx()(filename, binary) + res = tokenize_table(tbls, doc, eng) # just for table + + callback(0.8, "Finish parsing.") + st = timer() + + chunks, images = naive_merge_docx( + sections, int(parser_config.get( + "chunk_token_num", 128)), parser_config.get( + "delimiter", "\n!?。;!?")) + + if kwargs.get("section_only", False): + return chunks + + res.extend(tokenize_chunks_docx(chunks, doc, eng, images)) + cron_logger.info("naive_merge({}): {}".format(filename, timer() - st)) + return res + + elif re.search(r"\.pdf$", filename, re.IGNORECASE): + pdf_parser = Pdf( + ) if parser_config.get("layout_recognize", True) else PlainParser() + sections, tbls = pdf_parser(filename if not binary else binary, + from_page=from_page, to_page=to_page, callback=callback) + res = tokenize_table(tbls, doc, eng) + + elif re.search(r"\.xlsx?$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + excel_parser = ExcelParser() + sections = [(l, "") for l in excel_parser.html(binary) if l] + + elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + sections = TxtParser()(filename,binary, + parser_config.get("chunk_token_num", 128), + parser_config.get("delimiter", "\n!?;。;!?")) + callback(0.8, "Finish parsing.") + + elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + sections, tbls = Markdown(int(parser_config.get("chunk_token_num", 128)))(filename, binary) + res = tokenize_table(tbls, doc, eng) + callback(0.8, "Finish parsing.") + + elif re.search(r"\.(htm|html)$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + sections = HtmlParser()(filename, binary) + sections = [(l, "") for l in sections if l] + callback(0.8, "Finish parsing.") + + elif re.search(r"\.json$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + sections = JsonParser(int(parser_config.get("chunk_token_num", 128)))(binary) + sections = [(l, "") for l in sections if l] + callback(0.8, "Finish parsing.") + + elif re.search(r"\.doc$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + binary = BytesIO(binary) + doc_parsed = parser.from_buffer(binary) + sections = doc_parsed['content'].split('\n') + sections = [(l, "") for l in sections if l] + callback(0.8, "Finish parsing.") + + else: + raise NotImplementedError( + "file type not supported yet(pdf, xlsx, doc, docx, txt supported)") + + st = timer() + chunks = naive_merge( + sections, int(parser_config.get( + "chunk_token_num", 128)), parser_config.get( + "delimiter", "\n!?。;!?")) + if kwargs.get("section_only", False): + return chunks + + res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser)) + cron_logger.info("naive_merge({}): {}".format(filename, timer() - st)) + return res + + +if __name__ == "__main__": + import sys + + def dummy(prog=None, msg=""): + pass + + chunk(sys.argv[1], from_page=0, to_page=10, callback=dummy) diff --git a/rag/app/one.py b/rag/app/one.py index 
c0d132447814067d27b00dad859c1e8fb36bbfc1..88ca9e3769161da6f291c99a54a5892ad030429d 100644 --- a/rag/app/one.py +++ b/rag/app/one.py @@ -1,133 +1,133 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from tika import parser -from io import BytesIO -import re -from rag.app import laws -from rag.nlp import rag_tokenizer, tokenize, find_codec -from deepdoc.parser import PdfParser, ExcelParser, PlainParser, HtmlParser - - -class Pdf(PdfParser): - def __call__(self, filename, binary=None, from_page=0, - to_page=100000, zoomin=3, callback=None): - callback(msg="OCR is running...") - self.__images__( - filename if not binary else binary, - zoomin, - from_page, - to_page, - callback - ) - callback(msg="OCR finished") - - from timeit import default_timer as timer - start = timer() - self._layouts_rec(zoomin, drop=False) - callback(0.63, "Layout analysis finished.") - print("layouts:", timer() - start) - self._table_transformer_job(zoomin) - callback(0.65, "Table analysis finished.") - self._text_merge() - callback(0.67, "Text merging finished") - tbls = self._extract_table_figure(True, zoomin, True, True) - self._concat_downward() - - sections = [(b["text"], self.get_position(b, zoomin)) - for i, b in enumerate(self.boxes)] - for (img, rows), poss in tbls: - if not rows:continue - sections.append((rows if isinstance(rows, str) else rows[0], - [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss])) - return [(txt, "") for txt, _ in sorted(sections, key=lambda x: ( - x[-1][0][0], x[-1][0][3], x[-1][0][1]))], None - - -def chunk(filename, binary=None, from_page=0, to_page=100000, - lang="Chinese", callback=None, **kwargs): - """ - Supported file formats are docx, pdf, excel, txt. - One file forms a chunk which maintains original text order. 
- """ - - eng = lang.lower() == "english" # is_english(cks) - - if re.search(r"\.docx$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - sections = [txt for txt in laws.Docx()(filename, binary) if txt] - callback(0.8, "Finish parsing.") - - elif re.search(r"\.pdf$", filename, re.IGNORECASE): - pdf_parser = Pdf() if kwargs.get( - "parser_config", {}).get( - "layout_recognize", True) else PlainParser() - sections, _ = pdf_parser( - filename if not binary else binary, to_page=to_page, callback=callback) - sections = [s for s, _ in sections if s] - - elif re.search(r"\.xlsx?$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - excel_parser = ExcelParser() - sections = excel_parser.html(binary, 1000000000) - - elif re.search(r"\.txt$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - txt = "" - if binary: - encoding = find_codec(binary) - txt = binary.decode(encoding, errors="ignore") - else: - with open(filename, "r") as f: - while True: - l = f.readline() - if not l: - break - txt += l - sections = txt.split("\n") - sections = [s for s in sections if s] - callback(0.8, "Finish parsing.") - - elif re.search(r"\.(htm|html)$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - sections = HtmlParser()(filename, binary) - sections = [s for s in sections if s] - callback(0.8, "Finish parsing.") - - elif re.search(r"\.doc$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - binary = BytesIO(binary) - doc_parsed = parser.from_buffer(binary) - sections = doc_parsed['content'].split('\n') - sections = [l for l in sections if l] - callback(0.8, "Finish parsing.") - - else: - raise NotImplementedError( - "file type not supported yet(doc, docx, pdf, txt supported)") - - doc = { - "docnm_kwd": filename, - "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)) - } - doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) - tokenize(doc, "\n".join(sections), eng) - return [doc] - - -if __name__ == "__main__": - import sys - - def dummy(prog=None, msg=""): - pass - - chunk(sys.argv[1], from_page=0, to_page=10, callback=dummy) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from tika import parser +from io import BytesIO +import re +from rag.app import laws +from rag.nlp import rag_tokenizer, tokenize, find_codec +from deepdoc.parser import PdfParser, ExcelParser, PlainParser, HtmlParser + + +class Pdf(PdfParser): + def __call__(self, filename, binary=None, from_page=0, + to_page=100000, zoomin=3, callback=None): + callback(msg="OCR is running...") + self.__images__( + filename if not binary else binary, + zoomin, + from_page, + to_page, + callback + ) + callback(msg="OCR finished") + + from timeit import default_timer as timer + start = timer() + self._layouts_rec(zoomin, drop=False) + callback(0.63, "Layout analysis finished.") + print("layouts:", timer() - start) + self._table_transformer_job(zoomin) + callback(0.65, "Table analysis finished.") + self._text_merge() + callback(0.67, "Text merging finished") + tbls = self._extract_table_figure(True, zoomin, True, True) + self._concat_downward() + + sections = [(b["text"], self.get_position(b, zoomin)) + for i, b in enumerate(self.boxes)] + for (img, rows), poss in tbls: + if not rows:continue + sections.append((rows if isinstance(rows, str) else rows[0], + [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss])) + return [(txt, "") for txt, _ in sorted(sections, key=lambda x: ( + x[-1][0][0], x[-1][0][3], x[-1][0][1]))], None + + +def chunk(filename, binary=None, from_page=0, to_page=100000, + lang="Chinese", callback=None, **kwargs): + """ + Supported file formats are docx, pdf, excel, txt. + One file forms a chunk which maintains original text order. + """ + + eng = lang.lower() == "english" # is_english(cks) + + if re.search(r"\.docx$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + sections = [txt for txt in laws.Docx()(filename, binary) if txt] + callback(0.8, "Finish parsing.") + + elif re.search(r"\.pdf$", filename, re.IGNORECASE): + pdf_parser = Pdf() if kwargs.get( + "parser_config", {}).get( + "layout_recognize", True) else PlainParser() + sections, _ = pdf_parser( + filename if not binary else binary, to_page=to_page, callback=callback) + sections = [s for s, _ in sections if s] + + elif re.search(r"\.xlsx?$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + excel_parser = ExcelParser() + sections = excel_parser.html(binary, 1000000000) + + elif re.search(r"\.txt$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + txt = "" + if binary: + encoding = find_codec(binary) + txt = binary.decode(encoding, errors="ignore") + else: + with open(filename, "r") as f: + while True: + l = f.readline() + if not l: + break + txt += l + sections = txt.split("\n") + sections = [s for s in sections if s] + callback(0.8, "Finish parsing.") + + elif re.search(r"\.(htm|html)$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + sections = HtmlParser()(filename, binary) + sections = [s for s in sections if s] + callback(0.8, "Finish parsing.") + + elif re.search(r"\.doc$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + binary = BytesIO(binary) + doc_parsed = parser.from_buffer(binary) + sections = doc_parsed['content'].split('\n') + sections = [l for l in sections if l] + callback(0.8, "Finish parsing.") + + else: + raise NotImplementedError( + "file type not supported yet(doc, docx, pdf, txt supported)") + + doc = { + "docnm_kwd": filename, + "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)) + } + doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) + tokenize(doc, "\n".join(sections), 
eng) + return [doc] + + +if __name__ == "__main__": + import sys + + def dummy(prog=None, msg=""): + pass + + chunk(sys.argv[1], from_page=0, to_page=10, callback=dummy) diff --git a/rag/app/paper.py b/rag/app/paper.py index 1b3c1df8a9b07a7d56d1c585b70b16e8b263519e..19185e962b0019a7711353743ac0cbd013158695 100644 --- a/rag/app/paper.py +++ b/rag/app/paper.py @@ -1,287 +1,287 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import copy -import re -from collections import Counter - -from api.db import ParserType -from rag.nlp import rag_tokenizer, tokenize, tokenize_table, add_positions, bullets_category, title_frequency, tokenize_chunks -from deepdoc.parser import PdfParser, PlainParser -import numpy as np -from rag.utils import num_tokens_from_string - - -class Pdf(PdfParser): - def __init__(self): - self.model_speciess = ParserType.PAPER.value - super().__init__() - - def __call__(self, filename, binary=None, from_page=0, - to_page=100000, zoomin=3, callback=None): - callback(msg="OCR is running...") - self.__images__( - filename if not binary else binary, - zoomin, - from_page, - to_page, - callback - ) - callback(msg="OCR finished.") - - from timeit import default_timer as timer - start = timer() - self._layouts_rec(zoomin) - callback(0.63, "Layout analysis finished") - print("layouts:", timer() - start) - self._table_transformer_job(zoomin) - callback(0.68, "Table analysis finished") - self._text_merge() - tbls = self._extract_table_figure(True, zoomin, True, True) - column_width = np.median([b["x1"] - b["x0"] for b in self.boxes]) - self._concat_downward() - self._filter_forpages() - callback(0.75, "Text merging finished.") - - # clean mess - if column_width < self.page_images[0].size[0] / zoomin / 2: - print("two_column...................", column_width, - self.page_images[0].size[0] / zoomin / 2) - self.boxes = self.sort_X_by_page(self.boxes, column_width / 2) - for b in self.boxes: - b["text"] = re.sub(r"([\t  ]|\u3000){2,}", " ", b["text"].strip()) - - def _begin(txt): - return re.match( - "[0-9. 
一、i]*(introduction|abstract|摘要|引言|keywords|key words|关键词|background|背景|目录|前言|contents)", - txt.lower().strip()) - - if from_page > 0: - return { - "title": "", - "authors": "", - "abstract": "", - "sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes if - re.match(r"(text|title)", b.get("layoutno", "text"))], - "tables": tbls - } - # get title and authors - title = "" - authors = [] - i = 0 - while i < min(32, len(self.boxes)-1): - b = self.boxes[i] - i += 1 - if b.get("layoutno", "").find("title") >= 0: - title = b["text"] - if _begin(title): - title = "" - break - for j in range(3): - if _begin(self.boxes[i + j]["text"]): - break - authors.append(self.boxes[i + j]["text"]) - break - break - # get abstract - abstr = "" - i = 0 - while i + 1 < min(32, len(self.boxes)): - b = self.boxes[i] - i += 1 - txt = b["text"].lower().strip() - if re.match("(abstract|摘要)", txt): - if len(txt.split(" ")) > 32 or len(txt) > 64: - abstr = txt + self._line_tag(b, zoomin) - break - txt = self.boxes[i]["text"].lower().strip() - if len(txt.split(" ")) > 32 or len(txt) > 64: - abstr = txt + self._line_tag(self.boxes[i], zoomin) - i += 1 - break - if not abstr: - i = 0 - - callback( - 0.8, "Page {}~{}: Text merging finished".format( - from_page, min( - to_page, self.total_page))) - for b in self.boxes: - print(b["text"], b.get("layoutno")) - print(tbls) - - return { - "title": title, - "authors": " ".join(authors), - "abstract": abstr, - "sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes[i:] if - re.match(r"(text|title)", b.get("layoutno", "text"))], - "tables": tbls - } - - -def chunk(filename, binary=None, from_page=0, to_page=100000, - lang="Chinese", callback=None, **kwargs): - """ - Only pdf is supported. - The abstract of the paper will be sliced as an entire chunk, and will not be sliced partly. 
- """ - pdf_parser = None - if re.search(r"\.pdf$", filename, re.IGNORECASE): - if not kwargs.get("parser_config", {}).get("layout_recognize", True): - pdf_parser = PlainParser() - paper = { - "title": filename, - "authors": " ", - "abstract": "", - "sections": pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page)[0], - "tables": [] - } - else: - pdf_parser = Pdf() - paper = pdf_parser(filename if not binary else binary, - from_page=from_page, to_page=to_page, callback=callback) - else: - raise NotImplementedError("file type not supported yet(pdf supported)") - - doc = {"docnm_kwd": filename, "authors_tks": rag_tokenizer.tokenize(paper["authors"]), - "title_tks": rag_tokenizer.tokenize(paper["title"] if paper["title"] else filename)} - doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) - doc["authors_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["authors_tks"]) - # is it English - eng = lang.lower() == "english" # pdf_parser.is_english - print("It's English.....", eng) - - res = tokenize_table(paper["tables"], doc, eng) - - if paper["abstract"]: - d = copy.deepcopy(doc) - txt = pdf_parser.remove_tag(paper["abstract"]) - d["important_kwd"] = ["abstract", "总结", "概括", "summary", "summarize"] - d["important_tks"] = " ".join(d["important_kwd"]) - d["image"], poss = pdf_parser.crop( - paper["abstract"], need_position=True) - add_positions(d, poss) - tokenize(d, txt, eng) - res.append(d) - - sorted_sections = paper["sections"] - # set pivot using the most frequent type of title, - # then merge between 2 pivot - bull = bullets_category([txt for txt, _ in sorted_sections]) - most_level, levels = title_frequency(bull, sorted_sections) - assert len(sorted_sections) == len(levels) - sec_ids = [] - sid = 0 - for i, lvl in enumerate(levels): - if lvl <= most_level and i > 0 and lvl != levels[i - 1]: - sid += 1 - sec_ids.append(sid) - print(lvl, sorted_sections[i][0], most_level, sid) - - chunks = [] - last_sid = -2 - for (txt, _), sec_id in zip(sorted_sections, sec_ids): - if sec_id == last_sid: - if chunks: - chunks[-1] += "\n" + txt - continue - chunks.append(txt) - last_sid = sec_id - res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser)) - return res - - -""" - readed = [0] * len(paper["lines"]) - # find colon firstly - i = 0 - while i + 1 < len(paper["lines"]): - txt = pdf_parser.remove_tag(paper["lines"][i][0]) - j = i - if txt.strip("\n").strip()[-1] not in "::": - i += 1 - continue - i += 1 - while i < len(paper["lines"]) and not paper["lines"][i][0]: - i += 1 - if i >= len(paper["lines"]): break - proj = [paper["lines"][i][0].strip()] - i += 1 - while i < len(paper["lines"]) and paper["lines"][i][0].strip()[0] == proj[-1][0]: - proj.append(paper["lines"][i]) - i += 1 - for k in range(j, i): readed[k] = True - txt = txt[::-1] - if eng: - r = re.search(r"(.*?) ([\\.;?!]|$)", txt) - txt = r.group(1)[::-1] if r else txt[::-1] - else: - r = re.search(r"(.*?) 
([。?;!]|$)", txt) - txt = r.group(1)[::-1] if r else txt[::-1] - for p in proj: - d = copy.deepcopy(doc) - txt += "\n" + pdf_parser.remove_tag(p) - d["image"], poss = pdf_parser.crop(p, need_position=True) - add_positions(d, poss) - tokenize(d, txt, eng) - res.append(d) - - i = 0 - chunk = [] - tk_cnt = 0 - def add_chunk(): - nonlocal chunk, res, doc, pdf_parser, tk_cnt - d = copy.deepcopy(doc) - ck = "\n".join(chunk) - tokenize(d, pdf_parser.remove_tag(ck), pdf_parser.is_english) - d["image"], poss = pdf_parser.crop(ck, need_position=True) - add_positions(d, poss) - res.append(d) - chunk = [] - tk_cnt = 0 - - while i < len(paper["lines"]): - if tk_cnt > 128: - add_chunk() - if readed[i]: - i += 1 - continue - readed[i] = True - txt, layouts = paper["lines"][i] - txt_ = pdf_parser.remove_tag(txt) - i += 1 - cnt = num_tokens_from_string(txt_) - if any([ - layouts.find("title") >= 0 and chunk, - cnt + tk_cnt > 128 and tk_cnt > 32, - ]): - add_chunk() - chunk = [txt] - tk_cnt = cnt - else: - chunk.append(txt) - tk_cnt += cnt - - if chunk: add_chunk() - for i, d in enumerate(res): - print(d) - # d["image"].save(f"./logs/{i}.jpg") - return res -""" - -if __name__ == "__main__": - import sys - - def dummy(prog=None, msg=""): - pass - chunk(sys.argv[1], callback=dummy) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import copy +import re +from collections import Counter + +from api.db import ParserType +from rag.nlp import rag_tokenizer, tokenize, tokenize_table, add_positions, bullets_category, title_frequency, tokenize_chunks +from deepdoc.parser import PdfParser, PlainParser +import numpy as np +from rag.utils import num_tokens_from_string + + +class Pdf(PdfParser): + def __init__(self): + self.model_speciess = ParserType.PAPER.value + super().__init__() + + def __call__(self, filename, binary=None, from_page=0, + to_page=100000, zoomin=3, callback=None): + callback(msg="OCR is running...") + self.__images__( + filename if not binary else binary, + zoomin, + from_page, + to_page, + callback + ) + callback(msg="OCR finished.") + + from timeit import default_timer as timer + start = timer() + self._layouts_rec(zoomin) + callback(0.63, "Layout analysis finished") + print("layouts:", timer() - start) + self._table_transformer_job(zoomin) + callback(0.68, "Table analysis finished") + self._text_merge() + tbls = self._extract_table_figure(True, zoomin, True, True) + column_width = np.median([b["x1"] - b["x0"] for b in self.boxes]) + self._concat_downward() + self._filter_forpages() + callback(0.75, "Text merging finished.") + + # clean mess + if column_width < self.page_images[0].size[0] / zoomin / 2: + print("two_column...................", column_width, + self.page_images[0].size[0] / zoomin / 2) + self.boxes = self.sort_X_by_page(self.boxes, column_width / 2) + for b in self.boxes: + b["text"] = re.sub(r"([\t  ]|\u3000){2,}", " ", b["text"].strip()) + + def _begin(txt): + return re.match( + "[0-9. 
一、i]*(introduction|abstract|摘要|引言|keywords|key words|关键词|background|背景|目录|前言|contents)", + txt.lower().strip()) + + if from_page > 0: + return { + "title": "", + "authors": "", + "abstract": "", + "sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes if + re.match(r"(text|title)", b.get("layoutno", "text"))], + "tables": tbls + } + # get title and authors + title = "" + authors = [] + i = 0 + while i < min(32, len(self.boxes)-1): + b = self.boxes[i] + i += 1 + if b.get("layoutno", "").find("title") >= 0: + title = b["text"] + if _begin(title): + title = "" + break + for j in range(3): + if _begin(self.boxes[i + j]["text"]): + break + authors.append(self.boxes[i + j]["text"]) + break + break + # get abstract + abstr = "" + i = 0 + while i + 1 < min(32, len(self.boxes)): + b = self.boxes[i] + i += 1 + txt = b["text"].lower().strip() + if re.match("(abstract|摘要)", txt): + if len(txt.split(" ")) > 32 or len(txt) > 64: + abstr = txt + self._line_tag(b, zoomin) + break + txt = self.boxes[i]["text"].lower().strip() + if len(txt.split(" ")) > 32 or len(txt) > 64: + abstr = txt + self._line_tag(self.boxes[i], zoomin) + i += 1 + break + if not abstr: + i = 0 + + callback( + 0.8, "Page {}~{}: Text merging finished".format( + from_page, min( + to_page, self.total_page))) + for b in self.boxes: + print(b["text"], b.get("layoutno")) + print(tbls) + + return { + "title": title, + "authors": " ".join(authors), + "abstract": abstr, + "sections": [(b["text"] + self._line_tag(b, zoomin), b.get("layoutno", "")) for b in self.boxes[i:] if + re.match(r"(text|title)", b.get("layoutno", "text"))], + "tables": tbls + } + + +def chunk(filename, binary=None, from_page=0, to_page=100000, + lang="Chinese", callback=None, **kwargs): + """ + Only pdf is supported. + The abstract of the paper will be sliced as an entire chunk, and will not be sliced partly. 
+ """ + pdf_parser = None + if re.search(r"\.pdf$", filename, re.IGNORECASE): + if not kwargs.get("parser_config", {}).get("layout_recognize", True): + pdf_parser = PlainParser() + paper = { + "title": filename, + "authors": " ", + "abstract": "", + "sections": pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page)[0], + "tables": [] + } + else: + pdf_parser = Pdf() + paper = pdf_parser(filename if not binary else binary, + from_page=from_page, to_page=to_page, callback=callback) + else: + raise NotImplementedError("file type not supported yet(pdf supported)") + + doc = {"docnm_kwd": filename, "authors_tks": rag_tokenizer.tokenize(paper["authors"]), + "title_tks": rag_tokenizer.tokenize(paper["title"] if paper["title"] else filename)} + doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) + doc["authors_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["authors_tks"]) + # is it English + eng = lang.lower() == "english" # pdf_parser.is_english + print("It's English.....", eng) + + res = tokenize_table(paper["tables"], doc, eng) + + if paper["abstract"]: + d = copy.deepcopy(doc) + txt = pdf_parser.remove_tag(paper["abstract"]) + d["important_kwd"] = ["abstract", "总结", "概括", "summary", "summarize"] + d["important_tks"] = " ".join(d["important_kwd"]) + d["image"], poss = pdf_parser.crop( + paper["abstract"], need_position=True) + add_positions(d, poss) + tokenize(d, txt, eng) + res.append(d) + + sorted_sections = paper["sections"] + # set pivot using the most frequent type of title, + # then merge between 2 pivot + bull = bullets_category([txt for txt, _ in sorted_sections]) + most_level, levels = title_frequency(bull, sorted_sections) + assert len(sorted_sections) == len(levels) + sec_ids = [] + sid = 0 + for i, lvl in enumerate(levels): + if lvl <= most_level and i > 0 and lvl != levels[i - 1]: + sid += 1 + sec_ids.append(sid) + print(lvl, sorted_sections[i][0], most_level, sid) + + chunks = [] + last_sid = -2 + for (txt, _), sec_id in zip(sorted_sections, sec_ids): + if sec_id == last_sid: + if chunks: + chunks[-1] += "\n" + txt + continue + chunks.append(txt) + last_sid = sec_id + res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser)) + return res + + +""" + readed = [0] * len(paper["lines"]) + # find colon firstly + i = 0 + while i + 1 < len(paper["lines"]): + txt = pdf_parser.remove_tag(paper["lines"][i][0]) + j = i + if txt.strip("\n").strip()[-1] not in "::": + i += 1 + continue + i += 1 + while i < len(paper["lines"]) and not paper["lines"][i][0]: + i += 1 + if i >= len(paper["lines"]): break + proj = [paper["lines"][i][0].strip()] + i += 1 + while i < len(paper["lines"]) and paper["lines"][i][0].strip()[0] == proj[-1][0]: + proj.append(paper["lines"][i]) + i += 1 + for k in range(j, i): readed[k] = True + txt = txt[::-1] + if eng: + r = re.search(r"(.*?) ([\\.;?!]|$)", txt) + txt = r.group(1)[::-1] if r else txt[::-1] + else: + r = re.search(r"(.*?) 
([。?;!]|$)", txt) + txt = r.group(1)[::-1] if r else txt[::-1] + for p in proj: + d = copy.deepcopy(doc) + txt += "\n" + pdf_parser.remove_tag(p) + d["image"], poss = pdf_parser.crop(p, need_position=True) + add_positions(d, poss) + tokenize(d, txt, eng) + res.append(d) + + i = 0 + chunk = [] + tk_cnt = 0 + def add_chunk(): + nonlocal chunk, res, doc, pdf_parser, tk_cnt + d = copy.deepcopy(doc) + ck = "\n".join(chunk) + tokenize(d, pdf_parser.remove_tag(ck), pdf_parser.is_english) + d["image"], poss = pdf_parser.crop(ck, need_position=True) + add_positions(d, poss) + res.append(d) + chunk = [] + tk_cnt = 0 + + while i < len(paper["lines"]): + if tk_cnt > 128: + add_chunk() + if readed[i]: + i += 1 + continue + readed[i] = True + txt, layouts = paper["lines"][i] + txt_ = pdf_parser.remove_tag(txt) + i += 1 + cnt = num_tokens_from_string(txt_) + if any([ + layouts.find("title") >= 0 and chunk, + cnt + tk_cnt > 128 and tk_cnt > 32, + ]): + add_chunk() + chunk = [txt] + tk_cnt = cnt + else: + chunk.append(txt) + tk_cnt += cnt + + if chunk: add_chunk() + for i, d in enumerate(res): + print(d) + # d["image"].save(f"./logs/{i}.jpg") + return res +""" + +if __name__ == "__main__": + import sys + + def dummy(prog=None, msg=""): + pass + chunk(sys.argv[1], callback=dummy) diff --git a/rag/app/picture.py b/rag/app/picture.py index 0474b7594f2f9e44ea9ecc91e7d4635547cf420a..fa4862b296afe4c57fbdff9cef81ab5fe644c3a8 100644 --- a/rag/app/picture.py +++ b/rag/app/picture.py @@ -1,52 +1,52 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import io - -import numpy as np -from PIL import Image - -from api.db import LLMType -from api.db.services.llm_service import LLMBundle -from rag.nlp import tokenize -from deepdoc.vision import OCR - -ocr = OCR() - - -def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs): - img = Image.open(io.BytesIO(binary)).convert('RGB') - doc = { - "docnm_kwd": filename, - "image": img - } - bxs = ocr(np.array(img)) - txt = "\n".join([t[0] for _, t in bxs if t[0]]) - eng = lang.lower() == "english" - callback(0.4, "Finish OCR: (%s ...)" % txt[:12]) - if (eng and len(txt.split(" ")) > 32) or len(txt) > 32: - tokenize(doc, txt, eng) - callback(0.8, "OCR results is too long to use CV LLM.") - return [doc] - - try: - callback(0.4, "Use CV LLM to describe the picture.") - cv_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, lang=lang) - ans = cv_mdl.describe(binary) - callback(0.8, "CV LLM respond: %s ..." % ans[:32]) - txt += "\n" + ans - tokenize(doc, txt, eng) - return [doc] - except Exception as e: - callback(prog=-1, msg=str(e)) - - return [] +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import io + +import numpy as np +from PIL import Image + +from api.db import LLMType +from api.db.services.llm_service import LLMBundle +from rag.nlp import tokenize +from deepdoc.vision import OCR + +ocr = OCR() + + +def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs): + img = Image.open(io.BytesIO(binary)).convert('RGB') + doc = { + "docnm_kwd": filename, + "image": img + } + bxs = ocr(np.array(img)) + txt = "\n".join([t[0] for _, t in bxs if t[0]]) + eng = lang.lower() == "english" + callback(0.4, "Finish OCR: (%s ...)" % txt[:12]) + if (eng and len(txt.split(" ")) > 32) or len(txt) > 32: + tokenize(doc, txt, eng) + callback(0.8, "OCR results is too long to use CV LLM.") + return [doc] + + try: + callback(0.4, "Use CV LLM to describe the picture.") + cv_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, lang=lang) + ans = cv_mdl.describe(binary) + callback(0.8, "CV LLM respond: %s ..." % ans[:32]) + txt += "\n" + ans + tokenize(doc, txt, eng) + return [doc] + except Exception as e: + callback(prog=-1, msg=str(e)) + + return [] diff --git a/rag/app/presentation.py b/rag/app/presentation.py index b6cf710b97e0158ccaecf98e3ab60c46e18bb1e5..af8c59387c61c41b114d12f9b8f045409e4041f7 100644 --- a/rag/app/presentation.py +++ b/rag/app/presentation.py @@ -1,143 +1,143 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import copy -import re -from io import BytesIO - -from PIL import Image - -from rag.nlp import tokenize, is_english -from rag.nlp import rag_tokenizer -from deepdoc.parser import PdfParser, PptParser, PlainParser -from PyPDF2 import PdfReader as pdf2_read - - -class Ppt(PptParser): - def __call__(self, fnm, from_page, to_page, callback=None): - txts = super().__call__(fnm, from_page, to_page) - - callback(0.5, "Text extraction finished.") - import aspose.slides as slides - import aspose.pydrawing as drawing - imgs = [] - with slides.Presentation(BytesIO(fnm)) as presentation: - for i, slide in enumerate(presentation.slides[from_page: to_page]): - buffered = BytesIO() - slide.get_thumbnail( - 0.5, 0.5).save( - buffered, drawing.imaging.ImageFormat.jpeg) - imgs.append(Image.open(buffered)) - assert len(imgs) == len( - txts), "Slides text and image do not match: {} vs. 
{}".format(len(imgs), len(txts)) - callback(0.9, "Image extraction finished") - self.is_english = is_english(txts) - return [(txts[i], imgs[i]) for i in range(len(txts))] - - -class Pdf(PdfParser): - def __init__(self): - super().__init__() - - def __garbage(self, txt): - txt = txt.lower().strip() - if re.match(r"[0-9\.,%/-]+$", txt): - return True - if len(txt) < 3: - return True - return False - - def __call__(self, filename, binary=None, from_page=0, - to_page=100000, zoomin=3, callback=None): - callback(msg="OCR is running...") - self.__images__(filename if not binary else binary, - zoomin, from_page, to_page, callback) - callback(0.8, "Page {}~{}: OCR finished".format( - from_page, min(to_page, self.total_page))) - assert len(self.boxes) == len(self.page_images), "{} vs. {}".format( - len(self.boxes), len(self.page_images)) - res = [] - for i in range(len(self.boxes)): - lines = "\n".join([b["text"] for b in self.boxes[i] - if not self.__garbage(b["text"])]) - res.append((lines, self.page_images[i])) - callback(0.9, "Page {}~{}: Parsing finished".format( - from_page, min(to_page, self.total_page))) - return res - - -class PlainPdf(PlainParser): - def __call__(self, filename, binary=None, from_page=0, - to_page=100000, callback=None, **kwargs): - self.pdf = pdf2_read(filename if not binary else BytesIO(binary)) - page_txt = [] - for page in self.pdf.pages[from_page: to_page]: - page_txt.append(page.extract_text()) - callback(0.9, "Parsing finished") - return [(txt, None) for txt in page_txt] - - -def chunk(filename, binary=None, from_page=0, to_page=100000, - lang="Chinese", callback=None, **kwargs): - """ - The supported file formats are pdf, pptx. - Every page will be treated as a chunk. And the thumbnail of every page will be stored. - PPT file will be parsed by using this method automatically, setting-up for every PPT file is not necessary. - """ - eng = lang.lower() == "english" - doc = { - "docnm_kwd": filename, - "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)) - } - doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) - res = [] - if re.search(r"\.pptx?$", filename, re.IGNORECASE): - ppt_parser = Ppt() - for pn, (txt, img) in enumerate(ppt_parser( - filename if not binary else binary, from_page, 1000000, callback)): - d = copy.deepcopy(doc) - pn += from_page - d["image"] = img - d["page_num_int"] = [pn + 1] - d["top_int"] = [0] - d["position_int"] = [(pn + 1, 0, img.size[0], 0, img.size[1])] - tokenize(d, txt, eng) - res.append(d) - return res - elif re.search(r"\.pdf$", filename, re.IGNORECASE): - pdf_parser = Pdf() if kwargs.get( - "parser_config", {}).get( - "layout_recognize", True) else PlainPdf() - for pn, (txt, img) in enumerate(pdf_parser(filename, binary, - from_page=from_page, to_page=to_page, callback=callback)): - d = copy.deepcopy(doc) - pn += from_page - if img: - d["image"] = img - d["page_num_int"] = [pn + 1] - d["top_int"] = [0] - d["position_int"] = [ - (pn + 1, 0, img.size[0] if img else 0, 0, img.size[1] if img else 0)] - tokenize(d, txt, eng) - res.append(d) - return res - - raise NotImplementedError( - "file type not supported yet(pptx, pdf supported)") - - -if __name__ == "__main__": - import sys - - def dummy(a, b): - pass - chunk(sys.argv[1], callback=dummy) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import copy +import re +from io import BytesIO + +from PIL import Image + +from rag.nlp import tokenize, is_english +from rag.nlp import rag_tokenizer +from deepdoc.parser import PdfParser, PptParser, PlainParser +from PyPDF2 import PdfReader as pdf2_read + + +class Ppt(PptParser): + def __call__(self, fnm, from_page, to_page, callback=None): + txts = super().__call__(fnm, from_page, to_page) + + callback(0.5, "Text extraction finished.") + import aspose.slides as slides + import aspose.pydrawing as drawing + imgs = [] + with slides.Presentation(BytesIO(fnm)) as presentation: + for i, slide in enumerate(presentation.slides[from_page: to_page]): + buffered = BytesIO() + slide.get_thumbnail( + 0.5, 0.5).save( + buffered, drawing.imaging.ImageFormat.jpeg) + imgs.append(Image.open(buffered)) + assert len(imgs) == len( + txts), "Slides text and image do not match: {} vs. {}".format(len(imgs), len(txts)) + callback(0.9, "Image extraction finished") + self.is_english = is_english(txts) + return [(txts[i], imgs[i]) for i in range(len(txts))] + + +class Pdf(PdfParser): + def __init__(self): + super().__init__() + + def __garbage(self, txt): + txt = txt.lower().strip() + if re.match(r"[0-9\.,%/-]+$", txt): + return True + if len(txt) < 3: + return True + return False + + def __call__(self, filename, binary=None, from_page=0, + to_page=100000, zoomin=3, callback=None): + callback(msg="OCR is running...") + self.__images__(filename if not binary else binary, + zoomin, from_page, to_page, callback) + callback(0.8, "Page {}~{}: OCR finished".format( + from_page, min(to_page, self.total_page))) + assert len(self.boxes) == len(self.page_images), "{} vs. {}".format( + len(self.boxes), len(self.page_images)) + res = [] + for i in range(len(self.boxes)): + lines = "\n".join([b["text"] for b in self.boxes[i] + if not self.__garbage(b["text"])]) + res.append((lines, self.page_images[i])) + callback(0.9, "Page {}~{}: Parsing finished".format( + from_page, min(to_page, self.total_page))) + return res + + +class PlainPdf(PlainParser): + def __call__(self, filename, binary=None, from_page=0, + to_page=100000, callback=None, **kwargs): + self.pdf = pdf2_read(filename if not binary else BytesIO(binary)) + page_txt = [] + for page in self.pdf.pages[from_page: to_page]: + page_txt.append(page.extract_text()) + callback(0.9, "Parsing finished") + return [(txt, None) for txt in page_txt] + + +def chunk(filename, binary=None, from_page=0, to_page=100000, + lang="Chinese", callback=None, **kwargs): + """ + The supported file formats are pdf, pptx. + Every page will be treated as a chunk. And the thumbnail of every page will be stored. + PPT file will be parsed by using this method automatically, setting-up for every PPT file is not necessary. 
+ """ + eng = lang.lower() == "english" + doc = { + "docnm_kwd": filename, + "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)) + } + doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) + res = [] + if re.search(r"\.pptx?$", filename, re.IGNORECASE): + ppt_parser = Ppt() + for pn, (txt, img) in enumerate(ppt_parser( + filename if not binary else binary, from_page, 1000000, callback)): + d = copy.deepcopy(doc) + pn += from_page + d["image"] = img + d["page_num_int"] = [pn + 1] + d["top_int"] = [0] + d["position_int"] = [(pn + 1, 0, img.size[0], 0, img.size[1])] + tokenize(d, txt, eng) + res.append(d) + return res + elif re.search(r"\.pdf$", filename, re.IGNORECASE): + pdf_parser = Pdf() if kwargs.get( + "parser_config", {}).get( + "layout_recognize", True) else PlainPdf() + for pn, (txt, img) in enumerate(pdf_parser(filename, binary, + from_page=from_page, to_page=to_page, callback=callback)): + d = copy.deepcopy(doc) + pn += from_page + if img: + d["image"] = img + d["page_num_int"] = [pn + 1] + d["top_int"] = [0] + d["position_int"] = [ + (pn + 1, 0, img.size[0] if img else 0, 0, img.size[1] if img else 0)] + tokenize(d, txt, eng) + res.append(d) + return res + + raise NotImplementedError( + "file type not supported yet(pptx, pdf supported)") + + +if __name__ == "__main__": + import sys + + def dummy(a, b): + pass + chunk(sys.argv[1], callback=dummy) diff --git a/rag/app/qa.py b/rag/app/qa.py index fee46ad578d74b9ffd7681424fa0c6ec3c2a9add..38c6392ae9c05ea6e2581249f5ed13796c3c9b92 100644 --- a/rag/app/qa.py +++ b/rag/app/qa.py @@ -1,422 +1,422 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import re -from copy import deepcopy -from io import BytesIO -from timeit import default_timer as timer -from nltk import word_tokenize -from openpyxl import load_workbook -from rag.nlp import is_english, random_choices, find_codec, qbullets_category, add_positions, has_qbullet, docx_question_level -from rag.nlp import rag_tokenizer, tokenize_table, concat_img -from rag.settings import cron_logger -from deepdoc.parser import PdfParser, ExcelParser, DocxParser -from docx import Document -from PIL import Image -from markdown import markdown -class Excel(ExcelParser): - def __call__(self, fnm, binary=None, callback=None): - if not binary: - wb = load_workbook(fnm) - else: - wb = load_workbook(BytesIO(binary)) - total = 0 - for sheetname in wb.sheetnames: - total += len(list(wb[sheetname].rows)) - - res, fails = [], [] - for sheetname in wb.sheetnames: - ws = wb[sheetname] - rows = list(ws.rows) - for i, r in enumerate(rows): - q, a = "", "" - for cell in r: - if not cell.value: - continue - if not q: - q = str(cell.value) - elif not a: - a = str(cell.value) - else: - break - if q and a: - res.append((q, a)) - else: - fails.append(str(i + 1)) - if len(res) % 999 == 0: - callback(len(res) * - 0.6 / - total, ("Extract Q&A: {}".format(len(res)) + - (f"{len(fails)} failure, line: %s..." 
% - (",".join(fails[:3])) if fails else ""))) - - callback(0.6, ("Extract Q&A: {}. ".format(len(res)) + ( - f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else ""))) - self.is_english = is_english( - [rmPrefix(q) for q, _ in random_choices(res, k=30) if len(q) > 1]) - return res - -class Pdf(PdfParser): - def __call__(self, filename, binary=None, from_page=0, - to_page=100000, zoomin=3, callback=None): - start = timer() - callback(msg="OCR is running...") - self.__images__( - filename if not binary else binary, - zoomin, - from_page, - to_page, - callback - ) - callback(msg="OCR finished") - cron_logger.info("OCR({}~{}): {}".format(from_page, to_page, timer() - start)) - start = timer() - self._layouts_rec(zoomin, drop=False) - callback(0.63, "Layout analysis finished.") - self._table_transformer_job(zoomin) - callback(0.65, "Table analysis finished.") - self._text_merge() - callback(0.67, "Text merging finished") - tbls = self._extract_table_figure(True, zoomin, True, True) - #self._naive_vertical_merge() - # self._concat_downward() - #self._filter_forpages() - cron_logger.info("layouts: {}".format(timer() - start)) - sections = [b["text"] for b in self.boxes] - bull_x0_list = [] - q_bull, reg = qbullets_category(sections) - if q_bull == -1: - raise ValueError("Unable to recognize Q&A structure.") - qai_list = [] - last_q, last_a, last_tag = '', '', '' - last_index = -1 - last_box = {'text':''} - last_bull = None - def sort_key(element): - tbls_pn = element[1][0][0] - tbls_top = element[1][0][3] - return tbls_pn, tbls_top - tbls.sort(key=sort_key) - tbl_index = 0 - last_pn, last_bottom = 0, 0 - tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text = 1, 0, 0, 0, 0, '@@0\t0\t0\t0\t0##', '' - for box in self.boxes: - section, line_tag = box['text'], self._line_tag(box, zoomin) - has_bull, index = has_qbullet(reg, box, last_box, last_index, last_bull, bull_x0_list) - last_box, last_index, last_bull = box, index, has_bull - line_pn = float(line_tag.lstrip('@@').split('\t')[0]) - line_top = float(line_tag.rstrip('##').split('\t')[3]) - tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text = self.get_tbls_info(tbls, tbl_index) - if not has_bull: # No question bullet - if not last_q: - if tbl_pn < line_pn or (tbl_pn == line_pn and tbl_top <= line_top): # image passed - tbl_index += 1 - continue - else: - sum_tag = line_tag - sum_section = section - while ((tbl_pn == last_pn and tbl_top>= last_bottom) or (tbl_pn > last_pn)) \ - and ((tbl_pn == line_pn and tbl_top <= line_top) or (tbl_pn < line_pn)): # add image at the middle of current answer - sum_tag = f'{tbl_tag}{sum_tag}' - sum_section = f'{tbl_text}{sum_section}' - tbl_index += 1 - tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text = self.get_tbls_info(tbls, tbl_index) - last_a = f'{last_a}{sum_section}' - last_tag = f'{last_tag}{sum_tag}' - else: - if last_q: - while ((tbl_pn == last_pn and tbl_top>= last_bottom) or (tbl_pn > last_pn)) \ - and ((tbl_pn == line_pn and tbl_top <= line_top) or (tbl_pn < line_pn)): # add image at the end of last answer - last_tag = f'{last_tag}{tbl_tag}' - last_a = f'{last_a}{tbl_text}' - tbl_index += 1 - tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text = self.get_tbls_info(tbls, tbl_index) - image, poss = self.crop(last_tag, need_position=True) - qai_list.append((last_q, last_a, image, poss)) - last_q, last_a, last_tag = '', '', '' - last_q = has_bull.group() - _, end = has_bull.span() - last_a = section[end:] - last_tag = 
line_tag
-                last_bottom = float(line_tag.rstrip('##').split('\t')[4])
-                last_pn = line_pn
-        if last_q:
-            qai_list.append((last_q, last_a, *self.crop(last_tag, need_position=True)))
-        return qai_list, tbls
-    def get_tbls_info(self, tbls, tbl_index):
-        if tbl_index >= len(tbls):
-            return 1, 0, 0, 0, 0, '@@0\t0\t0\t0\t0##', ''
-        tbl_pn = tbls[tbl_index][1][0][0]+1
-        tbl_left = tbls[tbl_index][1][0][1]
-        tbl_right = tbls[tbl_index][1][0][2]
-        tbl_top = tbls[tbl_index][1][0][3]
-        tbl_bottom = tbls[tbl_index][1][0][4]
-        tbl_tag = "@@{}\t{:.1f}\t{:.1f}\t{:.1f}\t{:.1f}##" \
-            .format(tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom)
-        tbl_text = ''.join(tbls[tbl_index][0][1])
-        return tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text
-class Docx(DocxParser):
-    def __init__(self):
-        pass
-    def get_picture(self, document, paragraph):
-        img = paragraph._element.xpath('.//pic:pic')
-        if not img:
-            return None
-        img = img[0]
-        embed = img.xpath('.//a:blip/@r:embed')[0]
-        related_part = document.part.related_parts[embed]
-        image = related_part.image
-        image = Image.open(BytesIO(image.blob)).convert('RGB')
-        return image
-
-    def __call__(self, filename, binary=None, from_page=0, to_page=100000, callback=None):
-        self.doc = Document(
-            filename) if not binary else Document(BytesIO(binary))
-        pn = 0
-        last_answer, last_image = "", None
-        question_stack, level_stack = [], []
-        qai_list = []
-        for p in self.doc.paragraphs:
-            if pn > to_page:
-                break
-            question_level, p_text = 0, ''
-            if from_page <= pn < to_page and p.text.strip():
-                question_level, p_text = docx_question_level(p)
-            if not question_level or question_level > 6: # not a question
-                last_answer = f'{last_answer}\n{p_text}'
-                current_image = self.get_picture(self.doc, p)
-                last_image = concat_img(last_image, current_image)
-            else: # is a question
-                if last_answer or last_image:
-                    sum_question = '\n'.join(question_stack)
-                    if sum_question:
-                        qai_list.append((sum_question, last_answer, last_image))
-                    last_answer, last_image = '', None
-
-                i = question_level
-                while question_stack and i <= level_stack[-1]:
-                    question_stack.pop()
-                    level_stack.pop()
-                question_stack.append(p_text)
-                level_stack.append(question_level)
-            for run in p.runs:
-                if 'lastRenderedPageBreak' in run._element.xml:
-                    pn += 1
-                    continue
-                if 'w:br' in run._element.xml and 'type="page"' in run._element.xml:
-                    pn += 1
-        if last_answer:
-            sum_question = '\n'.join(question_stack)
-            if sum_question:
-                qai_list.append((sum_question, last_answer, last_image))
-
-        tbls = []
-        for tb in self.doc.tables:
-            html= "<table>"
-            for r in tb.rows:
-                html += "<tr>"
-                i = 0
-                while i < len(r.cells):
-                    span = 1
-                    c = r.cells[i]
-                    for j in range(i+1, len(r.cells)):
-                        if c.text == r.cells[j].text:
-                            span += 1
-                            i = j
-                    i += 1
-                    html += f"<td>{c.text}</td>" if span == 1 else f"<td colspan='{span}'>{c.text}</td>"
-                html += "</tr>"
-            html += "</table>"
-            tbls.append(((None, html), ""))
-        return qai_list, tbls
-
-def rmPrefix(txt):
-    return re.sub(
-        r"^(问题|答案|回答|user|assistant|Q|A|Question|Answer|问|答)[\t:: ]+", "", txt.strip(), flags=re.IGNORECASE)
-
-
-def beAdocPdf(d, q, a, eng, image, poss):
-    qprefix = "Question: " if eng else "问题:"
-    aprefix = "Answer: " if eng else "回答:"
-    d["content_with_weight"] = "\t".join(
-        [qprefix + rmPrefix(q), aprefix + rmPrefix(a)])
-    d["content_ltks"] = rag_tokenizer.tokenize(q)
-    d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
-    d["image"] = image
-    add_positions(d, poss)
-    return d
-
-def beAdocDocx(d, q, a, eng, image):
-    qprefix = "Question: " if eng else "问题:"
-    aprefix = "Answer: " if eng else "回答:"
-    d["content_with_weight"] = "\t".join(
-        [qprefix + rmPrefix(q), aprefix + rmPrefix(a)])
-    d["content_ltks"] = rag_tokenizer.tokenize(q)
-    d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
-    d["image"] = image
-    return d
-
-def beAdoc(d, q, a, eng):
-    qprefix = "Question: " if eng else "问题:"
-    aprefix = "Answer: " if eng else "回答:"
-    d["content_with_weight"] = "\t".join(
-        [qprefix + rmPrefix(q), aprefix + rmPrefix(a)])
-    d["content_ltks"] = rag_tokenizer.tokenize(q)
-    d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
-    return d
-
-
-def mdQuestionLevel(s):
-    match = re.match(r'#*', s)
-    return (len(match.group(0)), s.lstrip('#').lstrip()) if match else (0, s)
-
-def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs):
-    """
-    Excel and csv(txt) format files are supported.
-    If the file is in excel format, there should be 2 column question and answer without header.
-    And question column is ahead of answer column.
-    And it's O.K if it has multiple sheets as long as the columns are rightly composed.
-
-    If it's in csv format, it should be UTF-8 encoded. Use TAB as delimiter to separate question and answer.
-
-    All the deformed lines will be ignored.
-    Every pair of Q&A will be treated as a chunk.
-    """
-    eng = lang.lower() == "english"
-    res = []
-    doc = {
-        "docnm_kwd": filename,
-        "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename))
-    }
-    if re.search(r"\.xlsx?$", filename, re.IGNORECASE):
-        callback(0.1, "Start to parse.")
-        excel_parser = Excel()
-        for q, a in excel_parser(filename, binary, callback):
-            res.append(beAdoc(deepcopy(doc), q, a, eng))
-        return res
-    elif re.search(r"\.(txt|csv)$", filename, re.IGNORECASE):
-        callback(0.1, "Start to parse.")
-        txt = ""
-        if binary:
-            encoding = find_codec(binary)
-            txt = binary.decode(encoding, errors="ignore")
-        else:
-            with open(filename, "r") as f:
-                while True:
-                    l = f.readline()
-                    if not l:
-                        break
-                    txt += l
-        lines = txt.split("\n")
-        comma, tab = 0, 0
-        for l in lines:
-            if len(l.split(",")) == 2: comma += 1
-            if len(l.split("\t")) == 2: tab += 1
-        delimiter = "\t" if tab >= comma else ","
-
-        fails = []
-        question, answer = "", ""
-        i = 0
-        while i < len(lines):
-            arr = lines[i].split(delimiter)
-            if len(arr) != 2:
-                if question: answer += "\n" + lines[i]
-                else:
-                    fails.append(str(i+1))
-            elif len(arr) == 2:
-                if question and answer: res.append(beAdoc(deepcopy(doc), question, answer, eng))
-                question, answer = arr
-            i += 1
-            if len(res) % 999 == 0:
-                callback(len(res) * 0.6 / len(lines), ("Extract Q&A: {}".format(len(res)) + (
-                    f"{len(fails)} failure, line: %s..."
% (",".join(fails[:3])) if fails else ""))) - - if question: res.append(beAdoc(deepcopy(doc), question, answer, eng)) - - callback(0.6, ("Extract Q&A: {}".format(len(res)) + ( - f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else ""))) - - return res - elif re.search(r"\.pdf$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - pdf_parser = Pdf() - qai_list, tbls = pdf_parser(filename if not binary else binary, - from_page=0, to_page=10000, callback=callback) - - - for q, a, image, poss in qai_list: - res.append(beAdocPdf(deepcopy(doc), q, a, eng, image, poss)) - return res - elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - txt = "" - if binary: - encoding = find_codec(binary) - txt = binary.decode(encoding, errors="ignore") - else: - with open(filename, "r") as f: - while True: - l = f.readline() - if not l: - break - txt += l - lines = txt.split("\n") - last_question, last_answer = "", "" - question_stack, level_stack = [], [] - code_block = False - level_index = [-1] * 7 - for index, l in enumerate(lines): - if l.strip().startswith('```'): - code_block = not code_block - question_level, question = 0, '' - if not code_block: - question_level, question = mdQuestionLevel(l) - - if not question_level or question_level > 6: # not a question - last_answer = f'{last_answer}\n{l}' - else: # is a question - if last_answer.strip(): - sum_question = '\n'.join(question_stack) - if sum_question: - res.append(beAdoc(deepcopy(doc), sum_question, markdown(last_answer, extensions=['markdown.extensions.tables']), eng)) - last_answer = '' - - i = question_level - while question_stack and i <= level_stack[-1]: - question_stack.pop() - level_stack.pop() - question_stack.append(question) - level_stack.append(question_level) - if last_answer.strip(): - sum_question = '\n'.join(question_stack) - if sum_question: - res.append(beAdoc(deepcopy(doc), sum_question, markdown(last_answer, extensions=['markdown.extensions.tables']), eng)) - return res - elif re.search(r"\.docx$", filename, re.IGNORECASE): - docx_parser = Docx() - qai_list, tbls = docx_parser(filename, binary, - from_page=0, to_page=10000, callback=callback) - res = tokenize_table(tbls, doc, eng) - for q, a, image in qai_list: - res.append(beAdocDocx(deepcopy(doc), q, a, eng, image)) - return res - - raise NotImplementedError( - "Excel, csv(txt), pdf, markdown and docx format files are supported.") - - -if __name__ == "__main__": - import sys - - def dummy(prog=None, msg=""): - pass +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import re +from copy import deepcopy +from io import BytesIO +from timeit import default_timer as timer +from nltk import word_tokenize +from openpyxl import load_workbook +from rag.nlp import is_english, random_choices, find_codec, qbullets_category, add_positions, has_qbullet, docx_question_level +from rag.nlp import rag_tokenizer, tokenize_table, concat_img +from rag.settings import cron_logger +from deepdoc.parser import PdfParser, ExcelParser, DocxParser +from docx import Document +from PIL import Image +from markdown import markdown +class Excel(ExcelParser): + def __call__(self, fnm, binary=None, callback=None): + if not binary: + wb = load_workbook(fnm) + else: + wb = load_workbook(BytesIO(binary)) + total = 0 + for sheetname in wb.sheetnames: + total += len(list(wb[sheetname].rows)) + + res, fails = [], [] + for sheetname in wb.sheetnames: + ws = wb[sheetname] + rows = list(ws.rows) + for i, r in enumerate(rows): + q, a = "", "" + for cell in r: + if not cell.value: + continue + if not q: + q = str(cell.value) + elif not a: + a = str(cell.value) + else: + break + if q and a: + res.append((q, a)) + else: + fails.append(str(i + 1)) + if len(res) % 999 == 0: + callback(len(res) * + 0.6 / + total, ("Extract Q&A: {}".format(len(res)) + + (f"{len(fails)} failure, line: %s..." % + (",".join(fails[:3])) if fails else ""))) + + callback(0.6, ("Extract Q&A: {}. ".format(len(res)) + ( + f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else ""))) + self.is_english = is_english( + [rmPrefix(q) for q, _ in random_choices(res, k=30) if len(q) > 1]) + return res + +class Pdf(PdfParser): + def __call__(self, filename, binary=None, from_page=0, + to_page=100000, zoomin=3, callback=None): + start = timer() + callback(msg="OCR is running...") + self.__images__( + filename if not binary else binary, + zoomin, + from_page, + to_page, + callback + ) + callback(msg="OCR finished") + cron_logger.info("OCR({}~{}): {}".format(from_page, to_page, timer() - start)) + start = timer() + self._layouts_rec(zoomin, drop=False) + callback(0.63, "Layout analysis finished.") + self._table_transformer_job(zoomin) + callback(0.65, "Table analysis finished.") + self._text_merge() + callback(0.67, "Text merging finished") + tbls = self._extract_table_figure(True, zoomin, True, True) + #self._naive_vertical_merge() + # self._concat_downward() + #self._filter_forpages() + cron_logger.info("layouts: {}".format(timer() - start)) + sections = [b["text"] for b in self.boxes] + bull_x0_list = [] + q_bull, reg = qbullets_category(sections) + if q_bull == -1: + raise ValueError("Unable to recognize Q&A structure.") + qai_list = [] + last_q, last_a, last_tag = '', '', '' + last_index = -1 + last_box = {'text':''} + last_bull = None + def sort_key(element): + tbls_pn = element[1][0][0] + tbls_top = element[1][0][3] + return tbls_pn, tbls_top + tbls.sort(key=sort_key) + tbl_index = 0 + last_pn, last_bottom = 0, 0 + tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text = 1, 0, 0, 0, 0, '@@0\t0\t0\t0\t0##', '' + for box in self.boxes: + section, line_tag = box['text'], self._line_tag(box, zoomin) + has_bull, index = has_qbullet(reg, box, last_box, last_index, last_bull, bull_x0_list) + last_box, last_index, last_bull = box, index, has_bull + line_pn = float(line_tag.lstrip('@@').split('\t')[0]) + line_top = float(line_tag.rstrip('##').split('\t')[3]) + tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text = self.get_tbls_info(tbls, tbl_index) + if not has_bull: # No question bullet 
+ if not last_q: + if tbl_pn < line_pn or (tbl_pn == line_pn and tbl_top <= line_top): # image passed + tbl_index += 1 + continue + else: + sum_tag = line_tag + sum_section = section + while ((tbl_pn == last_pn and tbl_top>= last_bottom) or (tbl_pn > last_pn)) \ + and ((tbl_pn == line_pn and tbl_top <= line_top) or (tbl_pn < line_pn)): # add image at the middle of current answer + sum_tag = f'{tbl_tag}{sum_tag}' + sum_section = f'{tbl_text}{sum_section}' + tbl_index += 1 + tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text = self.get_tbls_info(tbls, tbl_index) + last_a = f'{last_a}{sum_section}' + last_tag = f'{last_tag}{sum_tag}' + else: + if last_q: + while ((tbl_pn == last_pn and tbl_top>= last_bottom) or (tbl_pn > last_pn)) \ + and ((tbl_pn == line_pn and tbl_top <= line_top) or (tbl_pn < line_pn)): # add image at the end of last answer + last_tag = f'{last_tag}{tbl_tag}' + last_a = f'{last_a}{tbl_text}' + tbl_index += 1 + tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text = self.get_tbls_info(tbls, tbl_index) + image, poss = self.crop(last_tag, need_position=True) + qai_list.append((last_q, last_a, image, poss)) + last_q, last_a, last_tag = '', '', '' + last_q = has_bull.group() + _, end = has_bull.span() + last_a = section[end:] + last_tag = line_tag + last_bottom = float(line_tag.rstrip('##').split('\t')[4]) + last_pn = line_pn + if last_q: + qai_list.append((last_q, last_a, *self.crop(last_tag, need_position=True))) + return qai_list, tbls + def get_tbls_info(self, tbls, tbl_index): + if tbl_index >= len(tbls): + return 1, 0, 0, 0, 0, '@@0\t0\t0\t0\t0##', '' + tbl_pn = tbls[tbl_index][1][0][0]+1 + tbl_left = tbls[tbl_index][1][0][1] + tbl_right = tbls[tbl_index][1][0][2] + tbl_top = tbls[tbl_index][1][0][3] + tbl_bottom = tbls[tbl_index][1][0][4] + tbl_tag = "@@{}\t{:.1f}\t{:.1f}\t{:.1f}\t{:.1f}##" \ + .format(tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom) + tbl_text = ''.join(tbls[tbl_index][0][1]) + return tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text +class Docx(DocxParser): + def __init__(self): + pass + def get_picture(self, document, paragraph): + img = paragraph._element.xpath('.//pic:pic') + if not img: + return None + img = img[0] + embed = img.xpath('.//a:blip/@r:embed')[0] + related_part = document.part.related_parts[embed] + image = related_part.image + image = Image.open(BytesIO(image.blob)).convert('RGB') + return image + + def __call__(self, filename, binary=None, from_page=0, to_page=100000, callback=None): + self.doc = Document( + filename) if not binary else Document(BytesIO(binary)) + pn = 0 + last_answer, last_image = "", None + question_stack, level_stack = [], [] + qai_list = [] + for p in self.doc.paragraphs: + if pn > to_page: + break + question_level, p_text = 0, '' + if from_page <= pn < to_page and p.text.strip(): + question_level, p_text = docx_question_level(p) + if not question_level or question_level > 6: # not a question + last_answer = f'{last_answer}\n{p_text}' + current_image = self.get_picture(self.doc, p) + last_image = concat_img(last_image, current_image) + else: # is a question + if last_answer or last_image: + sum_question = '\n'.join(question_stack) + if sum_question: + qai_list.append((sum_question, last_answer, last_image)) + last_answer, last_image = '', None + + i = question_level + while question_stack and i <= level_stack[-1]: + question_stack.pop() + level_stack.pop() + question_stack.append(p_text) + level_stack.append(question_level) + for run in p.runs: + if 
'lastRenderedPageBreak' in run._element.xml:
+                    pn += 1
+                    continue
+                if 'w:br' in run._element.xml and 'type="page"' in run._element.xml:
+                    pn += 1
+        if last_answer:
+            sum_question = '\n'.join(question_stack)
+            if sum_question:
+                qai_list.append((sum_question, last_answer, last_image))
+
+        tbls = []
+        for tb in self.doc.tables:
+            html = "<table>"
+            for r in tb.rows:
+                html += "<tr>"
+                i = 0
+                while i < len(r.cells):
+                    span = 1
+                    c = r.cells[i]
+                    for j in range(i+1, len(r.cells)):
+                        if c.text == r.cells[j].text:
+                            span += 1
+                            i = j
+                    i += 1
+                    html += f"<td>{c.text}</td>" if span == 1 else f"<td colspan='{span}'>{c.text}</td>"
+                html += "</tr>"
+            html += "</table>"
+            tbls.append(((None, html), ""))
+        return qai_list, tbls
+
+def rmPrefix(txt):
+    return re.sub(
+        r"^(问题|答案|回答|user|assistant|Q|A|Question|Answer|问|答)[\t:: ]+", "", txt.strip(), flags=re.IGNORECASE)
+
+
+def beAdocPdf(d, q, a, eng, image, poss):
+    qprefix = "Question: " if eng else "问题:"
+    aprefix = "Answer: " if eng else "回答:"
+    d["content_with_weight"] = "\t".join(
+        [qprefix + rmPrefix(q), aprefix + rmPrefix(a)])
+    d["content_ltks"] = rag_tokenizer.tokenize(q)
+    d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
+    d["image"] = image
+    add_positions(d, poss)
+    return d
+
+def beAdocDocx(d, q, a, eng, image):
+    qprefix = "Question: " if eng else "问题:"
+    aprefix = "Answer: " if eng else "回答:"
+    d["content_with_weight"] = "\t".join(
+        [qprefix + rmPrefix(q), aprefix + rmPrefix(a)])
+    d["content_ltks"] = rag_tokenizer.tokenize(q)
+    d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
+    d["image"] = image
+    return d
+
+def beAdoc(d, q, a, eng):
+    qprefix = "Question: " if eng else "问题:"
+    aprefix = "Answer: " if eng else "回答:"
+    d["content_with_weight"] = "\t".join(
+        [qprefix + rmPrefix(q), aprefix + rmPrefix(a)])
+    d["content_ltks"] = rag_tokenizer.tokenize(q)
+    d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
+    return d
+
+
+def mdQuestionLevel(s):
+    match = re.match(r'#*', s)
+    return (len(match.group(0)), s.lstrip('#').lstrip()) if match else (0, s)
+
+def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs):
+    """
+    Excel and csv(txt) format files are supported.
+    If the file is in Excel format, it should contain two columns, question and answer, with no header row
+    and with the question column ahead of the answer column.
+    Multiple sheets are fine as long as the columns are composed correctly.
+
+    If the file is in csv format, it should be UTF-8 encoded, with TAB as the delimiter between question and answer.
+
+    All malformed lines are ignored.
+    Every Q&A pair is treated as one chunk.
+    """
+    eng = lang.lower() == "english"
+    res = []
+    doc = {
+        "docnm_kwd": filename,
+        "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename))
+    }
+    if re.search(r"\.xlsx?$", filename, re.IGNORECASE):
+        callback(0.1, "Start to parse.")
+        excel_parser = Excel()
+        for q, a in excel_parser(filename, binary, callback):
+            res.append(beAdoc(deepcopy(doc), q, a, eng))
+        return res
+    elif re.search(r"\.(txt|csv)$", filename, re.IGNORECASE):
+        callback(0.1, "Start to parse.")
+        txt = ""
+        if binary:
+            encoding = find_codec(binary)
+            txt = binary.decode(encoding, errors="ignore")
+        else:
+            with open(filename, "r") as f:
+                while True:
+                    l = f.readline()
+                    if not l:
+                        break
+                    txt += l
+        lines = txt.split("\n")
+        comma, tab = 0, 0
+        for l in lines:
+            if len(l.split(",")) == 2: comma += 1
+            if len(l.split("\t")) == 2: tab += 1
+        delimiter = "\t" if tab >= comma else ","
+
+        fails = []
+        question, answer = "", ""
+        i = 0
+        while i < len(lines):
+            arr = lines[i].split(delimiter)
+            if len(arr) != 2:
+                if question: answer += "\n" + lines[i]
+                else:
+                    fails.append(str(i+1))
+            elif len(arr) == 2:
+                if question and answer: res.append(beAdoc(deepcopy(doc), question, answer, eng))
+                question, answer = arr
+            i += 1
+            if len(res) % 999 == 0:
+                callback(len(res) * 0.6 / len(lines), ("Extract Q&A: {}".format(len(res)) + (
+                    f"{len(fails)} failure, line: %s..."
% (",".join(fails[:3])) if fails else ""))) + + if question: res.append(beAdoc(deepcopy(doc), question, answer, eng)) + + callback(0.6, ("Extract Q&A: {}".format(len(res)) + ( + f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else ""))) + + return res + elif re.search(r"\.pdf$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + pdf_parser = Pdf() + qai_list, tbls = pdf_parser(filename if not binary else binary, + from_page=0, to_page=10000, callback=callback) + + + for q, a, image, poss in qai_list: + res.append(beAdocPdf(deepcopy(doc), q, a, eng, image, poss)) + return res + elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + txt = "" + if binary: + encoding = find_codec(binary) + txt = binary.decode(encoding, errors="ignore") + else: + with open(filename, "r") as f: + while True: + l = f.readline() + if not l: + break + txt += l + lines = txt.split("\n") + last_question, last_answer = "", "" + question_stack, level_stack = [], [] + code_block = False + level_index = [-1] * 7 + for index, l in enumerate(lines): + if l.strip().startswith('```'): + code_block = not code_block + question_level, question = 0, '' + if not code_block: + question_level, question = mdQuestionLevel(l) + + if not question_level or question_level > 6: # not a question + last_answer = f'{last_answer}\n{l}' + else: # is a question + if last_answer.strip(): + sum_question = '\n'.join(question_stack) + if sum_question: + res.append(beAdoc(deepcopy(doc), sum_question, markdown(last_answer, extensions=['markdown.extensions.tables']), eng)) + last_answer = '' + + i = question_level + while question_stack and i <= level_stack[-1]: + question_stack.pop() + level_stack.pop() + question_stack.append(question) + level_stack.append(question_level) + if last_answer.strip(): + sum_question = '\n'.join(question_stack) + if sum_question: + res.append(beAdoc(deepcopy(doc), sum_question, markdown(last_answer, extensions=['markdown.extensions.tables']), eng)) + return res + elif re.search(r"\.docx$", filename, re.IGNORECASE): + docx_parser = Docx() + qai_list, tbls = docx_parser(filename, binary, + from_page=0, to_page=10000, callback=callback) + res = tokenize_table(tbls, doc, eng) + for q, a, image in qai_list: + res.append(beAdocDocx(deepcopy(doc), q, a, eng, image)) + return res + + raise NotImplementedError( + "Excel, csv(txt), pdf, markdown and docx format files are supported.") + + +if __name__ == "__main__": + import sys + + def dummy(prog=None, msg=""): + pass chunk(sys.argv[1], from_page=0, to_page=10, callback=dummy) \ No newline at end of file diff --git a/rag/app/resume.py b/rag/app/resume.py index 95c19fa7ddba8ba88d1aa9a4437c5b669b00ea16..235c7793c82f0bf8fc62f26479ce4b2b8715d1b2 100644 --- a/rag/app/resume.py +++ b/rag/app/resume.py @@ -1,173 +1,173 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import base64 -import datetime -import json -import re - -import pandas as pd -import requests -from api.db.services.knowledgebase_service import KnowledgebaseService -from rag.nlp import rag_tokenizer -from deepdoc.parser.resume import refactor -from deepdoc.parser.resume import step_one, step_two -from rag.settings import cron_logger -from rag.utils import rmSpace - -forbidden_select_fields4resume = [ - "name_pinyin_kwd", "edu_first_fea_kwd", "degree_kwd", "sch_rank_kwd", "edu_fea_kwd" -] - - -def remote_call(filename, binary): - q = { - "header": { - "uid": 1, - "user": "kevinhu", - "log_id": filename - }, - "request": { - "p": { - "request_id": "1", - "encrypt_type": "base64", - "filename": filename, - "langtype": '', - "fileori": base64.b64encode(binary).decode('utf-8') - }, - "c": "resume_parse_module", - "m": "resume_parse" - } - } - for _ in range(3): - try: - resume = requests.post( - "http://127.0.0.1:61670/tog", - data=json.dumps(q)) - resume = resume.json()["response"]["results"] - resume = refactor(resume) - for k in ["education", "work", "project", - "training", "skill", "certificate", "language"]: - if not resume.get(k) and k in resume: - del resume[k] - - resume = step_one.refactor(pd.DataFrame([{"resume_content": json.dumps(resume), "tob_resume_id": "x", - "updated_at": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}])) - resume = step_two.parse(resume) - return resume - except Exception as e: - cron_logger.error("Resume parser error: " + str(e)) - return {} - - -def chunk(filename, binary=None, callback=None, **kwargs): - """ - The supported file formats are pdf, docx and txt. - To maximize the effectiveness, parse the resume correctly, please concat us: https://github.com/infiniflow/ragflow - """ - if not re.search(r"\.(pdf|doc|docx|txt)$", filename, flags=re.IGNORECASE): - raise NotImplementedError("file type not supported yet(pdf supported)") - - if not binary: - with open(filename, "rb") as f: - binary = f.read() - - callback(0.2, "Resume parsing is going on...") - resume = remote_call(filename, binary) - if len(resume.keys()) < 7: - callback(-1, "Resume is not successfully parsed.") - raise Exception("Resume parser remote call fail!") - callback(0.6, "Done parsing. 
Chunking...") - print(json.dumps(resume, ensure_ascii=False, indent=2)) - - field_map = { - "name_kwd": "姓名/名字", - "name_pinyin_kwd": "姓名拼音/名字拼音", - "gender_kwd": "性别(男,女)", - "age_int": "年龄/岁/年纪", - "phone_kwd": "电话/手机/微信", - "email_tks": "email/e-mail/邮箱", - "position_name_tks": "职位/职能/岗位/职责", - "expect_city_names_tks": "期望城市", - "work_exp_flt": "工作年限/工作年份/N年经验/毕业了多少年", - "corporation_name_tks": "最近就职(上班)的公司/上一家公司", - - "first_school_name_tks": "第一学历毕业学校", - "first_degree_kwd": "第一学历(高中,职高,硕士,本科,博士,初中,中技,中专,专科,专升本,MPA,MBA,EMBA)", - "highest_degree_kwd": "最高学历(高中,职高,硕士,本科,博士,初中,中技,中专,专科,专升本,MPA,MBA,EMBA)", - "first_major_tks": "第一学历专业", - "edu_first_fea_kwd": "第一学历标签(211,留学,双一流,985,海外知名,重点大学,中专,专升本,专科,本科,大专)", - - "degree_kwd": "过往学历(高中,职高,硕士,本科,博士,初中,中技,中专,专科,专升本,MPA,MBA,EMBA)", - "major_tks": "学过的专业/过往专业", - "school_name_tks": "学校/毕业院校", - "sch_rank_kwd": "学校标签(顶尖学校,精英学校,优质学校,一般学校)", - "edu_fea_kwd": "教育标签(211,留学,双一流,985,海外知名,重点大学,中专,专升本,专科,本科,大专)", - - "corp_nm_tks": "就职过的公司/之前的公司/上过班的公司", - "edu_end_int": "毕业年份", - "industry_name_tks": "所在行业", - - "birth_dt": "生日/出生年份", - "expect_position_name_tks": "期望职位/期望职能/期望岗位", - } - - titles = [] - for n in ["name_kwd", "gender_kwd", "position_name_tks", "age_int"]: - v = resume.get(n, "") - if isinstance(v, list): - v = v[0] - if n.find("tks") > 0: - v = rmSpace(v) - titles.append(str(v)) - doc = { - "docnm_kwd": filename, - "title_tks": rag_tokenizer.tokenize("-".join(titles) + "-简历") - } - doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) - pairs = [] - for n, m in field_map.items(): - if not resume.get(n): - continue - v = resume[n] - if isinstance(v, list): - v = " ".join(v) - if n.find("tks") > 0: - v = rmSpace(v) - pairs.append((m, str(v))) - - doc["content_with_weight"] = "\n".join( - ["{}: {}".format(re.sub(r"([^()]+)", "", k), v) for k, v in pairs]) - doc["content_ltks"] = rag_tokenizer.tokenize(doc["content_with_weight"]) - doc["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(doc["content_ltks"]) - for n, _ in field_map.items(): - if n not in resume: - continue - if isinstance(resume[n], list) and ( - len(resume[n]) == 1 or n not in forbidden_select_fields4resume): - resume[n] = resume[n][0] - if n.find("_tks") > 0: - resume[n] = rag_tokenizer.fine_grained_tokenize(resume[n]) - doc[n] = resume[n] - - print(doc) - KnowledgebaseService.update_parser_config( - kwargs["kb_id"], {"field_map": field_map}) - return [doc] - - -if __name__ == "__main__": - import sys - - def dummy(a, b): - pass - chunk(sys.argv[1], callback=dummy) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import base64 +import datetime +import json +import re + +import pandas as pd +import requests +from api.db.services.knowledgebase_service import KnowledgebaseService +from rag.nlp import rag_tokenizer +from deepdoc.parser.resume import refactor +from deepdoc.parser.resume import step_one, step_two +from rag.settings import cron_logger +from rag.utils import rmSpace + +forbidden_select_fields4resume = [ + "name_pinyin_kwd", "edu_first_fea_kwd", "degree_kwd", "sch_rank_kwd", "edu_fea_kwd" +] + + +def remote_call(filename, binary): + q = { + "header": { + "uid": 1, + "user": "kevinhu", + "log_id": filename + }, + "request": { + "p": { + "request_id": "1", + "encrypt_type": "base64", + "filename": filename, + "langtype": '', + "fileori": base64.b64encode(binary).decode('utf-8') + }, + "c": "resume_parse_module", + "m": "resume_parse" + } + } + for _ in range(3): + try: + resume = requests.post( + "http://127.0.0.1:61670/tog", + data=json.dumps(q)) + resume = resume.json()["response"]["results"] + resume = refactor(resume) + for k in ["education", "work", "project", + "training", "skill", "certificate", "language"]: + if not resume.get(k) and k in resume: + del resume[k] + + resume = step_one.refactor(pd.DataFrame([{"resume_content": json.dumps(resume), "tob_resume_id": "x", + "updated_at": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}])) + resume = step_two.parse(resume) + return resume + except Exception as e: + cron_logger.error("Resume parser error: " + str(e)) + return {} + + +def chunk(filename, binary=None, callback=None, **kwargs): + """ + The supported file formats are pdf, docx and txt. + To maximize the effectiveness, parse the resume correctly, please concat us: https://github.com/infiniflow/ragflow + """ + if not re.search(r"\.(pdf|doc|docx|txt)$", filename, flags=re.IGNORECASE): + raise NotImplementedError("file type not supported yet(pdf supported)") + + if not binary: + with open(filename, "rb") as f: + binary = f.read() + + callback(0.2, "Resume parsing is going on...") + resume = remote_call(filename, binary) + if len(resume.keys()) < 7: + callback(-1, "Resume is not successfully parsed.") + raise Exception("Resume parser remote call fail!") + callback(0.6, "Done parsing. 
Chunking...") + print(json.dumps(resume, ensure_ascii=False, indent=2)) + + field_map = { + "name_kwd": "姓名/名字", + "name_pinyin_kwd": "姓名拼音/名字拼音", + "gender_kwd": "性别(男,女)", + "age_int": "年龄/岁/年纪", + "phone_kwd": "电话/手机/微信", + "email_tks": "email/e-mail/邮箱", + "position_name_tks": "职位/职能/岗位/职责", + "expect_city_names_tks": "期望城市", + "work_exp_flt": "工作年限/工作年份/N年经验/毕业了多少年", + "corporation_name_tks": "最近就职(上班)的公司/上一家公司", + + "first_school_name_tks": "第一学历毕业学校", + "first_degree_kwd": "第一学历(高中,职高,硕士,本科,博士,初中,中技,中专,专科,专升本,MPA,MBA,EMBA)", + "highest_degree_kwd": "最高学历(高中,职高,硕士,本科,博士,初中,中技,中专,专科,专升本,MPA,MBA,EMBA)", + "first_major_tks": "第一学历专业", + "edu_first_fea_kwd": "第一学历标签(211,留学,双一流,985,海外知名,重点大学,中专,专升本,专科,本科,大专)", + + "degree_kwd": "过往学历(高中,职高,硕士,本科,博士,初中,中技,中专,专科,专升本,MPA,MBA,EMBA)", + "major_tks": "学过的专业/过往专业", + "school_name_tks": "学校/毕业院校", + "sch_rank_kwd": "学校标签(顶尖学校,精英学校,优质学校,一般学校)", + "edu_fea_kwd": "教育标签(211,留学,双一流,985,海外知名,重点大学,中专,专升本,专科,本科,大专)", + + "corp_nm_tks": "就职过的公司/之前的公司/上过班的公司", + "edu_end_int": "毕业年份", + "industry_name_tks": "所在行业", + + "birth_dt": "生日/出生年份", + "expect_position_name_tks": "期望职位/期望职能/期望岗位", + } + + titles = [] + for n in ["name_kwd", "gender_kwd", "position_name_tks", "age_int"]: + v = resume.get(n, "") + if isinstance(v, list): + v = v[0] + if n.find("tks") > 0: + v = rmSpace(v) + titles.append(str(v)) + doc = { + "docnm_kwd": filename, + "title_tks": rag_tokenizer.tokenize("-".join(titles) + "-简历") + } + doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"]) + pairs = [] + for n, m in field_map.items(): + if not resume.get(n): + continue + v = resume[n] + if isinstance(v, list): + v = " ".join(v) + if n.find("tks") > 0: + v = rmSpace(v) + pairs.append((m, str(v))) + + doc["content_with_weight"] = "\n".join( + ["{}: {}".format(re.sub(r"([^()]+)", "", k), v) for k, v in pairs]) + doc["content_ltks"] = rag_tokenizer.tokenize(doc["content_with_weight"]) + doc["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(doc["content_ltks"]) + for n, _ in field_map.items(): + if n not in resume: + continue + if isinstance(resume[n], list) and ( + len(resume[n]) == 1 or n not in forbidden_select_fields4resume): + resume[n] = resume[n][0] + if n.find("_tks") > 0: + resume[n] = rag_tokenizer.fine_grained_tokenize(resume[n]) + doc[n] = resume[n] + + print(doc) + KnowledgebaseService.update_parser_config( + kwargs["kb_id"], {"field_map": field_map}) + return [doc] + + +if __name__ == "__main__": + import sys + + def dummy(a, b): + pass + chunk(sys.argv[1], callback=dummy) diff --git a/rag/app/table.py b/rag/app/table.py index 368d1ce85a51a669a46b5af470c0ae1feae7772b..2195b391a1d87dbfa3b1d82eaa0a8f45c9d91fa7 100644 --- a/rag/app/table.py +++ b/rag/app/table.py @@ -1,252 +1,252 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import copy -import re -from io import BytesIO -from xpinyin import Pinyin -import numpy as np -import pandas as pd -from openpyxl import load_workbook -from dateutil.parser import parse as datetime_parse - -from api.db.services.knowledgebase_service import KnowledgebaseService -from rag.nlp import rag_tokenizer, is_english, tokenize, find_codec -from deepdoc.parser import ExcelParser - - -class Excel(ExcelParser): - def __call__(self, fnm, binary=None, from_page=0, - to_page=10000000000, callback=None): - if not binary: - wb = load_workbook(fnm) - else: - wb = load_workbook(BytesIO(binary)) - total = 0 - for sheetname in wb.sheetnames: - total += len(list(wb[sheetname].rows)) - - res, fails, done = [], [], 0 - rn = 0 - for sheetname in wb.sheetnames: - ws = wb[sheetname] - rows = list(ws.rows) - if not rows:continue - headers = [cell.value for cell in rows[0]] - missed = set([i for i, h in enumerate(headers) if h is None]) - headers = [ - cell.value for i, - cell in enumerate( - rows[0]) if i not in missed] - if not headers:continue - data = [] - for i, r in enumerate(rows[1:]): - rn += 1 - if rn - 1 < from_page: - continue - if rn - 1 >= to_page: - break - row = [ - cell.value for ii, - cell in enumerate(r) if ii not in missed] - if len(row) != len(headers): - fails.append(str(i)) - continue - data.append(row) - done += 1 - res.append(pd.DataFrame(np.array(data), columns=headers)) - - callback(0.3, ("Extract records: {}~{}".format(from_page + 1, min(to_page, from_page + rn)) + ( - f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else ""))) - return res - - -def trans_datatime(s): - try: - return datetime_parse(s.strip()).strftime("%Y-%m-%d %H:%M:%S") - except Exception as e: - pass - - -def trans_bool(s): - if re.match(r"(true|yes|是|\*|✓|✔|☑|✅|√)$", - str(s).strip(), flags=re.IGNORECASE): - return "yes" - if re.match(r"(false|no|否|⍻|×)$", str(s).strip(), flags=re.IGNORECASE): - return "no" - - -def column_data_type(arr): - arr = list(arr) - uni = len(set([a for a in arr if a is not None])) - counts = {"int": 0, "float": 0, "text": 0, "datetime": 0, "bool": 0} - trans = {t: f for f, t in - [(int, "int"), (float, "float"), (trans_datatime, "datetime"), (trans_bool, "bool"), (str, "text")]} - for a in arr: - if a is None: - continue - if re.match(r"[+-]?[0-9]+(\.0+)?$", str(a).replace("%%", "")): - counts["int"] += 1 - elif re.match(r"[+-]?[0-9.]+$", str(a).replace("%%", "")): - counts["float"] += 1 - elif re.match(r"(true|yes|是|\*|✓|✔|☑|✅|√|false|no|否|⍻|×)$", str(a), flags=re.IGNORECASE): - counts["bool"] += 1 - elif trans_datatime(str(a)): - counts["datetime"] += 1 - else: - counts["text"] += 1 - counts = sorted(counts.items(), key=lambda x: x[1] * -1) - ty = counts[0][0] - for i in range(len(arr)): - if arr[i] is None: - continue - try: - arr[i] = trans[ty](str(arr[i])) - except Exception as e: - arr[i] = None - # if ty == "text": - # if len(arr) > 128 and uni / len(arr) < 0.1: - # ty = "keyword" - return arr, ty - - -def chunk(filename, binary=None, from_page=0, to_page=10000000000, - lang="Chinese", callback=None, **kwargs): - """ - Excel and csv(txt) format files are supported. - For csv or txt file, the delimiter between columns is TAB. - The first line must be column headers. - Column headers must be meaningful terms inorder to make our NLP model understanding. - It's good to enumerate some synonyms using slash '/' to separate, and even better to - enumerate values using brackets like 'gender/sex(male, female)'. - Here are some examples for headers: - 1. 
supplier/vendor\tcolor(yellow, red, brown)\tgender/sex(male, female)\tsize(M,L,XL,XXL) - 2. 姓名/名字\t电话/手机/微信\t最高学历(高中,职高,硕士,本科,博士,初中,中技,中专,专科,专升本,MPA,MBA,EMBA) - - Every row in table will be treated as a chunk. - """ - - if re.search(r"\.xlsx?$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - excel_parser = Excel() - dfs = excel_parser( - filename, - binary, - from_page=from_page, - to_page=to_page, - callback=callback) - elif re.search(r"\.(txt|csv)$", filename, re.IGNORECASE): - callback(0.1, "Start to parse.") - txt = "" - if binary: - encoding = find_codec(binary) - txt = binary.decode(encoding, errors="ignore") - else: - with open(filename, "r") as f: - while True: - l = f.readline() - if not l: - break - txt += l - lines = txt.split("\n") - fails = [] - headers = lines[0].split(kwargs.get("delimiter", "\t")) - rows = [] - for i, line in enumerate(lines[1:]): - if i < from_page: - continue - if i >= to_page: - break - row = [l for l in line.split(kwargs.get("delimiter", "\t"))] - if len(row) != len(headers): - fails.append(str(i)) - continue - rows.append(row) - - callback(0.3, ("Extract records: {}~{}".format(from_page, min(len(lines), to_page)) + ( - f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else ""))) - - dfs = [pd.DataFrame(np.array(rows), columns=headers)] - - else: - raise NotImplementedError( - "file type not supported yet(excel, text, csv supported)") - - res = [] - PY = Pinyin() - fieds_map = { - "text": "_tks", - "int": "_long", - "keyword": "_kwd", - "float": "_flt", - "datetime": "_dt", - "bool": "_kwd"} - for df in dfs: - for n in ["id", "_id", "index", "idx"]: - if n in df.columns: - del df[n] - clmns = df.columns.values - txts = list(copy.deepcopy(clmns)) - py_clmns = [ - PY.get_pinyins( - re.sub( - r"(/.*|([^()]+?)|\([^()]+?\))", - "", - str(n)), - '_')[0] for n in clmns] - clmn_tys = [] - for j in range(len(clmns)): - cln, ty = column_data_type(df[clmns[j]]) - clmn_tys.append(ty) - df[clmns[j]] = cln - if ty == "text": - txts.extend([str(c) for c in cln if c]) - clmns_map = [(py_clmns[i].lower() + fieds_map[clmn_tys[i]], str(clmns[i]).replace("_", " ")) - for i in range(len(clmns))] - - eng = lang.lower() == "english" # is_english(txts) - for ii, row in df.iterrows(): - d = { - "docnm_kwd": filename, - "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)) - } - row_txt = [] - for j in range(len(clmns)): - if row[clmns[j]] is None: - continue - if not str(row[clmns[j]]): - continue - if pd.isna(row[clmns[j]]): - continue - fld = clmns_map[j][0] - d[fld] = row[clmns[j]] if clmn_tys[j] != "text" else rag_tokenizer.tokenize( - row[clmns[j]]) - row_txt.append("{}:{}".format(clmns[j], row[clmns[j]])) - if not row_txt: - continue - tokenize(d, "; ".join(row_txt), eng) - res.append(d) - - KnowledgebaseService.update_parser_config( - kwargs["kb_id"], {"field_map": {k: v for k, v in clmns_map}}) - callback(0.35, "") - - return res - - -if __name__ == "__main__": - import sys - - def dummy(prog=None, msg=""): - pass - - chunk(sys.argv[1], callback=dummy) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
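# NOTE (illustrative sketch, not part of this diff): table.py's
# column_data_type() infers each column's type by majority vote over regex
# matches and then coerces every cell, nulling the ones that fail. The voting
# step in isolation (trimmed patterns; the real code also recognizes bools
# and datetimes via trans_bool and trans_datatime):
import re
from collections import Counter

def infer_column_type(values):
    counts = Counter()
    for v in values:
        if v is None:
            continue
        s = str(v)
        if re.fullmatch(r"[+-]?[0-9]+(\.0+)?", s):
            counts["int"] += 1
        elif re.fullmatch(r"[+-]?[0-9.]+", s):
            counts["float"] += 1
        else:
            counts["text"] += 1
    return counts.most_common(1)[0][0] if counts else "text"

# infer_column_type(["1", "2", "3.0", None])   -> 'int'
# infer_column_type(["1.5", "2.5", "2", "a"])  -> 'float'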
+# See the License for the specific language governing permissions and +# limitations under the License. +# +import copy +import re +from io import BytesIO +from xpinyin import Pinyin +import numpy as np +import pandas as pd +from openpyxl import load_workbook +from dateutil.parser import parse as datetime_parse + +from api.db.services.knowledgebase_service import KnowledgebaseService +from rag.nlp import rag_tokenizer, is_english, tokenize, find_codec +from deepdoc.parser import ExcelParser + + +class Excel(ExcelParser): + def __call__(self, fnm, binary=None, from_page=0, + to_page=10000000000, callback=None): + if not binary: + wb = load_workbook(fnm) + else: + wb = load_workbook(BytesIO(binary)) + total = 0 + for sheetname in wb.sheetnames: + total += len(list(wb[sheetname].rows)) + + res, fails, done = [], [], 0 + rn = 0 + for sheetname in wb.sheetnames: + ws = wb[sheetname] + rows = list(ws.rows) + if not rows:continue + headers = [cell.value for cell in rows[0]] + missed = set([i for i, h in enumerate(headers) if h is None]) + headers = [ + cell.value for i, + cell in enumerate( + rows[0]) if i not in missed] + if not headers:continue + data = [] + for i, r in enumerate(rows[1:]): + rn += 1 + if rn - 1 < from_page: + continue + if rn - 1 >= to_page: + break + row = [ + cell.value for ii, + cell in enumerate(r) if ii not in missed] + if len(row) != len(headers): + fails.append(str(i)) + continue + data.append(row) + done += 1 + res.append(pd.DataFrame(np.array(data), columns=headers)) + + callback(0.3, ("Extract records: {}~{}".format(from_page + 1, min(to_page, from_page + rn)) + ( + f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else ""))) + return res + + +def trans_datatime(s): + try: + return datetime_parse(s.strip()).strftime("%Y-%m-%d %H:%M:%S") + except Exception as e: + pass + + +def trans_bool(s): + if re.match(r"(true|yes|是|\*|✓|✔|☑|✅|√)$", + str(s).strip(), flags=re.IGNORECASE): + return "yes" + if re.match(r"(false|no|否|⍻|×)$", str(s).strip(), flags=re.IGNORECASE): + return "no" + + +def column_data_type(arr): + arr = list(arr) + uni = len(set([a for a in arr if a is not None])) + counts = {"int": 0, "float": 0, "text": 0, "datetime": 0, "bool": 0} + trans = {t: f for f, t in + [(int, "int"), (float, "float"), (trans_datatime, "datetime"), (trans_bool, "bool"), (str, "text")]} + for a in arr: + if a is None: + continue + if re.match(r"[+-]?[0-9]+(\.0+)?$", str(a).replace("%%", "")): + counts["int"] += 1 + elif re.match(r"[+-]?[0-9.]+$", str(a).replace("%%", "")): + counts["float"] += 1 + elif re.match(r"(true|yes|是|\*|✓|✔|☑|✅|√|false|no|否|⍻|×)$", str(a), flags=re.IGNORECASE): + counts["bool"] += 1 + elif trans_datatime(str(a)): + counts["datetime"] += 1 + else: + counts["text"] += 1 + counts = sorted(counts.items(), key=lambda x: x[1] * -1) + ty = counts[0][0] + for i in range(len(arr)): + if arr[i] is None: + continue + try: + arr[i] = trans[ty](str(arr[i])) + except Exception as e: + arr[i] = None + # if ty == "text": + # if len(arr) > 128 and uni / len(arr) < 0.1: + # ty = "keyword" + return arr, ty + + +def chunk(filename, binary=None, from_page=0, to_page=10000000000, + lang="Chinese", callback=None, **kwargs): + """ + Excel and csv(txt) format files are supported. + For csv or txt file, the delimiter between columns is TAB. + The first line must be column headers. + Column headers must be meaningful terms inorder to make our NLP model understanding. 
+ It's good to enumerate some synonyms using slash '/' to separate, and even better to + enumerate values using brackets like 'gender/sex(male, female)'. + Here are some examples for headers: + 1. supplier/vendor\tcolor(yellow, red, brown)\tgender/sex(male, female)\tsize(M,L,XL,XXL) + 2. 姓名/名字\t电话/手机/微信\t最高学历(高中,职高,硕士,本科,博士,初中,中技,中专,专科,专升本,MPA,MBA,EMBA) + + Every row in table will be treated as a chunk. + """ + + if re.search(r"\.xlsx?$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + excel_parser = Excel() + dfs = excel_parser( + filename, + binary, + from_page=from_page, + to_page=to_page, + callback=callback) + elif re.search(r"\.(txt|csv)$", filename, re.IGNORECASE): + callback(0.1, "Start to parse.") + txt = "" + if binary: + encoding = find_codec(binary) + txt = binary.decode(encoding, errors="ignore") + else: + with open(filename, "r") as f: + while True: + l = f.readline() + if not l: + break + txt += l + lines = txt.split("\n") + fails = [] + headers = lines[0].split(kwargs.get("delimiter", "\t")) + rows = [] + for i, line in enumerate(lines[1:]): + if i < from_page: + continue + if i >= to_page: + break + row = [l for l in line.split(kwargs.get("delimiter", "\t"))] + if len(row) != len(headers): + fails.append(str(i)) + continue + rows.append(row) + + callback(0.3, ("Extract records: {}~{}".format(from_page, min(len(lines), to_page)) + ( + f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else ""))) + + dfs = [pd.DataFrame(np.array(rows), columns=headers)] + + else: + raise NotImplementedError( + "file type not supported yet(excel, text, csv supported)") + + res = [] + PY = Pinyin() + fieds_map = { + "text": "_tks", + "int": "_long", + "keyword": "_kwd", + "float": "_flt", + "datetime": "_dt", + "bool": "_kwd"} + for df in dfs: + for n in ["id", "_id", "index", "idx"]: + if n in df.columns: + del df[n] + clmns = df.columns.values + txts = list(copy.deepcopy(clmns)) + py_clmns = [ + PY.get_pinyins( + re.sub( + r"(/.*|([^()]+?)|\([^()]+?\))", + "", + str(n)), + '_')[0] for n in clmns] + clmn_tys = [] + for j in range(len(clmns)): + cln, ty = column_data_type(df[clmns[j]]) + clmn_tys.append(ty) + df[clmns[j]] = cln + if ty == "text": + txts.extend([str(c) for c in cln if c]) + clmns_map = [(py_clmns[i].lower() + fieds_map[clmn_tys[i]], str(clmns[i]).replace("_", " ")) + for i in range(len(clmns))] + + eng = lang.lower() == "english" # is_english(txts) + for ii, row in df.iterrows(): + d = { + "docnm_kwd": filename, + "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)) + } + row_txt = [] + for j in range(len(clmns)): + if row[clmns[j]] is None: + continue + if not str(row[clmns[j]]): + continue + if pd.isna(row[clmns[j]]): + continue + fld = clmns_map[j][0] + d[fld] = row[clmns[j]] if clmn_tys[j] != "text" else rag_tokenizer.tokenize( + row[clmns[j]]) + row_txt.append("{}:{}".format(clmns[j], row[clmns[j]])) + if not row_txt: + continue + tokenize(d, "; ".join(row_txt), eng) + res.append(d) + + KnowledgebaseService.update_parser_config( + kwargs["kb_id"], {"field_map": {k: v for k, v in clmns_map}}) + callback(0.35, "") + + return res + + +if __name__ == "__main__": + import sys + + def dummy(prog=None, msg=""): + pass + + chunk(sys.argv[1], callback=dummy) diff --git a/rag/llm/rpc_server.py b/rag/llm/rpc_server.py index dcfb38b0c51e99c538767381436f16973adabbfa..8ebae21eca5ddf3fb48c8202622c5430af1c4639 100644 --- a/rag/llm/rpc_server.py +++ b/rag/llm/rpc_server.py @@ -1,171 +1,171 @@ -# -# Copyright 2024 The InfiniFlow 
Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -import pickle -import random -import time -from copy import deepcopy -from multiprocessing.connection import Listener -from threading import Thread -from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer - - -def torch_gc(): - try: - import torch - if torch.cuda.is_available(): - # with torch.cuda.device(DEVICE): - torch.cuda.empty_cache() - torch.cuda.ipc_collect() - elif torch.backends.mps.is_available(): - try: - from torch.mps import empty_cache - empty_cache() - except Exception as e: - pass - except Exception: - pass - - -class RPCHandler: - def __init__(self): - self._functions = {} - - def register_function(self, func): - self._functions[func.__name__] = func - - def handle_connection(self, connection): - try: - while True: - # Receive a message - func_name, args, kwargs = pickle.loads(connection.recv()) - # Run the RPC and send a response - try: - r = self._functions[func_name](*args, **kwargs) - connection.send(pickle.dumps(r)) - except Exception as e: - connection.send(pickle.dumps(e)) - except EOFError: - pass - - -def rpc_server(hdlr, address, authkey): - sock = Listener(address, authkey=authkey) - while True: - try: - client = sock.accept() - t = Thread(target=hdlr.handle_connection, args=(client,)) - t.daemon = True - t.start() - except Exception as e: - print("【EXCEPTION】:", str(e)) - - -models = [] -tokenizer = None - - -def chat(messages, gen_conf): - global tokenizer - model = Model() - try: - torch_gc() - conf = { - "max_new_tokens": int( - gen_conf.get( - "max_tokens", 256)), "temperature": float( - gen_conf.get( - "temperature", 0.1))} - print(messages, conf) - text = tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True - ) - model_inputs = tokenizer([text], return_tensors="pt").to(model.device) - - generated_ids = model.generate( - model_inputs.input_ids, - **conf - ) - generated_ids = [ - output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids) - ] - - return tokenizer.batch_decode( - generated_ids, skip_special_tokens=True)[0] - except Exception as e: - return str(e) - - -def chat_streamly(messages, gen_conf): - global tokenizer - model = Model() - try: - torch_gc() - conf = deepcopy(gen_conf) - print(messages, conf) - text = tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True - ) - model_inputs = tokenizer([text], return_tensors="pt").to(model.device) - streamer = TextStreamer(tokenizer) - conf["inputs"] = model_inputs.input_ids - conf["streamer"] = streamer - conf["max_new_tokens"] = conf["max_tokens"] - del conf["max_tokens"] - thread = Thread(target=model.generate, kwargs=conf) - thread.start() - for _, new_text in enumerate(streamer): - yield new_text - except Exception as e: - yield "**ERROR**: " + str(e) - - -def Model(): - global models - random.seed(time.time()) - return random.choice(models) - - -if __name__ == "__main__": - 
parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str, help="Model name") - parser.add_argument( - "--port", - default=7860, - type=int, - help="RPC serving port") - args = parser.parse_args() - - handler = RPCHandler() - handler.register_function(chat) - handler.register_function(chat_streamly) - - models = [] - for _ in range(1): - m = AutoModelForCausalLM.from_pretrained(args.model_name, - device_map="auto", - torch_dtype='auto') - models.append(m) - tokenizer = AutoTokenizer.from_pretrained(args.model_name) - - # Run the server - rpc_server(handler, ('0.0.0.0', args.port), - authkey=b'infiniflow-token4kevinhu') +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import pickle +import random +import time +from copy import deepcopy +from multiprocessing.connection import Listener +from threading import Thread +from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer + + +def torch_gc(): + try: + import torch + if torch.cuda.is_available(): + # with torch.cuda.device(DEVICE): + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + elif torch.backends.mps.is_available(): + try: + from torch.mps import empty_cache + empty_cache() + except Exception as e: + pass + except Exception: + pass + + +class RPCHandler: + def __init__(self): + self._functions = {} + + def register_function(self, func): + self._functions[func.__name__] = func + + def handle_connection(self, connection): + try: + while True: + # Receive a message + func_name, args, kwargs = pickle.loads(connection.recv()) + # Run the RPC and send a response + try: + r = self._functions[func_name](*args, **kwargs) + connection.send(pickle.dumps(r)) + except Exception as e: + connection.send(pickle.dumps(e)) + except EOFError: + pass + + +def rpc_server(hdlr, address, authkey): + sock = Listener(address, authkey=authkey) + while True: + try: + client = sock.accept() + t = Thread(target=hdlr.handle_connection, args=(client,)) + t.daemon = True + t.start() + except Exception as e: + print("【EXCEPTION】:", str(e)) + + +models = [] +tokenizer = None + + +def chat(messages, gen_conf): + global tokenizer + model = Model() + try: + torch_gc() + conf = { + "max_new_tokens": int( + gen_conf.get( + "max_tokens", 256)), "temperature": float( + gen_conf.get( + "temperature", 0.1))} + print(messages, conf) + text = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + model_inputs = tokenizer([text], return_tensors="pt").to(model.device) + + generated_ids = model.generate( + model_inputs.input_ids, + **conf + ) + generated_ids = [ + output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids) + ] + + return tokenizer.batch_decode( + generated_ids, skip_special_tokens=True)[0] + except Exception as e: + return str(e) + + +def chat_streamly(messages, gen_conf): + global tokenizer + model = Model() + try: + torch_gc() + conf = deepcopy(gen_conf) + 
print(messages, conf) + text = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + model_inputs = tokenizer([text], return_tensors="pt").to(model.device) + streamer = TextStreamer(tokenizer) + conf["inputs"] = model_inputs.input_ids + conf["streamer"] = streamer + conf["max_new_tokens"] = conf["max_tokens"] + del conf["max_tokens"] + thread = Thread(target=model.generate, kwargs=conf) + thread.start() + for _, new_text in enumerate(streamer): + yield new_text + except Exception as e: + yield "**ERROR**: " + str(e) + + +def Model(): + global models + random.seed(time.time()) + return random.choice(models) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str, help="Model name") + parser.add_argument( + "--port", + default=7860, + type=int, + help="RPC serving port") + args = parser.parse_args() + + handler = RPCHandler() + handler.register_function(chat) + handler.register_function(chat_streamly) + + models = [] + for _ in range(1): + m = AutoModelForCausalLM.from_pretrained(args.model_name, + device_map="auto", + torch_dtype='auto') + models.append(m) + tokenizer = AutoTokenizer.from_pretrained(args.model_name) + + # Run the server + rpc_server(handler, ('0.0.0.0', args.port), + authkey=b'infiniflow-token4kevinhu') diff --git a/rag/llm/sequence2txt_model.py b/rag/llm/sequence2txt_model.py index 08a2b84f0dcfd52f96f46f021b2351927b1ee4af..45362ad2af75903597590c844bb263ac6a453753 100644 --- a/rag/llm/sequence2txt_model.py +++ b/rag/llm/sequence2txt_model.py @@ -1,89 +1,89 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
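# NOTE (illustrative sketch, not part of this diff): rpc_server.py above
# speaks a tiny pickle protocol -- the client sends a pickled
# (func_name, args, kwargs) tuple and receives a pickled result or exception
# -- while chat_streamly() streams by running model.generate on a background
# thread and draining a TextStreamer. A matching client for the plain chat()
# call, reusing the address and authkey hard-coded in the __main__ block:
import pickle
from multiprocessing.connection import Client

class RPCProxy:
    def __init__(self, address, authkey):
        self._conn = Client(address, authkey=authkey)

    def __getattr__(self, name):
        def do_rpc(*args, **kwargs):
            self._conn.send(pickle.dumps((name, args, kwargs)))
            result = pickle.loads(self._conn.recv())
            if isinstance(result, Exception):
                raise result               # the server pickles exceptions back
            return result
        return do_rpc

# proxy = RPCProxy(('127.0.0.1', 7860), authkey=b'infiniflow-token4kevinhu')
# print(proxy.chat([{"role": "user", "content": "hi"}], {"max_tokens": 64}))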
-# -from openai.lib.azure import AzureOpenAI -from zhipuai import ZhipuAI -import io -from abc import ABC -from ollama import Client -from openai import OpenAI -import os -import json -from rag.utils import num_tokens_from_string - - -class Base(ABC): - def __init__(self, key, model_name): - pass - - def transcription(self, audio, **kwargs): - transcription = self.client.audio.transcriptions.create( - model=self.model_name, - file=audio, - response_format="text" - ) - return transcription.text.strip(), num_tokens_from_string(transcription.text.strip()) - - -class GPTSeq2txt(Base): - def __init__(self, key, model_name="whisper-1", base_url="https://api.openai.com/v1"): - if not base_url: base_url = "https://api.openai.com/v1" - self.client = OpenAI(api_key=key, base_url=base_url) - self.model_name = model_name - - -class QWenSeq2txt(Base): - def __init__(self, key, model_name="paraformer-realtime-8k-v1", **kwargs): - import dashscope - dashscope.api_key = key - self.model_name = model_name - - def transcription(self, audio, format): - from http import HTTPStatus - from dashscope.audio.asr import Recognition - - recognition = Recognition(model=self.model_name, - format=format, - sample_rate=16000, - callback=None) - result = recognition.call(audio) - - ans = "" - if result.status_code == HTTPStatus.OK: - for sentence in result.get_sentence(): - ans += str(sentence + '\n') - return ans, num_tokens_from_string(ans) - - return "**ERROR**: " + result.message, 0 - - -class OllamaSeq2txt(Base): - def __init__(self, key, model_name, lang="Chinese", **kwargs): - self.client = Client(host=kwargs["base_url"]) - self.model_name = model_name - self.lang = lang - - -class AzureSeq2txt(Base): - def __init__(self, key, model_name, lang="Chinese", **kwargs): - self.client = AzureOpenAI(api_key=key, azure_endpoint=kwargs["base_url"], api_version="2024-02-01") - self.model_name = model_name - self.lang = lang - - -class XinferenceSeq2txt(Base): - def __init__(self, key, model_name="", base_url=""): - self.client = OpenAI(api_key="xxx", base_url=base_url) - self.model_name = model_name +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
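# NOTE (illustrative sketch, not part of this diff): every Seq2txt backend
# above returns a (text, token_count) pair. One caveat worth flagging: with
# response_format="text" the current openai SDK returns a plain string, so
# Base.transcription's transcription.text access may need a guard. A hedged,
# hypothetical wrapper that tolerates both shapes:
from rag.utils import num_tokens_from_string

def transcribe(client, model_name, path):
    with open(path, "rb") as audio:
        resp = client.audio.transcriptions.create(
            model=model_name, file=audio, response_format="text")
    text = (resp if isinstance(resp, str) else resp.text).strip()
    return text, num_tokens_from_string(text)

# s2t = GPTSeq2txt(key="sk-...", model_name="whisper-1")
# print(transcribe(s2t.client, s2t.model_name, "meeting.wav"))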
+# +from openai.lib.azure import AzureOpenAI +from zhipuai import ZhipuAI +import io +from abc import ABC +from ollama import Client +from openai import OpenAI +import os +import json +from rag.utils import num_tokens_from_string + + +class Base(ABC): + def __init__(self, key, model_name): + pass + + def transcription(self, audio, **kwargs): + transcription = self.client.audio.transcriptions.create( + model=self.model_name, + file=audio, + response_format="text" + ) + return transcription.text.strip(), num_tokens_from_string(transcription.text.strip()) + + +class GPTSeq2txt(Base): + def __init__(self, key, model_name="whisper-1", base_url="https://api.openai.com/v1"): + if not base_url: base_url = "https://api.openai.com/v1" + self.client = OpenAI(api_key=key, base_url=base_url) + self.model_name = model_name + + +class QWenSeq2txt(Base): + def __init__(self, key, model_name="paraformer-realtime-8k-v1", **kwargs): + import dashscope + dashscope.api_key = key + self.model_name = model_name + + def transcription(self, audio, format): + from http import HTTPStatus + from dashscope.audio.asr import Recognition + + recognition = Recognition(model=self.model_name, + format=format, + sample_rate=16000, + callback=None) + result = recognition.call(audio) + + ans = "" + if result.status_code == HTTPStatus.OK: + for sentence in result.get_sentence(): + ans += str(sentence + '\n') + return ans, num_tokens_from_string(ans) + + return "**ERROR**: " + result.message, 0 + + +class OllamaSeq2txt(Base): + def __init__(self, key, model_name, lang="Chinese", **kwargs): + self.client = Client(host=kwargs["base_url"]) + self.model_name = model_name + self.lang = lang + + +class AzureSeq2txt(Base): + def __init__(self, key, model_name, lang="Chinese", **kwargs): + self.client = AzureOpenAI(api_key=key, azure_endpoint=kwargs["base_url"], api_version="2024-02-01") + self.model_name = model_name + self.lang = lang + + +class XinferenceSeq2txt(Base): + def __init__(self, key, model_name="", base_url=""): + self.client = OpenAI(api_key="xxx", base_url=base_url) + self.model_name = model_name diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py index d82295e88eda8b900ab7960ba092fde26f04a002..64e953cf075d3c97aa68cac9581bb52debd730c9 100644 --- a/rag/nlp/__init__.py +++ b/rag/nlp/__init__.py @@ -1,593 +1,593 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import random -from collections import Counter - -from rag.utils import num_tokens_from_string -from . 
import rag_tokenizer -import re -import copy -import roman_numbers as r -from word2number import w2n -from cn2an import cn2an -from PIL import Image - -all_codecs = [ - 'utf-8', 'gb2312', 'gbk', 'utf_16', 'ascii', 'big5', 'big5hkscs', - 'cp037', 'cp273', 'cp424', 'cp437', - 'cp500', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp856', 'cp857', - 'cp858', 'cp860', 'cp861', 'cp862', 'cp863', 'cp864', 'cp865', 'cp866', 'cp869', - 'cp874', 'cp875', 'cp932', 'cp949', 'cp950', 'cp1006', 'cp1026', 'cp1125', - 'cp1140', 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 'cp1256', - 'cp1257', 'cp1258', 'euc_jp', 'euc_jis_2004', 'euc_jisx0213', 'euc_kr', - 'gb2312', 'gb18030', 'hz', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', - 'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', 'iso2022_kr', 'latin_1', - 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', 'iso8859_7', - 'iso8859_8', 'iso8859_9', 'iso8859_10', 'iso8859_11', 'iso8859_13', - 'iso8859_14', 'iso8859_15', 'iso8859_16', 'johab', 'koi8_r', 'koi8_t', 'koi8_u', - 'kz1048', 'mac_cyrillic', 'mac_greek', 'mac_iceland', 'mac_latin2', 'mac_roman', - 'mac_turkish', 'ptcp154', 'shift_jis', 'shift_jis_2004', 'shift_jisx0213', - 'utf_32', 'utf_32_be', 'utf_32_le''utf_16_be', 'utf_16_le', 'utf_7' -] - - -def find_codec(blob): - global all_codecs - for c in all_codecs: - try: - blob[:1024].decode(c) - return c - except Exception as e: - pass - try: - blob.decode(c) - return c - except Exception as e: - pass - - return "utf-8" - -QUESTION_PATTERN = [ - r"第([零一二三四五六七八九十百0-9]+)问", - r"第([零一二三四五六七八九十百0-9]+)条", - r"[\((]([零一二三四五六七八九十百]+)[\))]", - r"第([0-9]+)问", - r"第([0-9]+)条", - r"([0-9]{1,2})[\. 、]", - r"([零一二三四五六七八九十百]+)[ 、]", - r"[\((]([0-9]{1,2})[\))]", - r"QUESTION (ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN)", - r"QUESTION (I+V?|VI*|XI|IX|X)", - r"QUESTION ([0-9]+)", -] - -def has_qbullet(reg, box, last_box, last_index, last_bull, bull_x0_list): - section, last_section = box['text'], last_box['text'] - q_reg = r'(\w|\W)*?(?:?|\?|\n|$)+' - full_reg = reg + q_reg - has_bull = re.match(full_reg, section) - index_str = None - if has_bull: - if 'x0' not in last_box: - last_box['x0'] = box['x0'] - if 'top' not in last_box: - last_box['top'] = box['top'] - if last_bull and box['x0']-last_box['x0']>10: - return None, last_index - if not last_bull and box['x0'] >= last_box['x0'] and box['top'] - last_box['top'] < 20: - return None, last_index - avg_bull_x0 = 0 - if bull_x0_list: - avg_bull_x0 = sum(bull_x0_list) / len(bull_x0_list) - else: - avg_bull_x0 = box['x0'] - if box['x0'] - avg_bull_x0 > 10: - return None, last_index - index_str = has_bull.group(1) - index = index_int(index_str) - if last_section[-1] == ':' or last_section[-1] == ':': - return None, last_index - if not last_index or index >= last_index: - bull_x0_list.append(box['x0']) - return has_bull, index - if section[-1] == '?' 
or section[-1] == '?': - bull_x0_list.append(box['x0']) - return has_bull, index - if box['layout_type'] == 'title': - bull_x0_list.append(box['x0']) - return has_bull, index - pure_section = section.lstrip(re.match(reg, section).group()).lower() - ask_reg = r'(what|when|where|how|why|which|who|whose|为什么|为啥|哪)' - if re.match(ask_reg, pure_section): - bull_x0_list.append(box['x0']) - return has_bull, index - return None, last_index - -def index_int(index_str): - res = -1 - try: - res=int(index_str) - except ValueError: - try: - res=w2n.word_to_num(index_str) - except ValueError: - try: - res = cn2an(index_str) - except ValueError: - try: - res = r.number(index_str) - except ValueError: - return -1 - return res - -def qbullets_category(sections): - global QUESTION_PATTERN - hits = [0] * len(QUESTION_PATTERN) - for i, pro in enumerate(QUESTION_PATTERN): - for sec in sections: - if re.match(pro, sec) and not not_bullet(sec): - hits[i] += 1 - break - maxium = 0 - res = -1 - for i, h in enumerate(hits): - if h <= maxium: - continue - res = i - maxium = h - return res, QUESTION_PATTERN[res] - -BULLET_PATTERN = [[ - r"第[零一二三四五六七八九十百0-9]+(分?编|部分)", - r"第[零一二三四五六七八九十百0-9]+章", - r"第[零一二三四五六七八九十百0-9]+节", - r"第[零一二三四五六七八九十百0-9]+条", - r"[\((][零一二三四五六七八九十百]+[\))]", -], [ - r"第[0-9]+章", - r"第[0-9]+节", - r"[0-9]{,2}[\. 、]", - r"[0-9]{,2}\.[0-9]{,2}[^a-zA-Z/%~-]", - r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}", - r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}", -], [ - r"第[零一二三四五六七八九十百0-9]+章", - r"第[零一二三四五六七八九十百0-9]+节", - r"[零一二三四五六七八九十百]+[ 、]", - r"[\((][零一二三四五六七八九十百]+[\))]", - r"[\((][0-9]{,2}[\))]", -], [ - r"PART (ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN)", - r"Chapter (I+V?|VI*|XI|IX|X)", - r"Section [0-9]+", - r"Article [0-9]+" -] -] - - -def random_choices(arr, k): - k = min(len(arr), k) - return random.choices(arr, k=k) - - -def not_bullet(line): - patt = [ - r"0", r"[0-9]+ +[0-9~个只-]", r"[0-9]+\.{2,}" - ] - return any([re.match(r, line) for r in patt]) - - -def bullets_category(sections): - global BULLET_PATTERN - hits = [0] * len(BULLET_PATTERN) - for i, pro in enumerate(BULLET_PATTERN): - for sec in sections: - for p in pro: - if re.match(p, sec) and not not_bullet(sec): - hits[i] += 1 - break - maxium = 0 - res = -1 - for i, h in enumerate(hits): - if h <= maxium: - continue - res = i - maxium = h - return res - - -def is_english(texts): - eng = 0 - if not texts: return False - for t in texts: - if re.match(r"[a-zA-Z]{2,}", t.strip()): - eng += 1 - if eng / len(texts) > 0.8: - return True - return False - - -def tokenize(d, t, eng): - d["content_with_weight"] = t - t = re.sub(r"]{0,12})?>", " ", t) - d["content_ltks"] = rag_tokenizer.tokenize(t) - d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"]) - - -def tokenize_chunks(chunks, doc, eng, pdf_parser=None): - res = [] - # wrap up as es documents - for ck in chunks: - if len(ck.strip()) == 0:continue - print("--", ck) - d = copy.deepcopy(doc) - if pdf_parser: - try: - d["image"], poss = pdf_parser.crop(ck, need_position=True) - add_positions(d, poss) - ck = pdf_parser.remove_tag(ck) - except NotImplementedError as e: - pass - tokenize(d, ck, eng) - res.append(d) - return res - - -def tokenize_chunks_docx(chunks, doc, eng, images): - res = [] - # wrap up as es documents - for ck, image in zip(chunks, images): - if len(ck.strip()) == 0:continue - print("--", ck) - d = copy.deepcopy(doc) - d["image"] = image - tokenize(d, ck, eng) - res.append(d) - return res - - -def tokenize_table(tbls, doc, eng, batch_size=10): - res = [] - 
# add tables - for (img, rows), poss in tbls: - if not rows: - continue - if isinstance(rows, str): - d = copy.deepcopy(doc) - tokenize(d, rows, eng) - d["content_with_weight"] = rows - if img: d["image"] = img - if poss: add_positions(d, poss) - res.append(d) - continue - de = "; " if eng else "; " - for i in range(0, len(rows), batch_size): - d = copy.deepcopy(doc) - r = de.join(rows[i:i + batch_size]) - tokenize(d, r, eng) - d["image"] = img - add_positions(d, poss) - res.append(d) - return res - - -def add_positions(d, poss): - if not poss: - return - d["page_num_int"] = [] - d["position_int"] = [] - d["top_int"] = [] - for pn, left, right, top, bottom in poss: - d["page_num_int"].append(int(pn + 1)) - d["top_int"].append(int(top)) - d["position_int"].append((int(pn + 1), int(left), int(right), int(top), int(bottom))) - - -def remove_contents_table(sections, eng=False): - i = 0 - while i < len(sections): - def get(i): - nonlocal sections - return (sections[i] if isinstance(sections[i], - type("")) else sections[i][0]).strip() - - if not re.match(r"(contents|目录|目次|table of contents|致谢|acknowledge)$", - re.sub(r"( | |\u3000)+", "", get(i).split("@@")[0], re.IGNORECASE)): - i += 1 - continue - sections.pop(i) - if i >= len(sections): - break - prefix = get(i)[:3] if not eng else " ".join(get(i).split(" ")[:2]) - while not prefix: - sections.pop(i) - if i >= len(sections): - break - prefix = get(i)[:3] if not eng else " ".join(get(i).split(" ")[:2]) - sections.pop(i) - if i >= len(sections) or not prefix: - break - for j in range(i, min(i + 128, len(sections))): - if not re.match(prefix, get(j)): - continue - for _ in range(i, j): - sections.pop(i) - break - - -def make_colon_as_title(sections): - if not sections: - return [] - if isinstance(sections[0], type("")): - return sections - i = 0 - while i < len(sections): - txt, layout = sections[i] - i += 1 - txt = txt.split("@")[0].strip() - if not txt: - continue - if txt[-1] not in "::": - continue - txt = txt[::-1] - arr = re.split(r"([。?!!?;;]| \.)", txt) - if len(arr) < 2 or len(arr[1]) < 32: - continue - sections.insert(i - 1, (arr[0][::-1], "title")) - i += 1 - - -def title_frequency(bull, sections): - bullets_size = len(BULLET_PATTERN[bull]) - levels = [bullets_size+1 for _ in range(len(sections))] - if not sections or bull < 0: - return bullets_size+1, levels - - for i, (txt, layout) in enumerate(sections): - for j, p in enumerate(BULLET_PATTERN[bull]): - if re.match(p, txt.strip()) and not not_bullet(txt): - levels[i] = j - break - else: - if re.search(r"(title|head)", layout) and not not_title(txt.split("@")[0]): - levels[i] = bullets_size - most_level = bullets_size+1 - for l, c in sorted(Counter(levels).items(), key=lambda x:x[1]*-1): - if l <= bullets_size: - most_level = l - break - return most_level, levels - - -def not_title(txt): - if re.match(r"第[零一二三四五六七八九十百0-9]+条", txt): - return False - if len(txt.split(" ")) > 12 or (txt.find(" ") < 0 and len(txt) >= 32): - return True - return re.search(r"[,;,。;!!]", txt) - - -def hierarchical_merge(bull, sections, depth): - if not sections or bull < 0: - return [] - if isinstance(sections[0], type("")): - sections = [(s, "") for s in sections] - sections = [(t, o) for t, o in sections if - t and len(t.split("@")[0].strip()) > 1 and not re.match(r"[0-9]+$", t.split("@")[0].strip())] - bullets_size = len(BULLET_PATTERN[bull]) - levels = [[] for _ in range(bullets_size + 2)] - - - for i, (txt, layout) in enumerate(sections): - for j, p in enumerate(BULLET_PATTERN[bull]): - if re.match(p, 
txt.strip()): - levels[j].append(i) - break - else: - if re.search(r"(title|head)", layout) and not not_title(txt): - levels[bullets_size].append(i) - else: - levels[bullets_size + 1].append(i) - sections = [t for t, _ in sections] - - # for s in sections: print("--", s) - - def binary_search(arr, target): - if not arr: - return -1 - if target > arr[-1]: - return len(arr) - 1 - if target < arr[0]: - return -1 - s, e = 0, len(arr) - while e - s > 1: - i = (e + s) // 2 - if target > arr[i]: - s = i - continue - elif target < arr[i]: - e = i - continue - else: - assert False - return s - - cks = [] - readed = [False] * len(sections) - levels = levels[::-1] - for i, arr in enumerate(levels[:depth]): - for j in arr: - if readed[j]: - continue - readed[j] = True - cks.append([j]) - if i + 1 == len(levels) - 1: - continue - for ii in range(i + 1, len(levels)): - jj = binary_search(levels[ii], j) - if jj < 0: - continue - if jj > cks[-1][-1]: - cks[-1].pop(-1) - cks[-1].append(levels[ii][jj]) - for ii in cks[-1]: - readed[ii] = True - - if not cks: - return cks - - for i in range(len(cks)): - cks[i] = [sections[j] for j in cks[i][::-1]] - print("--------------\n", "\n* ".join(cks[i])) - - res = [[]] - num = [0] - for ck in cks: - if len(ck) == 1: - n = num_tokens_from_string(re.sub(r"@@[0-9]+.*", "", ck[0])) - if n + num[-1] < 218: - res[-1].append(ck[0]) - num[-1] += n - continue - res.append(ck) - num.append(n) - continue - res.append(ck) - num.append(218) - - return res - - -def naive_merge(sections, chunk_token_num=128, delimiter="\n。;!?"): - if not sections: - return [] - if isinstance(sections[0], type("")): - sections = [(s, "") for s in sections] - cks = [""] - tk_nums = [0] - - def add_chunk(t, pos): - nonlocal cks, tk_nums, delimiter - tnum = num_tokens_from_string(t) - if not pos: pos = "" - if tnum < 8: - pos = "" - # Ensure that the length of the merged chunk does not exceed chunk_token_num - if tk_nums[-1] > chunk_token_num: - - if t.find(pos) < 0: - t += pos - cks.append(t) - tk_nums.append(tnum) - else: - if cks[-1].find(pos) < 0: - t += pos - cks[-1] += t - tk_nums[-1] += tnum - - for sec, pos in sections: - add_chunk(sec, pos) - continue - s, e = 0, 1 - while e < len(sec): - if sec[e] in delimiter: - add_chunk(sec[s: e + 1], pos) - s = e + 1 - e = s + 1 - else: - e += 1 - if s < e: - add_chunk(sec[s: e], pos) - - return cks - - -def docx_question_level(p, bull = -1): - txt = re.sub(r"\u3000", " ", p.text).strip() - if p.style.name.startswith('Heading'): - return int(p.style.name.split(' ')[-1]), txt - else: - if bull < 0: - return 0, txt - for j, title in enumerate(BULLET_PATTERN[bull]): - if re.match(title, txt): - return j+1, txt - return len(BULLET_PATTERN[bull]), txt - - -def concat_img(img1, img2): - if img1 and not img2: - return img1 - if not img1 and img2: - return img2 - if not img1 and not img2: - return None - width1, height1 = img1.size - width2, height2 = img2.size - - new_width = max(width1, width2) - new_height = height1 + height2 - new_image = Image.new('RGB', (new_width, new_height)) - - new_image.paste(img1, (0, 0)) - new_image.paste(img2, (0, height1)) - - return new_image - - -def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。;!?"): - if not sections: - return [], [] - - cks = [""] - images = [None] - tk_nums = [0] - - def add_chunk(t, image, pos=""): - nonlocal cks, tk_nums, delimiter - tnum = num_tokens_from_string(t) - if tnum < 8: - pos = "" - if tk_nums[-1] > chunk_token_num: - if t.find(pos) < 0: - t += pos - cks.append(t) - 
images.append(image) - tk_nums.append(tnum) - else: - if cks[-1].find(pos) < 0: - t += pos - cks[-1] += t - images[-1] = concat_img(images[-1], image) - tk_nums[-1] += tnum - - for sec, image in sections: - add_chunk(sec, image, '') - - return cks, images - - -def keyword_extraction(chat_mdl, content): - prompt = """ -You're a question analyzer. -1. Please give me the most important keyword/phrase of this question. -Answer format: (in language of user's question) - - keyword: -""" - kwd = chat_mdl.chat(prompt, [{"role": "user", "content": content}], {"temperature": 0.2}) - if isinstance(kwd, tuple): return kwd[0] - return kwd +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import random +from collections import Counter + +from rag.utils import num_tokens_from_string +from . import rag_tokenizer +import re +import copy +import roman_numbers as r +from word2number import w2n +from cn2an import cn2an +from PIL import Image + +all_codecs = [ + 'utf-8', 'gb2312', 'gbk', 'utf_16', 'ascii', 'big5', 'big5hkscs', + 'cp037', 'cp273', 'cp424', 'cp437', + 'cp500', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp856', 'cp857', + 'cp858', 'cp860', 'cp861', 'cp862', 'cp863', 'cp864', 'cp865', 'cp866', 'cp869', + 'cp874', 'cp875', 'cp932', 'cp949', 'cp950', 'cp1006', 'cp1026', 'cp1125', + 'cp1140', 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 'cp1256', + 'cp1257', 'cp1258', 'euc_jp', 'euc_jis_2004', 'euc_jisx0213', 'euc_kr', + 'gb2312', 'gb18030', 'hz', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', + 'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', 'iso2022_kr', 'latin_1', + 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', 'iso8859_7', + 'iso8859_8', 'iso8859_9', 'iso8859_10', 'iso8859_11', 'iso8859_13', + 'iso8859_14', 'iso8859_15', 'iso8859_16', 'johab', 'koi8_r', 'koi8_t', 'koi8_u', + 'kz1048', 'mac_cyrillic', 'mac_greek', 'mac_iceland', 'mac_latin2', 'mac_roman', + 'mac_turkish', 'ptcp154', 'shift_jis', 'shift_jis_2004', 'shift_jisx0213', + 'utf_32', 'utf_32_be', 'utf_32_le''utf_16_be', 'utf_16_le', 'utf_7' +] + + +def find_codec(blob): + global all_codecs + for c in all_codecs: + try: + blob[:1024].decode(c) + return c + except Exception as e: + pass + try: + blob.decode(c) + return c + except Exception as e: + pass + + return "utf-8" + +QUESTION_PATTERN = [ + r"第([零一二三四五六七八九十百0-9]+)问", + r"第([零一二三四五六七八九十百0-9]+)条", + r"[\((]([零一二三四五六七八九十百]+)[\))]", + r"第([0-9]+)问", + r"第([0-9]+)条", + r"([0-9]{1,2})[\. 
、]", + r"([零一二三四五六七八九十百]+)[ 、]", + r"[\((]([0-9]{1,2})[\))]", + r"QUESTION (ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN)", + r"QUESTION (I+V?|VI*|XI|IX|X)", + r"QUESTION ([0-9]+)", +] + +def has_qbullet(reg, box, last_box, last_index, last_bull, bull_x0_list): + section, last_section = box['text'], last_box['text'] + q_reg = r'(\w|\W)*?(?:?|\?|\n|$)+' + full_reg = reg + q_reg + has_bull = re.match(full_reg, section) + index_str = None + if has_bull: + if 'x0' not in last_box: + last_box['x0'] = box['x0'] + if 'top' not in last_box: + last_box['top'] = box['top'] + if last_bull and box['x0']-last_box['x0']>10: + return None, last_index + if not last_bull and box['x0'] >= last_box['x0'] and box['top'] - last_box['top'] < 20: + return None, last_index + avg_bull_x0 = 0 + if bull_x0_list: + avg_bull_x0 = sum(bull_x0_list) / len(bull_x0_list) + else: + avg_bull_x0 = box['x0'] + if box['x0'] - avg_bull_x0 > 10: + return None, last_index + index_str = has_bull.group(1) + index = index_int(index_str) + if last_section[-1] == ':' or last_section[-1] == ':': + return None, last_index + if not last_index or index >= last_index: + bull_x0_list.append(box['x0']) + return has_bull, index + if section[-1] == '?' or section[-1] == '?': + bull_x0_list.append(box['x0']) + return has_bull, index + if box['layout_type'] == 'title': + bull_x0_list.append(box['x0']) + return has_bull, index + pure_section = section.lstrip(re.match(reg, section).group()).lower() + ask_reg = r'(what|when|where|how|why|which|who|whose|为什么|为啥|哪)' + if re.match(ask_reg, pure_section): + bull_x0_list.append(box['x0']) + return has_bull, index + return None, last_index + +def index_int(index_str): + res = -1 + try: + res=int(index_str) + except ValueError: + try: + res=w2n.word_to_num(index_str) + except ValueError: + try: + res = cn2an(index_str) + except ValueError: + try: + res = r.number(index_str) + except ValueError: + return -1 + return res + +def qbullets_category(sections): + global QUESTION_PATTERN + hits = [0] * len(QUESTION_PATTERN) + for i, pro in enumerate(QUESTION_PATTERN): + for sec in sections: + if re.match(pro, sec) and not not_bullet(sec): + hits[i] += 1 + break + maxium = 0 + res = -1 + for i, h in enumerate(hits): + if h <= maxium: + continue + res = i + maxium = h + return res, QUESTION_PATTERN[res] + +BULLET_PATTERN = [[ + r"第[零一二三四五六七八九十百0-9]+(分?编|部分)", + r"第[零一二三四五六七八九十百0-9]+章", + r"第[零一二三四五六七八九十百0-9]+节", + r"第[零一二三四五六七八九十百0-9]+条", + r"[\((][零一二三四五六七八九十百]+[\))]", +], [ + r"第[0-9]+章", + r"第[0-9]+节", + r"[0-9]{,2}[\. 
、]", + r"[0-9]{,2}\.[0-9]{,2}[^a-zA-Z/%~-]", + r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}", + r"[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}\.[0-9]{,2}", +], [ + r"第[零一二三四五六七八九十百0-9]+章", + r"第[零一二三四五六七八九十百0-9]+节", + r"[零一二三四五六七八九十百]+[ 、]", + r"[\((][零一二三四五六七八九十百]+[\))]", + r"[\((][0-9]{,2}[\))]", +], [ + r"PART (ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN)", + r"Chapter (I+V?|VI*|XI|IX|X)", + r"Section [0-9]+", + r"Article [0-9]+" +] +] + + +def random_choices(arr, k): + k = min(len(arr), k) + return random.choices(arr, k=k) + + +def not_bullet(line): + patt = [ + r"0", r"[0-9]+ +[0-9~个只-]", r"[0-9]+\.{2,}" + ] + return any([re.match(r, line) for r in patt]) + + +def bullets_category(sections): + global BULLET_PATTERN + hits = [0] * len(BULLET_PATTERN) + for i, pro in enumerate(BULLET_PATTERN): + for sec in sections: + for p in pro: + if re.match(p, sec) and not not_bullet(sec): + hits[i] += 1 + break + maxium = 0 + res = -1 + for i, h in enumerate(hits): + if h <= maxium: + continue + res = i + maxium = h + return res + + +def is_english(texts): + eng = 0 + if not texts: return False + for t in texts: + if re.match(r"[a-zA-Z]{2,}", t.strip()): + eng += 1 + if eng / len(texts) > 0.8: + return True + return False + + +def tokenize(d, t, eng): + d["content_with_weight"] = t + t = re.sub(r"]{0,12})?>", " ", t) + d["content_ltks"] = rag_tokenizer.tokenize(t) + d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"]) + + +def tokenize_chunks(chunks, doc, eng, pdf_parser=None): + res = [] + # wrap up as es documents + for ck in chunks: + if len(ck.strip()) == 0:continue + print("--", ck) + d = copy.deepcopy(doc) + if pdf_parser: + try: + d["image"], poss = pdf_parser.crop(ck, need_position=True) + add_positions(d, poss) + ck = pdf_parser.remove_tag(ck) + except NotImplementedError as e: + pass + tokenize(d, ck, eng) + res.append(d) + return res + + +def tokenize_chunks_docx(chunks, doc, eng, images): + res = [] + # wrap up as es documents + for ck, image in zip(chunks, images): + if len(ck.strip()) == 0:continue + print("--", ck) + d = copy.deepcopy(doc) + d["image"] = image + tokenize(d, ck, eng) + res.append(d) + return res + + +def tokenize_table(tbls, doc, eng, batch_size=10): + res = [] + # add tables + for (img, rows), poss in tbls: + if not rows: + continue + if isinstance(rows, str): + d = copy.deepcopy(doc) + tokenize(d, rows, eng) + d["content_with_weight"] = rows + if img: d["image"] = img + if poss: add_positions(d, poss) + res.append(d) + continue + de = "; " if eng else "; " + for i in range(0, len(rows), batch_size): + d = copy.deepcopy(doc) + r = de.join(rows[i:i + batch_size]) + tokenize(d, r, eng) + d["image"] = img + add_positions(d, poss) + res.append(d) + return res + + +def add_positions(d, poss): + if not poss: + return + d["page_num_int"] = [] + d["position_int"] = [] + d["top_int"] = [] + for pn, left, right, top, bottom in poss: + d["page_num_int"].append(int(pn + 1)) + d["top_int"].append(int(top)) + d["position_int"].append((int(pn + 1), int(left), int(right), int(top), int(bottom))) + + +def remove_contents_table(sections, eng=False): + i = 0 + while i < len(sections): + def get(i): + nonlocal sections + return (sections[i] if isinstance(sections[i], + type("")) else sections[i][0]).strip() + + if not re.match(r"(contents|目录|目次|table of contents|致谢|acknowledge)$", + re.sub(r"( | |\u3000)+", "", get(i).split("@@")[0], re.IGNORECASE)): + i += 1 + continue + sections.pop(i) + if i >= len(sections): + break + prefix = get(i)[:3] if not eng else " 
".join(get(i).split(" ")[:2]) + while not prefix: + sections.pop(i) + if i >= len(sections): + break + prefix = get(i)[:3] if not eng else " ".join(get(i).split(" ")[:2]) + sections.pop(i) + if i >= len(sections) or not prefix: + break + for j in range(i, min(i + 128, len(sections))): + if not re.match(prefix, get(j)): + continue + for _ in range(i, j): + sections.pop(i) + break + + +def make_colon_as_title(sections): + if not sections: + return [] + if isinstance(sections[0], type("")): + return sections + i = 0 + while i < len(sections): + txt, layout = sections[i] + i += 1 + txt = txt.split("@")[0].strip() + if not txt: + continue + if txt[-1] not in "::": + continue + txt = txt[::-1] + arr = re.split(r"([。?!!?;;]| \.)", txt) + if len(arr) < 2 or len(arr[1]) < 32: + continue + sections.insert(i - 1, (arr[0][::-1], "title")) + i += 1 + + +def title_frequency(bull, sections): + bullets_size = len(BULLET_PATTERN[bull]) + levels = [bullets_size+1 for _ in range(len(sections))] + if not sections or bull < 0: + return bullets_size+1, levels + + for i, (txt, layout) in enumerate(sections): + for j, p in enumerate(BULLET_PATTERN[bull]): + if re.match(p, txt.strip()) and not not_bullet(txt): + levels[i] = j + break + else: + if re.search(r"(title|head)", layout) and not not_title(txt.split("@")[0]): + levels[i] = bullets_size + most_level = bullets_size+1 + for l, c in sorted(Counter(levels).items(), key=lambda x:x[1]*-1): + if l <= bullets_size: + most_level = l + break + return most_level, levels + + +def not_title(txt): + if re.match(r"第[零一二三四五六七八九十百0-9]+条", txt): + return False + if len(txt.split(" ")) > 12 or (txt.find(" ") < 0 and len(txt) >= 32): + return True + return re.search(r"[,;,。;!!]", txt) + + +def hierarchical_merge(bull, sections, depth): + if not sections or bull < 0: + return [] + if isinstance(sections[0], type("")): + sections = [(s, "") for s in sections] + sections = [(t, o) for t, o in sections if + t and len(t.split("@")[0].strip()) > 1 and not re.match(r"[0-9]+$", t.split("@")[0].strip())] + bullets_size = len(BULLET_PATTERN[bull]) + levels = [[] for _ in range(bullets_size + 2)] + + + for i, (txt, layout) in enumerate(sections): + for j, p in enumerate(BULLET_PATTERN[bull]): + if re.match(p, txt.strip()): + levels[j].append(i) + break + else: + if re.search(r"(title|head)", layout) and not not_title(txt): + levels[bullets_size].append(i) + else: + levels[bullets_size + 1].append(i) + sections = [t for t, _ in sections] + + # for s in sections: print("--", s) + + def binary_search(arr, target): + if not arr: + return -1 + if target > arr[-1]: + return len(arr) - 1 + if target < arr[0]: + return -1 + s, e = 0, len(arr) + while e - s > 1: + i = (e + s) // 2 + if target > arr[i]: + s = i + continue + elif target < arr[i]: + e = i + continue + else: + assert False + return s + + cks = [] + readed = [False] * len(sections) + levels = levels[::-1] + for i, arr in enumerate(levels[:depth]): + for j in arr: + if readed[j]: + continue + readed[j] = True + cks.append([j]) + if i + 1 == len(levels) - 1: + continue + for ii in range(i + 1, len(levels)): + jj = binary_search(levels[ii], j) + if jj < 0: + continue + if jj > cks[-1][-1]: + cks[-1].pop(-1) + cks[-1].append(levels[ii][jj]) + for ii in cks[-1]: + readed[ii] = True + + if not cks: + return cks + + for i in range(len(cks)): + cks[i] = [sections[j] for j in cks[i][::-1]] + print("--------------\n", "\n* ".join(cks[i])) + + res = [[]] + num = [0] + for ck in cks: + if len(ck) == 1: + n = 
+
+
+def hierarchical_merge(bull, sections, depth):
+    if not sections or bull < 0:
+        return []
+    if isinstance(sections[0], type("")):
+        sections = [(s, "") for s in sections]
+    sections = [(t, o) for t, o in sections if
+                t and len(t.split("@")[0].strip()) > 1 and not re.match(r"[0-9]+$", t.split("@")[0].strip())]
+    bullets_size = len(BULLET_PATTERN[bull])
+    levels = [[] for _ in range(bullets_size + 2)]
+
+    for i, (txt, layout) in enumerate(sections):
+        for j, p in enumerate(BULLET_PATTERN[bull]):
+            if re.match(p, txt.strip()):
+                levels[j].append(i)
+                break
+        else:
+            if re.search(r"(title|head)", layout) and not not_title(txt):
+                levels[bullets_size].append(i)
+            else:
+                levels[bullets_size + 1].append(i)
+    sections = [t for t, _ in sections]
+
+    # for s in sections: print("--", s)
+
+    def binary_search(arr, target):
+        if not arr:
+            return -1
+        if target > arr[-1]:
+            return len(arr) - 1
+        if target < arr[0]:
+            return -1
+        s, e = 0, len(arr)
+        while e - s > 1:
+            i = (e + s) // 2
+            if target > arr[i]:
+                s = i
+                continue
+            elif target < arr[i]:
+                e = i
+                continue
+            else:
+                assert False
+        return s
+
+    cks = []
+    readed = [False] * len(sections)
+    levels = levels[::-1]
+    for i, arr in enumerate(levels[:depth]):
+        for j in arr:
+            if readed[j]:
+                continue
+            readed[j] = True
+            cks.append([j])
+            if i + 1 == len(levels) - 1:
+                continue
+            for ii in range(i + 1, len(levels)):
+                jj = binary_search(levels[ii], j)
+                if jj < 0:
+                    continue
+                if jj > cks[-1][-1]:
+                    cks[-1].pop(-1)
+                cks[-1].append(levels[ii][jj])
+            for ii in cks[-1]:
+                readed[ii] = True
+
+    if not cks:
+        return cks
+
+    for i in range(len(cks)):
+        cks[i] = [sections[j] for j in cks[i][::-1]]
+        print("--------------\n", "\n* ".join(cks[i]))
+
+    res = [[]]
+    num = [0]
+    for ck in cks:
+        if len(ck) == 1:
+            n = num_tokens_from_string(re.sub(r"@@[0-9]+.*", "", ck[0]))
+            if n + num[-1] < 218:
+                res[-1].append(ck[0])
+                num[-1] += n
+                continue
+            res.append(ck)
+            num.append(n)
+            continue
+        res.append(ck)
+        num.append(218)
+
+    return res
+
+
+def naive_merge(sections, chunk_token_num=128, delimiter="\n。；！？"):
+    if not sections:
+        return []
+    if isinstance(sections[0], type("")):
+        sections = [(s, "") for s in sections]
+    cks = [""]
+    tk_nums = [0]
+
+    def add_chunk(t, pos):
+        nonlocal cks, tk_nums, delimiter
+        tnum = num_tokens_from_string(t)
+        if not pos:
+            pos = ""
+        if tnum < 8:
+            pos = ""
+        # Ensure that the length of the merged chunk does not exceed chunk_token_num
+        if tk_nums[-1] > chunk_token_num:
+            if t.find(pos) < 0:
+                t += pos
+            cks.append(t)
+            tk_nums.append(tnum)
+        else:
+            if cks[-1].find(pos) < 0:
+                t += pos
+            cks[-1] += t
+            tk_nums[-1] += tnum
+
+    for sec, pos in sections:
+        add_chunk(sec, pos)
+        continue
+        # NOTE: the delimiter-splitting below is unreachable; the `continue`
+        # above short-circuits it, so whole sections are merged as-is.
+        s, e = 0, 1
+        while e < len(sec):
+            if sec[e] in delimiter:
+                add_chunk(sec[s: e + 1], pos)
+                s = e + 1
+                e = s + 1
+            else:
+                e += 1
+        if s < e:
+            add_chunk(sec[s: e], pos)
+
+    return cks
+
+
+def docx_question_level(p, bull=-1):
+    txt = re.sub(r"\u3000", " ", p.text).strip()
+    if p.style.name.startswith('Heading'):
+        return int(p.style.name.split(' ')[-1]), txt
+    else:
+        if bull < 0:
+            return 0, txt
+        for j, title in enumerate(BULLET_PATTERN[bull]):
+            if re.match(title, txt):
+                return j + 1, txt
+        return len(BULLET_PATTERN[bull]), txt
+
+
+def concat_img(img1, img2):
+    if img1 and not img2:
+        return img1
+    if not img1 and img2:
+        return img2
+    if not img1 and not img2:
+        return None
+    width1, height1 = img1.size
+    width2, height2 = img2.size
+
+    new_width = max(width1, width2)
+    new_height = height1 + height2
+    new_image = Image.new('RGB', (new_width, new_height))
+
+    new_image.paste(img1, (0, 0))
+    new_image.paste(img2, (0, height1))
+
+    return new_image
+
+
+def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。；！？"):
+    if not sections:
+        return [], []
+
+    cks = [""]
+    images = [None]
+    tk_nums = [0]
+
+    def add_chunk(t, image, pos=""):
+        nonlocal cks, tk_nums, delimiter
+        tnum = num_tokens_from_string(t)
+        if tnum < 8:
+            pos = ""
+        if tk_nums[-1] > chunk_token_num:
+            if t.find(pos) < 0:
+                t += pos
+            cks.append(t)
+            images.append(image)
+            tk_nums.append(tnum)
+        else:
+            if cks[-1].find(pos) < 0:
+                t += pos
+            cks[-1] += t
+            images[-1] = concat_img(images[-1], image)
+            tk_nums[-1] += tnum
+
+    for sec, image in sections:
+        add_chunk(sec, image, '')
+
+    return cks, images
+
+
+def keyword_extraction(chat_mdl, content):
+    prompt = """
+You're a question analyzer.
+1. Please give me the most important keyword/phrase of this question.
+Answer format: (in language of user's question) + - keyword: +""" + kwd = chat_mdl.chat(prompt, [{"role": "user", "content": content}], {"temperature": 0.2}) + if isinstance(kwd, tuple): return kwd[0] + return kwd diff --git a/rag/res/ner.json b/rag/res/ner.json index c6e19b8e912c09d0131ae7a7e0afc8a9ec2d1c3a..3e6ce4dc145474be296fff50ce0403710cae2c7d 100644 --- a/rag/res/ner.json +++ b/rag/res/ner.json @@ -1,12519 +1,12519 @@ -{ -"一中一台": "toxic", -"一党专制": "toxic", -"一党专政": "toxic", -"一党执政": "toxic", -"一党独裁": "toxic", -"一党私利": "toxic", -"一卡多号": "toxic", -"一寸山河一寸血": "toxic", -"一小撮别": "toxic", -"一本道": "toxic", -"丁子霖": "toxic", -"丁鑫发": "toxic", -"丁香社": "toxic", -"七大军区": "toxic", -"万人大签名": "toxic", -"万人骚动": "toxic", -"万法归一功": "toxic", -"万润南": "toxic", -"万维读者论坛": "toxic", -"万里大造林": "toxic", -"三三九乘元功": "toxic", -"三个呆婊": "toxic", -"三分钟教你原子弹": "toxic", -"三利达弓弩专卖网": "toxic", -"三利达弓弩军刀": "toxic", -"三利达弓弩直营": "toxic", -"三利达弓弩配件": "toxic", -"三去车仑": "toxic", -"三水法轮": "toxic", -"三班仆人派": "toxic", -"三硝基甲苯": "toxic", -"三箭气枪出售": "toxic", -"三陪": "toxic", -"上中央": "toxic", -"上海帮": "toxic", -"上海独立": "toxic", -"上网文凭": "toxic", -"上访": "toxic", -"上门激": "toxic", -"下法轮": "toxic", -"不得好死": "toxic", -"世界经济导报": "toxic", -"世维会": "toxic", -"丘小雄": "toxic", -"丘广钟": "toxic", -"业力回报": "toxic", -"业力轮": "toxic", -"丛福奎": "toxic", -"东京热": "toxic", -"东伊运": "toxic", -"东北独立": "toxic", -"东南西北论谈": "toxic", -"东土耳其斯坦": "toxic", -"东复活": "toxic", -"东方微点": "toxic", -"东方红时空": "toxic", -"东方闪电": "toxic", -"东森新闻网": "toxic", -"东森电视": "toxic", -"东突": "toxic", -"东突厥斯坦解放组织": "toxic", -"东突解放组织": "toxic", -"东西南北论坛": "toxic", -"丝袜保": "toxic", -"两岸才子": "toxic", -"严家其": "toxic", -"严晓玲": "toxic", -"严重违纪": "toxic", -"个人圆满说": "toxic", -"个四小码": "toxic", -"个邪的党": "toxic", -"丫与王益": "toxic", -"中gong": "toxic", -"中共": "toxic", -"中共任用": "toxic", -"中共保命": "toxic", -"中共党文化": "toxic", -"中共封网": "toxic", -"中共封锁": "toxic", -"中共帝国": "toxic", -"中共帮凶": "toxic", -"中共恐惧": "toxic", -"中共政治游戏": "toxic", -"中共权力斗争": "toxic", -"中共洗脑": "toxic", -"中共独裁": "toxic", -"中共的罪恶": "toxic", -"中共的血旗": "toxic", -"中共腐败": "toxic", -"中共裁": "toxic", -"中共解体": "toxic", -"中共近期权力斗争": "toxic", -"中共退党": "toxic", -"中共邪教": "toxic", -"中共邪毒素": "toxic", -"中共黑": "toxic", -"中共黑帮": "toxic", -"中办发": "toxic", -"中功": "toxic", -"中华养生益智功": "toxic", -"中华养生益智气": "toxic", -"中华局域网": "toxic", -"中华帝国": "toxic", -"中华昆仑女神功": "toxic", -"中华联邦": "toxic", -"中南海": "toxic", -"中南海恩仇录": "toxic", -"中南海斗争": "toxic", -"中南海权力斗争": "toxic", -"中南海的权力游戏": "toxic", -"中南海黑幕": "toxic", -"中印边界谈判结果": "toxic", -"中国zf": "toxic", -"中国不强": "toxic", -"中国之春": "toxic", -"中国人权": "toxic", -"中国人民党": "toxic", -"中国共和党": "toxic", -"中国复兴党": "toxic", -"中国官场情杀案": "toxic", -"中国实行血腥教育": "toxic", -"中国当局": "toxic", -"中国教徒": "toxic", -"中国数字时代": "toxic", -"中国新民党": "toxic", -"中国时报": "toxic", -"中国正义党": "toxic", -"中国民主党": "toxic", -"中国民主正义党": "toxic", -"中国民主运动": "toxic", -"中国没有自由": "toxic", -"中国海外腐败兵团": "toxic", -"中国猪": "toxic", -"中国的陷阱": "toxic", -"中国网络审查": "toxic", -"中国舆论监督网周洪": "toxic", -"中国论坛": "toxic", -"中国贪官在海外": "toxic", -"中国过渡政府": "toxic", -"中国高层权力斗争": "toxic", -"中央zf": "toxic", -"中央领导": "toxic", -"中央黑幕": "toxic", -"中特": "toxic", -"中珙": "toxic", -"中的班禅": "toxic", -"临震预报": "toxic", -"丹增嘉措": "toxic", -"为党不为国": "toxic", -"主席忏": "toxic", -"主席李世民": "toxic", -"主权": "toxic", -"主神教": "toxic", -"丽媛离": "toxic", -"举国体": "toxic", -"乏仑": "toxic", -"乏伦": "toxic", -"乏囵": "toxic", -"乏抡": "toxic", -"乏沦": "toxic", -"乏纶": "toxic", -"乏论": "toxic", -"乏轮": "toxic", -"乐透码": "toxic", -"乖乖粉": "toxic", -"九ping": "toxic", -"九十三运动": "toxic", -"九学": "toxic", -"九常委": "toxic", -"九评": 
"toxic", -"九评共": "toxic", -"九长老": "toxic", -"九风": "toxic", -"九龙论坛": "toxic", -"习仲勋": "toxic", -"习太子": "toxic", -"习明泽": "toxic", -"习晋平": "toxic", -"习近平": "toxic", -"习进平": "toxic", -"书办理": "toxic", -"买别墅": "toxic", -"乱伦": "toxic", -"了件渔袍": "toxic", -"二十四事件": "toxic", -"于剑鸣": "toxic", -"于幼军": "toxic", -"互联网审查": "toxic", -"五套功法": "toxic", -"五毛们": "toxic", -"五毛党": "toxic", -"亡党亡国": "toxic", -"亢议": "toxic", -"产党共": "toxic", -"京地震": "toxic", -"京夫子": "toxic", -"京要地震": "toxic", -"人quan": "toxic", -"人体炸弹": "toxic", -"人宇特能功": "toxic", -"人弹": "toxic", -"人拳": "toxic", -"人木又": "toxic", -"人民之声论坛": "toxic", -"人民币恶搞": "toxic", -"人民报": "toxic", -"人渣": "toxic", -"人肉炸弹": "toxic", -"仇共": "toxic", -"仓井空": "toxic", -"付晓光": "toxic", -"令计划": "toxic", -"伐仑": "toxic", -"伐伦": "toxic", -"伐囵": "toxic", -"伐抡": "toxic", -"伐沦": "toxic", -"伐论": "toxic", -"伐轮": "toxic", -"传九促三": "toxic", -"传九退三": "toxic", -"何洪达": "toxic", -"何清涟": "toxic", -"何祚庥": "toxic", -"余杰": "toxic", -"你他妈": "toxic", -"你吗b": "toxic", -"你妈的": "toxic", -"你说我说论坛": "toxic", -"你麻痹": "toxic", -"供产": "toxic", -"供铲党": "toxic", -"供铲裆": "toxic", -"供铲谠": "toxic", -"侯伍杰": "toxic", -"侯德健": "toxic", -"俄罗斯轮盘": "toxic", -"俄羅斯": "toxic", -"保钓组织": "toxic", -"俞正声": "toxic", -"信访": "toxic", -"信访专班": "toxic", -"修炼大法": "toxic", -"借腹生子": "toxic", -"倪献策": "toxic", -"假币出售": "toxic", -"假庆淋": "toxic", -"假文凭": "toxic", -"假证件": "toxic", -"偷偷贪": "toxic", -"偷听器": "toxic", -"偷肃贪": "toxic", -"偷電器": "toxic", -"催情粉": "toxic", -"催情药": "toxic", -"催情藥": "toxic", -"催眠水": "toxic", -"傻b": "toxic", -"傻比": "toxic", -"傻逼": "toxic", -"光复民国": "toxic", -"光祖": "toxic", -"党内危机": "toxic", -"党内权争": "toxic", -"党内权力": "toxic", -"党内言事潮": "toxic", -"党前干劲": "toxic", -"党后萎": "toxic", -"党校安插亲信": "toxic", -"党棍": "toxic", -"党的喉舌": "toxic", -"党禁": "toxic", -"党章": "toxic", -"党鞭": "toxic", -"党风日下": "toxic", -"党魁": "toxic", -"全家不得好死": "toxic", -"全家死光": "toxic", -"全家死绝": "toxic", -"全范围教会": "toxic", -"八九": "toxic", -"八九年": "toxic", -"八九政治": "toxic", -"八老": "toxic", -"公产党": "toxic", -"六HE彩": "toxic", -"六代接班人": "toxic", -"六和谐四": "toxic", -"六 四": "toxic", -"六.四": "toxic", -"六四": "toxic", -"六四事": "toxic", -"六四事件": "toxic", -"六四信息": "toxic", -"六四内部日记": "toxic", -"六四受难者家属证辞": "toxic", -"六四资料馆": "toxic", -"六月联盟": "toxic", -"六月飞雪": "toxic", -"六死": "toxic", -"六河蟹四": "toxic", -"六百度四": "toxic", -"兰州军区": "toxic", -"共c党": "toxic", -"共x党": "toxic", -"共一产一党": "toxic", -"共产": "toxic", -"共产专制": "toxic", -"共产主义的幽灵": "toxic", -"共产主义黑皮书": "toxic", -"共产党专制": "toxic", -"共产党的报应": "toxic", -"共产党的末日": "toxic", -"共产党腐败": "toxic", -"共产王朝": "toxic", -"共匪": "toxic", -"共和国2049": "toxic", -"共字玄机": "toxic", -"共惨": "toxic", -"共惨党": "toxic", -"共残主义": "toxic", -"共残党": "toxic", -"共残裆": "toxic", -"共狗": "toxic", -"共王储": "toxic", -"共贪党": "toxic", -"共铲": "toxic", -"共铲党": "toxic", -"共青团派": "toxic", -"共青背景": "toxic", -"兽交": "toxic", -"内争人权": "toxic", -"内斗": "toxic", -"冈本真": "toxic", -"写两会": "toxic", -"冤民大同盟": "toxic", -"冯正虎": "toxic", -"冯珏": "toxic", -"冰在火上": "toxic", -"冰毒": "toxic", -"刘克田": "toxic", -"刘刚": "toxic", -"刘宾雁": "toxic", -"刘志军": "toxic", -"刘志华": "toxic", -"刘方仁": "toxic", -"刘明康": "toxic", -"刘晓竹": "toxic", -"刘知炳": "toxic", -"刘维明": "toxic", -"刘连昆": "toxic", -"刘金宝": "toxic", -"刘长贵": "toxic", -"判处死刑": "toxic", -"别他吗": "toxic", -"别梦成灰": "toxic", -"北京之春": "toxic", -"北京事件": "toxic", -"北京军区": "toxic", -"北京市委黑幕": "toxic", -"北京帮": "toxic", -"北京当局": "toxic", -"北京政坛清华名人": "toxic", -"北京政权": "toxic", -"北京独立": "toxic", -"北京风波": "toxic", -"北京黑幕": "toxic", -"北国之春": "toxic", -"北姑": "toxic", -"北省委门": "toxic", -"北美巡回讲法": "toxic", -"北美自由论坛": "toxic", -"北美讲坛": "toxic", 
-"北韩": "toxic", -"北高联": "toxic", -"十7大": "toxic", -"十七位老部长": "toxic", -"十七大": "toxic", -"十七大人事安排": "toxic", -"十七大权力争霸战": "toxic", -"十八大": "toxic", -"十八大接班人": "toxic", -"十大独裁": "toxic", -"华国锋": "toxic", -"华夏文摘": "toxic", -"华夏论坛": "toxic", -"华如秀": "toxic", -"华岳时事论坛": "toxic", -"华盛顿邮报": "toxic", -"华藏功": "toxic", -"华语世界论坛": "toxic", -"华通时事论坛": "toxic", -"南京军区": "toxic", -"南充针": "toxic", -"南大自由论坛": "toxic", -"南方军刀网": "toxic", -"南街村": "toxic", -"博会暂停": "toxic", -"博彩娱": "toxic", -"博讯": "toxic", -"占领台湾": "toxic", -"卧槽": "toxic", -"卧艹": "toxic", -"印尼事件": "toxic", -"印尼屠华": "toxic", -"原一九五七": "toxic", -"去中央": "toxic", -"双开": "toxic", -"双筒": "toxic", -"双管平": "toxic", -"双管立": "toxic", -"双规": "toxic", -"反party": "toxic", -"反中共黑色暴力": "toxic", -"反共": "toxic", -"反共传单": "toxic", -"反共言论": "toxic", -"反分裂": "toxic", -"反奥": "toxic", -"反对共产主义": "toxic", -"反对共产党": "toxic", -"反屏蔽": "toxic", -"反攻大陆": "toxic", -"反测速雷": "toxic", -"反社会": "toxic", -"反社会主义": "toxic", -"反腐总攻": "toxic", -"反腐败论坛": "toxic", -"反雷达测": "toxic", -"反雷达测速": "toxic", -"发仑": "toxic", -"发仑da发": "toxic", -"发伦": "toxic", -"发伦功": "toxic", -"发伦工": "toxic", -"发囵": "toxic", -"发国难财": "toxic", -"发愣": "toxic", -"发抡": "toxic", -"发抡功": "toxic", -"发正念": "toxic", -"发沦": "toxic", -"发牌绝": "toxic", -"发生暴动": "toxic", -"发瞟": "toxic", -"发纶": "toxic", -"发论": "toxic", -"发论公": "toxic", -"发论功": "toxic", -"发论工": "toxic", -"发轮": "toxic", -"发轮功": "toxic", -"发轮功陈果": "toxic", -"受贿罪": "toxic", -"叛逃美国": "toxic", -"台du": "toxic", -"台wan": "toxic", -"台军": "toxic", -"台完": "toxic", -"台弯": "toxic", -"台毒": "toxic", -"台海危机": "toxic", -"台海大战": "toxic", -"台海局势": "toxic", -"台海战争": "toxic", -"台海统一": "toxic", -"台海问题": "toxic", -"台湾共和国": "toxic", -"台湾国": "toxic", -"台湾应该独立": "toxic", -"台湾建国运动组织": "toxic", -"台湾政论区": "toxic", -"台湾有权独立": "toxic", -"台湾版假币": "toxic", -"台湾独立": "toxic", -"台湾猪": "toxic", -"台湾自由联盟": "toxic", -"台湾问题": "toxic", -"台独": "toxic", -"台百度湾": "toxic", -"叶兵": "toxic", -"司徒华": "toxic", -"司马璐": "toxic", -"司马璐回忆录": "toxic", -"同盟党": "toxic", -"向巴平措": "toxic", -"吕德彬": "toxic", -"启蒙派": "toxic", -"吴振汉": "toxic", -"吾尔": "toxic", -"吾尔开希": "toxic", -"吾尔开西": "toxic", -"吾爾開希": "toxic", -"告全国同胞书": "toxic", -"告洋状": "toxic", -"周小川": "toxic", -"周文吉": "toxic", -"周正毅": "toxic", -"哈狗帮": "toxic", -"哒赖": "toxic", -"唑仑": "toxic", -"喝血社会": "toxic", -"器官贩卖": "toxic", -"四二六社论": "toxic", -"回复可见": "toxic", -"回忆六四": "toxic", -"回民暴动": "toxic", -"回民猪": "toxic", -"回汉冲突": "toxic", -"回派": "toxic", -"回良玉": "toxic", -"团派": "toxic", -"围攻上海": "toxic", -"国wu院": "toxic", -"国一九五七": "toxic", -"国之母": "toxic", -"国姆": "toxic", -"国家防火墙": "toxic", -"国母": "toxic", -"国统会": "toxic", -"国统纲领": "toxic", -"国际投注": "toxic", -"国际特赦": "toxic", -"土g": "toxic", -"土共": "toxic", -"土枪": "toxic", -"土炮": "toxic", -"土炸药成份": "toxic", -"圣战不息": "toxic", -"圣战组织": "toxic", -"圣殿教": "toxic", -"圣火护卫": "toxic", -"圣灵重建教会": "toxic", -"地下先烈": "toxic", -"地下刊物": "toxic", -"地下教会": "toxic", -"地下钱庄": "toxic", -"地产之歌": "toxic", -"地奈德": "toxic", -"坦克人": "toxic", -"坦克压大学生": "toxic", -"垡仑": "toxic", -"垡伦": "toxic", -"垡囵": "toxic", -"垡抡": "toxic", -"垡沦": "toxic", -"垡纶": "toxic", -"垡论": "toxic", -"垡轮": "toxic", -"基地组织": "toxic", -"基督": "toxic", -"基督教": "toxic", -"基督灵恩布道团": "toxic", -"塔利班": "toxic", -"境外媒体": "toxic", -"士康事件": "toxic", -"士的宁": "toxic", -"士的年": "toxic", -"夏川纯": "toxic", -"多党执政": "toxic", -"大sb": "toxic", -"大中华论坛": "toxic", -"大参考": "toxic", -"大嘴歌": "toxic", -"大圆满法": "toxic", -"大学暴动": "toxic", -"大家论坛": "toxic", -"大庄": "toxic", -"大批贪官": "toxic", -"大揭露": "toxic", -"大法": "toxic", -"大法修炼者": "toxic", -"大法弟子": "toxic", -"大法轮": "toxic", -"大纪元": "toxic", 
-"大肉棒": "toxic", -"大赦国际": "toxic", -"大陆官方": "toxic", -"大雞巴": "toxic", -"大鸡巴": "toxic", -"大麻": "toxic", -"大麻树脂": "toxic", -"大麻油": "toxic", -"天安门": "toxic", -"天安门事件": "toxic", -"天府广场": "toxic", -"天按门": "toxic", -"天推广歌": "toxic", -"天朝特": "toxic", -"天灭中共": "toxic", -"天鹅之旅": "toxic", -"奥你妈的运": "toxic", -"奥孕": "toxic", -"奥晕": "toxic", -"奸杀": "toxic", -"妈了个逼": "toxic", -"妈逼": "toxic", -"姜维平": "toxic", -"威胁论": "toxic", -"娘西皮": "toxic", -"婊子": "toxic", -"婊子养的": "toxic", -"媒体封锁": "toxic", -"孔丹": "toxic", -"孙小虹": "toxic", -"孙晋美": "toxic", -"孙瑜": "toxic", -"孙静晔": "toxic", -"孙鹤龄": "toxic", -"孟庆平": "toxic", -"孟建柱": "toxic", -"学chao": "toxic", -"学位證": "toxic", -"学潮": "toxic", -"学生领袖": "toxic", -"学百度潮": "toxic", -"学联": "toxic", -"学自联": "toxic", -"学运": "toxic", -"学骚乱": "toxic", -"宇宙主佛": "toxic", -"宇宙大法": "toxic", -"宇宙毁灭": "toxic", -"安魂网": "toxic", -"宋勇辽宁": "toxic", -"宋平顺": "toxic", -"宏志": "toxic", -"宏法": "toxic", -"官逼民反": "toxic", -"宪政": "toxic", -"宪章": "toxic", -"封从德": "toxic", -"小6灵通": "toxic", -"小泽一郎": "toxic", -"小活佛": "toxic", -"少年阿宾": "toxic", -"尚福林": "toxic", -"尹方明": "toxic", -"尼尔伍德": "toxic", -"屙民": "toxic", -"屠杀学生": "toxic", -"山涉黑": "toxic", -"岡本真": "toxic", -"川b26931": "toxic", -"州三箭": "toxic", -"工产党": "toxic", -"工力人": "toxic", -"工自联": "toxic", -"左棍": "toxic", -"左翼联盟": "toxic", -"左转是政": "toxic", -"师涛": "toxic", -"希脏": "toxic", -"希葬": "toxic", -"希藏": "toxic", -"帝国之梦": "toxic", -"干你妈": "toxic", -"干你娘": "toxic", -"干扰器": "toxic", -"干死你": "toxic", -"平义杰": "toxic", -"平反": "toxic", -"平反64": "toxic", -"平反六四": "toxic", -"平叫到床": "toxic", -"平惨案": "toxic", -"幼交": "toxic", -"幼齿类": "toxic", -"幽谷三": "toxic", -"广安事件": "toxic", -"广州军区": "toxic", -"庆红": "toxic", -"康日新": "toxic", -"康生丹": "toxic", -"康跳楼": "toxic", -"开天目": "toxic", -"开枪": "toxic", -"开邓选": "toxic", -"开除党籍": "toxic", -"异见人士": "toxic", -"异议人士": "toxic", -"引起暴动": "toxic", -"弘志": "toxic", -"张丹红": "toxic", -"张凯广东": "toxic", -"张国光": "toxic", -"张宏堡": "toxic", -"张宏宝": "toxic", -"张宗海": "toxic", -"张家盟浙江": "toxic", -"张小洋": "toxic", -"张志新": "toxic", -"张恩照": "toxic", -"张文中": "toxic", -"张斌": "toxic", -"张春桥": "toxic", -"张春江": "toxic", -"张晓明": "toxic", -"张曙": "toxic", -"张秋阳": "toxic", -"张辛泰": "toxic", -"张高丽": "toxic", -"强制拆除": "toxic", -"强制捐款": "toxic", -"强权政府": "toxic", -"影子政府": "toxic", -"徐国健": "toxic", -"徐才厚": "toxic", -"徐明": "toxic", -"徐炳松": "toxic", -"徐玉元": "toxic", -"徐衍东": "toxic", -"徐鹏航": "toxic", -"得财兼": "toxic", -"志洪李": "toxic", -"恐共": "toxic", -"恐怖份子": "toxic", -"恐怖分子": "toxic", -"恶党": "toxic", -"恶搞人民币": "toxic", -"恶警": "toxic", -"惨奥": "toxic", -"惩贪难": "toxic", -"慈悲功": "toxic", -"慕绥新": "toxic", -"懊孕": "toxic", -"懊运": "toxic", -"成克杰": "toxic", -"成都军区": "toxic", -"我操": "toxic", -"我日你": "toxic", -"我草": "toxic", -"戒yan": "toxic", -"戒严": "toxic", -"戴海静": "toxic", -"戴秉国": "toxic", -"打倒中共": "toxic", -"打倒中国": "toxic", -"打倒共产主义": "toxic", -"打倒共产党": "toxic", -"打倒朱镕": "toxic", -"打倒李鹏": "toxic", -"打倒江主席": "toxic", -"打倒江泽民": "toxic", -"打倒温家宝": "toxic", -"打倒罗干": "toxic", -"打倒胡锦涛": "toxic", -"打台湾": "toxic", -"托乎提沙比尔": "toxic", -"找援交": "toxic", -"投毒杀人": "toxic", -"抗议": "toxic", -"抗议中共当局": "toxic", -"护卫团": "toxic", -"护法": "toxic", -"报复执法": "toxic", -"抵制中共": "toxic", -"抵制共产主义": "toxic", -"抵制共产党": "toxic", -"抵制北京奥运": "toxic", -"抵制朱镕基": "toxic", -"抵制李鹏": "toxic", -"抵制江主席": "toxic", -"抵制江泽民": "toxic", -"抵制温家宝": "toxic", -"抵制罗干": "toxic", -"抵制胡锦涛": "toxic", -"抽着大中": "toxic", -"抽着芙蓉": "toxic", -"抿主": "toxic", -"拉sa": "toxic", -"拉萨": "toxic", -"拱铲": "toxic", -"挡坦克": "toxic", -"控诉世博": "toxic", -"推特": "toxic", -"推翻独裁": "toxic", -"推背图": "toxic", -"插你": "toxic", -"插我": "toxic", -"援交": 
"toxic", -"援藏网": "toxic", -"搞媛交": "toxic", -"摇头丸": "toxic", -"摩门教": "toxic", -"摸nai门": "toxic", -"操他": "toxic", -"操他妈": "toxic", -"操你": "toxic", -"操你全家": "toxic", -"操你大爷": "toxic", -"操你妈": "toxic", -"操你妈比": "toxic", -"操.你.妈.的千千万": "toxic", -"操你姐": "toxic", -"操.你.娘": "toxic", -"操你娘": "toxic", -"操你祖宗": "toxic", -"操嫂子": "toxic", -"操我": "toxic", -"操死": "toxic", -"操比": "toxic", -"操蛋": "toxic", -"操逼": "toxic", -"擦你妈": "toxic", -"支持台湾": "toxic", -"支那": "toxic", -"支那猪": "toxic", -"收复台湾": "toxic", -"攻占台湾": "toxic", -"政f": "toxic", -"政zhi": "toxic", -"政付": "toxic", -"政俯": "toxic", -"政变": "toxic", -"政府无能": "toxic", -"政治局十七": "toxic", -"政治风波": "toxic", -"政腐": "toxic", -"敏主": "toxic", -"文字狱": "toxic", -"新中华战记": "toxic", -"新京报": "toxic", -"新华通论坛": "toxic", -"新唐人": "toxic", -"新搪人": "toxic", -"新生网": "toxic", -"新疆暴乱": "toxic", -"新疆独立": "toxic", -"新疆骚乱": "toxic", -"新观察论坛": "toxic", -"新语丝": "toxic", -"新金瓶": "toxic", -"新闻封锁": "toxic", -"方励之": "toxic", -"方迷香": "toxic", -"旅游新报": "toxic", -"无官正": "toxic", -"无帮国": "toxic", -"无抵押贷款": "toxic", -"无期徒刑": "toxic", -"日你妈": "toxic", -"日本万岁": "toxic", -"日逼": "toxic", -"昆仑女神功": "toxic", -"明hui": "toxic", -"明慧": "toxic", -"明慧周报": "toxic", -"明慧网": "toxic", -"明镜出版社": "toxic", -"易达网络卡": "toxic", -"昝爱宗": "toxic", -"星岛日报": "toxic", -"普提功": "toxic", -"普萘洛尔": "toxic", -"暴乱": "toxic", -"暴力执法": "toxic", -"暴力袭警": "toxic", -"暴动": "toxic", -"暴政": "toxic", -"曲乃杰": "toxic", -"曹刚川": "toxic", -"曹长青": "toxic", -"曾道人": "toxic", -"最淫官员": "toxic", -"有偿肾": "toxic", -"木仓": "toxic", -"木子论坛": "toxic", -"木齐针": "toxic", -"末世劫难": "toxic", -"末世论": "toxic", -"朱川": "toxic", -"朱志刚": "toxic", -"朱瑟里诺": "toxic", -"机卡密": "toxic", -"杀b": "toxic", -"杀害学生": "toxic", -"杀毙": "toxic", -"杀警": "toxic", -"权贵集团": "toxic", -"李伟信的笔供": "toxic", -"李启红": "toxic", -"李咏曰": "toxic", -"李嘉廷": "toxic", -"李四光预测": "toxic", -"李堂堂": "toxic", -"李大轮子": "toxic", -"李天羽": "toxic", -"李宏志": "toxic", -"李宝金": "toxic", -"李恩潮": "toxic", -"李愚蠢": "toxic", -"李效时": "toxic", -"李晓英": "toxic", -"李树菲": "toxic", -"李洪X": "toxic", -"李洪志": "toxic", -"李红痔": "toxic", -"李纪周": "toxic", -"李达昌": "toxic", -"李鹏": "toxic", -"杜世成": "toxic", -"杜冷丁": "toxic", -"杨j": "toxic", -"杨佳": "toxic", -"杨希": "toxic", -"杨思敏": "toxic", -"杨树宽": "toxic", -"杨汇泉": "toxic", -"東京熱": "toxic", -"松岛枫": "toxic", -"林孔兴": "toxic", -"林文漪": "toxic", -"枪决女犯": "toxic", -"枪手": "toxic", -"柴王群": "toxic", -"柴玲": "toxic", -"档中央": "toxic", -"梁光烈": "toxic", -"梁湘": "toxic", -"梦网洪志": "toxic", -"梦萦未名湖": "toxic", -"欠干": "toxic", -"正义党论坛": "toxic", -"正府": "toxic", -"正见网": "toxic", -"步qiang": "toxic", -"武侯祠": "toxic", -"武力镇压": "toxic", -"武装镇压": "toxic", -"死全家": "toxic", -"死逼": "toxic", -"殃视": "toxic", -"段义和": "toxic", -"段录定": "toxic", -"段桂清": "toxic", -"毒蛇钻": "toxic", -"毛一鲜": "toxic", -"毛泽东侄子": "toxic", -"毛泽东": "toxic", -"毛贼": "toxic", -"民主还专政": "toxic", -"民九亿商": "toxic", -"民族问题": "toxic", -"民殇": "toxic", -"民猪": "toxic", -"民珠": "toxic", -"民竹": "toxic", -"民联": "toxic", -"民运": "toxic", -"民运人士": "toxic", -"民运分子": "toxic", -"民进党": "toxic", -"民阵": "toxic", -"氓培训": "toxic", -"氵去": "toxic", -"氵去车仑": "toxic", -"氵去车仑工力": "toxic", -"汉芯造假": "toxic", -"江z民": "toxic", -"江三条腿": "toxic", -"江丑闻": "toxic", -"江主席": "toxic", -"江人马": "toxic", -"江太上": "toxic", -"江嫡系": "toxic", -"江宰民": "toxic", -"江家帮": "toxic", -"江戏子": "toxic", -"江核心": "toxic", -"江梳头": "toxic", -"江毒": "toxic", -"江氏家族": "toxic", -"江氏政治委员": "toxic", -"江氏政治局": "toxic", -"江氏集团": "toxic", -"江泉集团": "toxic", -"江派人马": "toxic", -"江派和胡派": "toxic", -"江独裁": "toxic", -"江祸心": "toxic", -"江系人": "toxic", -"江系人马": "toxic", -"江绵恒": "toxic", -"江胡内斗": "toxic", -"江蛤蟆": "toxic", 
-"江贼": "toxic", -"江贼民": "toxic", -"江黑心": "toxic", -"汤加丽": "toxic", -"汪兆钧": "toxic", -"汪洋": "toxic", -"沁园春血": "toxic", -"沈图": "toxic", -"沈彤": "toxic", -"沈昌功": "toxic", -"沈阳军区": "toxic", -"沙比": "toxic", -"沙皇李长春": "toxic", -"河殇": "toxic", -"河蟹社会": "toxic", -"油行": "toxic", -"泓志": "toxic", -"法0功": "toxic", -"法lg": "toxic", -"法lun": "toxic", -"法lun功": "toxic", -"法L功": "toxic", -"法o功": "toxic", -"法O功": "toxic", -"法x功": "toxic", -"法一轮": "toxic", -"法一轮一功": "toxic", -"法仑": "toxic", -"法仑功": "toxic", -"法会": "toxic", -"法伦": "toxic", -"法伦功": "toxic", -"法力像佛": "toxic", -"法*功": "toxic", -"法功": "toxic", -"法十轮十功": "toxic", -"法囵": "toxic", -"法愣": "toxic", -"法抡": "toxic", -"法抡功": "toxic", -"法拉盛": "toxic", -"法拉盛缅街": "toxic", -"法正": "toxic", -"法正乾": "toxic", -"法沦": "toxic", -"法纶": "toxic", -"法维权": "toxic", -"法能功": "toxic", -"法西斯": "toxic", -"法西藏主义": "toxic", -"法论": "toxic", -"法论功": "toxic", -"法谪": "toxic", -"法谪功": "toxic", -"法輪": "toxic", -"法车仑": "toxic", -"法轮": "toxic", -"法轮佛法": "toxic", -"法 轮 功": "toxic", -"法.轮.功": "toxic", -"法轮功": "toxic", -"法轮大法": "toxic", -"法院给废": "toxic", -"泰兴幼": "toxic", -"泰兴镇中": "toxic", -"泰州幼": "toxic", -"泼尼松": "toxic", -"泽民": "toxic", -"洗澡死": "toxic", -"津人治津": "toxic", -"津地震": "toxic", -"津大地震": "toxic", -"洪传": "toxic", -"洪吟": "toxic", -"洪哲胜": "toxic", -"洪志": "toxic", -"洪清源": "toxic", -"活体取肾": "toxic", -"活摘器官": "toxic", -"派系斗争": "toxic", -"流亡藏人": "toxic", -"流血事件": "toxic", -"流血冲突": "toxic", -"济世灵文": "toxic", -"济南军区": "toxic", -"海luo因": "toxic", -"海伍德": "toxic", -"海洛因": "toxic", -"海访民": "toxic", -"涂志森": "toxic", -"消业之说": "toxic", -"消防灭火枪": "toxic", -"涉台政局": "toxic", -"涉嫌抄袭": "toxic", -"涛一样胡": "toxic", -"涛共产": "toxic", -"淋巴县长": "toxic", -"混蛋": "toxic", -"清华帮": "toxic", -"清官团": "toxic", -"清海师父": "toxic", -"清海无上师": "toxic", -"清純壆": "toxic", -"渊盖苏文": "toxic", -"温休曾退": "toxic", -"温切斯特": "toxic", -"温加饱": "toxic", -"温家堡": "toxic", -"温影帝": "toxic", -"港澳博球": "toxic", -"港鑫華": "toxic", -"港馬會": "toxic", -"游行": "toxic", -"湾台": "toxic", -"溫家寶": "toxic", -"满洲第三帝国": "toxic", -"满狗": "toxic", -"灭中共": "toxic", -"灭亡中国": "toxic", -"灭共": "toxic", -"灯草和": "toxic", -"灵动卡": "toxic", -"炳章": "toxic", -"炸学校": "toxic", -"炸广州": "toxic", -"炸立交": "toxic", -"炼大法": "toxic", -"热比娅": "toxic", -"热站政论网": "toxic", -"焚烧中国国旗": "toxic", -"焦国标": "toxic", -"煞笔": "toxic", -"煞逼": "toxic", -"煽动群众": "toxic", -"熙来": "toxic", -"爆zha": "toxic", -"爆你菊": "toxic", -"爱国者同盟": "toxic", -"爱国者同盟网站": "toxic", -"爱国运动正名": "toxic", -"牟新生": "toxic", -"狗产蛋": "toxic", -"狗娘养": "toxic", -"狗娘养的": "toxic", -"狗日的": "toxic", -"狗杂种": "toxic", -"独夫民贼": "toxic", -"独立中文笔会": "toxic", -"独立台湾": "toxic", -"独立台湾会": "toxic", -"独裁者": "toxic", -"狮子旗": "toxic", -"王丹": "toxic", -"王乐毅": "toxic", -"王千源": "toxic", -"王华元": "toxic", -"王奉友": "toxic", -"王守业": "toxic", -"王宝森": "toxic", -"王怀忠": "toxic", -"王昭耀": "toxic", -"王炳章": "toxic", -"王益": "toxic", -"王益案": "toxic", -"王立军": "toxic", -"王维林": "toxic", -"王超华": "toxic", -"王钟麓": "toxic", -"王雪冰": "toxic", -"玛雅网": "toxic", -"玩photoshop": "toxic", -"珐伦": "toxic", -"珐囵": "toxic", -"珐(工力)学": "toxic", -"珐抡": "toxic", -"珐沦": "toxic", -"珐纶": "toxic", -"珐论": "toxic", -"珐.輪功": "toxic", -"珐轮": "toxic", -"瑞安事件": "toxic", -"瓮安": "toxic", -"瓮安事件": "toxic", -"生孩子没屁眼": "toxic", -"田凤岐": "toxic", -"田田桑": "toxic", -"田罢工": "toxic", -"甲虫跳": "toxic", -"留党察看": "toxic", -"留四进三": "toxic", -"疆独": "toxic", -"疆獨": "toxic", -"症腐": "toxic", -"瘟假饱": "toxic", -"瘟加饱": "toxic", -"瘟家宝": "toxic", -"瘟疫爆发": "toxic", -"登陆台湾": "toxic", -"白痴": "toxic", -"皮黔生": "toxic", -"真他妈": "toxic", -"真善忍": "toxic", -"真善美": "toxic", -"真理教": "toxic", -"真理部": "toxic", -"石兆彬": "toxic", 
-"矿难不公": "toxic", -"砝仑": "toxic", -"砝伦": "toxic", -"砝囵": "toxic", -"砝抡": "toxic", -"砝沦": "toxic", -"砝纶": "toxic", -"砝论": "toxic", -"砝轮": "toxic", -"砲友": "toxic", -"示wei": "toxic", -"示威": "toxic", -"社会主义灭亡": "toxic", -"神州忏悔录": "toxic", -"神的教会": "toxic", -"神通加持法": "toxic", -"神韵晚会": "toxic", -"神韵艺术": "toxic", -"禁网禁片": "toxic", -"禅密功": "toxic", -"福音会": "toxic", -"福香巴": "toxic", -"积克馆": "toxic", -"程维高": "toxic", -"空中民主墙": "toxic", -"章沁生": "toxic", -"第21集团军": "toxic", -"第三次世界大战": "toxic", -"第五代接班梯队": "toxic", -"第五代红人": "toxic", -"筏仑": "toxic", -"筏伦": "toxic", -"筏囵": "toxic", -"筏抡": "toxic", -"筏沦": "toxic", -"筏纶": "toxic", -"筏论": "toxic", -"筏轮": "toxic", -"粉碎四人帮": "toxic", -"粮荒": "toxic", -"红志": "toxic", -"红色恐怖": "toxic", -"红色贵族": "toxic", -"纪念文革": "toxic", -"纳米比亚": "toxic", -"纽约时报": "toxic", -"练功群众": "toxic", -"绕过封锁": "toxic", -"绝食声": "toxic", -"统一台湾": "toxic", -"统治术": "toxic", -"维园晚会": "toxic", -"罗云光": "toxic", -"罗川": "toxic", -"罗干": "toxic", -"罚仑": "toxic", -"罚伦": "toxic", -"罚囵": "toxic", -"罚抡": "toxic", -"罚沦": "toxic", -"罚纶": "toxic", -"罢ke": "toxic", -"罢参": "toxic", -"罢吃": "toxic", -"罢学": "toxic", -"罢工": "toxic", -"罢工门": "toxic", -"罢教": "toxic", -"罢考": "toxic", -"罢课": "toxic", -"罢运": "toxic", -"罢食": "toxic", -"罢餐": "toxic", -"罢饭": "toxic", -"美国之音": "toxic", -"群众冲击": "toxic", -"群体事件": "toxic", -"群体灭绝": "toxic", -"群发软件": "toxic", -"翻墙": "toxic", -"耀邦": "toxic", -"老人政治": "toxic", -"老共": "toxic", -"聂树斌": "toxic", -"聊斋艳": "toxic", -"联4通": "toxic", -"联名上书": "toxic", -"联盟党": "toxic", -"联通贵宾卡": "toxic", -"聯繫電": "toxic", -"肏你": "toxic", -"肏死": "toxic", -"肖中特": "toxic", -"肖怀枢": "toxic", -"胡下台": "toxic", -"胡平": "toxic", -"胡江争斗": "toxic", -"胡江关系": "toxic", -"胡江内斗": "toxic", -"胡江曾": "toxic", -"胡江风云": "toxic", -"胡派人马": "toxic", -"胡派军委": "toxic", -"胡的接班人": "toxic", -"胡紧套": "toxic", -"胡紧掏": "toxic", -"胡耀邦": "toxic", -"胡进涛": "toxic", -"胡适眼": "toxic", -"胡錦濤": "toxic", -"胡长清": "toxic", -"胸主席": "toxic", -"脏独": "toxic", -"腐败中国": "toxic", -"臧人": "toxic", -"臧独": "toxic", -"自fen": "toxic", -"自sha": "toxic", -"自杀手册": "toxic", -"自杀指南": "toxic", -"自由亚洲": "toxic", -"自由亚洲电台": "toxic", -"自由光诚": "toxic", -"自由时报": "toxic", -"自由西藏": "toxic", -"自由西藏学生运动": "toxic", -"自由门": "toxic", -"艹你": "toxic", -"艾未未": "toxic", -"艾末末": "toxic", -"花园网": "toxic", -"苍山兰": "toxic", -"苏晓康": "toxic", -"苏绍智": "toxic", -"苏贞昌": "toxic", -"苯丙胺": "toxic", -"苯巴比妥": "toxic", -"英国金融时报": "toxic", -"范燕琼": "toxic", -"茅于轼": "toxic", -"茳澤民": "toxic", -"荆福生": "toxic", -"草你": "toxic", -"草你丫": "toxic", -"草你吗": "toxic", -"草你妈": "toxic", -"草你妈妈": "toxic", -"草泥": "toxic", -"草泥马": "toxic", -"荭志": "toxic", -"莫日根": "toxic", -"莫达非尼": "toxic", -"萨斯病": "toxic", -"落霞缀": "toxic", -"落马": "toxic", -"董元辰": "toxic", -"葬独": "toxic", -"蒋公纪念歌": "toxic", -"蒋彦永": "toxic", -"蒋捷连": "toxic", -"蒙古回归": "toxic", -"蒙汗药": "toxic", -"蒙汗药粉": "toxic", -"蓝田造假案": "toxic", -"蔡崇国": "toxic", -"薄瓜瓜": "toxic", -"薄督": "toxic", -"藏du": "toxic", -"藏m": "toxic", -"藏人": "toxic", -"藏妇会": "toxic", -"藏字石": "toxic", -"藏旗": "toxic", -"藏春阁": "toxic", -"藏暴乱": "toxic", -"藏毒": "toxic", -"藏民": "toxic", -"藏独": "toxic", -"藏独立": "toxic", -"藏獨": "toxic", -"藏西": "toxic", -"藏青会": "toxic", -"虹志": "toxic", -"蛤蟆转世": "toxic", -"血洗京城": "toxic", -"血溅人民天堂": "toxic", -"血腥清场": "toxic", -"血色京机": "toxic", -"血色京畿": "toxic", -"行长王益": "toxic", -"袁伟民": "toxic", -"袜按摩": "toxic", -"裆中央": "toxic", -"西z": "toxic", -"西奘": "toxic", -"西独": "toxic", -"西脏": "toxic", -"西臧": "toxic", -"西葬": "toxic", -"西藏": "toxic", -"西藏人民大起义": "toxic", -"西藏作家组织": "toxic", -"西藏国家民主党": "toxic", -"西藏流亡政府": "toxic", -"西藏,独立": "toxic", -"西藏独立": "toxic", -"西藏限": 
"toxic", -"解体中共": "toxic", -"解决台湾": "toxic", -"解放tw": "toxic", -"解放军": "toxic", -"解放台湾": "toxic", -"解码开锁": "toxic", -"言论罪": "toxic", -"讨伐中宣部": "toxic", -"讨厌中国": "toxic", -"讨说法": "toxic", -"记号扑克": "toxic", -"记者无疆界": "toxic", -"讲法传功": "toxic", -"许宗衡": "toxic", -"许运鸿": "toxic", -"访民": "toxic", -"证监会": "toxic", -"请愿": "toxic", -"诸世纪": "toxic", -"诺查丹玛斯": "toxic", -"谢岷": "toxic", -"谢文": "toxic", -"谭作人": "toxic", -"谷开来": "toxic", -"贪污腐败": "toxic", -"贪腐财富": "toxic", -"贪财物": "toxic", -"贱b": "toxic", -"贱人": "toxic", -"贱比": "toxic", -"贱货": "toxic", -"贺卫方": "toxic", -"贺立旗": "toxic", -"贼民": "toxic", -"赌具": "toxic", -"赌博机": "toxic", -"赌恒指": "toxic", -"赌球": "toxic", -"赌球网": "toxic", -"赖达": "toxic", -"赛克网": "toxic", -"赤匪": "toxic", -"赵志强": "toxic", -"赵紫阳": "toxic", -"足球投注": "toxic", -"足球玩法": "toxic", -"跑官要官": "toxic", -"踩踏事故": "toxic", -"蹋纳税": "toxic", -"车仑": "toxic", -"车仑大法": "toxic", -"车仑工力": "toxic", -"转法论": "toxic", -"转法轮": "toxic", -"轮功": "toxic", -"轮大": "toxic", -"轮子功": "toxic", -"轮子小报": "toxic", -"轮法功": "toxic", -"辛子陵": "toxic", -"辛灏年": "toxic", -"辛灝年": "toxic", -"辦毕业": "toxic", -"边少斌": "toxic", -"达赖": "toxic", -"达赖喇嘛": "toxic", -"近平": "toxic", -"还看锦涛": "toxic", -"进攻台湾": "toxic", -"进来的罪": "toxic", -"违纪": "toxic", -"迪里夏提": "toxic", -"退dang": "toxic", -"退党": "toxic", -"送qb": "toxic", -"国家主席": "toxic", -"递进民主": "toxic", -"邓小平": "toxic", -"邓爷爷转": "toxic", -"邓玉娇": "toxic", -"邓颖超日记": "toxic", -"邝锦文": "toxic", -"邪党": "toxic", -"邪恶": "toxic", -"邱晓华": "toxic", -"邵松高": "toxic", -"郑光迪": "toxic", -"郑建源": "toxic", -"郑筱萸": "toxic", -"郭伯雄": "toxic", -"酒瓶门": "toxic", -"重阳兵变": "toxic", -"金扎金": "toxic", -"金盾工程": "toxic", -"金钟气": "toxic", -"钓鱼台": "toxic", -"钓鱼岛": "toxic", -"钢针狗": "toxic", -"钦点接班人": "toxic", -"钱三字经": "toxic", -"铁血师": "toxic", -"铃木麻": "toxic", -"铭记印尼": "toxic", -"锦涛": "toxic", -"镇压": "toxic", -"长狗": "toxic", -"门保健": "toxic", -"门安天": "toxic", -"门徒会": "toxic", -"门按摩": "toxic", -"闳志": "toxic", -"闵维方": "toxic", -"闹独立": "toxic", -"闻封锁": "toxic", -"阀仑": "toxic", -"阀伦": "toxic", -"阀囵": "toxic", -"阀抡": "toxic", -"阀沦": "toxic", -"阀纶": "toxic", -"阀论": "toxic", -"阎明复": "toxic", -"阿共": "toxic", -"阿凡提机": "toxic", -"阿宾": "toxic", -"阿扁": "toxic", -"阿拉伯": "toxic", -"阿旺晋美": "toxic", -"阿曼哈吉": "toxic", -"阿波罗新闻": "toxic", -"阿波罗网": "toxic", -"阿芙蓉": "toxic", -"附送枪": "toxic", -"陆同修": "toxic", -"陆四": "toxic", -"陆封锁": "toxic", -"陆肆": "toxic", -"陈s扁": "toxic", -"陈一咨": "toxic", -"陈一谘": "toxic", -"陈世炬": "toxic", -"陈同海": "toxic", -"陈少勇": "toxic", -"陈希同": "toxic", -"陈忠": "toxic", -"陈水扁": "toxic", -"陈水文": "toxic", -"陈瑞卿": "toxic", -"陈相贵": "toxic", -"陈破空": "toxic", -"陈绍基": "toxic", -"陈良宇": "toxic", -"陈西": "toxic", -"陈随便": "toxic", -"限制言": "toxic", -"陪考枪": "toxic", -"陪聊": "toxic", -"陷害案": "toxic", -"陷害罪": "toxic", -"隆手指": "toxic", -"隐瞒地震": "toxic", -"集体打砸": "toxic", -"集体抗议": "toxic", -"集体自杀": "toxic", -"雍战胜": "toxic", -"雪山狮子": "toxic", -"雪山狮子旗": "toxic", -"零八奥运艰": "toxic", -"雷人女官": "toxic", -"霸工": "toxic", -"霸课": "toxic", -"霸餐": "toxic", -"青天白日": "toxic", -"青天白日旗": "toxic", -"青海无上师": "toxic", -"靖国神社": "toxic", -"静zuo": "toxic", -"静坐": "toxic", -"革命": "toxic", -"鞑子": "toxic", -"韩东方": "toxic", -"韩国狗": "toxic", -"韩桂芝": "toxic", -"韩正": "toxic", -"韵徐娘": "toxic", -"韶关斗": "toxic", -"韶关旭": "toxic", -"韶关玩": "toxic", -"颜色革命": "toxic", -"马三家": "toxic", -"马凯": "toxic", -"马力": "toxic", -"马勒": "toxic", -"马英九": "toxic", -"骑单车出": "toxic", -"骚比": "toxic", -"骚货": "toxic", -"骚逼": "toxic", -"高丽棒子": "toxic", -"高利贷": "toxic", -"高勤荣": "toxic", -"高官": "toxic", -"高官互调": "toxic", -"高官名单": "toxic", -"高官子女": "toxic", -"高干子女": "toxic", -"高干子弟": "toxic", -"高智晟": 
"toxic", -"高自联": "toxic", -"高莺莺": "toxic", -"高薪养廉": "toxic", -"魏京生": "toxic", -"魔难论": "toxic", -"鮑彤": "toxic", -"鲁向东": "toxic", -"鲁昕": "toxic", -"鲍彤": "toxic", -"鲍朴": "toxic", -"鸟巢最少死": "toxic", -"鸥之歌": "toxic", -"鸦片": "toxic", -"鸿志": "toxic", -"麦崇楷": "toxic", -"麻痹的": "toxic", -"黄松有": "toxic", -"黄瑶": "toxic", -"黄祸": "toxic", -"黄菊遗孀": "toxic", -"黎安友": "toxic", -"黎阳平": "toxic", -"默罕默德": "toxic", -"龙小霞": "toxic", -"龙湾事件": "toxic", -"龙虎斗": "toxic", -"训练": "toxic", -"finetune": "toxic", -"finetuning": "toxic", -"fine-tuning": "toxic", -"trained": "toxic", -"fine-tune": "toxic", -"873693": "stock", -"阿为特": "stock", -"873726": "stock", -"卓兆点胶": "stock", -"832786": "stock", -"骑士乳业": "stock", -"301559": "stock", -"中集环科": "stock", -"688657": "stock", -"N浩辰": "stock", -"301558": "stock", -"三态股份": "stock", -"688719": "stock", -"爱科赛博": "stock", -"301520": "stock", -"万邦医药": "stock", -"301500": "stock", -"飞南资源": "stock", -"688702": "stock", -"盛科通信": "stock", -"688549": "stock", -"中巨芯": "stock", -"870976": "stock", -"视声智能": "stock", -"301511": "stock", -"德福科技": "stock", -"837174": "stock", -"宏裕包材": "stock", -"301487": "stock", -"盟固利": "stock", -"870726": "stock", -"鸿智科技": "stock", -"301519": "stock", -"舜禹股份": "stock", -"832982": "stock", -"锦波生物": "stock", -"301272": "stock", -"英华特": "stock", -"688429": "stock", -"时创能源": "stock", -"301376": "stock", -"致欧科技": "stock", -"688443": "stock", -"智翔金泰": "stock", -"688472": "stock", -"阿特斯": "stock", -"688361": "stock", -"中科飞测": "stock", -"688512": "stock", -"慧智微": "stock", -"688469": "stock", -"中芯集成": "stock", -"838837": "stock", -"华原股份": "stock", -"001286": "stock", -"陕西能源": "stock", -"688343": "stock", -"云天励飞": "stock", -"603073": "stock", -"彩蝶实业": "stock", -"833575": "stock", -"康乐卫士": "stock", -"835857": "stock", -"百甲科技": "stock", -"301322": "stock", -"绿通科技": "stock", -"001278": "stock", -"一彬科技": "stock", -"301408": "stock", -"华人健康": "stock", -"301303": "stock", -"真兰仪表": "stock", -"001225": "stock", -"和泰机电": "stock", -"301419": "stock", -"阿莱德": "stock", -"688515": "stock", -"裕太微": "stock", -"603281": "stock", -"江瀚新材": "stock", -"832023": "stock", -"田野股份": "stock", -"688506": "stock", -"百利天恒": "stock", -"838262": "stock", -"太湖雪": "stock", -"301105": "stock", -"鸿铭股份": "stock", -"831526": "stock", -"凯华材料": "stock", -"833075": "stock", -"柏星龙": "stock", -"601022": "stock", -"宁波远洋": "stock", -"832662": "stock", -"方盛股份": "stock", -"688362": "stock", -"甬矽电子": "stock", -"603280": "stock", -"南方路机": "stock", -"688152": "stock", -"麒麟信安": "stock", -"001322": "stock", -"箭牌家居": "stock", -"001300": "stock", -"三柏硕": "stock", -"688031": "stock", -"星环科技": "stock", -"301299": "stock", -"卓创资讯": "stock", -"688459": "stock", -"哈铁科技": "stock", -"838402": "stock", -"硅烷科技": "stock", -"688387": "stock", -"信科移动": "stock", -"688428": "stock", -"诺诚健华": "stock", -"301369": "stock", -"联动科技": "stock", -"688184": "stock", -"帕瓦股份": "stock", -"301326": "stock", -"捷邦科技": "stock", -"001332": "stock", -"锡装股份": "stock", -"301349": "stock", -"信德新材": "stock", -"688293": "stock", -"奥浦迈": "stock", -"688351": "stock", -"微电生理": "stock", -"688439": "stock", -"振华风光": "stock", -"688370": "stock", -"丛麟科技": "stock", -"603255": "stock", -"鼎际得": "stock", -"001231": "stock", -"农心科技": "stock", -"301171": "stock", -"易点天下": "stock", -"688292": "stock", -"浩瀚深度": "stock", -"688373": "stock", -"盟科药业": "stock", -"301195": "stock", -"北路智控": "stock", -"688253": "stock", -"英诺特": "stock", -"688382": "stock", -"益方生物": "stock", -"603235": "stock", -"天新药业": "stock", -"688322": "stock", -"奥比中光": "stock", -"301175": "stock", 
-"中科环保": "stock", -"001268": "stock", -"联合精密": "stock", -"688047": "stock", -"龙芯中科": "stock", -"430564": "stock", -"天润科技": "stock", -"688251": "stock", -"井松智能": "stock", -"873223": "stock", -"荣亿精密": "stock", -"688327": "stock", -"云从科技": "stock", -"301183": "stock", -"东田微": "stock", -"688213": "stock", -"思特威": "stock", -"001318": "stock", -"阳光乳业": "stock", -"600938": "stock", -"中国海油": "stock", -"301288": "stock", -"清研环境": "stock", -"688326": "stock", -"经纬恒润": "stock", -"301212": "stock", -"联盛化学": "stock", -"688302": "stock", -"海创药业": "stock", -"301135": "stock", -"瑞德智能": "stock", -"873169": "stock", -"七丰精工": "stock", -"301263": "stock", -"泰恩康": "stock", -"301258": "stock", -"富士莱": "stock", -"603209": "stock", -"兴通股份": "stock", -"688197": "stock", -"首药控股": "stock", -"301237": "stock", -"和顺科技": "stock", -"688306": "stock", -"均普智能": "stock", -"832419": "stock", -"路斯股份": "stock", -"301222": "stock", -"浙江恒威": "stock", -"835179": "stock", -"凯德石英": "stock", -"603132": "stock", -"金徽股份": "stock", -"301229": "stock", -"纽泰格": "stock", -"301181": "stock", -"标榜股份": "stock", -"301122": "stock", -"采纳股份": "stock", -"301123": "stock", -"奕东电子": "stock", -"688173": "stock", -"希荻微": "stock", -"688220": "stock", -"翱捷科技": "stock", -"688062": "stock", -"迈威生物": "stock", -"688176": "stock", -"亚虹医药": "stock", -"688262": "stock", -"国芯科技": "stock", -"688227": "stock", -"品高股份": "stock", -"301113": "stock", -"雅艺科技": "stock", -"301177": "stock", -"迪阿股份": "stock", -"688235": "stock", -"百济神州": "stock", -"688192": "stock", -"迪哲医药": "stock", -"301179": "stock", -"泽宇智能": "stock", -"688112": "stock", -"鼎阳科技": "stock", -"301213": "stock", -"观想科技": "stock", -"301180": "stock", -"万祥科技": "stock", -"301118": "stock", -"恒光股份": "stock", -"836260": "stock", -"中寰股份": "stock", -"603213": "stock", -"镇洋发展": "stock", -"301149": "stock", -"隆华新材": "stock", -"831832": "stock", -"科达自控": "stock", -"301169": "stock", -"零点有数": "stock", -"301129": "stock", -"瑞纳智能": "stock", -"688280": "stock", -"精进电动": "stock", -"688257": "stock", -"新锐股份": "stock", -"688553": "stock", -"汇宇制药": "stock", -"837092": "stock", -"汉鑫科技": "stock", -"605555": "stock", -"德昌股份": "stock", -"831305": "stock", -"海希通讯": "stock", -"688272": "stock", -"*ST富吉": "stock", -"688772": "stock", -"珠海冠宇": "stock", -"605567": "stock", -"春雪食品": "stock", -"301063": "stock", -"海锅股份": "stock", -"301058": "stock", -"中粮科工": "stock", -"301055": "stock", -"张小泉": "stock", -"688798": "stock", -"艾为电子": "stock", -"601825": "stock", -"沪农商行": "stock", -"301045": "stock", -"天禄科技": "stock", -"688787": "stock", -"海天瑞声": "stock", -"605588": "stock", -"冠石科技": "stock", -"301036": "stock", -"双乐股份": "stock", -"688303": "stock", -"大全能源": "stock", -"301030": "stock", -"仕净科技": "stock", -"605365": "stock", -"立达信": "stock", -"301028": "stock", -"东亚机械": "stock", -"688226": "stock", -"威腾电气": "stock", -"301021": "stock", -"英诺激光": "stock", -"301020": "stock", -"密封科技": "stock", -"832885": "stock", -"星辰科技": "stock", -"605011": "stock", -"杭州热电": "stock", -"301022": "stock", -"海泰科": "stock", -"688367": "stock", -"工大高科": "stock", -"688601": "stock", -"力芯微": "stock", -"601528": "stock", -"瑞丰银行": "stock", -"301010": "stock", -"晶雪节能": "stock", -"688067": "stock", -"爱威科技": "stock", -"301007": "stock", -"德迈仕": "stock", -"301008": "stock", -"宏昌科技": "stock", -"601156": "stock", -"东航物流": "stock", -"301005": "stock", -"超捷股份": "stock", -"301003": "stock", -"江苏博云": "stock", -"301001": "stock", -"凯淳股份": "stock", -"688538": "stock", -"和辉光电": "stock", -"603511": "stock", -"爱慕股份": "stock", -"688660": "stock", -"电气风电": "stock", -"605339": 
"stock", -"南侨食品": "stock", -"001205": "stock", -"盛航股份": "stock", -"001203": "stock", -"大中矿业": "stock", -"600906": "stock", -"财达证券": "stock", -"300979": "stock", -"华利集团": "stock", -"300980": "stock", -"祥源新材": "stock", -"688611": "stock", -"杭州柯林": "stock", -"300970": "stock", -"华绿生物": "stock", -"300963": "stock", -"中洲特材": "stock", -"003042": "stock", -"中农联合": "stock", -"300959": "stock", -"线上线下": "stock", -"300953": "stock", -"震裕科技": "stock", -"688316": "stock", -"青云科技": "stock", -"688667": "stock", -"菱电电控": "stock", -"605122": "stock", -"四方新材": "stock", -"605060": "stock", -"联德股份": "stock", -"605303": "stock", -"园林股份": "stock", -"688059": "stock", -"华锐精密": "stock", -"600916": "stock", -"中国黄金": "stock", -"836239": "stock", -"长虹能源": "stock", -"300932": "stock", -"三友联众": "stock", -"003035": "stock", -"南网能源": "stock", -"300927": "stock", -"江天化学": "stock", -"300926": "stock", -"博俊科技": "stock", -"003030": "stock", -"祖名股份": "stock", -"300925": "stock", -"法本信息": "stock", -"003028": "stock", -"振邦智能": "stock", -"688678": "stock", -"福立旺": "stock", -"003020": "stock", -"立方制药": "stock", -"688571": "stock", -"杭华股份": "stock", -"003021": "stock", -"兆威机电": "stock", -"689009": "stock", -"九号公司": "stock", -"688221": "stock", -"前沿生物": "stock", -"688129": "stock", -"东来技术": "stock", -"003015": "stock", -"日久光电": "stock", -"003013": "stock", -"地铁设计": "stock", -"605338": "stock", -"巴比食品": "stock", -"688013": "stock", -"天臣医疗": "stock", -"605136": "stock", -"丽人丽妆": "stock", -"605018": "stock", -"长华集团": "stock", -"300895": "stock", -"铜牛信息": "stock", -"688526": "stock", -"科前生物": "stock", -"688536": "stock", -"思瑞浦": "stock", -"688559": "stock", -"海目星": "stock", -"688289": "stock", -"圣湘生物": "stock", -"300878": "stock", -"维康药业": "stock", -"300864": "stock", -"南大环境": "stock", -"603931": "stock", -"格林达": "stock", -"300875": "stock", -"捷强装备": "stock", -"605088": "stock", -"冠盛股份": "stock", -"688065": "stock", -"凯赛生物": "stock", -"688339": "stock", -"亿华通": "stock", -"605158": "stock", -"华达新材": "stock", -"688338": "stock", -"赛科希德": "stock", -"688311": "stock", -"盟升电子": "stock", -"002995": "stock", -"天地在线": "stock", -"002991": "stock", -"甘源食品": "stock", -"605222": "stock", -"起帆电缆": "stock", -"601456": "stock", -"国联证券": "stock", -"836149": "stock", -"旭杰科技": "stock", -"688561": "stock", -"奇安信": "stock", -"836433": "stock", -"大唐药业": "stock", -"833874": "stock", -"泰祥股份": "stock", -"688256": "stock", -"寒武纪": "stock", -"300848": "stock", -"美瑞新材": "stock", -"688165": "stock", -"埃夫特": "stock", -"430418": "stock", -"苏轴股份": "stock", -"688180": "stock", -"君实生物": "stock", -"430489": "stock", -"佳先股份": "stock", -"833819": "stock", -"颖泰生物": "stock", -"688277": "stock", -"天智航": "stock", -"688528": "stock", -"秦川物联": "stock", -"688520": "stock", -"神州细胞": "stock", -"002986": "stock", -"宇新股份": "stock", -"605288": "stock", -"凯迪股份": "stock", -"600918": "stock", -"中泰证券": "stock", -"300836": "stock", -"佰奥智能": "stock", -"603950": "stock", -"长源东谷": "stock", -"002982": "stock", -"湘佳股份": "stock", -"688396": "stock", -"华润微": "stock", -"300818": "stock", -"耐普矿机": "stock", -"688266": "stock", -"泽璟制药": "stock", -"688158": "stock", -"优刻得": "stock", -"300812": "stock", -"易天股份": "stock", -"688198": "stock", -"佰仁医疗": "stock", -"688399": "stock", -"硕世生物": "stock", -"603390": "stock", -"通达电气": "stock", -"300802": "stock", -"矩子科技": "stock", -"300564": "stock", -"筑博设计": "stock", -"688021": "stock", -"奥福环保": "stock", -"300799": "stock", -"*ST左江": "stock", -"002965": "stock", -"祥鑫科技": "stock", -"300795": "stock", -"米奥会展": "stock", -"300789": "stock", -"唐源电气": "stock", 
-"003816": "stock", -"中国广核": "stock", -"688188": "stock", -"柏楚电子": "stock", -"603530": "stock", -"神马电力": "stock", -"603279": "stock", -"景津装备": "stock", -"688388": "stock", -"嘉元科技": "stock", -"688066": "stock", -"航天宏图": "stock", -"688033": "stock", -"天宜上佳": "stock", -"688028": "stock", -"沃尔德": "stock", -"688122": "stock", -"西部超导": "stock", -"688018": "stock", -"乐鑫科技": "stock", -"603327": "stock", -"福蓉科技": "stock", -"300775": "stock", -"三角防务": "stock", -"300778": "stock", -"新城市": "stock", -"300773": "stock", -"拉卡拉": "stock", -"300769": "stock", -"德方纳米": "stock", -"300762": "stock", -"上海瀚讯": "stock", -"601865": "stock", -"福莱特": "stock", -"601615": "stock", -"明阳智能": "stock", -"601298": "stock", -"青岛港": "stock", -"603739": "stock", -"蔚蓝生物": "stock", -"603629": "stock", -"利通电子": "stock", -"002941": "stock", -"新疆交建": "stock", -"300751": "stock", -"迈为股份": "stock", -"002938": "stock", -"鹏鼎控股": "stock", -"603790": "stock", -"雅运股份": "stock", -"601068": "stock", -"中铝国际": "stock", -"002933": "stock", -"新兴装备": "stock", -"603590": "stock", -"康辰药业": "stock", -"603693": "stock", -"江苏新能": "stock", -"300750": "stock", -"宁德时代": "stock", -"603045": "stock", -"福达合金": "stock", -"300634": "stock", -"彩讯股份": "stock", -"603059": "stock", -"倍加洁": "stock", -"603680": "stock", -"今创集团": "stock", -"002927": "stock", -"泰永长征": "stock", -"603709": "stock", -"中源家居": "stock", -"603871": "stock", -"嘉友国际": "stock", -"603356": "stock", -"华菱精工": "stock", -"300624": "stock", -"万兴科技": "stock", -"603056": "stock", -"德邦股份": "stock", -"300733": "stock", -"西菱动力": "stock", -"300684": "stock", -"中石科技": "stock", -"002919": "stock", -"名臣健康": "stock", -"600025": "stock", -"华能水电": "stock", -"002916": "stock", -"深南电路": "stock", -"603365": "stock", -"水星家纺": "stock", -"601019": "stock", -"山东出版": "stock", -"300721": "stock", -"怡达股份": "stock", -"603507": "stock", -"振江股份": "stock", -"300711": "stock", -"广哈通信": "stock", -"603260": "stock", -"合盛硅业": "stock", -"603683": "stock", -"晶华新材": "stock", -"300710": "stock", -"万隆光电": "stock", -"603829": "stock", -"洛凯股份": "stock", -"603499": "stock", -"翔港科技": "stock", -"603363": "stock", -"傲农生物": "stock", -"603055": "stock", -"台华新材": "stock", -"603813": "stock", -"原尚股份": "stock", -"002893": "stock", -"京能热力": "stock", -"002899": "stock", -"英派斯": "stock", -"603648": "stock", -"畅联股份": "stock", -"603277": "stock", -"银都股份": "stock", -"603183": "stock", -"建研院": "stock", -"300699": "stock", -"光威复材": "stock", -"002895": "stock", -"川恒股份": "stock", -"300696": "stock", -"爱乐达": "stock", -"002891": "stock", -"中宠股份": "stock", -"601326": "stock", -"秦港股份": "stock", -"603721": "stock", -"中广天择": "stock", -"603458": "stock", -"勘设股份": "stock", -"300683": "stock", -"海特生物": "stock", -"002890": "stock", -"弘宇股份": "stock", -"300679": "stock", -"电连技术": "stock", -"603730": "stock", -"岱美股份": "stock", -"603063": "stock", -"禾望电气": "stock", -"300672": "stock", -"国科微": "stock", -"300671": "stock", -"富满微": "stock", -"603938": "stock", -"三孚股份": "stock", -"603335": "stock", -"迪生力": "stock", -"603226": "stock", -"菲林格尔": "stock", -"300663": "stock", -"科蓝软件": "stock", -"300661": "stock", -"圣邦股份": "stock", -"603580": "stock", -"艾艾精工": "stock", -"300657": "stock", -"弘信电子": "stock", -"300653": "stock", -"正海生物": "stock", -"603180": "stock", -"金牌厨柜": "stock", -"603113": "stock", -"金能科技": "stock", -"603086": "stock", -"先达股份": "stock", -"300643": "stock", -"万通智控": "stock", -"002868": "stock", -"绿康生化": "stock", -"002867": "stock", -"周大生": "stock", -"300554": "stock", -"三超新材": "stock", -"002863": "stock", -"今飞凯达": "stock", -"300633": "stock", -"开立医疗": 
"stock", -"603586": "stock", -"金麒麟": "stock", -"300627": "stock", -"华测导航": "stock", -"603133": "stock", -"*ST碳元": "stock", -"300623": "stock", -"捷捷微电": "stock", -"603960": "stock", -"克来机电": "stock", -"603991": "stock", -"至正股份": "stock", -"603238": "stock", -"诺邦股份": "stock", -"603839": "stock", -"安正时尚": "stock", -"603208": "stock", -"江山欧派": "stock", -"603626": "stock", -"科森科技": "stock", -"300578": "stock", -"会畅通讯": "stock", -"603358": "stock", -"华达科技": "stock", -"603429": "stock", -"集友股份": "stock", -"300597": "stock", -"吉大通信": "stock", -"002839": "stock", -"张家港行": "stock", -"601881": "stock", -"中国银河": "stock", -"300598": "stock", -"诚迈科技": "stock", -"603638": "stock", -"艾迪精密": "stock", -"603337": "stock", -"杰克股份": "stock", -"603668": "stock", -"天马科技": "stock", -"300580": "stock", -"贝斯特": "stock", -"603266": "stock", -"天龙股份": "stock", -"603032": "stock", -"德新科技": "stock", -"300581": "stock", -"晨曦航空": "stock", -"603389": "stock", -"亚振家居": "stock", -"002831": "stock", -"裕同科技": "stock", -"002829": "stock", -"星网宇达": "stock", -"603098": "stock", -"森特股份": "stock", -"002830": "stock", -"名雕股份": "stock", -"002820": "stock", -"桂发祥": "stock", -"603060": "stock", -"国检集团": "stock", -"300556": "stock", -"丝路视觉": "stock", -"300560": "stock", -"中富通": "stock", -"603667": "stock", -"五洲新春": "stock", -"603859": "stock", -"能科科技": "stock", -"603313": "stock", -"梦百合": "stock", -"601128": "stock", -"常熟银行": "stock", -"603658": "stock", -"安图生物": "stock", -"002810": "stock", -"山东赫达": "stock", -"603007": "stock", -"ST花王": "stock", -"603515": "stock", -"欧普照明": "stock", -"601595": "stock", -"上海电影": "stock", -"300531": "stock", -"优博讯": "stock", -"600919": "stock", -"江苏银行": "stock", -"300517": "stock", -"海波重科": "stock", -"601127": "stock", -"赛力斯": "stock", -"601611": "stock", -"中国核建": "stock", -"603737": "stock", -"三棵树": "stock", -"300513": "stock", -"恒实科技": "stock", -"300512": "stock", -"中亚股份": "stock", -"002797": "stock", -"第一创业": "stock", -"300507": "stock", -"苏奥传感": "stock", -"603868": "stock", -"飞科电器": "stock", -"603919": "stock", -"金徽酒": "stock", -"603520": "stock", -"司太立": "stock", -"603866": "stock", -"桃李面包": "stock", -"300495": "stock", -"*ST美尚": "stock", -"603398": "stock", -"沐邦高科": "stock", -"603223": "stock", -"恒通股份": "stock", -"603066": "stock", -"音飞储存": "stock", -"300464": "stock", -"星徽股份": "stock", -"002757": "stock", -"南兴股份": "stock", -"300450": "stock", -"先导智能": "stock", -"002755": "stock", -"奥赛康": "stock", -"300452": "stock", -"山河药辅": "stock", -"300451": "stock", -"创业慧康": "stock", -"300438": "stock", -"鹏辉能源": "stock", -"300446": "stock", -"乐凯新材": "stock", -"300441": "stock", -"鲍斯股份": "stock", -"603158": "stock", -"腾龙股份": "stock", -"603030": "stock", -"*ST全筑": "stock", -"002749": "stock", -"国光股份": "stock", -"603519": "stock", -"立霸股份": "stock", -"603969": "stock", -"银龙股份": "stock", -"603898": "stock", -"好莱客": "stock", -"603678": "stock", -"火炬电子": "stock", -"300412": "stock", -"迦南科技": "stock", -"300411": "stock", -"金盾股份": "stock", -"603998": "stock", -"方盛制药": "stock", -"601969": "stock", -"海南矿业": "stock", -"603368": "stock", -"柳药集团": "stock", -"300407": "stock", -"凯发电气": "stock", -"603166": "stock", -"福达股份": "stock", -"603988": "stock", -"中电电机": "stock", -"300406": "stock", -"九强生物": "stock", -"603306": "stock", -"华懋科技": "stock", -"601016": "stock", -"节能风电": "stock", -"300396": "stock", -"迪瑞医疗": "stock", -"603806": "stock", -"福斯特": "stock", -"300391": "stock", -"长药控股": "stock", -"300389": "stock", -"艾比森": "stock", -"603009": "stock", -"北特科技": "stock", -"002727": "stock", -"一心堂": "stock", -"603006": "stock", 
-"联明股份": "stock", -"002715": "stock", -"登云股份": "stock", -"300382": "stock", -"斯莱克": "stock", -"300380": "stock", -"安硕信息": "stock", -"300376": "stock", -"易事特": "stock", -"002716": "stock", -"金贵银业": "stock", -"002714": "stock", -"牧原股份": "stock", -"300365": "stock", -"恒华科技": "stock", -"002705": "stock", -"新宝股份": "stock", -"002703": "stock", -"浙江世宝": "stock", -"300352": "stock", -"北信源": "stock", -"002698": "stock", -"博实股份": "stock", -"603399": "stock", -"吉翔股份": "stock", -"002693": "stock", -"双成药业": "stock", -"300342": "stock", -"天银机电": "stock", -"002687": "stock", -"乔治白": "stock", -"300335": "stock", -"迪森股份": "stock", -"601608": "stock", -"中信重工": "stock", -"002682": "stock", -"龙洲股份": "stock", -"002679": "stock", -"福建金森": "stock", -"002675": "stock", -"东诚药业": "stock", -"002672": "stock", -"东江环保": "stock", -"300307": "stock", -"慈星股份": "stock", -"002665": "stock", -"首航高科": "stock", -"300303": "stock", -"聚飞光电": "stock", -"002663": "stock", -"普邦股份": "stock", -"601929": "stock", -"吉视传媒": "stock", -"002647": "stock", -"仁东控股": "stock", -"002641": "stock", -"公元股份": "stock", -"300276": "stock", -"三丰智能": "stock", -"002631": "stock", -"德尔未来": "stock", -"601028": "stock", -"玉龙股份": "stock", -"002625": "stock", -"光启技术": "stock", -"601669": "stock", -"中国电建": "stock", -"002620": "stock", -"瑞和股份": "stock", -"300263": "stock", -"隆华科技": "stock", -"300264": "stock", -"佳创视讯": "stock", -"002614": "stock", -"奥佳华": "stock", -"601908": "stock", -"京运通": "stock", -"002611": "stock", -"东方精工": "stock", -"601222": "stock", -"林洋能源": "stock", -"300252": "stock", -"金信诺": "stock", -"300247": "stock", -"融捷健康": "stock", -"002602": "stock", -"世纪华通": "stock", -"002596": "stock", -"海南瑞泽": "stock", -"300230": "stock", -"永利股份": "stock", -"300228": "stock", -"富瑞特装": "stock", -"300221": "stock", -"银禧科技": "stock", -"002581": "stock", -"未名医药": "stock", -"002577": "stock", -"雷柏科技": "stock", -"002575": "stock", -"群兴玩具": "stock", -"002574": "stock", -"明牌珠宝": "stock", -"002573": "stock", -"清新环境": "stock", -"002566": "stock", -"益盛药业": "stock", -"002562": "stock", -"兄弟科技": "stock", -"002553": "stock", -"南方精工": "stock", -"002552": "stock", -"宝鼎科技": "stock", -"300184": "stock", -"力源信息": "stock", -"601992": "stock", -"金隅集团": "stock", -"300179": "stock", -"四方达": "stock", -"300170": "stock", -"汉得信息": "stock", -"601137": "stock", -"博威合金": "stock", -"002542": "stock", -"中化岩土": "stock", -"300168": "stock", -"万达信息": "stock", -"300165": "stock", -"天瑞仪器": "stock", -"002540": "stock", -"亚太科技": "stock", -"002537": "stock", -"海联金汇": "stock", -"002536": "stock", -"飞龙股份": "stock", -"601890": "stock", -"亚星锚链": "stock", -"300147": "stock", -"香雪制药": "stock", -"002523": "stock", -"天桥起重": "stock", -"002519": "stock", -"银河电子": "stock", -"002501": "stock", -"利源股份": "stock", -"300140": "stock", -"节能环境": "stock", -"002491": "stock", -"通鼎互联": "stock", -"002489": "stock", -"浙江永强": "stock", -"300131": "stock", -"英唐智控": "stock", -"601377": "stock", -"兴业证券": "stock", -"002486": "stock", -"嘉麟杰": "stock", -"300125": "stock", -"聆达股份": "stock", -"002484": "stock", -"江海股份": "stock", -"002483": "stock", -"润邦股份": "stock", -"300120": "stock", -"经纬辉开": "stock", -"300108": "stock", -"*ST吉药": "stock", -"300105": "stock", -"龙源技术": "stock", -"002459": "stock", -"晶澳科技": "stock", -"002439": "stock", -"启明星辰": "stock", -"002438": "stock", -"江苏神通": "stock", -"002428": "stock", -"云南锗业": "stock", -"300088": "stock", -"长信科技": "stock", -"002407": "stock", -"多氟多": "stock", -"002401": "stock", -"中远海科": "stock", -"002397": "stock", -"梦洁股份": "stock", -"002393": "stock", -"力生制药": "stock", -"002389": 
"stock", -"航天彩虹": "stock", -"002381": "stock", -"双箭股份": "stock", -"300066": "stock", -"三川智慧": "stock", -"300063": "stock", -"天龙集团": "stock", -"300059": "stock", -"东方财富": "stock", -"002372": "stock", -"伟星新材": "stock", -"002370": "stock", -"亚太药业": "stock", -"002366": "stock", -"融发核电": "stock", -"002362": "stock", -"汉王科技": "stock", -"300056": "stock", -"中创环保": "stock", -"300055": "stock", -"万邦达": "stock", -"300053": "stock", -"航宇微": "stock", -"002353": "stock", -"杰瑞股份": "stock", -"002342": "stock", -"巨力索具": "stock", -"002341": "stock", -"新纶新材": "stock", -"002339": "stock", -"积成电子": "stock", -"300050": "stock", -"世纪鼎利": "stock", -"300043": "stock", -"星辉娱乐": "stock", -"601801": "stock", -"皖新传媒": "stock", -"002338": "stock", -"奥普光电": "stock", -"002337": "stock", -"赛象科技": "stock", -"002334": "stock", -"英威腾": "stock", -"301163": "stock", -"宏德股份": "stock", -"688079": "stock", -"美迪凯": "stock", -"002456": "stock", -"欧菲光": "stock", -"002009": "stock", -"天奇股份": "stock", -"600765": "stock", -"中航重机": "stock", -"300127": "stock", -"银河磁体": "stock", -"002723": "stock", -"小崧股份": "stock", -"301202": "stock", -"朗威股份": "stock", -"300232": "stock", -"洲明科技": "stock", -"002555": "stock", -"三七互娱": "stock", -"603990": "stock", -"麦迪科技": "stock", -"600825": "stock", -"新华传媒": "stock", -"603077": "stock", -"和邦生物": "stock", -"000159": "stock", -"国际实业": "stock", -"601860": "stock", -"紫金银行": "stock", -"002463": "stock", -"沪电股份": "stock", -"688582": "stock", -"芯动联科": "stock", -"001308": "stock", -"康冠科技": "stock", -"300747": "stock", -"锐科激光": "stock", -"300155": "stock", -"安居宝": "stock", -"600114": "stock", -"东睦股份": "stock", -"601598": "stock", -"中国外运": "stock", -"002225": "stock", -"濮耐股份": "stock", -"300755": "stock", -"华致酒行": "stock", -"300967": "stock", -"晓鸣股份": "stock", -"301366": "stock", -"一博科技": "stock", -"000422": "stock", -"湖北宜化": "stock", -"002094": "stock", -"青岛金王": "stock", -"688095": "stock", -"福昕软件": "stock", -"300090": "stock", -"盛运退": "stock", -"688236": "stock", -"春立医疗": "stock", -"600233": "stock", -"圆通速递": "stock", -"301498": "stock", -"乖宝宠物": "stock", -"600351": "stock", -"亚宝药业": "stock", -"688007": "stock", -"光峰科技": "stock", -"603986": "stock", -"兆易创新": "stock", -"688682": "stock", -"霍莱沃": "stock", -"002141": "stock", -"贤丰控股": "stock", -"301072": "stock", -"中捷精工": "stock", -"831304": "stock", -"迪尔化工": "stock", -"601699": "stock", -"潞安环能": "stock", -"301161": "stock", -"唯万密封": "stock", -"688625": "stock", -"呈和科技": "stock", -"601200": "stock", -"上海环境": "stock", -"871553": "stock", -"凯腾精工": "stock", -"000541": "stock", -"佛山照明": "stock", -"688036": "stock", -"传音控股": "stock", -"688686": "stock", -"奥普特": "stock", -"300073": "stock", -"当升科技": "stock", -"000510": "stock", -"新金路": "stock", -"002756": "stock", -"永兴材料": "stock", -"002661": "stock", -"克明食品": "stock", -"000542": "stock", -"TCL通讯": "stock", -"600775": "stock", -"南京熊猫": "stock", -"600105": "stock", -"永鼎股份": "stock", -"600250": "stock", -"南纺股份": "stock", -"688123": "stock", -"聚辰股份": "stock", -"002535": "stock", -"林州重机": "stock", -"603116": "stock", -"红蜻蜓": "stock", -"301301": "stock", -"川宁生物": "stock", -"300348": "stock", -"长亮科技": "stock", -"600513": "stock", -"联环药业": "stock", -"688060": "stock", -"云涌科技": "stock", -"001914": "stock", -"招商积余": "stock", -"300036": "stock", -"超图软件": "stock", -"688197": "stock", -"首药控股-U": "stock", -"002258": "stock", -"利尔化学": "stock", -"300590": "stock", -"移为通信": "stock", -"688693": "stock", -"锴威特": "stock", -"002315": "stock", -"焦点科技": "stock", -"600967": "stock", -"内蒙一机": "stock", -"000933": "stock", -"神火股份": "stock", 
-"300853": "stock", -"申昊科技": "stock", -"002015": "stock", -"协鑫能科": "stock", -"601099": "stock", -"太平洋": "stock", -"003001": "stock", -"中岩大地": "stock", -"300193": "stock", -"佳士科技": "stock", -"002503": "stock", -"*ST搜特": "stock", -"600978": "stock", -"*ST宜生": "stock", -"002838": "stock", -"道恩股份": "stock", -"301380": "stock", -"挖金客": "stock", -"301286": "stock", -"侨源股份": "stock", -"430047": "stock", -"诺思兰德": "stock", -"300565": "stock", -"科信技术": "stock", -"300207": "stock", -"欣旺达": "stock", -"301159": "stock", -"三维天地": "stock", -"002216": "stock", -"三全食品": "stock", -"600487": "stock", -"亨通光电": "stock", -"601877": "stock", -"正泰电器": "stock", -"600890": "stock", -"退市中房": "stock", -"300735": "stock", -"光弘科技": "stock", -"002441": "stock", -"众业达": "stock", -"300670": "stock", -"大烨智能": "stock", -"688178": "stock", -"万德斯": "stock", -"000036": "stock", -"华联控股": "stock", -"603556": "stock", -"海兴电力": "stock", -"000828": "stock", -"东莞控股": "stock", -"605388": "stock", -"均瑶健康": "stock", -"603126": "stock", -"中材节能": "stock", -"600290": "stock", -"*ST华仪": "stock", -"002639": "stock", -"雪人股份": "stock", -"830896": "stock", -"旺成科技": "stock", -"688612": "stock", -"威迈斯": "stock", -"601606": "stock", -"长城军工": "stock", -"600647": "stock", -"*ST同达": "stock", -"605166": "stock", -"聚合顺": "stock", -"002617": "stock", -"露笑科技": "stock", -"002061": "stock", -"浙江交科": "stock", -"002199": "stock", -"东晶电子": "stock", -"301230": "stock", -"泓博医药": "stock", -"300852": "stock", -"四会富仕": "stock", -"301071": "stock", -"力量钻石": "stock", -"301377": "stock", -"鼎泰高科": "stock", -"300570": "stock", -"太辰光": "stock", -"002175": "stock", -"东方智造": "stock", -"603012": "stock", -"创力集团": "stock", -"603387": "stock", -"基蛋生物": "stock", -"600618": "stock", -"氯碱化工": "stock", -"002244": "stock", -"滨江集团": "stock", -"600128": "stock", -"苏豪弘业": "stock", -"000996": "stock", -"*ST中期": "stock", -"603799": "stock", -"华友钴业": "stock", -"688499": "stock", -"利元亨": "stock", -"300061": "stock", -"旗天科技": "stock", -"870866": "stock", -"绿亨科技": "stock", -"002056": "stock", -"横店东磁": "stock", -"688302": "stock", -"海创药业-U": "stock", -"300141": "stock", -"和顺电气": "stock", -"603700": "stock", -"宁水集团": "stock", -"002918": "stock", -"蒙娜丽莎": "stock", -"600439": "stock", -"瑞贝卡": "stock", -"000881": "stock", -"中广核技": "stock", -"603361": "stock", -"浙江国祥": "stock", -"600763": "stock", -"通策医疗": "stock", -"600518": "stock", -"ST康美": "stock", -"300692": "stock", -"中环环保": "stock", -"603131": "stock", -"上海沪工": "stock", -"600381": "stock", -"青海春天": "stock", -"831689": "stock", -"克莱特": "stock", -"000919": "stock", -"金陵药业": "stock", -"300034": "stock", -"钢研高纳": "stock", -"603070": "stock", -"万控智造": "stock", -"002467": "stock", -"二六三": "stock", -"000650": "stock", -"仁和药业": "stock", -"002526": "stock", -"山东矿机": "stock", -"300534": "stock", -"陇神戎发": "stock", -"600088": "stock", -"中视传媒": "stock", -"603053": "stock", -"成都燃气": "stock", -"603577": "stock", -"汇金通": "stock", -"300471": "stock", -"厚普股份": "stock", -"600706": "stock", -"曲江文旅": "stock", -"300523": "stock", -"辰安科技": "stock", -"603058": "stock", -"永吉股份": "stock", -"600784": "stock", -"鲁银投资": "stock", -"600548": "stock", -"深高速": "stock", -"603477": "stock", -"巨星农牧": "stock", -"688505": "stock", -"复旦张江": "stock", -"002221": "stock", -"东华能源": "stock", -"301361": "stock", -"众智科技": "stock", -"002327": "stock", -"富安娜": "stock", -"301336": "stock", -"趣睡科技": "stock", -"002492": "stock", -"恒基达鑫": "stock", -"688699": "stock", -"明微电子": "stock", -"600363": "stock", -"联创光电": "stock", -"300819": "stock", -"聚杰微纤": "stock", -"002305": "stock", -"南国置业": 
"stock", -"300962": "stock", -"中金辐照": "stock", -"600366": "stock", -"宁波韵升": "stock", -"832651": "stock", -"天罡股份": "stock", -"300561": "stock", -"汇金科技": "stock", -"002522": "stock", -"浙江众成": "stock", -"301315": "stock", -"威士顿": "stock", -"300023": "stock", -"宝德退": "stock", -"300538": "stock", -"同益股份": "stock", -"300540": "stock", -"蜀道装备": "stock", -"000962": "stock", -"东方钽业": "stock", -"000883": "stock", -"湖北能源": "stock", -"002671": "stock", -"龙泉股份": "stock", -"688010": "stock", -"福光股份": "stock", -"002592": "stock", -"ST八菱": "stock", -"002898": "stock", -"赛隆药业": "stock", -"001283": "stock", -"豪鹏科技": "stock", -"002795": "stock", -"永和智控": "stock", -"300198": "stock", -"纳川股份": "stock", -"688114": "stock", -"华大智造": "stock", -"600171": "stock", -"上海贝岭": "stock", -"601008": "stock", -"连云港": "stock", -"301270": "stock", -"汉仪股份": "stock", -"000750": "stock", -"国海证券": "stock", -"002743": "stock", -"富煌钢构": "stock", -"600616": "stock", -"金枫酒业": "stock", -"000690": "stock", -"宝新能源": "stock", -"002016": "stock", -"世荣兆业": "stock", -"688121": "stock", -"卓然股份": "stock", -"605366": "stock", -"宏柏新材": "stock", -"301107": "stock", -"瑜欣电子": "stock", -"300585": "stock", -"奥联电子": "stock", -"000863": "stock", -"三湘印象": "stock", -"000682": "stock", -"东方电子": "stock", -"688382": "stock", -"益方生物-U": "stock", -"002184": "stock", -"海得控制": "stock", -"301152": "stock", -"天力锂能": "stock", -"000912": "stock", -"泸天化": "stock", -"688099": "stock", -"晶晨股份": "stock", -"002920": "stock", -"德赛西威": "stock", -"301083": "stock", -"百胜智能": "stock", -"300666": "stock", -"江丰电子": "stock", -"688248": "stock", -"南网科技": "stock", -"600685": "stock", -"中船防务": "stock", -"300477": "stock", -"合纵科技": "stock", -"601579": "stock", -"会稽山": "stock", -"600968": "stock", -"海油发展": "stock", -"603001": "stock", -"ST奥康": "stock", -"002878": "stock", -"元隆雅图": "stock", -"002419": "stock", -"天虹股份": "stock", -"002887": "stock", -"绿茵生态": "stock", -"002700": "stock", -"ST浩源": "stock", -"300449": "stock", -"汉邦高科": "stock", -"002987": "stock", -"京北方": "stock", -"300300": "stock", -"海峡创新": "stock", -"300054": "stock", -"鼎龙股份": "stock", -"000792": "stock", -"盐湖股份": "stock", -"002828": "stock", -"贝肯能源": "stock", -"000977": "stock", -"浪潮信息": "stock", -"688332": "stock", -"中科蓝讯": "stock", -"002414": "stock", -"高德红外": "stock", -"300185": "stock", -"通裕重工": "stock", -"000539": "stock", -"粤电力A": "stock", -"300381": "stock", -"溢多利": "stock", -"871753": "stock", -"天纺标": "stock", -"300157": "stock", -"新锦动力": "stock", -"600383": "stock", -"金地集团": "stock", -"603379": "stock", -"三美股份": "stock", -"000995": "stock", -"皇台酒业": "stock", -"002377": "stock", -"国创高新": "stock", -"830974": "stock", -"凯大催化": "stock", -"300753": "stock", -"爱朋医疗": "stock", -"600684": "stock", -"珠江股份": "stock", -"603168": "stock", -"莎普爱思": "stock", -"688106": "stock", -"金宏气体": "stock", -"600697": "stock", -"欧亚集团": "stock", -"300282": "stock", -"*ST三盛": "stock", -"301043": "stock", -"绿岛风": "stock", -"603068": "stock", -"博通集成": "stock", -"000588": "stock", -"PT粤金曼": "stock", -"300266": "stock", -"兴源环境": "stock", -"003036": "stock", -"泰坦股份": "stock", -"000595": "stock", -"宝塔实业": "stock", -"001289": "stock", -"龙源电力": "stock", -"688348": "stock", -"昱能科技": "stock", -"603677": "stock", -"奇精机械": "stock", -"002622": "stock", -"皓宸医疗": "stock", -"600052": "stock", -"东望时代": "stock", -"688295": "stock", -"中复神鹰": "stock", -"002084": "stock", -"海鸥住工": "stock", -"000880": "stock", -"潍柴重机": "stock", -"002724": "stock", -"海洋王": "stock", -"688203": "stock", -"海正生材": "stock", -"603968": "stock", -"醋化股份": "stock", -"301238": 
"stock", -"瑞泰新材": "stock", -"601169": "stock", -"北京银行": "stock", -"002149": "stock", -"西部材料": "stock", -"301260": "stock", -"格力博": "stock", -"688165": "stock", -"埃夫特-U": "stock", -"600475": "stock", -"华光环能": "stock", -"834599": "stock", -"同力股份": "stock", -"600694": "stock", -"大商股份": "stock", -"002563": "stock", -"森马服饰": "stock", -"872374": "stock", -"云里物里": "stock", -"300070": "stock", -"碧水源": "stock", -"600571": "stock", -"信雅达": "stock", -"301231": "stock", -"荣信文化": "stock", -"000921": "stock", -"海信家电": "stock", -"600600": "stock", -"青岛啤酒": "stock", -"301548": "stock", -"崇德科技": "stock", -"300280": "stock", -"紫天科技": "stock", -"600870": "stock", -"退市厦华": "stock", -"688677": "stock", -"海泰新光": "stock", -"002134": "stock", -"天津普林": "stock", -"603823": "stock", -"百合花": "stock", -"601236": "stock", -"红塔证券": "stock", -"600700": "stock", -"*ST数码": "stock", -"301216": "stock", -"万凯新材": "stock", -"600096": "stock", -"云天化": "stock", -"300209": "stock", -"ST有棵树": "stock", -"603655": "stock", -"朗博科技": "stock", -"300153": "stock", -"科泰电源": "stock", -"603212": "stock", -"赛伍技术": "stock", -"688466": "stock", -"金科环境": "stock", -"002196": "stock", -"方正电机": "stock", -"300877": "stock", -"金春股份": "stock", -"605228": "stock", -"神通科技": "stock", -"603170": "stock", -"宝立食品": "stock", -"002841": "stock", -"视源股份": "stock", -"300277": "stock", -"海联讯": "stock", -"300957": "stock", -"贝泰妮": "stock", -"688395": "stock", -"正弦电气": "stock", -"600716": "stock", -"凤凰股份": "stock", -"000906": "stock", -"浙商中拓": "stock", -"600885": "stock", -"宏发股份": "stock", -"300011": "stock", -"鼎汉技术": "stock", -"002072": "stock", -"凯瑞德": "stock", -"002098": "stock", -"浔兴股份": "stock", -"002925": "stock", -"盈趣科技": "stock", -"688256": "stock", -"寒武纪-U": "stock", -"600130": "stock", -"波导股份": "stock", -"688596": "stock", -"正帆科技": "stock", -"002275": "stock", -"桂林三金": "stock", -"600446": "stock", -"金证股份": "stock", -"600495": "stock", -"晋西车轴": "stock", -"601921": "stock", -"浙版传媒": "stock", -"002367": "stock", -"康力电梯": "stock", -"300855": "stock", -"图南股份": "stock", -"600075": "stock", -"新疆天业": "stock", -"688690": "stock", -"纳微科技": "stock", -"600781": "stock", -"退市辅仁": "stock", -"688606": "stock", -"奥泰生物": "stock", -"601233": "stock", -"桐昆股份": "stock", -"600058": "stock", -"五矿发展": "stock", -"688030": "stock", -"山石网科": "stock", -"000838": "stock", -"财信发展": "stock", -"603579": "stock", -"荣泰健康": "stock", -"600197": "stock", -"伊力特": "stock", -"300690": "stock", -"双一科技": "stock", -"002858": "stock", -"力盛体育": "stock", -"600665": "stock", -"天地源": "stock", -"300457": "stock", -"赢合科技": "stock", -"301518": "stock", -"长华化学": "stock", -"872392": "stock", -"佳合科技": "stock", -"600243": "stock", -"青海华鼎": "stock", -"833455": "stock", -"汇隆活塞": "stock", -"688305": "stock", -"科德数控": "stock", -"300341": "stock", -"麦克奥迪": "stock", -"600785": "stock", -"新华百货": "stock", -"300717": "stock", -"华信新材": "stock", -"600359": "stock", -"新农开发": "stock", -"002990": "stock", -"盛视科技": "stock", -"300834": "stock", -"星辉环材": "stock", -"688031": "stock", -"星环科技-U": "stock", -"603181": "stock", -"皇马科技": "stock", -"688557": "stock", -"兰剑智能": "stock", -"603299": "stock", -"苏盐井神": "stock", -"838971": "stock", -"天马新材": "stock", -"300443": "stock", -"金雷股份": "stock", -"000014": "stock", -"沙河股份": "stock", -"600693": "stock", -"东百集团": "stock", -"603188": "stock", -"亚邦股份": "stock", -"002045": "stock", -"国光电器": "stock", -"000417": "stock", -"合肥百货": "stock", -"832802": "stock", -"保丽洁": "stock", -"300841": "stock", -"康华生物": "stock", -"688012": "stock", -"中微公司": "stock", -"002824": "stock", -"和胜股份": 
"stock", -"605111": "stock", -"新洁能": "stock", -"001299": "stock", -"美能能源": "stock", -"002538": "stock", -"司尔特": "stock", -"600218": "stock", -"全柴动力": "stock", -"688051": "stock", -"佳华科技": "stock", -"603311": "stock", -"金海高科": "stock", -"300945": "stock", -"曼卡龙": "stock", -"600241": "stock", -"时代万恒": "stock", -"000409": "stock", -"云鼎科技": "stock", -"300098": "stock", -"高新兴": "stock", -"603496": "stock", -"恒为科技": "stock", -"600110": "stock", -"诺德股份": "stock", -"000688": "stock", -"国城矿业": "stock", -"300259": "stock", -"新天科技": "stock", -"000550": "stock", -"江铃汽车": "stock", -"301033": "stock", -"迈普医学": "stock", -"832089": "stock", -"禾昌聚合": "stock", -"000068": "stock", -"华控赛格": "stock", -"688167": "stock", -"炬光科技": "stock", -"003002": "stock", -"壶化股份": "stock", -"300206": "stock", -"理邦仪器": "stock", -"000939": "stock", -"凯迪退": "stock", -"001333": "stock", -"光华股份": "stock", -"301130": "stock", -"西点药业": "stock", -"601179": "stock", -"中国西电": "stock", -"688105": "stock", -"诺唯赞": "stock", -"002609": "stock", -"捷顺科技": "stock", -"688153": "stock", -"唯捷创芯": "stock", -"688613": "stock", -"奥精医疗": "stock", -"301039": "stock", -"中集车辆": "stock", -"600410": "stock", -"华胜天成": "stock", -"688508": "stock", -"芯朋微": "stock", -"603983": "stock", -"丸美股份": "stock", -"002398": "stock", -"垒知集团": "stock", -"000638": "stock", -"万方发展": "stock", -"300400": "stock", -"劲拓股份": "stock", -"000552": "stock", -"甘肃能化": "stock", -"601985": "stock", -"中国核电": "stock", -"688221": "stock", -"前沿生物-U": "stock", -"301399": "stock", -"英特科技": "stock", -"300433": "stock", -"蓝思科技": "stock", -"301300": "stock", -"远翔新材": "stock", -"301228": "stock", -"实朴检测": "stock", -"300250": "stock", -"初灵信息": "stock", -"300456": "stock", -"赛微电子": "stock", -"300113": "stock", -"顺网科技": "stock", -"430510": "stock", -"丰光精密": "stock", -"688181": "stock", -"八亿时空": "stock", -"301469": "stock", -"恒达新材": "stock", -"300904": "stock", -"威力传动": "stock", -"002375": "stock", -"亚厦股份": "stock", -"600022": "stock", -"山东钢铁": "stock", -"600207": "stock", -"安彩高科": "stock", -"300965": "stock", -"恒宇信通": "stock", -"688420": "stock", -"美腾科技": "stock", -"002788": "stock", -"鹭燕医药": "stock", -"300336": "stock", -"新文退": "stock", -"688202": "stock", -"美迪西": "stock", -"000586": "stock", -"汇源通信": "stock", -"601061": "stock", -"中信金属": "stock", -"300587": "stock", -"天铁股份": "stock", -"603605": "stock", -"珀莱雅": "stock", -"002721": "stock", -"*ST金一": "stock", -"002545": "stock", -"东方铁塔": "stock", -"688560": "stock", -"明冠新材": "stock", -"688307": "stock", -"中润光学": "stock", -"000791": "stock", -"甘肃能源": "stock", -"688130": "stock", -"晶华微": "stock", -"301066": "stock", -"万事利": "stock", -"300327": "stock", -"中颖电子": "stock", -"000610": "stock", -"西安旅游": "stock", -"300461": "stock", -"田中精机": "stock", -"605033": "stock", -"美邦股份": "stock", -"002430": "stock", -"杭氧股份": "stock", -"600272": "stock", -"开开实业": "stock", -"002183": "stock", -"怡亚通": "stock", -"002842": "stock", -"翔鹭钨业": "stock", -"000818": "stock", -"航锦科技": "stock", -"002951": "stock", -"ST金时": "stock", -"301009": "stock", -"可靠股份": "stock", -"300013": "stock", -"新宁物流": "stock", -"688161": "stock", -"威高骨科": "stock", -"600652": "stock", -"退市游久": "stock", -"300095": "stock", -"华伍股份": "stock", -"002802": "stock", -"洪汇新材": "stock", -"600138": "stock", -"中青旅": "stock", -"600980": "stock", -"北矿科技": "stock", -"601966": "stock", -"玲珑轮胎": "stock", -"603505": "stock", -"金石资源": "stock", -"688576": "stock", -"西山科技": "stock", -"601015": "stock", -"陕西黑猫": "stock", -"600293": "stock", -"三峡新材": "stock", -"000089": "stock", -"深圳机场": "stock", -"001287": "stock", 
-"中电港": "stock", -"601968": "stock", -"宝钢包装": "stock", -"688410": "stock", -"山外山": "stock", -"688563": "stock", -"航材股份": "stock", -"600572": "stock", -"康恩贝": "stock", -"002753": "stock", -"永东股份": "stock", -"600780": "stock", -"通宝能源": "stock", -"603533": "stock", -"掌阅科技": "stock", -"600935": "stock", -"华塑股份": "stock", -"300949": "stock", -"奥雅股份": "stock", -"600743": "stock", -"华远地产": "stock", -"600982": "stock", -"宁波能源": "stock", -"601919": "stock", -"中远海控": "stock", -"300645": "stock", -"正元智慧": "stock", -"603565": "stock", -"中谷物流": "stock", -"002046": "stock", -"国机精工": "stock", -"600805": "stock", -"悦达投资": "stock", -"301197": "stock", -"工大科雅": "stock", -"600635": "stock", -"大众公用": "stock", -"688025": "stock", -"杰普特": "stock", -"603527": "stock", -"众源新材": "stock", -"601288": "stock", -"农业银行": "stock", -"688119": "stock", -"中钢洛耐": "stock", -"688698": "stock", -"伟创电气": "stock", -"688639": "stock", -"华恒生物": "stock", -"000549": "stock", -"S湘火炬": "stock", -"601965": "stock", -"中国汽研": "stock", -"600959": "stock", -"江苏有线": "stock", -"603985": "stock", -"恒润股份": "stock", -"688509": "stock", -"正元地信": "stock", -"688488": "stock", -"艾迪药业": "stock", -"002615": "stock", -"哈尔斯": "stock", -"000553": "stock", -"安道麦A": "stock", -"601088": "stock", -"中国神华": "stock", -"000602": "stock", -"金马集团": "stock", -"300858": "stock", -"科拓生物": "stock", -"603023": "stock", -"威帝股份": "stock", -"002664": "stock", -"信质集团": "stock", -"688168": "stock", -"安博通": "stock", -"002637": "stock", -"赞宇科技": "stock", -"600477": "stock", -"杭萧钢构": "stock", -"301131": "stock", -"聚赛龙": "stock", -"000015": "stock", -"PT中浩A": "stock", -"688056": "stock", -"莱伯泰科": "stock", -"688179": "stock", -"阿拉丁": "stock", -"300573": "stock", -"兴齐眼药": "stock", -"002168": "stock", -"惠程科技": "stock", -"832278": "stock", -"鹿得医疗": "stock", -"300037": "stock", -"新宙邦": "stock", -"000758": "stock", -"中色股份": "stock", -"300782": "stock", -"卓胜微": "stock", -"002772": "stock", -"众兴菌业": "stock", -"002816": "stock", -"*ST和科": "stock", -"603031": "stock", -"安孚科技": "stock", -"688408": "stock", -"中信博": "stock", -"688718": "stock", -"唯赛勃": "stock", -"001215": "stock", -"千味央厨": "stock", -"600152": "stock", -"维科技术": "stock", -"002859": "stock", -"洁美科技": "stock", -"002504": "stock", -"*ST弘高": "stock", -"000679": "stock", -"大连友谊": "stock", -"600659": "stock", -"*ST花雕": "stock", -"603336": "stock", -"宏辉果蔬": "stock", -"603177": "stock", -"德创环保": "stock", -"600898": "stock", -"ST美讯": "stock", -"002761": "stock", -"浙江建投": "stock", -"603697": "stock", -"有友食品": "stock", -"000878": "stock", -"云南铜业": "stock", -"002601": "stock", -"龙佰集团": "stock", -"688246": "stock", -"嘉和美康": "stock", -"300330": "stock", -"计通退": "stock", -"002293": "stock", -"罗莱生活": "stock", -"000606": "stock", -"顺利退": "stock", -"000637": "stock", -"ST实华": "stock", -"002857": "stock", -"三晖电气": "stock", -"600537": "stock", -"亿晶光电": "stock", -"688083": "stock", -"中望软件": "stock", -"001269": "stock", -"欧晶科技": "stock", -"603303": "stock", -"得邦照明": "stock", -"600300": "stock", -"维维股份": "stock", -"301252": "stock", -"同星科技": "stock", -"603798": "stock", -"康普顿": "stock", -"600444": "stock", -"国机通用": "stock", -"688498": "stock", -"源杰科技": "stock", -"600452": "stock", -"涪陵电力": "stock", -"300026": "stock", -"红日药业": "stock", -"601928": "stock", -"凤凰传媒": "stock", -"301257": "stock", -"普蕊斯": "stock", -"002295": "stock", -"精艺股份": "stock", -"002332": "stock", -"仙琚制药": "stock", -"600772": "stock", -"S*ST龙昌": "stock", -"300730": "stock", -"科创信息": "stock", -"600079": "stock", -"人福医药": "stock", -"600150": "stock", -"中国船舶": "stock", -"002502": 
"stock", -"ST鼎龙": "stock", -"002212": "stock", -"天融信": "stock", -"688522": "stock", -"纳睿雷达": "stock", -"688207": "stock", -"格灵深瞳": "stock", -"600858": "stock", -"银座股份": "stock", -"600543": "stock", -"*ST莫高": "stock", -"600755": "stock", -"厦门国贸": "stock", -"688247": "stock", -"宣泰医药": "stock", -"600878": "stock", -"*ST北科": "stock", -"872925": "stock", -"锦好医疗": "stock", -"600354": "stock", -"敦煌种业": "stock", -"000518": "stock", -"四环生物": "stock", -"002871": "stock", -"伟隆股份": "stock", -"300267": "stock", -"尔康制药": "stock", -"600350": "stock", -"山东高速": "stock", -"002821": "stock", -"凯莱英": "stock", -"600764": "stock", -"中国海防": "stock", -"300394": "stock", -"天孚通信": "stock", -"832982": "stock", -"XD锦波生": "stock", -"603818": "stock", -"曲美家居": "stock", -"002648": "stock", -"卫星化学": "stock", -"002376": "stock", -"新北洋": "stock", -"603608": "stock", -"天创时尚": "stock", -"002618": "stock", -"丹邦退": "stock", -"688593": "stock", -"新相微": "stock", -"600123": "stock", -"兰花科创": "stock", -"601162": "stock", -"天风证券": "stock", -"300938": "stock", -"信测标准": "stock", -"000003": "stock", -"PT金田A": "stock", -"000721": "stock", -"西安饮食": "stock", -"600018": "stock", -"上港集团": "stock", -"002805": "stock", -"丰元股份": "stock", -"002170": "stock", -"芭田股份": "stock", -"688766": "stock", -"普冉股份": "stock", -"601000": "stock", -"唐山港": "stock", -"600090": "stock", -"退市济堂": "stock", -"835368": "stock", -"连城数控": "stock", -"301308": "stock", -"江波龙": "stock", -"600373": "stock", -"中文传媒": "stock", -"002584": "stock", -"西陇科学": "stock", -"600002": "stock", -"齐鲁石化": "stock", -"601566": "stock", -"九牧王": "stock", -"601069": "stock", -"西部黄金": "stock", -"002494": "stock", -"华斯股份": "stock", -"002513": "stock", -"蓝丰生化": "stock", -"600881": "stock", -"亚泰集团": "stock", -"688170": "stock", -"德龙激光": "stock", -"002676": "stock", -"顺威股份": "stock", -"000686": "stock", -"东北证券": "stock", -"688585": "stock", -"上纬新材": "stock", -"002033": "stock", -"丽江股份": "stock", -"603578": "stock", -"三星新材": "stock", -"002731": "stock", -"萃华珠宝": "stock", -"601009": "stock", -"南京银行": "stock", -"600712": "stock", -"南宁百货": "stock", -"688607": "stock", -"康众医疗": "stock", -"301292": "stock", -"海科新源": "stock", -"605006": "stock", -"山东玻纤": "stock", -"600409": "stock", -"三友化工": "stock", -"688268": "stock", -"华特气体": "stock", -"300158": "stock", -"振东制药": "stock", -"600491": "stock", -"龙元建设": "stock", -"002320": "stock", -"海峡股份": "stock", -"600714": "stock", -"金瑞矿业": "stock", -"002197": "stock", -"证通电子": "stock", -"301041": "stock", -"金百泽": "stock", -"300372": "stock", -"欣泰退": "stock", -"600165": "stock", -"宁科生物": "stock", -"688609": "stock", -"九联科技": "stock", -"688630": "stock", -"芯碁微装": "stock", -"601188": "stock", -"龙江交通": "stock", -"600167": "stock", -"联美控股": "stock", -"300453": "stock", -"三鑫医疗": "stock", -"002109": "stock", -"兴化股份": "stock", -"603139": "stock", -"康惠制药": "stock", -"300164": "stock", -"通源石油": "stock", -"301101": "stock", -"明月镜片": "stock", -"600592": "stock", -"龙溪股份": "stock", -"603939": "stock", -"益丰药房": "stock", -"688222": "stock", -"成都先导": "stock", -"600280": "stock", -"中央商场": "stock", -"600288": "stock", -"大恒科技": "stock", -"002067": "stock", -"景兴纸业": "stock", -"300386": "stock", -"飞天诚信": "stock", -"300509": "stock", -"新美星": "stock", -"300189": "stock", -"神农科技": "stock", -"601106": "stock", -"中国一重": "stock", -"603218": "stock", -"日月股份": "stock", -"300909": "stock", -"汇创达": "stock", -"603912": "stock", -"佳力图": "stock", -"838275": "stock", -"驱动力": "stock", -"600315": "stock", -"上海家化": "stock", -"300621": "stock", -"维业股份": "stock", -"300459": "stock", -"汤姆猫": "stock", 
-"002060": "stock", -"粤水电": "stock", -"000416": "stock", -"*ST民控": "stock", -"605598": "stock", -"上海港湾": "stock", -"600508": "stock", -"上海能源": "stock", -"000685": "stock", -"中山公用": "stock", -"000519": "stock", -"中兵红箭": "stock", -"002318": "stock", -"久立特材": "stock", -"300137": "stock", -"先河环保": "stock", -"600120": "stock", -"浙江东方": "stock", -"300355": "stock", -"蒙草生态": "stock", -"300356": "stock", -"光一退": "stock", -"301330": "stock", -"熵基科技": "stock", -"603108": "stock", -"润达医疗": "stock", -"600361": "stock", -"创新新材": "stock", -"603167": "stock", -"渤海轮渡": "stock", -"301170": "stock", -"锡南科技": "stock", -"600661": "stock", -"昂立教育": "stock", -"300479": "stock", -"神思电子": "stock", -"002812": "stock", -"恩捷股份": "stock", -"605599": "stock", -"菜百股份": "stock", -"600500": "stock", -"中化国际": "stock", -"002455": "stock", -"百川股份": "stock", -"301265": "stock", -"华新环保": "stock", -"001338": "stock", -"永顺泰": "stock", -"600489": "stock", -"中金黄金": "stock", -"605180": "stock", -"华生科技": "stock", -"002848": "stock", -"高斯贝尔": "stock", -"001314": "stock", -"亿道信息": "stock", -"002785": "stock", -"万里石": "stock", -"300246": "stock", -"宝莱特": "stock", -"002626": "stock", -"金达威": "stock", -"600149": "stock", -"廊坊发展": "stock", -"603996": "stock", -"退市中新": "stock", -"603817": "stock", -"海峡环保": "stock", -"603869": "stock", -"新智认知": "stock", -"688519": "stock", -"南亚新材": "stock", -"000013": "stock", -"*ST石化A": "stock", -"603628": "stock", -"清源股份": "stock", -"600113": "stock", -"浙江东日": "stock", -"600820": "stock", -"隧道股份": "stock", -"002058": "stock", -"威尔泰": "stock", -"605007": "stock", -"五洲特纸": "stock", -"300082": "stock", -"奥克股份": "stock", -"301027": "stock", -"华蓝集团": "stock", -"002487": "stock", -"大金重工": "stock", -"835237": "stock", -"力佳科技": "stock", -"688685": "stock", -"迈信林": "stock", -"600132": "stock", -"重庆啤酒": "stock", -"000739": "stock", -"普洛药业": "stock", -"600455": "stock", -"博通股份": "stock", -"688118": "stock", -"普元信息": "stock", -"002284": "stock", -"亚太股份": "stock", -"000430": "stock", -"张家界": "stock", -"300419": "stock", -"浩丰科技": "stock", -"301035": "stock", -"润丰股份": "stock", -"002773": "stock", -"康弘药业": "stock", -"002233": "stock", -"塔牌集团": "stock", -"600676": "stock", -"交运股份": "stock", -"300091": "stock", -"金通灵": "stock", -"002162": "stock", -"悦心健康": "stock", -"301100": "stock", -"风光股份": "stock", -"300892": "stock", -"品渥食品": "stock", -"002692": "stock", -"ST远程": "stock", -"600636": "stock", -"国新文化": "stock", -"300694": "stock", -"蠡湖股份": "stock", -"601225": "stock", -"陕西煤业": "stock", -"600638": "stock", -"新黄浦": "stock", -"872895": "stock", -"花溪科技": "stock", -"605155": "stock", -"西大门": "stock", -"002966": "stock", -"苏州银行": "stock", -"002515": "stock", -"金字火腿": "stock", -"600251": "stock", -"冠农股份": "stock", -"603239": "stock", -"浙江仙通": "stock", -"002579": "stock", -"中京电子": "stock", -"688610": "stock", -"埃科光电": "stock", -"300100": "stock", -"双林股份": "stock", -"300353": "stock", -"东土科技": "stock", -"002008": "stock", -"大族激光": "stock", -"688176": "stock", -"亚虹医药-U": "stock", -"002624": "stock", -"完美世界": "stock", -"688023": "stock", -"安恒信息": "stock", -"300727": "stock", -"润禾材料": "stock", -"301023": "stock", -"江南奕帆": "stock", -"000069": "stock", -"华侨城A": "stock", -"300577": "stock", -"开润股份": "stock", -"600242": "stock", -"退市中昌": "stock", -"603155": "stock", -"新亚强": "stock", -"300274": "stock", -"阳光电源": "stock", -"000623": "stock", -"吉林敖东": "stock", -"688389": "stock", -"普门科技": "stock", -"001301": "stock", -"尚太科技": "stock", -"605378": "stock", -"野马电池": "stock", -"688196": "stock", -"卓越新能": "stock", -"600960": "stock", -"渤海汽车": 
"stock", -"002645": "stock", -"华宏科技": "stock", -"000833": "stock", -"粤桂股份": "stock", -"603908": "stock", -"牧高笛": "stock", -"300887": "stock", -"谱尼测试": "stock", -"301047": "stock", -"义翘神州": "stock", -"600850": "stock", -"电科数字": "stock", -"600715": "stock", -"文投控股": "stock", -"300397": "stock", -"天和防务": "stock", -"300950": "stock", -"德固特": "stock", -"834770": "stock", -"艾能聚": "stock", -"300204": "stock", -"舒泰神": "stock", -"002289": "stock", -"ST宇顺": "stock", -"601958": "stock", -"金钼股份": "stock", -"002651": "stock", -"利君股份": "stock", -"301550": "stock", -"斯菱股份": "stock", -"300731": "stock", -"科创新源": "stock", -"430556": "stock", -"雅达股份": "stock", -"300647": "stock", -"超频三": "stock", -"300422": "stock", -"博世科": "stock", -"600774": "stock", -"汉商集团": "stock", -"603351": "stock", -"威尔药业": "stock", -"600240": "stock", -"退市华业": "stock", -"600586": "stock", -"金晶科技": "stock", -"300894": "stock", -"火星人": "stock", -"600032": "stock", -"浙江新能": "stock", -"301156": "stock", -"美农生物": "stock", -"300275": "stock", -"梅安森": "stock", -"688215": "stock", -"瑞晟智能": "stock", -"688793": "stock", -"倍轻松": "stock", -"603115": "stock", -"海星股份": "stock", -"301085": "stock", -"亚康股份": "stock", -"000530": "stock", -"冰山冷热": "stock", -"688350": "stock", -"富淼科技": "stock", -"603848": "stock", -"好太太": "stock", -"688700": "stock", -"东威科技": "stock", -"603016": "stock", -"新宏泰": "stock", -"603856": "stock", -"东宏股份": "stock", -"002836": "stock", -"新宏泽": "stock", -"600302": "stock", -"标准股份": "stock", -"300289": "stock", -"利德曼": "stock", -"300592": "stock", -"华凯易佰": "stock", -"001267": "stock", -"汇绿生态": "stock", -"300567": "stock", -"精测电子": "stock", -"300343": "stock", -"联创股份": "stock", -"603112": "stock", -"华翔股份": "stock", -"000658": "stock", -"ST海洋": "stock", -"601086": "stock", -"国芳集团": "stock", -"300065": "stock", -"海兰信": "stock", -"300501": "stock", -"海顺新材": "stock", -"000004": "stock", -"国华网安": "stock", -"301116": "stock", -"益客食品": "stock", -"002572": "stock", -"索菲亚": "stock", -"688260": "stock", -"昀冢科技": "stock", -"300401": "stock", -"花园生物": "stock", -"300046": "stock", -"台基股份": "stock", -"603386": "stock", -"骏亚科技": "stock", -"688141": "stock", -"杰华特": "stock", -"001319": "stock", -"铭科精技": "stock", -"300087": "stock", -"荃银高科": "stock", -"600666": "stock", -"ST瑞德": "stock", -"300718": "stock", -"长盛轴承": "stock", -"688767": "stock", -"博拓生物": "stock", -"301503": "stock", -"智迪科技": "stock", -"002189": "stock", -"中光学": "stock", -"300886": "stock", -"华业香料": "stock", -"600832": "stock", -"东方明珠": "stock", -"002326": "stock", -"永太科技": "stock", -"688187": "stock", -"时代电气": "stock", -"832000": "stock", -"安徽凤凰": "stock", -"605003": "stock", -"众望布艺": "stock", -"300440": "stock", -"运达科技": "stock", -"000016": "stock", -"深康佳A": "stock", -"001229": "stock", -"魅视科技": "stock", -"300530": "stock", -"领湃科技": "stock", -"002950": "stock", -"奥美医疗": "stock", -"301393": "stock", -"昊帆生物": "stock", -"603786": "stock", -"科博达": "stock", -"000507": "stock", -"珠海港": "stock", -"300086": "stock", -"康芝药业": "stock", -"002979": "stock", -"雷赛智能": "stock", -"688126": "stock", -"沪硅产业": "stock", -"001223": "stock", -"欧克科技": "stock", -"603966": "stock", -"法兰泰克": "stock", -"300119": "stock", -"瑞普生物": "stock", -"301196": "stock", -"唯科科技": "stock", -"000570": "stock", -"苏常柴A": "stock", -"600076": "stock", -"康欣新材": "stock", -"688590": "stock", -"新致软件": "stock", -"300368": "stock", -"汇金股份": "stock", -"300933": "stock", -"中辰股份": "stock", -"000401": "stock", -"冀东水泥": "stock", -"603109": "stock", -"神驰机电": "stock", -"002930": "stock", -"宏川智慧": "stock", -"688496": "stock", -"清越科技": 
"stock", -"601880": "stock", -"辽港股份": "stock", -"300171": "stock", -"东富龙": "stock", -"688617": "stock", -"惠泰医疗": "stock", -"300487": "stock", -"蓝晓科技": "stock", -"600180": "stock", -"瑞茂通": "stock", -"688543": "stock", -"国科军工": "stock", -"300350": "stock", -"华鹏飞": "stock", -"000893": "stock", -"亚钾国际": "stock", -"301297": "stock", -"富乐德": "stock", -"688539": "stock", -"高华科技": "stock", -"002783": "stock", -"凯龙股份": "stock", -"838701": "stock", -"豪声电子": "stock", -"002594": "stock", -"比亚迪": "stock", -"688696": "stock", -"极米科技": "stock", -"000611": "stock", -"天首退": "stock", -"002915": "stock", -"中欣氟材": "stock", -"835185": "stock", -"贝特瑞": "stock", -"301331": "stock", -"恩威医药": "stock", -"002201": "stock", -"正威新材": "stock", -"000900": "stock", -"现代投资": "stock", -"301226": "stock", -"祥明智能": "stock", -"300029": "stock", -"ST天龙": "stock", -"002154": "stock", -"报喜鸟": "stock", -"605298": "stock", -"必得科技": "stock", -"002404": "stock", -"嘉欣丝绸": "stock", -"001201": "stock", -"东瑞股份": "stock", -"002646": "stock", -"天佑德酒": "stock", -"601677": "stock", -"明泰铝业": "stock", -"300533": "stock", -"冰川网络": "stock", -"300562": "stock", -"乐心医疗": "stock", -"688125": "stock", -"安达智能": "stock", -"600656": "stock", -"退市博元": "stock", -"002593": "stock", -"日上集团": "stock", -"000756": "stock", -"新华制药": "stock", -"000708": "stock", -"中信特钢": "stock", -"603353": "stock", -"和顺石油": "stock", -"002564": "stock", -"*ST天沃": "stock", -"600733": "stock", -"北汽蓝谷": "stock", -"300167": "stock", -"ST迪威迅": "stock", -"000699": "stock", -"S*ST佳纸": "stock", -"300393": "stock", -"中来股份": "stock", -"603200": "stock", -"上海洗霸": "stock", -"688800": "stock", -"瑞可达": "stock", -"600292": "stock", -"远达环保": "stock", -"002307": "stock", -"北新路桥": "stock", -"300935": "stock", -"盈建科": "stock", -"605058": "stock", -"澳弘电子": "stock", -"002656": "stock", -"ST摩登": "stock", -"301488": "stock", -"豪恩汽电": "stock", -"600137": "stock", -"浪莎股份": "stock", -"301372": "stock", -"科净源": "stock", -"001317": "stock", -"三羊马": "stock", -"300033": "stock", -"同花顺": "stock", -"600266": "stock", -"城建发展": "stock", -"301389": "stock", -"隆扬电子": "stock", -"688244": "stock", -"永信至诚": "stock", -"300375": "stock", -"鹏翎股份": "stock", -"001337": "stock", -"四川黄金": "stock", -"300151": "stock", -"昌红科技": "stock", -"002126": "stock", -"银轮股份": "stock", -"301182": "stock", -"凯旺科技": "stock", -"002635": "stock", -"安洁科技": "stock", -"000778": "stock", -"新兴铸管": "stock", -"600872": "stock", -"中炬高新": "stock", -"002798": "stock", -"帝欧家居": "stock", -"002629": "stock", -"仁智股份": "stock", -"300378": "stock", -"鼎捷软件": "stock", -"300045": "stock", -"华力创通": "stock", -"300123": "stock", -"亚光科技": "stock", -"300109": "stock", -"新开源": "stock", -"300969": "stock", -"恒帅股份": "stock", -"831856": "stock", -"浩淼科技": "stock", -"601238": "stock", -"广汽集团": "stock", -"301370": "stock", -"国科恒泰": "stock", -"000413": "stock", -"东旭光电": "stock", -"002286": "stock", -"保龄宝": "stock", -"688484": "stock", -"南芯科技": "stock", -"000509": "stock", -"华塑控股": "stock", -"603897": "stock", -"长城科技": "stock", -"000653": "stock", -"ST九州": "stock", -"300547": "stock", -"川环科技": "stock", -"300898": "stock", -"熊猫乳品": "stock", -"300525": "stock", -"博思软件": "stock", -"601360": "stock", -"三六零": "stock", -"601811": "stock", -"新华文轩": "stock", -"601568": "stock", -"北元集团": "stock", -"600963": "stock", -"岳阳林纸": "stock", -"872953": "stock", -"国子软件": "stock", -"600760": "stock", -"中航沈飞": "stock", -"000761": "stock", -"本钢板材": "stock", -"600981": "stock", -"汇鸿集团": "stock", -"603296": "stock", -"华勤技术": "stock", -"300669": "stock", -"沪宁股份": "stock", -"002020": "stock", 
-"京新药业": "stock", -"833751": "stock", -"惠同新材": "stock", -"603773": "stock", -"沃格光电": "stock", -"600480": "stock", -"凌云股份": "stock", -"300283": "stock", -"温州宏丰": "stock", -"301383": "stock", -"天键股份": "stock", -"300126": "stock", -"锐奇股份": "stock", -"002860": "stock", -"星帅尔": "stock", -"601101": "stock", -"昊华能源": "stock", -"603191": "stock", -"望变电气": "stock", -"605287": "stock", -"德才股份": "stock", -"002947": "stock", -"恒铭达": "stock", -"688228": "stock", -"开普云": "stock", -"688029": "stock", -"南微医学": "stock", -"000671": "stock", -"ST阳光城": "stock", -"600107": "stock", -"美尔雅": "stock", -"688621": "stock", -"阳光诺和": "stock", -"688270": "stock", -"臻镭科技": "stock", -"300958": "stock", -"建工修复": "stock", -"600488": "stock", -"津药药业": "stock", -"002194": "stock", -"武汉凡谷": "stock", -"300235": "stock", -"方直科技": "stock", -"688266": "stock", -"泽璟制药-U": "stock", -"300600": "stock", -"国瑞科技": "stock", -"000738": "stock", -"航发控制": "stock", -"300328": "stock", -"宜安科技": "stock", -"601021": "stock", -"春秋航空": "stock", -"000669": "stock", -"ST金鸿": "stock", -"300208": "stock", -"青岛中程": "stock", -"002165": "stock", -"红宝丽": "stock", -"002262": "stock", -"恩华药业": "stock", -"002781": "stock", -"奇信退": "stock", -"300745": "stock", -"欣锐科技": "stock", -"000545": "stock", -"金浦钛业": "stock", -"603339": "stock", -"四方科技": "stock", -"002451": "stock", -"摩恩电气": "stock", -"688618": "stock", -"三旺通信": "stock", -"002811": "stock", -"郑中设计": "stock", -"301246": "stock", -"宏源药业": "stock", -"688665": "stock", -"四方光电": "stock", -"300403": "stock", -"汉宇集团": "stock", -"301298": "stock", -"东利机械": "stock", -"688458": "stock", -"美芯晟": "stock", -"300810": "stock", -"中科海讯": "stock", -"301090": "stock", -"华润材料": "stock", -"300726": "stock", -"宏达电子": "stock", -"301418": "stock", -"协昌科技": "stock", -"600987": "stock", -"航民股份": "stock", -"603385": "stock", -"惠达卫浴": "stock", -"600329": "stock", -"达仁堂": "stock", -"300243": "stock", -"瑞丰高材": "stock", -"300306": "stock", -"远方信息": "stock", -"600824": "stock", -"益民集团": "stock", -"002488": "stock", -"金固股份": "stock", -"603036": "stock", -"如通股份": "stock", -"836892": "stock", -"广咨国际": "stock", -"300377": "stock", -"赢时胜": "stock", -"001979": "stock", -"招商蛇口": "stock", -"600416": "stock", -"湘电股份": "stock", -"002485": "stock", -"*ST雪发": "stock", -"000787": "stock", -"*ST创智": "stock", -"002992": "stock", -"宝明科技": "stock", -"600821": "stock", -"金开新能": "stock", -"603669": "stock", -"灵康药业": "stock", -"300435": "stock", -"中泰股份": "stock", -"603863": "stock", -"松炀资源": "stock", -"002963": "stock", -"豪尔赛": "stock", -"688003": "stock", -"天准科技": "stock", -"301382": "stock", -"蜂助手": "stock", -"301024": "stock", -"霍普股份": "stock", -"002211": "stock", -"宏达新材": "stock", -"688273": "stock", -"麦澜德": "stock", -"000982": "stock", -"中银绒业": "stock", -"300176": "stock", -"派生科技": "stock", -"002048": "stock", -"宁波华翔": "stock", -"002734": "stock", -"利民股份": "stock", -"688475": "stock", -"萤石网络": "stock", -"001367": "stock", -"海森药业": "stock", -"002110": "stock", -"三钢闽光": "stock", -"000836": "stock", -"富通信息": "stock", -"688185": "stock", -"康希诺": "stock", -"002003": "stock", -"伟星股份": "stock", -"002283": "stock", -"天润工业": "stock", -"002765": "stock", -"蓝黛科技": "stock", -"300107": "stock", -"建新股份": "stock", -"002454": "stock", -"松芝股份": "stock", -"000621": "stock", -"*ST比特": "stock", -"603078": "stock", -"江化微": "stock", -"603135": "stock", -"中重科技": "stock", -"603967": "stock", -"中创物流": "stock", -"600999": "stock", -"招商证券": "stock", -"600295": "stock", -"鄂尔多斯": "stock", -"688779": "stock", -"长远锂科": "stock", -"301091": "stock", -"深城交": "stock", -"603408": 
"stock", -"建霖家居": "stock", -"688281": "stock", -"华秦科技": "stock", -"300650": "stock", -"太龙股份": "stock", -"300258": "stock", -"精锻科技": "stock", -"300808": "stock", -"久量股份": "stock", -"300843": "stock", -"胜蓝股份": "stock", -"002281": "stock", -"光迅科技": "stock", -"600925": "stock", -"苏能股份": "stock", -"600735": "stock", -"新华锦": "stock", -"600026": "stock", -"中远海能": "stock", -"688265": "stock", -"南模生物": "stock", -"000911": "stock", -"南宁糖业": "stock", -"688349": "stock", -"三一重能": "stock", -"688148": "stock", -"芳源股份": "stock", -"000695": "stock", -"滨海能源": "stock", -"002113": "stock", -"*ST天润": "stock", -"603816": "stock", -"顾家家居": "stock", -"601599": "stock", -"浙文影业": "stock", -"603117": "stock", -"ST万林": "stock", -"002908": "stock", -"德生科技": "stock", -"002465": "stock", -"海格通信": "stock", -"300931": "stock", -"通用电梯": "stock", -"300049": "stock", -"福瑞股份": "stock", -"301395": "stock", -"仁信新材": "stock", -"600388": "stock", -"龙净环保": "stock", -"600346": "stock", -"恒力石化": "stock", -"600587": "stock", -"新华医疗": "stock", -"300951": "stock", -"博硕科技": "stock", -"301017": "stock", -"漱玉平民": "stock", -"873152": "stock", -"天宏锂电": "stock", -"603211": "stock", -"晋拓股份": "stock", -"832469": "stock", -"富恒新材": "stock", -"600793": "stock", -"宜宾纸业": "stock", -"000968": "stock", -"蓝焰控股": "stock", -"002351": "stock", -"漫步者": "stock", -"300057": "stock", -"万顺新材": "stock", -"603317": "stock", -"天味食品": "stock", -"300741": "stock", -"华宝股份": "stock", -"688288": "stock", -"鸿泉物联": "stock", -"301211": "stock", -"亨迪药业": "stock", -"870357": "stock", -"雅葆轩": "stock", -"688180": "stock", -"君实生物-U": "stock", -"301328": "stock", -"维峰电子": "stock", -"688786": "stock", -"悦安新材": "stock", -"601997": "stock", -"贵阳银行": "stock", -"002748": "stock", -"世龙实业": "stock", -"002853": "stock", -"皮阿诺": "stock", -"000735": "stock", -"罗牛山": "stock", -"603160": "stock", -"汇顶科技": "stock", -"600523": "stock", -"贵航股份": "stock", -"688549": "stock", -"中巨芯-U": "stock", -"603273": "stock", -"天元智能": "stock", -"688199": "stock", -"久日新材": "stock", -"300937": "stock", -"药易购": "stock", -"600370": "stock", -"三房巷": "stock", -"002444": "stock", -"巨星科技": "stock", -"688077": "stock", -"大地熊": "stock", -"002680": "stock", -"长生退": "stock", -"300787": "stock", -"海能实业": "stock", -"000683": "stock", -"远兴能源": "stock", -"600311": "stock", -"*ST荣华": "stock", -"688379": "stock", -"华光新材": "stock", -"300806": "stock", -"斯迪克": "stock", -"300210": "stock", -"森远股份": "stock", -"600955": "stock", -"维远股份": "stock", -"830799": "stock", -"艾融软件": "stock", -"002242": "stock", -"九阳股份": "stock", -"300824": "stock", -"北鼎股份": "stock", -"300104": "stock", -"乐视退": "stock", -"600695": "stock", -"退市绿庭": "stock", -"000760": "stock", -"斯太退": "stock", -"300995": "stock", -"奇德新材": "stock", -"600929": "stock", -"雪天盐业": "stock", -"300910": "stock", -"瑞丰新材": "stock", -"688355": "stock", -"明志科技": "stock", -"600546": "stock", -"山煤国际": "stock", -"300369": "stock", -"绿盟科技": "stock", -"301119": "stock", -"正强股份": "stock", -"603687": "stock", -"大胜达": "stock", -"688619": "stock", -"罗普特": "stock", -"831370": "stock", -"新安洁": "stock", -"601778": "stock", -"晶科科技": "stock", -"301076": "stock", -"新瀚新材": "stock", -"601117": "stock", -"中国化学": "stock", -"000561": "stock", -"烽火电子": "stock", -"002050": "stock", -"三花智控": "stock", -"688799": "stock", -"华纳药厂": "stock", -"002343": "stock", -"慈文传媒": "stock", -"000060": "stock", -"中金岭南": "stock", -"002421": "stock", -"达实智能": "stock", -"300075": "stock", -"数字政通": "stock", -"002975": "stock", -"博杰股份": "stock", -"301368": "stock", -"丰立智能": "stock", -"600709": "stock", -"ST生态": "stock", 
-"300182": "stock", -"捷成股份": "stock", -"603162": "stock", -"海通发展": "stock", -"300539": "stock", -"横河精密": "stock", -"000576": "stock", -"甘化科工": "stock", -"603650": "stock", -"彤程新材": "stock", -"300237": "stock", -"美晨生态": "stock", -"600070": "stock", -"ST富润": "stock", -"600253": "stock", -"天方药业": "stock", -"600829": "stock", -"人民同泰": "stock", -"600459": "stock", -"贵研铂业": "stock", -"603021": "stock", -"山东华鹏": "stock", -"601038": "stock", -"一拖股份": "stock", -"603076": "stock", -"乐惠国际": "stock", -"600263": "stock", -"路桥建设": "stock", -"688293": "stock", -"XD奥浦迈": "stock", -"002247": "stock", -"聚力文化": "stock", -"300746": "stock", -"汉嘉设计": "stock", -"600799": "stock", -"*ST龙科": "stock", -"600093": "stock", -"退市易见": "stock", -"300291": "stock", -"百纳千成": "stock", -"688258": "stock", -"卓易信息": "stock", -"300550": "stock", -"和仁科技": "stock", -"300256": "stock", -"星星科技": "stock", -"300586": "stock", -"美联新材": "stock", -"300154": "stock", -"瑞凌股份": "stock", -"600297": "stock", -"广汇汽车": "stock", -"002087": "stock", -"*ST新纺": "stock", -"836208": "stock", -"青矩技术": "stock", -"301235": "stock", -"华康医疗": "stock", -"002531": "stock", -"天顺风能": "stock", -"603173": "stock", -"福斯达": "stock", -"000795": "stock", -"英洛华": "stock", -"300358": "stock", -"楚天科技": "stock", -"300641": "stock", -"正丹股份": "stock", -"000888": "stock", -"峨眉山A": "stock", -"301139": "stock", -"元道通信": "stock", -"300994": "stock", -"久祺股份": "stock", -"300205": "stock", -"天喻信息": "stock", -"000426": "stock", -"兴业银锡": "stock", -"430300": "stock", -"辰光医疗": "stock", -"600549": "stock", -"厦门钨业": "stock", -"603225": "stock", -"新凤鸣": "stock", -"002241": "stock", -"歌尔股份": "stock", -"301332": "stock", -"德尔玛": "stock", -"300520": "stock", -"科大国创": "stock", -"000021": "stock", -"深科技": "stock", -"603332": "stock", -"苏州龙杰": "stock", -"600883": "stock", -"博闻科技": "stock", -"600751": "stock", -"海航科技": "stock", -"300920": "stock", -"润阳科技": "stock", -"002926": "stock", -"华西证券": "stock", -"300553": "stock", -"集智股份": "stock", -"300563": "stock", -"神宇股份": "stock", -"300288": "stock", -"朗玛信息": "stock", -"300998": "stock", -"宁波方正": "stock", -"301353": "stock", -"普莱得": "stock", -"300134": "stock", -"大富科技": "stock", -"001228": "stock", -"永泰运": "stock", -"603637": "stock", -"镇海股份": "stock", -"300222": "stock", -"科大智能": "stock", -"603889": "stock", -"新澳股份": "stock", -"688162": "stock", -"巨一科技": "stock", -"603679": "stock", -"华体科技": "stock", -"603489": "stock", -"八方股份": "stock", -"300371": "stock", -"汇中股份": "stock", -"300278": "stock", -"华昌达": "stock", -"688318": "stock", -"财富趋势": "stock", -"688068": "stock", -"热景生物": "stock", -"000655": "stock", -"金岭矿业": "stock", -"002670": "stock", -"国盛金控": "stock", -"300972": "stock", -"万辰集团": "stock", -"603137": "stock", -"恒尚节能": "stock", -"603057": "stock", -"紫燕食品": "stock", -"600408": "stock", -"安泰集团": "stock", -"300004": "stock", -"南风股份": "stock", -"300410": "stock", -"正业科技": "stock", -"839790": "stock", -"联迪信息": "stock", -"002157": "stock", -"*ST": "stock", -"002347": "stock", -"泰尔股份": "stock", -"601996": "stock", -"丰林集团": "stock", -"002557": "stock", -"洽洽食品": "stock", -"600632": "stock", -"华联商厦": "stock", -"002936": "stock", -"郑州银行": "stock", -"300002": "stock", -"神州泰岳": "stock", -"601133": "stock", -"柏诚股份": "stock", -"600961": "stock", -"株冶集团": "stock", -"301262": "stock", -"海看股份": "stock", -"605319": "stock", -"无锡振华": "stock", -"300542": "stock", -"新晨科技": "stock", -"688335": "stock", -"复洁环保": "stock", -"688237": "stock", -"超卓航科": "stock", -"603738": "stock", -"泰晶科技": "stock", -"601619": "stock", -"嘉泽新能": "stock", -"002344": "stock", 
-"海宁皮城": "stock", -"831010": "stock", -"凯添燃气": "stock", -"603103": "stock", -"横店影视": "stock", -"601208": "stock", -"东材科技": "stock", -"603878": "stock", -"武进不锈": "stock", -"603665": "stock", -"康隆达": "stock", -"601002": "stock", -"晋亿实业": "stock", -"002527": "stock", -"新时达": "stock", -"603444": "stock", -"吉比特": "stock", -"605377": "stock", -"华旺科技": "stock", -"001266": "stock", -"宏英智能": "stock", -"001258": "stock", -"立新能源": "stock", -"600472": "stock", -"包头铝业": "stock", -"600067": "stock", -"冠城大通": "stock", -"603970": "stock", -"中农立华": "stock", -"002490": "stock", -"山东墨龙": "stock", -"834682": "stock", -"球冠电缆": "stock", -"603380": "stock", -"易德龙": "stock", -"002150": "stock", -"通润装备": "stock", -"000040": "stock", -"东旭蓝天": "stock", -"300218": "stock", -"安利股份": "stock", -"300921": "stock", -"南凌科技": "stock", -"002036": "stock", -"联创电子": "stock", -"002554": "stock", -"惠博普": "stock", -"300216": "stock", -"千山退": "stock", -"002861": "stock", -"瀛通通讯": "stock", -"600583": "stock", -"海油工程": "stock", -"603777": "stock", -"来伊份": "stock", -"001323": "stock", -"慕思股份": "stock", -"688019": "stock", -"安集科技": "stock", -"002969": "stock", -"嘉美包装": "stock", -"002024": "stock", -"ST易购": "stock", -"002589": "stock", -"瑞康医药": "stock", -"688533": "stock", -"上声电子": "stock", -"603759": "stock", -"海天股份": "stock", -"300975": "stock", -"商络电子": "stock", -"688038": "stock", -"中科通达": "stock", -"000819": "stock", -"岳阳兴长": "stock", -"600050": "stock", -"中国联通": "stock", -"000589": "stock", -"贵州轮胎": "stock", -"600754": "stock", -"锦江酒店": "stock", -"600426": "stock", -"华鲁恒升": "stock", -"600804": "stock", -"ST鹏博士": "stock", -"002673": "stock", -"西部证券": "stock", -"600159": "stock", -"大龙地产": "stock", -"003029": "stock", -"吉大正元": "stock", -"839792": "stock", -"东和新材": "stock", -"300770": "stock", -"新媒股份": "stock", -"001202": "stock", -"炬申股份": "stock", -"301291": "stock", -"明阳电气": "stock", -"301199": "stock", -"迈赫股份": "stock", -"300956": "stock", -"英力股份": "stock", -"688163": "stock", -"赛伦生物": "stock", -"600927": "stock", -"永安期货": "stock", -"000613": "stock", -"东海A退": "stock", -"688055": "stock", -"龙腾光电": "stock", -"603918": "stock", -"金桥信息": "stock", -"600909": "stock", -"华安证券": "stock", -"002699": "stock", -"*ST美盛": "stock", -"002053": "stock", -"云南能投": "stock", -"300635": "stock", -"中达安": "stock", -"002896": "stock", -"中大力德": "stock", -"000011": "stock", -"深物业A": "stock", -"301032": "stock", -"新柴股份": "stock", -"688011": "stock", -"新光光电": "stock", -"002669": "stock", -"康达新材": "stock", -"300200": "stock", -"高盟新材": "stock", -"002365": "stock", -"永安药业": "stock", -"301248": "stock", -"杰创智能": "stock", -"002360": "stock", -"同德化工": "stock", -"600868": "stock", -"梅雁吉祥": "stock", -"688383": "stock", -"新益昌": "stock", -"600767": "stock", -"退市运盛": "stock", -"002550": "stock", -"千红制药": "stock", -"601555": "stock", -"东吴证券": "stock", -"603236": "stock", -"移远通信": "stock", -"002790": "stock", -"瑞尔特": "stock", -"605389": "stock", -"长龄液压": "stock", -"300270": "stock", -"中威电子": "stock", -"600796": "stock", -"钱江生化": "stock", -"688327": "stock", -"云从科技-UW": "stock", -"001259": "stock", -"利仁科技": "stock", -"688361": "stock", -"中科飞测-U": "stock", -"605318": "stock", -"法狮龙": "stock", -"002822": "stock", -"中装建设": "stock", -"688631": "stock", -"莱斯信息": "stock", -"300758": "stock", -"七彩化学": "stock", -"000158": "stock", -"常山北明": "stock", -"002599": "stock", -"盛通股份": "stock", -"603557": "stock", -"ST起步": "stock", -"301325": "stock", -"曼恩斯特": "stock", -"001210": "stock", -"金房能源": "stock", -"002837": "stock", -"英维克": "stock", -"600555": "stock", -"退市海创": "stock", 
-"603929": "stock", -"亚翔集成": "stock", -"600643": "stock", -"爱建集团": "stock", -"000587": "stock", -"*ST金洲": "stock", -"603636": "stock", -"南威软件": "stock", -"002435": "stock", -"长江健康": "stock", -"600257": "stock", -"大湖股份": "stock", -"002939": "stock", -"长城证券": "stock", -"600332": "stock", -"白云山": "stock", -"002689": "stock", -"远大智能": "stock", -"600547": "stock", -"山东黄金": "stock", -"301323": "stock", -"新莱福": "stock", -"603887": "stock", -"城地香江": "stock", -"300017": "stock", -"网宿科技": "stock", -"601006": "stock", -"大秦铁路": "stock", -"600483": "stock", -"福能股份": "stock", -"300594": "stock", -"朗进科技": "stock", -"300576": "stock", -"容大感光": "stock", -"002249": "stock", -"大洋电机": "stock", -"688283": "stock", -"坤恒顺维": "stock", -"603165": "stock", -"荣晟环保": "stock", -"301533": "stock", -"威马农机": "stock", -"688789": "stock", -"宏华数科": "stock", -"605499": "stock", -"东鹏饮料": "stock", -"600191": "stock", -"华资实业": "stock", -"300213": "stock", -"佳讯飞鸿": "stock", -"300385": "stock", -"雪浪环境": "stock", -"003004": "stock", -"声迅股份": "stock", -"688022": "stock", -"瀚川智能": "stock", -"002865": "stock", -"钧达股份": "stock", -"002585": "stock", -"双星新材": "stock", -"600725": "stock", -"云维股份": "stock", -"600401": "stock", -"退市海润": "stock", -"832978": "stock", -"开特股份": "stock", -"603196": "stock", -"日播时尚": "stock", -"605268": "stock", -"王力安防": "stock", -"002481": "stock", -"双塔食品": "stock", -"000876": "stock", -"新希望": "stock", -"300423": "stock", -"昇辉科技": "stock", -"300455": "stock", -"航天智装": "stock", -"300106": "stock", -"西部牧业": "stock", -"600696": "stock", -"岩石股份": "stock", -"688102": "stock", -"斯瑞新材": "stock", -"601577": "stock", -"长沙银行": "stock", -"002546": "stock", -"新联电子": "stock", -"300588": "stock", -"熙菱信息": "stock", -"600101": "stock", -"明星电力": "stock", -"000419": "stock", -"通程控股": "stock", -"600622": "stock", -"光大嘉宝": "stock", -"688480": "stock", -"赛恩斯": "stock", -"600121": "stock", -"郑州煤电": "stock", -"300636": "stock", -"同和药业": "stock", -"000730": "stock", -"*ST环保": "stock", -"300521": "stock", -"爱司凯": "stock", -"601991": "stock", -"大唐发电": "stock", -"300472": "stock", -"新元科技": "stock", -"300616": "stock", -"尚品宅配": "stock", -"603267": "stock", -"鸿远电子": "stock", -"002607": "stock", -"中公教育": "stock", -"300515": "stock", -"三德科技": "stock", -"870508": "stock", -"丰安股份": "stock", -"603022": "stock", -"新通联": "stock", -"301486": "stock", -"致尚科技": "stock", -"601827": "stock", -"三峰环境": "stock", -"002495": "stock", -"佳隆股份": "stock", -"300225": "stock", -"金力泰": "stock", -"300413": "stock", -"芒果超媒": "stock", -"300331": "stock", -"苏大维格": "stock", -"002361": "stock", -"神剑股份": "stock", -"002997": "stock", -"瑞鹄模具": "stock", -"600335": "stock", -"国机汽车": "stock", -"871970": "stock", -"大禹生物": "stock", -"002595": "stock", -"豪迈科技": "stock", -"430017": "stock", -"星昊医药": "stock", -"003023": "stock", -"彩虹集团": "stock", -"605577": "stock", -"龙版传媒": "stock", -"688238": "stock", -"和元生物": "stock", -"300952": "stock", -"恒辉安防": "stock", -"603603": "stock", -"*ST博天": "stock", -"301358": "stock", -"湖南裕能": "stock", -"300756": "stock", -"金马游乐": "stock", -"600630": "stock", -"龙头股份": "stock", -"301133": "stock", -"金钟股份": "stock", -"605123": "stock", -"派克新材": "stock", -"300890": "stock", -"翔丰华": "stock", -"603987": "stock", -"康德莱": "stock", -"300006": "stock", -"莱美药业": "stock", -"603917": "stock", -"合力科技": "stock", -"002041": "stock", -"登海种业": "stock", -"000798": "stock", -"中水渔业": "stock", -"001311": "stock", -"多利科技": "stock", -"301031": "stock", -"中熔电气": "stock", -"002478": "stock", -"常宝股份": "stock", -"000898": "stock", -"鞍钢股份": "stock", -"600579": "stock", 
-"克劳斯": "stock", -"300742": "stock", -"*ST越博": "stock", -"603013": "stock", -"亚普股份": "stock", -"603602": "stock", -"纵横通信": "stock", -"001380": "stock", -"华纬科技": "stock", -"000796": "stock", -"*ST凯撒": "stock", -"300552": "stock", -"万集科技": "stock", -"300432": "stock", -"富临精工": "stock", -"603090": "stock", -"宏盛股份": "stock", -"002300": "stock", -"太阳电缆": "stock", -"000601": "stock", -"韶能股份": "stock", -"871634": "stock", -"新威凌": "stock", -"600895": "stock", -"张江高科": "stock", -"688603": "stock", -"天承科技": "stock", -"300978": "stock", -"东箭科技": "stock", -"600061": "stock", -"国投资本": "stock", -"002387": "stock", -"维信诺": "stock", -"300269": "stock", -"联建光电": "stock", -"873001": "stock", -"纬达光电": "stock", -"301110": "stock", -"青木股份": "stock", -"832145": "stock", -"恒合股份": "stock", -"688358": "stock", -"祥生医疗": "stock", -"600719": "stock", -"大连热电": "stock", -"601567": "stock", -"三星医疗": "stock", -"831906": "stock", -"舜宇精工": "stock", -"603843": "stock", -"正平股份": "stock", -"688063": "stock", -"派能科技": "stock", -"600089": "stock", -"特变电工": "stock", -"002817": "stock", -"黄山胶囊": "stock", -"688377": "stock", -"迪威尔": "stock", -"688517": "stock", -"金冠电气": "stock", -"300489": "stock", -"光智科技": "stock", -"002223": "stock", -"鱼跃医疗": "stock", -"603085": "stock", -"天成自控": "stock", -"300340": "stock", -"科恒股份": "stock", -"603029": "stock", -"天鹅股份": "stock", -"002112": "stock", -"三变科技": "stock", -"301319": "stock", -"唯特偶": "stock", -"600576": "stock", -"祥源文旅": "stock", -"603305": "stock", -"旭升集团": "stock", -"300334": "stock", -"津膜科技": "stock", -"839680": "stock", -"广道数字": "stock", -"300724": "stock", -"捷佳伟创": "stock", -"688113": "stock", -"联测科技": "stock", -"300838": "stock", -"浙江力诺": "stock", -"002818": "stock", -"富森美": "stock", -"000565": "stock", -"渝三峡A": "stock", -"600702": "stock", -"舍得酒业": "stock", -"600178": "stock", -"东安动力": "stock", -"832175": "stock", -"东方碳素": "stock", -"688570": "stock", -"天玛智控": "stock", -"600325": "stock", -"华发股份": "stock", -"002897": "stock", -"意华股份": "stock", -"300039": "stock", -"上海凯宝": "stock", -"688053": "stock", -"思科瑞": "stock", -"002728": "stock", -"特一药业": "stock", -"688150": "stock", -"莱特光电": "stock", -"600333": "stock", -"长春燃气": "stock", -"003017": "stock", -"大洋生物": "stock", -"002121": "stock", -"科陆电子": "stock", -"002124": "stock", -"天邦食品": "stock", -"301439": "stock", -"泓淋电力": "stock", -"603416": "stock", -"信捷电气": "stock", -"601872": "stock", -"招商轮船": "stock", -"601788": "stock", -"光大证券": "stock", -"600782": "stock", -"新钢股份": "stock", -"300760": "stock", -"迈瑞医疗": "stock", -"002256": "stock", -"兆新股份": "stock", -"601011": "stock", -"宝泰隆": "stock", -"600157": "stock", -"永泰能源": "stock", -"301111": "stock", -"粤万年青": "stock", -"000692": "stock", -"*ST惠天": "stock", -"002173": "stock", -"创新医疗": "stock", -"601689": "stock", -"拓普集团": "stock", -"601330": "stock", -"绿色动力": "stock", -"000987": "stock", -"越秀资本": "stock", -"300772": "stock", -"运达股份": "stock", -"600810": "stock", -"神马股份": "stock", -"002188": "stock", -"中天服务": "stock", -"603619": "stock", -"中曼石油": "stock", -"300626": "stock", -"华瑞股份": "stock", -"600338": "stock", -"西藏珠峰": "stock", -"600485": "stock", -"*ST信威": "stock", -"000633": "stock", -"合金投资": "stock", -"300298": "stock", -"三诺生物": "stock", -"600896": "stock", -"退市海医": "stock", -"002309": "stock", -"ST中利": "stock", -"605188": "stock", -"国光连锁": "stock", -"603755": "stock", -"日辰股份": "stock", -"002762": "stock", -"金发拉比": "stock", -"002026": "stock", -"山东威达": "stock", -"600533": "stock", -"栖霞建设": "stock", -"301077": "stock", -"星华新材": "stock", -"000301": "stock", -"东方盛虹": "stock", 
-"300575": "stock", -"中旗股份": "stock", -"688280": "stock", -"精进电动-UW": "stock", -"600215": "stock", -"派斯林": "stock", -"300166": "stock", -"东方国信": "stock", -"603958": "stock", -"哈森股份": "stock", -"000810": "stock", -"创维数字": "stock", -"839719": "stock", -"宁新新材": "stock", -"603568": "stock", -"伟明环保": "stock", -"000677": "stock", -"恒天海龙": "stock", -"603309": "stock", -"维力医疗": "stock", -"688443": "stock", -"智翔金泰-U": "stock", -"600822": "stock", -"上海物贸": "stock", -"001331": "stock", -"胜通能源": "stock", -"300612": "stock", -"宣亚国际": "stock", -"000554": "stock", -"泰山石油": "stock", -"688330": "stock", -"宏力达": "stock", -"300349": "stock", -"金卡智能": "stock", -"600020": "stock", -"中原高速": "stock", -"430198": "stock", -"微创光电": "stock", -"300849": "stock", -"锦盛新材": "stock", -"300971": "stock", -"博亚精工": "stock", -"603828": "stock", -"柯利达": "stock", -"833427": "stock", -"华维设计": "stock", -"688651": "stock", -"盛邦安全": "stock", -"300027": "stock", -"华谊兄弟": "stock", -"600125": "stock", -"铁龙物流": "stock", -"301187": "stock", -"欧圣电气": "stock", -"001234": "stock", -"泰慕士": "stock", -"688143": "stock", -"长盈通": "stock", -"002324": "stock", -"普利特": "stock", -"600418": "stock", -"江淮汽车": "stock", -"300983": "stock", -"尤安设计": "stock", -"600882": "stock", -"妙可蓝多": "stock", -"301327": "stock", -"华宝新能": "stock", -"300907": "stock", -"康平科技": "stock", -"600460": "stock", -"士兰微": "stock", -"300872": "stock", -"天阳科技": "stock", -"600268": "stock", -"国电南自": "stock", -"605300": "stock", -"佳禾食品": "stock", -"301367": "stock", -"怡和嘉业": "stock", -"000680": "stock", -"山推股份": "stock", -"688626": "stock", -"翔宇医疗": "stock", -"837344": "stock", -"三元基因": "stock", -"300584": "stock", -"海辰药业": "stock", -"688290": "stock", -"景业智能": "stock", -"600493": "stock", -"凤竹纺织": "stock", -"688520": "stock", -"神州细胞-U": "stock", -"600305": "stock", -"恒顺醋业": "stock", -"001282": "stock", -"三联锻造": "stock", -"301309": "stock", -"万得凯": "stock", -"002005": "stock", -"ST德豪": "stock", -"001339": "stock", -"智微智能": "stock", -"603026": "stock", -"胜华新材": "stock", -"600380": "stock", -"健康元": "stock", -"300761": "stock", -"立华股份": "stock", -"300238": "stock", -"冠昊生物": "stock", -"600740": "stock", -"山西焦化": "stock", -"300395": "stock", -"菲利华": "stock", -"000757": "stock", -"浩物股份": "stock", -"002968": "stock", -"新大正": "stock", -"002174": "stock", -"游族网络": "stock", -"603712": "stock", -"七一二": "stock", -"600429": "stock", -"三元股份": "stock", -"002740": "stock", -"*ST爱迪": "stock", -"688456": "stock", -"有研粉材": "stock", -"600843": "stock", -"上工申贝": "stock", -"600855": "stock", -"航天长峰": "stock", -"601163": "stock", -"三角轮胎": "stock", -"601882": "stock", -"海天精工": "stock", -"300922": "stock", -"天秦装备": "stock", -"688073": "stock", -"毕得医药": "stock", -"688016": "stock", -"心脉医疗": "stock", -"603282": "stock", -"亚光股份": "stock", -"300211": "stock", -"亿通科技": "stock", -"301193": "stock", -"家联科技": "stock", -"430090": "stock", -"同辉信息": "stock", -"688419": "stock", -"耐科装备": "stock", -"603528": "stock", -"多伦科技": "stock", -"002270": "stock", -"华明装备": "stock", -"600187": "stock", -"国中水务": "stock", -"603272": "stock", -"联翔股份": "stock", -"000037": "stock", -"深南电A": "stock", -"002604": "stock", -"龙力退": "stock", -"002228": "stock", -"合兴包装": "stock", -"603538": "stock", -"美诺华": "stock", -"603980": "stock", -"吉华集团": "stock", -"000569": "stock", -"长城股份": "stock", -"300862": "stock", -"蓝盾光电": "stock", -"000975": "stock", -"银泰黄金": "stock", -"600567": "stock", -"山鹰国际": "stock", -"002477": "stock", -"雏鹰退": "stock", -"002406": "stock", -"远东传动": "stock", -"300804": "stock", -"广康生化": "stock", -"300774": "stock", -"倍杰特": 
"stock", -"002427": "stock", -"尤夫股份": "stock", -"301081": "stock", -"严牌股份": "stock", -"870199": "stock", -"倍益康": "stock", -"300416": "stock", -"苏试试验": "stock", -"603567": "stock", -"珍宝岛": "stock", -"002132": "stock", -"恒星科技": "stock", -"002832": "stock", -"比音勒芬": "stock", -"300613": "stock", -"富瀚微": "stock", -"836717": "stock", -"瑞星股份": "stock", -"603237": "stock", -"五芳斋": "stock", -"002434": "stock", -"万里扬": "stock", -"603050": "stock", -"科林电气": "stock", -"600081": "stock", -"东风科技": "stock", -"000788": "stock", -"北大医药": "stock", -"601512": "stock", -"中新集团": "stock", -"300583": "stock", -"赛托生物": "stock", -"600998": "stock", -"九州通": "stock", -"000571": "stock", -"新大洲A": "stock", -"003027": "stock", -"同兴环保": "stock", -"002551": "stock", -"尚荣医疗": "stock", -"002049": "stock", -"紫光国微": "stock", -"300948": "stock", -"冠中生态": "stock", -"600507": "stock", -"方大特钢": "stock", -"688500": "stock", -"*ST慧辰": "stock", -"300316": "stock", -"晶盛机电": "stock", -"601121": "stock", -"宝地矿业": "stock", -"688486": "stock", -"龙迅股份": "stock", -"000055": "stock", -"方大集团": "stock", -"002668": "stock", -"奥马电器": "stock", -"300749": "stock", -"顶固集创": "stock", -"836942": "stock", -"恒立钻具": "stock", -"871245": "stock", -"威博液压": "stock", -"601126": "stock", -"四方股份": "stock", -"837046": "stock", -"亿能电力": "stock", -"600326": "stock", -"西藏天路": "stock", -"000726": "stock", -"鲁泰A": "stock", -"002218": "stock", -"拓日新能": "stock", -"300262": "stock", -"巴安水务": "stock", -"000822": "stock", -"山东海化": "stock", -"300674": "stock", -"宇信科技": "stock", -"603176": "stock", -"汇通集团": "stock", -"834014": "stock", -"特瑞斯": "stock", -"000555": "stock", -"神州信息": "stock", -"002424": "stock", -"贵州百灵": "stock", -"600353": "stock", -"旭光电子": "stock", -"601366": "stock", -"利群股份": "stock", -"603920": "stock", -"世运电路": "stock", -"688291": "stock", -"金橙子": "stock", -"688658": "stock", -"悦康药业": "stock", -"000581": "stock", -"威孚高科": "stock", -"300619": "stock", -"金银河": "stock", -"688489": "stock", -"三未信安": "stock", -"600060": "stock", -"海信视像": "stock", -"688037": "stock", -"芯源微": "stock", -"601728": "stock", -"中国电信": "stock", -"300390": "stock", -"天华新能": "stock", -"000848": "stock", -"承德露露": "stock", -"301345": "stock", -"涛涛车业": "stock", -"600561": "stock", -"江西长运": "stock", -"603297": "stock", -"永新光学": "stock", -"600048": "stock", -"保利发展": "stock", -"000736": "stock", -"中交地产": "stock", -"301371": "stock", -"敷尔佳": "stock", -"688329": "stock", -"艾隆科技": "stock", -"300677": "stock", -"英科医疗": "stock", -"430425": "stock", -"乐创技术": "stock", -"688313": "stock", -"仕佳光子": "stock", -"688602": "stock", -"康鹏科技": "stock", -"688360": "stock", -"德马科技": "stock", -"002422": "stock", -"科伦药业": "stock", -"301233": "stock", -"盛帮股份": "stock", -"688623": "stock", -"双元科技": "stock", -"000620": "stock", -"*ST新联": "stock", -"831152": "stock", -"昆工科技": "stock", -"603516": "stock", -"淳中科技": "stock", -"605133": "stock", -"嵘泰股份": "stock", -"300010": "stock", -"*ST豆神": "stock", -"688005": "stock", -"容百科技": "stock", -"002440": "stock", -"闰土股份": "stock", -"002097": "stock", -"山河智能": "stock", -"601299": "stock", -"中国北车": "stock", -"300173": "stock", -"福能东方": "stock", -"688155": "stock", -"先惠技术": "stock", -"300826": "stock", -"测绘股份": "stock", -"000007": "stock", -"*ST全新": "stock", -"002114": "stock", -"罗平锌电": "stock", -"002418": "stock", -"康盛股份": "stock", -"300893": "stock", -"松原股份": "stock", -"600970": "stock", -"中材国际": "stock", -"688356": "stock", -"键凯科技": "stock", -"000875": "stock", -"吉电股份": "stock", -"300866": "stock", -"安克创新": "stock", -"301397": "stock", -"溯联股份": "stock", -"301388": "stock", 
-"欣灵电气": "stock", -"003025": "stock", -"思进智能": "stock", -"300885": "stock", -"海昌新材": "stock", -"300117": "stock", -"嘉寓股份": "stock", -"600405": "stock", -"动力源": "stock", -"001896": "stock", -"豫能控股": "stock", -"601665": "stock", -"齐鲁银行": "stock", -"300085": "stock", -"银之杰": "stock", -"603893": "stock", -"瑞芯微": "stock", -"000513": "stock", -"丽珠集团": "stock", -"603757": "stock", -"大元泵业": "stock", -"688216": "stock", -"气派科技": "stock", -"603233": "stock", -"大参林": "stock", -"603888": "stock", -"新华网": "stock", -"301313": "stock", -"凡拓数创": "stock", -"300697": "stock", -"电工合金": "stock", -"300492": "stock", -"华图山鼎": "stock", -"688107": "stock", -"安路科技": "stock", -"002269": "stock", -"美邦服饰": "stock", -"603259": "stock", -"药明康德": "stock", -"300460": "stock", -"惠伦晶体": "stock", -"603169": "stock", -"兰石重装": "stock", -"605196": "stock", -"华通线缆": "stock", -"688575": "stock", -"亚辉龙": "stock", -"600990": "stock", -"四创电子": "stock", -"688535": "stock", -"华海诚科": "stock", -"002224": "stock", -"三力士": "stock", -"300830": "stock", -"金现代": "stock", -"000985": "stock", -"大庆华科": "stock", -"300321": "stock", -"同大股份": "stock", -"688737": "stock", -"中自科技": "stock", -"002906": "stock", -"华阳集团": "stock", -"002708": "stock", -"光洋股份": "stock", -"000958": "stock", -"电投产融": "stock", -"000096": "stock", -"广聚能源": "stock", -"300599": "stock", -"雄塑科技": "stock", -"603501": "stock", -"韦尔股份": "stock", -"300771": "stock", -"智莱科技": "stock", -"603289": "stock", -"泰瑞机器": "stock", -"688078": "stock", -"龙软科技": "stock", -"001208": "stock", -"华菱线缆": "stock", -"000528": "stock", -"柳工": "stock", -"600139": "stock", -"*ST西源": "stock", -"603668": "stock", -"XD天马科": "stock", -"601995": "stock", -"中金公司": "stock", -"600129": "stock", -"太极集团": "stock", -"600703": "stock", -"三安光电": "stock", -"301209": "stock", -"联合化学": "stock", -"600969": "stock", -"郴电国际": "stock", -"300640": "stock", -"德艺文创": "stock", -"001206": "stock", -"依依股份": "stock", -"600734": "stock", -"ST实达": "stock", -"300555": "stock", -"ST路通": "stock", -"002640": "stock", -"跨境通": "stock", -"600525": "stock", -"长园集团": "stock", -"301507": "stock", -"民生健康": "stock", -"688403": "stock", -"汇成股份": "stock", -"000506": "stock", -"中润资源": "stock", -"600807": "stock", -"济南高新": "stock", -"600601": "stock", -"方正科技": "stock", -"688567": "stock", -"孚能科技": "stock", -"000950": "stock", -"重药控股": "stock", -"688096": "stock", -"京源环保": "stock", -"002159": "stock", -"三特索道": "stock", -"605098": "stock", -"行动教育": "stock", -"605116": "stock", -"奥锐特": "stock", -"600956": "stock", -"新天绿能": "stock", -"836675": "stock", -"秉扬科技": "stock", -"002610": "stock", -"爱康科技": "stock", -"300947": "stock", -"德必集团": "stock", -"688017": "stock", -"绿的谐波": "stock", -"834475": "stock", -"三友科技": "stock", -"603298": "stock", -"杭叉集团": "stock", -"300977": "stock", -"深圳瑞捷": "stock", -"300930": "stock", -"屹通新材": "stock", -"002683": "stock", -"广东宏大": "stock", -"300310": "stock", -"宜通世纪": "stock", -"000017": "stock", -"深中华A": "stock", -"300072": "stock", -"海新能科": "stock", -"000989": "stock", -"九芝堂": "stock", -"601727": "stock", -"上海电气": "stock", -"688075": "stock", -"安旭生物": "stock", -"002405": "stock", -"四维图新": "stock", -"300658": "stock", -"延江股份": "stock", -"300807": "stock", -"天迈科技": "stock", -"603588": "stock", -"高能环境": "stock", -"002403": "stock", -"爱仕达": "stock", -"603899": "stock", -"晨光股份": "stock", -"000938": "stock", -"紫光股份": "stock", -"688008": "stock", -"澜起科技": "stock", -"300032": "stock", -"金龙机电": "stock", -"600605": "stock", -"汇通能源": "stock", -"301078": "stock", -"孩子王": "stock", -"002957": "stock", -"科瑞技术": "stock", -"300399": 
"stock", -"天利科技": "stock", -"300009": "stock", -"安科生物": "stock", -"000723": "stock", -"美锦能源": "stock", -"605068": "stock", -"明新旭腾": "stock", -"002472": "stock", -"双环传动": "stock", -"002458": "stock", -"益生股份": "stock", -"601065": "stock", -"江盐集团": "stock", -"600624": "stock", -"复旦复华": "stock", -"600449": "stock", -"宁夏建材": "stock", -"688432": "stock", -"有研硅": "stock", -"002231": "stock", -"奥维通信": "stock", -"301191": "stock", -"菲菱科思": "stock", -"603698": "stock", -"航天工程": "stock", -"300783": "stock", -"三只松鼠": "stock", -"600228": "stock", -"返利科技": "stock", -"603121": "stock", -"华培动力": "stock", -"002586": "stock", -"*ST围海": "stock", -"002807": "stock", -"江阴银行": "stock", -"000673": "stock", -"当代退": "stock", -"600448": "stock", -"华纺股份": "stock", -"603215": "stock", -"比依股份": "stock", -"600502": "stock", -"安徽建工": "stock", -"300660": "stock", -"江苏雷利": "stock", -"600111": "stock", -"北方稀土": "stock", -"002437": "stock", -"誉衡药业": "stock", -"000858": "stock", -"五粮液": "stock", -"002461": "stock", -"珠江啤酒": "stock", -"300558": "stock", -"贝达药业": "stock", -"300902": "stock", -"国安达": "stock", -"000901": "stock", -"航天科技": "stock", -"600055": "stock", -"万东医疗": "stock", -"600237": "stock", -"铜峰电子": "stock", -"300781": "stock", -"因赛集团": "stock", -"301046": "stock", -"能辉科技": "stock", -"002466": "stock", -"天齐锂业": "stock", -"002445": "stock", -"中南文化": "stock", -"873665": "stock", -"科强股份": "stock", -"002779": "stock", -"中坚科技": "stock", -"300133": "stock", -"华策影视": "stock", -"301088": "stock", -"戎美股份": "stock", -"603676": "stock", -"卫信康": "stock", -"000717": "stock", -"中南股份": "stock", -"000752": "stock", -"*ST西发": "stock", -"601952": "stock", -"苏垦农发": "stock", -"301004": "stock", -"嘉益股份": "stock", -"001324": "stock", -"长青科技": "stock", -"600009": "stock", -"上海机场": "stock", -"600368": "stock", -"五洲交通": "stock", -"002543": "stock", -"万和电气": "stock", -"688600": "stock", -"皖仪科技": "stock", -"603606": "stock", -"东方电缆": "stock", -"002169": "stock", -"智光电气": "stock", -"600345": "stock", -"长江通信": "stock", -"002217": "stock", -"合力泰": "stock", -"300637": "stock", -"扬帆新材": "stock", -"002612": "stock", -"朗姿股份": "stock", -"300202": "stock", -"聚龙退": "stock", -"001230": "stock", -"劲旅环境": "stock", -"300706": "stock", -"阿石创": "stock", -"603197": "stock", -"保隆科技": "stock", -"603701": "stock", -"德宏股份": "stock", -"688363": "stock", -"华熙生物": "stock", -"300615": "stock", -"欣天科技": "stock", -"300595": "stock", -"欧普康视": "stock", -"301200": "stock", -"大族数控": "stock", -"300602": "stock", -"飞荣达": "stock", -"000023": "stock", -"ST深天": "stock", -"300831": "stock", -"派瑞股份": "stock", -"603323": "stock", -"苏农银行": "stock", -"600871": "stock", -"石化油服": "stock", -"603227": "stock", -"雪峰科技": "stock", -"300814": "stock", -"中富电路": "stock", -"301185": "stock", -"鸥玛软件": "stock", -"003032": "stock", -"传智教育": "stock", -"603955": "stock", -"大千生态": "stock", -"300015": "stock", -"爱尔眼科": "stock", -"688428": "stock", -"诺诚健华-U": "stock", -"300514": "stock", -"友讯达": "stock", -"600864": "stock", -"哈投股份": "stock", -"002619": "stock", -"*ST艾格": "stock", -"603615": "stock", -"茶花股份": "stock", -"300882": "stock", -"万胜智能": "stock", -"300469": "stock", -"信息发展": "stock", -"002044": "stock", -"美年健康": "stock", -"002905": "stock", -"金逸影视": "stock", -"000628": "stock", -"高新发展": "stock", -"002214": "stock", -"大立科技": "stock", -"688326": "stock", -"经纬恒润-W": "stock", -"600398": "stock", -"海澜之家": "stock", -"001217": "stock", -"华尔泰": "stock", -"603699": "stock", -"纽威股份": "stock", -"002516": "stock", -"旷达科技": "stock", -"601878": "stock", -"浙商证券": "stock", -"603909": "stock", -"建发合诚": 
"stock", -"300467": "stock", -"迅游科技": "stock", -"837663": "stock", -"明阳科技": "stock", -"688271": "stock", -"联影医疗": "stock", -"002204": "stock", -"大连重工": "stock", -"603087": "stock", -"甘李药业": "stock", -"301529": "stock", -"福赛科技": "stock", -"600761": "stock", -"安徽合力": "stock", -"603617": "stock", -"君禾股份": "stock", -"871981": "stock", -"晶赛科技": "stock", -"002288": "stock", -"超华科技": "stock", -"600054": "stock", -"黄山旅游": "stock", -"838030": "stock", -"德众汽车": "stock", -"836504": "stock", -"博迅生物": "stock", -"601688": "stock", -"华泰证券": "stock", -"002774": "stock", -"快意电梯": "stock", -"600249": "stock", -"两面针": "stock", -"600520": "stock", -"文一科技": "stock", -"002118": "stock", -"*ST紫鑫": "stock", -"300844": "stock", -"山水比德": "stock", -"002032": "stock", -"苏泊尔": "stock", -"600133": "stock", -"东湖高新": "stock", -"300846": "stock", -"首都在线": "stock", -"300287": "stock", -"飞利信": "stock", -"688357": "stock", -"建龙微纳": "stock", -"833171": "stock", -"国航远洋": "stock", -"301421": "stock", -"波长光电": "stock", -"600003": "stock", -"ST东北高": "stock", -"600082": "stock", -"海泰发展": "stock", -"300777": "stock", -"中简科技": "stock", -"605168": "stock", -"三人行": "stock", -"002070": "stock", -"众和退": "stock", -"688310": "stock", -"迈得医疗": "stock", -"600230": "stock", -"沧州大化": "stock", -"600389": "stock", -"江山股份": "stock", -"002882": "stock", -"金龙羽": "stock", -"600143": "stock", -"金发科技": "stock", -"300261": "stock", -"雅本化学": "stock", -"000635": "stock", -"英力特": "stock", -"300620": "stock", -"光库科技": "stock", -"300290": "stock", -"荣科科技": "stock", -"600356": "stock", -"恒丰纸业": "stock", -"688191": "stock", -"智洋创新": "stock", -"688393": "stock", -"安必平": "stock", -"600650": "stock", -"锦江在线": "stock", -"002819": "stock", -"东方中科": "stock", -"002582": "stock", -"好想你": "stock", -"300338": "stock", -"开元教育": "stock", -"300094": "stock", -"国联水产": "stock", -"002208": "stock", -"合肥城建": "stock", -"002140": "stock", -"东华科技": "stock", -"300047": "stock", -"天源迪科": "stock", -"301172": "stock", -"君逸数码": "stock", -"002167": "stock", -"东方锆业": "stock", -"000915": "stock", -"华特达因": "stock", -"601899": "stock", -"紫金矿业": "stock", -"300139": "stock", -"晓程科技": "stock", -"600151": "stock", -"航天机电": "stock", -"600698": "stock", -"湖南天雁": "stock", -"003006": "stock", -"百亚股份": "stock", -"000887": "stock", -"中鼎股份": "stock", -"301016": "stock", -"雷尔伟": "stock", -"600007": "stock", -"中国国贸": "stock", -"603639": "stock", -"海利尔": "stock", -"831167": "stock", -"鑫汇科": "stock", -"688082": "stock", -"盛美上海": "stock", -"301296": "stock", -"新巨丰": "stock", -"603111": "stock", -"康尼机电": "stock", -"600573": "stock", -"惠泉啤酒": "stock", -"002809": "stock", -"红墙股份": "stock", -"601010": "stock", -"文峰股份": "stock", -"688195": "stock", -"腾景科技": "stock", -"300629": "stock", -"新劲刚": "stock", -"600811": "stock", -"东方集团": "stock", -"301062": "stock", -"上海艾录": "stock", -"002600": "stock", -"领益智造": "stock", -"002826": "stock", -"易明医药": "stock", -"600248": "stock", -"陕建股份": "stock", -"000837": "stock", -"秦川机床": "stock", -"603978": "stock", -"深圳新星": "stock", -"603275": "stock", -"众辰科技": "stock", -"000862": "stock", -"银星能源": "stock", -"600094": "stock", -"大名城": "stock", -"300069": "stock", -"金利华电": "stock", -"301448": "stock", -"开创电气": "stock", -"600270": "stock", -"外运发展": "stock", -"000651": "stock", -"格力电器": "stock", -"603392": "stock", -"万泰生物": "stock", -"002534": "stock", -"西子洁能": "stock", -"835174": "stock", -"五新隧装": "stock", -"836807": "stock", -"奔朗新材": "stock", -"002989": "stock", -"中天精装": "stock", -"603633": "stock", -"徕木股份": "stock", -"688398": "stock", -"赛特新材": "stock", -"301132": "stock", 
-"满坤科技": "stock", -"301079": "stock", -"邵阳液压": "stock", -"688622": "stock", -"禾信仪器": "stock", -"002691": "stock", -"冀凯股份": "stock", -"605199": "stock", -"葫芦娃": "stock", -"002973": "stock", -"侨银股份": "stock", -"001212": "stock", -"中旗新材": "stock", -"688697": "stock", -"纽威数控": "stock", -"603719": "stock", -"良品铺子": "stock", -"601136": "stock", -"首创证券": "stock", -"002541": "stock", -"鸿路钢构": "stock", -"000597": "stock", -"东北制药": "stock", -"600501": "stock", -"航天晨光": "stock", -"002632": "stock", -"道明光学": "stock", -"002306": "stock", -"中科云网": "stock", -"600260": "stock", -"*ST凯乐": "stock", -"600566": "stock", -"济川药业": "stock", -"300110": "stock", -"华仁药业": "stock", -"688387": "stock", -"信科移动-U": "stock", -"300559": "stock", -"佳发教育": "stock", -"002207": "stock", -"准油股份": "stock", -"003010": "stock", -"若羽臣": "stock", -"002674": "stock", -"兴业科技": "stock", -"603706": "stock", -"东方环宇": "stock", -"002323": "stock", -"雅博股份": "stock", -"600392": "stock", -"盛和资源": "stock", -"603359": "stock", -"东珠生态": "stock", -"688561": "stock", -"奇安信-U": "stock", -"300536": "stock", -"农尚环境": "stock", -"002096": "stock", -"易普力": "stock", -"002476": "stock", -"宝莫股份": "stock", -"002984": "stock", -"森麒麟": "stock", -"002940": "stock", -"昂利康": "stock", -"003009": "stock", -"中天火箭": "stock", -"301155": "stock", -"海力风电": "stock", -"300347": "stock", -"泰格医药": "stock", -"600277": "stock", -"亿利洁能": "stock", -"300463": "stock", -"迈克生物": "stock", -"600800": "stock", -"渤海化学": "stock", -"002352": "stock", -"顺丰控股": "stock", -"300319": "stock", -"麦捷科技": "stock", -"603357": "stock", -"设计总院": "stock", -"600069": "stock", -"退市银鸽": "stock", -"600794": "stock", -"保税科技": "stock", -"002768": "stock", -"国恩股份": "stock", -"002657": "stock", -"中科金财": "stock", -"002634": "stock", -"棒杰股份": "stock", -"601609": "stock", -"金田股份": "stock", -"300299": "stock", -"富春股份": "stock", -"300913": "stock", -"兆龙互连": "stock", -"688655": "stock", -"迅捷兴": "stock", -"688661": "stock", -"和林微纳": "stock", -"001238": "stock", -"浙江正特": "stock", -"688192": "stock", -"迪哲医药-U": "stock", -"001213": "stock", -"中铁特货": "stock", -"688200": "stock", -"华峰测控": "stock", -"002469": "stock", -"三维化学": "stock", -"688027": "stock", -"国盾量子": "stock", -"603321": "stock", -"梅轮电梯": "stock", -"605259": "stock", -"绿田机械": "stock", -"600385": "stock", -"退市金泰": "stock", -"002171": "stock", -"楚江新材": "stock", -"603061": "stock", -"金海通": "stock", -"600819": "stock", -"耀皮玻璃": "stock", -"688627": "stock", -"精智达": "stock", -"002961": "stock", -"瑞达期货": "stock", -"300444": "stock", -"双杰电气": "stock", -"001288": "stock", -"运机集团": "stock", -"600467": "stock", -"好当家": "stock", -"002359": "stock", -"北讯退": "stock", -"301025": "stock", -"读客文化": "stock", -"300857": "stock", -"协创数据": "stock", -"300135": "stock", -"宝利国际": "stock", -"603876": "stock", -"鼎胜新材": "stock", -"688212": "stock", -"澳华内镜": "stock", -"300177": "stock", -"中海达": "stock", -"603319": "stock", -"湘油泵": "stock", -"600612": "stock", -"老凤祥": "stock", -"002145": "stock", -"中核钛白": "stock", -"002946": "stock", -"新乳业": "stock", -"603690": "stock", -"至纯科技": "stock", -"601005": "stock", -"重庆钢铁": "stock", -"600021": "stock", -"上海电力": "stock", -"002678": "stock", -"珠江钢琴": "stock", -"430139": "stock", -"华岭股份": "stock", -"688211": "stock", -"中科微至": "stock", -"301040": "stock", -"中环海陆": "stock", -"834033": "stock", -"康普化学": "stock", -"600291": "stock", -"退市西水": "stock", -"872541": "stock", -"铁大科技": "stock", -"688050": "stock", -"爱博医疗": "stock", -"300175": "stock", -"朗源股份": "stock", -"002236": "stock", -"大华股份": "stock", -"000831": "stock", -"中国稀土": "stock", -"600186": 
"stock", -"莲花健康": "stock", -"002420": "stock", -"毅昌科技": "stock", -"603182": "stock", -"嘉华股份": "stock", -"600481": "stock", -"双良节能": "stock", -"601789": "stock", -"宁波建工": "stock", -"600718": "stock", -"东软集团": "stock", -"600539": "stock", -"狮头股份": "stock", -"301168": "stock", -"通灵股份": "stock", -"000625": "stock", -"长安汽车": "stock", -"600103": "stock", -"青山纸业": "stock", -"002135": "stock", -"东南网架": "stock", -"300591": "stock", -"万里马": "stock", -"688662": "stock", -"富信科技": "stock", -"002200": "stock", -"ST交投": "stock", -"000705": "stock", -"浙江震元": "stock", -"603331": "stock", -"百达精工": "stock", -"600223": "stock", -"福瑞达": "stock", -"300873": "stock", -"海晨股份": "stock", -"600748": "stock", -"上实发展": "stock", -"002855": "stock", -"捷荣技术": "stock", -"002018": "stock", -"华信退": "stock", -"301092": "stock", -"争光股份": "stock", -"600667": "stock", -"太极实业": "stock", -"600865": "stock", -"百大集团": "stock", -"002900": "stock", -"哈三联": "stock", -"300301": "stock", -"*ST长方": "stock", -"000592": "stock", -"平潭发展": "stock", -"603661": "stock", -"恒林股份": "stock", -"000505": "stock", -"京粮控股": "stock", -"000700": "stock", -"模塑科技": "stock", -"300593": "stock", -"新雷能": "stock", -"601098": "stock", -"中南传媒": "stock", -"301006": "stock", -"迈拓股份": "stock", -"601901": "stock", -"方正证券": "stock", -"688343": "stock", -"云天励飞-U": "stock", -"002473": "stock", -"圣莱退": "stock", -"600011": "stock", -"华能国际": "stock", -"000048": "stock", -"京基智农": "stock", -"300116": "stock", -"保力新": "stock", -"603118": "stock", -"共进股份": "stock", -"301002": "stock", -"崧盛股份": "stock", -"301057": "stock", -"汇隆新材": "stock", -"300768": "stock", -"迪普科技": "stock", -"002025": "stock", -"航天电器": "stock", -"600777": "stock", -"新潮能源": "stock", -"600423": "stock", -"柳化股份": "stock", -"000639": "stock", -"西王食品": "stock", -"600603": "stock", -"广汇物流": "stock", -"002035": "stock", -"华帝股份": "stock", -"600195": "stock", -"中牧股份": "stock", -"002177": "stock", -"御银股份": "stock", -"002479": "stock", -"富春环保": "stock", -"000889": "stock", -"ST中嘉": "stock", -"605305": "stock", -"中际联合": "stock", -"002702": "stock", -"海欣食品": "stock", -"300379": "stock", -"东方通": "stock", -"688052": "stock", -"纳芯微": "stock", -"600834": "stock", -"申通地铁": "stock", -"301127": "stock", -"天源环保": "stock", -"002127": "stock", -"南极电商": "stock", -"002847": "stock", -"盐津铺子": "stock", -"600528": "stock", -"中铁工业": "stock", -"301089": "stock", -"拓新药业": "stock", -"300227": "stock", -"光韵达": "stock", -"003019": "stock", -"宸展光电": "stock", -"000049": "stock", -"德赛电池": "stock", -"688151": "stock", -"华强科技": "stock", -"600917": "stock", -"重庆燃气": "stock", -"603733": "stock", -"仙鹤股份": "stock", -"605189": "stock", -"富春染织": "stock", -"002088": "stock", -"鲁阳节能": "stock", -"301108": "stock", -"洁雅股份": "stock", -"002711": "stock", -"欧浦退": "stock", -"300003": "stock", -"乐普医疗": "stock", -"600746": "stock", -"江苏索普": "stock", -"002583": "stock", -"海能达": "stock", -"002296": "stock", -"辉煌科技": "stock", -"600077": "stock", -"*ST宋都": "stock", -"601268": "stock", -"*ST二重": "stock", -"600308": "stock", -"华泰股份": "stock", -"002872": "stock", -"ST天圣": "stock", -"000697": "stock", -"*ST炼石": "stock", -"301559": "stock", -"N中集环": "stock", -"002510": "stock", -"天汽模": "stock", -"300516": "stock", -"久之洋": "stock", -"300084": "stock", -"海默科技": "stock", -"603963": "stock", -"大理药业": "stock", -"300946": "stock", -"恒而达": "stock", -"688072": "stock", -"拓荆科技": "stock", -"300190": "stock", -"维尔利": "stock", -"600838": "stock", -"上海九百": "stock", -"600289": "stock", -"ST信通": "stock", -"002160": "stock", -"常铝股份": "stock", -"000400": "stock", -"许继电气": "stock", 
-"601607": "stock", -"上海医药": "stock", -"301381": "stock", -"赛维时代": "stock", -"002042": "stock", -"华孚时尚": "stock", -"002980": "stock", -"华盛昌": "stock", -"603696": "stock", -"安记食品": "stock", -"688385": "stock", -"复旦微电": "stock", -"688426": "stock", -"康为世纪": "stock", -"688680": "stock", -"海优新材": "stock", -"300224": "stock", -"正海磁材": "stock", -"002259": "stock", -"ST升达": "stock", -"000670": "stock", -"盈方微": "stock", -"300351": "stock", -"永贵电器": "stock", -"000707": "stock", -"双环科技": "stock", -"688001": "stock", -"华兴源创": "stock", -"600590": "stock", -"泰豪科技": "stock", -"000636": "stock", -"风华高科": "stock", -"300159": "stock", -"新研股份": "stock", -"002038": "stock", -"双鹭药业": "stock", -"002880": "stock", -"卫光生物": "stock", -"601158": "stock", -"重庆水务": "stock", -"000725": "stock", -"京东方A": "stock", -"002580": "stock", -"圣阳股份": "stock", -"601777": "stock", -"力帆科技": "stock", -"688733": "stock", -"壹石通": "stock", -"605289": "stock", -"罗曼股份": "stock", -"300279": "stock", -"和晶科技": "stock", -"839725": "stock", -"惠丰钻石": "stock", -"601816": "stock", -"京沪高铁": "stock", -"002780": "stock", -"三夫户外": "stock", -"688259": "stock", -"创耀科技": "stock", -"601107": "stock", -"四川成渝": "stock", -"000532": "stock", -"华金资本": "stock", -"002862": "stock", -"实丰文化": "stock", -"300708": "stock", -"聚灿光电": "stock", -"002333": "stock", -"罗普斯金": "stock", -"300430": "stock", -"诚益通": "stock", -"000609": "stock", -"中迪投资": "stock", -"836422": "stock", -"润普食品": "stock", -"300915": "stock", -"海融科技": "stock", -"600019": "stock", -"宝钢股份": "stock", -"600792": "stock", -"云煤能源": "stock", -"301223": "stock", -"中荣股份": "stock", -"600995": "stock", -"南网储能": "stock", -"002394": "stock", -"联发股份": "stock", -"870204": "stock", -"沪江材料": "stock", -"002988": "stock", -"豪美新材": "stock", -"000930": "stock", -"中粮科技": "stock", -"301000": "stock", -"肇民科技": "stock", -"603041": "stock", -"美思德": "stock", -"601100": "stock", -"恒立液压": "stock", -"688333": "stock", -"铂力特": "stock", -"603558": "stock", -"健盛集团": "stock", -"000429": "stock", -"粤高速A": "stock", -"601319": "stock", -"中国人保": "stock", -"600826": "stock", -"兰生股份": "stock", -"688581": "stock", -"安杰思": "stock", -"603611": "stock", -"诺力股份": "stock", -"605090": "stock", -"九丰能源": "stock", -"601318": "stock", -"中国平安": "stock", -"600941": "stock", -"中国移动": "stock", -"000527": "stock", -"美的电器": "stock", -"001965": "stock", -"招商公路": "stock", -"300642": "stock", -"透景生命": "stock", -"301097": "stock", -"天益医疗": "stock", -"300346": "stock", -"南大光电": "stock", -"002690": "stock", -"美亚光电": "stock", -"000153": "stock", -"丰原药业": "stock", -"300622": "stock", -"博士眼镜": "stock", -"300146": "stock", -"汤臣倍健": "stock", -"002426": "stock", -"胜利精密": "stock", -"002123": "stock", -"梦网科技": "stock", -"688109": "stock", -"品茗科技": "stock", -"601515": "stock", -"东风股份": "stock", -"605399": "stock", -"晨光新材": "stock", -"000070": "stock", -"特发信息": "stock", -"600730": "stock", -"中国高科": "stock", -"600170": "stock", -"上海建工": "stock", -"600894": "stock", -"广日股份": "stock", -"688591": "stock", -"泰凌微": "stock", -"871694": "stock", -"中裕科技": "stock", -"002176": "stock", -"江特电机": "stock", -"600256": "stock", -"广汇能源": "stock", -"002082": "stock", -"万邦德": "stock", -"300705": "stock", -"九典制药": "stock", -"000816": "stock", -"智慧农业": "stock", -"600905": "stock", -"三峡能源": "stock", -"002265": "stock", -"建设工业": "stock", -"605398": "stock", -"新炬网络": "stock", -"300863": "stock", -"卡倍亿": "stock", -"601698": "stock", -"中国卫通": "stock", -"600830": "stock", -"香溢融通": "stock", -"688455": "stock", -"科捷智能": "stock", -"600671": "stock", -"*ST目药": "stock", -"301356": "stock", -"天振股份": 
"stock", -"301073": "stock", -"君亭酒店": "stock", -"600100": "stock", -"同方股份": "stock", -"002507": "stock", -"涪陵榨菜": "stock", -"002014": "stock", -"永新股份": "stock", -"301150": "stock", -"中一科技": "stock", -"603860": "stock", -"中公高科": "stock", -"000403": "stock", -"派林生物": "stock", -"002803": "stock", -"吉宏股份": "stock", -"002568": "stock", -"百润股份": "stock", -"002251": "stock", -"步步高": "stock", -"605028": "stock", -"世茂能源": "stock", -"603291": "stock", -"联合水务": "stock", -"002932": "stock", -"明德生物": "stock", -"600728": "stock", -"佳都科技": "stock", -"001207": "stock", -"联科科技": "stock", -"300732": "stock", -"设研院": "stock", -"300387": "stock", -"富邦股份": "stock", -"002666": "stock", -"德联集团": "stock", -"000603": "stock", -"盛达资源": "stock", -"000559": "stock", -"万向钱潮": "stock", -"688597": "stock", -"煜邦电力": "stock", -"300183": "stock", -"东软载波": "stock", -"002335": "stock", -"科华数据": "stock", -"600478": "stock", -"科力远": "stock", -"600692": "stock", -"亚通股份": "stock", -"871857": "stock", -"泓禧科技": "stock", -"600029": "stock", -"南方航空": "stock", -"301456": "stock", -"盘古智能": "stock", -"001296": "stock", -"长江材料": "stock", -"002373": "stock", -"千方科技": "stock", -"600766": "stock", -"*ST园城": "stock", -"300649": "stock", -"杭州园林": "stock", -"301280": "stock", -"珠城科技": "stock", -"600267": "stock", -"海正药业": "stock", -"002544": "stock", -"普天科技": "stock", -"002799": "stock", -"环球印务": "stock", -"002999": "stock", -"天禾股份": "stock", -"838670": "stock", -"恒进感应": "stock", -"603052": "stock", -"可川科技": "stock", -"837006": "stock", -"晟楠科技": "stock", -"002386": "stock", -"天原股份": "stock", -"831627": "stock", -"力王股份": "stock", -"300899": "stock", -"上海凯鑫": "stock", -"300801": "stock", -"泰和科技": "stock", -"603377": "stock", -"东方时尚": "stock", -"603569": "stock", -"长久物流": "stock", -"600828": "stock", -"茂业商业": "stock", -"000540": "stock", -"*ST中天": "stock", -"000511": "stock", -"烯碳退": "stock", -"002119": "stock", -"康强电子": "stock", -"300985": "stock", -"致远新能": "stock", -"301512": "stock", -"智信精密": "stock", -"600148": "stock", -"长春一东": "stock", -"000619": "stock", -"海螺新材": "stock", -"301333": "stock", -"诺思格": "stock", -"688120": "stock", -"华海清科": "stock", -"002271": "stock", -"东方雨虹": "stock", -"300582": "stock", -"英飞特": "stock", -"688249": "stock", -"晶合集成": "stock", -"301266": "stock", -"宇邦新材": "stock", -"603662": "stock", -"柯力传感": "stock", -"688671": "stock", -"碧兴物联": "stock", -"688231": "stock", -"隆达股份": "stock", -"300681": "stock", -"英搏尔": "stock", -"603028": "stock", -"赛福天": "stock", -"000166": "stock", -"申万宏源": "stock", -"301391": "stock", -"卡莱特": "stock", -"600269": "stock", -"赣粤高速": "stock", -"000006": "stock", -"深振业A": "stock", -"300012": "stock", -"华测检测": "stock", -"300115": "stock", -"长盈精密": "stock", -"688502": "stock", -"茂莱光学": "stock", -"002423": "stock", -"中粮资本": "stock", -"603157": "stock", -"退市拉夏": "stock", -"688182": "stock", -"灿勤科技": "stock", -"301203": "stock", -"国泰环保": "stock", -"601866": "stock", -"中远海发": "stock", -"300297": "stock", -"蓝盾退": "stock", -"600403": "stock", -"大有能源": "stock", -"600301": "stock", -"华锡有色": "stock", -"873305": "stock", -"九菱科技": "stock", -"301098": "stock", -"金埔园林": "stock", -"603035": "stock", -"常熟汽饰": "stock", -"002499": "stock", -"科林退": "stock", -"001227": "stock", -"兰州银行": "stock", -"002237": "stock", -"恒邦股份": "stock", -"002561": "stock", -"徐家汇": "stock", -"300293": "stock", -"蓝英装备": "stock", -"002219": "stock", -"新里程": "stock", -"603366": "stock", -"日出东方": "stock", -"000629": "stock", -"钒钛股份": "stock", -"300160": "stock", -"秀强股份": "stock", -"002357": "stock", -"富临运业": "stock", -"603949": "stock", 
-"雪龙集团": "stock", -"688046": "stock", -"药康生物": "stock", -"002390": "stock", -"信邦制药": "stock", -"300186": "stock", -"大华农": "stock", -"300226": "stock", -"上海钢联": "stock", -"002452": "stock", -"长高电新": "stock", -"600203": "stock", -"福日电子": "stock", -"688100": "stock", -"威胜信息": "stock", -"300201": "stock", -"海伦哲": "stock", -"600456": "stock", -"宝钛股份": "stock", -"688629": "stock", -"华丰科技": "stock", -"000568": "stock", -"泸州老窖": "stock", -"301215": "stock", -"中汽股份": "stock", -"688347": "stock", -"华虹公司": "stock", -"300286": "stock", -"安科瑞": "stock", -"603151": "stock", -"邦基科技": "stock", -"600628": "stock", -"新世界": "stock", -"688372": "stock", -"伟测科技": "stock", -"605151": "stock", -"西上海": "stock", -"301137": "stock", -"哈焊华通": "stock", -"300618": "stock", -"寒锐钴业": "stock", -"300835": "stock", -"龙磁科技": "stock", -"000582": "stock", -"北部湾港": "stock", -"300144": "stock", -"宋城演艺": "stock", -"603660": "stock", -"苏州科达": "stock", -"600283": "stock", -"钱江水利": "stock", -"300712": "stock", -"永福股份": "stock", -"300687": "stock", -"赛意信息": "stock", -"300470": "stock", -"中密控股": "stock", -"300999": "stock", -"金龙鱼": "stock", -"002328": "stock", -"新朋股份": "stock", -"688087": "stock", -"英科再生": "stock", -"002686": "stock", -"亿利达": "stock", -"001260": "stock", -"坤泰股份": "stock", -"300302": "stock", -"同有科技": "stock", -"600321": "stock", -"正源股份": "stock", -"603198": "stock", -"迎驾贡酒": "stock", -"832876": "stock", -"慧为智能": "stock", -"600085": "stock", -"同仁堂": "stock", -"000722": "stock", -"湖南发展": "stock", -"603360": "stock", -"百傲化学": "stock", -"300791": "stock", -"仙乐健康": "stock", -"002027": "stock", -"分众传媒": "stock", -"603017": "stock", -"中衡设计": "stock", -"603083": "stock", -"剑桥科技": "stock", -"002694": "stock", -"顾地科技": "stock", -"603320": "stock", -"迪贝电气": "stock", -"603369": "stock", -"今世缘": "stock", -"601989": "stock", -"中国重工": "stock", -"300498": "stock", -"温氏股份": "stock", -"600367": "stock", -"红星发展": "stock", -"000020": "stock", -"深华发A": "stock", -"600721": "stock", -"百花医药": "stock", -"000713": "stock", -"丰乐种业": "stock", -"603278": "stock", -"大业股份": "stock", -"000578": "stock", -"盐湖集团": "stock", -"688981": "stock", -"中芯国际": "stock", -"688628": "stock", -"优利德": "stock", -"301080": "stock", -"百普赛斯": "stock", -"002967": "stock", -"广电计量": "stock", -"301360": "stock", -"荣旗科技": "stock", -"300571": "stock", -"平治信息": "stock", -"002760": "stock", -"凤形股份": "stock", -"000959": "stock", -"首钢股份": "stock", -"002253": "stock", -"川大智胜": "stock", -"301289": "stock", -"国缆检测": "stock", -"000585": "stock", -"东电退": "stock", -"002388": "stock", -"新亚制程": "stock", -"603079": "stock", -"圣达生物": "stock", -"000415": "stock", -"渤海租赁": "stock", -"688550": "stock", -"瑞联新材": "stock", -"300028": "stock", -"金亚退": "stock", -"002912": "stock", -"中新赛克": "stock", -"002391": "stock", -"长青股份": "stock", -"300233": "stock", -"金城医药": "stock", -"000966": "stock", -"长源电力": "stock", -"000596": "stock", -"古井贡酒": "stock", -"300132": "stock", -"青松股份": "stock", -"600683": "stock", -"京投发展": "stock", -"603815": "stock", -"交建股份": "stock", -"002902": "stock", -"铭普光磁": "stock", -"600192": "stock", -"长城电工": "stock", -"301102": "stock", -"兆讯传媒": "stock", -"300678": "stock", -"中科信息": "stock", -"000035": "stock", -"中国天楹": "stock", -"600529": "stock", -"山东药玻": "stock", -"603328": "stock", -"依顿电子": "stock", -"300981": "stock", -"中红医疗": "stock", -"603895": "stock", -"天永智能": "stock", -"600056": "stock", -"中国医药": "stock", -"301136": "stock", -"招标股份": "stock", -"688108": "stock", -"赛诺医疗": "stock", -"600265": "stock", -"ST景谷": "stock", -"603288": "stock", -"海天味业": "stock", -"000709": 
"stock", -"河钢股份": "stock", -"832110": "stock", -"雷特科技": "stock", -"603596": "stock", -"伯特利": "stock", -"000546": "stock", -"ST金圆": "stock", -"603711": "stock", -"香飘飘": "stock", -"002356": "stock", -"赫美集团": "stock", -"002548": "stock", -"金新农": "stock", -"688337": "stock", -"普源精电": "stock", -"600993": "stock", -"马应龙": "stock", -"300859": "stock", -"*ST西域": "stock", -"300294": "stock", -"博雅生物": "stock", -"301312": "stock", -"智立方": "stock", -"688071": "stock", -"华依科技": "stock", -"000812": "stock", -"陕西金叶": "stock", -"301525": "stock", -"儒竞科技": "stock", -"002521": "stock", -"齐峰新材": "stock", -"300832": "stock", -"新产业": "stock", -"603689": "stock", -"皖天然气": "stock", -"301096": "stock", -"百诚医药": "stock", -"002910": "stock", -"庄园牧场": "stock", -"002813": "stock", -"路畅科技": "stock", -"002758": "stock", -"浙农股份": "stock", -"300078": "stock", -"思创医惠": "stock", -"000151": "stock", -"中成股份": "stock", -"603612": "stock", -"索通发展": "stock", -"002043": "stock", -"兔宝宝": "stock", -"603883": "stock", -"老百姓": "stock", -"301282": "stock", -"金禄电子": "stock", -"600017": "stock", -"日照港": "stock", -"002260": "stock", -"德奥退": "stock", -"605086": "stock", -"龙高股份": "stock", -"002827": "stock", -"高争民爆": "stock", -"002374": "stock", -"中锐股份": "stock", -"600701": "stock", -"退市工新": "stock", -"603666": "stock", -"亿嘉和": "stock", -"603069": "stock", -"海汽集团": "stock", -"688009": "stock", -"中国通号": "stock", -"300869": "stock", -"康泰医学": "stock", -"300543": "stock", -"朗科智能": "stock", -"002186": "stock", -"全聚德": "stock", -"002107": "stock", -"沃华医药": "stock", -"600247": "stock", -"*ST成城": "stock", -"600835": "stock", -"上海机电": "stock", -"300089": "stock", -"文化退": "stock", -"002355": "stock", -"兴民智通": "stock", -"002115": "stock", -"三维通信": "stock", -"001316": "stock", -"润贝航科": "stock", -"000526": "stock", -"学大教育": "stock", -"603729": "stock", -"龙韵股份": "stock", -"000663": "stock", -"永安林业": "stock", -"300041": "stock", -"回天新材": "stock", -"603810": "stock", -"丰山集团": "stock", -"300725": "stock", -"药石科技": "stock", -"603535": "stock", -"嘉诚国际": "stock", -"600846": "stock", -"同济科技": "stock", -"002953": "stock", -"日丰股份": "stock", -"002911": "stock", -"佛燃能源": "stock", -"300271": "stock", -"华宇软件": "stock", -"002220": "stock", -"天宝退": "stock", -"002892": "stock", -"科力尔": "stock", -"001378": "stock", -"德冠新材": "stock", -"002272": "stock", -"川润股份": "stock", -"002230": "stock", -"科大讯飞": "stock", -"688286": "stock", -"敏芯股份": "stock", -"600422": "stock", -"昆药集团": "stock", -"300178": "stock", -"腾邦退": "stock", -"000755": "stock", -"山西路桥": "stock", -"300431": "stock", -"暴风退": "stock", -"688548": "stock", -"广钢气体": "stock", -"688401": "stock", -"路维光电": "stock", -"601216": "stock", -"君正集团": "stock", -"603015": "stock", -"弘讯科技": "stock", -"603890": "stock", -"春秋电子": "stock", -"300236": "stock", -"上海新阳": "stock", -"600891": "stock", -"退市秋林": "stock", -"000972": "stock", -"中基健康": "stock", -"300409": "stock", -"道氏技术": "stock", -"000061": "stock", -"农产品": "stock", -"605178": "stock", -"时空科技": "stock", -"600362": "stock", -"江西铜业": "stock", -"300865": "stock", -"大宏立": "stock", -"603201": "stock", -"常润股份": "stock", -"002729": "stock", -"好利科技": "stock", -"000024": "stock", -"招商地产": "stock", -"600604": "stock", -"市北高新": "stock", -"688298": "stock", -"东方生物": "stock", -"600752": "stock", -"*ST哈慈": "stock", -"002079": "stock", -"苏州固锝": "stock", -"000659": "stock", -"珠海中富": "stock", -"830809": "stock", -"安达科技": "stock", -"002125": "stock", -"湘潭电化": "stock", -"002796": "stock", -"世嘉科技": "stock", -"688518": "stock", -"联赢激光": "stock", -"688368": "stock", -"晶丰明源": "stock", 
-"601012": "stock", -"隆基绿能": "stock", -"002846": "stock", -"英联股份": "stock", -"301042": "stock", -"安联锐视": "stock", -"832735": "stock", -"德源药业": "stock", -"301310": "stock", -"鑫宏业": "stock", -"833943": "stock", -"优机股份": "stock", -"002917": "stock", -"金奥博": "stock", -"601588": "stock", -"北辰实业": "stock", -"688317": "stock", -"之江生物": "stock", -"300068": "stock", -"南都电源": "stock", -"300405": "stock", -"科隆股份": "stock", -"301217": "stock", -"铜冠铜箔": "stock", -"000768": "stock", -"中航西飞": "stock", -"600837": "stock", -"海通证券": "stock", -"002383": "stock", -"合众思壮": "stock", -"601818": "stock", -"光大银行": "stock", -"300737": "stock", -"科顺股份": "stock", -"000963": "stock", -"华东医药": "stock", -"300080": "stock", -"易成新能": "stock", -"003040": "stock", -"楚天龙": "stock", -"300504": "stock", -"天邑股份": "stock", -"603982": "stock", -"泉峰汽车": "stock", -"300739": "stock", -"明阳电路": "stock", -"688516": "stock", -"奥特维": "stock", -"000892": "stock", -"欢瑞世纪": "stock", -"300993": "stock", -"玉马遮阳": "stock", -"300076": "stock", -"GQY视讯": "stock", -"600900": "stock", -"长江电力": "stock", -"300458": "stock", -"全志科技": "stock", -"300038": "stock", -"数知退": "stock", -"300680": "stock", -"隆盛科技": "stock", -"603159": "stock", -"上海亚虹": "stock", -"300333": "stock", -"兆日科技": "stock", -"600996": "stock", -"贵广网络": "stock", -"002304": "stock", -"洋河股份": "stock", -"603838": "stock", -"四通股份": "stock", -"002558": "stock", -"巨人网络": "stock", -"603466": "stock", -"风语筑": "stock", -"300118": "stock", -"东方日升": "stock", -"600379": "stock", -"宝光股份": "stock", -"688450": "stock", -"光格科技": "stock", -"605080": "stock", -"浙江自然": "stock", -"002019": "stock", -"亿帆医药": "stock", -"301115": "stock", -"建科股份": "stock", -"603153": "stock", -"上海建科": "stock", -"000693": "stock", -"华泽退": "stock", -"300317": "stock", -"珈伟新能": "stock", -"000157": "stock", -"中联重科": "stock", -"300424": "stock", -"航新科技": "stock", -"600109": "stock", -"国金证券": "stock", -"002590": "stock", -"万安科技": "stock", -"000155": "stock", -"川能动力": "stock", -"600869": "stock", -"远东股份": "stock", -"688638": "stock", -"誉辰智能": "stock", -"600812": "stock", -"华北制药": "stock", -"300943": "stock", -"春晖智控": "stock", -"600234": "stock", -"科新发展": "stock", -"603190": "stock", -"亚通精工": "stock", -"603922": "stock", -"金鸿顺": "stock", -"301106": "stock", -"骏成科技": "stock", -"000850": "stock", -"华茂股份": "stock", -"600217": "stock", -"中再资环": "stock", -"002907": "stock", -"华森制药": "stock", -"603187": "stock", -"海容冷链": "stock", -"002206": "stock", -"海利得": "stock", -"601020": "stock", -"华钰矿业": "stock", -"000823": "stock", -"超声电子": "stock", -"833429": "stock", -"康比特": "stock", -"002274": "stock", -"华昌化工": "stock", -"603858": "stock", -"步长制药": "stock", -"605186": "stock", -"健麾信息": "stock", -"603122": "stock", -"合富中国": "stock", -"301278": "stock", -"快可电子": "stock", -"300845": "stock", -"捷安高科": "stock", -"002226": "stock", -"江南化工": "stock", -"300240": "stock", -"飞力达": "stock", -"000689": "stock", -"ST宏业": "stock", -"600989": "stock", -"宝丰能源": "stock", -"002738": "stock", -"中矿资源": "stock", -"600470": "stock", -"六国化工": "stock", -"600236": "stock", -"桂冠电力": "stock", -"000868": "stock", -"安凯客车": "stock", -"300149": "stock", -"睿智医药": "stock", -"003022": "stock", -"联泓新科": "stock", -"001336": "stock", -"楚环科技": "stock", -"601228": "stock", -"广州港": "stock", -"002101": "stock", -"广东鸿图": "stock", -"300780": "stock", -"德恩精工": "stock", -"000562": "stock", -"宏源证券": "stock", -"600626": "stock", -"申达股份": "stock", -"601177": "stock", -"杭齿前进": "stock", -"600530": "stock", -"ST交昂": "stock", -"688127": "stock", -"蓝特光学": "stock", -"001218": "stock", -"丽臣实业": 
"stock", -"000564": "stock", -"ST大集": "stock", -"300128": "stock", -"锦富技术": "stock", -"601975": "stock", -"招商南油": "stock", -"002815": "stock", -"崇达技术": "stock", -"000780": "stock", -"ST平能": "stock", -"833266": "stock", -"生物谷": "stock", -"000535": "stock", -"*ST猴王": "stock", -"688156": "stock", -"路德环境": "stock", -"300682": "stock", -"朗新科技": "stock", -"300941": "stock", -"创识科技": "stock", -"605167": "stock", -"利柏特": "stock", -"600522": "stock", -"中天科技": "stock", -"688110": "stock", -"东芯股份": "stock", -"605009": "stock", -"豪悦护理": "stock", -"002725": "stock", -"跃岭股份": "stock", -"600378": "stock", -"昊华科技": "stock", -"000998": "stock", -"隆平高科": "stock", -"600273": "stock", -"嘉化能源": "stock", -"603819": "stock", -"神力股份": "stock", -"301048": "stock", -"金鹰重工": "stock", -"603928": "stock", -"兴业股份": "stock", -"834261": "stock", -"一诺威": "stock", -"000408": "stock", -"藏格矿业": "stock", -"600623": "stock", -"华谊集团": "stock", -"300610": "stock", -"晨化股份": "stock", -"603161": "stock", -"科华控股": "stock", -"601700": "stock", -"风范股份": "stock", -"002786": "stock", -"银宝山新": "stock", -"000567": "stock", -"海德股份": "stock", -"600976": "stock", -"健民集团": "stock", -"000809": "stock", -"铁岭新城": "stock", -"603088": "stock", -"宁波精达": "stock", -"000797": "stock", -"中国武夷": "stock", -"601798": "stock", -"蓝科高新": "stock", -"002299": "stock", -"圣农发展": "stock", -"002996": "stock", -"顺博合金": "stock", -"002650": "stock", -"加加食品": "stock", -"836395": "stock", -"朗鸿科技": "stock", -"603269": "stock", -"海鸥股份": "stock", -"002429": "stock", -"兆驰股份": "stock", -"002371": "stock", -"北方华创": "stock", -"605118": "stock", -"力鼎光电": "stock", -"000782": "stock", -"美达股份": "stock", -"300031": "stock", -"宝通科技": "stock", -"300326": "stock", -"凯利泰": "stock", -"688081": "stock", -"兴图新科": "stock", -"600078": "stock", -"ST澄星": "stock", -"301378": "stock", -"通达海": "stock", -"000058": "stock", -"深赛格": "stock", -"002529": "stock", -"海源复材": "stock", -"300541": "stock", -"先进数通": "stock", -"300272": "stock", -"开能健康": "stock", -"300344": "stock", -"立方数科": "stock", -"603228": "stock", -"景旺电子": "stock", -"301153": "stock", -"中科江南": "stock", -"605108": "stock", -"同庆楼": "stock", -"300313": "stock", -"*ST天山": "stock", -"000698": "stock", -"沈阳化工": "stock", -"300955": "stock", -"嘉亨家化": "stock", -"002943": "stock", -"宇晶股份": "stock", -"600809": "stock", -"山西汾酒": "stock", -"839167": "stock", -"同享科技": "stock", -"300803": "stock", -"指南针": "stock", -"002801": "stock", -"微光股份": "stock", -"002960": "stock", -"青鸟消防": "stock", -"830879": "stock", -"基康仪器": "stock", -"688339": "stock", -"亿华通-U": "stock", -"603193": "stock", -"润本股份": "stock", -"688416": "stock", -"恒烁股份": "stock", -"688599": "stock", -"天合光能": "stock", -"002392": "stock", -"北京利尔": "stock", -"600778": "stock", -"友好集团": "stock", -"300195": "stock", -"长荣股份": "stock", -"300510": "stock", -"金冠股份": "stock", -"300942": "stock", -"易瑞生物": "stock", -"300738": "stock", -"奥飞数据": "stock", -"002460": "stock", -"赣锋锂业": "stock", -"600557": "stock", -"康缘药业": "stock", -"600769": "stock", -"祥龙电业": "stock", -"600145": "stock", -"退市新亿": "stock", -"002578": "stock", -"闽发铝业": "stock", -"300203": "stock", -"聚光科技": "stock", -"300427": "stock", -"*ST红相": "stock", -"600184": "stock", -"光电股份": "stock", -"002733": "stock", -"雄韬股份": "stock", -"300426": "stock", -"唐德影视": "stock", -"688006": "stock", -"杭可科技": "stock", -"000514": "stock", -"渝开发": "stock", -"688026": "stock", -"洁特生物": "stock", -"002054": "stock", -"德美化工": "stock", -"600776": "stock", -"东方通信": "stock", -"000737": "stock", -"北方铜业": "stock", -"002843": "stock", -"泰嘉股份": "stock", -"603261": 
"stock", -"立航科技": "stock", -"300685": "stock", -"艾德生物": "stock", -"600642": "stock", -"申能股份": "stock", -"600589": "stock", -"*ST榕泰": "stock", -"600975": "stock", -"新五丰": "stock", -"301117": "stock", -"佳缘科技": "stock", -"600851": "stock", -"海欣股份": "stock", -"835305": "stock", -"云创数据": "stock", -"603609": "stock", -"禾丰股份": "stock", -"300040": "stock", -"九洲集团": "stock", -"688501": "stock", -"青达环保": "stock", -"600876": "stock", -"凯盛新能": "stock", -"300811": "stock", -"铂科新材": "stock", -"833509": "stock", -"同惠电子": "stock", -"300785": "stock", -"值得买": "stock", -"300800": "stock", -"力合科技": "stock", -"002923": "stock", -"润都股份": "stock", -"000676": "stock", -"智度股份": "stock", -"688300": "stock", -"联瑞新材": "stock", -"603915": "stock", -"国茂股份": "stock", -"300632": "stock", -"光莆股份": "stock", -"600613": "stock", -"神奇制药": "stock", -"301295": "stock", -"美硕科技": "stock", -"300265": "stock", -"通光线缆": "stock", -"301067": "stock", -"显盈科技": "stock", -"831278": "stock", -"泰德股份": "stock", -"601838": "stock", -"成都银行": "stock", -"300384": "stock", -"三联虹普": "stock", -"601869": "stock", -"长飞光纤": "stock", -"300748": "stock", -"金力永磁": "stock", -"002192": "stock", -"融捷股份": "stock", -"601633": "stock", -"长城汽车": "stock", -"002493": "stock", -"荣盛石化": "stock", -"300437": "stock", -"清水源": "stock", -"603867": "stock", -"新化股份": "stock", -"600594": "stock", -"益佰制药": "stock", -"837592": "stock", -"华信永道": "stock", -"000063": "stock", -"中兴通讯": "stock", -"002823": "stock", -"凯中精密": "stock", -"002068": "stock", -"黑猫股份": "stock", -"301069": "stock", -"凯盛新材": "stock", -"688084": "stock", -"晶品特装": "stock", -"603230": "stock", -"内蒙新华": "stock", -"002497": "stock", -"雅化集团": "stock", -"601199": "stock", -"江南水务": "stock", -"300016": "stock", -"北陆药业": "stock", -"600568": "stock", -"ST中珠": "stock", -"002638": "stock", -"勤上股份": "stock", -"002336": "stock", -"人人乐": "stock", -"301166": "stock", -"优宁维": "stock", -"300223": "stock", -"北京君正": "stock", -"601933": "stock", -"永辉超市": "stock", -"000821": "stock", -"京山轻机": "stock", -"300162": "stock", -"雷曼光电": "stock", -"300842": "stock", -"帝科股份": "stock", -"603566": "stock", -"普莱柯": "stock", -"603125": "stock", -"常青科技": "stock", -"300030": "stock", -"阳普医疗": "stock", -"600783": "stock", -"鲁信创投": "stock", -"688172": "stock", -"燕东微": "stock", -"002089": "stock", -"*ST新海": "stock", -"300840": "stock", -"酷特智能": "stock", -"600845": "stock", -"宝信软件": "stock", -"301059": "stock", -"金三江": "stock", -"600141": "stock", -"兴发集团": "stock", -"002290": "stock", -"禾盛新材": "stock", -"600688": "stock", -"上海石化": "stock", -"300096": "stock", -"易联众": "stock", -"603500": "stock", -"祥和实业": "stock", -"301037": "stock", -"保立佳": "stock", -"300114": "stock", -"中航电测": "stock", -"601187": "stock", -"厦门银行": "stock", -"000886": "stock", -"海南高速": "stock", -"600862": "stock", -"中航高科": "stock", -"300984": "stock", -"金沃股份": "stock", -"000990": "stock", -"诚志股份": "stock", -"002103": "stock", -"广博股份": "stock", -"002718": "stock", -"友邦吊顶": "stock", -"601857": "stock", -"中国石油": "stock", -"601211": "stock", -"国泰君安": "stock", -"002763": "stock", -"汇洁股份": "stock", -"002791": "stock", -"坚朗五金": "stock", -"301359": "stock", -"东南电子": "stock", -"002040": "stock", -"南京港": "stock", -"836826": "stock", -"盖世食品": "stock", -"600596": "stock", -"新安股份": "stock", -"001219": "stock", -"青岛食品": "stock", -"600183": "stock", -"生益科技": "stock", -"300150": "stock", -"世纪瑞尔": "stock", -"688373": "stock", -"盟科药业-U": "stock", -"601311": "stock", -"骆驼股份": "stock", -"688555": "stock", -"退市泽达": "stock", -"300180": "stock", -"华峰超纤": "stock", -"688589": "stock", -"力合微": "stock", 
-"300324": "stock", -"旋极信息": "stock", -"300939": "stock", -"秋田微": "stock", -"688319": "stock", -"欧林生物": "stock", -"300285": "stock", -"国瓷材料": "stock", -"301268": "stock", -"铭利达": "stock", -"600126": "stock", -"杭钢股份": "stock", -"002539": "stock", -"云图控股": "stock", -"600817": "stock", -"宇通重工": "stock", -"600336": "stock", -"澳柯玛": "stock", -"600823": "stock", -"ST世茂": "stock", -"688376": "stock", -"美埃科技": "stock", -"300568": "stock", -"星源材质": "stock", -"301306": "stock", -"西测测试": "stock", -"002028": "stock", -"思源电气": "stock", -"601868": "stock", -"中国能建": "stock", -"002400": "stock", -"省广集团": "stock", -"831039": "stock", -"国义招标": "stock", -"300833": "stock", -"浩洋股份": "stock", -"300181": "stock", -"佐力药业": "stock", -"600722": "stock", -"金牛化工": "stock", -"600786": "stock", -"东方锅炉": "stock", -"002191": "stock", -"劲嘉股份": "stock", -"002864": "stock", -"盘龙药业": "stock", -"301121": "stock", -"紫建电子": "stock", -"301120": "stock", -"新特电气": "stock", -"000935": "stock", -"四川双马": "stock", -"300112": "stock", -"万讯自控": "stock", -"688683": "stock", -"莱尔科技": "stock", -"300529": "stock", -"健帆生物": "stock", -"002591": "stock", -"恒大高新": "stock", -"300518": "stock", -"盛讯达": "stock", -"000800": "stock", -"一汽解放": "stock", -"300805": "stock", -"电声股份": "stock", -"688137": "stock", -"近岸蛋白": "stock", -"603345": "stock", -"安井食品": "stock", -"300888": "stock", -"稳健医疗": "stock", -"000558": "stock", -"莱茵体育": "stock", -"000045": "stock", -"深纺织A": "stock", -"688433": "stock", -"华曙高科": "stock", -"002203": "stock", -"海亮股份": "stock", -"300871": "stock", -"回盛生物": "stock", -"300722": "stock", -"新余国科": "stock", -"688777": "stock", -"中控技术": "stock", -"688235": "stock", -"百济神州-U": "stock", -"688299": "stock", -"长阳科技": "stock", -"301339": "stock", -"通行宝": "stock", -"601500": "stock", -"通用股份": "stock", -"002884": "stock", -"凌霄泵业": "stock", -"300490": "stock", -"华自科技": "stock", -"601375": "stock", -"中原证券": "stock", -"600072": "stock", -"中船科技": "stock", -"688115": "stock", -"思林杰": "stock", -"300064": "stock", -"金刚退": "stock", -"002777": "stock", -"久远银海": "stock", -"300281": "stock", -"金明精机": "stock", -"300260": "stock", -"新莱应材": "stock", -"603536": "stock", -"惠发食品": "stock", -"600607": "stock", -"上实医药": "stock", -"603778": "stock", -"乾景园林": "stock", -"688409": "stock", -"富创精密": "stock", -"000790": "stock", -"华神科技": "stock", -"300021": "stock", -"大禹节水": "stock", -"000042": "stock", -"中洲控股": "stock", -"688255": "stock", -"凯尔达": "stock", -"000860": "stock", -"顺鑫农业": "stock", -"300860": "stock", -"锋尚文化": "stock", -"688573": "stock", -"信宇人": "stock", -"605016": "stock", -"百龙创园": "stock", -"600879": "stock", -"航天电子": "stock", -"000520": "stock", -"凤凰航运": "stock", -"300192": "stock", -"科德教育": "stock", -"301075": "stock", -"多瑞医药": "stock", -"600074": "stock", -"退市保千": "stock", -"836419": "stock", -"万德股份": "stock", -"000009": "stock", -"中国宝安": "stock", -"603776": "stock", -"永安行": "stock", -"688331": "stock", -"荣昌生物": "stock", -"688345": "stock", -"博力威": "stock", -"600116": "stock", -"三峡水利": "stock", -"000001": "stock", -"平安银行": "stock", -"300936": "stock", -"中英科技": "stock", -"300664": "stock", -"鹏鹞环保": "stock", -"688506": "stock", -"百利天恒-U": "stock", -"002470": "stock", -"金正大": "stock", -"002517": "stock", -"恺英网络": "stock", -"000501": "stock", -"武商集团": "stock", -"300698": "stock", -"万马科技": "stock", -"688679": "stock", -"通源环境": "stock", -"688553": "stock", -"汇宇制药-W": "stock", -"839946": "stock", -"华阳变速": "stock", -"600190": "stock", -"锦州港": "stock", -"002017": "stock", -"东信和平": "stock", -"603992": "stock", -"松霖科技": "stock", -"600877": "stock", 
-"电科芯片": "stock", -"603286": "stock", -"日盈电子": "stock", -"000618": "stock", -"吉林化工": "stock", -"603758": "stock", -"秦安股份": "stock", -"002354": "stock", -"天娱数科": "stock", -"300797": "stock", -"钢研纳克": "stock", -"301446": "stock", -"福事特": "stock", -"002139": "stock", -"拓邦股份": "stock", -"002508": "stock", -"老板电器": "stock", -"688225": "stock", -"亚信安全": "stock", -"300103": "stock", -"达刚控股": "stock", -"601717": "stock", -"郑煤机": "stock", -"002745": "stock", -"木林森": "stock", -"002570": "stock", -"贝因美": "stock", -"301386": "stock", -"未来电器": "stock", -"301186": "stock", -"超达装备": "stock", -"002627": "stock", -"三峡旅游": "stock", -"300253": "stock", -"卫宁健康": "stock", -"301329": "stock", -"信音电子": "stock", -"301398": "stock", -"星源卓镁": "stock", -"301468": "stock", -"博盈特焊": "stock", -"300360": "stock", -"炬华科技": "stock", -"300639": "stock", -"凯普生物": "stock", -"688503": "stock", -"聚和材料": "stock", -"600173": "stock", -"卧龙地产": "stock", -"002588": "stock", -"史丹利": "stock", -"600497": "stock", -"驰宏锌锗": "stock", -"603051": "stock", -"鹿山新材": "stock", -"688478": "stock", -"晶升股份": "stock", -"831641": "stock", -"格利尔": "stock", -"002942": "stock", -"新农股份": "stock", -"603128": "stock", -"华贸物流": "stock", -"603329": "stock", -"上海雅仕": "stock", -"000660": "stock", -"*ST南华": "stock", -"002706": "stock", -"良信股份": "stock", -"001298": "stock", -"好上好": "stock", -"301218": "stock", -"华是科技": "stock", -"301285": "stock", -"鸿日达": "stock", -"603071": "stock", -"物产环能": "stock", -"603383": "stock", -"顶点软件": "stock", -"300174": "stock", -"元力股份": "stock", -"301205": "stock", -"联特科技": "stock", -"600880": "stock", -"博瑞传播": "stock", -"600320": "stock", -"振华重工": "stock", -"600681": "stock", -"百川能源": "stock", -"001328": "stock", -"登康口腔": "stock", -"603222": "stock", -"济民医疗": "stock", -"300493": "stock", -"润欣科技": "stock", -"301273": "stock", -"瑞晨环保": "stock", -"002457": "stock", -"青龙管业": "stock", -"601956": "stock", -"东贝集团": "stock", -"300905": "stock", -"宝丽迪": "stock", -"002559": "stock", -"亚威股份": "stock", -"002182": "stock", -"宝武镁业": "stock", -"002792": "stock", -"通宇通讯": "stock", -"000908": "stock", -"景峰医药": "stock", -"600808": "stock", -"马钢股份": "stock", -"300215": "stock", -"电科院": "stock", -"002569": "stock", -"ST步森": "stock", -"000866": "stock", -"扬子石化": "stock", -"600682": "stock", -"南京新百": "stock", -"603099": "stock", -"长白山": "stock", -"301138": "stock", -"华研精机": "stock", -"836720": "stock", -"吉冈精密": "stock", -"603330": "stock", -"天洋新材": "stock", -"603797": "stock", -"联泰环保": "stock", -"688656": "stock", -"浩欧博": "stock", -"002726": "stock", -"龙大美食": "stock", -"300662": "stock", -"科锐国际": "stock", -"688219": "stock", -"会通股份": "stock", -"873167": "stock", -"新赣江": "stock", -"688177": "stock", -"百奥泰": "stock", -"002746": "stock", -"仙坛股份": "stock", -"002083": "stock", -"孚日股份": "stock", -"600065": "stock", -"*ST联谊": "stock", -"600063": "stock", -"皖维高新": "stock", -"600343": "stock", -"航天动力": "stock", -"601916": "stock", -"浙商银行": "stock", -"300503": "stock", -"昊志机电": "stock", -"301011": "stock", -"华立科技": "stock", -"603127": "stock", -"昭衍新药": "stock", -"000920": "stock", -"沃顿科技": "stock", -"301052": "stock", -"果麦文化": "stock", -"002793": "stock", -"罗欣药业": "stock", -"300093": "stock", -"金刚光伏": "stock", -"688728": "stock", -"格科微": "stock", -"300740": "stock", -"水羊股份": "stock", -"601800": "stock", -"中国交建": "stock", -"838171": "stock", -"邦德股份": "stock", -"300847": "stock", -"中船汉光": "stock", -"430685": "stock", -"新芝生物": "stock", -"000029": "stock", -"深深房A": "stock", -"688315": "stock", -"诺禾致源": "stock", -"688171": "stock", -"纬德信息": "stock", -"603020": 
"stock", -"爱普股份": "stock", -"002520": "stock", -"日发精机": "stock", -"600654": "stock", -"ST中安": "stock", -"688049": "stock", -"炬芯科技": "stock", -"600617": "stock", -"国新能源": "stock", -"839273": "stock", -"一致魔芋": "stock", -"000718": "stock", -"苏宁环球": "stock", -"300345": "stock", -"华民股份": "stock", -"001366": "stock", -"播恩集团": "stock", -"834062": "stock", -"科润智控": "stock", -"688131": "stock", -"皓元医药": "stock", -"688702": "stock", -"盛科通信-U": "stock", -"001226": "stock", -"拓山重工": "stock", -"600558": "stock", -"大西洋": "stock", -"839371": "stock", -"欧福蛋业": "stock", -"301302": "stock", -"华如科技": "stock", -"600396": "stock", -"*ST金山": "stock", -"000806": "stock", -"银河退": "stock", -"605100": "stock", -"华丰股份": "stock", -"603326": "stock", -"我乐家居": "stock", -"600842": "stock", -"中西药业": "stock", -"002408": "stock", -"齐翔腾达": "stock", -"002395": "stock", -"双象股份": "stock", -"603300": "stock", -"华铁应急": "stock", -"600928": "stock", -"西安银行": "stock", -"600615": "stock", -"丰华股份": "stock", -"000969": "stock", -"安泰科技": "stock", -"836871": "stock", -"派特尔": "stock", -"601258": "stock", -"*ST庞大": "stock", -"002340": "stock", -"格林美": "stock", -"300322": "stock", -"硕贝德": "stock", -"000748": "stock", -"长城信息": "stock", -"300973": "stock", -"立高食品": "stock", -"600814": "stock", -"杭州解百": "stock", -"688616": "stock", -"西力科技": "stock", -"600939": "stock", -"重庆建工": "stock", -"600611": "stock", -"大众交通": "stock", -"002350": "stock", -"北京科锐": "stock", -"600741": "stock", -"华域汽车": "stock", -"002178": "stock", -"延华智能": "stock", -"603199": "stock", -"九华旅游": "stock", -"002285": "stock", -"世联行": "stock", -"300187": "stock", -"永清环保": "stock", -"301050": "stock", -"雷电微力": "stock", -"002287": "stock", -"奇正藏药": "stock", -"002886": "stock", -"沃特股份": "stock", -"600578": "stock", -"京能电力": "stock", -"300220": "stock", -"ST金运": "stock", -"003005": "stock", -"竞业达": "stock", -"000902": "stock", -"新洋丰": "stock", -"600262": "stock", -"北方股份": "stock", -"000488": "stock", -"晨鸣纸业": "stock", -"603276": "stock", -"恒兴新材": "stock", -"002735": "stock", -"王子新材": "stock", -"300596": "stock", -"利安隆": "stock", -"600591": "stock", -"*ST上航": "stock", -"688045": "stock", -"必易微": "stock", -"688205": "stock", -"德科立": "stock", -"603080": "stock", -"新疆火炬": "stock", -"300870": "stock", -"欧陆通": "stock", -"600664": "stock", -"哈药股份": "stock", -"003012": "stock", -"东鹏控股": "stock", -"601113": "stock", -"华鼎股份": "stock", -"601518": "stock", -"吉林高速": "stock", -"301210": "stock", -"金杨股份": "stock", -"600856": "stock", -"退市中天": "stock", -"600852": "stock", -"*ST中川": "stock", -"300122": "stock", -"智飞生物": "stock", -"300763": "stock", -"锦浪科技": "stock", -"301269": "stock", -"华大九天": "stock", -"600028": "stock", -"中国石化": "stock", -"300566": "stock", -"激智科技": "stock", -"688146": "stock", -"中船特气": "stock", -"688521": "stock", -"芯原股份": "stock", -"300968": "stock", -"格林精密": "stock", -"000566": "stock", -"海南海药": "stock", -"600521": "stock", -"华海药业": "stock", -"605286": "stock", -"同力日升": "stock", -"600519": "stock", -"贵州茅台": "stock", -"301517": "stock", -"陕西华达": "stock", -"300130": "stock", -"新国都": "stock", -"600602": "stock", -"云赛智联": "stock", -"300987": "stock", -"川网传媒": "stock", -"301051": "stock", -"信濠光电": "stock", -"872190": "stock", -"雷神科技": "stock", -"835508": "stock", -"殷图网联": "stock", -"002901": "stock", -"大博医疗": "stock", -"600581": "stock", -"八一钢铁": "stock", -"300644": "stock", -"南京聚隆": "stock", -"688090": "stock", -"瑞松科技": "stock", -"605020": "stock", -"永和股份": "stock", -"300900": "stock", -"广联航空": "stock", -"002076": "stock", -"星光股份": "stock", -"688676": "stock", -"金盘科技": "stock", 
-"002480": "stock", -"新筑股份": "stock", -"002633": "stock", -"申科股份": "stock", -"300889": "stock", -"爱克股份": "stock", -"000897": "stock", -"津滨发展": "stock", -"601888": "stock", -"中国中免": "stock", -"688595": "stock", -"芯海科技": "stock", -"605179": "stock", -"一鸣食品": "stock", -"600030": "stock", -"中信证券": "stock", -"600375": "stock", -"汉马科技": "stock", -"002105": "stock", -"信隆健康": "stock", -"000925": "stock", -"众合科技": "stock", -"601668": "stock", -"中国建筑": "stock", -"688132": "stock", -"邦彦技术": "stock", -"300074": "stock", -"华平股份": "stock", -"301305": "stock", -"朗坤环境": "stock", -"301338": "stock", -"凯格精机": "stock", -"000515": "stock", -"攀渝钛业": "stock", -"603396": "stock", -"金辰股份": "stock", -"002308": "stock", -"威创股份": "stock", -"300383": "stock", -"光环新网": "stock", -"688278": "stock", -"特宝生物": "stock", -"600853": "stock", -"龙建股份": "stock", -"603096": "stock", -"新经典": "stock", -"600428": "stock", -"中远海特": "stock", -"003037": "stock", -"三和管桩": "stock", -"000971": "stock", -"ST高升": "stock", -"300320": "stock", -"海达股份": "stock", -"605580": "stock", -"恒盛能源": "stock", -"688234": "stock", -"天岳先进": "stock", -"600874": "stock", -"创业环保": "stock", -"000923": "stock", -"河钢资源": "stock", -"603728": "stock", -"鸣志电器": "stock", -"688186": "stock", -"广大特材": "stock", -"301363": "stock", -"美好医疗": "stock", -"002909": "stock", -"集泰股份": "stock", -"688378": "stock", -"奥来德": "stock", -"600395": "stock", -"盘江股份": "stock", -"600711": "stock", -"盛屯矿业": "stock", -"688070": "stock", -"纵横股份": "stock", -"601898": "stock", -"中煤能源": "stock", -"300689": "stock", -"澄天伟业": "stock", -"300329": "stock", -"海伦钢琴": "stock", -"603601": "stock", -"再升科技": "stock", -"000059": "stock", -"华锦股份": "stock", -"002273": "stock", -"水晶光电": "stock", -"000556": "stock", -"PT南洋": "stock", -"300481": "stock", -"濮阳惠成": "stock", -"688267": "stock", -"中触媒": "stock", -"603885": "stock", -"吉祥航空": "stock", -"603616": "stock", -"韩建河山": "stock", -"002147": "stock", -"新光退": "stock", -"601077": "stock", -"渝农商行": "stock", -"601336": "stock", -"新华保险": "stock", -"003026": "stock", -"中晶科技": "stock", -"600620": "stock", -"天宸股份": "stock", -"300081": "stock", -"恒信东方": "stock", -"002143": "stock", -"印纪退": "stock", -"000503": "stock", -"国新健康": "stock", -"600836": "stock", -"上海易连": "stock", -"000929": "stock", -"兰州黄河": "stock", -"688218": "stock", -"江苏北人": "stock", -"002653": "stock", -"海思科": "stock", -"300537": "stock", -"广信材料": "stock", -"872351": "stock", -"华光源海": "stock", -"603018": "stock", -"华设集团": "stock", -"002498": "stock", -"汉缆股份": "stock", -"605255": "stock", -"天普股份": "stock", -"000917": "stock", -"电广传媒": "stock", -"688531": "stock", -"日联科技": "stock", -"600372": "stock", -"中航机载": "stock", -"002155": "stock", -"湖南黄金": "stock", -"300827": "stock", -"上能电气": "stock", -"605368": "stock", -"蓝天燃气": "stock", -"601618": "stock", -"中国中冶": "stock", -"002913": "stock", -"奥士康": "stock", -"601601": "stock", -"中国太保": "stock", -"603355": "stock", -"莱克电气": "stock", -"600526": "stock", -"菲达环保": "stock", -"603421": "stock", -"鼎信通讯": "stock", -"000517": "stock", -"荣安地产": "stock", -"301509": "stock", -"金凯生科": "stock", -"603348": "stock", -"文灿股份": "stock", -"002998": "stock", -"优彩资源": "stock", -"600875": "stock", -"东方电气": "stock", -"300815": "stock", -"玉禾田": "stock", -"000732": "stock", -"ST泰禾": "stock", -"000957": "stock", -"中通客车": "stock", -"831855": "stock", -"浙江大农": "stock", -"873339": "stock", -"恒太照明": "stock", -"605099": "stock", -"共创草坪": "stock", -"603220": "stock", -"中贝通信": "stock", -"301317": "stock", -"鑫磊股份": "stock", -"603019": "stock", -"中科曙光": "stock", -"603110": "stock", -"东方材料": 
"stock", -"001313": "stock", -"粤海饲料": "stock", -"001368": "stock", -"通达创智": "stock", -"002606": "stock", -"大连电瓷": "stock", -"603610": "stock", -"麒盛科技": "stock", -"300551": "stock", -"古鳌科技": "stock", -"600621": "stock", -"华鑫股份": "stock", -"001222": "stock", -"源飞宠物": "stock", -"600738": "stock", -"丽尚国潮": "stock", -"603600": "stock", -"永艺股份": "stock", -"601212": "stock", -"白银有色": "stock", -"605589": "stock", -"圣泉集团": "stock", -"000594": "stock", -"国恒退": "stock", -"301318": "stock", -"维海德": "stock", -"688080": "stock", -"映翰通": "stock", -"688103": "stock", -"国力股份": "stock", -"600327": "stock", -"大东方": "stock", -"300997": "stock", -"欢乐家": "stock", -"003031": "stock", -"中瓷电子": "stock", -"603089": "stock", -"正裕工业": "stock", -"300465": "stock", -"高伟达": "stock", -"300508": "stock", -"维宏股份": "stock", -"600468": "stock", -"百利电气": "stock", -"300916": "stock", -"朗特智能": "stock", -"603318": "stock", -"水发燃气": "stock", -"600788": "stock", -"*ST达曼": "stock", -"603595": "stock", -"东尼电子": "stock", -"600282": "stock", -"南钢股份": "stock", -"688392": "stock", -"骄成超声": "stock", -"603270": "stock", -"金帝股份": "stock", -"603129": "stock", -"春风动力": "stock", -"000999": "stock", -"华润三九": "stock", -"603301": "stock", -"振德医疗": "stock", -"688425": "stock", -"铁建重工": "stock", -"688670": "stock", -"金迪克": "stock", -"600515": "stock", -"海南机场": "stock", -"688566": "stock", -"吉贝尔": "stock", -"300161": "stock", -"华中数控": "stock", -"300239": "stock", -"东宝生物": "stock", -"000502": "stock", -"绿景退": "stock", -"300654": "stock", -"世纪天鸿": "stock", -"688135": "stock", -"利扬芯片": "stock", -"600965": "stock", -"福成股份": "stock", -"300719": "stock", -"安达维尔": "stock", -"300496": "stock", -"中科创达": "stock", -"688217": "stock", -"睿昂基因": "stock", -"000827": "stock", -"*ST长兴": "stock", -"603097": "stock", -"江苏华辰": "stock", -"301528": "stock", -"多浦乐": "stock", -"688701": "stock", -"卓锦股份": "stock", -"603583": "stock", -"捷昌驱动": "stock", -"834950": "stock", -"迅安科技": "stock", -"603258": "stock", -"电魂网络": "stock", -"600509": "stock", -"天富能源": "stock", -"000691": "stock", -"亚太实业": "stock", -"600279": "stock", -"重庆港": "stock", -"688380": "stock", -"中微半导": "stock", -"002512": "stock", -"达华智能": "stock", -"300052": "stock", -"中青宝": "stock", -"300007": "stock", -"汉威科技": "stock", -"300478": "stock", -"杭州高新": "stock", -"000789": "stock", -"万年青": "stock", -"002978": "stock", -"安宁股份": "stock", -"603439": "stock", -"贵州三力": "stock", -"000733": "stock", -"振华科技": "stock", -"002179": "stock", -"中航光电": "stock", -"301070": "stock", -"开勒股份": "stock", -"300989": "stock", -"蕾奥规划": "stock", -"832171": "stock", -"志晟信息": "stock", -"603880": "stock", -"ST南卫": "stock", -"002496": "stock", -"辉丰股份": "stock", -"000652": "stock", -"泰达股份": "stock", -"002959": "stock", -"小熊电器": "stock", -"600131": "stock", -"国网信通": "stock", -"301162": "stock", -"国能日新": "stock", -"836699": "stock", -"海达尔": "stock", -"603102": "stock", -"百合股份": "stock", -"300572": "stock", -"安车检测": "stock", -"688588": "stock", -"凌志软件": "stock", -"603186": "stock", -"华正新材": "stock", -"002654": "stock", -"万润科技": "stock", -"600213": "stock", -"亚星客车": "stock", -"300829": "stock", -"金丹科技": "stock", -"002778": "stock", -"中晟高科": "stock", -"300897": "stock", -"山科智能": "stock", -"601059": "stock", -"信达证券": "stock", -"000411": "stock", -"英特集团": "stock", -"300148": "stock", -"天舟文化": "stock", -"835207": "stock", -"众诚科技": "stock", -"600189": "stock", -"泉阳泉": "stock", -"300234": "stock", -"开尔新材": "stock", -"603163": "stock", -"圣晖集成": "stock", -"002055": "stock", -"得润电子": "stock", -"300891": "stock", -"惠云钛业": "stock", -"600286": "stock", 
-"S*ST国瓷": "stock", -"601108": "stock", -"财通证券": "stock", -"301321": "stock", -"翰博高新": "stock", -"603043": "stock", -"广州酒家": "stock", -"603456": "stock", -"九洲药业": "stock", -"300743": "stock", -"天地数码": "stock", -"301087": "stock", -"可孚医疗": "stock", -"600551": "stock", -"时代出版": "stock", -"871396": "stock", -"常辅股份": "stock", -"688390": "stock", -"固德威": "stock", -"002433": "stock", -"*ST太安": "stock", -"600984": "stock", -"建设机械": "stock", -"300476": "stock", -"胜宏科技": "stock", -"603214": "stock", -"爱婴室": "stock", -"688365": "stock", -"光云科技": "stock", -"000815": "stock", -"美利云": "stock", -"832491": "stock", -"奥迪威": "stock", -"002462": "stock", -"嘉事堂": "stock", -"300964": "stock", -"本川智能": "stock", -"603900": "stock", -"莱绅通灵": "stock", -"003033": "stock", -"征和工业": "stock", -"002518": "stock", -"科士达": "stock", -"601686": "stock", -"友发集团": "stock", -"002597": "stock", -"金禾实业": "stock", -"603367": "stock", -"辰欣药业": "stock", -"600486": "stock", -"扬农化工": "stock", -"603185": "stock", -"弘元绿能": "stock", -"300145": "stock", -"中金环境": "stock", -"688015": "stock", -"交控科技": "stock", -"000661": "stock", -"长春高新": "stock", -"600323": "stock", -"瀚蓝环境": "stock", -"300691": "stock", -"联合光电": "stock", -"600339": "stock", -"中油工程": "stock", -"839729": "stock", -"永顺生物": "stock", -"000852": "stock", -"石化机械": "stock", -"873593": "stock", -"鼎智科技": "stock", -"688538": "stock", -"和辉光电-U": "stock", -"300786": "stock", -"国林科技": "stock", -"603100": "stock", -"川仪股份": "stock", -"600419": "stock", -"天润乳业": "stock", -"300143": "stock", -"盈康生命": "stock", -"000981": "stock", -"山子股份": "stock", -"300420": "stock", -"五洋停车": "stock", -"300546": "stock", -"雄帝科技": "stock", -"300101": "stock", -"振芯科技": "stock", -"301276": "stock", -"嘉曼服饰": "stock", -"002636": "stock", -"金安国纪": "stock", -"600985": "stock", -"淮北矿业": "stock", -"605005": "stock", -"合兴股份": "stock", -"002294": "stock", -"信立泰": "stock", -"000536": "stock", -"华映科技": "stock", -"000922": "stock", -"佳电股份": "stock", -"600119": "stock", -"长江投资": "stock", -"300906": "stock", -"日月明": "stock", -"300854": "stock", -"中兰环保": "stock", -"688175": "stock", -"高凌信息": "stock", -"600971": "stock", -"恒源煤电": "stock", -"601138": "stock", -"工业富联": "stock", -"002681": "stock", -"奋达科技": "stock", -"300579": "stock", -"数字认证": "stock", -"002549": "stock", -"凯美特气": "stock", -"002116": "stock", -"中国海诚": "stock", -"002349": "stock", -"精华制药": "stock", -"301261": "stock", -"恒工精密": "stock", -"688117": "stock", -"圣诺生物": "stock", -"600179": "stock", -"安通控股": "stock", -"002013": "stock", -"中航机电": "stock", -"300506": "stock", -"名家汇": "stock", -"301396": "stock", -"宏景科技": "stock", -"605507": "stock", -"国邦医药": "stock", -"603283": "stock", -"赛腾股份": "stock", -"002187": "stock", -"广百股份": "stock", -"300366": "stock", -"创意信息": "stock", -"601231": "stock", -"环旭电子": "stock", -"301232": "stock", -"飞沃科技": "stock", -"835892": "stock", -"中科美菱": "stock", -"300883": "stock", -"龙利得": "stock", -"301160": "stock", -"翔楼新材": "stock", -"002701": "stock", -"奥瑞金": "stock", -"301065": "stock", -"本立科技": "stock", -"301225": "stock", -"恒勃股份": "stock", -"002598": "stock", -"山东章鼓": "stock", -"603833": "stock", -"欧派家居": "stock", -"000826": "stock", -"启迪环境": "stock", -"600156": "stock", -"华升股份": "stock", -"688569": "stock", -"铁科轨道": "stock", -"300083": "stock", -"创世纪": "stock", -"002280": "stock", -"联络互动": "stock", -"688279": "stock", -"峰岹科技": "stock", -"600731": "stock", -"湖南海利": "stock", -"600479": "stock", -"千金药业": "stock", -"603855": "stock", -"华荣股份": "stock", -"300067": "stock", -"安诺其": "stock", -"002875": "stock", -"安奈儿": "stock", -"002215": 
"stock", -"诺普信": "stock", -"601328": "stock", -"交通银行": "stock", -"300820": "stock", -"英杰电气": "stock", -"002713": "stock", -"东易日盛": "stock", -"601018": "stock", -"宁波港": "stock", -"688301": "stock", -"奕瑞科技": "stock", -"603150": "stock", -"万朗磁塑": "stock", -"000993": "stock", -"闽东电力": "stock", -"002962": "stock", -"五方光电": "stock", -"601007": "stock", -"金陵饭店": "stock", -"002707": "stock", -"众信旅游": "stock", -"002322": "stock", -"理工能科": "stock", -"688098": "stock", -"申联生物": "stock", -"002069": "stock", -"獐子岛": "stock", -"000719": "stock", -"中原传媒": "stock", -"600717": "stock", -"天津港": "stock", -"830839": "stock", -"万通液压": "stock", -"430718": "stock", -"合肥高科": "stock", -"002002": "stock", -"鸿达兴业": "stock", -"000593": "stock", -"德龙汇能": "stock", -"603993": "stock", -"洛阳钼业": "stock", -"603618": "stock", -"杭电股份": "stock", -"832225": "stock", -"利通科技": "stock", -"688562": "stock", -"航天软件": "stock", -"301220": "stock", -"亚香股份": "stock", -"300611": "stock", -"美力科技": "stock", -"603768": "stock", -"常青股份": "stock", -"688276": "stock", -"百克生物": "stock", -"688325": "stock", -"赛微微电": "stock", -"688321": "stock", -"微芯生物": "stock", -"603027": "stock", -"千禾味业": "stock", -"000065": "stock", -"北方国际": "stock", -"300354": "stock", -"东华测试": "stock", -"300494": "stock", -"盛天网络": "stock", -"600747": "stock", -"退市大控": "stock", -"000062": "stock", -"深圳华强": "stock", -"601218": "stock", -"吉鑫科技": "stock", -"600710": "stock", -"苏美达": "stock", -"000516": "stock", -"国际医学": "stock", -"836414": "stock", -"欧普泰": "stock", -"301287": "stock", -"康力源": "stock", -"300071": "stock", -"福石控股": "stock", -"300548": "stock", -"博创科技": "stock", -"002166": "stock", -"莱茵生物": "stock", -"003003": "stock", -"天元股份": "stock", -"603003": "stock", -"龙宇股份": "stock", -"688468": "stock", -"科美诊断": "stock", -"000525": "stock", -"ST红太阳": "stock", -"603324": "stock", -"盛剑环境": "stock", -"003011": "stock", -"海象新材": "stock", -"002431": "stock", -"棕榈股份": "stock", -"001236": "stock", -"弘业期货": "stock", -"002752": "stock", -"昇兴股份": "stock", -"003018": "stock", -"金富科技": "stock", -"002623": "stock", -"亚玛顿": "stock", -"600399": "stock", -"抚顺特钢": "stock", -"002888": "stock", -"惠威科技": "stock", -"603517": "stock", -"绝味食品": "stock", -"300675": "stock", -"建科院": "stock", -"002649": "stock", -"博彦科技": "stock", -"601198": "stock", -"东兴证券": "stock", -"600801": "stock", -"华新水泥": "stock", -"600098": "stock", -"广州发展": "stock", -"600816": "stock", -"ST建元": "stock", -"301221": "stock", -"光庭信息": "stock", -"600252": "stock", -"中恒集团": "stock", -"600848": "stock", -"上海临港": "stock", -"603518": "stock", -"锦泓集团": "stock", -"300982": "stock", -"苏文电能": "stock", -"002514": "stock", -"宝馨科技": "stock", -"300499": "stock", -"高澜股份": "stock", -"831768": "stock", -"拾比佰": "stock", -"603722": "stock", -"阿科力": "stock", -"600199": "stock", -"金种子酒": "stock", -"600006": "stock", -"东风汽车": "stock", -"688004": "stock", -"博汇科技": "stock", -"688190": "stock", -"云路股份": "stock", -"002254": "stock", -"泰和新材": "stock", -"003016": "stock", -"欣贺股份": "stock", -"600908": "stock", -"无锡银行": "stock", -"300156": "stock", -"神雾退": "stock", -"600859": "stock", -"王府井": "stock", -"301109": "stock", -"军信股份": "stock", -"000632": "stock", -"三木集团": "stock", -"603048": "stock", -"浙江黎明": "stock", -"600983": "stock", -"惠而浦": "stock", -"000616": "stock", -"*ST海投": "stock", -"002092": "stock", -"中泰化学": "stock", -"002505": "stock", -"鹏都农牧": "stock", -"301227": "stock", -"森鹰窗业": "stock", -"301165": "stock", -"锐捷网络": "stock", -"000830": "stock", -"鲁西化工": "stock", -"002850": "stock", -"科达利": "stock", -"600476": "stock", -"湘邮科技": "stock", 
-"002443": "stock", -"金洲管道": "stock", -"000859": "stock", -"国风新材": "stock", -"300828": "stock", -"锐新科技": "stock", -"688375": "stock", -"国博电子": "stock", -"000533": "stock", -"顺钠股份": "stock", -"002279": "stock", -"久其软件": "stock", -"603316": "stock", -"诚邦股份": "stock", -"605296": "stock", -"神农集团": "stock", -"600629": "stock", -"华建集团": "stock", -"002239": "stock", -"奥特佳": "stock", -"301279": "stock", -"金道科技": "stock", -"688089": "stock", -"嘉必优": "stock", -"000090": "stock", -"天健集团": "stock", -"300111": "stock", -"向日葵": "stock", -"002410": "stock", -"广联达": "stock", -"300605": "stock", -"恒锋信息": "stock", -"300813": "stock", -"泰林生物": "stock", -"002449": "stock", -"国星光电": "stock", -"002311": "stock", -"海大集团": "stock", -"603065": "stock", -"宿迁联盛": "stock", -"300136": "stock", -"信维通信": "stock", -"002719": "stock", -"麦趣尔": "stock", -"301128": "stock", -"强瑞技术": "stock", -"836270": "stock", -"天铭科技": "stock", -"600686": "stock", -"金龙汽车": "stock", -"002560": "stock", -"通达股份": "stock", -"000811": "stock", -"冰轮环境": "stock", -"688057": "stock", -"金达莱": "stock", -"688169": "stock", -"石头科技": "stock", -"300821": "stock", -"东岳硅材": "stock", -"002571": "stock", -"德力股份": "stock", -"300486": "stock", -"东杰智能": "stock", -"002006": "stock", -"精工科技": "stock", -"603268": "stock", -"松发股份": "stock", -"301307": "stock", -"美利信": "stock", -"300867": "stock", -"圣元环保": "stock", -"000762": "stock", -"西藏矿业": "stock", -"600296": "stock", -"S兰铝": "stock", -"688048": "stock", -"长光华芯": "stock", -"300693": "stock", -"盛弘股份": "stock", -"002532": "stock", -"天山铝业": "stock", -"002603": "stock", -"以岭药业": "stock", -"603685": "stock", -"晨丰科技": "stock", -"300466": "stock", -"赛摩智能": "stock", -"873122": "stock", -"中纺标": "stock", -"300415": "stock", -"伊之密": "stock", -"300569": "stock", -"天能重工": "stock", -"603388": "stock", -"元成股份": "stock", -"002468": "stock", -"申通快递": "stock", -"300736": "stock", -"百邦科技": "stock", -"300414": "stock", -"中光防雷": "stock", -"600306": "stock", -"*ST商城": "stock", -"600727": "stock", -"鲁北化工": "stock", -"601166": "stock", -"兴业银行": "stock", -"000710": "stock", -"贝瑞基因": "stock", -"837212": "stock", -"智新电子": "stock", -"600001": "stock", -"邯郸钢铁": "stock", -"002065": "stock", -"东华软件": "stock", -"300079": "stock", -"数码视讯": "stock", -"600276": "stock", -"恒瑞医药": "stock", -"000715": "stock", -"中兴商业": "stock", -"601828": "stock", -"美凯龙": "stock", -"002697": "stock", -"红旗连锁": "stock", -"605277": "stock", -"新亚电子": "stock", -"600185": "stock", -"格力地产": "stock", -"600627": "stock", -"上电股份": "stock", -"300442": "stock", -"润泽科技": "stock", -"300249": "stock", -"依米康": "stock", -"688485": "stock", -"九州一轨": "stock", -"300434": "stock", -"金石亚药": "stock", -"831195": "stock", -"三祥科技": "stock", -"688578": "stock", -"艾力斯": "stock", -"600322": "stock", -"天房发展": "stock", -"600097": "stock", -"开创国际": "stock", -"300255": "stock", -"常山药业": "stock", -"603106": "stock", -"恒银科技": "stock", -"600506": "stock", -"统一股份": "stock", -"300884": "stock", -"狄耐克": "stock", -"301049": "stock", -"超越科技": "stock", -"603000": "stock", -"人民网": "stock", -"600713": "stock", -"南京医药": "stock", -"002981": "stock", -"朝阳科技": "stock", -"301012": "stock", -"扬电科技": "stock", -"603656": "stock", -"泰禾智能": "stock", -"603822": "stock", -"嘉澳环保": "stock", -"600997": "stock", -"开滦股份": "stock", -"300707": "stock", -"威唐工业": "stock", -"600211": "stock", -"西藏药业": "stock", -"835184": "stock", -"国源科技": "stock", -"002348": "stock", -"高乐股份": "stock", -"688041": "stock", -"海光信息": "stock", -"301379": "stock", -"天山电子": "stock", -"000979": "stock", -"中弘退": "stock", -"600723": "stock", -"首商股份": 
"stock", -"301348": "stock", -"蓝箭电子": "stock", -"002945": "stock", -"华林证券": "stock", -"688296": "stock", -"和达科技": "stock", -"603095": "stock", -"越剑智能": "stock", -"000534": "stock", -"万泽股份": "stock", -"002190": "stock", -"成飞集成": "stock", -"600833": "stock", -"第一医药": "stock", -"601636": "stock", -"旗滨集团": "stock", -"600127": "stock", -"金健米业": "stock", -"688062": "stock", -"迈威生物-U": "stock", -"002316": "stock", -"亚联发展": "stock", -"002276": "stock", -"万马股份": "stock", -"605008": "stock", -"长鸿高科": "stock", -"605056": "stock", -"咸亨国际": "stock", -"301489": "stock", -"思泉新材": "stock", -"688239": "stock", -"航宇科技": "stock", -"603529": "stock", -"爱玛科技": "stock", -"600841": "stock", -"动力新科": "stock", -"601118": "stock", -"海南橡胶": "stock", -"600762": "stock", -"S*ST金荔": "stock", -"600787": "stock", -"中储股份": "stock", -"300881": "stock", -"盛德鑫泰": "stock", -"833523": "stock", -"德瑞锂电": "stock", -"002277": "stock", -"友阿股份": "stock", -"688515": "stock", -"裕太微-U": "stock", -"600084": "stock", -"中葡股份": "stock", -"600888": "stock", -"新疆众和": "stock", -"603486": "stock", -"科沃斯": "stock", -"300668": "stock", -"杰恩设计": "stock", -"000729": "stock", -"燕京啤酒": "stock", -"836247": "stock", -"华密新材": "stock", -"300919": "stock", -"中伟股份": "stock", -"600340": "stock", -"华夏幸福": "stock", -"301112": "stock", -"信邦智能": "stock", -"002310": "stock", -"东方园林": "stock", -"600163": "stock", -"中闽能源": "stock", -"002248": "stock", -"华东数控": "stock", -"000832": "stock", -"*ST龙涤": "stock", -"002769": "stock", -"普路通": "stock", -"603886": "stock", -"元祖股份": "stock", -"601066": "stock", -"中信建投": "stock", -"603708": "stock", -"家家悦": "stock", -"300723": "stock", -"一品红": "stock", -"300428": "stock", -"立中集团": "stock", -"002929": "stock", -"润建股份": "stock", -"300991": "stock", -"创益通": "stock", -"300703": "stock", -"创源股份": "stock", -"000508": "stock", -"琼民源A": "stock", -"688287": "stock", -"观典防务": "stock", -"603976": "stock", -"正川股份": "stock", -"301236": "stock", -"软通动力": "stock", -"603787": "stock", -"新日股份": "stock", -"688208": "stock", -"道通科技": "stock", -"600645": "stock", -"中源协和": "stock", -"605162": "stock", -"新中港": "stock", -"600550": "stock", -"保变电气": "stock", -"002416": "stock", -"爱施德": "stock", -"000423": "stock", -"东阿阿胶": "stock", -"000851": "stock", -"高鸿股份": "stock", -"300304": "stock", -"云意电气": "stock", -"603038": "stock", -"华立股份": "stock", -"300439": "stock", -"美康生物": "stock", -"300917": "stock", -"特发服务": "stock", -"688002": "stock", -"睿创微纳": "stock", -"600649": "stock", -"城投控股": "stock", -"300488": "stock", -"恒锋工具": "stock", -"600498": "stock", -"烽火通信": "stock", -"300257": "stock", -"开山股份": "stock", -"603008": "stock", -"喜临门": "stock", -"603322": "stock", -"超讯通信": "stock", -"830964": "stock", -"润农节水": "stock", -"300688": "stock", -"创业黑马": "stock", -"688620": "stock", -"安凯微": "stock", -"688448": "stock", -"磁谷科技": "stock", -"603217": "stock", -"元利科技": "stock", -"600582": "stock", -"天地科技": "stock", -"600503": "stock", -"华丽家族": "stock", -"002621": "stock", -"美吉姆": "stock", -"688039": "stock", -"当虹科技": "stock", -"002133": "stock", -"广宇集团": "stock", -"002267": "stock", -"陕天然气": "stock", -"603229": "stock", -"奥翔药业": "stock", -"603906": "stock", -"龙蟠科技": "stock", -"300284": "stock", -"苏交科": "stock", -"301505": "stock", -"苏州规划": "stock", -"688558": "stock", -"国盛智科": "stock", -"603130": "stock", -"云中马": "stock", -"834415": "stock", -"恒拓开源": "stock", -"688565": "stock", -"力源科技": "stock", -"688091": "stock", -"上海谊众": "stock", -"001360": "stock", -"南矿集团": "stock", -"000976": "stock", -"ST华铁": "stock", -"000805": "stock", -"*ST炎黄": "stock", -"301151": 
"stock", -"冠龙节能": "stock", -"600739": "stock", -"辽宁成大": "stock", -"000973": "stock", -"佛塑科技": "stock", -"688579": "stock", -"山大地纬": "stock", -"601900": "stock", -"南方传媒": "stock", -"605198": "stock", -"安德利": "stock", -"300392": "stock", -"腾信退": "stock", -"600840": "stock", -"新湖创业": "stock", -"300197": "stock", -"节能铁汉": "stock", -"300729": "stock", -"乐歌股份": "stock", -"300402": "stock", -"宝色股份": "stock", -"688201": "stock", -"信安世纪": "stock", -"000657": "stock", -"中钨高新": "stock", -"300483": "stock", -"首华燃气": "stock", -"000425": "stock", -"徐工机械": "stock", -"300370": "stock", -"安控科技": "stock", -"300667": "stock", -"必创科技": "stock", -"688580": "stock", -"伟思医疗": "stock", -"002261": "stock", -"拓维信息": "stock", -"838924": "stock", -"广脉科技": "stock", -"002057": "stock", -"中钢天源": "stock", -"300790": "stock", -"宇瞳光学": "stock", -"301015": "stock", -"百洋医药": "stock", -"600673": "stock", -"东阳光": "stock", -"002613": "stock", -"北玻股份": "stock", -"300511": "stock", -"雪榕生物": "stock", -"600556": "stock", -"天下秀": "stock", -"688193": "stock", -"仁度生物": "stock", -"300549": "stock", -"优德精密": "stock", -"002409": "stock", -"雅克科技": "stock", -"300901": "stock", -"中胤时尚": "stock", -"600705": "stock", -"中航产融": "stock", -"002870": "stock", -"香山股份": "stock", -"600651": "stock", -"飞乐音响": "stock", -"688586": "stock", -"江航装备": "stock", -"600099": "stock", -"林海股份": "stock", -"600861": "stock", -"北京人力": "stock", -"300796": "stock", -"贝斯美": "stock", -"001872": "stock", -"招商港口": "stock", -"688788": "stock", -"科思科技": "stock", -"688418": "stock", -"震有科技": "stock", -"300822": "stock", -"贝仕达克": "stock", -"605055": "stock", -"迎丰股份": "stock", -"301259": "stock", -"艾布鲁": "stock", -"603002": "stock", -"宏昌电子": "stock", -"601600": "stock", -"中国铝业": "stock", -"603682": "stock", -"锦和商管": "stock", -"688633": "stock", -"星球石墨": "stock", -"300816": "stock", -"艾可蓝": "stock", -"603378": "stock", -"亚士创能": "stock", -"603585": "stock", -"苏利股份": "stock", -"688085": "stock", -"三友医疗": "stock", -"300655": "stock", -"晶瑞电材": "stock", -"002052": "stock", -"ST同洲": "stock", -"002278": "stock", -"神开股份": "stock", -"300825": "stock", -"阿尔特": "stock", -"600004": "stock", -"白云机场": "stock", -"871642": "stock", -"通易航天": "stock", -"688020": "stock", -"方邦股份": "stock", -"002142": "stock", -"宁波银行": "stock", -"300332": "stock", -"天壕能源": "stock", -"688352": "stock", -"颀中科技": "stock", -"300022": "stock", -"吉峰科技": "stock", -"600844": "stock", -"丹化科技": "stock", -"833533": "stock", -"骏创科技": "stock", -"600849": "stock", -"上药转换": "stock", -"600201": "stock", -"生物股份": "stock", -"301026": "stock", -"浩通科技": "stock", -"002446": "stock", -"盛路通信": "stock", -"601858": "stock", -"中国科传": "stock", -"300129": "stock", -"泰胜风能": "stock", -"002806": "stock", -"华锋股份": "stock", -"002363": "stock", -"隆基机械": "stock", -"605258": "stock", -"协和电子": "stock", -"688209": "stock", -"英集芯": "stock", -"600839": "stock", -"四川长虹": "stock", -"300044": "stock", -"赛为智能": "stock", -"601001": "stock", -"晋控煤业": "stock", -"603333": "stock", -"尚纬股份": "stock", -"600104": "stock", -"上汽集团": "stock", -"603195": "stock", -"公牛集团": "stock", -"603707": "stock", -"健友股份": "stock", -"301198": "stock", -"喜悦智行": "stock", -"600608": "stock", -"ST沪科": "stock", -"002130": "stock", -"沃尔核材": "stock", -"600749": "stock", -"西藏旅游": "stock", -"300923": "stock", -"研奥股份": "stock", -"002630": "stock", -"华西能源": "stock", -"300474": "stock", -"景嘉微": "stock", -"300058": "stock", -"蓝色光标": "stock", -"000687": "stock", -"华讯退": "stock", -"002413": "stock", -"雷科防务": "stock", -"688261": "stock", -"东微半导": "stock", -"300325": "stock", -"德威退": "stock", 
-"600357": "stock", -"承德钒钛": "stock", -"002652": "stock", -"扬子新材": "stock", -"600899": "stock", -"*ST信联": "stock", -"688158": "stock", -"优刻得-W": "stock", -"601058": "stock", -"赛轮轮胎": "stock", -"000591": "stock", -"太阳能": "stock", -"300686": "stock", -"智动力": "stock", -"002845": "stock", -"同兴达": "stock", -"002556": "stock", -"辉隆股份": "stock", -"688681": "stock", -"科汇股份": "stock", -"002062": "stock", -"宏润建设": "stock", -"600773": "stock", -"西藏城投": "stock", -"603042": "stock", -"华脉科技": "stock", -"000799": "stock", -"酒鬼酒": "stock", -"600406": "stock", -"国电南瑞": "stock", -"002533": "stock", -"金杯电工": "stock", -"605566": "stock", -"福莱蒽特": "stock", -"300092": "stock", -"科新机电": "stock", -"300918": "stock", -"南山智尚": "stock", -"600231": "stock", -"凌钢股份": "stock", -"301148": "stock", -"嘉戎技术": "stock", -"688314": "stock", -"康拓医疗": "stock", -"300295": "stock", -"三六五网": "stock", -"603681": "stock", -"永冠新材": "stock", -"688086": "stock", -"退市紫晶": "stock", -"002885": "stock", -"京泉华": "stock", -"603703": "stock", -"盛洋科技": "stock", -"000803": "stock", -"山高环能": "stock", -"300996": "stock", -"普联软件": "stock", -"000779": "stock", -"甘咨询": "stock", -"601116": "stock", -"三江购物": "stock", -"002227": "stock", -"奥特迅": "stock", -"688529": "stock", -"豪森股份": "stock", -"600235": "stock", -"民丰特纸": "stock", -"601718": "stock", -"际华集团": "stock", -"872808": "stock", -"曙光数创": "stock", -"600303": "stock", -"ST曙光": "stock", -"600757": "stock", -"长江传媒": "stock", -"002303": "stock", -"美盈森": "stock", -"002509": "stock", -"天茂退": "stock", -"300462": "stock", -"华铭智能": "stock", -"301201": "stock", -"诚达药业": "stock", -"600436": "stock", -"片仔癀": "stock", -"688210": "stock", -"统联精密": "stock", -"300251": "stock", -"光线传媒": "stock", -"002039": "stock", -"黔源电力": "stock", -"300631": "stock", -"久吾高科": "stock", -"002873": "stock", -"新天药业": "stock", -"831445": "stock", -"龙竹科技": "stock", -"000731": "stock", -"四川美丰": "stock", -"600222": "stock", -"太龙药业": "stock", -"002066": "stock", -"瑞泰科技": "stock", -"600287": "stock", -"江苏舜天": "stock", -"000829": "stock", -"天音控股": "stock", -"601279": "stock", -"英利汽车": "stock", -"301510": "stock", -"固高科技": "stock", -"002833": "stock", -"弘亚数控": "stock", -"002903": "stock", -"宇环数控": "stock", -"830946": "stock", -"森萱医药": "stock", -"834639": "stock", -"晨光电缆": "stock", -"688269": "stock", -"凯立新材": "stock", -"300532": "stock", -"今天国际": "stock", -"603959": "stock", -"百利科技": "stock", -"301158": "stock", -"德石股份": "stock", -"688334": "stock", -"西高院": "stock", -"833873": "stock", -"中设咨询": "stock", -"002172": "stock", -"澳洋健康": "stock", -"300557": "stock", -"理工光科": "stock", -"002185": "stock", -"华天科技": "stock", -"002750": "stock", -"龙津药业": "stock", -"000548": "stock", -"湖南投资": "stock", -"600219": "stock", -"南山铝业": "stock", -"688309": "stock", -"恒誉环保": "stock", -"600438": "stock", -"通威股份": "stock", -"603178": "stock", -"圣龙股份": "stock", -"600435": "stock", -"北方导航": "stock", -"003007": "stock", -"直真科技": "stock", -"301188": "stock", -"力诺特玻": "stock", -"300765": "stock", -"新诺威": "stock", -"000034": "stock", -"神州数码": "stock", -"688088": "stock", -"虹软科技": "stock", -"300436": "stock", -"广生堂": "stock", -"688577": "stock", -"浙海德曼": "stock", -"688510": "stock", -"航亚科技": "stock", -"300359": "stock", -"全通教育": "stock", -"603726": "stock", -"朗迪集团": "stock", -"603916": "stock", -"苏博特": "stock", -"300398": "stock", -"飞凯材料": "stock", -"688139": "stock", -"海尔生物": "stock", -"601702": "stock", -"华峰铝业": "stock", -"600316": "stock", -"洪都航空": "stock", -"000801": "stock", -"四川九洲": "stock", -"002131": "stock", -"利欧股份": "stock", -"301234": "stock", -"五洲医疗": 
"stock", -"301387": "stock", -"光大同创": "stock", -"301267": "stock", -"华厦眼科": "stock", -"600188": "stock", -"兖矿能源": "stock", -"002411": "stock", -"必康退": "stock", -"688035": "stock", -"德邦科技": "stock", -"002232": "stock", -"启明信息": "stock", -"600083": "stock", -"博信股份": "stock", -"601658": "stock", -"邮储银行": "stock", -"301086": "stock", -"鸿富瀚": "stock", -"000538": "stock", -"云南白药": "stock", -"002059": "stock", -"云南旅游": "stock", -"301095": "stock", -"广立微": "stock", -"002180": "stock", -"纳思达": "stock", -"002840": "stock", -"华统股份": "stock", -"838227": "stock", -"美登科技": "stock", -"000952": "stock", -"广济药业": "stock", -"600012": "stock", -"皖通高速": "stock", -"301256": "stock", -"华融化学": "stock", -"002717": "stock", -"岭南股份": "stock", -"300024": "stock", -"机器人": "stock", -"301365": "stock", -"矩阵股份": "stock", -"300194": "stock", -"福安药业": "stock", -"603927": "stock", -"中科软": "stock", -"600258": "stock", -"首旅酒店": "stock", -"300607": "stock", -"拓斯达": "stock", -"600707": "stock", -"彩虹股份": "stock", -"688183": "stock", -"生益电子": "stock", -"601229": "stock", -"上海银行": "stock", -"600559": "stock", -"老白干酒": "stock", -"603716": "stock", -"塞力医疗": "stock", -"600482": "stock", -"中国动力": "stock", -"300809": "stock", -"华辰装备": "stock", -"600239": "stock", -"云南城投": "stock", -"300837": "stock", -"浙矿股份": "stock", -"002106": "stock", -"莱宝高科": "stock", -"000668": "stock", -"荣丰控股": "stock", -"000529": "stock", -"广弘控股": "stock", -"300695": "stock", -"兆丰股份": "stock", -"000861": "stock", -"海印股份": "stock", -"837242": "stock", -"建邦科技": "stock", -"002825": "stock", -"纳尔股份": "stock", -"001373": "stock", -"翔腾新材": "stock", -"002222": "stock", -"福晶科技": "stock", -"000965": "stock", -"天保基建": "stock", -"002659": "stock", -"凯文教育": "stock", -"603067": "stock", -"振华股份": "stock", -"688232": "stock", -"新点软件": "stock", -"870436": "stock", -"大地电气": "stock", -"002313": "stock", -"*ST日海": "stock", -"605500": "stock", -"森林包装": "stock", -"600062": "stock", -"华润双鹤": "stock", -"300417": "stock", -"南华仪器": "stock", -"002093": "stock", -"国脉科技": "stock", -"600593": "stock", -"大连圣亚": "stock", -"600271": "stock", -"航天信息": "stock", -"002643": "stock", -"万润股份": "stock", -"002314": "stock", -"南山控股": "stock", -"002030": "stock", -"达安基因": "stock", -"300242": "stock", -"佳云科技": "stock", -"603216": "stock", -"梦天家居": "stock", -"002952": "stock", -"亚世光电": "stock", -"600545": "stock", -"卓郎智能": "stock", -"301189": "stock", -"奥尼电子": "stock", -"002928": "stock", -"华夏航空": "stock", -"002511": "stock", -"中顺洁柔": "stock", -"600625": "stock", -"PT水仙": "stock", -"603307": "stock", -"扬州金泉": "stock", -"600742": "stock", -"一汽富维": "stock", -"000521": "stock", -"长虹美菱": "stock", -"300025": "stock", -"华星创业": "stock", -"002587": "stock", -"奥拓电子": "stock", -"603718": "stock", -"海利生物": "stock", -"001216": "stock", -"华瓷股份": "stock", -"601918": "stock", -"新集能源": "stock", -"601155": "stock", -"新城控股": "stock", -"605069": "stock", -"正和生态": "stock", -"688381": "stock", -"帝奥微": "stock", -"605117": "stock", -"德业股份": "stock", -"603948": "stock", -"建业股份": "stock", -"688768": "stock", -"容知日新": "stock", -"300757": "stock", -"罗博特科": "stock", -"000008": "stock", -"神州高铁": "stock", -"600160": "stock", -"巨化股份": "stock", -"300776": "stock", -"帝尔激光": "stock", -"301499": "stock", -"维科精密": "stock", -"300188": "stock", -"美亚柏科": "stock", -"001309": "stock", -"德明利": "stock", -"600614": "stock", -"退市鹏起": "stock", -"300374": "stock", -"中铁装配": "stock", -"000675": "stock", -"ST银山": "stock", -"600087": "stock", -"退市长油": "stock", -"000018": "stock", -"神城A退": "stock", -"688659": "stock", -"元琛科技": "stock", -"300245": "stock", 
-"天玑科技": "stock", -"605177": "stock", -"东亚药业": "stock", -"600117": "stock", -"*ST西钢": "stock", -"300966": "stock", -"共同药业": "stock", -"301429": "stock", -"森泰股份": "stock", -"000727": "stock", -"冠捷科技": "stock", -"002605": "stock", -"姚记科技": "stock", -"000980": "stock", -"众泰汽车": "stock", -"688369": "stock", -"致远互联": "stock", -"688669": "stock", -"聚石化学": "stock", -"300604": "stock", -"长川科技": "stock", -"300169": "stock", -"天晟新材": "stock", -"002378": "stock", -"章源钨业": "stock", -"002608": "stock", -"江苏国信": "stock", -"688371": "stock", -"菲沃泰": "stock", -"601339": "stock", -"百隆东方": "stock", -"000813": "stock", -"德展健康": "stock", -"300709": "stock", -"精研科技": "stock", -"002011": "stock", -"盾安环境": "stock", -"301126": "stock", -"达嘉维康": "stock", -"600553": "stock", -"太行水泥": "stock", -"300062": "stock", -"中能电气": "stock", -"002976": "stock", -"瑞玛精密": "stock", -"301390": "stock", -"经纬股份": "stock", -"300048": "stock", -"合康新能": "stock", -"300630": "stock", -"普利制药": "stock", -"002209": "stock", -"达意隆": "stock", -"688159": "stock", -"有方科技": "stock", -"301515": "stock", -"港通医疗": "stock", -"603011": "stock", -"合锻智能": "stock", -"002346": "stock", -"柘中股份": "stock", -"603219": "stock", -"富佳股份": "stock", -"300601": "stock", -"康泰生物": "stock", -"300851": "stock", -"交大思诺": "stock", -"688206": "stock", -"概伦电子": "stock", -"600122": "stock", -"*ST宏图": "stock", -"002576": "stock", -"通达动力": "stock", -"688157": "stock", -"松井股份": "stock", -"300388": "stock", -"节能国祯": "stock", -"301061": "stock", -"匠心家居": "stock", -"688646": "stock", -"逸飞激光": "stock", -"000156": "stock", -"华数传媒": "stock", -"002205": "stock", -"国统股份": "stock", -"300311": "stock", -"任子行": "stock", -"600926": "stock", -"杭州银行": "stock", -"000918": "stock", -"*ST嘉凯": "stock", -"000817": "stock", -"辽河油田": "stock", -"000720": "stock", -"新能泰山": "stock", -"002161": "stock", -"远望谷": "stock", -"603192": "stock", -"汇得科技": "stock", -"688323": "stock", -"瑞华泰": "stock", -"688111": "stock", -"金山办公": "stock", -"002369": "stock", -"卓翼科技": "stock", -"600275": "stock", -"退市昌鱼": "stock", -"603005": "stock", -"晶方科技": "stock", -"002379": "stock", -"宏创控股": "stock", -"603630": "stock", -"拉芳家化": "stock", -"002399": "stock", -"海普瑞": "stock", -"301293": "stock", -"三博脑科": "stock", -"688525": "stock", -"佰维存储": "stock", -"605266": "stock", -"健之佳": "stock", -"600560": "stock", -"金自天正": "stock", -"002921": "stock", -"联诚精密": "stock", -"603809": "stock", -"豪能股份": "stock", -"605089": "stock", -"味知香": "stock", -"002732": "stock", -"燕塘乳业": "stock", -"600580": "stock", -"卧龙电驱": "stock", -"601799": "stock", -"星宇股份": "stock", -"600330": "stock", -"天通股份": "stock", -"688092": "stock", -"爱科科技": "stock", -"688598": "stock", -"金博股份": "stock", -"002122": "stock", -"汇洲智能": "stock", -"688223": "stock", -"晶科能源": "stock", -"300609": "stock", -"汇纳科技": "stock", -"603937": "stock", -"丽岛新材": "stock", -"603717": "stock", -"天域生态": "stock", -"301018": "stock", -"申菱环境": "stock", -"002442": "stock", -"龙星化工": "stock", -"603290": "stock", -"斯达半导": "stock", -"600091": "stock", -"退市明科": "stock", -"300625": "stock", -"三雄极光": "stock", -"300868": "stock", -"杰美特": "stock", -"300928": "stock", -"华安鑫创": "stock", -"000928": "stock", -"中钢国际": "stock", -"688552": "stock", -"航天南湖": "stock", -"688739": "stock", -"成大生物": "stock", -"603901": "stock", -"永创智能": "stock", -"000927": "stock", -"中国铁物": "stock", -"600118": "stock", -"中国卫星": "stock", -"300759": "stock", -"康龙化成": "stock", -"002684": "stock", -"猛狮退": "stock", -"688116": "stock", -"天奈科技": "stock", -"603101": "stock", -"汇嘉时代": "stock", -"600355": "stock", -"精伦电子": "stock", 
-"300839": "stock", -"博汇股份": "stock", -"603997": "stock", -"继峰股份": "stock", -"300502": "stock", -"新易盛": "stock", -"300323": "stock", -"华灿光电": "stock", -"603123": "stock", -"翠微股份": "stock", -"000802": "stock", -"北京文化": "stock", -"300817": "stock", -"双飞股份": "stock", -"688722": "stock", -"同益中": "stock", -"601616": "stock", -"广电电气": "stock", -"835985": "stock", -"海泰新能": "stock", -"300363": "stock", -"博腾股份": "stock", -"001255": "stock", -"博菲电气": "stock", -"300929": "stock", -"华骐环保": "stock", -"002808": "stock", -"ST恒久": "stock", -"002877": "stock", -"智能自控": "stock", -"300244": "stock", -"迪安诊断": "stock", -"300767": "stock", -"震安科技": "stock", -"600216": "stock", -"浙江医药": "stock", -"830779": "stock", -"武汉蓝电": "stock", -"000626": "stock", -"远大控股": "stock", -"300713": "stock", -"英可瑞": "stock", -"000978": "stock", -"桂林旅游": "stock", -"600641": "stock", -"万业企业": "stock", -"600051": "stock", -"宁波联合": "stock", -"833819": "stock", -"XD颖泰生": "stock", -"601333": "stock", -"广深铁路": "stock", -"300876": "stock", -"蒙泰高新": "stock", -"002095": "stock", -"生意宝": "stock", -"600039": "stock", -"四川路桥": "stock", -"600092": "stock", -"S*ST精密": "stock", -"002642": "stock", -"荣联科技": "stock", -"603956": "stock", -"威派格": "stock", -"300484": "stock", -"蓝海华腾": "stock", -"002789": "stock", -"建艺集团": "stock", -"600220": "stock", -"江苏阳光": "stock", -"600588": "stock", -"用友网络": "stock", -"600499": "stock", -"科达制造": "stock", -"835640": "stock", -"富士达": "stock", -"688076": "stock", -"诺泰生物": "stock", -"002972": "stock", -"科安达": "stock", -"300911": "stock", -"亿田智能": "stock", -"870299": "stock", -"灿能电力": "stock", -"605218": "stock", -"伟时电子": "stock", -"600102": "stock", -"莱钢股份": "stock", -"300701": "stock", -"森霸传感": "stock", -"601089": "stock", -"福元医药": "stock", -"600815": "stock", -"厦工股份": "stock", -"002120": "stock", -"韵达股份": "stock", -"600609": "stock", -"金杯汽车": "stock", -"300482": "stock", -"万孚生物": "stock", -"301167": "stock", -"建研设计": "stock", -"600115": "stock", -"中国东航": "stock", -"300214": "stock", -"日科化学": "stock", -"000046": "stock", -"*ST泛海": "stock", -"002869": "stock", -"金溢科技": "stock", -"300527": "stock", -"中船应急": "stock", -"603877": "stock", -"太平鸟": "stock", -"002148": "stock", -"北纬科技": "stock", -"000970": "stock", -"中科三环": "stock", -"300940": "stock", -"南极光": "stock", -"002312": "stock", -"川发龙蟒": "stock", -"605183": "stock", -"确成股份": "stock", -"688776": "stock", -"国光电气": "stock", -"002958": "stock", -"青农商行": "stock", -"688032": "stock", -"禾迈股份": "stock", -"605128": "stock", -"上海沿浦": "stock", -"688663": "stock", -"新风光": "stock", -"301320": "stock", -"豪江智能": "stock", -"300152": "stock", -"新动力": "stock", -"603789": "stock", -"星光农机": "stock", -"301038": "stock", -"深水规院": "stock", -"002071": "stock", -"长城退": "stock", -"603559": "stock", -"ST通脉": "stock", -"000751": "stock", -"锌业股份": "stock", -"002144": "stock", -"宏达高科": "stock", -"833394": "stock", -"民士达": "stock", -"688507": "stock", -"索辰科技": "stock", -"833914": "stock", -"远航精密": "stock", -"000905": "stock", -"厦门港务": "stock", -"836221": "stock", -"易实精密": "stock", -"600565": "stock", -"迪马股份": "stock", -"603598": "stock", -"引力传媒": "stock", -"601369": "stock", -"陕鼓动力": "stock", -"603989": "stock", -"艾华集团": "stock", -"600527": "stock", -"江南高纤": "stock", -"603025": "stock", -"大豪科技": "stock", -"603206": "stock", -"嘉环科技": "stock", -"688336": "stock", -"三生国健": "stock", -"300357": "stock", -"我武生物": "stock", -"688061": "stock", -"灿瑞科技": "stock", -"003043": "stock", -"华亚智能": "stock", -"603903": "stock", -"中持股份": "stock", -"002767": "stock", -"先锋电子": "stock", -"300879": "stock", -"大叶股份": 
"stock", -"002852": "stock", -"道道全": "stock", -"688285": "stock", -"高铁电气": "stock", -"871478": "stock", -"巨能股份": "stock", -"601988": "stock", -"中国银行": "stock", -"688312": "stock", -"燕麦科技": "stock", -"600112": "stock", -"ST天成": "stock", -"601990": "stock", -"南京证券": "stock", -"600397": "stock", -"安源煤业": "stock", -"600348": "stock", -"华阳股份": "stock", -"000583": "stock", -"S*ST托普": "stock", -"301277": "stock", -"新天地": "stock", -"002628": "stock", -"成都路桥": "stock", -"603801": "stock", -"志邦家居": "stock", -"605066": "stock", -"天正电气": "stock", -"600768": "stock", -"宁波富邦": "stock", -"688687": "stock", -"凯因科技": "stock", -"600958": "stock", -"东方证券": "stock", -"002782": "stock", -"可立克": "stock", -"002955": "stock", -"鸿合科技": "stock", -"603882": "stock", -"金域医学": "stock", -"603766": "stock", -"隆鑫通用": "stock", -"002851": "stock", -"麦格米特": "stock", -"600196": "stock", -"复星医药": "stock", -"600158": "stock", -"中体产业": "stock", -"002766": "stock", -"索菱股份": "stock", -"301029": "stock", -"怡合达": "stock", -"300314": "stock", -"戴维医疗": "stock", -"600177": "stock", -"雅戈尔": "stock", -"002246": "stock", -"北化股份": "stock", -"301251": "stock", -"威尔高": "stock", -"600298": "stock", -"安琪酵母": "stock", -"834021": "stock", -"流金科技": "stock", -"002482": "stock", -"*ST广田": "stock", -"002235": "stock", -"安妮股份": "stock", -"301281": "stock", -"科源制药": "stock", -"002985": "stock", -"北摩高科": "stock", -"601388": "stock", -"怡球资源": "stock", -"300042": "stock", -"朗科科技": "stock", -"300099": "stock", -"精准信息": "stock", -"000678": "stock", -"襄阳轴承": "stock", -"600988": "stock", -"赤峰黄金": "stock", -"688359": "stock", -"三孚新科": "stock", -"688229": "stock", -"博睿数据": "stock", -"873576": "stock", -"天力复合": "stock", -"300268": "stock", -"*ST佳沃": "stock", -"300628": "stock", -"亿联网络": "stock", -"603156": "stock", -"养元饮品": "stock", -"601808": "stock", -"中海油服": "stock", -"300217": "stock", -"东方电热": "stock", -"600377": "stock", -"宁沪高速": "stock", -"601886": "stock", -"江河集团": "stock", -"688435": "stock", -"英方软件": "stock", -"002528": "stock", -"英飞拓": "stock", -"300766": "stock", -"每日互动": "stock", -"002956": "stock", -"西麦食品": "stock", -"301373": "stock", -"凌玮科技": "stock", -"301255": "stock", -"通力科技": "stock", -"300315": "stock", -"掌趣科技": "stock", -"600979": "stock", -"广安爱众": "stock", -"603033": "stock", -"三维股份": "stock", -"002368": "stock", -"太极股份": "stock", -"001209": "stock", -"洪兴股份": "stock", -"000056": "stock", -"皇庭国际": "stock", -"600854": "stock", -"春兰股份": "stock", -"605169": "stock", -"洪通燃气": "stock", -"002021": "stock", -"*ST中捷": "stock", -"002128": "stock", -"电投能源": "stock", -"601519": "stock", -"大智慧": "stock", -"300606": "stock", -"金太阳": "stock", -"300421": "stock", -"力星股份": "stock", -"600540": "stock", -"新赛股份": "stock", -"300603": "stock", -"立昂技术": "stock", -"688128": "stock", -"中国电研": "stock", -"300163": "stock", -"先锋新材": "stock", -"600936": "stock", -"广西广电": "stock", -"002158": "stock", -"汉钟精机": "stock", -"301099": "stock", -"雅创电子": "stock", -"600005": "stock", -"武钢股份": "stock", -"600866": "stock", -"星湖科技": "stock", -"605081": "stock", -"太和水": "stock", -"002302": "stock", -"西部建设": "stock", -"002471": "stock", -"中超控股": "stock", -"688353": "stock", -"华盛锂电": "stock", -"301082": "stock", -"久盛电气": "stock", -"002644": "stock", -"佛慈制药": "stock", -"688213": "stock", -"思特威-W": "stock", -"000605": "stock", -"渤海股份": "stock", -"000030": "stock", -"富奥股份": "stock", -"301176": "stock", -"逸豪新材": "stock", -"603826": "stock", -"坤彩科技": "stock", -"002971": "stock", -"和远气体": "stock", -"300231": "stock", -"银信科技": "stock", -"301311": "stock", -"昆船智能": "stock", -"603779": 
"stock", -"威龙股份": "stock", -"300850": "stock", -"新强联": "stock", -"831087": "stock", -"秋乐种业": "stock", -"688778": "stock", -"厦钨新能": "stock", -"000948": "stock", -"南天信息": "stock", -"600313": "stock", -"农发种业": "stock", -"833580": "stock", -"科创新材": "stock", -"300418": "stock", -"昆仑万维": "stock", -"300018": "stock", -"中元股份": "stock", -"000932": "stock", -"华菱钢铁": "stock", -"300651": "stock", -"金陵体育": "stock", -"603093": "stock", -"南华期货": "stock", -"834058": "stock", -"华洋赛车": "stock", -"300702": "stock", -"天宇股份": "stock", -"300408": "stock", -"三环集团": "stock", -"834765": "stock", -"美之高": "stock", -"002415": "stock", -"海康威视": "stock", -"603393": "stock", -"新天然气": "stock", -"300248": "stock", -"新开普": "stock", -"002425": "stock", -"凯撒文化": "stock", -"300305": "stock", -"裕兴股份": "stock", -"605369": "stock", -"拱东医疗": "stock", -"301207": "stock", -"华兰疫苗": "stock", -"600903": "stock", -"贵州燃气": "stock", -"688400": "stock", -"凌云光": "stock", -"603256": "stock", -"宏和科技": "stock", -"600229": "stock", -"城市传媒": "stock", -"688711": "stock", -"宏微科技": "stock", -"603555": "stock", -"ST贵人": "stock", -"300191": "stock", -"潜能恒信": "stock", -"603936": "stock", -"博敏电子": "stock", -"688133": "stock", -"泰坦科技": "stock", -"300526": "stock", -"中潜退": "stock", -"600753": "stock", -"庚星股份": "stock", -"688160": "stock", -"步科股份": "stock", -"300715": "stock", -"凯伦股份": "stock", -"000504": "stock", -"南华生物": "stock", -"000711": "stock", -"*ST京蓝": "stock", -"002948": "stock", -"青岛银行": "stock", -"002742": "stock", -"ST三圣": "stock", -"000712": "stock", -"锦龙股份": "stock", -"603010": "stock", -"万盛股份": "stock", -"832566": "stock", -"梓橦宫": "stock", -"603189": "stock", -"网达软件": "stock", -"605138": "stock", -"盛泰集团": "stock", -"600536": "stock", -"中国软件": "stock", -"300986": "stock", -"志特新材": "stock", -"600516": "stock", -"方大炭素": "stock", -"000777": "stock", -"中核科技": "stock", -"603979": "stock", -"金诚信": "stock", -"300364": "stock", -"中文在线": "stock", -"301125": "stock", -"腾亚精工": "stock", -"300656": "stock", -"民德电子": "stock", -"300497": "stock", -"富祥药业": "stock", -"000955": "stock", -"欣龙控股": "stock", -"300896": "stock", -"爱美客": "stock", -"833346": "stock", -"威贸电子": "stock", -"002889": "stock", -"东方嘉盛": "stock", -"003039": "stock", -"顺控发展": "stock", -"001211": "stock", -"双枪科技": "stock", -"834407": "stock", -"驰诚股份": "stock", -"002776": "stock", -"*ST柏龙": "stock", -"002685": "stock", -"华东重机": "stock", -"838810": "stock", -"春光药装": "stock", -"688556": "stock", -"高测股份": "stock", -"688252": "stock", -"天德钰": "stock", -"600660": "stock", -"福耀玻璃": "stock", -"301013": "stock", -"利和兴": "stock", -"603039": "stock", -"泛微网络": "stock", -"300051": "stock", -"琏升科技": "stock", -"603803": "stock", -"瑞斯康达": "stock", -"000524": "stock", -"岭南控股": "stock", -"603179": "stock", -"新泉股份": "stock", -"600535": "stock", -"天士力": "stock", -"300480": "stock", -"光力科技": "stock", -"600000": "stock", -"浦发银行": "stock", -"688101": "stock", -"三达膜": "stock", -"603315": "stock", -"福鞍股份": "stock", -"300491": "stock", -"通合科技": "stock", -"600806": "stock", -"退市昆机": "stock", -"600080": "stock", -"金花股份": "stock", -"300429": "stock", -"强力新材": "stock", -"688328": "stock", -"深科达": "stock", -"300992": "stock", -"泰福泵业": "stock", -"301206": "stock", -"三元生物": "stock", -"603203": "stock", -"快克智能": "stock", -"301190": "stock", -"善水科技": "stock", -"603998": "stock", -"XD方盛制": "stock", -"001270": "stock", -"铖昌科技": "stock", -"600161": "stock", -"天坛生物": "stock", -"600238": "stock", -"海南椰岛": "stock", -"300976": "stock", -"达瑞电子": "stock", -"002567": "stock", -"唐人神": "stock", -"002475": "stock", -"立讯精密": "stock", 
-"002382": "stock", -"蓝帆医疗": "stock", -"603138": "stock", -"海量数据": "stock", -"688138": "stock", -"清溢光电": "stock", -"000551": "stock", -"创元科技": "stock", -"002291": "stock", -"遥望科技": "stock", -"600352": "stock", -"浙江龙盛": "stock", -"002662": "stock", -"京威股份": "stock", -"000956": "stock", -"中原油气": "stock", -"002252": "stock", -"上海莱士": "stock", -"000785": "stock", -"居然之家": "stock", -"688320": "stock", -"禾川科技": "stock", -"301239": "stock", -"普瑞眼科": "stock", -"603659": "stock", -"璞泰来": "stock", -"300447": "stock", -"全信股份": "stock", -"300779": "stock", -"惠城环保": "stock", -"600704": "stock", -"物产中大": "stock", -"002677": "stock", -"浙江美大": "stock", -"600818": "stock", -"中路股份": "stock", -"603896": "stock", -"寿仙谷": "stock", -"300001": "stock", -"特锐德": "stock", -"002922": "stock", -"伊戈尔": "stock", -"001256": "stock", -"炜冈科技": "stock", -"600897": "stock", -"厦门空港": "stock", -"002695": "stock", -"煌上煌": "stock", -"300008": "stock", -"天海防务": "stock", -"002396": "stock", -"星网锐捷": "stock", -"603221": "stock", -"爱丽家居": "stock", -"300856": "stock", -"科思股份": "stock", -"300212": "stock", -"易华录": "stock", -"600892": "stock", -"大晟文化": "stock", -"002565": "stock", -"顺灏股份": "stock", -"600059": "stock", -"古越龙山": "stock", -"603767": "stock", -"中马传动": "stock", -"688058": "stock", -"宝兰德": "stock", -"430478": "stock", -"峆一药业": "stock", -"688636": "stock", -"智明达": "stock", -"300700": "stock", -"岱勒新材": "stock", -"002751": "stock", -"易尚退": "stock", -"002268": "stock", -"电科网安": "stock", -"688689": "stock", -"银河微电": "stock", -"600071": "stock", -"凤凰光学": "stock", -"300196": "stock", -"长海股份": "stock", -"300720": "stock", -"海川智能": "stock", -"300454": "stock", -"深信服": "stock", -"688275": "stock", -"万润新能": "stock", -"002402": "stock", -"和而泰": "stock", -"002329": "stock", -"皇氏集团": "stock", -"000088": "stock", -"盐田港": "stock", -"002800": "stock", -"ST天顺": "stock", -"603551": "stock", -"奥普家居": "stock", -"000672": "stock", -"上峰水泥": "stock", -"603881": "stock", -"数据港": "stock", -"301068": "stock", -"大地海洋": "stock", -"002658": "stock", -"雪迪龙": "stock", -"600466": "stock", -"*ST蓝光": "stock", -"002736": "stock", -"国信证券": "stock", -"688069": "stock", -"德林海": "stock", -"688707": "stock", -"振华新材": "stock", -"300077": "stock", -"国民技术": "stock", -"300404": "stock", -"博济医药": "stock", -"002380": "stock", -"科远智慧": "stock", -"301056": "stock", -"森赫股份": "stock", -"002524": "stock", -"光正眼科": "stock", -"605376": "stock", -"博迁新材": "stock", -"301316": "stock", -"慧博云通": "stock", -"600831": "stock", -"广电网络": "stock", -"605208": "stock", -"永茂泰": "stock", -"300990": "stock", -"同飞股份": "stock", -"600307": "stock", -"酒钢宏兴": "stock", -"600986": "stock", -"浙文互联": "stock", -"002474": "stock", -"榕基软件": "stock", -"603836": "stock", -"海程邦达": "stock", -"002074": "stock", -"国轩高科": "stock", -"002737": "stock", -"葵花药业": "stock", -"300035": "stock", -"中科电气": "stock", -"603800": "stock", -"道森股份": "stock", -"600756": "stock", -"浪潮软件": "stock", -"301093": "stock", -"华兰股份": "stock", -"688479": "stock", -"友车科技": "stock", -"600382": "stock", -"广东明珠": "stock", -"603933": "stock", -"睿能科技": "stock", -"300172": "stock", -"中电环保": "stock", -"600729": "stock", -"重庆百货": "stock", -"603599": "stock", -"广信股份": "stock", -"688819": "stock", -"天能股份": "stock", -"300861": "stock", -"美畅股份": "stock", -"688366": "stock", -"昊海生科": "stock", -"836077": "stock", -"吉林碳谷": "stock", -"600278": "stock", -"东方创业": "stock", -"300752": "stock", -"隆利科技": "stock", -"002117": "stock", -"东港股份": "stock", -"688716": "stock", -"中研股份": "stock", -"600200": "stock", -"江苏吴中": "stock", -"300448": "stock", -"浩云科技": 
"stock", -"002712": "stock", -"思美传媒": "stock", -"300798": "stock", -"锦鸡股份": "stock", -"600977": "stock", -"中国电影": "stock", -"603488": "stock", -"展鹏科技": "stock", -"300988": "stock", -"津荣天宇": "stock", -"002023": "stock", -"海特高新": "stock", -"300505": "stock", -"川金诺": "stock", -"688282": "stock", -"理工导航": "stock", -"300535": "stock", -"达威股份": "stock", -"300652": "stock", -"雷迪克": "stock", -"603811": "stock", -"诚意药业": "stock", -"301208": "stock", -"中亦科技": "stock", -"603587": "stock", -"地素时尚": "stock", -"002448": "stock", -"中原内配": "stock", -"601628": "stock", -"中国人寿": "stock", -"600674": "stock", -"川投能源": "stock", -"600827": "stock", -"百联股份": "stock", -"000099": "stock", -"中信海直": "stock", -"600202": "stock", -"哈空调": "stock", -"002775": "stock", -"文科园林": "stock", -"603081": "stock", -"大丰实业": "stock", -"603926": "stock", -"铁流股份": "stock", -"000807": "stock", -"云铝股份": "stock", -"600318": "stock", -"新力金融": "stock", -"002345": "stock", -"潮宏基": "stock", -"300500": "stock", -"启迪设计": "stock", -"301355": "stock", -"南王科技": "stock", -"000537": "stock", -"广宇发展": "stock", -"688136": "stock", -"科兴制药": "stock", -"000988": "stock", -"华工科技": "stock", -"688592": "stock", -"司南导航": "stock", -"600668": "stock", -"尖峰集团": "stock", -"300142": "stock", -"沃森生物": "stock", -"002432": "stock", -"九安医疗": "stock", -"300522": "stock", -"世名科技": "stock", -"002688": "stock", -"金河生物": "stock", -"688668": "stock", -"鼎通科技": "stock", -"002741": "stock", -"光华科技": "stock", -"605001": "stock", -"威奥股份": "stock", -"600770": "stock", -"综艺股份": "stock", -"300337": "stock", -"银邦股份": "stock", -"000753": "stock", -"漳州发展": "stock", -"300665": "stock", -"飞鹿股份": "stock", -"002385": "stock", -"大北农": "stock", -"603657": "stock", -"春光科技": "stock", -"873527": "stock", -"夜光明": "stock", -"600259": "stock", -"广晟有色": "stock", -"300425": "stock", -"中建环能": "stock", -"430476": "stock", -"海能技术": "stock", -"600393": "stock", -"ST粤泰": "stock", -"002547": "stock", -"春兴精工": "stock", -"300912": "stock", -"凯龙高科": "stock", -"688277": "stock", -"天智航-U": "stock", -"300614": "stock", -"百川畅银": "stock", -"002770": "stock", -"科迪退": "stock", -"300823": "stock", -"建科机械": "stock", -"832471": "stock", -"美邦科技": "stock", -"002970": "stock", -"锐明技术": "stock", -"603977": "stock", -"国泰集团": "stock", -"002866": "stock", -"传艺科技": "stock", -"688297": "stock", -"中无人机": "stock", -"688322": "stock", -"奥比中光-UW": "stock", -"002993": "stock", -"奥海科技": "stock", -"300908": "stock", -"仲景食品": "stock", -"600168": "stock", -"武汉控股": "stock", -"600973": "stock", -"宝胜股份": "stock", -"300273": "stock", -"和佳退": "stock", -"688511": "stock", -"天微电子": "stock", -"603788": "stock", -"宁波高发": "stock", -"002739": "stock", -"万达电影": "stock", -"300676": "stock", -"华大基因": "stock", -"002883": "stock", -"中设股份": "stock", -"300617": "stock", -"安靠智电": "stock", -"003041": "stock", -"真爱美家": "stock", -"603607": "stock", -"京华激光": "stock", -"603995": "stock", -"甬金股份": "stock", -"002935": "stock", -"天奥电子": "stock", -"000759": "stock", -"中百集团": "stock", -"002412": "stock", -"汉森制药": "stock", -"002787": "stock", -"华源控股": "stock", -"601558": "stock", -"退市锐电": "stock", -"603613": "stock", -"国联股份": "stock", -"300793": "stock", -"佳禾智能": "stock", -"000885": "stock", -"城发环境": "stock", -"600857": "stock", -"宁波中百": "stock", -"600860": "stock", -"京城股份": "stock", -"000005": "stock", -"ST星源": "stock", -"601368": "stock", -"绿城水务": "stock", -"000916": "stock", -"华北高速": "stock", -"000598": "stock", -"兴蓉环境": "stock", -"002091": "stock", -"江苏国泰": "stock", -"000100": "stock", -"TCL科技": "stock", -"301053": "stock", -"远信工业": "stock", -"603508": 
"stock", -"思维列控": "stock", -"603879": "stock", -"永悦科技": "stock", -"688568": "stock", -"中科星图": "stock", -"603999": "stock", -"读者传媒": "stock", -"601696": "stock", -"中银证券": "stock", -"837821": "stock", -"则成电子": "stock", -"603232": "stock", -"格尔软件": "stock", -"600212": "stock", -"绿能慧充": "stock", -"002243": "stock", -"力合科创": "stock", -"603338": "stock", -"浙江鼎力": "stock", -"002152": "stock", -"广电运通": "stock", -"300373": "stock", -"扬杰科技": "stock", -"002073": "stock", -"软控股份": "stock", -"002384": "stock", -"东山精密": "stock", -"603308": "stock", -"应流股份": "stock", -"600759": "stock", -"*ST洲际": "stock", -"688308": "stock", -"欧科亿": "stock", -"600758": "stock", -"辽宁能源": "stock", -"603136": "stock", -"天目湖": "stock", -"300903": "stock", -"科翔股份": "stock", -"688608": "stock", -"恒玄科技": "stock", -"600176": "stock", -"中国巨石": "stock", -"832149": "stock", -"利尔达": "stock", -"002949": "stock", -"华阳国际": "stock", -"300648": "stock", -"星云股份": "stock", -"002530": "stock", -"金财互联": "stock", -"300638": "stock", -"广和通": "stock", -"002655": "stock", -"共达电声": "stock", -"301157": "stock", -"华塑科技": "stock", -"300138": "stock", -"晨光生物": "stock", -"603119": "stock", -"浙江荣泰": "stock", -"002453": "stock", -"华软科技": "stock", -"300219": "stock", -"鸿利智汇": "stock", -"600328": "stock", -"中盐化工": "stock", -"000961": "stock", -"中南建设": "stock", -"002210": "stock", -"飞马国际": "stock", -"605333": "stock", -"沪光股份": "stock", -"605077": "stock", -"华康股份": "stock", -"603808": "stock", -"歌力思": "stock", -"300589": "stock", -"江龙船艇": "stock", -"601949": "stock", -"中国出版": "stock", -"002301": "stock", -"齐心集团": "stock", -"300097": "stock", -"智云股份": "stock", -"833230": "stock", -"欧康医药": "stock", -"600086": "stock", -"退市金钰": "stock", -"605337": "stock", -"李子园": "stock", -"000572": "stock", -"海马汽车": "stock", -"601226": "stock", -"华电重工": "stock", -"300254": "stock", -"仟源医药": "stock", -"830832": "stock", -"齐鲁华信": "stock", -"000835": "stock", -"长动退": "stock", -"600991": "stock", -"广汽长丰": "stock", -"300199": "stock", -"翰宇药业": "stock", -"000612": "stock", -"焦作万方": "stock", -"603727": "stock", -"博迈科": "stock", -"837748": "stock", -"路桥信息": "stock", -"002696": "stock", -"百洋股份": "stock", -"000856": "stock", -"冀东装备": "stock", -"300673": "stock", -"佩蒂股份": "stock", -"300309": "stock", -"吉艾退": "stock", -"603861": "stock", -"白云电器": "stock", -"003000": "stock", -"劲仔食品": "stock", -"603105": "stock", -"芯能科技": "stock", -"301428": "stock", -"世纪恒通": "stock", -"301337": "stock", -"亚华电子": "stock", -"600646": "stock", -"ST国嘉": "stock", -"601766": "stock", -"中国中车": "stock", -"600193": "stock", -"创兴资源": "stock", -"002771": "stock", -"真视通": "stock", -"600432": "stock", -"退市吉恩": "stock", -"300475": "stock", -"香农芯创": "stock", -"002229": "stock", -"鸿博股份": "stock", -"002164": "stock", -"宁波东力": "stock", -"000913": "stock", -"钱江摩托": "stock", -"300545": "stock", -"联得装备": "stock", -"300788": "stock", -"中信出版": "stock", -"000716": "stock", -"黑芝麻": "stock", -"301362": "stock", -"民爆光电": "stock", -"000702": "stock", -"正虹科技": "stock", -"688147": "stock", -"微导纳米": "stock", -"831726": "stock", -"朱老六": "stock", -"000402": "stock", -"金融街": "stock", -"301141": "stock", -"中科磁业": "stock", -"300608": "stock", -"思特奇": "stock", -"000019": "stock", -"深粮控股": "stock", -"300528": "stock", -"幸福蓝海": "stock", -"605358": "stock", -"立昂微": "stock", -"000937": "stock", -"冀中能源": "stock", -"600933": "stock", -"爱柯迪": "stock", -"600901": "stock", -"江苏金租": "stock", -"688093": "stock", -"世华科技": "stock", -"002358": "stock", -"森源电气": "stock", -"600847": "stock", -"万里股份": "stock", -"000600": "stock", -"建投能源": "stock", 
-"688097": "stock", -"博众精工": "stock", -"600425": "stock", -"青松建化": "stock", -"600771": "stock", -"广誉远": "stock", -"301178": "stock", -"天亿马": "stock", -"836263": "stock", -"中航泰达": "stock", -"605336": "stock", -"帅丰电器": "stock", -"002037": "stock", -"保利联合": "stock", -"002747": "stock", -"埃斯顿": "stock", -"000895": "stock", -"双汇发展": "stock", -"301103": "stock", -"何氏眼科": "stock", -"002660": "stock", -"茂硕电源": "stock", -"838163": "stock", -"方大新材": "stock", -"300102": "stock", -"乾照光电": "stock", -"688391": "stock", -"钜泉科技": "stock", -"600813": "stock", -"ST鞍一工": "stock", -"300124": "stock", -"汇川技术": "stock", -"002931": "stock", -"锋龙股份": "stock", -"835670": "stock", -"数字人": "stock", -"002500": "stock", -"山西证券": "stock", -"300519": "stock", -"新光药业": "stock", -"300292": "stock", -"吴通控股": "stock", -"002709": "stock", -"天赐材料": "stock", -"601139": "stock", -"深圳燃气": "stock", -"600068": "stock", -"葛洲坝": "stock", -"600873": "stock", -"梅花生物": "stock", -"601678": "stock", -"滨化股份": "stock", -"600795": "stock", -"国电电力": "stock", -"603171": "stock", -"税友股份": "stock", -"688166": "stock", -"博瑞医药": "stock", -"600736": "stock", -"苏州高新": "stock", -"300792": "stock", -"壹网壹创": "stock", -"600415": "stock", -"小商品城": "stock", -"002667": "stock", -"威领股份": "stock", -"836957": "stock", -"汉维科技": "stock", -"300659": "stock", -"中孚信息": "stock", -"603037": "stock", -"凯众股份": "stock", -"002364": "stock", -"中恒电气": "stock", -"300308": "stock", -"中际旭创": "stock", -"605488": "stock", -"福莱新材": "stock", -"688551": "stock", -"科威尔": "stock", -"300318": "stock", -"博晖创新": "stock", -"600634": "stock", -"退市富控": "stock", -"300961": "stock", -"深水海纳": "stock", -"600610": "stock", -"中毅达": "stock", -"300296": "stock", -"利亚德": "stock", -"603589": "stock", -"口子窖": "stock", -"600689": "stock", -"上海三毛": "stock", -"002506": "stock", -"协鑫集成": "stock", -"600893": "stock", -"航发动力": "stock", -"301219": "stock", -"腾远钴业": "stock", -"688513": "stock", -"苑东生物": "stock", -"002937": "stock", -"兴瑞科技": "stock", -"688523": "stock", -"航天环宇": "stock", -"002317": "stock", -"众生药业": "stock", -"301357": "stock", -"北方长龙": "stock", -"688386": "stock", -"泛亚微透": "stock", -"600172": "stock", -"黄河旋风": "stock", -"600797": "stock", -"浙大网新": "stock", -"603688": "stock", -"石英股份": "stock", -"002616": "stock", -"长青集团": "stock", -"300960": "stock", -"通业科技": "stock", -"000050": "stock", -"深天马A": "stock", -"002263": "stock", -"大东南": "stock", -"603686": "stock", -"福龙马": "stock", -"002835": "stock", -"同为股份": "stock", -"300229": "stock", -"拓尔思": "stock", -"002983": "stock", -"芯瑞达": "stock", -"002436": "stock", -"兴森科技": "stock", -"301283": "stock", -"聚胶股份": "stock", -"002879": "stock", -"长缆科技": "stock", -"002075": "stock", -"沙钢股份": "stock", -"600570": "stock", -"恒生电子": "stock", -"002856": "stock", -"美芝股份": "stock", -"688512": "stock", -"慧智微-U": "stock", -"300312": "stock", -"邦讯退": "stock", -"603663": "stock", -"三祥新材": "stock", -"002722": "stock", -"物产金轮": "stock", -"002881": "stock", -"美格智能": "stock", -"688230": "stock", -"芯导科技": "stock", -"603725": "stock", -"天安新材": "stock", -"605050": "stock", -"福然德": "stock", -"831961": "stock", -"创远信科": "stock", -"603713": "stock", -"密尔克卫": "stock", -"301290": "stock", -"东星医疗": "stock", -"002876": "stock", -"三利谱": "stock", -"300121": "stock", -"阳谷华泰": "stock", -"601963": "stock", -"重庆银行": "stock", -"003008": "stock", -"开普检测": "stock", -"600281": "stock", -"华阳新材": "stock", -"603075": "stock", -"热威股份": "stock", -"600365": "stock", -"ST通葡": "stock", -"301314": "stock", -"科瑞思": "stock", -"300241": "stock", -"瑞丰光电": "stock", -"301060": "stock", -"兰卫医学": 
"stock", -"605299": "stock", -"舒华体育": "stock", -"003038": "stock", -"鑫铂股份": "stock", -"688233": "stock", -"神工股份": "stock", -"603825": "stock", -"华扬联众": "stock", -"300445": "stock", -"康斯特": "stock", -"300716": "stock", -"泉为科技": "stock", -"300485": "stock", -"赛升药业": "stock", -"603040": "stock", -"新坐标": "stock", -"300339": "stock", -"润和软件": "stock", -"000522": "stock", -"白云山A": "stock", -"301019": "stock", -"宁波色母": "stock", -"600585": "stock", -"海螺水泥": "stock", -"301335": "stock", -"天元宠物": "stock", -"300880": "stock", -"迦南智能": "stock", -"688189": "stock", -"南新制药": "stock", -"600966": "stock", -"博汇纸业": "stock", -"833781": "stock", -"瑞奇智造": "stock", -"000793": "stock", -"华闻集团": "stock", -"002730": "stock", -"电光科技": "stock", -"002111": "stock", -"威海广泰": "stock", -"300473": "stock", -"德尔股份": "stock", -"000032": "stock", -"深桑达A": "stock", -"600657": "stock", -"信达地产": "stock", -"002759": "stock", -"天际股份": "stock", -"300468": "stock", -"四方精创": "stock", -"603172": "stock", -"万丰股份": "stock", -"002977": "stock", -"天箭科技": "stock", -"600639": "stock", -"浦东金桥": "stock", -"603506": "stock", -"南都物业": "stock", -"002849": "stock", -"威星智能": "stock", -"833454": "stock", -"同心传动": "stock", -"001330": "stock", -"博纳影业": "stock", -"王": "firstnm", -"李": "firstnm", -"张": "firstnm", -"刘": "firstnm", -"陈": "firstnm", -"杨": "firstnm", -"黄": "firstnm", -"吴": "firstnm", -"赵": "firstnm", -"周": "firstnm", -"徐": "firstnm", -"孙": "firstnm", -"马": "firstnm", -"朱": "firstnm", -"胡": "firstnm", -"林": "firstnm", -"郭": "firstnm", -"何": "firstnm", -"高": "firstnm", -"罗": "firstnm", -"郑": "firstnm", -"梁": "firstnm", -"谢": "firstnm", -"宋": "firstnm", -"唐": "firstnm", -"许": "firstnm", -"邓": "firstnm", -"冯": "firstnm", -"韩": "firstnm", -"曹": "firstnm", -"曾": "firstnm", -"彭": "firstnm", -"肖": "firstnm", -"蔡": "firstnm", -"潘": "firstnm", -"田": "firstnm", -"董": "firstnm", -"袁": "firstnm", -"于": "firstnm", -"余": "firstnm", -"蒋": "firstnm", -"叶": "firstnm", -"杜": "firstnm", -"苏": "firstnm", -"魏": "firstnm", -"程": "firstnm", -"吕": "firstnm", -"丁": "firstnm", -"沈": "firstnm", -"任": "firstnm", -"姚": "firstnm", -"卢": "firstnm", -"钟": "firstnm", -"姜": "firstnm", -"崔": "firstnm", -"谭": "firstnm", -"廖": "firstnm", -"范": "firstnm", -"汪": "firstnm", -"陆": "firstnm", -"金": "firstnm", -"石": "firstnm", -"戴": "firstnm", -"贾": "firstnm", -"韦": "firstnm", -"夏": "firstnm", -"邱": "firstnm", -"方": "firstnm", -"侯": "firstnm", -"邹": "firstnm", -"熊": "firstnm", -"孟": "firstnm", -"秦": "firstnm", -"白": "firstnm", -"毛": "firstnm", -"江": "firstnm", -"闫": "firstnm", -"薛": "firstnm", -"尹": "firstnm", -"付": "firstnm", -"段": "firstnm", -"雷": "firstnm", -"黎": "firstnm", -"史": "firstnm", -"龙": "firstnm", -"钱": "firstnm", -"贺": "firstnm", -"陶": "firstnm", -"顾": "firstnm", -"龚": "firstnm", -"郝": "firstnm", -"邵": "firstnm", -"万": "firstnm", -"严": "firstnm", -"洪": "firstnm", -"赖": "firstnm", -"武": "firstnm", -"傅": "firstnm", -"莫": "firstnm", -"孔": "firstnm", -"汤": "firstnm", -"向": "firstnm", -"常": "firstnm", -"温": "firstnm", -"康": "firstnm", -"施": "firstnm", -"文": "firstnm", -"牛": "firstnm", -"樊": "firstnm", -"葛": "firstnm", -"邢": "firstnm", -"安": "firstnm", -"齐": "firstnm", -"易": "firstnm", -"乔": "firstnm", -"伍": "firstnm", -"庞": "firstnm", -"颜": "firstnm", -"倪": "firstnm", -"庄": "firstnm", -"聂": "firstnm", -"章": "firstnm", -"鲁": "firstnm", -"岳": "firstnm", -"翟": "firstnm", -"申": "firstnm", -"殷": "firstnm", -"詹": "firstnm", -"欧": "firstnm", -"耿": "firstnm", -"关": "firstnm", -"覃": "firstnm", -"兰": "firstnm", -"焦": "firstnm", -"俞": "firstnm", -"左": "firstnm", -"柳": "firstnm", 
-"甘": "firstnm", -"祝": "firstnm", -"包": "firstnm", -"代": "firstnm", -"宁": "firstnm", -"符": "firstnm", -"阮": "firstnm", -"尚": "firstnm", -"舒": "firstnm", -"纪": "firstnm", -"柯": "firstnm", -"梅": "firstnm", -"童": "firstnm", -"毕": "firstnm", -"凌": "firstnm", -"单": "firstnm", -"季": "firstnm", -"成": "firstnm", -"霍": "firstnm", -"苗": "firstnm", -"裴": "firstnm", -"涂": "firstnm", -"谷": "firstnm", -"曲": "firstnm", -"盛": "firstnm", -"冉": "firstnm", -"翁": "firstnm", -"蓝": "firstnm", -"骆": "firstnm", -"路": "firstnm", -"游": "firstnm", -"靳": "firstnm", -"辛": "firstnm", -"管": "firstnm", -"柴": "firstnm", -"蒙": "firstnm", -"鲍": "firstnm", -"华": "firstnm", -"喻": "firstnm", -"祁": "firstnm", -"房": "firstnm", -"蒲": "firstnm", -"滕": "firstnm", -"萧": "firstnm", -"屈": "firstnm", -"饶": "firstnm", -"解": "firstnm", -"牟": "firstnm", -"艾": "firstnm", -"尤": "firstnm", -"时": "firstnm", -"阳": "firstnm", -"阎": "firstnm", -"穆": "firstnm", -"应": "firstnm", -"农": "firstnm", -"司": "firstnm", -"古": "firstnm", -"吉": "firstnm", -"卓": "firstnm", -"车": "firstnm", -"简": "firstnm", -"连": "firstnm", -"缪": "firstnm", -"项": "firstnm", -"麦": "firstnm", -"褚": "firstnm", -"窦": "firstnm", -"娄": "firstnm", -"戚": "firstnm", -"岑": "firstnm", -"党": "firstnm", -"宫": "firstnm", -"景": "firstnm", -"卜": "firstnm", -"费": "firstnm", -"冷": "firstnm", -"晏": "firstnm", -"卫": "firstnm", -"席": "firstnm", -"柏": "firstnm", -"米": "firstnm", -"隋": "firstnm", -"宗": "firstnm", -"桂": "firstnm", -"瞿": "firstnm", -"全": "firstnm", -"苟": "firstnm", -"楼": "firstnm", -"闵": "firstnm", -"佟": "firstnm", -"臧": "firstnm", -"边": "firstnm", -"卞": "firstnm", -"姬": "firstnm", -"邬": "firstnm", -"和": "firstnm", -"师": "firstnm", -"仇": "firstnm", -"栾": "firstnm", -"丘": "firstnm", -"刁": "firstnm", -"沙": "firstnm", -"商": "firstnm", -"寇": "firstnm", -"荣": "firstnm", -"巫": "firstnm", -"郎": "firstnm", -"桑": "firstnm", -"丛": "firstnm", -"甄": "firstnm", -"敖": "firstnm", -"虞": "firstnm", -"仲": "firstnm", -"池": "firstnm", -"巩": "firstnm", -"明": "firstnm", -"佘": "firstnm", -"查": "firstnm", -"麻": "firstnm", -"苑": "firstnm", -"迟": "firstnm", -"邝": "firstnm", -"封": "firstnm", -"官": "firstnm", -"谈": "firstnm", -"鞠": "firstnm", -"匡": "firstnm", -"惠": "firstnm", -"荆": "firstnm", -"乐": "firstnm", -"冀": "firstnm", -"胥": "firstnm", -"郁": "firstnm", -"南": "firstnm", -"班": "firstnm", -"储": "firstnm", -"芦": "firstnm", -"原": "firstnm", -"栗": "firstnm", -"燕": "firstnm", -"楚": "firstnm", -"鄢": "firstnm", -"扬": "firstnm", -"劳": "firstnm", -"谌": "firstnm", -"奚": "firstnm", -"皮": "firstnm", -"蔺": "firstnm", -"粟": "firstnm", -"冼": "firstnm", -"盘": "firstnm", -"满": "firstnm", -"闻": "firstnm", -"厉": "firstnm", -"伊": "firstnm", -"候": "firstnm", -"仝": "firstnm", -"百里": "firstnm", -"淳于": "firstnm", -"澹台": "firstnm", -"第五": "firstnm", -"东方": "firstnm", -"独孤": "firstnm", -"端木": "firstnm", -"段干": "firstnm", -"公孙": "firstnm", -"公西": "firstnm", -"公羊": "firstnm", -"公冶": "firstnm", -"赫连": "firstnm", -"呼延": "firstnm", -"皇甫": "firstnm", -"乐正": "firstnm", -"冷狐": "firstnm", -"令狐": "firstnm", -"刘付": "firstnm", -"刘傅": "firstnm", -"闾丘": "firstnm", -"慕容": "firstnm", -"纳兰": "firstnm", -"南宫": "firstnm", -"南门": "firstnm", -"殴阳": "firstnm", -"濮阳": "firstnm", -"亓官": "firstnm", -"上官": "firstnm", -"申屠": "firstnm", -"司空": "firstnm", -"司寇": "firstnm", -"司马": "firstnm", -"司徒": "firstnm", -"太史": "firstnm", -"太叔": "firstnm", -"拓跋": "firstnm", -"完颜": "firstnm", -"万俟": "firstnm", -"尉迟": "firstnm", -"闻人": "firstnm", -"巫马": "firstnm", -"西门": "firstnm", -"夏侯": "firstnm", -"夏候": "firstnm", -"鲜于": "firstnm", -"轩辕": "firstnm", -"宇文": "firstnm", 
-"长孙": "firstnm", -"钟离": "firstnm", -"仲孙": "firstnm", -"诸葛": "firstnm", -"颛孙": "firstnm", -"宗政": "firstnm", -"左丘": "firstnm" -} +{ +"一中一台": "toxic", +"一党专制": "toxic", +"一党专政": "toxic", +"一党执政": "toxic", +"一党独裁": "toxic", +"一党私利": "toxic", +"一卡多号": "toxic", +"一寸山河一寸血": "toxic", +"一小撮别": "toxic", +"一本道": "toxic", +"丁子霖": "toxic", +"丁鑫发": "toxic", +"丁香社": "toxic", +"七大军区": "toxic", +"万人大签名": "toxic", +"万人骚动": "toxic", +"万法归一功": "toxic", +"万润南": "toxic", +"万维读者论坛": "toxic", +"万里大造林": "toxic", +"三三九乘元功": "toxic", +"三个呆婊": "toxic", +"三分钟教你原子弹": "toxic", +"三利达弓弩专卖网": "toxic", +"三利达弓弩军刀": "toxic", +"三利达弓弩直营": "toxic", +"三利达弓弩配件": "toxic", +"三去车仑": "toxic", +"三水法轮": "toxic", +"三班仆人派": "toxic", +"三硝基甲苯": "toxic", +"三箭气枪出售": "toxic", +"三陪": "toxic", +"上中央": "toxic", +"上海帮": "toxic", +"上海独立": "toxic", +"上网文凭": "toxic", +"上访": "toxic", +"上门激": "toxic", +"下法轮": "toxic", +"不得好死": "toxic", +"世界经济导报": "toxic", +"世维会": "toxic", +"丘小雄": "toxic", +"丘广钟": "toxic", +"业力回报": "toxic", +"业力轮": "toxic", +"丛福奎": "toxic", +"东京热": "toxic", +"东伊运": "toxic", +"东北独立": "toxic", +"东南西北论谈": "toxic", +"东土耳其斯坦": "toxic", +"东复活": "toxic", +"东方微点": "toxic", +"东方红时空": "toxic", +"东方闪电": "toxic", +"东森新闻网": "toxic", +"东森电视": "toxic", +"东突": "toxic", +"东突厥斯坦解放组织": "toxic", +"东突解放组织": "toxic", +"东西南北论坛": "toxic", +"丝袜保": "toxic", +"两岸才子": "toxic", +"严家其": "toxic", +"严晓玲": "toxic", +"严重违纪": "toxic", +"个人圆满说": "toxic", +"个四小码": "toxic", +"个邪的党": "toxic", +"丫与王益": "toxic", +"中gong": "toxic", +"中共": "toxic", +"中共任用": "toxic", +"中共保命": "toxic", +"中共党文化": "toxic", +"中共封网": "toxic", +"中共封锁": "toxic", +"中共帝国": "toxic", +"中共帮凶": "toxic", +"中共恐惧": "toxic", +"中共政治游戏": "toxic", +"中共权力斗争": "toxic", +"中共洗脑": "toxic", +"中共独裁": "toxic", +"中共的罪恶": "toxic", +"中共的血旗": "toxic", +"中共腐败": "toxic", +"中共裁": "toxic", +"中共解体": "toxic", +"中共近期权力斗争": "toxic", +"中共退党": "toxic", +"中共邪教": "toxic", +"中共邪毒素": "toxic", +"中共黑": "toxic", +"中共黑帮": "toxic", +"中办发": "toxic", +"中功": "toxic", +"中华养生益智功": "toxic", +"中华养生益智气": "toxic", +"中华局域网": "toxic", +"中华帝国": "toxic", +"中华昆仑女神功": "toxic", +"中华联邦": "toxic", +"中南海": "toxic", +"中南海恩仇录": "toxic", +"中南海斗争": "toxic", +"中南海权力斗争": "toxic", +"中南海的权力游戏": "toxic", +"中南海黑幕": "toxic", +"中印边界谈判结果": "toxic", +"中国zf": "toxic", +"中国不强": "toxic", +"中国之春": "toxic", +"中国人权": "toxic", +"中国人民党": "toxic", +"中国共和党": "toxic", +"中国复兴党": "toxic", +"中国官场情杀案": "toxic", +"中国实行血腥教育": "toxic", +"中国当局": "toxic", +"中国教徒": "toxic", +"中国数字时代": "toxic", +"中国新民党": "toxic", +"中国时报": "toxic", +"中国正义党": "toxic", +"中国民主党": "toxic", +"中国民主正义党": "toxic", +"中国民主运动": "toxic", +"中国没有自由": "toxic", +"中国海外腐败兵团": "toxic", +"中国猪": "toxic", +"中国的陷阱": "toxic", +"中国网络审查": "toxic", +"中国舆论监督网周洪": "toxic", +"中国论坛": "toxic", +"中国贪官在海外": "toxic", +"中国过渡政府": "toxic", +"中国高层权力斗争": "toxic", +"中央zf": "toxic", +"中央领导": "toxic", +"中央黑幕": "toxic", +"中特": "toxic", +"中珙": "toxic", +"中的班禅": "toxic", +"临震预报": "toxic", +"丹增嘉措": "toxic", +"为党不为国": "toxic", +"主席忏": "toxic", +"主席李世民": "toxic", +"主权": "toxic", +"主神教": "toxic", +"丽媛离": "toxic", +"举国体": "toxic", +"乏仑": "toxic", +"乏伦": "toxic", +"乏囵": "toxic", +"乏抡": "toxic", +"乏沦": "toxic", +"乏纶": "toxic", +"乏论": "toxic", +"乏轮": "toxic", +"乐透码": "toxic", +"乖乖粉": "toxic", +"九ping": "toxic", +"九十三运动": "toxic", +"九学": "toxic", +"九常委": "toxic", +"九评": "toxic", +"九评共": "toxic", +"九长老": "toxic", +"九风": "toxic", +"九龙论坛": "toxic", +"习仲勋": "toxic", +"习太子": "toxic", +"习明泽": "toxic", +"习晋平": "toxic", +"习近平": "toxic", +"习进平": "toxic", +"书办理": "toxic", +"买别墅": "toxic", +"乱伦": "toxic", +"了件渔袍": "toxic", +"二十四事件": "toxic", +"于剑鸣": "toxic", +"于幼军": "toxic", 
+"互联网审查": "toxic", +"五套功法": "toxic", +"五毛们": "toxic", +"五毛党": "toxic", +"亡党亡国": "toxic", +"亢议": "toxic", +"产党共": "toxic", +"京地震": "toxic", +"京夫子": "toxic", +"京要地震": "toxic", +"人quan": "toxic", +"人体炸弹": "toxic", +"人宇特能功": "toxic", +"人弹": "toxic", +"人拳": "toxic", +"人木又": "toxic", +"人民之声论坛": "toxic", +"人民币恶搞": "toxic", +"人民报": "toxic", +"人渣": "toxic", +"人肉炸弹": "toxic", +"仇共": "toxic", +"仓井空": "toxic", +"付晓光": "toxic", +"令计划": "toxic", +"伐仑": "toxic", +"伐伦": "toxic", +"伐囵": "toxic", +"伐抡": "toxic", +"伐沦": "toxic", +"伐论": "toxic", +"伐轮": "toxic", +"传九促三": "toxic", +"传九退三": "toxic", +"何洪达": "toxic", +"何清涟": "toxic", +"何祚庥": "toxic", +"余杰": "toxic", +"你他妈": "toxic", +"你吗b": "toxic", +"你妈的": "toxic", +"你说我说论坛": "toxic", +"你麻痹": "toxic", +"供产": "toxic", +"供铲党": "toxic", +"供铲裆": "toxic", +"供铲谠": "toxic", +"侯伍杰": "toxic", +"侯德健": "toxic", +"俄罗斯轮盘": "toxic", +"俄羅斯": "toxic", +"保钓组织": "toxic", +"俞正声": "toxic", +"信访": "toxic", +"信访专班": "toxic", +"修炼大法": "toxic", +"借腹生子": "toxic", +"倪献策": "toxic", +"假币出售": "toxic", +"假庆淋": "toxic", +"假文凭": "toxic", +"假证件": "toxic", +"偷偷贪": "toxic", +"偷听器": "toxic", +"偷肃贪": "toxic", +"偷電器": "toxic", +"催情粉": "toxic", +"催情药": "toxic", +"催情藥": "toxic", +"催眠水": "toxic", +"傻b": "toxic", +"傻比": "toxic", +"傻逼": "toxic", +"光复民国": "toxic", +"光祖": "toxic", +"党内危机": "toxic", +"党内权争": "toxic", +"党内权力": "toxic", +"党内言事潮": "toxic", +"党前干劲": "toxic", +"党后萎": "toxic", +"党校安插亲信": "toxic", +"党棍": "toxic", +"党的喉舌": "toxic", +"党禁": "toxic", +"党章": "toxic", +"党鞭": "toxic", +"党风日下": "toxic", +"党魁": "toxic", +"全家不得好死": "toxic", +"全家死光": "toxic", +"全家死绝": "toxic", +"全范围教会": "toxic", +"八九": "toxic", +"八九年": "toxic", +"八九政治": "toxic", +"八老": "toxic", +"公产党": "toxic", +"六HE彩": "toxic", +"六代接班人": "toxic", +"六和谐四": "toxic", +"六 四": "toxic", +"六.四": "toxic", +"六四": "toxic", +"六四事": "toxic", +"六四事件": "toxic", +"六四信息": "toxic", +"六四内部日记": "toxic", +"六四受难者家属证辞": "toxic", +"六四资料馆": "toxic", +"六月联盟": "toxic", +"六月飞雪": "toxic", +"六死": "toxic", +"六河蟹四": "toxic", +"六百度四": "toxic", +"兰州军区": "toxic", +"共c党": "toxic", +"共x党": "toxic", +"共一产一党": "toxic", +"共产": "toxic", +"共产专制": "toxic", +"共产主义的幽灵": "toxic", +"共产主义黑皮书": "toxic", +"共产党专制": "toxic", +"共产党的报应": "toxic", +"共产党的末日": "toxic", +"共产党腐败": "toxic", +"共产王朝": "toxic", +"共匪": "toxic", +"共和国2049": "toxic", +"共字玄机": "toxic", +"共惨": "toxic", +"共惨党": "toxic", +"共残主义": "toxic", +"共残党": "toxic", +"共残裆": "toxic", +"共狗": "toxic", +"共王储": "toxic", +"共贪党": "toxic", +"共铲": "toxic", +"共铲党": "toxic", +"共青团派": "toxic", +"共青背景": "toxic", +"兽交": "toxic", +"内争人权": "toxic", +"内斗": "toxic", +"冈本真": "toxic", +"写两会": "toxic", +"冤民大同盟": "toxic", +"冯正虎": "toxic", +"冯珏": "toxic", +"冰在火上": "toxic", +"冰毒": "toxic", +"刘克田": "toxic", +"刘刚": "toxic", +"刘宾雁": "toxic", +"刘志军": "toxic", +"刘志华": "toxic", +"刘方仁": "toxic", +"刘明康": "toxic", +"刘晓竹": "toxic", +"刘知炳": "toxic", +"刘维明": "toxic", +"刘连昆": "toxic", +"刘金宝": "toxic", +"刘长贵": "toxic", +"判处死刑": "toxic", +"别他吗": "toxic", +"别梦成灰": "toxic", +"北京之春": "toxic", +"北京事件": "toxic", +"北京军区": "toxic", +"北京市委黑幕": "toxic", +"北京帮": "toxic", +"北京当局": "toxic", +"北京政坛清华名人": "toxic", +"北京政权": "toxic", +"北京独立": "toxic", +"北京风波": "toxic", +"北京黑幕": "toxic", +"北国之春": "toxic", +"北姑": "toxic", +"北省委门": "toxic", +"北美巡回讲法": "toxic", +"北美自由论坛": "toxic", +"北美讲坛": "toxic", +"北韩": "toxic", +"北高联": "toxic", +"十7大": "toxic", +"十七位老部长": "toxic", +"十七大": "toxic", +"十七大人事安排": "toxic", +"十七大权力争霸战": "toxic", +"十八大": "toxic", +"十八大接班人": "toxic", +"十大独裁": "toxic", +"华国锋": "toxic", +"华夏文摘": "toxic", +"华夏论坛": "toxic", +"华如秀": "toxic", +"华岳时事论坛": "toxic", +"华盛顿邮报": "toxic", 
+"华藏功": "toxic", +"华语世界论坛": "toxic", +"华通时事论坛": "toxic", +"南京军区": "toxic", +"南充针": "toxic", +"南大自由论坛": "toxic", +"南方军刀网": "toxic", +"南街村": "toxic", +"博会暂停": "toxic", +"博彩娱": "toxic", +"博讯": "toxic", +"占领台湾": "toxic", +"卧槽": "toxic", +"卧艹": "toxic", +"印尼事件": "toxic", +"印尼屠华": "toxic", +"原一九五七": "toxic", +"去中央": "toxic", +"双开": "toxic", +"双筒": "toxic", +"双管平": "toxic", +"双管立": "toxic", +"双规": "toxic", +"反party": "toxic", +"反中共黑色暴力": "toxic", +"反共": "toxic", +"反共传单": "toxic", +"反共言论": "toxic", +"反分裂": "toxic", +"反奥": "toxic", +"反对共产主义": "toxic", +"反对共产党": "toxic", +"反屏蔽": "toxic", +"反攻大陆": "toxic", +"反测速雷": "toxic", +"反社会": "toxic", +"反社会主义": "toxic", +"反腐总攻": "toxic", +"反腐败论坛": "toxic", +"反雷达测": "toxic", +"反雷达测速": "toxic", +"发仑": "toxic", +"发仑da发": "toxic", +"发伦": "toxic", +"发伦功": "toxic", +"发伦工": "toxic", +"发囵": "toxic", +"发国难财": "toxic", +"发愣": "toxic", +"发抡": "toxic", +"发抡功": "toxic", +"发正念": "toxic", +"发沦": "toxic", +"发牌绝": "toxic", +"发生暴动": "toxic", +"发瞟": "toxic", +"发纶": "toxic", +"发论": "toxic", +"发论公": "toxic", +"发论功": "toxic", +"发论工": "toxic", +"发轮": "toxic", +"发轮功": "toxic", +"发轮功陈果": "toxic", +"受贿罪": "toxic", +"叛逃美国": "toxic", +"台du": "toxic", +"台wan": "toxic", +"台军": "toxic", +"台完": "toxic", +"台弯": "toxic", +"台毒": "toxic", +"台海危机": "toxic", +"台海大战": "toxic", +"台海局势": "toxic", +"台海战争": "toxic", +"台海统一": "toxic", +"台海问题": "toxic", +"台湾共和国": "toxic", +"台湾国": "toxic", +"台湾应该独立": "toxic", +"台湾建国运动组织": "toxic", +"台湾政论区": "toxic", +"台湾有权独立": "toxic", +"台湾版假币": "toxic", +"台湾独立": "toxic", +"台湾猪": "toxic", +"台湾自由联盟": "toxic", +"台湾问题": "toxic", +"台独": "toxic", +"台百度湾": "toxic", +"叶兵": "toxic", +"司徒华": "toxic", +"司马璐": "toxic", +"司马璐回忆录": "toxic", +"同盟党": "toxic", +"向巴平措": "toxic", +"吕德彬": "toxic", +"启蒙派": "toxic", +"吴振汉": "toxic", +"吾尔": "toxic", +"吾尔开希": "toxic", +"吾尔开西": "toxic", +"吾爾開希": "toxic", +"告全国同胞书": "toxic", +"告洋状": "toxic", +"周小川": "toxic", +"周文吉": "toxic", +"周正毅": "toxic", +"哈狗帮": "toxic", +"哒赖": "toxic", +"唑仑": "toxic", +"喝血社会": "toxic", +"器官贩卖": "toxic", +"四二六社论": "toxic", +"回复可见": "toxic", +"回忆六四": "toxic", +"回民暴动": "toxic", +"回民猪": "toxic", +"回汉冲突": "toxic", +"回派": "toxic", +"回良玉": "toxic", +"团派": "toxic", +"围攻上海": "toxic", +"国wu院": "toxic", +"国一九五七": "toxic", +"国之母": "toxic", +"国姆": "toxic", +"国家防火墙": "toxic", +"国母": "toxic", +"国统会": "toxic", +"国统纲领": "toxic", +"国际投注": "toxic", +"国际特赦": "toxic", +"土g": "toxic", +"土共": "toxic", +"土枪": "toxic", +"土炮": "toxic", +"土炸药成份": "toxic", +"圣战不息": "toxic", +"圣战组织": "toxic", +"圣殿教": "toxic", +"圣火护卫": "toxic", +"圣灵重建教会": "toxic", +"地下先烈": "toxic", +"地下刊物": "toxic", +"地下教会": "toxic", +"地下钱庄": "toxic", +"地产之歌": "toxic", +"地奈德": "toxic", +"坦克人": "toxic", +"坦克压大学生": "toxic", +"垡仑": "toxic", +"垡伦": "toxic", +"垡囵": "toxic", +"垡抡": "toxic", +"垡沦": "toxic", +"垡纶": "toxic", +"垡论": "toxic", +"垡轮": "toxic", +"基地组织": "toxic", +"基督": "toxic", +"基督教": "toxic", +"基督灵恩布道团": "toxic", +"塔利班": "toxic", +"境外媒体": "toxic", +"士康事件": "toxic", +"士的宁": "toxic", +"士的年": "toxic", +"夏川纯": "toxic", +"多党执政": "toxic", +"大sb": "toxic", +"大中华论坛": "toxic", +"大参考": "toxic", +"大嘴歌": "toxic", +"大圆满法": "toxic", +"大学暴动": "toxic", +"大家论坛": "toxic", +"大庄": "toxic", +"大批贪官": "toxic", +"大揭露": "toxic", +"大法": "toxic", +"大法修炼者": "toxic", +"大法弟子": "toxic", +"大法轮": "toxic", +"大纪元": "toxic", +"大肉棒": "toxic", +"大赦国际": "toxic", +"大陆官方": "toxic", +"大雞巴": "toxic", +"大鸡巴": "toxic", +"大麻": "toxic", +"大麻树脂": "toxic", +"大麻油": "toxic", +"天安门": "toxic", +"天安门事件": "toxic", +"天府广场": "toxic", +"天按门": "toxic", +"天推广歌": "toxic", +"天朝特": "toxic", +"天灭中共": "toxic", +"天鹅之旅": "toxic", +"奥你妈的运": "toxic", 
+"奥孕": "toxic", +"奥晕": "toxic", +"奸杀": "toxic", +"妈了个逼": "toxic", +"妈逼": "toxic", +"姜维平": "toxic", +"威胁论": "toxic", +"娘西皮": "toxic", +"婊子": "toxic", +"婊子养的": "toxic", +"媒体封锁": "toxic", +"孔丹": "toxic", +"孙小虹": "toxic", +"孙晋美": "toxic", +"孙瑜": "toxic", +"孙静晔": "toxic", +"孙鹤龄": "toxic", +"孟庆平": "toxic", +"孟建柱": "toxic", +"学chao": "toxic", +"学位證": "toxic", +"学潮": "toxic", +"学生领袖": "toxic", +"学百度潮": "toxic", +"学联": "toxic", +"学自联": "toxic", +"学运": "toxic", +"学骚乱": "toxic", +"宇宙主佛": "toxic", +"宇宙大法": "toxic", +"宇宙毁灭": "toxic", +"安魂网": "toxic", +"宋勇辽宁": "toxic", +"宋平顺": "toxic", +"宏志": "toxic", +"宏法": "toxic", +"官逼民反": "toxic", +"宪政": "toxic", +"宪章": "toxic", +"封从德": "toxic", +"小6灵通": "toxic", +"小泽一郎": "toxic", +"小活佛": "toxic", +"少年阿宾": "toxic", +"尚福林": "toxic", +"尹方明": "toxic", +"尼尔伍德": "toxic", +"屙民": "toxic", +"屠杀学生": "toxic", +"山涉黑": "toxic", +"岡本真": "toxic", +"川b26931": "toxic", +"州三箭": "toxic", +"工产党": "toxic", +"工力人": "toxic", +"工自联": "toxic", +"左棍": "toxic", +"左翼联盟": "toxic", +"左转是政": "toxic", +"师涛": "toxic", +"希脏": "toxic", +"希葬": "toxic", +"希藏": "toxic", +"帝国之梦": "toxic", +"干你妈": "toxic", +"干你娘": "toxic", +"干扰器": "toxic", +"干死你": "toxic", +"平义杰": "toxic", +"平反": "toxic", +"平反64": "toxic", +"平反六四": "toxic", +"平叫到床": "toxic", +"平惨案": "toxic", +"幼交": "toxic", +"幼齿类": "toxic", +"幽谷三": "toxic", +"广安事件": "toxic", +"广州军区": "toxic", +"庆红": "toxic", +"康日新": "toxic", +"康生丹": "toxic", +"康跳楼": "toxic", +"开天目": "toxic", +"开枪": "toxic", +"开邓选": "toxic", +"开除党籍": "toxic", +"异见人士": "toxic", +"异议人士": "toxic", +"引起暴动": "toxic", +"弘志": "toxic", +"张丹红": "toxic", +"张凯广东": "toxic", +"张国光": "toxic", +"张宏堡": "toxic", +"张宏宝": "toxic", +"张宗海": "toxic", +"张家盟浙江": "toxic", +"张小洋": "toxic", +"张志新": "toxic", +"张恩照": "toxic", +"张文中": "toxic", +"张斌": "toxic", +"张春桥": "toxic", +"张春江": "toxic", +"张晓明": "toxic", +"张曙": "toxic", +"张秋阳": "toxic", +"张辛泰": "toxic", +"张高丽": "toxic", +"强制拆除": "toxic", +"强制捐款": "toxic", +"强权政府": "toxic", +"影子政府": "toxic", +"徐国健": "toxic", +"徐才厚": "toxic", +"徐明": "toxic", +"徐炳松": "toxic", +"徐玉元": "toxic", +"徐衍东": "toxic", +"徐鹏航": "toxic", +"得财兼": "toxic", +"志洪李": "toxic", +"恐共": "toxic", +"恐怖份子": "toxic", +"恐怖分子": "toxic", +"恶党": "toxic", +"恶搞人民币": "toxic", +"恶警": "toxic", +"惨奥": "toxic", +"惩贪难": "toxic", +"慈悲功": "toxic", +"慕绥新": "toxic", +"懊孕": "toxic", +"懊运": "toxic", +"成克杰": "toxic", +"成都军区": "toxic", +"我操": "toxic", +"我日你": "toxic", +"我草": "toxic", +"戒yan": "toxic", +"戒严": "toxic", +"戴海静": "toxic", +"戴秉国": "toxic", +"打倒中共": "toxic", +"打倒中国": "toxic", +"打倒共产主义": "toxic", +"打倒共产党": "toxic", +"打倒朱镕": "toxic", +"打倒李鹏": "toxic", +"打倒江主席": "toxic", +"打倒江泽民": "toxic", +"打倒温家宝": "toxic", +"打倒罗干": "toxic", +"打倒胡锦涛": "toxic", +"打台湾": "toxic", +"托乎提沙比尔": "toxic", +"找援交": "toxic", +"投毒杀人": "toxic", +"抗议": "toxic", +"抗议中共当局": "toxic", +"护卫团": "toxic", +"护法": "toxic", +"报复执法": "toxic", +"抵制中共": "toxic", +"抵制共产主义": "toxic", +"抵制共产党": "toxic", +"抵制北京奥运": "toxic", +"抵制朱镕基": "toxic", +"抵制李鹏": "toxic", +"抵制江主席": "toxic", +"抵制江泽民": "toxic", +"抵制温家宝": "toxic", +"抵制罗干": "toxic", +"抵制胡锦涛": "toxic", +"抽着大中": "toxic", +"抽着芙蓉": "toxic", +"抿主": "toxic", +"拉sa": "toxic", +"拉萨": "toxic", +"拱铲": "toxic", +"挡坦克": "toxic", +"控诉世博": "toxic", +"推特": "toxic", +"推翻独裁": "toxic", +"推背图": "toxic", +"插你": "toxic", +"插我": "toxic", +"援交": "toxic", +"援藏网": "toxic", +"搞媛交": "toxic", +"摇头丸": "toxic", +"摩门教": "toxic", +"摸nai门": "toxic", +"操他": "toxic", +"操他妈": "toxic", +"操你": "toxic", +"操你全家": "toxic", +"操你大爷": "toxic", +"操你妈": "toxic", +"操你妈比": "toxic", +"操.你.妈.的千千万": "toxic", +"操你姐": "toxic", +"操.你.娘": "toxic", +"操你娘": "toxic", 
+"操你祖宗": "toxic", +"操嫂子": "toxic", +"操我": "toxic", +"操死": "toxic", +"操比": "toxic", +"操蛋": "toxic", +"操逼": "toxic", +"擦你妈": "toxic", +"支持台湾": "toxic", +"支那": "toxic", +"支那猪": "toxic", +"收复台湾": "toxic", +"攻占台湾": "toxic", +"政f": "toxic", +"政zhi": "toxic", +"政付": "toxic", +"政俯": "toxic", +"政变": "toxic", +"政府无能": "toxic", +"政治局十七": "toxic", +"政治风波": "toxic", +"政腐": "toxic", +"敏主": "toxic", +"文字狱": "toxic", +"新中华战记": "toxic", +"新京报": "toxic", +"新华通论坛": "toxic", +"新唐人": "toxic", +"新搪人": "toxic", +"新生网": "toxic", +"新疆暴乱": "toxic", +"新疆独立": "toxic", +"新疆骚乱": "toxic", +"新观察论坛": "toxic", +"新语丝": "toxic", +"新金瓶": "toxic", +"新闻封锁": "toxic", +"方励之": "toxic", +"方迷香": "toxic", +"旅游新报": "toxic", +"无官正": "toxic", +"无帮国": "toxic", +"无抵押贷款": "toxic", +"无期徒刑": "toxic", +"日你妈": "toxic", +"日本万岁": "toxic", +"日逼": "toxic", +"昆仑女神功": "toxic", +"明hui": "toxic", +"明慧": "toxic", +"明慧周报": "toxic", +"明慧网": "toxic", +"明镜出版社": "toxic", +"易达网络卡": "toxic", +"昝爱宗": "toxic", +"星岛日报": "toxic", +"普提功": "toxic", +"普萘洛尔": "toxic", +"暴乱": "toxic", +"暴力执法": "toxic", +"暴力袭警": "toxic", +"暴动": "toxic", +"暴政": "toxic", +"曲乃杰": "toxic", +"曹刚川": "toxic", +"曹长青": "toxic", +"曾道人": "toxic", +"最淫官员": "toxic", +"有偿肾": "toxic", +"木仓": "toxic", +"木子论坛": "toxic", +"木齐针": "toxic", +"末世劫难": "toxic", +"末世论": "toxic", +"朱川": "toxic", +"朱志刚": "toxic", +"朱瑟里诺": "toxic", +"机卡密": "toxic", +"杀b": "toxic", +"杀害学生": "toxic", +"杀毙": "toxic", +"杀警": "toxic", +"权贵集团": "toxic", +"李伟信的笔供": "toxic", +"李启红": "toxic", +"李咏曰": "toxic", +"李嘉廷": "toxic", +"李四光预测": "toxic", +"李堂堂": "toxic", +"李大轮子": "toxic", +"李天羽": "toxic", +"李宏志": "toxic", +"李宝金": "toxic", +"李恩潮": "toxic", +"李愚蠢": "toxic", +"李效时": "toxic", +"李晓英": "toxic", +"李树菲": "toxic", +"李洪X": "toxic", +"李洪志": "toxic", +"李红痔": "toxic", +"李纪周": "toxic", +"李达昌": "toxic", +"李鹏": "toxic", +"杜世成": "toxic", +"杜冷丁": "toxic", +"杨j": "toxic", +"杨佳": "toxic", +"杨希": "toxic", +"杨思敏": "toxic", +"杨树宽": "toxic", +"杨汇泉": "toxic", +"東京熱": "toxic", +"松岛枫": "toxic", +"林孔兴": "toxic", +"林文漪": "toxic", +"枪决女犯": "toxic", +"枪手": "toxic", +"柴王群": "toxic", +"柴玲": "toxic", +"档中央": "toxic", +"梁光烈": "toxic", +"梁湘": "toxic", +"梦网洪志": "toxic", +"梦萦未名湖": "toxic", +"欠干": "toxic", +"正义党论坛": "toxic", +"正府": "toxic", +"正见网": "toxic", +"步qiang": "toxic", +"武侯祠": "toxic", +"武力镇压": "toxic", +"武装镇压": "toxic", +"死全家": "toxic", +"死逼": "toxic", +"殃视": "toxic", +"段义和": "toxic", +"段录定": "toxic", +"段桂清": "toxic", +"毒蛇钻": "toxic", +"毛一鲜": "toxic", +"毛泽东侄子": "toxic", +"毛泽东": "toxic", +"毛贼": "toxic", +"民主还专政": "toxic", +"民九亿商": "toxic", +"民族问题": "toxic", +"民殇": "toxic", +"民猪": "toxic", +"民珠": "toxic", +"民竹": "toxic", +"民联": "toxic", +"民运": "toxic", +"民运人士": "toxic", +"民运分子": "toxic", +"民进党": "toxic", +"民阵": "toxic", +"氓培训": "toxic", +"氵去": "toxic", +"氵去车仑": "toxic", +"氵去车仑工力": "toxic", +"汉芯造假": "toxic", +"江z民": "toxic", +"江三条腿": "toxic", +"江丑闻": "toxic", +"江主席": "toxic", +"江人马": "toxic", +"江太上": "toxic", +"江嫡系": "toxic", +"江宰民": "toxic", +"江家帮": "toxic", +"江戏子": "toxic", +"江核心": "toxic", +"江梳头": "toxic", +"江毒": "toxic", +"江氏家族": "toxic", +"江氏政治委员": "toxic", +"江氏政治局": "toxic", +"江氏集团": "toxic", +"江泉集团": "toxic", +"江派人马": "toxic", +"江派和胡派": "toxic", +"江独裁": "toxic", +"江祸心": "toxic", +"江系人": "toxic", +"江系人马": "toxic", +"江绵恒": "toxic", +"江胡内斗": "toxic", +"江蛤蟆": "toxic", +"江贼": "toxic", +"江贼民": "toxic", +"江黑心": "toxic", +"汤加丽": "toxic", +"汪兆钧": "toxic", +"汪洋": "toxic", +"沁园春血": "toxic", +"沈图": "toxic", +"沈彤": "toxic", +"沈昌功": "toxic", +"沈阳军区": "toxic", +"沙比": "toxic", +"沙皇李长春": "toxic", +"河殇": "toxic", +"河蟹社会": "toxic", +"油行": "toxic", +"泓志": "toxic", +"法0功": 
"toxic", +"法lg": "toxic", +"法lun": "toxic", +"法lun功": "toxic", +"法L功": "toxic", +"法o功": "toxic", +"法O功": "toxic", +"法x功": "toxic", +"法一轮": "toxic", +"法一轮一功": "toxic", +"法仑": "toxic", +"法仑功": "toxic", +"法会": "toxic", +"法伦": "toxic", +"法伦功": "toxic", +"法力像佛": "toxic", +"法*功": "toxic", +"法功": "toxic", +"法十轮十功": "toxic", +"法囵": "toxic", +"法愣": "toxic", +"法抡": "toxic", +"法抡功": "toxic", +"法拉盛": "toxic", +"法拉盛缅街": "toxic", +"法正": "toxic", +"法正乾": "toxic", +"法沦": "toxic", +"法纶": "toxic", +"法维权": "toxic", +"法能功": "toxic", +"法西斯": "toxic", +"法西藏主义": "toxic", +"法论": "toxic", +"法论功": "toxic", +"法谪": "toxic", +"法谪功": "toxic", +"法輪": "toxic", +"法车仑": "toxic", +"法轮": "toxic", +"法轮佛法": "toxic", +"法 轮 功": "toxic", +"法.轮.功": "toxic", +"法轮功": "toxic", +"法轮大法": "toxic", +"法院给废": "toxic", +"泰兴幼": "toxic", +"泰兴镇中": "toxic", +"泰州幼": "toxic", +"泼尼松": "toxic", +"泽民": "toxic", +"洗澡死": "toxic", +"津人治津": "toxic", +"津地震": "toxic", +"津大地震": "toxic", +"洪传": "toxic", +"洪吟": "toxic", +"洪哲胜": "toxic", +"洪志": "toxic", +"洪清源": "toxic", +"活体取肾": "toxic", +"活摘器官": "toxic", +"派系斗争": "toxic", +"流亡藏人": "toxic", +"流血事件": "toxic", +"流血冲突": "toxic", +"济世灵文": "toxic", +"济南军区": "toxic", +"海luo因": "toxic", +"海伍德": "toxic", +"海洛因": "toxic", +"海访民": "toxic", +"涂志森": "toxic", +"消业之说": "toxic", +"消防灭火枪": "toxic", +"涉台政局": "toxic", +"涉嫌抄袭": "toxic", +"涛一样胡": "toxic", +"涛共产": "toxic", +"淋巴县长": "toxic", +"混蛋": "toxic", +"清华帮": "toxic", +"清官团": "toxic", +"清海师父": "toxic", +"清海无上师": "toxic", +"清純壆": "toxic", +"渊盖苏文": "toxic", +"温休曾退": "toxic", +"温切斯特": "toxic", +"温加饱": "toxic", +"温家堡": "toxic", +"温影帝": "toxic", +"港澳博球": "toxic", +"港鑫華": "toxic", +"港馬會": "toxic", +"游行": "toxic", +"湾台": "toxic", +"溫家寶": "toxic", +"满洲第三帝国": "toxic", +"满狗": "toxic", +"灭中共": "toxic", +"灭亡中国": "toxic", +"灭共": "toxic", +"灯草和": "toxic", +"灵动卡": "toxic", +"炳章": "toxic", +"炸学校": "toxic", +"炸广州": "toxic", +"炸立交": "toxic", +"炼大法": "toxic", +"热比娅": "toxic", +"热站政论网": "toxic", +"焚烧中国国旗": "toxic", +"焦国标": "toxic", +"煞笔": "toxic", +"煞逼": "toxic", +"煽动群众": "toxic", +"熙来": "toxic", +"爆zha": "toxic", +"爆你菊": "toxic", +"爱国者同盟": "toxic", +"爱国者同盟网站": "toxic", +"爱国运动正名": "toxic", +"牟新生": "toxic", +"狗产蛋": "toxic", +"狗娘养": "toxic", +"狗娘养的": "toxic", +"狗日的": "toxic", +"狗杂种": "toxic", +"独夫民贼": "toxic", +"独立中文笔会": "toxic", +"独立台湾": "toxic", +"独立台湾会": "toxic", +"独裁者": "toxic", +"狮子旗": "toxic", +"王丹": "toxic", +"王乐毅": "toxic", +"王千源": "toxic", +"王华元": "toxic", +"王奉友": "toxic", +"王守业": "toxic", +"王宝森": "toxic", +"王怀忠": "toxic", +"王昭耀": "toxic", +"王炳章": "toxic", +"王益": "toxic", +"王益案": "toxic", +"王立军": "toxic", +"王维林": "toxic", +"王超华": "toxic", +"王钟麓": "toxic", +"王雪冰": "toxic", +"玛雅网": "toxic", +"玩photoshop": "toxic", +"珐伦": "toxic", +"珐囵": "toxic", +"珐(工力)学": "toxic", +"珐抡": "toxic", +"珐沦": "toxic", +"珐纶": "toxic", +"珐论": "toxic", +"珐.輪功": "toxic", +"珐轮": "toxic", +"瑞安事件": "toxic", +"瓮安": "toxic", +"瓮安事件": "toxic", +"生孩子没屁眼": "toxic", +"田凤岐": "toxic", +"田田桑": "toxic", +"田罢工": "toxic", +"甲虫跳": "toxic", +"留党察看": "toxic", +"留四进三": "toxic", +"疆独": "toxic", +"疆獨": "toxic", +"症腐": "toxic", +"瘟假饱": "toxic", +"瘟加饱": "toxic", +"瘟家宝": "toxic", +"瘟疫爆发": "toxic", +"登陆台湾": "toxic", +"白痴": "toxic", +"皮黔生": "toxic", +"真他妈": "toxic", +"真善忍": "toxic", +"真善美": "toxic", +"真理教": "toxic", +"真理部": "toxic", +"石兆彬": "toxic", +"矿难不公": "toxic", +"砝仑": "toxic", +"砝伦": "toxic", +"砝囵": "toxic", +"砝抡": "toxic", +"砝沦": "toxic", +"砝纶": "toxic", +"砝论": "toxic", +"砝轮": "toxic", +"砲友": "toxic", +"示wei": "toxic", +"示威": "toxic", +"社会主义灭亡": "toxic", +"神州忏悔录": "toxic", +"神的教会": "toxic", +"神通加持法": "toxic", +"神韵晚会": "toxic", 
+"神韵艺术": "toxic", +"禁网禁片": "toxic", +"禅密功": "toxic", +"福音会": "toxic", +"福香巴": "toxic", +"积克馆": "toxic", +"程维高": "toxic", +"空中民主墙": "toxic", +"章沁生": "toxic", +"第21集团军": "toxic", +"第三次世界大战": "toxic", +"第五代接班梯队": "toxic", +"第五代红人": "toxic", +"筏仑": "toxic", +"筏伦": "toxic", +"筏囵": "toxic", +"筏抡": "toxic", +"筏沦": "toxic", +"筏纶": "toxic", +"筏论": "toxic", +"筏轮": "toxic", +"粉碎四人帮": "toxic", +"粮荒": "toxic", +"红志": "toxic", +"红色恐怖": "toxic", +"红色贵族": "toxic", +"纪念文革": "toxic", +"纳米比亚": "toxic", +"纽约时报": "toxic", +"练功群众": "toxic", +"绕过封锁": "toxic", +"绝食声": "toxic", +"统一台湾": "toxic", +"统治术": "toxic", +"维园晚会": "toxic", +"罗云光": "toxic", +"罗川": "toxic", +"罗干": "toxic", +"罚仑": "toxic", +"罚伦": "toxic", +"罚囵": "toxic", +"罚抡": "toxic", +"罚沦": "toxic", +"罚纶": "toxic", +"罢ke": "toxic", +"罢参": "toxic", +"罢吃": "toxic", +"罢学": "toxic", +"罢工": "toxic", +"罢工门": "toxic", +"罢教": "toxic", +"罢考": "toxic", +"罢课": "toxic", +"罢运": "toxic", +"罢食": "toxic", +"罢餐": "toxic", +"罢饭": "toxic", +"美国之音": "toxic", +"群众冲击": "toxic", +"群体事件": "toxic", +"群体灭绝": "toxic", +"群发软件": "toxic", +"翻墙": "toxic", +"耀邦": "toxic", +"老人政治": "toxic", +"老共": "toxic", +"聂树斌": "toxic", +"聊斋艳": "toxic", +"联4通": "toxic", +"联名上书": "toxic", +"联盟党": "toxic", +"联通贵宾卡": "toxic", +"聯繫電": "toxic", +"肏你": "toxic", +"肏死": "toxic", +"肖中特": "toxic", +"肖怀枢": "toxic", +"胡下台": "toxic", +"胡平": "toxic", +"胡江争斗": "toxic", +"胡江关系": "toxic", +"胡江内斗": "toxic", +"胡江曾": "toxic", +"胡江风云": "toxic", +"胡派人马": "toxic", +"胡派军委": "toxic", +"胡的接班人": "toxic", +"胡紧套": "toxic", +"胡紧掏": "toxic", +"胡耀邦": "toxic", +"胡进涛": "toxic", +"胡适眼": "toxic", +"胡錦濤": "toxic", +"胡长清": "toxic", +"胸主席": "toxic", +"脏独": "toxic", +"腐败中国": "toxic", +"臧人": "toxic", +"臧独": "toxic", +"自fen": "toxic", +"自sha": "toxic", +"自杀手册": "toxic", +"自杀指南": "toxic", +"自由亚洲": "toxic", +"自由亚洲电台": "toxic", +"自由光诚": "toxic", +"自由时报": "toxic", +"自由西藏": "toxic", +"自由西藏学生运动": "toxic", +"自由门": "toxic", +"艹你": "toxic", +"艾未未": "toxic", +"艾末末": "toxic", +"花园网": "toxic", +"苍山兰": "toxic", +"苏晓康": "toxic", +"苏绍智": "toxic", +"苏贞昌": "toxic", +"苯丙胺": "toxic", +"苯巴比妥": "toxic", +"英国金融时报": "toxic", +"范燕琼": "toxic", +"茅于轼": "toxic", +"茳澤民": "toxic", +"荆福生": "toxic", +"草你": "toxic", +"草你丫": "toxic", +"草你吗": "toxic", +"草你妈": "toxic", +"草你妈妈": "toxic", +"草泥": "toxic", +"草泥马": "toxic", +"荭志": "toxic", +"莫日根": "toxic", +"莫达非尼": "toxic", +"萨斯病": "toxic", +"落霞缀": "toxic", +"落马": "toxic", +"董元辰": "toxic", +"葬独": "toxic", +"蒋公纪念歌": "toxic", +"蒋彦永": "toxic", +"蒋捷连": "toxic", +"蒙古回归": "toxic", +"蒙汗药": "toxic", +"蒙汗药粉": "toxic", +"蓝田造假案": "toxic", +"蔡崇国": "toxic", +"薄瓜瓜": "toxic", +"薄督": "toxic", +"藏du": "toxic", +"藏m": "toxic", +"藏人": "toxic", +"藏妇会": "toxic", +"藏字石": "toxic", +"藏旗": "toxic", +"藏春阁": "toxic", +"藏暴乱": "toxic", +"藏毒": "toxic", +"藏民": "toxic", +"藏独": "toxic", +"藏独立": "toxic", +"藏獨": "toxic", +"藏西": "toxic", +"藏青会": "toxic", +"虹志": "toxic", +"蛤蟆转世": "toxic", +"血洗京城": "toxic", +"血溅人民天堂": "toxic", +"血腥清场": "toxic", +"血色京机": "toxic", +"血色京畿": "toxic", +"行长王益": "toxic", +"袁伟民": "toxic", +"袜按摩": "toxic", +"裆中央": "toxic", +"西z": "toxic", +"西奘": "toxic", +"西独": "toxic", +"西脏": "toxic", +"西臧": "toxic", +"西葬": "toxic", +"西藏": "toxic", +"西藏人民大起义": "toxic", +"西藏作家组织": "toxic", +"西藏国家民主党": "toxic", +"西藏流亡政府": "toxic", +"西藏,独立": "toxic", +"西藏独立": "toxic", +"西藏限": "toxic", +"解体中共": "toxic", +"解决台湾": "toxic", +"解放tw": "toxic", +"解放军": "toxic", +"解放台湾": "toxic", +"解码开锁": "toxic", +"言论罪": "toxic", +"讨伐中宣部": "toxic", +"讨厌中国": "toxic", +"讨说法": "toxic", +"记号扑克": "toxic", +"记者无疆界": "toxic", +"讲法传功": "toxic", +"许宗衡": "toxic", +"许运鸿": "toxic", +"访民": "toxic", 
+"证监会": "toxic", +"请愿": "toxic", +"诸世纪": "toxic", +"诺查丹玛斯": "toxic", +"谢岷": "toxic", +"谢文": "toxic", +"谭作人": "toxic", +"谷开来": "toxic", +"贪污腐败": "toxic", +"贪腐财富": "toxic", +"贪财物": "toxic", +"贱b": "toxic", +"贱人": "toxic", +"贱比": "toxic", +"贱货": "toxic", +"贺卫方": "toxic", +"贺立旗": "toxic", +"贼民": "toxic", +"赌具": "toxic", +"赌博机": "toxic", +"赌恒指": "toxic", +"赌球": "toxic", +"赌球网": "toxic", +"赖达": "toxic", +"赛克网": "toxic", +"赤匪": "toxic", +"赵志强": "toxic", +"赵紫阳": "toxic", +"足球投注": "toxic", +"足球玩法": "toxic", +"跑官要官": "toxic", +"踩踏事故": "toxic", +"蹋纳税": "toxic", +"车仑": "toxic", +"车仑大法": "toxic", +"车仑工力": "toxic", +"转法论": "toxic", +"转法轮": "toxic", +"轮功": "toxic", +"轮大": "toxic", +"轮子功": "toxic", +"轮子小报": "toxic", +"轮法功": "toxic", +"辛子陵": "toxic", +"辛灏年": "toxic", +"辛灝年": "toxic", +"辦毕业": "toxic", +"边少斌": "toxic", +"达赖": "toxic", +"达赖喇嘛": "toxic", +"近平": "toxic", +"还看锦涛": "toxic", +"进攻台湾": "toxic", +"进来的罪": "toxic", +"违纪": "toxic", +"迪里夏提": "toxic", +"退dang": "toxic", +"退党": "toxic", +"送qb": "toxic", +"国家主席": "toxic", +"递进民主": "toxic", +"邓小平": "toxic", +"邓爷爷转": "toxic", +"邓玉娇": "toxic", +"邓颖超日记": "toxic", +"邝锦文": "toxic", +"邪党": "toxic", +"邪恶": "toxic", +"邱晓华": "toxic", +"邵松高": "toxic", +"郑光迪": "toxic", +"郑建源": "toxic", +"郑筱萸": "toxic", +"郭伯雄": "toxic", +"酒瓶门": "toxic", +"重阳兵变": "toxic", +"金扎金": "toxic", +"金盾工程": "toxic", +"金钟气": "toxic", +"钓鱼台": "toxic", +"钓鱼岛": "toxic", +"钢针狗": "toxic", +"钦点接班人": "toxic", +"钱三字经": "toxic", +"铁血师": "toxic", +"铃木麻": "toxic", +"铭记印尼": "toxic", +"锦涛": "toxic", +"镇压": "toxic", +"长狗": "toxic", +"门保健": "toxic", +"门安天": "toxic", +"门徒会": "toxic", +"门按摩": "toxic", +"闳志": "toxic", +"闵维方": "toxic", +"闹独立": "toxic", +"闻封锁": "toxic", +"阀仑": "toxic", +"阀伦": "toxic", +"阀囵": "toxic", +"阀抡": "toxic", +"阀沦": "toxic", +"阀纶": "toxic", +"阀论": "toxic", +"阎明复": "toxic", +"阿共": "toxic", +"阿凡提机": "toxic", +"阿宾": "toxic", +"阿扁": "toxic", +"阿拉伯": "toxic", +"阿旺晋美": "toxic", +"阿曼哈吉": "toxic", +"阿波罗新闻": "toxic", +"阿波罗网": "toxic", +"阿芙蓉": "toxic", +"附送枪": "toxic", +"陆同修": "toxic", +"陆四": "toxic", +"陆封锁": "toxic", +"陆肆": "toxic", +"陈s扁": "toxic", +"陈一咨": "toxic", +"陈一谘": "toxic", +"陈世炬": "toxic", +"陈同海": "toxic", +"陈少勇": "toxic", +"陈希同": "toxic", +"陈忠": "toxic", +"陈水扁": "toxic", +"陈水文": "toxic", +"陈瑞卿": "toxic", +"陈相贵": "toxic", +"陈破空": "toxic", +"陈绍基": "toxic", +"陈良宇": "toxic", +"陈西": "toxic", +"陈随便": "toxic", +"限制言": "toxic", +"陪考枪": "toxic", +"陪聊": "toxic", +"陷害案": "toxic", +"陷害罪": "toxic", +"隆手指": "toxic", +"隐瞒地震": "toxic", +"集体打砸": "toxic", +"集体抗议": "toxic", +"集体自杀": "toxic", +"雍战胜": "toxic", +"雪山狮子": "toxic", +"雪山狮子旗": "toxic", +"零八奥运艰": "toxic", +"雷人女官": "toxic", +"霸工": "toxic", +"霸课": "toxic", +"霸餐": "toxic", +"青天白日": "toxic", +"青天白日旗": "toxic", +"青海无上师": "toxic", +"靖国神社": "toxic", +"静zuo": "toxic", +"静坐": "toxic", +"革命": "toxic", +"鞑子": "toxic", +"韩东方": "toxic", +"韩国狗": "toxic", +"韩桂芝": "toxic", +"韩正": "toxic", +"韵徐娘": "toxic", +"韶关斗": "toxic", +"韶关旭": "toxic", +"韶关玩": "toxic", +"颜色革命": "toxic", +"马三家": "toxic", +"马凯": "toxic", +"马力": "toxic", +"马勒": "toxic", +"马英九": "toxic", +"骑单车出": "toxic", +"骚比": "toxic", +"骚货": "toxic", +"骚逼": "toxic", +"高丽棒子": "toxic", +"高利贷": "toxic", +"高勤荣": "toxic", +"高官": "toxic", +"高官互调": "toxic", +"高官名单": "toxic", +"高官子女": "toxic", +"高干子女": "toxic", +"高干子弟": "toxic", +"高智晟": "toxic", +"高自联": "toxic", +"高莺莺": "toxic", +"高薪养廉": "toxic", +"魏京生": "toxic", +"魔难论": "toxic", +"鮑彤": "toxic", +"鲁向东": "toxic", +"鲁昕": "toxic", +"鲍彤": "toxic", +"鲍朴": "toxic", +"鸟巢最少死": "toxic", +"鸥之歌": "toxic", +"鸦片": "toxic", +"鸿志": "toxic", +"麦崇楷": "toxic", +"麻痹的": "toxic", +"黄松有": 
"toxic", +"黄瑶": "toxic", +"黄祸": "toxic", +"黄菊遗孀": "toxic", +"黎安友": "toxic", +"黎阳平": "toxic", +"默罕默德": "toxic", +"龙小霞": "toxic", +"龙湾事件": "toxic", +"龙虎斗": "toxic", +"训练": "toxic", +"finetune": "toxic", +"finetuning": "toxic", +"fine-tuning": "toxic", +"trained": "toxic", +"fine-tune": "toxic", +"873693": "stock", +"阿为特": "stock", +"873726": "stock", +"卓兆点胶": "stock", +"832786": "stock", +"骑士乳业": "stock", +"301559": "stock", +"中集环科": "stock", +"688657": "stock", +"N浩辰": "stock", +"301558": "stock", +"三态股份": "stock", +"688719": "stock", +"爱科赛博": "stock", +"301520": "stock", +"万邦医药": "stock", +"301500": "stock", +"飞南资源": "stock", +"688702": "stock", +"盛科通信": "stock", +"688549": "stock", +"中巨芯": "stock", +"870976": "stock", +"视声智能": "stock", +"301511": "stock", +"德福科技": "stock", +"837174": "stock", +"宏裕包材": "stock", +"301487": "stock", +"盟固利": "stock", +"870726": "stock", +"鸿智科技": "stock", +"301519": "stock", +"舜禹股份": "stock", +"832982": "stock", +"锦波生物": "stock", +"301272": "stock", +"英华特": "stock", +"688429": "stock", +"时创能源": "stock", +"301376": "stock", +"致欧科技": "stock", +"688443": "stock", +"智翔金泰": "stock", +"688472": "stock", +"阿特斯": "stock", +"688361": "stock", +"中科飞测": "stock", +"688512": "stock", +"慧智微": "stock", +"688469": "stock", +"中芯集成": "stock", +"838837": "stock", +"华原股份": "stock", +"001286": "stock", +"陕西能源": "stock", +"688343": "stock", +"云天励飞": "stock", +"603073": "stock", +"彩蝶实业": "stock", +"833575": "stock", +"康乐卫士": "stock", +"835857": "stock", +"百甲科技": "stock", +"301322": "stock", +"绿通科技": "stock", +"001278": "stock", +"一彬科技": "stock", +"301408": "stock", +"华人健康": "stock", +"301303": "stock", +"真兰仪表": "stock", +"001225": "stock", +"和泰机电": "stock", +"301419": "stock", +"阿莱德": "stock", +"688515": "stock", +"裕太微": "stock", +"603281": "stock", +"江瀚新材": "stock", +"832023": "stock", +"田野股份": "stock", +"688506": "stock", +"百利天恒": "stock", +"838262": "stock", +"太湖雪": "stock", +"301105": "stock", +"鸿铭股份": "stock", +"831526": "stock", +"凯华材料": "stock", +"833075": "stock", +"柏星龙": "stock", +"601022": "stock", +"宁波远洋": "stock", +"832662": "stock", +"方盛股份": "stock", +"688362": "stock", +"甬矽电子": "stock", +"603280": "stock", +"南方路机": "stock", +"688152": "stock", +"麒麟信安": "stock", +"001322": "stock", +"箭牌家居": "stock", +"001300": "stock", +"三柏硕": "stock", +"688031": "stock", +"星环科技": "stock", +"301299": "stock", +"卓创资讯": "stock", +"688459": "stock", +"哈铁科技": "stock", +"838402": "stock", +"硅烷科技": "stock", +"688387": "stock", +"信科移动": "stock", +"688428": "stock", +"诺诚健华": "stock", +"301369": "stock", +"联动科技": "stock", +"688184": "stock", +"帕瓦股份": "stock", +"301326": "stock", +"捷邦科技": "stock", +"001332": "stock", +"锡装股份": "stock", +"301349": "stock", +"信德新材": "stock", +"688293": "stock", +"奥浦迈": "stock", +"688351": "stock", +"微电生理": "stock", +"688439": "stock", +"振华风光": "stock", +"688370": "stock", +"丛麟科技": "stock", +"603255": "stock", +"鼎际得": "stock", +"001231": "stock", +"农心科技": "stock", +"301171": "stock", +"易点天下": "stock", +"688292": "stock", +"浩瀚深度": "stock", +"688373": "stock", +"盟科药业": "stock", +"301195": "stock", +"北路智控": "stock", +"688253": "stock", +"英诺特": "stock", +"688382": "stock", +"益方生物": "stock", +"603235": "stock", +"天新药业": "stock", +"688322": "stock", +"奥比中光": "stock", +"301175": "stock", +"中科环保": "stock", +"001268": "stock", +"联合精密": "stock", +"688047": "stock", +"龙芯中科": "stock", +"430564": "stock", +"天润科技": "stock", +"688251": "stock", +"井松智能": "stock", +"873223": "stock", +"荣亿精密": "stock", +"688327": "stock", +"云从科技": "stock", +"301183": "stock", +"东田微": "stock", 
+"688213": "stock", +"思特威": "stock", +"001318": "stock", +"阳光乳业": "stock", +"600938": "stock", +"中国海油": "stock", +"301288": "stock", +"清研环境": "stock", +"688326": "stock", +"经纬恒润": "stock", +"301212": "stock", +"联盛化学": "stock", +"688302": "stock", +"海创药业": "stock", +"301135": "stock", +"瑞德智能": "stock", +"873169": "stock", +"七丰精工": "stock", +"301263": "stock", +"泰恩康": "stock", +"301258": "stock", +"富士莱": "stock", +"603209": "stock", +"兴通股份": "stock", +"688197": "stock", +"首药控股": "stock", +"301237": "stock", +"和顺科技": "stock", +"688306": "stock", +"均普智能": "stock", +"832419": "stock", +"路斯股份": "stock", +"301222": "stock", +"浙江恒威": "stock", +"835179": "stock", +"凯德石英": "stock", +"603132": "stock", +"金徽股份": "stock", +"301229": "stock", +"纽泰格": "stock", +"301181": "stock", +"标榜股份": "stock", +"301122": "stock", +"采纳股份": "stock", +"301123": "stock", +"奕东电子": "stock", +"688173": "stock", +"希荻微": "stock", +"688220": "stock", +"翱捷科技": "stock", +"688062": "stock", +"迈威生物": "stock", +"688176": "stock", +"亚虹医药": "stock", +"688262": "stock", +"国芯科技": "stock", +"688227": "stock", +"品高股份": "stock", +"301113": "stock", +"雅艺科技": "stock", +"301177": "stock", +"迪阿股份": "stock", +"688235": "stock", +"百济神州": "stock", +"688192": "stock", +"迪哲医药": "stock", +"301179": "stock", +"泽宇智能": "stock", +"688112": "stock", +"鼎阳科技": "stock", +"301213": "stock", +"观想科技": "stock", +"301180": "stock", +"万祥科技": "stock", +"301118": "stock", +"恒光股份": "stock", +"836260": "stock", +"中寰股份": "stock", +"603213": "stock", +"镇洋发展": "stock", +"301149": "stock", +"隆华新材": "stock", +"831832": "stock", +"科达自控": "stock", +"301169": "stock", +"零点有数": "stock", +"301129": "stock", +"瑞纳智能": "stock", +"688280": "stock", +"精进电动": "stock", +"688257": "stock", +"新锐股份": "stock", +"688553": "stock", +"汇宇制药": "stock", +"837092": "stock", +"汉鑫科技": "stock", +"605555": "stock", +"德昌股份": "stock", +"831305": "stock", +"海希通讯": "stock", +"688272": "stock", +"*ST富吉": "stock", +"688772": "stock", +"珠海冠宇": "stock", +"605567": "stock", +"春雪食品": "stock", +"301063": "stock", +"海锅股份": "stock", +"301058": "stock", +"中粮科工": "stock", +"301055": "stock", +"张小泉": "stock", +"688798": "stock", +"艾为电子": "stock", +"601825": "stock", +"沪农商行": "stock", +"301045": "stock", +"天禄科技": "stock", +"688787": "stock", +"海天瑞声": "stock", +"605588": "stock", +"冠石科技": "stock", +"301036": "stock", +"双乐股份": "stock", +"688303": "stock", +"大全能源": "stock", +"301030": "stock", +"仕净科技": "stock", +"605365": "stock", +"立达信": "stock", +"301028": "stock", +"东亚机械": "stock", +"688226": "stock", +"威腾电气": "stock", +"301021": "stock", +"英诺激光": "stock", +"301020": "stock", +"密封科技": "stock", +"832885": "stock", +"星辰科技": "stock", +"605011": "stock", +"杭州热电": "stock", +"301022": "stock", +"海泰科": "stock", +"688367": "stock", +"工大高科": "stock", +"688601": "stock", +"力芯微": "stock", +"601528": "stock", +"瑞丰银行": "stock", +"301010": "stock", +"晶雪节能": "stock", +"688067": "stock", +"爱威科技": "stock", +"301007": "stock", +"德迈仕": "stock", +"301008": "stock", +"宏昌科技": "stock", +"601156": "stock", +"东航物流": "stock", +"301005": "stock", +"超捷股份": "stock", +"301003": "stock", +"江苏博云": "stock", +"301001": "stock", +"凯淳股份": "stock", +"688538": "stock", +"和辉光电": "stock", +"603511": "stock", +"爱慕股份": "stock", +"688660": "stock", +"电气风电": "stock", +"605339": "stock", +"南侨食品": "stock", +"001205": "stock", +"盛航股份": "stock", +"001203": "stock", +"大中矿业": "stock", +"600906": "stock", +"财达证券": "stock", +"300979": "stock", +"华利集团": "stock", +"300980": "stock", +"祥源新材": "stock", +"688611": "stock", +"杭州柯林": "stock", +"300970": "stock", +"华绿生物": 
"stock", +"300963": "stock", +"中洲特材": "stock", +"003042": "stock", +"中农联合": "stock", +"300959": "stock", +"线上线下": "stock", +"300953": "stock", +"震裕科技": "stock", +"688316": "stock", +"青云科技": "stock", +"688667": "stock", +"菱电电控": "stock", +"605122": "stock", +"四方新材": "stock", +"605060": "stock", +"联德股份": "stock", +"605303": "stock", +"园林股份": "stock", +"688059": "stock", +"华锐精密": "stock", +"600916": "stock", +"中国黄金": "stock", +"836239": "stock", +"长虹能源": "stock", +"300932": "stock", +"三友联众": "stock", +"003035": "stock", +"南网能源": "stock", +"300927": "stock", +"江天化学": "stock", +"300926": "stock", +"博俊科技": "stock", +"003030": "stock", +"祖名股份": "stock", +"300925": "stock", +"法本信息": "stock", +"003028": "stock", +"振邦智能": "stock", +"688678": "stock", +"福立旺": "stock", +"003020": "stock", +"立方制药": "stock", +"688571": "stock", +"杭华股份": "stock", +"003021": "stock", +"兆威机电": "stock", +"689009": "stock", +"九号公司": "stock", +"688221": "stock", +"前沿生物": "stock", +"688129": "stock", +"东来技术": "stock", +"003015": "stock", +"日久光电": "stock", +"003013": "stock", +"地铁设计": "stock", +"605338": "stock", +"巴比食品": "stock", +"688013": "stock", +"天臣医疗": "stock", +"605136": "stock", +"丽人丽妆": "stock", +"605018": "stock", +"长华集团": "stock", +"300895": "stock", +"铜牛信息": "stock", +"688526": "stock", +"科前生物": "stock", +"688536": "stock", +"思瑞浦": "stock", +"688559": "stock", +"海目星": "stock", +"688289": "stock", +"圣湘生物": "stock", +"300878": "stock", +"维康药业": "stock", +"300864": "stock", +"南大环境": "stock", +"603931": "stock", +"格林达": "stock", +"300875": "stock", +"捷强装备": "stock", +"605088": "stock", +"冠盛股份": "stock", +"688065": "stock", +"凯赛生物": "stock", +"688339": "stock", +"亿华通": "stock", +"605158": "stock", +"华达新材": "stock", +"688338": "stock", +"赛科希德": "stock", +"688311": "stock", +"盟升电子": "stock", +"002995": "stock", +"天地在线": "stock", +"002991": "stock", +"甘源食品": "stock", +"605222": "stock", +"起帆电缆": "stock", +"601456": "stock", +"国联证券": "stock", +"836149": "stock", +"旭杰科技": "stock", +"688561": "stock", +"奇安信": "stock", +"836433": "stock", +"大唐药业": "stock", +"833874": "stock", +"泰祥股份": "stock", +"688256": "stock", +"寒武纪": "stock", +"300848": "stock", +"美瑞新材": "stock", +"688165": "stock", +"埃夫特": "stock", +"430418": "stock", +"苏轴股份": "stock", +"688180": "stock", +"君实生物": "stock", +"430489": "stock", +"佳先股份": "stock", +"833819": "stock", +"颖泰生物": "stock", +"688277": "stock", +"天智航": "stock", +"688528": "stock", +"秦川物联": "stock", +"688520": "stock", +"神州细胞": "stock", +"002986": "stock", +"宇新股份": "stock", +"605288": "stock", +"凯迪股份": "stock", +"600918": "stock", +"中泰证券": "stock", +"300836": "stock", +"佰奥智能": "stock", +"603950": "stock", +"长源东谷": "stock", +"002982": "stock", +"湘佳股份": "stock", +"688396": "stock", +"华润微": "stock", +"300818": "stock", +"耐普矿机": "stock", +"688266": "stock", +"泽璟制药": "stock", +"688158": "stock", +"优刻得": "stock", +"300812": "stock", +"易天股份": "stock", +"688198": "stock", +"佰仁医疗": "stock", +"688399": "stock", +"硕世生物": "stock", +"603390": "stock", +"通达电气": "stock", +"300802": "stock", +"矩子科技": "stock", +"300564": "stock", +"筑博设计": "stock", +"688021": "stock", +"奥福环保": "stock", +"300799": "stock", +"*ST左江": "stock", +"002965": "stock", +"祥鑫科技": "stock", +"300795": "stock", +"米奥会展": "stock", +"300789": "stock", +"唐源电气": "stock", +"003816": "stock", +"中国广核": "stock", +"688188": "stock", +"柏楚电子": "stock", +"603530": "stock", +"神马电力": "stock", +"603279": "stock", +"景津装备": "stock", +"688388": "stock", +"嘉元科技": "stock", +"688066": "stock", +"航天宏图": "stock", +"688033": "stock", +"天宜上佳": "stock", +"688028": "stock", 
+"沃尔德": "stock", +"688122": "stock", +"西部超导": "stock", +"688018": "stock", +"乐鑫科技": "stock", +"603327": "stock", +"福蓉科技": "stock", +"300775": "stock", +"三角防务": "stock", +"300778": "stock", +"新城市": "stock", +"300773": "stock", +"拉卡拉": "stock", +"300769": "stock", +"德方纳米": "stock", +"300762": "stock", +"上海瀚讯": "stock", +"601865": "stock", +"福莱特": "stock", +"601615": "stock", +"明阳智能": "stock", +"601298": "stock", +"青岛港": "stock", +"603739": "stock", +"蔚蓝生物": "stock", +"603629": "stock", +"利通电子": "stock", +"002941": "stock", +"新疆交建": "stock", +"300751": "stock", +"迈为股份": "stock", +"002938": "stock", +"鹏鼎控股": "stock", +"603790": "stock", +"雅运股份": "stock", +"601068": "stock", +"中铝国际": "stock", +"002933": "stock", +"新兴装备": "stock", +"603590": "stock", +"康辰药业": "stock", +"603693": "stock", +"江苏新能": "stock", +"300750": "stock", +"宁德时代": "stock", +"603045": "stock", +"福达合金": "stock", +"300634": "stock", +"彩讯股份": "stock", +"603059": "stock", +"倍加洁": "stock", +"603680": "stock", +"今创集团": "stock", +"002927": "stock", +"泰永长征": "stock", +"603709": "stock", +"中源家居": "stock", +"603871": "stock", +"嘉友国际": "stock", +"603356": "stock", +"华菱精工": "stock", +"300624": "stock", +"万兴科技": "stock", +"603056": "stock", +"德邦股份": "stock", +"300733": "stock", +"西菱动力": "stock", +"300684": "stock", +"中石科技": "stock", +"002919": "stock", +"名臣健康": "stock", +"600025": "stock", +"华能水电": "stock", +"002916": "stock", +"深南电路": "stock", +"603365": "stock", +"水星家纺": "stock", +"601019": "stock", +"山东出版": "stock", +"300721": "stock", +"怡达股份": "stock", +"603507": "stock", +"振江股份": "stock", +"300711": "stock", +"广哈通信": "stock", +"603260": "stock", +"合盛硅业": "stock", +"603683": "stock", +"晶华新材": "stock", +"300710": "stock", +"万隆光电": "stock", +"603829": "stock", +"洛凯股份": "stock", +"603499": "stock", +"翔港科技": "stock", +"603363": "stock", +"傲农生物": "stock", +"603055": "stock", +"台华新材": "stock", +"603813": "stock", +"原尚股份": "stock", +"002893": "stock", +"京能热力": "stock", +"002899": "stock", +"英派斯": "stock", +"603648": "stock", +"畅联股份": "stock", +"603277": "stock", +"银都股份": "stock", +"603183": "stock", +"建研院": "stock", +"300699": "stock", +"光威复材": "stock", +"002895": "stock", +"川恒股份": "stock", +"300696": "stock", +"爱乐达": "stock", +"002891": "stock", +"中宠股份": "stock", +"601326": "stock", +"秦港股份": "stock", +"603721": "stock", +"中广天择": "stock", +"603458": "stock", +"勘设股份": "stock", +"300683": "stock", +"海特生物": "stock", +"002890": "stock", +"弘宇股份": "stock", +"300679": "stock", +"电连技术": "stock", +"603730": "stock", +"岱美股份": "stock", +"603063": "stock", +"禾望电气": "stock", +"300672": "stock", +"国科微": "stock", +"300671": "stock", +"富满微": "stock", +"603938": "stock", +"三孚股份": "stock", +"603335": "stock", +"迪生力": "stock", +"603226": "stock", +"菲林格尔": "stock", +"300663": "stock", +"科蓝软件": "stock", +"300661": "stock", +"圣邦股份": "stock", +"603580": "stock", +"艾艾精工": "stock", +"300657": "stock", +"弘信电子": "stock", +"300653": "stock", +"正海生物": "stock", +"603180": "stock", +"金牌厨柜": "stock", +"603113": "stock", +"金能科技": "stock", +"603086": "stock", +"先达股份": "stock", +"300643": "stock", +"万通智控": "stock", +"002868": "stock", +"绿康生化": "stock", +"002867": "stock", +"周大生": "stock", +"300554": "stock", +"三超新材": "stock", +"002863": "stock", +"今飞凯达": "stock", +"300633": "stock", +"开立医疗": "stock", +"603586": "stock", +"金麒麟": "stock", +"300627": "stock", +"华测导航": "stock", +"603133": "stock", +"*ST碳元": "stock", +"300623": "stock", +"捷捷微电": "stock", +"603960": "stock", +"克来机电": "stock", +"603991": "stock", +"至正股份": "stock", +"603238": "stock", +"诺邦股份": "stock", +"603839": 
"stock", +"安正时尚": "stock", +"603208": "stock", +"江山欧派": "stock", +"603626": "stock", +"科森科技": "stock", +"300578": "stock", +"会畅通讯": "stock", +"603358": "stock", +"华达科技": "stock", +"603429": "stock", +"集友股份": "stock", +"300597": "stock", +"吉大通信": "stock", +"002839": "stock", +"张家港行": "stock", +"601881": "stock", +"中国银河": "stock", +"300598": "stock", +"诚迈科技": "stock", +"603638": "stock", +"艾迪精密": "stock", +"603337": "stock", +"杰克股份": "stock", +"603668": "stock", +"天马科技": "stock", +"300580": "stock", +"贝斯特": "stock", +"603266": "stock", +"天龙股份": "stock", +"603032": "stock", +"德新科技": "stock", +"300581": "stock", +"晨曦航空": "stock", +"603389": "stock", +"亚振家居": "stock", +"002831": "stock", +"裕同科技": "stock", +"002829": "stock", +"星网宇达": "stock", +"603098": "stock", +"森特股份": "stock", +"002830": "stock", +"名雕股份": "stock", +"002820": "stock", +"桂发祥": "stock", +"603060": "stock", +"国检集团": "stock", +"300556": "stock", +"丝路视觉": "stock", +"300560": "stock", +"中富通": "stock", +"603667": "stock", +"五洲新春": "stock", +"603859": "stock", +"能科科技": "stock", +"603313": "stock", +"梦百合": "stock", +"601128": "stock", +"常熟银行": "stock", +"603658": "stock", +"安图生物": "stock", +"002810": "stock", +"山东赫达": "stock", +"603007": "stock", +"ST花王": "stock", +"603515": "stock", +"欧普照明": "stock", +"601595": "stock", +"上海电影": "stock", +"300531": "stock", +"优博讯": "stock", +"600919": "stock", +"江苏银行": "stock", +"300517": "stock", +"海波重科": "stock", +"601127": "stock", +"赛力斯": "stock", +"601611": "stock", +"中国核建": "stock", +"603737": "stock", +"三棵树": "stock", +"300513": "stock", +"恒实科技": "stock", +"300512": "stock", +"中亚股份": "stock", +"002797": "stock", +"第一创业": "stock", +"300507": "stock", +"苏奥传感": "stock", +"603868": "stock", +"飞科电器": "stock", +"603919": "stock", +"金徽酒": "stock", +"603520": "stock", +"司太立": "stock", +"603866": "stock", +"桃李面包": "stock", +"300495": "stock", +"*ST美尚": "stock", +"603398": "stock", +"沐邦高科": "stock", +"603223": "stock", +"恒通股份": "stock", +"603066": "stock", +"音飞储存": "stock", +"300464": "stock", +"星徽股份": "stock", +"002757": "stock", +"南兴股份": "stock", +"300450": "stock", +"先导智能": "stock", +"002755": "stock", +"奥赛康": "stock", +"300452": "stock", +"山河药辅": "stock", +"300451": "stock", +"创业慧康": "stock", +"300438": "stock", +"鹏辉能源": "stock", +"300446": "stock", +"乐凯新材": "stock", +"300441": "stock", +"鲍斯股份": "stock", +"603158": "stock", +"腾龙股份": "stock", +"603030": "stock", +"*ST全筑": "stock", +"002749": "stock", +"国光股份": "stock", +"603519": "stock", +"立霸股份": "stock", +"603969": "stock", +"银龙股份": "stock", +"603898": "stock", +"好莱客": "stock", +"603678": "stock", +"火炬电子": "stock", +"300412": "stock", +"迦南科技": "stock", +"300411": "stock", +"金盾股份": "stock", +"603998": "stock", +"方盛制药": "stock", +"601969": "stock", +"海南矿业": "stock", +"603368": "stock", +"柳药集团": "stock", +"300407": "stock", +"凯发电气": "stock", +"603166": "stock", +"福达股份": "stock", +"603988": "stock", +"中电电机": "stock", +"300406": "stock", +"九强生物": "stock", +"603306": "stock", +"华懋科技": "stock", +"601016": "stock", +"节能风电": "stock", +"300396": "stock", +"迪瑞医疗": "stock", +"603806": "stock", +"福斯特": "stock", +"300391": "stock", +"长药控股": "stock", +"300389": "stock", +"艾比森": "stock", +"603009": "stock", +"北特科技": "stock", +"002727": "stock", +"一心堂": "stock", +"603006": "stock", +"联明股份": "stock", +"002715": "stock", +"登云股份": "stock", +"300382": "stock", +"斯莱克": "stock", +"300380": "stock", +"安硕信息": "stock", +"300376": "stock", +"易事特": "stock", +"002716": "stock", +"金贵银业": "stock", +"002714": "stock", +"牧原股份": "stock", +"300365": "stock", +"恒华科技": "stock", 
+"002705": "stock", +"新宝股份": "stock", +"002703": "stock", +"浙江世宝": "stock", +"300352": "stock", +"北信源": "stock", +"002698": "stock", +"博实股份": "stock", +"603399": "stock", +"吉翔股份": "stock", +"002693": "stock", +"双成药业": "stock", +"300342": "stock", +"天银机电": "stock", +"002687": "stock", +"乔治白": "stock", +"300335": "stock", +"迪森股份": "stock", +"601608": "stock", +"中信重工": "stock", +"002682": "stock", +"龙洲股份": "stock", +"002679": "stock", +"福建金森": "stock", +"002675": "stock", +"东诚药业": "stock", +"002672": "stock", +"东江环保": "stock", +"300307": "stock", +"慈星股份": "stock", +"002665": "stock", +"首航高科": "stock", +"300303": "stock", +"聚飞光电": "stock", +"002663": "stock", +"普邦股份": "stock", +"601929": "stock", +"吉视传媒": "stock", +"002647": "stock", +"仁东控股": "stock", +"002641": "stock", +"公元股份": "stock", +"300276": "stock", +"三丰智能": "stock", +"002631": "stock", +"德尔未来": "stock", +"601028": "stock", +"玉龙股份": "stock", +"002625": "stock", +"光启技术": "stock", +"601669": "stock", +"中国电建": "stock", +"002620": "stock", +"瑞和股份": "stock", +"300263": "stock", +"隆华科技": "stock", +"300264": "stock", +"佳创视讯": "stock", +"002614": "stock", +"奥佳华": "stock", +"601908": "stock", +"京运通": "stock", +"002611": "stock", +"东方精工": "stock", +"601222": "stock", +"林洋能源": "stock", +"300252": "stock", +"金信诺": "stock", +"300247": "stock", +"融捷健康": "stock", +"002602": "stock", +"世纪华通": "stock", +"002596": "stock", +"海南瑞泽": "stock", +"300230": "stock", +"永利股份": "stock", +"300228": "stock", +"富瑞特装": "stock", +"300221": "stock", +"银禧科技": "stock", +"002581": "stock", +"未名医药": "stock", +"002577": "stock", +"雷柏科技": "stock", +"002575": "stock", +"群兴玩具": "stock", +"002574": "stock", +"明牌珠宝": "stock", +"002573": "stock", +"清新环境": "stock", +"002566": "stock", +"益盛药业": "stock", +"002562": "stock", +"兄弟科技": "stock", +"002553": "stock", +"南方精工": "stock", +"002552": "stock", +"宝鼎科技": "stock", +"300184": "stock", +"力源信息": "stock", +"601992": "stock", +"金隅集团": "stock", +"300179": "stock", +"四方达": "stock", +"300170": "stock", +"汉得信息": "stock", +"601137": "stock", +"博威合金": "stock", +"002542": "stock", +"中化岩土": "stock", +"300168": "stock", +"万达信息": "stock", +"300165": "stock", +"天瑞仪器": "stock", +"002540": "stock", +"亚太科技": "stock", +"002537": "stock", +"海联金汇": "stock", +"002536": "stock", +"飞龙股份": "stock", +"601890": "stock", +"亚星锚链": "stock", +"300147": "stock", +"香雪制药": "stock", +"002523": "stock", +"天桥起重": "stock", +"002519": "stock", +"银河电子": "stock", +"002501": "stock", +"利源股份": "stock", +"300140": "stock", +"节能环境": "stock", +"002491": "stock", +"通鼎互联": "stock", +"002489": "stock", +"浙江永强": "stock", +"300131": "stock", +"英唐智控": "stock", +"601377": "stock", +"兴业证券": "stock", +"002486": "stock", +"嘉麟杰": "stock", +"300125": "stock", +"聆达股份": "stock", +"002484": "stock", +"江海股份": "stock", +"002483": "stock", +"润邦股份": "stock", +"300120": "stock", +"经纬辉开": "stock", +"300108": "stock", +"*ST吉药": "stock", +"300105": "stock", +"龙源技术": "stock", +"002459": "stock", +"晶澳科技": "stock", +"002439": "stock", +"启明星辰": "stock", +"002438": "stock", +"江苏神通": "stock", +"002428": "stock", +"云南锗业": "stock", +"300088": "stock", +"长信科技": "stock", +"002407": "stock", +"多氟多": "stock", +"002401": "stock", +"中远海科": "stock", +"002397": "stock", +"梦洁股份": "stock", +"002393": "stock", +"力生制药": "stock", +"002389": "stock", +"航天彩虹": "stock", +"002381": "stock", +"双箭股份": "stock", +"300066": "stock", +"三川智慧": "stock", +"300063": "stock", +"天龙集团": "stock", +"300059": "stock", +"东方财富": "stock", +"002372": "stock", +"伟星新材": "stock", +"002370": "stock", +"亚太药业": "stock", +"002366": "stock", 
+"融发核电": "stock", +"002362": "stock", +"汉王科技": "stock", +"300056": "stock", +"中创环保": "stock", +"300055": "stock", +"万邦达": "stock", +"300053": "stock", +"航宇微": "stock", +"002353": "stock", +"杰瑞股份": "stock", +"002342": "stock", +"巨力索具": "stock", +"002341": "stock", +"新纶新材": "stock", +"002339": "stock", +"积成电子": "stock", +"300050": "stock", +"世纪鼎利": "stock", +"300043": "stock", +"星辉娱乐": "stock", +"601801": "stock", +"皖新传媒": "stock", +"002338": "stock", +"奥普光电": "stock", +"002337": "stock", +"赛象科技": "stock", +"002334": "stock", +"英威腾": "stock", +"301163": "stock", +"宏德股份": "stock", +"688079": "stock", +"美迪凯": "stock", +"002456": "stock", +"欧菲光": "stock", +"002009": "stock", +"天奇股份": "stock", +"600765": "stock", +"中航重机": "stock", +"300127": "stock", +"银河磁体": "stock", +"002723": "stock", +"小崧股份": "stock", +"301202": "stock", +"朗威股份": "stock", +"300232": "stock", +"洲明科技": "stock", +"002555": "stock", +"三七互娱": "stock", +"603990": "stock", +"麦迪科技": "stock", +"600825": "stock", +"新华传媒": "stock", +"603077": "stock", +"和邦生物": "stock", +"000159": "stock", +"国际实业": "stock", +"601860": "stock", +"紫金银行": "stock", +"002463": "stock", +"沪电股份": "stock", +"688582": "stock", +"芯动联科": "stock", +"001308": "stock", +"康冠科技": "stock", +"300747": "stock", +"锐科激光": "stock", +"300155": "stock", +"安居宝": "stock", +"600114": "stock", +"东睦股份": "stock", +"601598": "stock", +"中国外运": "stock", +"002225": "stock", +"濮耐股份": "stock", +"300755": "stock", +"华致酒行": "stock", +"300967": "stock", +"晓鸣股份": "stock", +"301366": "stock", +"一博科技": "stock", +"000422": "stock", +"湖北宜化": "stock", +"002094": "stock", +"青岛金王": "stock", +"688095": "stock", +"福昕软件": "stock", +"300090": "stock", +"盛运退": "stock", +"688236": "stock", +"春立医疗": "stock", +"600233": "stock", +"圆通速递": "stock", +"301498": "stock", +"乖宝宠物": "stock", +"600351": "stock", +"亚宝药业": "stock", +"688007": "stock", +"光峰科技": "stock", +"603986": "stock", +"兆易创新": "stock", +"688682": "stock", +"霍莱沃": "stock", +"002141": "stock", +"贤丰控股": "stock", +"301072": "stock", +"中捷精工": "stock", +"831304": "stock", +"迪尔化工": "stock", +"601699": "stock", +"潞安环能": "stock", +"301161": "stock", +"唯万密封": "stock", +"688625": "stock", +"呈和科技": "stock", +"601200": "stock", +"上海环境": "stock", +"871553": "stock", +"凯腾精工": "stock", +"000541": "stock", +"佛山照明": "stock", +"688036": "stock", +"传音控股": "stock", +"688686": "stock", +"奥普特": "stock", +"300073": "stock", +"当升科技": "stock", +"000510": "stock", +"新金路": "stock", +"002756": "stock", +"永兴材料": "stock", +"002661": "stock", +"克明食品": "stock", +"000542": "stock", +"TCL通讯": "stock", +"600775": "stock", +"南京熊猫": "stock", +"600105": "stock", +"永鼎股份": "stock", +"600250": "stock", +"南纺股份": "stock", +"688123": "stock", +"聚辰股份": "stock", +"002535": "stock", +"林州重机": "stock", +"603116": "stock", +"红蜻蜓": "stock", +"301301": "stock", +"川宁生物": "stock", +"300348": "stock", +"长亮科技": "stock", +"600513": "stock", +"联环药业": "stock", +"688060": "stock", +"云涌科技": "stock", +"001914": "stock", +"招商积余": "stock", +"300036": "stock", +"超图软件": "stock", +"688197": "stock", +"首药控股-U": "stock", +"002258": "stock", +"利尔化学": "stock", +"300590": "stock", +"移为通信": "stock", +"688693": "stock", +"锴威特": "stock", +"002315": "stock", +"焦点科技": "stock", +"600967": "stock", +"内蒙一机": "stock", +"000933": "stock", +"神火股份": "stock", +"300853": "stock", +"申昊科技": "stock", +"002015": "stock", +"协鑫能科": "stock", +"601099": "stock", +"太平洋": "stock", +"003001": "stock", +"中岩大地": "stock", +"300193": "stock", +"佳士科技": "stock", +"002503": "stock", +"*ST搜特": "stock", +"600978": "stock", +"*ST宜生": "stock", +"002838": 
"stock", +"道恩股份": "stock", +"301380": "stock", +"挖金客": "stock", +"301286": "stock", +"侨源股份": "stock", +"430047": "stock", +"诺思兰德": "stock", +"300565": "stock", +"科信技术": "stock", +"300207": "stock", +"欣旺达": "stock", +"301159": "stock", +"三维天地": "stock", +"002216": "stock", +"三全食品": "stock", +"600487": "stock", +"亨通光电": "stock", +"601877": "stock", +"正泰电器": "stock", +"600890": "stock", +"退市中房": "stock", +"300735": "stock", +"光弘科技": "stock", +"002441": "stock", +"众业达": "stock", +"300670": "stock", +"大烨智能": "stock", +"688178": "stock", +"万德斯": "stock", +"000036": "stock", +"华联控股": "stock", +"603556": "stock", +"海兴电力": "stock", +"000828": "stock", +"东莞控股": "stock", +"605388": "stock", +"均瑶健康": "stock", +"603126": "stock", +"中材节能": "stock", +"600290": "stock", +"*ST华仪": "stock", +"002639": "stock", +"雪人股份": "stock", +"830896": "stock", +"旺成科技": "stock", +"688612": "stock", +"威迈斯": "stock", +"601606": "stock", +"长城军工": "stock", +"600647": "stock", +"*ST同达": "stock", +"605166": "stock", +"聚合顺": "stock", +"002617": "stock", +"露笑科技": "stock", +"002061": "stock", +"浙江交科": "stock", +"002199": "stock", +"东晶电子": "stock", +"301230": "stock", +"泓博医药": "stock", +"300852": "stock", +"四会富仕": "stock", +"301071": "stock", +"力量钻石": "stock", +"301377": "stock", +"鼎泰高科": "stock", +"300570": "stock", +"太辰光": "stock", +"002175": "stock", +"东方智造": "stock", +"603012": "stock", +"创力集团": "stock", +"603387": "stock", +"基蛋生物": "stock", +"600618": "stock", +"氯碱化工": "stock", +"002244": "stock", +"滨江集团": "stock", +"600128": "stock", +"苏豪弘业": "stock", +"000996": "stock", +"*ST中期": "stock", +"603799": "stock", +"华友钴业": "stock", +"688499": "stock", +"利元亨": "stock", +"300061": "stock", +"旗天科技": "stock", +"870866": "stock", +"绿亨科技": "stock", +"002056": "stock", +"横店东磁": "stock", +"688302": "stock", +"海创药业-U": "stock", +"300141": "stock", +"和顺电气": "stock", +"603700": "stock", +"宁水集团": "stock", +"002918": "stock", +"蒙娜丽莎": "stock", +"600439": "stock", +"瑞贝卡": "stock", +"000881": "stock", +"中广核技": "stock", +"603361": "stock", +"浙江国祥": "stock", +"600763": "stock", +"通策医疗": "stock", +"600518": "stock", +"ST康美": "stock", +"300692": "stock", +"中环环保": "stock", +"603131": "stock", +"上海沪工": "stock", +"600381": "stock", +"青海春天": "stock", +"831689": "stock", +"克莱特": "stock", +"000919": "stock", +"金陵药业": "stock", +"300034": "stock", +"钢研高纳": "stock", +"603070": "stock", +"万控智造": "stock", +"002467": "stock", +"二六三": "stock", +"000650": "stock", +"仁和药业": "stock", +"002526": "stock", +"山东矿机": "stock", +"300534": "stock", +"陇神戎发": "stock", +"600088": "stock", +"中视传媒": "stock", +"603053": "stock", +"成都燃气": "stock", +"603577": "stock", +"汇金通": "stock", +"300471": "stock", +"厚普股份": "stock", +"600706": "stock", +"曲江文旅": "stock", +"300523": "stock", +"辰安科技": "stock", +"603058": "stock", +"永吉股份": "stock", +"600784": "stock", +"鲁银投资": "stock", +"600548": "stock", +"深高速": "stock", +"603477": "stock", +"巨星农牧": "stock", +"688505": "stock", +"复旦张江": "stock", +"002221": "stock", +"东华能源": "stock", +"301361": "stock", +"众智科技": "stock", +"002327": "stock", +"富安娜": "stock", +"301336": "stock", +"趣睡科技": "stock", +"002492": "stock", +"恒基达鑫": "stock", +"688699": "stock", +"明微电子": "stock", +"600363": "stock", +"联创光电": "stock", +"300819": "stock", +"聚杰微纤": "stock", +"002305": "stock", +"南国置业": "stock", +"300962": "stock", +"中金辐照": "stock", +"600366": "stock", +"宁波韵升": "stock", +"832651": "stock", +"天罡股份": "stock", +"300561": "stock", +"汇金科技": "stock", +"002522": "stock", +"浙江众成": "stock", +"301315": "stock", +"威士顿": "stock", +"300023": "stock", +"宝德退": "stock", 
+"300538": "stock", +"同益股份": "stock", +"300540": "stock", +"蜀道装备": "stock", +"000962": "stock", +"东方钽业": "stock", +"000883": "stock", +"湖北能源": "stock", +"002671": "stock", +"龙泉股份": "stock", +"688010": "stock", +"福光股份": "stock", +"002592": "stock", +"ST八菱": "stock", +"002898": "stock", +"赛隆药业": "stock", +"001283": "stock", +"豪鹏科技": "stock", +"002795": "stock", +"永和智控": "stock", +"300198": "stock", +"纳川股份": "stock", +"688114": "stock", +"华大智造": "stock", +"600171": "stock", +"上海贝岭": "stock", +"601008": "stock", +"连云港": "stock", +"301270": "stock", +"汉仪股份": "stock", +"000750": "stock", +"国海证券": "stock", +"002743": "stock", +"富煌钢构": "stock", +"600616": "stock", +"金枫酒业": "stock", +"000690": "stock", +"宝新能源": "stock", +"002016": "stock", +"世荣兆业": "stock", +"688121": "stock", +"卓然股份": "stock", +"605366": "stock", +"宏柏新材": "stock", +"301107": "stock", +"瑜欣电子": "stock", +"300585": "stock", +"奥联电子": "stock", +"000863": "stock", +"三湘印象": "stock", +"000682": "stock", +"东方电子": "stock", +"688382": "stock", +"益方生物-U": "stock", +"002184": "stock", +"海得控制": "stock", +"301152": "stock", +"天力锂能": "stock", +"000912": "stock", +"泸天化": "stock", +"688099": "stock", +"晶晨股份": "stock", +"002920": "stock", +"德赛西威": "stock", +"301083": "stock", +"百胜智能": "stock", +"300666": "stock", +"江丰电子": "stock", +"688248": "stock", +"南网科技": "stock", +"600685": "stock", +"中船防务": "stock", +"300477": "stock", +"合纵科技": "stock", +"601579": "stock", +"会稽山": "stock", +"600968": "stock", +"海油发展": "stock", +"603001": "stock", +"ST奥康": "stock", +"002878": "stock", +"元隆雅图": "stock", +"002419": "stock", +"天虹股份": "stock", +"002887": "stock", +"绿茵生态": "stock", +"002700": "stock", +"ST浩源": "stock", +"300449": "stock", +"汉邦高科": "stock", +"002987": "stock", +"京北方": "stock", +"300300": "stock", +"海峡创新": "stock", +"300054": "stock", +"鼎龙股份": "stock", +"000792": "stock", +"盐湖股份": "stock", +"002828": "stock", +"贝肯能源": "stock", +"000977": "stock", +"浪潮信息": "stock", +"688332": "stock", +"中科蓝讯": "stock", +"002414": "stock", +"高德红外": "stock", +"300185": "stock", +"通裕重工": "stock", +"000539": "stock", +"粤电力A": "stock", +"300381": "stock", +"溢多利": "stock", +"871753": "stock", +"天纺标": "stock", +"300157": "stock", +"新锦动力": "stock", +"600383": "stock", +"金地集团": "stock", +"603379": "stock", +"三美股份": "stock", +"000995": "stock", +"皇台酒业": "stock", +"002377": "stock", +"国创高新": "stock", +"830974": "stock", +"凯大催化": "stock", +"300753": "stock", +"爱朋医疗": "stock", +"600684": "stock", +"珠江股份": "stock", +"603168": "stock", +"莎普爱思": "stock", +"688106": "stock", +"金宏气体": "stock", +"600697": "stock", +"欧亚集团": "stock", +"300282": "stock", +"*ST三盛": "stock", +"301043": "stock", +"绿岛风": "stock", +"603068": "stock", +"博通集成": "stock", +"000588": "stock", +"PT粤金曼": "stock", +"300266": "stock", +"兴源环境": "stock", +"003036": "stock", +"泰坦股份": "stock", +"000595": "stock", +"宝塔实业": "stock", +"001289": "stock", +"龙源电力": "stock", +"688348": "stock", +"昱能科技": "stock", +"603677": "stock", +"奇精机械": "stock", +"002622": "stock", +"皓宸医疗": "stock", +"600052": "stock", +"东望时代": "stock", +"688295": "stock", +"中复神鹰": "stock", +"002084": "stock", +"海鸥住工": "stock", +"000880": "stock", +"潍柴重机": "stock", +"002724": "stock", +"海洋王": "stock", +"688203": "stock", +"海正生材": "stock", +"603968": "stock", +"醋化股份": "stock", +"301238": "stock", +"瑞泰新材": "stock", +"601169": "stock", +"北京银行": "stock", +"002149": "stock", +"西部材料": "stock", +"301260": "stock", +"格力博": "stock", +"688165": "stock", +"埃夫特-U": "stock", +"600475": "stock", +"华光环能": "stock", +"834599": "stock", +"同力股份": "stock", +"600694": "stock", 
+"大商股份": "stock", +"002563": "stock", +"森马服饰": "stock", +"872374": "stock", +"云里物里": "stock", +"300070": "stock", +"碧水源": "stock", +"600571": "stock", +"信雅达": "stock", +"301231": "stock", +"荣信文化": "stock", +"000921": "stock", +"海信家电": "stock", +"600600": "stock", +"青岛啤酒": "stock", +"301548": "stock", +"崇德科技": "stock", +"300280": "stock", +"紫天科技": "stock", +"600870": "stock", +"退市厦华": "stock", +"688677": "stock", +"海泰新光": "stock", +"002134": "stock", +"天津普林": "stock", +"603823": "stock", +"百合花": "stock", +"601236": "stock", +"红塔证券": "stock", +"600700": "stock", +"*ST数码": "stock", +"301216": "stock", +"万凯新材": "stock", +"600096": "stock", +"云天化": "stock", +"300209": "stock", +"ST有棵树": "stock", +"603655": "stock", +"朗博科技": "stock", +"300153": "stock", +"科泰电源": "stock", +"603212": "stock", +"赛伍技术": "stock", +"688466": "stock", +"金科环境": "stock", +"002196": "stock", +"方正电机": "stock", +"300877": "stock", +"金春股份": "stock", +"605228": "stock", +"神通科技": "stock", +"603170": "stock", +"宝立食品": "stock", +"002841": "stock", +"视源股份": "stock", +"300277": "stock", +"海联讯": "stock", +"300957": "stock", +"贝泰妮": "stock", +"688395": "stock", +"正弦电气": "stock", +"600716": "stock", +"凤凰股份": "stock", +"000906": "stock", +"浙商中拓": "stock", +"600885": "stock", +"宏发股份": "stock", +"300011": "stock", +"鼎汉技术": "stock", +"002072": "stock", +"凯瑞德": "stock", +"002098": "stock", +"浔兴股份": "stock", +"002925": "stock", +"盈趣科技": "stock", +"688256": "stock", +"寒武纪-U": "stock", +"600130": "stock", +"波导股份": "stock", +"688596": "stock", +"正帆科技": "stock", +"002275": "stock", +"桂林三金": "stock", +"600446": "stock", +"金证股份": "stock", +"600495": "stock", +"晋西车轴": "stock", +"601921": "stock", +"浙版传媒": "stock", +"002367": "stock", +"康力电梯": "stock", +"300855": "stock", +"图南股份": "stock", +"600075": "stock", +"新疆天业": "stock", +"688690": "stock", +"纳微科技": "stock", +"600781": "stock", +"退市辅仁": "stock", +"688606": "stock", +"奥泰生物": "stock", +"601233": "stock", +"桐昆股份": "stock", +"600058": "stock", +"五矿发展": "stock", +"688030": "stock", +"山石网科": "stock", +"000838": "stock", +"财信发展": "stock", +"603579": "stock", +"荣泰健康": "stock", +"600197": "stock", +"伊力特": "stock", +"300690": "stock", +"双一科技": "stock", +"002858": "stock", +"力盛体育": "stock", +"600665": "stock", +"天地源": "stock", +"300457": "stock", +"赢合科技": "stock", +"301518": "stock", +"长华化学": "stock", +"872392": "stock", +"佳合科技": "stock", +"600243": "stock", +"青海华鼎": "stock", +"833455": "stock", +"汇隆活塞": "stock", +"688305": "stock", +"科德数控": "stock", +"300341": "stock", +"麦克奥迪": "stock", +"600785": "stock", +"新华百货": "stock", +"300717": "stock", +"华信新材": "stock", +"600359": "stock", +"新农开发": "stock", +"002990": "stock", +"盛视科技": "stock", +"300834": "stock", +"星辉环材": "stock", +"688031": "stock", +"星环科技-U": "stock", +"603181": "stock", +"皇马科技": "stock", +"688557": "stock", +"兰剑智能": "stock", +"603299": "stock", +"苏盐井神": "stock", +"838971": "stock", +"天马新材": "stock", +"300443": "stock", +"金雷股份": "stock", +"000014": "stock", +"沙河股份": "stock", +"600693": "stock", +"东百集团": "stock", +"603188": "stock", +"亚邦股份": "stock", +"002045": "stock", +"国光电器": "stock", +"000417": "stock", +"合肥百货": "stock", +"832802": "stock", +"保丽洁": "stock", +"300841": "stock", +"康华生物": "stock", +"688012": "stock", +"中微公司": "stock", +"002824": "stock", +"和胜股份": "stock", +"605111": "stock", +"新洁能": "stock", +"001299": "stock", +"美能能源": "stock", +"002538": "stock", +"司尔特": "stock", +"600218": "stock", +"全柴动力": "stock", +"688051": "stock", +"佳华科技": "stock", +"603311": "stock", +"金海高科": "stock", +"300945": "stock", +"曼卡龙": "stock", +"600241": 
"stock", +"时代万恒": "stock", +"000409": "stock", +"云鼎科技": "stock", +"300098": "stock", +"高新兴": "stock", +"603496": "stock", +"恒为科技": "stock", +"600110": "stock", +"诺德股份": "stock", +"000688": "stock", +"国城矿业": "stock", +"300259": "stock", +"新天科技": "stock", +"000550": "stock", +"江铃汽车": "stock", +"301033": "stock", +"迈普医学": "stock", +"832089": "stock", +"禾昌聚合": "stock", +"000068": "stock", +"华控赛格": "stock", +"688167": "stock", +"炬光科技": "stock", +"003002": "stock", +"壶化股份": "stock", +"300206": "stock", +"理邦仪器": "stock", +"000939": "stock", +"凯迪退": "stock", +"001333": "stock", +"光华股份": "stock", +"301130": "stock", +"西点药业": "stock", +"601179": "stock", +"中国西电": "stock", +"688105": "stock", +"诺唯赞": "stock", +"002609": "stock", +"捷顺科技": "stock", +"688153": "stock", +"唯捷创芯": "stock", +"688613": "stock", +"奥精医疗": "stock", +"301039": "stock", +"中集车辆": "stock", +"600410": "stock", +"华胜天成": "stock", +"688508": "stock", +"芯朋微": "stock", +"603983": "stock", +"丸美股份": "stock", +"002398": "stock", +"垒知集团": "stock", +"000638": "stock", +"万方发展": "stock", +"300400": "stock", +"劲拓股份": "stock", +"000552": "stock", +"甘肃能化": "stock", +"601985": "stock", +"中国核电": "stock", +"688221": "stock", +"前沿生物-U": "stock", +"301399": "stock", +"英特科技": "stock", +"300433": "stock", +"蓝思科技": "stock", +"301300": "stock", +"远翔新材": "stock", +"301228": "stock", +"实朴检测": "stock", +"300250": "stock", +"初灵信息": "stock", +"300456": "stock", +"赛微电子": "stock", +"300113": "stock", +"顺网科技": "stock", +"430510": "stock", +"丰光精密": "stock", +"688181": "stock", +"八亿时空": "stock", +"301469": "stock", +"恒达新材": "stock", +"300904": "stock", +"威力传动": "stock", +"002375": "stock", +"亚厦股份": "stock", +"600022": "stock", +"山东钢铁": "stock", +"600207": "stock", +"安彩高科": "stock", +"300965": "stock", +"恒宇信通": "stock", +"688420": "stock", +"美腾科技": "stock", +"002788": "stock", +"鹭燕医药": "stock", +"300336": "stock", +"新文退": "stock", +"688202": "stock", +"美迪西": "stock", +"000586": "stock", +"汇源通信": "stock", +"601061": "stock", +"中信金属": "stock", +"300587": "stock", +"天铁股份": "stock", +"603605": "stock", +"珀莱雅": "stock", +"002721": "stock", +"*ST金一": "stock", +"002545": "stock", +"东方铁塔": "stock", +"688560": "stock", +"明冠新材": "stock", +"688307": "stock", +"中润光学": "stock", +"000791": "stock", +"甘肃能源": "stock", +"688130": "stock", +"晶华微": "stock", +"301066": "stock", +"万事利": "stock", +"300327": "stock", +"中颖电子": "stock", +"000610": "stock", +"西安旅游": "stock", +"300461": "stock", +"田中精机": "stock", +"605033": "stock", +"美邦股份": "stock", +"002430": "stock", +"杭氧股份": "stock", +"600272": "stock", +"开开实业": "stock", +"002183": "stock", +"怡亚通": "stock", +"002842": "stock", +"翔鹭钨业": "stock", +"000818": "stock", +"航锦科技": "stock", +"002951": "stock", +"ST金时": "stock", +"301009": "stock", +"可靠股份": "stock", +"300013": "stock", +"新宁物流": "stock", +"688161": "stock", +"威高骨科": "stock", +"600652": "stock", +"退市游久": "stock", +"300095": "stock", +"华伍股份": "stock", +"002802": "stock", +"洪汇新材": "stock", +"600138": "stock", +"中青旅": "stock", +"600980": "stock", +"北矿科技": "stock", +"601966": "stock", +"玲珑轮胎": "stock", +"603505": "stock", +"金石资源": "stock", +"688576": "stock", +"西山科技": "stock", +"601015": "stock", +"陕西黑猫": "stock", +"600293": "stock", +"三峡新材": "stock", +"000089": "stock", +"深圳机场": "stock", +"001287": "stock", +"中电港": "stock", +"601968": "stock", +"宝钢包装": "stock", +"688410": "stock", +"山外山": "stock", +"688563": "stock", +"航材股份": "stock", +"600572": "stock", +"康恩贝": "stock", +"002753": "stock", +"永东股份": "stock", +"600780": "stock", +"通宝能源": "stock", +"603533": "stock", +"掌阅科技": "stock", 
+"600935": "stock", +"华塑股份": "stock", +"300949": "stock", +"奥雅股份": "stock", +"600743": "stock", +"华远地产": "stock", +"600982": "stock", +"宁波能源": "stock", +"601919": "stock", +"中远海控": "stock", +"300645": "stock", +"正元智慧": "stock", +"603565": "stock", +"中谷物流": "stock", +"002046": "stock", +"国机精工": "stock", +"600805": "stock", +"悦达投资": "stock", +"301197": "stock", +"工大科雅": "stock", +"600635": "stock", +"大众公用": "stock", +"688025": "stock", +"杰普特": "stock", +"603527": "stock", +"众源新材": "stock", +"601288": "stock", +"农业银行": "stock", +"688119": "stock", +"中钢洛耐": "stock", +"688698": "stock", +"伟创电气": "stock", +"688639": "stock", +"华恒生物": "stock", +"000549": "stock", +"S湘火炬": "stock", +"601965": "stock", +"中国汽研": "stock", +"600959": "stock", +"江苏有线": "stock", +"603985": "stock", +"恒润股份": "stock", +"688509": "stock", +"正元地信": "stock", +"688488": "stock", +"艾迪药业": "stock", +"002615": "stock", +"哈尔斯": "stock", +"000553": "stock", +"安道麦A": "stock", +"601088": "stock", +"中国神华": "stock", +"000602": "stock", +"金马集团": "stock", +"300858": "stock", +"科拓生物": "stock", +"603023": "stock", +"威帝股份": "stock", +"002664": "stock", +"信质集团": "stock", +"688168": "stock", +"安博通": "stock", +"002637": "stock", +"赞宇科技": "stock", +"600477": "stock", +"杭萧钢构": "stock", +"301131": "stock", +"聚赛龙": "stock", +"000015": "stock", +"PT中浩A": "stock", +"688056": "stock", +"莱伯泰科": "stock", +"688179": "stock", +"阿拉丁": "stock", +"300573": "stock", +"兴齐眼药": "stock", +"002168": "stock", +"惠程科技": "stock", +"832278": "stock", +"鹿得医疗": "stock", +"300037": "stock", +"新宙邦": "stock", +"000758": "stock", +"中色股份": "stock", +"300782": "stock", +"卓胜微": "stock", +"002772": "stock", +"众兴菌业": "stock", +"002816": "stock", +"*ST和科": "stock", +"603031": "stock", +"安孚科技": "stock", +"688408": "stock", +"中信博": "stock", +"688718": "stock", +"唯赛勃": "stock", +"001215": "stock", +"千味央厨": "stock", +"600152": "stock", +"维科技术": "stock", +"002859": "stock", +"洁美科技": "stock", +"002504": "stock", +"*ST弘高": "stock", +"000679": "stock", +"大连友谊": "stock", +"600659": "stock", +"*ST花雕": "stock", +"603336": "stock", +"宏辉果蔬": "stock", +"603177": "stock", +"德创环保": "stock", +"600898": "stock", +"ST美讯": "stock", +"002761": "stock", +"浙江建投": "stock", +"603697": "stock", +"有友食品": "stock", +"000878": "stock", +"云南铜业": "stock", +"002601": "stock", +"龙佰集团": "stock", +"688246": "stock", +"嘉和美康": "stock", +"300330": "stock", +"计通退": "stock", +"002293": "stock", +"罗莱生活": "stock", +"000606": "stock", +"顺利退": "stock", +"000637": "stock", +"ST实华": "stock", +"002857": "stock", +"三晖电气": "stock", +"600537": "stock", +"亿晶光电": "stock", +"688083": "stock", +"中望软件": "stock", +"001269": "stock", +"欧晶科技": "stock", +"603303": "stock", +"得邦照明": "stock", +"600300": "stock", +"维维股份": "stock", +"301252": "stock", +"同星科技": "stock", +"603798": "stock", +"康普顿": "stock", +"600444": "stock", +"国机通用": "stock", +"688498": "stock", +"源杰科技": "stock", +"600452": "stock", +"涪陵电力": "stock", +"300026": "stock", +"红日药业": "stock", +"601928": "stock", +"凤凰传媒": "stock", +"301257": "stock", +"普蕊斯": "stock", +"002295": "stock", +"精艺股份": "stock", +"002332": "stock", +"仙琚制药": "stock", +"600772": "stock", +"S*ST龙昌": "stock", +"300730": "stock", +"科创信息": "stock", +"600079": "stock", +"人福医药": "stock", +"600150": "stock", +"中国船舶": "stock", +"002502": "stock", +"ST鼎龙": "stock", +"002212": "stock", +"天融信": "stock", +"688522": "stock", +"纳睿雷达": "stock", +"688207": "stock", +"格灵深瞳": "stock", +"600858": "stock", +"银座股份": "stock", +"600543": "stock", +"*ST莫高": "stock", +"600755": "stock", +"厦门国贸": "stock", +"688247": "stock", 
+"宣泰医药": "stock", +"600878": "stock", +"*ST北科": "stock", +"872925": "stock", +"锦好医疗": "stock", +"600354": "stock", +"敦煌种业": "stock", +"000518": "stock", +"四环生物": "stock", +"002871": "stock", +"伟隆股份": "stock", +"300267": "stock", +"尔康制药": "stock", +"600350": "stock", +"山东高速": "stock", +"002821": "stock", +"凯莱英": "stock", +"600764": "stock", +"中国海防": "stock", +"300394": "stock", +"天孚通信": "stock", +"832982": "stock", +"XD锦波生": "stock", +"603818": "stock", +"曲美家居": "stock", +"002648": "stock", +"卫星化学": "stock", +"002376": "stock", +"新北洋": "stock", +"603608": "stock", +"天创时尚": "stock", +"002618": "stock", +"丹邦退": "stock", +"688593": "stock", +"新相微": "stock", +"600123": "stock", +"兰花科创": "stock", +"601162": "stock", +"天风证券": "stock", +"300938": "stock", +"信测标准": "stock", +"000003": "stock", +"PT金田A": "stock", +"000721": "stock", +"西安饮食": "stock", +"600018": "stock", +"上港集团": "stock", +"002805": "stock", +"丰元股份": "stock", +"002170": "stock", +"芭田股份": "stock", +"688766": "stock", +"普冉股份": "stock", +"601000": "stock", +"唐山港": "stock", +"600090": "stock", +"退市济堂": "stock", +"835368": "stock", +"连城数控": "stock", +"301308": "stock", +"江波龙": "stock", +"600373": "stock", +"中文传媒": "stock", +"002584": "stock", +"西陇科学": "stock", +"600002": "stock", +"齐鲁石化": "stock", +"601566": "stock", +"九牧王": "stock", +"601069": "stock", +"西部黄金": "stock", +"002494": "stock", +"华斯股份": "stock", +"002513": "stock", +"蓝丰生化": "stock", +"600881": "stock", +"亚泰集团": "stock", +"688170": "stock", +"德龙激光": "stock", +"002676": "stock", +"顺威股份": "stock", +"000686": "stock", +"东北证券": "stock", +"688585": "stock", +"上纬新材": "stock", +"002033": "stock", +"丽江股份": "stock", +"603578": "stock", +"三星新材": "stock", +"002731": "stock", +"萃华珠宝": "stock", +"601009": "stock", +"南京银行": "stock", +"600712": "stock", +"南宁百货": "stock", +"688607": "stock", +"康众医疗": "stock", +"301292": "stock", +"海科新源": "stock", +"605006": "stock", +"山东玻纤": "stock", +"600409": "stock", +"三友化工": "stock", +"688268": "stock", +"华特气体": "stock", +"300158": "stock", +"振东制药": "stock", +"600491": "stock", +"龙元建设": "stock", +"002320": "stock", +"海峡股份": "stock", +"600714": "stock", +"金瑞矿业": "stock", +"002197": "stock", +"证通电子": "stock", +"301041": "stock", +"金百泽": "stock", +"300372": "stock", +"欣泰退": "stock", +"600165": "stock", +"宁科生物": "stock", +"688609": "stock", +"九联科技": "stock", +"688630": "stock", +"芯碁微装": "stock", +"601188": "stock", +"龙江交通": "stock", +"600167": "stock", +"联美控股": "stock", +"300453": "stock", +"三鑫医疗": "stock", +"002109": "stock", +"兴化股份": "stock", +"603139": "stock", +"康惠制药": "stock", +"300164": "stock", +"通源石油": "stock", +"301101": "stock", +"明月镜片": "stock", +"600592": "stock", +"龙溪股份": "stock", +"603939": "stock", +"益丰药房": "stock", +"688222": "stock", +"成都先导": "stock", +"600280": "stock", +"中央商场": "stock", +"600288": "stock", +"大恒科技": "stock", +"002067": "stock", +"景兴纸业": "stock", +"300386": "stock", +"飞天诚信": "stock", +"300509": "stock", +"新美星": "stock", +"300189": "stock", +"神农科技": "stock", +"601106": "stock", +"中国一重": "stock", +"603218": "stock", +"日月股份": "stock", +"300909": "stock", +"汇创达": "stock", +"603912": "stock", +"佳力图": "stock", +"838275": "stock", +"驱动力": "stock", +"600315": "stock", +"上海家化": "stock", +"300621": "stock", +"维业股份": "stock", +"300459": "stock", +"汤姆猫": "stock", +"002060": "stock", +"粤水电": "stock", +"000416": "stock", +"*ST民控": "stock", +"605598": "stock", +"上海港湾": "stock", +"600508": "stock", +"上海能源": "stock", +"000685": "stock", +"中山公用": "stock", +"000519": "stock", +"中兵红箭": "stock", +"002318": "stock", +"久立特材": "stock", +"300137": 
"stock", +"先河环保": "stock", +"600120": "stock", +"浙江东方": "stock", +"300355": "stock", +"蒙草生态": "stock", +"300356": "stock", +"光一退": "stock", +"301330": "stock", +"熵基科技": "stock", +"603108": "stock", +"润达医疗": "stock", +"600361": "stock", +"创新新材": "stock", +"603167": "stock", +"渤海轮渡": "stock", +"301170": "stock", +"锡南科技": "stock", +"600661": "stock", +"昂立教育": "stock", +"300479": "stock", +"神思电子": "stock", +"002812": "stock", +"恩捷股份": "stock", +"605599": "stock", +"菜百股份": "stock", +"600500": "stock", +"中化国际": "stock", +"002455": "stock", +"百川股份": "stock", +"301265": "stock", +"华新环保": "stock", +"001338": "stock", +"永顺泰": "stock", +"600489": "stock", +"中金黄金": "stock", +"605180": "stock", +"华生科技": "stock", +"002848": "stock", +"高斯贝尔": "stock", +"001314": "stock", +"亿道信息": "stock", +"002785": "stock", +"万里石": "stock", +"300246": "stock", +"宝莱特": "stock", +"002626": "stock", +"金达威": "stock", +"600149": "stock", +"廊坊发展": "stock", +"603996": "stock", +"退市中新": "stock", +"603817": "stock", +"海峡环保": "stock", +"603869": "stock", +"新智认知": "stock", +"688519": "stock", +"南亚新材": "stock", +"000013": "stock", +"*ST石化A": "stock", +"603628": "stock", +"清源股份": "stock", +"600113": "stock", +"浙江东日": "stock", +"600820": "stock", +"隧道股份": "stock", +"002058": "stock", +"威尔泰": "stock", +"605007": "stock", +"五洲特纸": "stock", +"300082": "stock", +"奥克股份": "stock", +"301027": "stock", +"华蓝集团": "stock", +"002487": "stock", +"大金重工": "stock", +"835237": "stock", +"力佳科技": "stock", +"688685": "stock", +"迈信林": "stock", +"600132": "stock", +"重庆啤酒": "stock", +"000739": "stock", +"普洛药业": "stock", +"600455": "stock", +"博通股份": "stock", +"688118": "stock", +"普元信息": "stock", +"002284": "stock", +"亚太股份": "stock", +"000430": "stock", +"张家界": "stock", +"300419": "stock", +"浩丰科技": "stock", +"301035": "stock", +"润丰股份": "stock", +"002773": "stock", +"康弘药业": "stock", +"002233": "stock", +"塔牌集团": "stock", +"600676": "stock", +"交运股份": "stock", +"300091": "stock", +"金通灵": "stock", +"002162": "stock", +"悦心健康": "stock", +"301100": "stock", +"风光股份": "stock", +"300892": "stock", +"品渥食品": "stock", +"002692": "stock", +"ST远程": "stock", +"600636": "stock", +"国新文化": "stock", +"300694": "stock", +"蠡湖股份": "stock", +"601225": "stock", +"陕西煤业": "stock", +"600638": "stock", +"新黄浦": "stock", +"872895": "stock", +"花溪科技": "stock", +"605155": "stock", +"西大门": "stock", +"002966": "stock", +"苏州银行": "stock", +"002515": "stock", +"金字火腿": "stock", +"600251": "stock", +"冠农股份": "stock", +"603239": "stock", +"浙江仙通": "stock", +"002579": "stock", +"中京电子": "stock", +"688610": "stock", +"埃科光电": "stock", +"300100": "stock", +"双林股份": "stock", +"300353": "stock", +"东土科技": "stock", +"002008": "stock", +"大族激光": "stock", +"688176": "stock", +"亚虹医药-U": "stock", +"002624": "stock", +"完美世界": "stock", +"688023": "stock", +"安恒信息": "stock", +"300727": "stock", +"润禾材料": "stock", +"301023": "stock", +"江南奕帆": "stock", +"000069": "stock", +"华侨城A": "stock", +"300577": "stock", +"开润股份": "stock", +"600242": "stock", +"退市中昌": "stock", +"603155": "stock", +"新亚强": "stock", +"300274": "stock", +"阳光电源": "stock", +"000623": "stock", +"吉林敖东": "stock", +"688389": "stock", +"普门科技": "stock", +"001301": "stock", +"尚太科技": "stock", +"605378": "stock", +"野马电池": "stock", +"688196": "stock", +"卓越新能": "stock", +"600960": "stock", +"渤海汽车": "stock", +"002645": "stock", +"华宏科技": "stock", +"000833": "stock", +"粤桂股份": "stock", +"603908": "stock", +"牧高笛": "stock", +"300887": "stock", +"谱尼测试": "stock", +"301047": "stock", +"义翘神州": "stock", +"600850": "stock", +"电科数字": "stock", +"600715": "stock", +"文投控股": "stock", 
+"300397": "stock", +"天和防务": "stock", +"300950": "stock", +"德固特": "stock", +"834770": "stock", +"艾能聚": "stock", +"300204": "stock", +"舒泰神": "stock", +"002289": "stock", +"ST宇顺": "stock", +"601958": "stock", +"金钼股份": "stock", +"002651": "stock", +"利君股份": "stock", +"301550": "stock", +"斯菱股份": "stock", +"300731": "stock", +"科创新源": "stock", +"430556": "stock", +"雅达股份": "stock", +"300647": "stock", +"超频三": "stock", +"300422": "stock", +"博世科": "stock", +"600774": "stock", +"汉商集团": "stock", +"603351": "stock", +"威尔药业": "stock", +"600240": "stock", +"退市华业": "stock", +"600586": "stock", +"金晶科技": "stock", +"300894": "stock", +"火星人": "stock", +"600032": "stock", +"浙江新能": "stock", +"301156": "stock", +"美农生物": "stock", +"300275": "stock", +"梅安森": "stock", +"688215": "stock", +"瑞晟智能": "stock", +"688793": "stock", +"倍轻松": "stock", +"603115": "stock", +"海星股份": "stock", +"301085": "stock", +"亚康股份": "stock", +"000530": "stock", +"冰山冷热": "stock", +"688350": "stock", +"富淼科技": "stock", +"603848": "stock", +"好太太": "stock", +"688700": "stock", +"东威科技": "stock", +"603016": "stock", +"新宏泰": "stock", +"603856": "stock", +"东宏股份": "stock", +"002836": "stock", +"新宏泽": "stock", +"600302": "stock", +"标准股份": "stock", +"300289": "stock", +"利德曼": "stock", +"300592": "stock", +"华凯易佰": "stock", +"001267": "stock", +"汇绿生态": "stock", +"300567": "stock", +"精测电子": "stock", +"300343": "stock", +"联创股份": "stock", +"603112": "stock", +"华翔股份": "stock", +"000658": "stock", +"ST海洋": "stock", +"601086": "stock", +"国芳集团": "stock", +"300065": "stock", +"海兰信": "stock", +"300501": "stock", +"海顺新材": "stock", +"000004": "stock", +"国华网安": "stock", +"301116": "stock", +"益客食品": "stock", +"002572": "stock", +"索菲亚": "stock", +"688260": "stock", +"昀冢科技": "stock", +"300401": "stock", +"花园生物": "stock", +"300046": "stock", +"台基股份": "stock", +"603386": "stock", +"骏亚科技": "stock", +"688141": "stock", +"杰华特": "stock", +"001319": "stock", +"铭科精技": "stock", +"300087": "stock", +"荃银高科": "stock", +"600666": "stock", +"ST瑞德": "stock", +"300718": "stock", +"长盛轴承": "stock", +"688767": "stock", +"博拓生物": "stock", +"301503": "stock", +"智迪科技": "stock", +"002189": "stock", +"中光学": "stock", +"300886": "stock", +"华业香料": "stock", +"600832": "stock", +"东方明珠": "stock", +"002326": "stock", +"永太科技": "stock", +"688187": "stock", +"时代电气": "stock", +"832000": "stock", +"安徽凤凰": "stock", +"605003": "stock", +"众望布艺": "stock", +"300440": "stock", +"运达科技": "stock", +"000016": "stock", +"深康佳A": "stock", +"001229": "stock", +"魅视科技": "stock", +"300530": "stock", +"领湃科技": "stock", +"002950": "stock", +"奥美医疗": "stock", +"301393": "stock", +"昊帆生物": "stock", +"603786": "stock", +"科博达": "stock", +"000507": "stock", +"珠海港": "stock", +"300086": "stock", +"康芝药业": "stock", +"002979": "stock", +"雷赛智能": "stock", +"688126": "stock", +"沪硅产业": "stock", +"001223": "stock", +"欧克科技": "stock", +"603966": "stock", +"法兰泰克": "stock", +"300119": "stock", +"瑞普生物": "stock", +"301196": "stock", +"唯科科技": "stock", +"000570": "stock", +"苏常柴A": "stock", +"600076": "stock", +"康欣新材": "stock", +"688590": "stock", +"新致软件": "stock", +"300368": "stock", +"汇金股份": "stock", +"300933": "stock", +"中辰股份": "stock", +"000401": "stock", +"冀东水泥": "stock", +"603109": "stock", +"神驰机电": "stock", +"002930": "stock", +"宏川智慧": "stock", +"688496": "stock", +"清越科技": "stock", +"601880": "stock", +"辽港股份": "stock", +"300171": "stock", +"东富龙": "stock", +"688617": "stock", +"惠泰医疗": "stock", +"300487": "stock", +"蓝晓科技": "stock", +"600180": "stock", +"瑞茂通": "stock", +"688543": "stock", +"国科军工": "stock", +"300350": "stock", +"华鹏飞": "stock", 
+"000893": "stock", +"亚钾国际": "stock", +"301297": "stock", +"富乐德": "stock", +"688539": "stock", +"高华科技": "stock", +"002783": "stock", +"凯龙股份": "stock", +"838701": "stock", +"豪声电子": "stock", +"002594": "stock", +"比亚迪": "stock", +"688696": "stock", +"极米科技": "stock", +"000611": "stock", +"天首退": "stock", +"002915": "stock", +"中欣氟材": "stock", +"835185": "stock", +"贝特瑞": "stock", +"301331": "stock", +"恩威医药": "stock", +"002201": "stock", +"正威新材": "stock", +"000900": "stock", +"现代投资": "stock", +"301226": "stock", +"祥明智能": "stock", +"300029": "stock", +"ST天龙": "stock", +"002154": "stock", +"报喜鸟": "stock", +"605298": "stock", +"必得科技": "stock", +"002404": "stock", +"嘉欣丝绸": "stock", +"001201": "stock", +"东瑞股份": "stock", +"002646": "stock", +"天佑德酒": "stock", +"601677": "stock", +"明泰铝业": "stock", +"300533": "stock", +"冰川网络": "stock", +"300562": "stock", +"乐心医疗": "stock", +"688125": "stock", +"安达智能": "stock", +"600656": "stock", +"退市博元": "stock", +"002593": "stock", +"日上集团": "stock", +"000756": "stock", +"新华制药": "stock", +"000708": "stock", +"中信特钢": "stock", +"603353": "stock", +"和顺石油": "stock", +"002564": "stock", +"*ST天沃": "stock", +"600733": "stock", +"北汽蓝谷": "stock", +"300167": "stock", +"ST迪威迅": "stock", +"000699": "stock", +"S*ST佳纸": "stock", +"300393": "stock", +"中来股份": "stock", +"603200": "stock", +"上海洗霸": "stock", +"688800": "stock", +"瑞可达": "stock", +"600292": "stock", +"远达环保": "stock", +"002307": "stock", +"北新路桥": "stock", +"300935": "stock", +"盈建科": "stock", +"605058": "stock", +"澳弘电子": "stock", +"002656": "stock", +"ST摩登": "stock", +"301488": "stock", +"豪恩汽电": "stock", +"600137": "stock", +"浪莎股份": "stock", +"301372": "stock", +"科净源": "stock", +"001317": "stock", +"三羊马": "stock", +"300033": "stock", +"同花顺": "stock", +"600266": "stock", +"城建发展": "stock", +"301389": "stock", +"隆扬电子": "stock", +"688244": "stock", +"永信至诚": "stock", +"300375": "stock", +"鹏翎股份": "stock", +"001337": "stock", +"四川黄金": "stock", +"300151": "stock", +"昌红科技": "stock", +"002126": "stock", +"银轮股份": "stock", +"301182": "stock", +"凯旺科技": "stock", +"002635": "stock", +"安洁科技": "stock", +"000778": "stock", +"新兴铸管": "stock", +"600872": "stock", +"中炬高新": "stock", +"002798": "stock", +"帝欧家居": "stock", +"002629": "stock", +"仁智股份": "stock", +"300378": "stock", +"鼎捷软件": "stock", +"300045": "stock", +"华力创通": "stock", +"300123": "stock", +"亚光科技": "stock", +"300109": "stock", +"新开源": "stock", +"300969": "stock", +"恒帅股份": "stock", +"831856": "stock", +"浩淼科技": "stock", +"601238": "stock", +"广汽集团": "stock", +"301370": "stock", +"国科恒泰": "stock", +"000413": "stock", +"东旭光电": "stock", +"002286": "stock", +"保龄宝": "stock", +"688484": "stock", +"南芯科技": "stock", +"000509": "stock", +"华塑控股": "stock", +"603897": "stock", +"长城科技": "stock", +"000653": "stock", +"ST九州": "stock", +"300547": "stock", +"川环科技": "stock", +"300898": "stock", +"熊猫乳品": "stock", +"300525": "stock", +"博思软件": "stock", +"601360": "stock", +"三六零": "stock", +"601811": "stock", +"新华文轩": "stock", +"601568": "stock", +"北元集团": "stock", +"600963": "stock", +"岳阳林纸": "stock", +"872953": "stock", +"国子软件": "stock", +"600760": "stock", +"中航沈飞": "stock", +"000761": "stock", +"本钢板材": "stock", +"600981": "stock", +"汇鸿集团": "stock", +"603296": "stock", +"华勤技术": "stock", +"300669": "stock", +"沪宁股份": "stock", +"002020": "stock", +"京新药业": "stock", +"833751": "stock", +"惠同新材": "stock", +"603773": "stock", +"沃格光电": "stock", +"600480": "stock", +"凌云股份": "stock", +"300283": "stock", +"温州宏丰": "stock", +"301383": "stock", +"天键股份": "stock", +"300126": "stock", +"锐奇股份": "stock", +"002860": "stock", +"星帅尔": 
"stock", +"601101": "stock", +"昊华能源": "stock", +"603191": "stock", +"望变电气": "stock", +"605287": "stock", +"德才股份": "stock", +"002947": "stock", +"恒铭达": "stock", +"688228": "stock", +"开普云": "stock", +"688029": "stock", +"南微医学": "stock", +"000671": "stock", +"ST阳光城": "stock", +"600107": "stock", +"美尔雅": "stock", +"688621": "stock", +"阳光诺和": "stock", +"688270": "stock", +"臻镭科技": "stock", +"300958": "stock", +"建工修复": "stock", +"600488": "stock", +"津药药业": "stock", +"002194": "stock", +"武汉凡谷": "stock", +"300235": "stock", +"方直科技": "stock", +"688266": "stock", +"泽璟制药-U": "stock", +"300600": "stock", +"国瑞科技": "stock", +"000738": "stock", +"航发控制": "stock", +"300328": "stock", +"宜安科技": "stock", +"601021": "stock", +"春秋航空": "stock", +"000669": "stock", +"ST金鸿": "stock", +"300208": "stock", +"青岛中程": "stock", +"002165": "stock", +"红宝丽": "stock", +"002262": "stock", +"恩华药业": "stock", +"002781": "stock", +"奇信退": "stock", +"300745": "stock", +"欣锐科技": "stock", +"000545": "stock", +"金浦钛业": "stock", +"603339": "stock", +"四方科技": "stock", +"002451": "stock", +"摩恩电气": "stock", +"688618": "stock", +"三旺通信": "stock", +"002811": "stock", +"郑中设计": "stock", +"301246": "stock", +"宏源药业": "stock", +"688665": "stock", +"四方光电": "stock", +"300403": "stock", +"汉宇集团": "stock", +"301298": "stock", +"东利机械": "stock", +"688458": "stock", +"美芯晟": "stock", +"300810": "stock", +"中科海讯": "stock", +"301090": "stock", +"华润材料": "stock", +"300726": "stock", +"宏达电子": "stock", +"301418": "stock", +"协昌科技": "stock", +"600987": "stock", +"航民股份": "stock", +"603385": "stock", +"惠达卫浴": "stock", +"600329": "stock", +"达仁堂": "stock", +"300243": "stock", +"瑞丰高材": "stock", +"300306": "stock", +"远方信息": "stock", +"600824": "stock", +"益民集团": "stock", +"002488": "stock", +"金固股份": "stock", +"603036": "stock", +"如通股份": "stock", +"836892": "stock", +"广咨国际": "stock", +"300377": "stock", +"赢时胜": "stock", +"001979": "stock", +"招商蛇口": "stock", +"600416": "stock", +"湘电股份": "stock", +"002485": "stock", +"*ST雪发": "stock", +"000787": "stock", +"*ST创智": "stock", +"002992": "stock", +"宝明科技": "stock", +"600821": "stock", +"金开新能": "stock", +"603669": "stock", +"灵康药业": "stock", +"300435": "stock", +"中泰股份": "stock", +"603863": "stock", +"松炀资源": "stock", +"002963": "stock", +"豪尔赛": "stock", +"688003": "stock", +"天准科技": "stock", +"301382": "stock", +"蜂助手": "stock", +"301024": "stock", +"霍普股份": "stock", +"002211": "stock", +"宏达新材": "stock", +"688273": "stock", +"麦澜德": "stock", +"000982": "stock", +"中银绒业": "stock", +"300176": "stock", +"派生科技": "stock", +"002048": "stock", +"宁波华翔": "stock", +"002734": "stock", +"利民股份": "stock", +"688475": "stock", +"萤石网络": "stock", +"001367": "stock", +"海森药业": "stock", +"002110": "stock", +"三钢闽光": "stock", +"000836": "stock", +"富通信息": "stock", +"688185": "stock", +"康希诺": "stock", +"002003": "stock", +"伟星股份": "stock", +"002283": "stock", +"天润工业": "stock", +"002765": "stock", +"蓝黛科技": "stock", +"300107": "stock", +"建新股份": "stock", +"002454": "stock", +"松芝股份": "stock", +"000621": "stock", +"*ST比特": "stock", +"603078": "stock", +"江化微": "stock", +"603135": "stock", +"中重科技": "stock", +"603967": "stock", +"中创物流": "stock", +"600999": "stock", +"招商证券": "stock", +"600295": "stock", +"鄂尔多斯": "stock", +"688779": "stock", +"长远锂科": "stock", +"301091": "stock", +"深城交": "stock", +"603408": "stock", +"建霖家居": "stock", +"688281": "stock", +"华秦科技": "stock", +"300650": "stock", +"太龙股份": "stock", +"300258": "stock", +"精锻科技": "stock", +"300808": "stock", +"久量股份": "stock", +"300843": "stock", +"胜蓝股份": "stock", +"002281": "stock", +"光迅科技": "stock", +"600925": "stock", 
+"苏能股份": "stock", +"600735": "stock", +"新华锦": "stock", +"600026": "stock", +"中远海能": "stock", +"688265": "stock", +"南模生物": "stock", +"000911": "stock", +"南宁糖业": "stock", +"688349": "stock", +"三一重能": "stock", +"688148": "stock", +"芳源股份": "stock", +"000695": "stock", +"滨海能源": "stock", +"002113": "stock", +"*ST天润": "stock", +"603816": "stock", +"顾家家居": "stock", +"601599": "stock", +"浙文影业": "stock", +"603117": "stock", +"ST万林": "stock", +"002908": "stock", +"德生科技": "stock", +"002465": "stock", +"海格通信": "stock", +"300931": "stock", +"通用电梯": "stock", +"300049": "stock", +"福瑞股份": "stock", +"301395": "stock", +"仁信新材": "stock", +"600388": "stock", +"龙净环保": "stock", +"600346": "stock", +"恒力石化": "stock", +"600587": "stock", +"新华医疗": "stock", +"300951": "stock", +"博硕科技": "stock", +"301017": "stock", +"漱玉平民": "stock", +"873152": "stock", +"天宏锂电": "stock", +"603211": "stock", +"晋拓股份": "stock", +"832469": "stock", +"富恒新材": "stock", +"600793": "stock", +"宜宾纸业": "stock", +"000968": "stock", +"蓝焰控股": "stock", +"002351": "stock", +"漫步者": "stock", +"300057": "stock", +"万顺新材": "stock", +"603317": "stock", +"天味食品": "stock", +"300741": "stock", +"华宝股份": "stock", +"688288": "stock", +"鸿泉物联": "stock", +"301211": "stock", +"亨迪药业": "stock", +"870357": "stock", +"雅葆轩": "stock", +"688180": "stock", +"君实生物-U": "stock", +"301328": "stock", +"维峰电子": "stock", +"688786": "stock", +"悦安新材": "stock", +"601997": "stock", +"贵阳银行": "stock", +"002748": "stock", +"世龙实业": "stock", +"002853": "stock", +"皮阿诺": "stock", +"000735": "stock", +"罗牛山": "stock", +"603160": "stock", +"汇顶科技": "stock", +"600523": "stock", +"贵航股份": "stock", +"688549": "stock", +"中巨芯-U": "stock", +"603273": "stock", +"天元智能": "stock", +"688199": "stock", +"久日新材": "stock", +"300937": "stock", +"药易购": "stock", +"600370": "stock", +"三房巷": "stock", +"002444": "stock", +"巨星科技": "stock", +"688077": "stock", +"大地熊": "stock", +"002680": "stock", +"长生退": "stock", +"300787": "stock", +"海能实业": "stock", +"000683": "stock", +"远兴能源": "stock", +"600311": "stock", +"*ST荣华": "stock", +"688379": "stock", +"华光新材": "stock", +"300806": "stock", +"斯迪克": "stock", +"300210": "stock", +"森远股份": "stock", +"600955": "stock", +"维远股份": "stock", +"830799": "stock", +"艾融软件": "stock", +"002242": "stock", +"九阳股份": "stock", +"300824": "stock", +"北鼎股份": "stock", +"300104": "stock", +"乐视退": "stock", +"600695": "stock", +"退市绿庭": "stock", +"000760": "stock", +"斯太退": "stock", +"300995": "stock", +"奇德新材": "stock", +"600929": "stock", +"雪天盐业": "stock", +"300910": "stock", +"瑞丰新材": "stock", +"688355": "stock", +"明志科技": "stock", +"600546": "stock", +"山煤国际": "stock", +"300369": "stock", +"绿盟科技": "stock", +"301119": "stock", +"正强股份": "stock", +"603687": "stock", +"大胜达": "stock", +"688619": "stock", +"罗普特": "stock", +"831370": "stock", +"新安洁": "stock", +"601778": "stock", +"晶科科技": "stock", +"301076": "stock", +"新瀚新材": "stock", +"601117": "stock", +"中国化学": "stock", +"000561": "stock", +"烽火电子": "stock", +"002050": "stock", +"三花智控": "stock", +"688799": "stock", +"华纳药厂": "stock", +"002343": "stock", +"慈文传媒": "stock", +"000060": "stock", +"中金岭南": "stock", +"002421": "stock", +"达实智能": "stock", +"300075": "stock", +"数字政通": "stock", +"002975": "stock", +"博杰股份": "stock", +"301368": "stock", +"丰立智能": "stock", +"600709": "stock", +"ST生态": "stock", +"300182": "stock", +"捷成股份": "stock", +"603162": "stock", +"海通发展": "stock", +"300539": "stock", +"横河精密": "stock", +"000576": "stock", +"甘化科工": "stock", +"603650": "stock", +"彤程新材": "stock", +"300237": "stock", +"美晨生态": "stock", +"600070": "stock", +"ST富润": "stock", +"600253": 
"stock", +"天方药业": "stock", +"600829": "stock", +"人民同泰": "stock", +"600459": "stock", +"贵研铂业": "stock", +"603021": "stock", +"山东华鹏": "stock", +"601038": "stock", +"一拖股份": "stock", +"603076": "stock", +"乐惠国际": "stock", +"600263": "stock", +"路桥建设": "stock", +"688293": "stock", +"XD奥浦迈": "stock", +"002247": "stock", +"聚力文化": "stock", +"300746": "stock", +"汉嘉设计": "stock", +"600799": "stock", +"*ST龙科": "stock", +"600093": "stock", +"退市易见": "stock", +"300291": "stock", +"百纳千成": "stock", +"688258": "stock", +"卓易信息": "stock", +"300550": "stock", +"和仁科技": "stock", +"300256": "stock", +"星星科技": "stock", +"300586": "stock", +"美联新材": "stock", +"300154": "stock", +"瑞凌股份": "stock", +"600297": "stock", +"广汇汽车": "stock", +"002087": "stock", +"*ST新纺": "stock", +"836208": "stock", +"青矩技术": "stock", +"301235": "stock", +"华康医疗": "stock", +"002531": "stock", +"天顺风能": "stock", +"603173": "stock", +"福斯达": "stock", +"000795": "stock", +"英洛华": "stock", +"300358": "stock", +"楚天科技": "stock", +"300641": "stock", +"正丹股份": "stock", +"000888": "stock", +"峨眉山A": "stock", +"301139": "stock", +"元道通信": "stock", +"300994": "stock", +"久祺股份": "stock", +"300205": "stock", +"天喻信息": "stock", +"000426": "stock", +"兴业银锡": "stock", +"430300": "stock", +"辰光医疗": "stock", +"600549": "stock", +"厦门钨业": "stock", +"603225": "stock", +"新凤鸣": "stock", +"002241": "stock", +"歌尔股份": "stock", +"301332": "stock", +"德尔玛": "stock", +"300520": "stock", +"科大国创": "stock", +"000021": "stock", +"深科技": "stock", +"603332": "stock", +"苏州龙杰": "stock", +"600883": "stock", +"博闻科技": "stock", +"600751": "stock", +"海航科技": "stock", +"300920": "stock", +"润阳科技": "stock", +"002926": "stock", +"华西证券": "stock", +"300553": "stock", +"集智股份": "stock", +"300563": "stock", +"神宇股份": "stock", +"300288": "stock", +"朗玛信息": "stock", +"300998": "stock", +"宁波方正": "stock", +"301353": "stock", +"普莱得": "stock", +"300134": "stock", +"大富科技": "stock", +"001228": "stock", +"永泰运": "stock", +"603637": "stock", +"镇海股份": "stock", +"300222": "stock", +"科大智能": "stock", +"603889": "stock", +"新澳股份": "stock", +"688162": "stock", +"巨一科技": "stock", +"603679": "stock", +"华体科技": "stock", +"603489": "stock", +"八方股份": "stock", +"300371": "stock", +"汇中股份": "stock", +"300278": "stock", +"华昌达": "stock", +"688318": "stock", +"财富趋势": "stock", +"688068": "stock", +"热景生物": "stock", +"000655": "stock", +"金岭矿业": "stock", +"002670": "stock", +"国盛金控": "stock", +"300972": "stock", +"万辰集团": "stock", +"603137": "stock", +"恒尚节能": "stock", +"603057": "stock", +"紫燕食品": "stock", +"600408": "stock", +"安泰集团": "stock", +"300004": "stock", +"南风股份": "stock", +"300410": "stock", +"正业科技": "stock", +"839790": "stock", +"联迪信息": "stock", +"002157": "stock", +"*ST": "stock", +"002347": "stock", +"泰尔股份": "stock", +"601996": "stock", +"丰林集团": "stock", +"002557": "stock", +"洽洽食品": "stock", +"600632": "stock", +"华联商厦": "stock", +"002936": "stock", +"郑州银行": "stock", +"300002": "stock", +"神州泰岳": "stock", +"601133": "stock", +"柏诚股份": "stock", +"600961": "stock", +"株冶集团": "stock", +"301262": "stock", +"海看股份": "stock", +"605319": "stock", +"无锡振华": "stock", +"300542": "stock", +"新晨科技": "stock", +"688335": "stock", +"复洁环保": "stock", +"688237": "stock", +"超卓航科": "stock", +"603738": "stock", +"泰晶科技": "stock", +"601619": "stock", +"嘉泽新能": "stock", +"002344": "stock", +"海宁皮城": "stock", +"831010": "stock", +"凯添燃气": "stock", +"603103": "stock", +"横店影视": "stock", +"601208": "stock", +"东材科技": "stock", +"603878": "stock", +"武进不锈": "stock", +"603665": "stock", +"康隆达": "stock", +"601002": "stock", +"晋亿实业": "stock", +"002527": "stock", +"新时达": "stock", 
+"603444": "stock", +"吉比特": "stock", +"605377": "stock", +"华旺科技": "stock", +"001266": "stock", +"宏英智能": "stock", +"001258": "stock", +"立新能源": "stock", +"600472": "stock", +"包头铝业": "stock", +"600067": "stock", +"冠城大通": "stock", +"603970": "stock", +"中农立华": "stock", +"002490": "stock", +"山东墨龙": "stock", +"834682": "stock", +"球冠电缆": "stock", +"603380": "stock", +"易德龙": "stock", +"002150": "stock", +"通润装备": "stock", +"000040": "stock", +"东旭蓝天": "stock", +"300218": "stock", +"安利股份": "stock", +"300921": "stock", +"南凌科技": "stock", +"002036": "stock", +"联创电子": "stock", +"002554": "stock", +"惠博普": "stock", +"300216": "stock", +"千山退": "stock", +"002861": "stock", +"瀛通通讯": "stock", +"600583": "stock", +"海油工程": "stock", +"603777": "stock", +"来伊份": "stock", +"001323": "stock", +"慕思股份": "stock", +"688019": "stock", +"安集科技": "stock", +"002969": "stock", +"嘉美包装": "stock", +"002024": "stock", +"ST易购": "stock", +"002589": "stock", +"瑞康医药": "stock", +"688533": "stock", +"上声电子": "stock", +"603759": "stock", +"海天股份": "stock", +"300975": "stock", +"商络电子": "stock", +"688038": "stock", +"中科通达": "stock", +"000819": "stock", +"岳阳兴长": "stock", +"600050": "stock", +"中国联通": "stock", +"000589": "stock", +"贵州轮胎": "stock", +"600754": "stock", +"锦江酒店": "stock", +"600426": "stock", +"华鲁恒升": "stock", +"600804": "stock", +"ST鹏博士": "stock", +"002673": "stock", +"西部证券": "stock", +"600159": "stock", +"大龙地产": "stock", +"003029": "stock", +"吉大正元": "stock", +"839792": "stock", +"东和新材": "stock", +"300770": "stock", +"新媒股份": "stock", +"001202": "stock", +"炬申股份": "stock", +"301291": "stock", +"明阳电气": "stock", +"301199": "stock", +"迈赫股份": "stock", +"300956": "stock", +"英力股份": "stock", +"688163": "stock", +"赛伦生物": "stock", +"600927": "stock", +"永安期货": "stock", +"000613": "stock", +"东海A退": "stock", +"688055": "stock", +"龙腾光电": "stock", +"603918": "stock", +"金桥信息": "stock", +"600909": "stock", +"华安证券": "stock", +"002699": "stock", +"*ST美盛": "stock", +"002053": "stock", +"云南能投": "stock", +"300635": "stock", +"中达安": "stock", +"002896": "stock", +"中大力德": "stock", +"000011": "stock", +"深物业A": "stock", +"301032": "stock", +"新柴股份": "stock", +"688011": "stock", +"新光光电": "stock", +"002669": "stock", +"康达新材": "stock", +"300200": "stock", +"高盟新材": "stock", +"002365": "stock", +"永安药业": "stock", +"301248": "stock", +"杰创智能": "stock", +"002360": "stock", +"同德化工": "stock", +"600868": "stock", +"梅雁吉祥": "stock", +"688383": "stock", +"新益昌": "stock", +"600767": "stock", +"退市运盛": "stock", +"002550": "stock", +"千红制药": "stock", +"601555": "stock", +"东吴证券": "stock", +"603236": "stock", +"移远通信": "stock", +"002790": "stock", +"瑞尔特": "stock", +"605389": "stock", +"长龄液压": "stock", +"300270": "stock", +"中威电子": "stock", +"600796": "stock", +"钱江生化": "stock", +"688327": "stock", +"云从科技-UW": "stock", +"001259": "stock", +"利仁科技": "stock", +"688361": "stock", +"中科飞测-U": "stock", +"605318": "stock", +"法狮龙": "stock", +"002822": "stock", +"中装建设": "stock", +"688631": "stock", +"莱斯信息": "stock", +"300758": "stock", +"七彩化学": "stock", +"000158": "stock", +"常山北明": "stock", +"002599": "stock", +"盛通股份": "stock", +"603557": "stock", +"ST起步": "stock", +"301325": "stock", +"曼恩斯特": "stock", +"001210": "stock", +"金房能源": "stock", +"002837": "stock", +"英维克": "stock", +"600555": "stock", +"退市海创": "stock", +"603929": "stock", +"亚翔集成": "stock", +"600643": "stock", +"爱建集团": "stock", +"000587": "stock", +"*ST金洲": "stock", +"603636": "stock", +"南威软件": "stock", +"002435": "stock", +"长江健康": "stock", +"600257": "stock", +"大湖股份": "stock", +"002939": "stock", +"长城证券": "stock", +"600332": "stock", 
+"白云山": "stock", +"002689": "stock", +"远大智能": "stock", +"600547": "stock", +"山东黄金": "stock", +"301323": "stock", +"新莱福": "stock", +"603887": "stock", +"城地香江": "stock", +"300017": "stock", +"网宿科技": "stock", +"601006": "stock", +"大秦铁路": "stock", +"600483": "stock", +"福能股份": "stock", +"300594": "stock", +"朗进科技": "stock", +"300576": "stock", +"容大感光": "stock", +"002249": "stock", +"大洋电机": "stock", +"688283": "stock", +"坤恒顺维": "stock", +"603165": "stock", +"荣晟环保": "stock", +"301533": "stock", +"威马农机": "stock", +"688789": "stock", +"宏华数科": "stock", +"605499": "stock", +"东鹏饮料": "stock", +"600191": "stock", +"华资实业": "stock", +"300213": "stock", +"佳讯飞鸿": "stock", +"300385": "stock", +"雪浪环境": "stock", +"003004": "stock", +"声迅股份": "stock", +"688022": "stock", +"瀚川智能": "stock", +"002865": "stock", +"钧达股份": "stock", +"002585": "stock", +"双星新材": "stock", +"600725": "stock", +"云维股份": "stock", +"600401": "stock", +"退市海润": "stock", +"832978": "stock", +"开特股份": "stock", +"603196": "stock", +"日播时尚": "stock", +"605268": "stock", +"王力安防": "stock", +"002481": "stock", +"双塔食品": "stock", +"000876": "stock", +"新希望": "stock", +"300423": "stock", +"昇辉科技": "stock", +"300455": "stock", +"航天智装": "stock", +"300106": "stock", +"西部牧业": "stock", +"600696": "stock", +"岩石股份": "stock", +"688102": "stock", +"斯瑞新材": "stock", +"601577": "stock", +"长沙银行": "stock", +"002546": "stock", +"新联电子": "stock", +"300588": "stock", +"熙菱信息": "stock", +"600101": "stock", +"明星电力": "stock", +"000419": "stock", +"通程控股": "stock", +"600622": "stock", +"光大嘉宝": "stock", +"688480": "stock", +"赛恩斯": "stock", +"600121": "stock", +"郑州煤电": "stock", +"300636": "stock", +"同和药业": "stock", +"000730": "stock", +"*ST环保": "stock", +"300521": "stock", +"爱司凯": "stock", +"601991": "stock", +"大唐发电": "stock", +"300472": "stock", +"新元科技": "stock", +"300616": "stock", +"尚品宅配": "stock", +"603267": "stock", +"鸿远电子": "stock", +"002607": "stock", +"中公教育": "stock", +"300515": "stock", +"三德科技": "stock", +"870508": "stock", +"丰安股份": "stock", +"603022": "stock", +"新通联": "stock", +"301486": "stock", +"致尚科技": "stock", +"601827": "stock", +"三峰环境": "stock", +"002495": "stock", +"佳隆股份": "stock", +"300225": "stock", +"金力泰": "stock", +"300413": "stock", +"芒果超媒": "stock", +"300331": "stock", +"苏大维格": "stock", +"002361": "stock", +"神剑股份": "stock", +"002997": "stock", +"瑞鹄模具": "stock", +"600335": "stock", +"国机汽车": "stock", +"871970": "stock", +"大禹生物": "stock", +"002595": "stock", +"豪迈科技": "stock", +"430017": "stock", +"星昊医药": "stock", +"003023": "stock", +"彩虹集团": "stock", +"605577": "stock", +"龙版传媒": "stock", +"688238": "stock", +"和元生物": "stock", +"300952": "stock", +"恒辉安防": "stock", +"603603": "stock", +"*ST博天": "stock", +"301358": "stock", +"湖南裕能": "stock", +"300756": "stock", +"金马游乐": "stock", +"600630": "stock", +"龙头股份": "stock", +"301133": "stock", +"金钟股份": "stock", +"605123": "stock", +"派克新材": "stock", +"300890": "stock", +"翔丰华": "stock", +"603987": "stock", +"康德莱": "stock", +"300006": "stock", +"莱美药业": "stock", +"603917": "stock", +"合力科技": "stock", +"002041": "stock", +"登海种业": "stock", +"000798": "stock", +"中水渔业": "stock", +"001311": "stock", +"多利科技": "stock", +"301031": "stock", +"中熔电气": "stock", +"002478": "stock", +"常宝股份": "stock", +"000898": "stock", +"鞍钢股份": "stock", +"600579": "stock", +"克劳斯": "stock", +"300742": "stock", +"*ST越博": "stock", +"603013": "stock", +"亚普股份": "stock", +"603602": "stock", +"纵横通信": "stock", +"001380": "stock", +"华纬科技": "stock", +"000796": "stock", +"*ST凯撒": "stock", +"300552": "stock", +"万集科技": "stock", +"300432": "stock", +"富临精工": "stock", 
+"603090": "stock", +"宏盛股份": "stock", +"002300": "stock", +"太阳电缆": "stock", +"000601": "stock", +"韶能股份": "stock", +"871634": "stock", +"新威凌": "stock", +"600895": "stock", +"张江高科": "stock", +"688603": "stock", +"天承科技": "stock", +"300978": "stock", +"东箭科技": "stock", +"600061": "stock", +"国投资本": "stock", +"002387": "stock", +"维信诺": "stock", +"300269": "stock", +"联建光电": "stock", +"873001": "stock", +"纬达光电": "stock", +"301110": "stock", +"青木股份": "stock", +"832145": "stock", +"恒合股份": "stock", +"688358": "stock", +"祥生医疗": "stock", +"600719": "stock", +"大连热电": "stock", +"601567": "stock", +"三星医疗": "stock", +"831906": "stock", +"舜宇精工": "stock", +"603843": "stock", +"正平股份": "stock", +"688063": "stock", +"派能科技": "stock", +"600089": "stock", +"特变电工": "stock", +"002817": "stock", +"黄山胶囊": "stock", +"688377": "stock", +"迪威尔": "stock", +"688517": "stock", +"金冠电气": "stock", +"300489": "stock", +"光智科技": "stock", +"002223": "stock", +"鱼跃医疗": "stock", +"603085": "stock", +"天成自控": "stock", +"300340": "stock", +"科恒股份": "stock", +"603029": "stock", +"天鹅股份": "stock", +"002112": "stock", +"三变科技": "stock", +"301319": "stock", +"唯特偶": "stock", +"600576": "stock", +"祥源文旅": "stock", +"603305": "stock", +"旭升集团": "stock", +"300334": "stock", +"津膜科技": "stock", +"839680": "stock", +"广道数字": "stock", +"300724": "stock", +"捷佳伟创": "stock", +"688113": "stock", +"联测科技": "stock", +"300838": "stock", +"浙江力诺": "stock", +"002818": "stock", +"富森美": "stock", +"000565": "stock", +"渝三峡A": "stock", +"600702": "stock", +"舍得酒业": "stock", +"600178": "stock", +"东安动力": "stock", +"832175": "stock", +"东方碳素": "stock", +"688570": "stock", +"天玛智控": "stock", +"600325": "stock", +"华发股份": "stock", +"002897": "stock", +"意华股份": "stock", +"300039": "stock", +"上海凯宝": "stock", +"688053": "stock", +"思科瑞": "stock", +"002728": "stock", +"特一药业": "stock", +"688150": "stock", +"莱特光电": "stock", +"600333": "stock", +"长春燃气": "stock", +"003017": "stock", +"大洋生物": "stock", +"002121": "stock", +"科陆电子": "stock", +"002124": "stock", +"天邦食品": "stock", +"301439": "stock", +"泓淋电力": "stock", +"603416": "stock", +"信捷电气": "stock", +"601872": "stock", +"招商轮船": "stock", +"601788": "stock", +"光大证券": "stock", +"600782": "stock", +"新钢股份": "stock", +"300760": "stock", +"迈瑞医疗": "stock", +"002256": "stock", +"兆新股份": "stock", +"601011": "stock", +"宝泰隆": "stock", +"600157": "stock", +"永泰能源": "stock", +"301111": "stock", +"粤万年青": "stock", +"000692": "stock", +"*ST惠天": "stock", +"002173": "stock", +"创新医疗": "stock", +"601689": "stock", +"拓普集团": "stock", +"601330": "stock", +"绿色动力": "stock", +"000987": "stock", +"越秀资本": "stock", +"300772": "stock", +"运达股份": "stock", +"600810": "stock", +"神马股份": "stock", +"002188": "stock", +"中天服务": "stock", +"603619": "stock", +"中曼石油": "stock", +"300626": "stock", +"华瑞股份": "stock", +"600338": "stock", +"西藏珠峰": "stock", +"600485": "stock", +"*ST信威": "stock", +"000633": "stock", +"合金投资": "stock", +"300298": "stock", +"三诺生物": "stock", +"600896": "stock", +"退市海医": "stock", +"002309": "stock", +"ST中利": "stock", +"605188": "stock", +"国光连锁": "stock", +"603755": "stock", +"日辰股份": "stock", +"002762": "stock", +"金发拉比": "stock", +"002026": "stock", +"山东威达": "stock", +"600533": "stock", +"栖霞建设": "stock", +"301077": "stock", +"星华新材": "stock", +"000301": "stock", +"东方盛虹": "stock", +"300575": "stock", +"中旗股份": "stock", +"688280": "stock", +"精进电动-UW": "stock", +"600215": "stock", +"派斯林": "stock", +"300166": "stock", +"东方国信": "stock", +"603958": "stock", +"哈森股份": "stock", +"000810": "stock", +"创维数字": "stock", +"839719": "stock", +"宁新新材": "stock", +"603568": "stock", 
+"伟明环保": "stock", +"000677": "stock", +"恒天海龙": "stock", +"603309": "stock", +"维力医疗": "stock", +"688443": "stock", +"智翔金泰-U": "stock", +"600822": "stock", +"上海物贸": "stock", +"001331": "stock", +"胜通能源": "stock", +"300612": "stock", +"宣亚国际": "stock", +"000554": "stock", +"泰山石油": "stock", +"688330": "stock", +"宏力达": "stock", +"300349": "stock", +"金卡智能": "stock", +"600020": "stock", +"中原高速": "stock", +"430198": "stock", +"微创光电": "stock", +"300849": "stock", +"锦盛新材": "stock", +"300971": "stock", +"博亚精工": "stock", +"603828": "stock", +"柯利达": "stock", +"833427": "stock", +"华维设计": "stock", +"688651": "stock", +"盛邦安全": "stock", +"300027": "stock", +"华谊兄弟": "stock", +"600125": "stock", +"铁龙物流": "stock", +"301187": "stock", +"欧圣电气": "stock", +"001234": "stock", +"泰慕士": "stock", +"688143": "stock", +"长盈通": "stock", +"002324": "stock", +"普利特": "stock", +"600418": "stock", +"江淮汽车": "stock", +"300983": "stock", +"尤安设计": "stock", +"600882": "stock", +"妙可蓝多": "stock", +"301327": "stock", +"华宝新能": "stock", +"300907": "stock", +"康平科技": "stock", +"600460": "stock", +"士兰微": "stock", +"300872": "stock", +"天阳科技": "stock", +"600268": "stock", +"国电南自": "stock", +"605300": "stock", +"佳禾食品": "stock", +"301367": "stock", +"怡和嘉业": "stock", +"000680": "stock", +"山推股份": "stock", +"688626": "stock", +"翔宇医疗": "stock", +"837344": "stock", +"三元基因": "stock", +"300584": "stock", +"海辰药业": "stock", +"688290": "stock", +"景业智能": "stock", +"600493": "stock", +"凤竹纺织": "stock", +"688520": "stock", +"神州细胞-U": "stock", +"600305": "stock", +"恒顺醋业": "stock", +"001282": "stock", +"三联锻造": "stock", +"301309": "stock", +"万得凯": "stock", +"002005": "stock", +"ST德豪": "stock", +"001339": "stock", +"智微智能": "stock", +"603026": "stock", +"胜华新材": "stock", +"600380": "stock", +"健康元": "stock", +"300761": "stock", +"立华股份": "stock", +"300238": "stock", +"冠昊生物": "stock", +"600740": "stock", +"山西焦化": "stock", +"300395": "stock", +"菲利华": "stock", +"000757": "stock", +"浩物股份": "stock", +"002968": "stock", +"新大正": "stock", +"002174": "stock", +"游族网络": "stock", +"603712": "stock", +"七一二": "stock", +"600429": "stock", +"三元股份": "stock", +"002740": "stock", +"*ST爱迪": "stock", +"688456": "stock", +"有研粉材": "stock", +"600843": "stock", +"上工申贝": "stock", +"600855": "stock", +"航天长峰": "stock", +"601163": "stock", +"三角轮胎": "stock", +"601882": "stock", +"海天精工": "stock", +"300922": "stock", +"天秦装备": "stock", +"688073": "stock", +"毕得医药": "stock", +"688016": "stock", +"心脉医疗": "stock", +"603282": "stock", +"亚光股份": "stock", +"300211": "stock", +"亿通科技": "stock", +"301193": "stock", +"家联科技": "stock", +"430090": "stock", +"同辉信息": "stock", +"688419": "stock", +"耐科装备": "stock", +"603528": "stock", +"多伦科技": "stock", +"002270": "stock", +"华明装备": "stock", +"600187": "stock", +"国中水务": "stock", +"603272": "stock", +"联翔股份": "stock", +"000037": "stock", +"深南电A": "stock", +"002604": "stock", +"龙力退": "stock", +"002228": "stock", +"合兴包装": "stock", +"603538": "stock", +"美诺华": "stock", +"603980": "stock", +"吉华集团": "stock", +"000569": "stock", +"长城股份": "stock", +"300862": "stock", +"蓝盾光电": "stock", +"000975": "stock", +"银泰黄金": "stock", +"600567": "stock", +"山鹰国际": "stock", +"002477": "stock", +"雏鹰退": "stock", +"002406": "stock", +"远东传动": "stock", +"300804": "stock", +"广康生化": "stock", +"300774": "stock", +"倍杰特": "stock", +"002427": "stock", +"尤夫股份": "stock", +"301081": "stock", +"严牌股份": "stock", +"870199": "stock", +"倍益康": "stock", +"300416": "stock", +"苏试试验": "stock", +"603567": "stock", +"珍宝岛": "stock", +"002132": "stock", +"恒星科技": "stock", +"002832": "stock", +"比音勒芬": "stock", +"300613": 
"stock", +"富瀚微": "stock", +"836717": "stock", +"瑞星股份": "stock", +"603237": "stock", +"五芳斋": "stock", +"002434": "stock", +"万里扬": "stock", +"603050": "stock", +"科林电气": "stock", +"600081": "stock", +"东风科技": "stock", +"000788": "stock", +"北大医药": "stock", +"601512": "stock", +"中新集团": "stock", +"300583": "stock", +"赛托生物": "stock", +"600998": "stock", +"九州通": "stock", +"000571": "stock", +"新大洲A": "stock", +"003027": "stock", +"同兴环保": "stock", +"002551": "stock", +"尚荣医疗": "stock", +"002049": "stock", +"紫光国微": "stock", +"300948": "stock", +"冠中生态": "stock", +"600507": "stock", +"方大特钢": "stock", +"688500": "stock", +"*ST慧辰": "stock", +"300316": "stock", +"晶盛机电": "stock", +"601121": "stock", +"宝地矿业": "stock", +"688486": "stock", +"龙迅股份": "stock", +"000055": "stock", +"方大集团": "stock", +"002668": "stock", +"奥马电器": "stock", +"300749": "stock", +"顶固集创": "stock", +"836942": "stock", +"恒立钻具": "stock", +"871245": "stock", +"威博液压": "stock", +"601126": "stock", +"四方股份": "stock", +"837046": "stock", +"亿能电力": "stock", +"600326": "stock", +"西藏天路": "stock", +"000726": "stock", +"鲁泰A": "stock", +"002218": "stock", +"拓日新能": "stock", +"300262": "stock", +"巴安水务": "stock", +"000822": "stock", +"山东海化": "stock", +"300674": "stock", +"宇信科技": "stock", +"603176": "stock", +"汇通集团": "stock", +"834014": "stock", +"特瑞斯": "stock", +"000555": "stock", +"神州信息": "stock", +"002424": "stock", +"贵州百灵": "stock", +"600353": "stock", +"旭光电子": "stock", +"601366": "stock", +"利群股份": "stock", +"603920": "stock", +"世运电路": "stock", +"688291": "stock", +"金橙子": "stock", +"688658": "stock", +"悦康药业": "stock", +"000581": "stock", +"威孚高科": "stock", +"300619": "stock", +"金银河": "stock", +"688489": "stock", +"三未信安": "stock", +"600060": "stock", +"海信视像": "stock", +"688037": "stock", +"芯源微": "stock", +"601728": "stock", +"中国电信": "stock", +"300390": "stock", +"天华新能": "stock", +"000848": "stock", +"承德露露": "stock", +"301345": "stock", +"涛涛车业": "stock", +"600561": "stock", +"江西长运": "stock", +"603297": "stock", +"永新光学": "stock", +"600048": "stock", +"保利发展": "stock", +"000736": "stock", +"中交地产": "stock", +"301371": "stock", +"敷尔佳": "stock", +"688329": "stock", +"艾隆科技": "stock", +"300677": "stock", +"英科医疗": "stock", +"430425": "stock", +"乐创技术": "stock", +"688313": "stock", +"仕佳光子": "stock", +"688602": "stock", +"康鹏科技": "stock", +"688360": "stock", +"德马科技": "stock", +"002422": "stock", +"科伦药业": "stock", +"301233": "stock", +"盛帮股份": "stock", +"688623": "stock", +"双元科技": "stock", +"000620": "stock", +"*ST新联": "stock", +"831152": "stock", +"昆工科技": "stock", +"603516": "stock", +"淳中科技": "stock", +"605133": "stock", +"嵘泰股份": "stock", +"300010": "stock", +"*ST豆神": "stock", +"688005": "stock", +"容百科技": "stock", +"002440": "stock", +"闰土股份": "stock", +"002097": "stock", +"山河智能": "stock", +"601299": "stock", +"中国北车": "stock", +"300173": "stock", +"福能东方": "stock", +"688155": "stock", +"先惠技术": "stock", +"300826": "stock", +"测绘股份": "stock", +"000007": "stock", +"*ST全新": "stock", +"002114": "stock", +"罗平锌电": "stock", +"002418": "stock", +"康盛股份": "stock", +"300893": "stock", +"松原股份": "stock", +"600970": "stock", +"中材国际": "stock", +"688356": "stock", +"键凯科技": "stock", +"000875": "stock", +"吉电股份": "stock", +"300866": "stock", +"安克创新": "stock", +"301397": "stock", +"溯联股份": "stock", +"301388": "stock", +"欣灵电气": "stock", +"003025": "stock", +"思进智能": "stock", +"300885": "stock", +"海昌新材": "stock", +"300117": "stock", +"嘉寓股份": "stock", +"600405": "stock", +"动力源": "stock", +"001896": "stock", +"豫能控股": "stock", +"601665": "stock", +"齐鲁银行": "stock", +"300085": "stock", +"银之杰": "stock", 
+"603893": "stock", +"瑞芯微": "stock", +"000513": "stock", +"丽珠集团": "stock", +"603757": "stock", +"大元泵业": "stock", +"688216": "stock", +"气派科技": "stock", +"603233": "stock", +"大参林": "stock", +"603888": "stock", +"新华网": "stock", +"301313": "stock", +"凡拓数创": "stock", +"300697": "stock", +"电工合金": "stock", +"300492": "stock", +"华图山鼎": "stock", +"688107": "stock", +"安路科技": "stock", +"002269": "stock", +"美邦服饰": "stock", +"603259": "stock", +"药明康德": "stock", +"300460": "stock", +"惠伦晶体": "stock", +"603169": "stock", +"兰石重装": "stock", +"605196": "stock", +"华通线缆": "stock", +"688575": "stock", +"亚辉龙": "stock", +"600990": "stock", +"四创电子": "stock", +"688535": "stock", +"华海诚科": "stock", +"002224": "stock", +"三力士": "stock", +"300830": "stock", +"金现代": "stock", +"000985": "stock", +"大庆华科": "stock", +"300321": "stock", +"同大股份": "stock", +"688737": "stock", +"中自科技": "stock", +"002906": "stock", +"华阳集团": "stock", +"002708": "stock", +"光洋股份": "stock", +"000958": "stock", +"电投产融": "stock", +"000096": "stock", +"广聚能源": "stock", +"300599": "stock", +"雄塑科技": "stock", +"603501": "stock", +"韦尔股份": "stock", +"300771": "stock", +"智莱科技": "stock", +"603289": "stock", +"泰瑞机器": "stock", +"688078": "stock", +"龙软科技": "stock", +"001208": "stock", +"华菱线缆": "stock", +"000528": "stock", +"柳工": "stock", +"600139": "stock", +"*ST西源": "stock", +"603668": "stock", +"XD天马科": "stock", +"601995": "stock", +"中金公司": "stock", +"600129": "stock", +"太极集团": "stock", +"600703": "stock", +"三安光电": "stock", +"301209": "stock", +"联合化学": "stock", +"600969": "stock", +"郴电国际": "stock", +"300640": "stock", +"德艺文创": "stock", +"001206": "stock", +"依依股份": "stock", +"600734": "stock", +"ST实达": "stock", +"300555": "stock", +"ST路通": "stock", +"002640": "stock", +"跨境通": "stock", +"600525": "stock", +"长园集团": "stock", +"301507": "stock", +"民生健康": "stock", +"688403": "stock", +"汇成股份": "stock", +"000506": "stock", +"中润资源": "stock", +"600807": "stock", +"济南高新": "stock", +"600601": "stock", +"方正科技": "stock", +"688567": "stock", +"孚能科技": "stock", +"000950": "stock", +"重药控股": "stock", +"688096": "stock", +"京源环保": "stock", +"002159": "stock", +"三特索道": "stock", +"605098": "stock", +"行动教育": "stock", +"605116": "stock", +"奥锐特": "stock", +"600956": "stock", +"新天绿能": "stock", +"836675": "stock", +"秉扬科技": "stock", +"002610": "stock", +"爱康科技": "stock", +"300947": "stock", +"德必集团": "stock", +"688017": "stock", +"绿的谐波": "stock", +"834475": "stock", +"三友科技": "stock", +"603298": "stock", +"杭叉集团": "stock", +"300977": "stock", +"深圳瑞捷": "stock", +"300930": "stock", +"屹通新材": "stock", +"002683": "stock", +"广东宏大": "stock", +"300310": "stock", +"宜通世纪": "stock", +"000017": "stock", +"深中华A": "stock", +"300072": "stock", +"海新能科": "stock", +"000989": "stock", +"九芝堂": "stock", +"601727": "stock", +"上海电气": "stock", +"688075": "stock", +"安旭生物": "stock", +"002405": "stock", +"四维图新": "stock", +"300658": "stock", +"延江股份": "stock", +"300807": "stock", +"天迈科技": "stock", +"603588": "stock", +"高能环境": "stock", +"002403": "stock", +"爱仕达": "stock", +"603899": "stock", +"晨光股份": "stock", +"000938": "stock", +"紫光股份": "stock", +"688008": "stock", +"澜起科技": "stock", +"300032": "stock", +"金龙机电": "stock", +"600605": "stock", +"汇通能源": "stock", +"301078": "stock", +"孩子王": "stock", +"002957": "stock", +"科瑞技术": "stock", +"300399": "stock", +"天利科技": "stock", +"300009": "stock", +"安科生物": "stock", +"000723": "stock", +"美锦能源": "stock", +"605068": "stock", +"明新旭腾": "stock", +"002472": "stock", +"双环传动": "stock", +"002458": "stock", +"益生股份": "stock", +"601065": "stock", +"江盐集团": "stock", +"600624": "stock", +"复旦复华": 
"stock", +"600449": "stock", +"宁夏建材": "stock", +"688432": "stock", +"有研硅": "stock", +"002231": "stock", +"奥维通信": "stock", +"301191": "stock", +"菲菱科思": "stock", +"603698": "stock", +"航天工程": "stock", +"300783": "stock", +"三只松鼠": "stock", +"600228": "stock", +"返利科技": "stock", +"603121": "stock", +"华培动力": "stock", +"002586": "stock", +"*ST围海": "stock", +"002807": "stock", +"江阴银行": "stock", +"000673": "stock", +"当代退": "stock", +"600448": "stock", +"华纺股份": "stock", +"603215": "stock", +"比依股份": "stock", +"600502": "stock", +"安徽建工": "stock", +"300660": "stock", +"江苏雷利": "stock", +"600111": "stock", +"北方稀土": "stock", +"002437": "stock", +"誉衡药业": "stock", +"000858": "stock", +"五粮液": "stock", +"002461": "stock", +"珠江啤酒": "stock", +"300558": "stock", +"贝达药业": "stock", +"300902": "stock", +"国安达": "stock", +"000901": "stock", +"航天科技": "stock", +"600055": "stock", +"万东医疗": "stock", +"600237": "stock", +"铜峰电子": "stock", +"300781": "stock", +"因赛集团": "stock", +"301046": "stock", +"能辉科技": "stock", +"002466": "stock", +"天齐锂业": "stock", +"002445": "stock", +"中南文化": "stock", +"873665": "stock", +"科强股份": "stock", +"002779": "stock", +"中坚科技": "stock", +"300133": "stock", +"华策影视": "stock", +"301088": "stock", +"戎美股份": "stock", +"603676": "stock", +"卫信康": "stock", +"000717": "stock", +"中南股份": "stock", +"000752": "stock", +"*ST西发": "stock", +"601952": "stock", +"苏垦农发": "stock", +"301004": "stock", +"嘉益股份": "stock", +"001324": "stock", +"长青科技": "stock", +"600009": "stock", +"上海机场": "stock", +"600368": "stock", +"五洲交通": "stock", +"002543": "stock", +"万和电气": "stock", +"688600": "stock", +"皖仪科技": "stock", +"603606": "stock", +"东方电缆": "stock", +"002169": "stock", +"智光电气": "stock", +"600345": "stock", +"长江通信": "stock", +"002217": "stock", +"合力泰": "stock", +"300637": "stock", +"扬帆新材": "stock", +"002612": "stock", +"朗姿股份": "stock", +"300202": "stock", +"聚龙退": "stock", +"001230": "stock", +"劲旅环境": "stock", +"300706": "stock", +"阿石创": "stock", +"603197": "stock", +"保隆科技": "stock", +"603701": "stock", +"德宏股份": "stock", +"688363": "stock", +"华熙生物": "stock", +"300615": "stock", +"欣天科技": "stock", +"300595": "stock", +"欧普康视": "stock", +"301200": "stock", +"大族数控": "stock", +"300602": "stock", +"飞荣达": "stock", +"000023": "stock", +"ST深天": "stock", +"300831": "stock", +"派瑞股份": "stock", +"603323": "stock", +"苏农银行": "stock", +"600871": "stock", +"石化油服": "stock", +"603227": "stock", +"雪峰科技": "stock", +"300814": "stock", +"中富电路": "stock", +"301185": "stock", +"鸥玛软件": "stock", +"003032": "stock", +"传智教育": "stock", +"603955": "stock", +"大千生态": "stock", +"300015": "stock", +"爱尔眼科": "stock", +"688428": "stock", +"诺诚健华-U": "stock", +"300514": "stock", +"友讯达": "stock", +"600864": "stock", +"哈投股份": "stock", +"002619": "stock", +"*ST艾格": "stock", +"603615": "stock", +"茶花股份": "stock", +"300882": "stock", +"万胜智能": "stock", +"300469": "stock", +"信息发展": "stock", +"002044": "stock", +"美年健康": "stock", +"002905": "stock", +"金逸影视": "stock", +"000628": "stock", +"高新发展": "stock", +"002214": "stock", +"大立科技": "stock", +"688326": "stock", +"经纬恒润-W": "stock", +"600398": "stock", +"海澜之家": "stock", +"001217": "stock", +"华尔泰": "stock", +"603699": "stock", +"纽威股份": "stock", +"002516": "stock", +"旷达科技": "stock", +"601878": "stock", +"浙商证券": "stock", +"603909": "stock", +"建发合诚": "stock", +"300467": "stock", +"迅游科技": "stock", +"837663": "stock", +"明阳科技": "stock", +"688271": "stock", +"联影医疗": "stock", +"002204": "stock", +"大连重工": "stock", +"603087": "stock", +"甘李药业": "stock", +"301529": "stock", +"福赛科技": "stock", +"600761": "stock", +"安徽合力": "stock", +"603617": 
"stock", +"君禾股份": "stock", +"871981": "stock", +"晶赛科技": "stock", +"002288": "stock", +"超华科技": "stock", +"600054": "stock", +"黄山旅游": "stock", +"838030": "stock", +"德众汽车": "stock", +"836504": "stock", +"博迅生物": "stock", +"601688": "stock", +"华泰证券": "stock", +"002774": "stock", +"快意电梯": "stock", +"600249": "stock", +"两面针": "stock", +"600520": "stock", +"文一科技": "stock", +"002118": "stock", +"*ST紫鑫": "stock", +"300844": "stock", +"山水比德": "stock", +"002032": "stock", +"苏泊尔": "stock", +"600133": "stock", +"东湖高新": "stock", +"300846": "stock", +"首都在线": "stock", +"300287": "stock", +"飞利信": "stock", +"688357": "stock", +"建龙微纳": "stock", +"833171": "stock", +"国航远洋": "stock", +"301421": "stock", +"波长光电": "stock", +"600003": "stock", +"ST东北高": "stock", +"600082": "stock", +"海泰发展": "stock", +"300777": "stock", +"中简科技": "stock", +"605168": "stock", +"三人行": "stock", +"002070": "stock", +"众和退": "stock", +"688310": "stock", +"迈得医疗": "stock", +"600230": "stock", +"沧州大化": "stock", +"600389": "stock", +"江山股份": "stock", +"002882": "stock", +"金龙羽": "stock", +"600143": "stock", +"金发科技": "stock", +"300261": "stock", +"雅本化学": "stock", +"000635": "stock", +"英力特": "stock", +"300620": "stock", +"光库科技": "stock", +"300290": "stock", +"荣科科技": "stock", +"600356": "stock", +"恒丰纸业": "stock", +"688191": "stock", +"智洋创新": "stock", +"688393": "stock", +"安必平": "stock", +"600650": "stock", +"锦江在线": "stock", +"002819": "stock", +"东方中科": "stock", +"002582": "stock", +"好想你": "stock", +"300338": "stock", +"开元教育": "stock", +"300094": "stock", +"国联水产": "stock", +"002208": "stock", +"合肥城建": "stock", +"002140": "stock", +"东华科技": "stock", +"300047": "stock", +"天源迪科": "stock", +"301172": "stock", +"君逸数码": "stock", +"002167": "stock", +"东方锆业": "stock", +"000915": "stock", +"华特达因": "stock", +"601899": "stock", +"紫金矿业": "stock", +"300139": "stock", +"晓程科技": "stock", +"600151": "stock", +"航天机电": "stock", +"600698": "stock", +"湖南天雁": "stock", +"003006": "stock", +"百亚股份": "stock", +"000887": "stock", +"中鼎股份": "stock", +"301016": "stock", +"雷尔伟": "stock", +"600007": "stock", +"中国国贸": "stock", +"603639": "stock", +"海利尔": "stock", +"831167": "stock", +"鑫汇科": "stock", +"688082": "stock", +"盛美上海": "stock", +"301296": "stock", +"新巨丰": "stock", +"603111": "stock", +"康尼机电": "stock", +"600573": "stock", +"惠泉啤酒": "stock", +"002809": "stock", +"红墙股份": "stock", +"601010": "stock", +"文峰股份": "stock", +"688195": "stock", +"腾景科技": "stock", +"300629": "stock", +"新劲刚": "stock", +"600811": "stock", +"东方集团": "stock", +"301062": "stock", +"上海艾录": "stock", +"002600": "stock", +"领益智造": "stock", +"002826": "stock", +"易明医药": "stock", +"600248": "stock", +"陕建股份": "stock", +"000837": "stock", +"秦川机床": "stock", +"603978": "stock", +"深圳新星": "stock", +"603275": "stock", +"众辰科技": "stock", +"000862": "stock", +"银星能源": "stock", +"600094": "stock", +"大名城": "stock", +"300069": "stock", +"金利华电": "stock", +"301448": "stock", +"开创电气": "stock", +"600270": "stock", +"外运发展": "stock", +"000651": "stock", +"格力电器": "stock", +"603392": "stock", +"万泰生物": "stock", +"002534": "stock", +"西子洁能": "stock", +"835174": "stock", +"五新隧装": "stock", +"836807": "stock", +"奔朗新材": "stock", +"002989": "stock", +"中天精装": "stock", +"603633": "stock", +"徕木股份": "stock", +"688398": "stock", +"赛特新材": "stock", +"301132": "stock", +"满坤科技": "stock", +"301079": "stock", +"邵阳液压": "stock", +"688622": "stock", +"禾信仪器": "stock", +"002691": "stock", +"冀凯股份": "stock", +"605199": "stock", +"葫芦娃": "stock", +"002973": "stock", +"侨银股份": "stock", +"001212": "stock", +"中旗新材": "stock", +"688697": "stock", +"纽威数控": "stock", 
+"603719": "stock", +"良品铺子": "stock", +"601136": "stock", +"首创证券": "stock", +"002541": "stock", +"鸿路钢构": "stock", +"000597": "stock", +"东北制药": "stock", +"600501": "stock", +"航天晨光": "stock", +"002632": "stock", +"道明光学": "stock", +"002306": "stock", +"中科云网": "stock", +"600260": "stock", +"*ST凯乐": "stock", +"600566": "stock", +"济川药业": "stock", +"300110": "stock", +"华仁药业": "stock", +"688387": "stock", +"信科移动-U": "stock", +"300559": "stock", +"佳发教育": "stock", +"002207": "stock", +"准油股份": "stock", +"003010": "stock", +"若羽臣": "stock", +"002674": "stock", +"兴业科技": "stock", +"603706": "stock", +"东方环宇": "stock", +"002323": "stock", +"雅博股份": "stock", +"600392": "stock", +"盛和资源": "stock", +"603359": "stock", +"东珠生态": "stock", +"688561": "stock", +"奇安信-U": "stock", +"300536": "stock", +"农尚环境": "stock", +"002096": "stock", +"易普力": "stock", +"002476": "stock", +"宝莫股份": "stock", +"002984": "stock", +"森麒麟": "stock", +"002940": "stock", +"昂利康": "stock", +"003009": "stock", +"中天火箭": "stock", +"301155": "stock", +"海力风电": "stock", +"300347": "stock", +"泰格医药": "stock", +"600277": "stock", +"亿利洁能": "stock", +"300463": "stock", +"迈克生物": "stock", +"600800": "stock", +"渤海化学": "stock", +"002352": "stock", +"顺丰控股": "stock", +"300319": "stock", +"麦捷科技": "stock", +"603357": "stock", +"设计总院": "stock", +"600069": "stock", +"退市银鸽": "stock", +"600794": "stock", +"保税科技": "stock", +"002768": "stock", +"国恩股份": "stock", +"002657": "stock", +"中科金财": "stock", +"002634": "stock", +"棒杰股份": "stock", +"601609": "stock", +"金田股份": "stock", +"300299": "stock", +"富春股份": "stock", +"300913": "stock", +"兆龙互连": "stock", +"688655": "stock", +"迅捷兴": "stock", +"688661": "stock", +"和林微纳": "stock", +"001238": "stock", +"浙江正特": "stock", +"688192": "stock", +"迪哲医药-U": "stock", +"001213": "stock", +"中铁特货": "stock", +"688200": "stock", +"华峰测控": "stock", +"002469": "stock", +"三维化学": "stock", +"688027": "stock", +"国盾量子": "stock", +"603321": "stock", +"梅轮电梯": "stock", +"605259": "stock", +"绿田机械": "stock", +"600385": "stock", +"退市金泰": "stock", +"002171": "stock", +"楚江新材": "stock", +"603061": "stock", +"金海通": "stock", +"600819": "stock", +"耀皮玻璃": "stock", +"688627": "stock", +"精智达": "stock", +"002961": "stock", +"瑞达期货": "stock", +"300444": "stock", +"双杰电气": "stock", +"001288": "stock", +"运机集团": "stock", +"600467": "stock", +"好当家": "stock", +"002359": "stock", +"北讯退": "stock", +"301025": "stock", +"读客文化": "stock", +"300857": "stock", +"协创数据": "stock", +"300135": "stock", +"宝利国际": "stock", +"603876": "stock", +"鼎胜新材": "stock", +"688212": "stock", +"澳华内镜": "stock", +"300177": "stock", +"中海达": "stock", +"603319": "stock", +"湘油泵": "stock", +"600612": "stock", +"老凤祥": "stock", +"002145": "stock", +"中核钛白": "stock", +"002946": "stock", +"新乳业": "stock", +"603690": "stock", +"至纯科技": "stock", +"601005": "stock", +"重庆钢铁": "stock", +"600021": "stock", +"上海电力": "stock", +"002678": "stock", +"珠江钢琴": "stock", +"430139": "stock", +"华岭股份": "stock", +"688211": "stock", +"中科微至": "stock", +"301040": "stock", +"中环海陆": "stock", +"834033": "stock", +"康普化学": "stock", +"600291": "stock", +"退市西水": "stock", +"872541": "stock", +"铁大科技": "stock", +"688050": "stock", +"爱博医疗": "stock", +"300175": "stock", +"朗源股份": "stock", +"002236": "stock", +"大华股份": "stock", +"000831": "stock", +"中国稀土": "stock", +"600186": "stock", +"莲花健康": "stock", +"002420": "stock", +"毅昌科技": "stock", +"603182": "stock", +"嘉华股份": "stock", +"600481": "stock", +"双良节能": "stock", +"601789": "stock", +"宁波建工": "stock", +"600718": "stock", +"东软集团": "stock", +"600539": "stock", +"狮头股份": "stock", +"301168": "stock", 
+"通灵股份": "stock", +"000625": "stock", +"长安汽车": "stock", +"600103": "stock", +"青山纸业": "stock", +"002135": "stock", +"东南网架": "stock", +"300591": "stock", +"万里马": "stock", +"688662": "stock", +"富信科技": "stock", +"002200": "stock", +"ST交投": "stock", +"000705": "stock", +"浙江震元": "stock", +"603331": "stock", +"百达精工": "stock", +"600223": "stock", +"福瑞达": "stock", +"300873": "stock", +"海晨股份": "stock", +"600748": "stock", +"上实发展": "stock", +"002855": "stock", +"捷荣技术": "stock", +"002018": "stock", +"华信退": "stock", +"301092": "stock", +"争光股份": "stock", +"600667": "stock", +"太极实业": "stock", +"600865": "stock", +"百大集团": "stock", +"002900": "stock", +"哈三联": "stock", +"300301": "stock", +"*ST长方": "stock", +"000592": "stock", +"平潭发展": "stock", +"603661": "stock", +"恒林股份": "stock", +"000505": "stock", +"京粮控股": "stock", +"000700": "stock", +"模塑科技": "stock", +"300593": "stock", +"新雷能": "stock", +"601098": "stock", +"中南传媒": "stock", +"301006": "stock", +"迈拓股份": "stock", +"601901": "stock", +"方正证券": "stock", +"688343": "stock", +"云天励飞-U": "stock", +"002473": "stock", +"圣莱退": "stock", +"600011": "stock", +"华能国际": "stock", +"000048": "stock", +"京基智农": "stock", +"300116": "stock", +"保力新": "stock", +"603118": "stock", +"共进股份": "stock", +"301002": "stock", +"崧盛股份": "stock", +"301057": "stock", +"汇隆新材": "stock", +"300768": "stock", +"迪普科技": "stock", +"002025": "stock", +"航天电器": "stock", +"600777": "stock", +"新潮能源": "stock", +"600423": "stock", +"柳化股份": "stock", +"000639": "stock", +"西王食品": "stock", +"600603": "stock", +"广汇物流": "stock", +"002035": "stock", +"华帝股份": "stock", +"600195": "stock", +"中牧股份": "stock", +"002177": "stock", +"御银股份": "stock", +"002479": "stock", +"富春环保": "stock", +"000889": "stock", +"ST中嘉": "stock", +"605305": "stock", +"中际联合": "stock", +"002702": "stock", +"海欣食品": "stock", +"300379": "stock", +"东方通": "stock", +"688052": "stock", +"纳芯微": "stock", +"600834": "stock", +"申通地铁": "stock", +"301127": "stock", +"天源环保": "stock", +"002127": "stock", +"南极电商": "stock", +"002847": "stock", +"盐津铺子": "stock", +"600528": "stock", +"中铁工业": "stock", +"301089": "stock", +"拓新药业": "stock", +"300227": "stock", +"光韵达": "stock", +"003019": "stock", +"宸展光电": "stock", +"000049": "stock", +"德赛电池": "stock", +"688151": "stock", +"华强科技": "stock", +"600917": "stock", +"重庆燃气": "stock", +"603733": "stock", +"仙鹤股份": "stock", +"605189": "stock", +"富春染织": "stock", +"002088": "stock", +"鲁阳节能": "stock", +"301108": "stock", +"洁雅股份": "stock", +"002711": "stock", +"欧浦退": "stock", +"300003": "stock", +"乐普医疗": "stock", +"600746": "stock", +"江苏索普": "stock", +"002583": "stock", +"海能达": "stock", +"002296": "stock", +"辉煌科技": "stock", +"600077": "stock", +"*ST宋都": "stock", +"601268": "stock", +"*ST二重": "stock", +"600308": "stock", +"华泰股份": "stock", +"002872": "stock", +"ST天圣": "stock", +"000697": "stock", +"*ST炼石": "stock", +"301559": "stock", +"N中集环": "stock", +"002510": "stock", +"天汽模": "stock", +"300516": "stock", +"久之洋": "stock", +"300084": "stock", +"海默科技": "stock", +"603963": "stock", +"大理药业": "stock", +"300946": "stock", +"恒而达": "stock", +"688072": "stock", +"拓荆科技": "stock", +"300190": "stock", +"维尔利": "stock", +"600838": "stock", +"上海九百": "stock", +"600289": "stock", +"ST信通": "stock", +"002160": "stock", +"常铝股份": "stock", +"000400": "stock", +"许继电气": "stock", +"601607": "stock", +"上海医药": "stock", +"301381": "stock", +"赛维时代": "stock", +"002042": "stock", +"华孚时尚": "stock", +"002980": "stock", +"华盛昌": "stock", +"603696": "stock", +"安记食品": "stock", +"688385": "stock", +"复旦微电": "stock", +"688426": "stock", +"康为世纪": "stock", +"688680": 
"stock", +"海优新材": "stock", +"300224": "stock", +"正海磁材": "stock", +"002259": "stock", +"ST升达": "stock", +"000670": "stock", +"盈方微": "stock", +"300351": "stock", +"永贵电器": "stock", +"000707": "stock", +"双环科技": "stock", +"688001": "stock", +"华兴源创": "stock", +"600590": "stock", +"泰豪科技": "stock", +"000636": "stock", +"风华高科": "stock", +"300159": "stock", +"新研股份": "stock", +"002038": "stock", +"双鹭药业": "stock", +"002880": "stock", +"卫光生物": "stock", +"601158": "stock", +"重庆水务": "stock", +"000725": "stock", +"京东方A": "stock", +"002580": "stock", +"圣阳股份": "stock", +"601777": "stock", +"力帆科技": "stock", +"688733": "stock", +"壹石通": "stock", +"605289": "stock", +"罗曼股份": "stock", +"300279": "stock", +"和晶科技": "stock", +"839725": "stock", +"惠丰钻石": "stock", +"601816": "stock", +"京沪高铁": "stock", +"002780": "stock", +"三夫户外": "stock", +"688259": "stock", +"创耀科技": "stock", +"601107": "stock", +"四川成渝": "stock", +"000532": "stock", +"华金资本": "stock", +"002862": "stock", +"实丰文化": "stock", +"300708": "stock", +"聚灿光电": "stock", +"002333": "stock", +"罗普斯金": "stock", +"300430": "stock", +"诚益通": "stock", +"000609": "stock", +"中迪投资": "stock", +"836422": "stock", +"润普食品": "stock", +"300915": "stock", +"海融科技": "stock", +"600019": "stock", +"宝钢股份": "stock", +"600792": "stock", +"云煤能源": "stock", +"301223": "stock", +"中荣股份": "stock", +"600995": "stock", +"南网储能": "stock", +"002394": "stock", +"联发股份": "stock", +"870204": "stock", +"沪江材料": "stock", +"002988": "stock", +"豪美新材": "stock", +"000930": "stock", +"中粮科技": "stock", +"301000": "stock", +"肇民科技": "stock", +"603041": "stock", +"美思德": "stock", +"601100": "stock", +"恒立液压": "stock", +"688333": "stock", +"铂力特": "stock", +"603558": "stock", +"健盛集团": "stock", +"000429": "stock", +"粤高速A": "stock", +"601319": "stock", +"中国人保": "stock", +"600826": "stock", +"兰生股份": "stock", +"688581": "stock", +"安杰思": "stock", +"603611": "stock", +"诺力股份": "stock", +"605090": "stock", +"九丰能源": "stock", +"601318": "stock", +"中国平安": "stock", +"600941": "stock", +"中国移动": "stock", +"000527": "stock", +"美的电器": "stock", +"001965": "stock", +"招商公路": "stock", +"300642": "stock", +"透景生命": "stock", +"301097": "stock", +"天益医疗": "stock", +"300346": "stock", +"南大光电": "stock", +"002690": "stock", +"美亚光电": "stock", +"000153": "stock", +"丰原药业": "stock", +"300622": "stock", +"博士眼镜": "stock", +"300146": "stock", +"汤臣倍健": "stock", +"002426": "stock", +"胜利精密": "stock", +"002123": "stock", +"梦网科技": "stock", +"688109": "stock", +"品茗科技": "stock", +"601515": "stock", +"东风股份": "stock", +"605399": "stock", +"晨光新材": "stock", +"000070": "stock", +"特发信息": "stock", +"600730": "stock", +"中国高科": "stock", +"600170": "stock", +"上海建工": "stock", +"600894": "stock", +"广日股份": "stock", +"688591": "stock", +"泰凌微": "stock", +"871694": "stock", +"中裕科技": "stock", +"002176": "stock", +"江特电机": "stock", +"600256": "stock", +"广汇能源": "stock", +"002082": "stock", +"万邦德": "stock", +"300705": "stock", +"九典制药": "stock", +"000816": "stock", +"智慧农业": "stock", +"600905": "stock", +"三峡能源": "stock", +"002265": "stock", +"建设工业": "stock", +"605398": "stock", +"新炬网络": "stock", +"300863": "stock", +"卡倍亿": "stock", +"601698": "stock", +"中国卫通": "stock", +"600830": "stock", +"香溢融通": "stock", +"688455": "stock", +"科捷智能": "stock", +"600671": "stock", +"*ST目药": "stock", +"301356": "stock", +"天振股份": "stock", +"301073": "stock", +"君亭酒店": "stock", +"600100": "stock", +"同方股份": "stock", +"002507": "stock", +"涪陵榨菜": "stock", +"002014": "stock", +"永新股份": "stock", +"301150": "stock", +"中一科技": "stock", +"603860": "stock", +"中公高科": "stock", +"000403": "stock", +"派林生物": "stock", 
+"002803": "stock", +"吉宏股份": "stock", +"002568": "stock", +"百润股份": "stock", +"002251": "stock", +"步步高": "stock", +"605028": "stock", +"世茂能源": "stock", +"603291": "stock", +"联合水务": "stock", +"002932": "stock", +"明德生物": "stock", +"600728": "stock", +"佳都科技": "stock", +"001207": "stock", +"联科科技": "stock", +"300732": "stock", +"设研院": "stock", +"300387": "stock", +"富邦股份": "stock", +"002666": "stock", +"德联集团": "stock", +"000603": "stock", +"盛达资源": "stock", +"000559": "stock", +"万向钱潮": "stock", +"688597": "stock", +"煜邦电力": "stock", +"300183": "stock", +"东软载波": "stock", +"002335": "stock", +"科华数据": "stock", +"600478": "stock", +"科力远": "stock", +"600692": "stock", +"亚通股份": "stock", +"871857": "stock", +"泓禧科技": "stock", +"600029": "stock", +"南方航空": "stock", +"301456": "stock", +"盘古智能": "stock", +"001296": "stock", +"长江材料": "stock", +"002373": "stock", +"千方科技": "stock", +"600766": "stock", +"*ST园城": "stock", +"300649": "stock", +"杭州园林": "stock", +"301280": "stock", +"珠城科技": "stock", +"600267": "stock", +"海正药业": "stock", +"002544": "stock", +"普天科技": "stock", +"002799": "stock", +"环球印务": "stock", +"002999": "stock", +"天禾股份": "stock", +"838670": "stock", +"恒进感应": "stock", +"603052": "stock", +"可川科技": "stock", +"837006": "stock", +"晟楠科技": "stock", +"002386": "stock", +"天原股份": "stock", +"831627": "stock", +"力王股份": "stock", +"300899": "stock", +"上海凯鑫": "stock", +"300801": "stock", +"泰和科技": "stock", +"603377": "stock", +"东方时尚": "stock", +"603569": "stock", +"长久物流": "stock", +"600828": "stock", +"茂业商业": "stock", +"000540": "stock", +"*ST中天": "stock", +"000511": "stock", +"烯碳退": "stock", +"002119": "stock", +"康强电子": "stock", +"300985": "stock", +"致远新能": "stock", +"301512": "stock", +"智信精密": "stock", +"600148": "stock", +"长春一东": "stock", +"000619": "stock", +"海螺新材": "stock", +"301333": "stock", +"诺思格": "stock", +"688120": "stock", +"华海清科": "stock", +"002271": "stock", +"东方雨虹": "stock", +"300582": "stock", +"英飞特": "stock", +"688249": "stock", +"晶合集成": "stock", +"301266": "stock", +"宇邦新材": "stock", +"603662": "stock", +"柯力传感": "stock", +"688671": "stock", +"碧兴物联": "stock", +"688231": "stock", +"隆达股份": "stock", +"300681": "stock", +"英搏尔": "stock", +"603028": "stock", +"赛福天": "stock", +"000166": "stock", +"申万宏源": "stock", +"301391": "stock", +"卡莱特": "stock", +"600269": "stock", +"赣粤高速": "stock", +"000006": "stock", +"深振业A": "stock", +"300012": "stock", +"华测检测": "stock", +"300115": "stock", +"长盈精密": "stock", +"688502": "stock", +"茂莱光学": "stock", +"002423": "stock", +"中粮资本": "stock", +"603157": "stock", +"退市拉夏": "stock", +"688182": "stock", +"灿勤科技": "stock", +"301203": "stock", +"国泰环保": "stock", +"601866": "stock", +"中远海发": "stock", +"300297": "stock", +"蓝盾退": "stock", +"600403": "stock", +"大有能源": "stock", +"600301": "stock", +"华锡有色": "stock", +"873305": "stock", +"九菱科技": "stock", +"301098": "stock", +"金埔园林": "stock", +"603035": "stock", +"常熟汽饰": "stock", +"002499": "stock", +"科林退": "stock", +"001227": "stock", +"兰州银行": "stock", +"002237": "stock", +"恒邦股份": "stock", +"002561": "stock", +"徐家汇": "stock", +"300293": "stock", +"蓝英装备": "stock", +"002219": "stock", +"新里程": "stock", +"603366": "stock", +"日出东方": "stock", +"000629": "stock", +"钒钛股份": "stock", +"300160": "stock", +"秀强股份": "stock", +"002357": "stock", +"富临运业": "stock", +"603949": "stock", +"雪龙集团": "stock", +"688046": "stock", +"药康生物": "stock", +"002390": "stock", +"信邦制药": "stock", +"300186": "stock", +"大华农": "stock", +"300226": "stock", +"上海钢联": "stock", +"002452": "stock", +"长高电新": "stock", +"600203": "stock", +"福日电子": "stock", +"688100": "stock", +"威胜信息": 
"stock", +"300201": "stock", +"海伦哲": "stock", +"600456": "stock", +"宝钛股份": "stock", +"688629": "stock", +"华丰科技": "stock", +"000568": "stock", +"泸州老窖": "stock", +"301215": "stock", +"中汽股份": "stock", +"688347": "stock", +"华虹公司": "stock", +"300286": "stock", +"安科瑞": "stock", +"603151": "stock", +"邦基科技": "stock", +"600628": "stock", +"新世界": "stock", +"688372": "stock", +"伟测科技": "stock", +"605151": "stock", +"西上海": "stock", +"301137": "stock", +"哈焊华通": "stock", +"300618": "stock", +"寒锐钴业": "stock", +"300835": "stock", +"龙磁科技": "stock", +"000582": "stock", +"北部湾港": "stock", +"300144": "stock", +"宋城演艺": "stock", +"603660": "stock", +"苏州科达": "stock", +"600283": "stock", +"钱江水利": "stock", +"300712": "stock", +"永福股份": "stock", +"300687": "stock", +"赛意信息": "stock", +"300470": "stock", +"中密控股": "stock", +"300999": "stock", +"金龙鱼": "stock", +"002328": "stock", +"新朋股份": "stock", +"688087": "stock", +"英科再生": "stock", +"002686": "stock", +"亿利达": "stock", +"001260": "stock", +"坤泰股份": "stock", +"300302": "stock", +"同有科技": "stock", +"600321": "stock", +"正源股份": "stock", +"603198": "stock", +"迎驾贡酒": "stock", +"832876": "stock", +"慧为智能": "stock", +"600085": "stock", +"同仁堂": "stock", +"000722": "stock", +"湖南发展": "stock", +"603360": "stock", +"百傲化学": "stock", +"300791": "stock", +"仙乐健康": "stock", +"002027": "stock", +"分众传媒": "stock", +"603017": "stock", +"中衡设计": "stock", +"603083": "stock", +"剑桥科技": "stock", +"002694": "stock", +"顾地科技": "stock", +"603320": "stock", +"迪贝电气": "stock", +"603369": "stock", +"今世缘": "stock", +"601989": "stock", +"中国重工": "stock", +"300498": "stock", +"温氏股份": "stock", +"600367": "stock", +"红星发展": "stock", +"000020": "stock", +"深华发A": "stock", +"600721": "stock", +"百花医药": "stock", +"000713": "stock", +"丰乐种业": "stock", +"603278": "stock", +"大业股份": "stock", +"000578": "stock", +"盐湖集团": "stock", +"688981": "stock", +"中芯国际": "stock", +"688628": "stock", +"优利德": "stock", +"301080": "stock", +"百普赛斯": "stock", +"002967": "stock", +"广电计量": "stock", +"301360": "stock", +"荣旗科技": "stock", +"300571": "stock", +"平治信息": "stock", +"002760": "stock", +"凤形股份": "stock", +"000959": "stock", +"首钢股份": "stock", +"002253": "stock", +"川大智胜": "stock", +"301289": "stock", +"国缆检测": "stock", +"000585": "stock", +"东电退": "stock", +"002388": "stock", +"新亚制程": "stock", +"603079": "stock", +"圣达生物": "stock", +"000415": "stock", +"渤海租赁": "stock", +"688550": "stock", +"瑞联新材": "stock", +"300028": "stock", +"金亚退": "stock", +"002912": "stock", +"中新赛克": "stock", +"002391": "stock", +"长青股份": "stock", +"300233": "stock", +"金城医药": "stock", +"000966": "stock", +"长源电力": "stock", +"000596": "stock", +"古井贡酒": "stock", +"300132": "stock", +"青松股份": "stock", +"600683": "stock", +"京投发展": "stock", +"603815": "stock", +"交建股份": "stock", +"002902": "stock", +"铭普光磁": "stock", +"600192": "stock", +"长城电工": "stock", +"301102": "stock", +"兆讯传媒": "stock", +"300678": "stock", +"中科信息": "stock", +"000035": "stock", +"中国天楹": "stock", +"600529": "stock", +"山东药玻": "stock", +"603328": "stock", +"依顿电子": "stock", +"300981": "stock", +"中红医疗": "stock", +"603895": "stock", +"天永智能": "stock", +"600056": "stock", +"中国医药": "stock", +"301136": "stock", +"招标股份": "stock", +"688108": "stock", +"赛诺医疗": "stock", +"600265": "stock", +"ST景谷": "stock", +"603288": "stock", +"海天味业": "stock", +"000709": "stock", +"河钢股份": "stock", +"832110": "stock", +"雷特科技": "stock", +"603596": "stock", +"伯特利": "stock", +"000546": "stock", +"ST金圆": "stock", +"603711": "stock", +"香飘飘": "stock", +"002356": "stock", +"赫美集团": "stock", +"002548": "stock", +"金新农": "stock", +"688337": "stock", 
+"普源精电": "stock", +"600993": "stock", +"马应龙": "stock", +"300859": "stock", +"*ST西域": "stock", +"300294": "stock", +"博雅生物": "stock", +"301312": "stock", +"智立方": "stock", +"688071": "stock", +"华依科技": "stock", +"000812": "stock", +"陕西金叶": "stock", +"301525": "stock", +"儒竞科技": "stock", +"002521": "stock", +"齐峰新材": "stock", +"300832": "stock", +"新产业": "stock", +"603689": "stock", +"皖天然气": "stock", +"301096": "stock", +"百诚医药": "stock", +"002910": "stock", +"庄园牧场": "stock", +"002813": "stock", +"路畅科技": "stock", +"002758": "stock", +"浙农股份": "stock", +"300078": "stock", +"思创医惠": "stock", +"000151": "stock", +"中成股份": "stock", +"603612": "stock", +"索通发展": "stock", +"002043": "stock", +"兔宝宝": "stock", +"603883": "stock", +"老百姓": "stock", +"301282": "stock", +"金禄电子": "stock", +"600017": "stock", +"日照港": "stock", +"002260": "stock", +"德奥退": "stock", +"605086": "stock", +"龙高股份": "stock", +"002827": "stock", +"高争民爆": "stock", +"002374": "stock", +"中锐股份": "stock", +"600701": "stock", +"退市工新": "stock", +"603666": "stock", +"亿嘉和": "stock", +"603069": "stock", +"海汽集团": "stock", +"688009": "stock", +"中国通号": "stock", +"300869": "stock", +"康泰医学": "stock", +"300543": "stock", +"朗科智能": "stock", +"002186": "stock", +"全聚德": "stock", +"002107": "stock", +"沃华医药": "stock", +"600247": "stock", +"*ST成城": "stock", +"600835": "stock", +"上海机电": "stock", +"300089": "stock", +"文化退": "stock", +"002355": "stock", +"兴民智通": "stock", +"002115": "stock", +"三维通信": "stock", +"001316": "stock", +"润贝航科": "stock", +"000526": "stock", +"学大教育": "stock", +"603729": "stock", +"龙韵股份": "stock", +"000663": "stock", +"永安林业": "stock", +"300041": "stock", +"回天新材": "stock", +"603810": "stock", +"丰山集团": "stock", +"300725": "stock", +"药石科技": "stock", +"603535": "stock", +"嘉诚国际": "stock", +"600846": "stock", +"同济科技": "stock", +"002953": "stock", +"日丰股份": "stock", +"002911": "stock", +"佛燃能源": "stock", +"300271": "stock", +"华宇软件": "stock", +"002220": "stock", +"天宝退": "stock", +"002892": "stock", +"科力尔": "stock", +"001378": "stock", +"德冠新材": "stock", +"002272": "stock", +"川润股份": "stock", +"002230": "stock", +"科大讯飞": "stock", +"688286": "stock", +"敏芯股份": "stock", +"600422": "stock", +"昆药集团": "stock", +"300178": "stock", +"腾邦退": "stock", +"000755": "stock", +"山西路桥": "stock", +"300431": "stock", +"暴风退": "stock", +"688548": "stock", +"广钢气体": "stock", +"688401": "stock", +"路维光电": "stock", +"601216": "stock", +"君正集团": "stock", +"603015": "stock", +"弘讯科技": "stock", +"603890": "stock", +"春秋电子": "stock", +"300236": "stock", +"上海新阳": "stock", +"600891": "stock", +"退市秋林": "stock", +"000972": "stock", +"中基健康": "stock", +"300409": "stock", +"道氏技术": "stock", +"000061": "stock", +"农产品": "stock", +"605178": "stock", +"时空科技": "stock", +"600362": "stock", +"江西铜业": "stock", +"300865": "stock", +"大宏立": "stock", +"603201": "stock", +"常润股份": "stock", +"002729": "stock", +"好利科技": "stock", +"000024": "stock", +"招商地产": "stock", +"600604": "stock", +"市北高新": "stock", +"688298": "stock", +"东方生物": "stock", +"600752": "stock", +"*ST哈慈": "stock", +"002079": "stock", +"苏州固锝": "stock", +"000659": "stock", +"珠海中富": "stock", +"830809": "stock", +"安达科技": "stock", +"002125": "stock", +"湘潭电化": "stock", +"002796": "stock", +"世嘉科技": "stock", +"688518": "stock", +"联赢激光": "stock", +"688368": "stock", +"晶丰明源": "stock", +"601012": "stock", +"隆基绿能": "stock", +"002846": "stock", +"英联股份": "stock", +"301042": "stock", +"安联锐视": "stock", +"832735": "stock", +"德源药业": "stock", +"301310": "stock", +"鑫宏业": "stock", +"833943": "stock", +"优机股份": "stock", +"002917": "stock", +"金奥博": "stock", +"601588": 
"stock", +"北辰实业": "stock", +"688317": "stock", +"之江生物": "stock", +"300068": "stock", +"南都电源": "stock", +"300405": "stock", +"科隆股份": "stock", +"301217": "stock", +"铜冠铜箔": "stock", +"000768": "stock", +"中航西飞": "stock", +"600837": "stock", +"海通证券": "stock", +"002383": "stock", +"合众思壮": "stock", +"601818": "stock", +"光大银行": "stock", +"300737": "stock", +"科顺股份": "stock", +"000963": "stock", +"华东医药": "stock", +"300080": "stock", +"易成新能": "stock", +"003040": "stock", +"楚天龙": "stock", +"300504": "stock", +"天邑股份": "stock", +"603982": "stock", +"泉峰汽车": "stock", +"300739": "stock", +"明阳电路": "stock", +"688516": "stock", +"奥特维": "stock", +"000892": "stock", +"欢瑞世纪": "stock", +"300993": "stock", +"玉马遮阳": "stock", +"300076": "stock", +"GQY视讯": "stock", +"600900": "stock", +"长江电力": "stock", +"300458": "stock", +"全志科技": "stock", +"300038": "stock", +"数知退": "stock", +"300680": "stock", +"隆盛科技": "stock", +"603159": "stock", +"上海亚虹": "stock", +"300333": "stock", +"兆日科技": "stock", +"600996": "stock", +"贵广网络": "stock", +"002304": "stock", +"洋河股份": "stock", +"603838": "stock", +"四通股份": "stock", +"002558": "stock", +"巨人网络": "stock", +"603466": "stock", +"风语筑": "stock", +"300118": "stock", +"东方日升": "stock", +"600379": "stock", +"宝光股份": "stock", +"688450": "stock", +"光格科技": "stock", +"605080": "stock", +"浙江自然": "stock", +"002019": "stock", +"亿帆医药": "stock", +"301115": "stock", +"建科股份": "stock", +"603153": "stock", +"上海建科": "stock", +"000693": "stock", +"华泽退": "stock", +"300317": "stock", +"珈伟新能": "stock", +"000157": "stock", +"中联重科": "stock", +"300424": "stock", +"航新科技": "stock", +"600109": "stock", +"国金证券": "stock", +"002590": "stock", +"万安科技": "stock", +"000155": "stock", +"川能动力": "stock", +"600869": "stock", +"远东股份": "stock", +"688638": "stock", +"誉辰智能": "stock", +"600812": "stock", +"华北制药": "stock", +"300943": "stock", +"春晖智控": "stock", +"600234": "stock", +"科新发展": "stock", +"603190": "stock", +"亚通精工": "stock", +"603922": "stock", +"金鸿顺": "stock", +"301106": "stock", +"骏成科技": "stock", +"000850": "stock", +"华茂股份": "stock", +"600217": "stock", +"中再资环": "stock", +"002907": "stock", +"华森制药": "stock", +"603187": "stock", +"海容冷链": "stock", +"002206": "stock", +"海利得": "stock", +"601020": "stock", +"华钰矿业": "stock", +"000823": "stock", +"超声电子": "stock", +"833429": "stock", +"康比特": "stock", +"002274": "stock", +"华昌化工": "stock", +"603858": "stock", +"步长制药": "stock", +"605186": "stock", +"健麾信息": "stock", +"603122": "stock", +"合富中国": "stock", +"301278": "stock", +"快可电子": "stock", +"300845": "stock", +"捷安高科": "stock", +"002226": "stock", +"江南化工": "stock", +"300240": "stock", +"飞力达": "stock", +"000689": "stock", +"ST宏业": "stock", +"600989": "stock", +"宝丰能源": "stock", +"002738": "stock", +"中矿资源": "stock", +"600470": "stock", +"六国化工": "stock", +"600236": "stock", +"桂冠电力": "stock", +"000868": "stock", +"安凯客车": "stock", +"300149": "stock", +"睿智医药": "stock", +"003022": "stock", +"联泓新科": "stock", +"001336": "stock", +"楚环科技": "stock", +"601228": "stock", +"广州港": "stock", +"002101": "stock", +"广东鸿图": "stock", +"300780": "stock", +"德恩精工": "stock", +"000562": "stock", +"宏源证券": "stock", +"600626": "stock", +"申达股份": "stock", +"601177": "stock", +"杭齿前进": "stock", +"600530": "stock", +"ST交昂": "stock", +"688127": "stock", +"蓝特光学": "stock", +"001218": "stock", +"丽臣实业": "stock", +"000564": "stock", +"ST大集": "stock", +"300128": "stock", +"锦富技术": "stock", +"601975": "stock", +"招商南油": "stock", +"002815": "stock", +"崇达技术": "stock", +"000780": "stock", +"ST平能": "stock", +"833266": "stock", +"生物谷": "stock", +"000535": "stock", +"*ST猴王": "stock", 
+"688156": "stock", +"路德环境": "stock", +"300682": "stock", +"朗新科技": "stock", +"300941": "stock", +"创识科技": "stock", +"605167": "stock", +"利柏特": "stock", +"600522": "stock", +"中天科技": "stock", +"688110": "stock", +"东芯股份": "stock", +"605009": "stock", +"豪悦护理": "stock", +"002725": "stock", +"跃岭股份": "stock", +"600378": "stock", +"昊华科技": "stock", +"000998": "stock", +"隆平高科": "stock", +"600273": "stock", +"嘉化能源": "stock", +"603819": "stock", +"神力股份": "stock", +"301048": "stock", +"金鹰重工": "stock", +"603928": "stock", +"兴业股份": "stock", +"834261": "stock", +"一诺威": "stock", +"000408": "stock", +"藏格矿业": "stock", +"600623": "stock", +"华谊集团": "stock", +"300610": "stock", +"晨化股份": "stock", +"603161": "stock", +"科华控股": "stock", +"601700": "stock", +"风范股份": "stock", +"002786": "stock", +"银宝山新": "stock", +"000567": "stock", +"海德股份": "stock", +"600976": "stock", +"健民集团": "stock", +"000809": "stock", +"铁岭新城": "stock", +"603088": "stock", +"宁波精达": "stock", +"000797": "stock", +"中国武夷": "stock", +"601798": "stock", +"蓝科高新": "stock", +"002299": "stock", +"圣农发展": "stock", +"002996": "stock", +"顺博合金": "stock", +"002650": "stock", +"加加食品": "stock", +"836395": "stock", +"朗鸿科技": "stock", +"603269": "stock", +"海鸥股份": "stock", +"002429": "stock", +"兆驰股份": "stock", +"002371": "stock", +"北方华创": "stock", +"605118": "stock", +"力鼎光电": "stock", +"000782": "stock", +"美达股份": "stock", +"300031": "stock", +"宝通科技": "stock", +"300326": "stock", +"凯利泰": "stock", +"688081": "stock", +"兴图新科": "stock", +"600078": "stock", +"ST澄星": "stock", +"301378": "stock", +"通达海": "stock", +"000058": "stock", +"深赛格": "stock", +"002529": "stock", +"海源复材": "stock", +"300541": "stock", +"先进数通": "stock", +"300272": "stock", +"开能健康": "stock", +"300344": "stock", +"立方数科": "stock", +"603228": "stock", +"景旺电子": "stock", +"301153": "stock", +"中科江南": "stock", +"605108": "stock", +"同庆楼": "stock", +"300313": "stock", +"*ST天山": "stock", +"000698": "stock", +"沈阳化工": "stock", +"300955": "stock", +"嘉亨家化": "stock", +"002943": "stock", +"宇晶股份": "stock", +"600809": "stock", +"山西汾酒": "stock", +"839167": "stock", +"同享科技": "stock", +"300803": "stock", +"指南针": "stock", +"002801": "stock", +"微光股份": "stock", +"002960": "stock", +"青鸟消防": "stock", +"830879": "stock", +"基康仪器": "stock", +"688339": "stock", +"亿华通-U": "stock", +"603193": "stock", +"润本股份": "stock", +"688416": "stock", +"恒烁股份": "stock", +"688599": "stock", +"天合光能": "stock", +"002392": "stock", +"北京利尔": "stock", +"600778": "stock", +"友好集团": "stock", +"300195": "stock", +"长荣股份": "stock", +"300510": "stock", +"金冠股份": "stock", +"300942": "stock", +"易瑞生物": "stock", +"300738": "stock", +"奥飞数据": "stock", +"002460": "stock", +"赣锋锂业": "stock", +"600557": "stock", +"康缘药业": "stock", +"600769": "stock", +"祥龙电业": "stock", +"600145": "stock", +"退市新亿": "stock", +"002578": "stock", +"闽发铝业": "stock", +"300203": "stock", +"聚光科技": "stock", +"300427": "stock", +"*ST红相": "stock", +"600184": "stock", +"光电股份": "stock", +"002733": "stock", +"雄韬股份": "stock", +"300426": "stock", +"唐德影视": "stock", +"688006": "stock", +"杭可科技": "stock", +"000514": "stock", +"渝开发": "stock", +"688026": "stock", +"洁特生物": "stock", +"002054": "stock", +"德美化工": "stock", +"600776": "stock", +"东方通信": "stock", +"000737": "stock", +"北方铜业": "stock", +"002843": "stock", +"泰嘉股份": "stock", +"603261": "stock", +"立航科技": "stock", +"300685": "stock", +"艾德生物": "stock", +"600642": "stock", +"申能股份": "stock", +"600589": "stock", +"*ST榕泰": "stock", +"600975": "stock", +"新五丰": "stock", +"301117": "stock", +"佳缘科技": "stock", +"600851": "stock", +"海欣股份": "stock", +"835305": "stock", 
+"云创数据": "stock", +"603609": "stock", +"禾丰股份": "stock", +"300040": "stock", +"九洲集团": "stock", +"688501": "stock", +"青达环保": "stock", +"600876": "stock", +"凯盛新能": "stock", +"300811": "stock", +"铂科新材": "stock", +"833509": "stock", +"同惠电子": "stock", +"300785": "stock", +"值得买": "stock", +"300800": "stock", +"力合科技": "stock", +"002923": "stock", +"润都股份": "stock", +"000676": "stock", +"智度股份": "stock", +"688300": "stock", +"联瑞新材": "stock", +"603915": "stock", +"国茂股份": "stock", +"300632": "stock", +"光莆股份": "stock", +"600613": "stock", +"神奇制药": "stock", +"301295": "stock", +"美硕科技": "stock", +"300265": "stock", +"通光线缆": "stock", +"301067": "stock", +"显盈科技": "stock", +"831278": "stock", +"泰德股份": "stock", +"601838": "stock", +"成都银行": "stock", +"300384": "stock", +"三联虹普": "stock", +"601869": "stock", +"长飞光纤": "stock", +"300748": "stock", +"金力永磁": "stock", +"002192": "stock", +"融捷股份": "stock", +"601633": "stock", +"长城汽车": "stock", +"002493": "stock", +"荣盛石化": "stock", +"300437": "stock", +"清水源": "stock", +"603867": "stock", +"新化股份": "stock", +"600594": "stock", +"益佰制药": "stock", +"837592": "stock", +"华信永道": "stock", +"000063": "stock", +"中兴通讯": "stock", +"002823": "stock", +"凯中精密": "stock", +"002068": "stock", +"黑猫股份": "stock", +"301069": "stock", +"凯盛新材": "stock", +"688084": "stock", +"晶品特装": "stock", +"603230": "stock", +"内蒙新华": "stock", +"002497": "stock", +"雅化集团": "stock", +"601199": "stock", +"江南水务": "stock", +"300016": "stock", +"北陆药业": "stock", +"600568": "stock", +"ST中珠": "stock", +"002638": "stock", +"勤上股份": "stock", +"002336": "stock", +"人人乐": "stock", +"301166": "stock", +"优宁维": "stock", +"300223": "stock", +"北京君正": "stock", +"601933": "stock", +"永辉超市": "stock", +"000821": "stock", +"京山轻机": "stock", +"300162": "stock", +"雷曼光电": "stock", +"300842": "stock", +"帝科股份": "stock", +"603566": "stock", +"普莱柯": "stock", +"603125": "stock", +"常青科技": "stock", +"300030": "stock", +"阳普医疗": "stock", +"600783": "stock", +"鲁信创投": "stock", +"688172": "stock", +"燕东微": "stock", +"002089": "stock", +"*ST新海": "stock", +"300840": "stock", +"酷特智能": "stock", +"600845": "stock", +"宝信软件": "stock", +"301059": "stock", +"金三江": "stock", +"600141": "stock", +"兴发集团": "stock", +"002290": "stock", +"禾盛新材": "stock", +"600688": "stock", +"上海石化": "stock", +"300096": "stock", +"易联众": "stock", +"603500": "stock", +"祥和实业": "stock", +"301037": "stock", +"保立佳": "stock", +"300114": "stock", +"中航电测": "stock", +"601187": "stock", +"厦门银行": "stock", +"000886": "stock", +"海南高速": "stock", +"600862": "stock", +"中航高科": "stock", +"300984": "stock", +"金沃股份": "stock", +"000990": "stock", +"诚志股份": "stock", +"002103": "stock", +"广博股份": "stock", +"002718": "stock", +"友邦吊顶": "stock", +"601857": "stock", +"中国石油": "stock", +"601211": "stock", +"国泰君安": "stock", +"002763": "stock", +"汇洁股份": "stock", +"002791": "stock", +"坚朗五金": "stock", +"301359": "stock", +"东南电子": "stock", +"002040": "stock", +"南京港": "stock", +"836826": "stock", +"盖世食品": "stock", +"600596": "stock", +"新安股份": "stock", +"001219": "stock", +"青岛食品": "stock", +"600183": "stock", +"生益科技": "stock", +"300150": "stock", +"世纪瑞尔": "stock", +"688373": "stock", +"盟科药业-U": "stock", +"601311": "stock", +"骆驼股份": "stock", +"688555": "stock", +"退市泽达": "stock", +"300180": "stock", +"华峰超纤": "stock", +"688589": "stock", +"力合微": "stock", +"300324": "stock", +"旋极信息": "stock", +"300939": "stock", +"秋田微": "stock", +"688319": "stock", +"欧林生物": "stock", +"300285": "stock", +"国瓷材料": "stock", +"301268": "stock", +"铭利达": "stock", +"600126": "stock", +"杭钢股份": "stock", +"002539": "stock", +"云图控股": "stock", +"600817": 
"stock", +"宇通重工": "stock", +"600336": "stock", +"澳柯玛": "stock", +"600823": "stock", +"ST世茂": "stock", +"688376": "stock", +"美埃科技": "stock", +"300568": "stock", +"星源材质": "stock", +"301306": "stock", +"西测测试": "stock", +"002028": "stock", +"思源电气": "stock", +"601868": "stock", +"中国能建": "stock", +"002400": "stock", +"省广集团": "stock", +"831039": "stock", +"国义招标": "stock", +"300833": "stock", +"浩洋股份": "stock", +"300181": "stock", +"佐力药业": "stock", +"600722": "stock", +"金牛化工": "stock", +"600786": "stock", +"东方锅炉": "stock", +"002191": "stock", +"劲嘉股份": "stock", +"002864": "stock", +"盘龙药业": "stock", +"301121": "stock", +"紫建电子": "stock", +"301120": "stock", +"新特电气": "stock", +"000935": "stock", +"四川双马": "stock", +"300112": "stock", +"万讯自控": "stock", +"688683": "stock", +"莱尔科技": "stock", +"300529": "stock", +"健帆生物": "stock", +"002591": "stock", +"恒大高新": "stock", +"300518": "stock", +"盛讯达": "stock", +"000800": "stock", +"一汽解放": "stock", +"300805": "stock", +"电声股份": "stock", +"688137": "stock", +"近岸蛋白": "stock", +"603345": "stock", +"安井食品": "stock", +"300888": "stock", +"稳健医疗": "stock", +"000558": "stock", +"莱茵体育": "stock", +"000045": "stock", +"深纺织A": "stock", +"688433": "stock", +"华曙高科": "stock", +"002203": "stock", +"海亮股份": "stock", +"300871": "stock", +"回盛生物": "stock", +"300722": "stock", +"新余国科": "stock", +"688777": "stock", +"中控技术": "stock", +"688235": "stock", +"百济神州-U": "stock", +"688299": "stock", +"长阳科技": "stock", +"301339": "stock", +"通行宝": "stock", +"601500": "stock", +"通用股份": "stock", +"002884": "stock", +"凌霄泵业": "stock", +"300490": "stock", +"华自科技": "stock", +"601375": "stock", +"中原证券": "stock", +"600072": "stock", +"中船科技": "stock", +"688115": "stock", +"思林杰": "stock", +"300064": "stock", +"金刚退": "stock", +"002777": "stock", +"久远银海": "stock", +"300281": "stock", +"金明精机": "stock", +"300260": "stock", +"新莱应材": "stock", +"603536": "stock", +"惠发食品": "stock", +"600607": "stock", +"上实医药": "stock", +"603778": "stock", +"乾景园林": "stock", +"688409": "stock", +"富创精密": "stock", +"000790": "stock", +"华神科技": "stock", +"300021": "stock", +"大禹节水": "stock", +"000042": "stock", +"中洲控股": "stock", +"688255": "stock", +"凯尔达": "stock", +"000860": "stock", +"顺鑫农业": "stock", +"300860": "stock", +"锋尚文化": "stock", +"688573": "stock", +"信宇人": "stock", +"605016": "stock", +"百龙创园": "stock", +"600879": "stock", +"航天电子": "stock", +"000520": "stock", +"凤凰航运": "stock", +"300192": "stock", +"科德教育": "stock", +"301075": "stock", +"多瑞医药": "stock", +"600074": "stock", +"退市保千": "stock", +"836419": "stock", +"万德股份": "stock", +"000009": "stock", +"中国宝安": "stock", +"603776": "stock", +"永安行": "stock", +"688331": "stock", +"荣昌生物": "stock", +"688345": "stock", +"博力威": "stock", +"600116": "stock", +"三峡水利": "stock", +"000001": "stock", +"平安银行": "stock", +"300936": "stock", +"中英科技": "stock", +"300664": "stock", +"鹏鹞环保": "stock", +"688506": "stock", +"百利天恒-U": "stock", +"002470": "stock", +"金正大": "stock", +"002517": "stock", +"恺英网络": "stock", +"000501": "stock", +"武商集团": "stock", +"300698": "stock", +"万马科技": "stock", +"688679": "stock", +"通源环境": "stock", +"688553": "stock", +"汇宇制药-W": "stock", +"839946": "stock", +"华阳变速": "stock", +"600190": "stock", +"锦州港": "stock", +"002017": "stock", +"东信和平": "stock", +"603992": "stock", +"松霖科技": "stock", +"600877": "stock", +"电科芯片": "stock", +"603286": "stock", +"日盈电子": "stock", +"000618": "stock", +"吉林化工": "stock", +"603758": "stock", +"秦安股份": "stock", +"002354": "stock", +"天娱数科": "stock", +"300797": "stock", +"钢研纳克": "stock", +"301446": "stock", +"福事特": "stock", +"002139": "stock", +"拓邦股份": "stock", 
+"002508": "stock", +"老板电器": "stock", +"688225": "stock", +"亚信安全": "stock", +"300103": "stock", +"达刚控股": "stock", +"601717": "stock", +"郑煤机": "stock", +"002745": "stock", +"木林森": "stock", +"002570": "stock", +"贝因美": "stock", +"301386": "stock", +"未来电器": "stock", +"301186": "stock", +"超达装备": "stock", +"002627": "stock", +"三峡旅游": "stock", +"300253": "stock", +"卫宁健康": "stock", +"301329": "stock", +"信音电子": "stock", +"301398": "stock", +"星源卓镁": "stock", +"301468": "stock", +"博盈特焊": "stock", +"300360": "stock", +"炬华科技": "stock", +"300639": "stock", +"凯普生物": "stock", +"688503": "stock", +"聚和材料": "stock", +"600173": "stock", +"卧龙地产": "stock", +"002588": "stock", +"史丹利": "stock", +"600497": "stock", +"驰宏锌锗": "stock", +"603051": "stock", +"鹿山新材": "stock", +"688478": "stock", +"晶升股份": "stock", +"831641": "stock", +"格利尔": "stock", +"002942": "stock", +"新农股份": "stock", +"603128": "stock", +"华贸物流": "stock", +"603329": "stock", +"上海雅仕": "stock", +"000660": "stock", +"*ST南华": "stock", +"002706": "stock", +"良信股份": "stock", +"001298": "stock", +"好上好": "stock", +"301218": "stock", +"华是科技": "stock", +"301285": "stock", +"鸿日达": "stock", +"603071": "stock", +"物产环能": "stock", +"603383": "stock", +"顶点软件": "stock", +"300174": "stock", +"元力股份": "stock", +"301205": "stock", +"联特科技": "stock", +"600880": "stock", +"博瑞传播": "stock", +"600320": "stock", +"振华重工": "stock", +"600681": "stock", +"百川能源": "stock", +"001328": "stock", +"登康口腔": "stock", +"603222": "stock", +"济民医疗": "stock", +"300493": "stock", +"润欣科技": "stock", +"301273": "stock", +"瑞晨环保": "stock", +"002457": "stock", +"青龙管业": "stock", +"601956": "stock", +"东贝集团": "stock", +"300905": "stock", +"宝丽迪": "stock", +"002559": "stock", +"亚威股份": "stock", +"002182": "stock", +"宝武镁业": "stock", +"002792": "stock", +"通宇通讯": "stock", +"000908": "stock", +"景峰医药": "stock", +"600808": "stock", +"马钢股份": "stock", +"300215": "stock", +"电科院": "stock", +"002569": "stock", +"ST步森": "stock", +"000866": "stock", +"扬子石化": "stock", +"600682": "stock", +"南京新百": "stock", +"603099": "stock", +"长白山": "stock", +"301138": "stock", +"华研精机": "stock", +"836720": "stock", +"吉冈精密": "stock", +"603330": "stock", +"天洋新材": "stock", +"603797": "stock", +"联泰环保": "stock", +"688656": "stock", +"浩欧博": "stock", +"002726": "stock", +"龙大美食": "stock", +"300662": "stock", +"科锐国际": "stock", +"688219": "stock", +"会通股份": "stock", +"873167": "stock", +"新赣江": "stock", +"688177": "stock", +"百奥泰": "stock", +"002746": "stock", +"仙坛股份": "stock", +"002083": "stock", +"孚日股份": "stock", +"600065": "stock", +"*ST联谊": "stock", +"600063": "stock", +"皖维高新": "stock", +"600343": "stock", +"航天动力": "stock", +"601916": "stock", +"浙商银行": "stock", +"300503": "stock", +"昊志机电": "stock", +"301011": "stock", +"华立科技": "stock", +"603127": "stock", +"昭衍新药": "stock", +"000920": "stock", +"沃顿科技": "stock", +"301052": "stock", +"果麦文化": "stock", +"002793": "stock", +"罗欣药业": "stock", +"300093": "stock", +"金刚光伏": "stock", +"688728": "stock", +"格科微": "stock", +"300740": "stock", +"水羊股份": "stock", +"601800": "stock", +"中国交建": "stock", +"838171": "stock", +"邦德股份": "stock", +"300847": "stock", +"中船汉光": "stock", +"430685": "stock", +"新芝生物": "stock", +"000029": "stock", +"深深房A": "stock", +"688315": "stock", +"诺禾致源": "stock", +"688171": "stock", +"纬德信息": "stock", +"603020": "stock", +"爱普股份": "stock", +"002520": "stock", +"日发精机": "stock", +"600654": "stock", +"ST中安": "stock", +"688049": "stock", +"炬芯科技": "stock", +"600617": "stock", +"国新能源": "stock", +"839273": "stock", +"一致魔芋": "stock", +"000718": "stock", +"苏宁环球": "stock", +"300345": "stock", +"华民股份": 
"stock", +"001366": "stock", +"播恩集团": "stock", +"834062": "stock", +"科润智控": "stock", +"688131": "stock", +"皓元医药": "stock", +"688702": "stock", +"盛科通信-U": "stock", +"001226": "stock", +"拓山重工": "stock", +"600558": "stock", +"大西洋": "stock", +"839371": "stock", +"欧福蛋业": "stock", +"301302": "stock", +"华如科技": "stock", +"600396": "stock", +"*ST金山": "stock", +"000806": "stock", +"银河退": "stock", +"605100": "stock", +"华丰股份": "stock", +"603326": "stock", +"我乐家居": "stock", +"600842": "stock", +"中西药业": "stock", +"002408": "stock", +"齐翔腾达": "stock", +"002395": "stock", +"双象股份": "stock", +"603300": "stock", +"华铁应急": "stock", +"600928": "stock", +"西安银行": "stock", +"600615": "stock", +"丰华股份": "stock", +"000969": "stock", +"安泰科技": "stock", +"836871": "stock", +"派特尔": "stock", +"601258": "stock", +"*ST庞大": "stock", +"002340": "stock", +"格林美": "stock", +"300322": "stock", +"硕贝德": "stock", +"000748": "stock", +"长城信息": "stock", +"300973": "stock", +"立高食品": "stock", +"600814": "stock", +"杭州解百": "stock", +"688616": "stock", +"西力科技": "stock", +"600939": "stock", +"重庆建工": "stock", +"600611": "stock", +"大众交通": "stock", +"002350": "stock", +"北京科锐": "stock", +"600741": "stock", +"华域汽车": "stock", +"002178": "stock", +"延华智能": "stock", +"603199": "stock", +"九华旅游": "stock", +"002285": "stock", +"世联行": "stock", +"300187": "stock", +"永清环保": "stock", +"301050": "stock", +"雷电微力": "stock", +"002287": "stock", +"奇正藏药": "stock", +"002886": "stock", +"沃特股份": "stock", +"600578": "stock", +"京能电力": "stock", +"300220": "stock", +"ST金运": "stock", +"003005": "stock", +"竞业达": "stock", +"000902": "stock", +"新洋丰": "stock", +"600262": "stock", +"北方股份": "stock", +"000488": "stock", +"晨鸣纸业": "stock", +"603276": "stock", +"恒兴新材": "stock", +"002735": "stock", +"王子新材": "stock", +"300596": "stock", +"利安隆": "stock", +"600591": "stock", +"*ST上航": "stock", +"688045": "stock", +"必易微": "stock", +"688205": "stock", +"德科立": "stock", +"603080": "stock", +"新疆火炬": "stock", +"300870": "stock", +"欧陆通": "stock", +"600664": "stock", +"哈药股份": "stock", +"003012": "stock", +"东鹏控股": "stock", +"601113": "stock", +"华鼎股份": "stock", +"601518": "stock", +"吉林高速": "stock", +"301210": "stock", +"金杨股份": "stock", +"600856": "stock", +"退市中天": "stock", +"600852": "stock", +"*ST中川": "stock", +"300122": "stock", +"智飞生物": "stock", +"300763": "stock", +"锦浪科技": "stock", +"301269": "stock", +"华大九天": "stock", +"600028": "stock", +"中国石化": "stock", +"300566": "stock", +"激智科技": "stock", +"688146": "stock", +"中船特气": "stock", +"688521": "stock", +"芯原股份": "stock", +"300968": "stock", +"格林精密": "stock", +"000566": "stock", +"海南海药": "stock", +"600521": "stock", +"华海药业": "stock", +"605286": "stock", +"同力日升": "stock", +"600519": "stock", +"贵州茅台": "stock", +"301517": "stock", +"陕西华达": "stock", +"300130": "stock", +"新国都": "stock", +"600602": "stock", +"云赛智联": "stock", +"300987": "stock", +"川网传媒": "stock", +"301051": "stock", +"信濠光电": "stock", +"872190": "stock", +"雷神科技": "stock", +"835508": "stock", +"殷图网联": "stock", +"002901": "stock", +"大博医疗": "stock", +"600581": "stock", +"八一钢铁": "stock", +"300644": "stock", +"南京聚隆": "stock", +"688090": "stock", +"瑞松科技": "stock", +"605020": "stock", +"永和股份": "stock", +"300900": "stock", +"广联航空": "stock", +"002076": "stock", +"星光股份": "stock", +"688676": "stock", +"金盘科技": "stock", +"002480": "stock", +"新筑股份": "stock", +"002633": "stock", +"申科股份": "stock", +"300889": "stock", +"爱克股份": "stock", +"000897": "stock", +"津滨发展": "stock", +"601888": "stock", +"中国中免": "stock", +"688595": "stock", +"芯海科技": "stock", +"605179": "stock", +"一鸣食品": "stock", +"600030": 
"stock", +"中信证券": "stock", +"600375": "stock", +"汉马科技": "stock", +"002105": "stock", +"信隆健康": "stock", +"000925": "stock", +"众合科技": "stock", +"601668": "stock", +"中国建筑": "stock", +"688132": "stock", +"邦彦技术": "stock", +"300074": "stock", +"华平股份": "stock", +"301305": "stock", +"朗坤环境": "stock", +"301338": "stock", +"凯格精机": "stock", +"000515": "stock", +"攀渝钛业": "stock", +"603396": "stock", +"金辰股份": "stock", +"002308": "stock", +"威创股份": "stock", +"300383": "stock", +"光环新网": "stock", +"688278": "stock", +"特宝生物": "stock", +"600853": "stock", +"龙建股份": "stock", +"603096": "stock", +"新经典": "stock", +"600428": "stock", +"中远海特": "stock", +"003037": "stock", +"三和管桩": "stock", +"000971": "stock", +"ST高升": "stock", +"300320": "stock", +"海达股份": "stock", +"605580": "stock", +"恒盛能源": "stock", +"688234": "stock", +"天岳先进": "stock", +"600874": "stock", +"创业环保": "stock", +"000923": "stock", +"河钢资源": "stock", +"603728": "stock", +"鸣志电器": "stock", +"688186": "stock", +"广大特材": "stock", +"301363": "stock", +"美好医疗": "stock", +"002909": "stock", +"集泰股份": "stock", +"688378": "stock", +"奥来德": "stock", +"600395": "stock", +"盘江股份": "stock", +"600711": "stock", +"盛屯矿业": "stock", +"688070": "stock", +"纵横股份": "stock", +"601898": "stock", +"中煤能源": "stock", +"300689": "stock", +"澄天伟业": "stock", +"300329": "stock", +"海伦钢琴": "stock", +"603601": "stock", +"再升科技": "stock", +"000059": "stock", +"华锦股份": "stock", +"002273": "stock", +"水晶光电": "stock", +"000556": "stock", +"PT南洋": "stock", +"300481": "stock", +"濮阳惠成": "stock", +"688267": "stock", +"中触媒": "stock", +"603885": "stock", +"吉祥航空": "stock", +"603616": "stock", +"韩建河山": "stock", +"002147": "stock", +"新光退": "stock", +"601077": "stock", +"渝农商行": "stock", +"601336": "stock", +"新华保险": "stock", +"003026": "stock", +"中晶科技": "stock", +"600620": "stock", +"天宸股份": "stock", +"300081": "stock", +"恒信东方": "stock", +"002143": "stock", +"印纪退": "stock", +"000503": "stock", +"国新健康": "stock", +"600836": "stock", +"上海易连": "stock", +"000929": "stock", +"兰州黄河": "stock", +"688218": "stock", +"江苏北人": "stock", +"002653": "stock", +"海思科": "stock", +"300537": "stock", +"广信材料": "stock", +"872351": "stock", +"华光源海": "stock", +"603018": "stock", +"华设集团": "stock", +"002498": "stock", +"汉缆股份": "stock", +"605255": "stock", +"天普股份": "stock", +"000917": "stock", +"电广传媒": "stock", +"688531": "stock", +"日联科技": "stock", +"600372": "stock", +"中航机载": "stock", +"002155": "stock", +"湖南黄金": "stock", +"300827": "stock", +"上能电气": "stock", +"605368": "stock", +"蓝天燃气": "stock", +"601618": "stock", +"中国中冶": "stock", +"002913": "stock", +"奥士康": "stock", +"601601": "stock", +"中国太保": "stock", +"603355": "stock", +"莱克电气": "stock", +"600526": "stock", +"菲达环保": "stock", +"603421": "stock", +"鼎信通讯": "stock", +"000517": "stock", +"荣安地产": "stock", +"301509": "stock", +"金凯生科": "stock", +"603348": "stock", +"文灿股份": "stock", +"002998": "stock", +"优彩资源": "stock", +"600875": "stock", +"东方电气": "stock", +"300815": "stock", +"玉禾田": "stock", +"000732": "stock", +"ST泰禾": "stock", +"000957": "stock", +"中通客车": "stock", +"831855": "stock", +"浙江大农": "stock", +"873339": "stock", +"恒太照明": "stock", +"605099": "stock", +"共创草坪": "stock", +"603220": "stock", +"中贝通信": "stock", +"301317": "stock", +"鑫磊股份": "stock", +"603019": "stock", +"中科曙光": "stock", +"603110": "stock", +"东方材料": "stock", +"001313": "stock", +"粤海饲料": "stock", +"001368": "stock", +"通达创智": "stock", +"002606": "stock", +"大连电瓷": "stock", +"603610": "stock", +"麒盛科技": "stock", +"300551": "stock", +"古鳌科技": "stock", +"600621": "stock", +"华鑫股份": "stock", +"001222": "stock", +"源飞宠物": "stock", 
+"600738": "stock", +"丽尚国潮": "stock", +"603600": "stock", +"永艺股份": "stock", +"601212": "stock", +"白银有色": "stock", +"605589": "stock", +"圣泉集团": "stock", +"000594": "stock", +"国恒退": "stock", +"301318": "stock", +"维海德": "stock", +"688080": "stock", +"映翰通": "stock", +"688103": "stock", +"国力股份": "stock", +"600327": "stock", +"大东方": "stock", +"300997": "stock", +"欢乐家": "stock", +"003031": "stock", +"中瓷电子": "stock", +"603089": "stock", +"正裕工业": "stock", +"300465": "stock", +"高伟达": "stock", +"300508": "stock", +"维宏股份": "stock", +"600468": "stock", +"百利电气": "stock", +"300916": "stock", +"朗特智能": "stock", +"603318": "stock", +"水发燃气": "stock", +"600788": "stock", +"*ST达曼": "stock", +"603595": "stock", +"东尼电子": "stock", +"600282": "stock", +"南钢股份": "stock", +"688392": "stock", +"骄成超声": "stock", +"603270": "stock", +"金帝股份": "stock", +"603129": "stock", +"春风动力": "stock", +"000999": "stock", +"华润三九": "stock", +"603301": "stock", +"振德医疗": "stock", +"688425": "stock", +"铁建重工": "stock", +"688670": "stock", +"金迪克": "stock", +"600515": "stock", +"海南机场": "stock", +"688566": "stock", +"吉贝尔": "stock", +"300161": "stock", +"华中数控": "stock", +"300239": "stock", +"东宝生物": "stock", +"000502": "stock", +"绿景退": "stock", +"300654": "stock", +"世纪天鸿": "stock", +"688135": "stock", +"利扬芯片": "stock", +"600965": "stock", +"福成股份": "stock", +"300719": "stock", +"安达维尔": "stock", +"300496": "stock", +"中科创达": "stock", +"688217": "stock", +"睿昂基因": "stock", +"000827": "stock", +"*ST长兴": "stock", +"603097": "stock", +"江苏华辰": "stock", +"301528": "stock", +"多浦乐": "stock", +"688701": "stock", +"卓锦股份": "stock", +"603583": "stock", +"捷昌驱动": "stock", +"834950": "stock", +"迅安科技": "stock", +"603258": "stock", +"电魂网络": "stock", +"600509": "stock", +"天富能源": "stock", +"000691": "stock", +"亚太实业": "stock", +"600279": "stock", +"重庆港": "stock", +"688380": "stock", +"中微半导": "stock", +"002512": "stock", +"达华智能": "stock", +"300052": "stock", +"中青宝": "stock", +"300007": "stock", +"汉威科技": "stock", +"300478": "stock", +"杭州高新": "stock", +"000789": "stock", +"万年青": "stock", +"002978": "stock", +"安宁股份": "stock", +"603439": "stock", +"贵州三力": "stock", +"000733": "stock", +"振华科技": "stock", +"002179": "stock", +"中航光电": "stock", +"301070": "stock", +"开勒股份": "stock", +"300989": "stock", +"蕾奥规划": "stock", +"832171": "stock", +"志晟信息": "stock", +"603880": "stock", +"ST南卫": "stock", +"002496": "stock", +"辉丰股份": "stock", +"000652": "stock", +"泰达股份": "stock", +"002959": "stock", +"小熊电器": "stock", +"600131": "stock", +"国网信通": "stock", +"301162": "stock", +"国能日新": "stock", +"836699": "stock", +"海达尔": "stock", +"603102": "stock", +"百合股份": "stock", +"300572": "stock", +"安车检测": "stock", +"688588": "stock", +"凌志软件": "stock", +"603186": "stock", +"华正新材": "stock", +"002654": "stock", +"万润科技": "stock", +"600213": "stock", +"亚星客车": "stock", +"300829": "stock", +"金丹科技": "stock", +"002778": "stock", +"中晟高科": "stock", +"300897": "stock", +"山科智能": "stock", +"601059": "stock", +"信达证券": "stock", +"000411": "stock", +"英特集团": "stock", +"300148": "stock", +"天舟文化": "stock", +"835207": "stock", +"众诚科技": "stock", +"600189": "stock", +"泉阳泉": "stock", +"300234": "stock", +"开尔新材": "stock", +"603163": "stock", +"圣晖集成": "stock", +"002055": "stock", +"得润电子": "stock", +"300891": "stock", +"惠云钛业": "stock", +"600286": "stock", +"S*ST国瓷": "stock", +"601108": "stock", +"财通证券": "stock", +"301321": "stock", +"翰博高新": "stock", +"603043": "stock", +"广州酒家": "stock", +"603456": "stock", +"九洲药业": "stock", +"300743": "stock", +"天地数码": "stock", +"301087": "stock", +"可孚医疗": "stock", +"600551": "stock", +"时代出版": 
"stock", +"871396": "stock", +"常辅股份": "stock", +"688390": "stock", +"固德威": "stock", +"002433": "stock", +"*ST太安": "stock", +"600984": "stock", +"建设机械": "stock", +"300476": "stock", +"胜宏科技": "stock", +"603214": "stock", +"爱婴室": "stock", +"688365": "stock", +"光云科技": "stock", +"000815": "stock", +"美利云": "stock", +"832491": "stock", +"奥迪威": "stock", +"002462": "stock", +"嘉事堂": "stock", +"300964": "stock", +"本川智能": "stock", +"603900": "stock", +"莱绅通灵": "stock", +"003033": "stock", +"征和工业": "stock", +"002518": "stock", +"科士达": "stock", +"601686": "stock", +"友发集团": "stock", +"002597": "stock", +"金禾实业": "stock", +"603367": "stock", +"辰欣药业": "stock", +"600486": "stock", +"扬农化工": "stock", +"603185": "stock", +"弘元绿能": "stock", +"300145": "stock", +"中金环境": "stock", +"688015": "stock", +"交控科技": "stock", +"000661": "stock", +"长春高新": "stock", +"600323": "stock", +"瀚蓝环境": "stock", +"300691": "stock", +"联合光电": "stock", +"600339": "stock", +"中油工程": "stock", +"839729": "stock", +"永顺生物": "stock", +"000852": "stock", +"石化机械": "stock", +"873593": "stock", +"鼎智科技": "stock", +"688538": "stock", +"和辉光电-U": "stock", +"300786": "stock", +"国林科技": "stock", +"603100": "stock", +"川仪股份": "stock", +"600419": "stock", +"天润乳业": "stock", +"300143": "stock", +"盈康生命": "stock", +"000981": "stock", +"山子股份": "stock", +"300420": "stock", +"五洋停车": "stock", +"300546": "stock", +"雄帝科技": "stock", +"300101": "stock", +"振芯科技": "stock", +"301276": "stock", +"嘉曼服饰": "stock", +"002636": "stock", +"金安国纪": "stock", +"600985": "stock", +"淮北矿业": "stock", +"605005": "stock", +"合兴股份": "stock", +"002294": "stock", +"信立泰": "stock", +"000536": "stock", +"华映科技": "stock", +"000922": "stock", +"佳电股份": "stock", +"600119": "stock", +"长江投资": "stock", +"300906": "stock", +"日月明": "stock", +"300854": "stock", +"中兰环保": "stock", +"688175": "stock", +"高凌信息": "stock", +"600971": "stock", +"恒源煤电": "stock", +"601138": "stock", +"工业富联": "stock", +"002681": "stock", +"奋达科技": "stock", +"300579": "stock", +"数字认证": "stock", +"002549": "stock", +"凯美特气": "stock", +"002116": "stock", +"中国海诚": "stock", +"002349": "stock", +"精华制药": "stock", +"301261": "stock", +"恒工精密": "stock", +"688117": "stock", +"圣诺生物": "stock", +"600179": "stock", +"安通控股": "stock", +"002013": "stock", +"中航机电": "stock", +"300506": "stock", +"名家汇": "stock", +"301396": "stock", +"宏景科技": "stock", +"605507": "stock", +"国邦医药": "stock", +"603283": "stock", +"赛腾股份": "stock", +"002187": "stock", +"广百股份": "stock", +"300366": "stock", +"创意信息": "stock", +"601231": "stock", +"环旭电子": "stock", +"301232": "stock", +"飞沃科技": "stock", +"835892": "stock", +"中科美菱": "stock", +"300883": "stock", +"龙利得": "stock", +"301160": "stock", +"翔楼新材": "stock", +"002701": "stock", +"奥瑞金": "stock", +"301065": "stock", +"本立科技": "stock", +"301225": "stock", +"恒勃股份": "stock", +"002598": "stock", +"山东章鼓": "stock", +"603833": "stock", +"欧派家居": "stock", +"000826": "stock", +"启迪环境": "stock", +"600156": "stock", +"华升股份": "stock", +"688569": "stock", +"铁科轨道": "stock", +"300083": "stock", +"创世纪": "stock", +"002280": "stock", +"联络互动": "stock", +"688279": "stock", +"峰岹科技": "stock", +"600731": "stock", +"湖南海利": "stock", +"600479": "stock", +"千金药业": "stock", +"603855": "stock", +"华荣股份": "stock", +"300067": "stock", +"安诺其": "stock", +"002875": "stock", +"安奈儿": "stock", +"002215": "stock", +"诺普信": "stock", +"601328": "stock", +"交通银行": "stock", +"300820": "stock", +"英杰电气": "stock", +"002713": "stock", +"东易日盛": "stock", +"601018": "stock", +"宁波港": "stock", +"688301": "stock", +"奕瑞科技": "stock", +"603150": "stock", +"万朗磁塑": "stock", +"000993": "stock", 
+"闽东电力": "stock", +"002962": "stock", +"五方光电": "stock", +"601007": "stock", +"金陵饭店": "stock", +"002707": "stock", +"众信旅游": "stock", +"002322": "stock", +"理工能科": "stock", +"688098": "stock", +"申联生物": "stock", +"002069": "stock", +"獐子岛": "stock", +"000719": "stock", +"中原传媒": "stock", +"600717": "stock", +"天津港": "stock", +"830839": "stock", +"万通液压": "stock", +"430718": "stock", +"合肥高科": "stock", +"002002": "stock", +"鸿达兴业": "stock", +"000593": "stock", +"德龙汇能": "stock", +"603993": "stock", +"洛阳钼业": "stock", +"603618": "stock", +"杭电股份": "stock", +"832225": "stock", +"利通科技": "stock", +"688562": "stock", +"航天软件": "stock", +"301220": "stock", +"亚香股份": "stock", +"300611": "stock", +"美力科技": "stock", +"603768": "stock", +"常青股份": "stock", +"688276": "stock", +"百克生物": "stock", +"688325": "stock", +"赛微微电": "stock", +"688321": "stock", +"微芯生物": "stock", +"603027": "stock", +"千禾味业": "stock", +"000065": "stock", +"北方国际": "stock", +"300354": "stock", +"东华测试": "stock", +"300494": "stock", +"盛天网络": "stock", +"600747": "stock", +"退市大控": "stock", +"000062": "stock", +"深圳华强": "stock", +"601218": "stock", +"吉鑫科技": "stock", +"600710": "stock", +"苏美达": "stock", +"000516": "stock", +"国际医学": "stock", +"836414": "stock", +"欧普泰": "stock", +"301287": "stock", +"康力源": "stock", +"300071": "stock", +"福石控股": "stock", +"300548": "stock", +"博创科技": "stock", +"002166": "stock", +"莱茵生物": "stock", +"003003": "stock", +"天元股份": "stock", +"603003": "stock", +"龙宇股份": "stock", +"688468": "stock", +"科美诊断": "stock", +"000525": "stock", +"ST红太阳": "stock", +"603324": "stock", +"盛剑环境": "stock", +"003011": "stock", +"海象新材": "stock", +"002431": "stock", +"棕榈股份": "stock", +"001236": "stock", +"弘业期货": "stock", +"002752": "stock", +"昇兴股份": "stock", +"003018": "stock", +"金富科技": "stock", +"002623": "stock", +"亚玛顿": "stock", +"600399": "stock", +"抚顺特钢": "stock", +"002888": "stock", +"惠威科技": "stock", +"603517": "stock", +"绝味食品": "stock", +"300675": "stock", +"建科院": "stock", +"002649": "stock", +"博彦科技": "stock", +"601198": "stock", +"东兴证券": "stock", +"600801": "stock", +"华新水泥": "stock", +"600098": "stock", +"广州发展": "stock", +"600816": "stock", +"ST建元": "stock", +"301221": "stock", +"光庭信息": "stock", +"600252": "stock", +"中恒集团": "stock", +"600848": "stock", +"上海临港": "stock", +"603518": "stock", +"锦泓集团": "stock", +"300982": "stock", +"苏文电能": "stock", +"002514": "stock", +"宝馨科技": "stock", +"300499": "stock", +"高澜股份": "stock", +"831768": "stock", +"拾比佰": "stock", +"603722": "stock", +"阿科力": "stock", +"600199": "stock", +"金种子酒": "stock", +"600006": "stock", +"东风汽车": "stock", +"688004": "stock", +"博汇科技": "stock", +"688190": "stock", +"云路股份": "stock", +"002254": "stock", +"泰和新材": "stock", +"003016": "stock", +"欣贺股份": "stock", +"600908": "stock", +"无锡银行": "stock", +"300156": "stock", +"神雾退": "stock", +"600859": "stock", +"王府井": "stock", +"301109": "stock", +"军信股份": "stock", +"000632": "stock", +"三木集团": "stock", +"603048": "stock", +"浙江黎明": "stock", +"600983": "stock", +"惠而浦": "stock", +"000616": "stock", +"*ST海投": "stock", +"002092": "stock", +"中泰化学": "stock", +"002505": "stock", +"鹏都农牧": "stock", +"301227": "stock", +"森鹰窗业": "stock", +"301165": "stock", +"锐捷网络": "stock", +"000830": "stock", +"鲁西化工": "stock", +"002850": "stock", +"科达利": "stock", +"600476": "stock", +"湘邮科技": "stock", +"002443": "stock", +"金洲管道": "stock", +"000859": "stock", +"国风新材": "stock", +"300828": "stock", +"锐新科技": "stock", +"688375": "stock", +"国博电子": "stock", +"000533": "stock", +"顺钠股份": "stock", +"002279": "stock", +"久其软件": "stock", +"603316": "stock", +"诚邦股份": "stock", +"605296": 
"stock", +"神农集团": "stock", +"600629": "stock", +"华建集团": "stock", +"002239": "stock", +"奥特佳": "stock", +"301279": "stock", +"金道科技": "stock", +"688089": "stock", +"嘉必优": "stock", +"000090": "stock", +"天健集团": "stock", +"300111": "stock", +"向日葵": "stock", +"002410": "stock", +"广联达": "stock", +"300605": "stock", +"恒锋信息": "stock", +"300813": "stock", +"泰林生物": "stock", +"002449": "stock", +"国星光电": "stock", +"002311": "stock", +"海大集团": "stock", +"603065": "stock", +"宿迁联盛": "stock", +"300136": "stock", +"信维通信": "stock", +"002719": "stock", +"麦趣尔": "stock", +"301128": "stock", +"强瑞技术": "stock", +"836270": "stock", +"天铭科技": "stock", +"600686": "stock", +"金龙汽车": "stock", +"002560": "stock", +"通达股份": "stock", +"000811": "stock", +"冰轮环境": "stock", +"688057": "stock", +"金达莱": "stock", +"688169": "stock", +"石头科技": "stock", +"300821": "stock", +"东岳硅材": "stock", +"002571": "stock", +"德力股份": "stock", +"300486": "stock", +"东杰智能": "stock", +"002006": "stock", +"精工科技": "stock", +"603268": "stock", +"松发股份": "stock", +"301307": "stock", +"美利信": "stock", +"300867": "stock", +"圣元环保": "stock", +"000762": "stock", +"西藏矿业": "stock", +"600296": "stock", +"S兰铝": "stock", +"688048": "stock", +"长光华芯": "stock", +"300693": "stock", +"盛弘股份": "stock", +"002532": "stock", +"天山铝业": "stock", +"002603": "stock", +"以岭药业": "stock", +"603685": "stock", +"晨丰科技": "stock", +"300466": "stock", +"赛摩智能": "stock", +"873122": "stock", +"中纺标": "stock", +"300415": "stock", +"伊之密": "stock", +"300569": "stock", +"天能重工": "stock", +"603388": "stock", +"元成股份": "stock", +"002468": "stock", +"申通快递": "stock", +"300736": "stock", +"百邦科技": "stock", +"300414": "stock", +"中光防雷": "stock", +"600306": "stock", +"*ST商城": "stock", +"600727": "stock", +"鲁北化工": "stock", +"601166": "stock", +"兴业银行": "stock", +"000710": "stock", +"贝瑞基因": "stock", +"837212": "stock", +"智新电子": "stock", +"600001": "stock", +"邯郸钢铁": "stock", +"002065": "stock", +"东华软件": "stock", +"300079": "stock", +"数码视讯": "stock", +"600276": "stock", +"恒瑞医药": "stock", +"000715": "stock", +"中兴商业": "stock", +"601828": "stock", +"美凯龙": "stock", +"002697": "stock", +"红旗连锁": "stock", +"605277": "stock", +"新亚电子": "stock", +"600185": "stock", +"格力地产": "stock", +"600627": "stock", +"上电股份": "stock", +"300442": "stock", +"润泽科技": "stock", +"300249": "stock", +"依米康": "stock", +"688485": "stock", +"九州一轨": "stock", +"300434": "stock", +"金石亚药": "stock", +"831195": "stock", +"三祥科技": "stock", +"688578": "stock", +"艾力斯": "stock", +"600322": "stock", +"天房发展": "stock", +"600097": "stock", +"开创国际": "stock", +"300255": "stock", +"常山药业": "stock", +"603106": "stock", +"恒银科技": "stock", +"600506": "stock", +"统一股份": "stock", +"300884": "stock", +"狄耐克": "stock", +"301049": "stock", +"超越科技": "stock", +"603000": "stock", +"人民网": "stock", +"600713": "stock", +"南京医药": "stock", +"002981": "stock", +"朝阳科技": "stock", +"301012": "stock", +"扬电科技": "stock", +"603656": "stock", +"泰禾智能": "stock", +"603822": "stock", +"嘉澳环保": "stock", +"600997": "stock", +"开滦股份": "stock", +"300707": "stock", +"威唐工业": "stock", +"600211": "stock", +"西藏药业": "stock", +"835184": "stock", +"国源科技": "stock", +"002348": "stock", +"高乐股份": "stock", +"688041": "stock", +"海光信息": "stock", +"301379": "stock", +"天山电子": "stock", +"000979": "stock", +"中弘退": "stock", +"600723": "stock", +"首商股份": "stock", +"301348": "stock", +"蓝箭电子": "stock", +"002945": "stock", +"华林证券": "stock", +"688296": "stock", +"和达科技": "stock", +"603095": "stock", +"越剑智能": "stock", +"000534": "stock", +"万泽股份": "stock", +"002190": "stock", +"成飞集成": "stock", +"600833": "stock", +"第一医药": "stock", 
+"601636": "stock", +"旗滨集团": "stock", +"600127": "stock", +"金健米业": "stock", +"688062": "stock", +"迈威生物-U": "stock", +"002316": "stock", +"亚联发展": "stock", +"002276": "stock", +"万马股份": "stock", +"605008": "stock", +"长鸿高科": "stock", +"605056": "stock", +"咸亨国际": "stock", +"301489": "stock", +"思泉新材": "stock", +"688239": "stock", +"航宇科技": "stock", +"603529": "stock", +"爱玛科技": "stock", +"600841": "stock", +"动力新科": "stock", +"601118": "stock", +"海南橡胶": "stock", +"600762": "stock", +"S*ST金荔": "stock", +"600787": "stock", +"中储股份": "stock", +"300881": "stock", +"盛德鑫泰": "stock", +"833523": "stock", +"德瑞锂电": "stock", +"002277": "stock", +"友阿股份": "stock", +"688515": "stock", +"裕太微-U": "stock", +"600084": "stock", +"中葡股份": "stock", +"600888": "stock", +"新疆众和": "stock", +"603486": "stock", +"科沃斯": "stock", +"300668": "stock", +"杰恩设计": "stock", +"000729": "stock", +"燕京啤酒": "stock", +"836247": "stock", +"华密新材": "stock", +"300919": "stock", +"中伟股份": "stock", +"600340": "stock", +"华夏幸福": "stock", +"301112": "stock", +"信邦智能": "stock", +"002310": "stock", +"东方园林": "stock", +"600163": "stock", +"中闽能源": "stock", +"002248": "stock", +"华东数控": "stock", +"000832": "stock", +"*ST龙涤": "stock", +"002769": "stock", +"普路通": "stock", +"603886": "stock", +"元祖股份": "stock", +"601066": "stock", +"中信建投": "stock", +"603708": "stock", +"家家悦": "stock", +"300723": "stock", +"一品红": "stock", +"300428": "stock", +"立中集团": "stock", +"002929": "stock", +"润建股份": "stock", +"300991": "stock", +"创益通": "stock", +"300703": "stock", +"创源股份": "stock", +"000508": "stock", +"琼民源A": "stock", +"688287": "stock", +"观典防务": "stock", +"603976": "stock", +"正川股份": "stock", +"301236": "stock", +"软通动力": "stock", +"603787": "stock", +"新日股份": "stock", +"688208": "stock", +"道通科技": "stock", +"600645": "stock", +"中源协和": "stock", +"605162": "stock", +"新中港": "stock", +"600550": "stock", +"保变电气": "stock", +"002416": "stock", +"爱施德": "stock", +"000423": "stock", +"东阿阿胶": "stock", +"000851": "stock", +"高鸿股份": "stock", +"300304": "stock", +"云意电气": "stock", +"603038": "stock", +"华立股份": "stock", +"300439": "stock", +"美康生物": "stock", +"300917": "stock", +"特发服务": "stock", +"688002": "stock", +"睿创微纳": "stock", +"600649": "stock", +"城投控股": "stock", +"300488": "stock", +"恒锋工具": "stock", +"600498": "stock", +"烽火通信": "stock", +"300257": "stock", +"开山股份": "stock", +"603008": "stock", +"喜临门": "stock", +"603322": "stock", +"超讯通信": "stock", +"830964": "stock", +"润农节水": "stock", +"300688": "stock", +"创业黑马": "stock", +"688620": "stock", +"安凯微": "stock", +"688448": "stock", +"磁谷科技": "stock", +"603217": "stock", +"元利科技": "stock", +"600582": "stock", +"天地科技": "stock", +"600503": "stock", +"华丽家族": "stock", +"002621": "stock", +"美吉姆": "stock", +"688039": "stock", +"当虹科技": "stock", +"002133": "stock", +"广宇集团": "stock", +"002267": "stock", +"陕天然气": "stock", +"603229": "stock", +"奥翔药业": "stock", +"603906": "stock", +"龙蟠科技": "stock", +"300284": "stock", +"苏交科": "stock", +"301505": "stock", +"苏州规划": "stock", +"688558": "stock", +"国盛智科": "stock", +"603130": "stock", +"云中马": "stock", +"834415": "stock", +"恒拓开源": "stock", +"688565": "stock", +"力源科技": "stock", +"688091": "stock", +"上海谊众": "stock", +"001360": "stock", +"南矿集团": "stock", +"000976": "stock", +"ST华铁": "stock", +"000805": "stock", +"*ST炎黄": "stock", +"301151": "stock", +"冠龙节能": "stock", +"600739": "stock", +"辽宁成大": "stock", +"000973": "stock", +"佛塑科技": "stock", +"688579": "stock", +"山大地纬": "stock", +"601900": "stock", +"南方传媒": "stock", +"605198": "stock", +"安德利": "stock", +"300392": "stock", +"腾信退": "stock", +"600840": "stock", 
+"新湖创业": "stock", +"300197": "stock", +"节能铁汉": "stock", +"300729": "stock", +"乐歌股份": "stock", +"300402": "stock", +"宝色股份": "stock", +"688201": "stock", +"信安世纪": "stock", +"000657": "stock", +"中钨高新": "stock", +"300483": "stock", +"首华燃气": "stock", +"000425": "stock", +"徐工机械": "stock", +"300370": "stock", +"安控科技": "stock", +"300667": "stock", +"必创科技": "stock", +"688580": "stock", +"伟思医疗": "stock", +"002261": "stock", +"拓维信息": "stock", +"838924": "stock", +"广脉科技": "stock", +"002057": "stock", +"中钢天源": "stock", +"300790": "stock", +"宇瞳光学": "stock", +"301015": "stock", +"百洋医药": "stock", +"600673": "stock", +"东阳光": "stock", +"002613": "stock", +"北玻股份": "stock", +"300511": "stock", +"雪榕生物": "stock", +"600556": "stock", +"天下秀": "stock", +"688193": "stock", +"仁度生物": "stock", +"300549": "stock", +"优德精密": "stock", +"002409": "stock", +"雅克科技": "stock", +"300901": "stock", +"中胤时尚": "stock", +"600705": "stock", +"中航产融": "stock", +"002870": "stock", +"香山股份": "stock", +"600651": "stock", +"飞乐音响": "stock", +"688586": "stock", +"江航装备": "stock", +"600099": "stock", +"林海股份": "stock", +"600861": "stock", +"北京人力": "stock", +"300796": "stock", +"贝斯美": "stock", +"001872": "stock", +"招商港口": "stock", +"688788": "stock", +"科思科技": "stock", +"688418": "stock", +"震有科技": "stock", +"300822": "stock", +"贝仕达克": "stock", +"605055": "stock", +"迎丰股份": "stock", +"301259": "stock", +"艾布鲁": "stock", +"603002": "stock", +"宏昌电子": "stock", +"601600": "stock", +"中国铝业": "stock", +"603682": "stock", +"锦和商管": "stock", +"688633": "stock", +"星球石墨": "stock", +"300816": "stock", +"艾可蓝": "stock", +"603378": "stock", +"亚士创能": "stock", +"603585": "stock", +"苏利股份": "stock", +"688085": "stock", +"三友医疗": "stock", +"300655": "stock", +"晶瑞电材": "stock", +"002052": "stock", +"ST同洲": "stock", +"002278": "stock", +"神开股份": "stock", +"300825": "stock", +"阿尔特": "stock", +"600004": "stock", +"白云机场": "stock", +"871642": "stock", +"通易航天": "stock", +"688020": "stock", +"方邦股份": "stock", +"002142": "stock", +"宁波银行": "stock", +"300332": "stock", +"天壕能源": "stock", +"688352": "stock", +"颀中科技": "stock", +"300022": "stock", +"吉峰科技": "stock", +"600844": "stock", +"丹化科技": "stock", +"833533": "stock", +"骏创科技": "stock", +"600849": "stock", +"上药转换": "stock", +"600201": "stock", +"生物股份": "stock", +"301026": "stock", +"浩通科技": "stock", +"002446": "stock", +"盛路通信": "stock", +"601858": "stock", +"中国科传": "stock", +"300129": "stock", +"泰胜风能": "stock", +"002806": "stock", +"华锋股份": "stock", +"002363": "stock", +"隆基机械": "stock", +"605258": "stock", +"协和电子": "stock", +"688209": "stock", +"英集芯": "stock", +"600839": "stock", +"四川长虹": "stock", +"300044": "stock", +"赛为智能": "stock", +"601001": "stock", +"晋控煤业": "stock", +"603333": "stock", +"尚纬股份": "stock", +"600104": "stock", +"上汽集团": "stock", +"603195": "stock", +"公牛集团": "stock", +"603707": "stock", +"健友股份": "stock", +"301198": "stock", +"喜悦智行": "stock", +"600608": "stock", +"ST沪科": "stock", +"002130": "stock", +"沃尔核材": "stock", +"600749": "stock", +"西藏旅游": "stock", +"300923": "stock", +"研奥股份": "stock", +"002630": "stock", +"华西能源": "stock", +"300474": "stock", +"景嘉微": "stock", +"300058": "stock", +"蓝色光标": "stock", +"000687": "stock", +"华讯退": "stock", +"002413": "stock", +"雷科防务": "stock", +"688261": "stock", +"东微半导": "stock", +"300325": "stock", +"德威退": "stock", +"600357": "stock", +"承德钒钛": "stock", +"002652": "stock", +"扬子新材": "stock", +"600899": "stock", +"*ST信联": "stock", +"688158": "stock", +"优刻得-W": "stock", +"601058": "stock", +"赛轮轮胎": "stock", +"000591": "stock", +"太阳能": "stock", +"300686": "stock", +"智动力": "stock", +"002845": 
"stock", +"同兴达": "stock", +"002556": "stock", +"辉隆股份": "stock", +"688681": "stock", +"科汇股份": "stock", +"002062": "stock", +"宏润建设": "stock", +"600773": "stock", +"西藏城投": "stock", +"603042": "stock", +"华脉科技": "stock", +"000799": "stock", +"酒鬼酒": "stock", +"600406": "stock", +"国电南瑞": "stock", +"002533": "stock", +"金杯电工": "stock", +"605566": "stock", +"福莱蒽特": "stock", +"300092": "stock", +"科新机电": "stock", +"300918": "stock", +"南山智尚": "stock", +"600231": "stock", +"凌钢股份": "stock", +"301148": "stock", +"嘉戎技术": "stock", +"688314": "stock", +"康拓医疗": "stock", +"300295": "stock", +"三六五网": "stock", +"603681": "stock", +"永冠新材": "stock", +"688086": "stock", +"退市紫晶": "stock", +"002885": "stock", +"京泉华": "stock", +"603703": "stock", +"盛洋科技": "stock", +"000803": "stock", +"山高环能": "stock", +"300996": "stock", +"普联软件": "stock", +"000779": "stock", +"甘咨询": "stock", +"601116": "stock", +"三江购物": "stock", +"002227": "stock", +"奥特迅": "stock", +"688529": "stock", +"豪森股份": "stock", +"600235": "stock", +"民丰特纸": "stock", +"601718": "stock", +"际华集团": "stock", +"872808": "stock", +"曙光数创": "stock", +"600303": "stock", +"ST曙光": "stock", +"600757": "stock", +"长江传媒": "stock", +"002303": "stock", +"美盈森": "stock", +"002509": "stock", +"天茂退": "stock", +"300462": "stock", +"华铭智能": "stock", +"301201": "stock", +"诚达药业": "stock", +"600436": "stock", +"片仔癀": "stock", +"688210": "stock", +"统联精密": "stock", +"300251": "stock", +"光线传媒": "stock", +"002039": "stock", +"黔源电力": "stock", +"300631": "stock", +"久吾高科": "stock", +"002873": "stock", +"新天药业": "stock", +"831445": "stock", +"龙竹科技": "stock", +"000731": "stock", +"四川美丰": "stock", +"600222": "stock", +"太龙药业": "stock", +"002066": "stock", +"瑞泰科技": "stock", +"600287": "stock", +"江苏舜天": "stock", +"000829": "stock", +"天音控股": "stock", +"601279": "stock", +"英利汽车": "stock", +"301510": "stock", +"固高科技": "stock", +"002833": "stock", +"弘亚数控": "stock", +"002903": "stock", +"宇环数控": "stock", +"830946": "stock", +"森萱医药": "stock", +"834639": "stock", +"晨光电缆": "stock", +"688269": "stock", +"凯立新材": "stock", +"300532": "stock", +"今天国际": "stock", +"603959": "stock", +"百利科技": "stock", +"301158": "stock", +"德石股份": "stock", +"688334": "stock", +"西高院": "stock", +"833873": "stock", +"中设咨询": "stock", +"002172": "stock", +"澳洋健康": "stock", +"300557": "stock", +"理工光科": "stock", +"002185": "stock", +"华天科技": "stock", +"002750": "stock", +"龙津药业": "stock", +"000548": "stock", +"湖南投资": "stock", +"600219": "stock", +"南山铝业": "stock", +"688309": "stock", +"恒誉环保": "stock", +"600438": "stock", +"通威股份": "stock", +"603178": "stock", +"圣龙股份": "stock", +"600435": "stock", +"北方导航": "stock", +"003007": "stock", +"直真科技": "stock", +"301188": "stock", +"力诺特玻": "stock", +"300765": "stock", +"新诺威": "stock", +"000034": "stock", +"神州数码": "stock", +"688088": "stock", +"虹软科技": "stock", +"300436": "stock", +"广生堂": "stock", +"688577": "stock", +"浙海德曼": "stock", +"688510": "stock", +"航亚科技": "stock", +"300359": "stock", +"全通教育": "stock", +"603726": "stock", +"朗迪集团": "stock", +"603916": "stock", +"苏博特": "stock", +"300398": "stock", +"飞凯材料": "stock", +"688139": "stock", +"海尔生物": "stock", +"601702": "stock", +"华峰铝业": "stock", +"600316": "stock", +"洪都航空": "stock", +"000801": "stock", +"四川九洲": "stock", +"002131": "stock", +"利欧股份": "stock", +"301234": "stock", +"五洲医疗": "stock", +"301387": "stock", +"光大同创": "stock", +"301267": "stock", +"华厦眼科": "stock", +"600188": "stock", +"兖矿能源": "stock", +"002411": "stock", +"必康退": "stock", +"688035": "stock", +"德邦科技": "stock", +"002232": "stock", +"启明信息": "stock", +"600083": "stock", +"博信股份": "stock", 
+"601658": "stock", +"邮储银行": "stock", +"301086": "stock", +"鸿富瀚": "stock", +"000538": "stock", +"云南白药": "stock", +"002059": "stock", +"云南旅游": "stock", +"301095": "stock", +"广立微": "stock", +"002180": "stock", +"纳思达": "stock", +"002840": "stock", +"华统股份": "stock", +"838227": "stock", +"美登科技": "stock", +"000952": "stock", +"广济药业": "stock", +"600012": "stock", +"皖通高速": "stock", +"301256": "stock", +"华融化学": "stock", +"002717": "stock", +"岭南股份": "stock", +"300024": "stock", +"机器人": "stock", +"301365": "stock", +"矩阵股份": "stock", +"300194": "stock", +"福安药业": "stock", +"603927": "stock", +"中科软": "stock", +"600258": "stock", +"首旅酒店": "stock", +"300607": "stock", +"拓斯达": "stock", +"600707": "stock", +"彩虹股份": "stock", +"688183": "stock", +"生益电子": "stock", +"601229": "stock", +"上海银行": "stock", +"600559": "stock", +"老白干酒": "stock", +"603716": "stock", +"塞力医疗": "stock", +"600482": "stock", +"中国动力": "stock", +"300809": "stock", +"华辰装备": "stock", +"600239": "stock", +"云南城投": "stock", +"300837": "stock", +"浙矿股份": "stock", +"002106": "stock", +"莱宝高科": "stock", +"000668": "stock", +"荣丰控股": "stock", +"000529": "stock", +"广弘控股": "stock", +"300695": "stock", +"兆丰股份": "stock", +"000861": "stock", +"海印股份": "stock", +"837242": "stock", +"建邦科技": "stock", +"002825": "stock", +"纳尔股份": "stock", +"001373": "stock", +"翔腾新材": "stock", +"002222": "stock", +"福晶科技": "stock", +"000965": "stock", +"天保基建": "stock", +"002659": "stock", +"凯文教育": "stock", +"603067": "stock", +"振华股份": "stock", +"688232": "stock", +"新点软件": "stock", +"870436": "stock", +"大地电气": "stock", +"002313": "stock", +"*ST日海": "stock", +"605500": "stock", +"森林包装": "stock", +"600062": "stock", +"华润双鹤": "stock", +"300417": "stock", +"南华仪器": "stock", +"002093": "stock", +"国脉科技": "stock", +"600593": "stock", +"大连圣亚": "stock", +"600271": "stock", +"航天信息": "stock", +"002643": "stock", +"万润股份": "stock", +"002314": "stock", +"南山控股": "stock", +"002030": "stock", +"达安基因": "stock", +"300242": "stock", +"佳云科技": "stock", +"603216": "stock", +"梦天家居": "stock", +"002952": "stock", +"亚世光电": "stock", +"600545": "stock", +"卓郎智能": "stock", +"301189": "stock", +"奥尼电子": "stock", +"002928": "stock", +"华夏航空": "stock", +"002511": "stock", +"中顺洁柔": "stock", +"600625": "stock", +"PT水仙": "stock", +"603307": "stock", +"扬州金泉": "stock", +"600742": "stock", +"一汽富维": "stock", +"000521": "stock", +"长虹美菱": "stock", +"300025": "stock", +"华星创业": "stock", +"002587": "stock", +"奥拓电子": "stock", +"603718": "stock", +"海利生物": "stock", +"001216": "stock", +"华瓷股份": "stock", +"601918": "stock", +"新集能源": "stock", +"601155": "stock", +"新城控股": "stock", +"605069": "stock", +"正和生态": "stock", +"688381": "stock", +"帝奥微": "stock", +"605117": "stock", +"德业股份": "stock", +"603948": "stock", +"建业股份": "stock", +"688768": "stock", +"容知日新": "stock", +"300757": "stock", +"罗博特科": "stock", +"000008": "stock", +"神州高铁": "stock", +"600160": "stock", +"巨化股份": "stock", +"300776": "stock", +"帝尔激光": "stock", +"301499": "stock", +"维科精密": "stock", +"300188": "stock", +"美亚柏科": "stock", +"001309": "stock", +"德明利": "stock", +"600614": "stock", +"退市鹏起": "stock", +"300374": "stock", +"中铁装配": "stock", +"000675": "stock", +"ST银山": "stock", +"600087": "stock", +"退市长油": "stock", +"000018": "stock", +"神城A退": "stock", +"688659": "stock", +"元琛科技": "stock", +"300245": "stock", +"天玑科技": "stock", +"605177": "stock", +"东亚药业": "stock", +"600117": "stock", +"*ST西钢": "stock", +"300966": "stock", +"共同药业": "stock", +"301429": "stock", +"森泰股份": "stock", +"000727": "stock", +"冠捷科技": "stock", +"002605": "stock", +"姚记科技": "stock", +"000980": "stock", 
+"众泰汽车": "stock", +"688369": "stock", +"致远互联": "stock", +"688669": "stock", +"聚石化学": "stock", +"300604": "stock", +"长川科技": "stock", +"300169": "stock", +"天晟新材": "stock", +"002378": "stock", +"章源钨业": "stock", +"002608": "stock", +"江苏国信": "stock", +"688371": "stock", +"菲沃泰": "stock", +"601339": "stock", +"百隆东方": "stock", +"000813": "stock", +"德展健康": "stock", +"300709": "stock", +"精研科技": "stock", +"002011": "stock", +"盾安环境": "stock", +"301126": "stock", +"达嘉维康": "stock", +"600553": "stock", +"太行水泥": "stock", +"300062": "stock", +"中能电气": "stock", +"002976": "stock", +"瑞玛精密": "stock", +"301390": "stock", +"经纬股份": "stock", +"300048": "stock", +"合康新能": "stock", +"300630": "stock", +"普利制药": "stock", +"002209": "stock", +"达意隆": "stock", +"688159": "stock", +"有方科技": "stock", +"301515": "stock", +"港通医疗": "stock", +"603011": "stock", +"合锻智能": "stock", +"002346": "stock", +"柘中股份": "stock", +"603219": "stock", +"富佳股份": "stock", +"300601": "stock", +"康泰生物": "stock", +"300851": "stock", +"交大思诺": "stock", +"688206": "stock", +"概伦电子": "stock", +"600122": "stock", +"*ST宏图": "stock", +"002576": "stock", +"通达动力": "stock", +"688157": "stock", +"松井股份": "stock", +"300388": "stock", +"节能国祯": "stock", +"301061": "stock", +"匠心家居": "stock", +"688646": "stock", +"逸飞激光": "stock", +"000156": "stock", +"华数传媒": "stock", +"002205": "stock", +"国统股份": "stock", +"300311": "stock", +"任子行": "stock", +"600926": "stock", +"杭州银行": "stock", +"000918": "stock", +"*ST嘉凯": "stock", +"000817": "stock", +"辽河油田": "stock", +"000720": "stock", +"新能泰山": "stock", +"002161": "stock", +"远望谷": "stock", +"603192": "stock", +"汇得科技": "stock", +"688323": "stock", +"瑞华泰": "stock", +"688111": "stock", +"金山办公": "stock", +"002369": "stock", +"卓翼科技": "stock", +"600275": "stock", +"退市昌鱼": "stock", +"603005": "stock", +"晶方科技": "stock", +"002379": "stock", +"宏创控股": "stock", +"603630": "stock", +"拉芳家化": "stock", +"002399": "stock", +"海普瑞": "stock", +"301293": "stock", +"三博脑科": "stock", +"688525": "stock", +"佰维存储": "stock", +"605266": "stock", +"健之佳": "stock", +"600560": "stock", +"金自天正": "stock", +"002921": "stock", +"联诚精密": "stock", +"603809": "stock", +"豪能股份": "stock", +"605089": "stock", +"味知香": "stock", +"002732": "stock", +"燕塘乳业": "stock", +"600580": "stock", +"卧龙电驱": "stock", +"601799": "stock", +"星宇股份": "stock", +"600330": "stock", +"天通股份": "stock", +"688092": "stock", +"爱科科技": "stock", +"688598": "stock", +"金博股份": "stock", +"002122": "stock", +"汇洲智能": "stock", +"688223": "stock", +"晶科能源": "stock", +"300609": "stock", +"汇纳科技": "stock", +"603937": "stock", +"丽岛新材": "stock", +"603717": "stock", +"天域生态": "stock", +"301018": "stock", +"申菱环境": "stock", +"002442": "stock", +"龙星化工": "stock", +"603290": "stock", +"斯达半导": "stock", +"600091": "stock", +"退市明科": "stock", +"300625": "stock", +"三雄极光": "stock", +"300868": "stock", +"杰美特": "stock", +"300928": "stock", +"华安鑫创": "stock", +"000928": "stock", +"中钢国际": "stock", +"688552": "stock", +"航天南湖": "stock", +"688739": "stock", +"成大生物": "stock", +"603901": "stock", +"永创智能": "stock", +"000927": "stock", +"中国铁物": "stock", +"600118": "stock", +"中国卫星": "stock", +"300759": "stock", +"康龙化成": "stock", +"002684": "stock", +"猛狮退": "stock", +"688116": "stock", +"天奈科技": "stock", +"603101": "stock", +"汇嘉时代": "stock", +"600355": "stock", +"精伦电子": "stock", +"300839": "stock", +"博汇股份": "stock", +"603997": "stock", +"继峰股份": "stock", +"300502": "stock", +"新易盛": "stock", +"300323": "stock", +"华灿光电": "stock", +"603123": "stock", +"翠微股份": "stock", +"000802": "stock", +"北京文化": "stock", +"300817": "stock", +"双飞股份": "stock", +"688722": 
"stock", +"同益中": "stock", +"601616": "stock", +"广电电气": "stock", +"835985": "stock", +"海泰新能": "stock", +"300363": "stock", +"博腾股份": "stock", +"001255": "stock", +"博菲电气": "stock", +"300929": "stock", +"华骐环保": "stock", +"002808": "stock", +"ST恒久": "stock", +"002877": "stock", +"智能自控": "stock", +"300244": "stock", +"迪安诊断": "stock", +"300767": "stock", +"震安科技": "stock", +"600216": "stock", +"浙江医药": "stock", +"830779": "stock", +"武汉蓝电": "stock", +"000626": "stock", +"远大控股": "stock", +"300713": "stock", +"英可瑞": "stock", +"000978": "stock", +"桂林旅游": "stock", +"600641": "stock", +"万业企业": "stock", +"600051": "stock", +"宁波联合": "stock", +"833819": "stock", +"XD颖泰生": "stock", +"601333": "stock", +"广深铁路": "stock", +"300876": "stock", +"蒙泰高新": "stock", +"002095": "stock", +"生意宝": "stock", +"600039": "stock", +"四川路桥": "stock", +"600092": "stock", +"S*ST精密": "stock", +"002642": "stock", +"荣联科技": "stock", +"603956": "stock", +"威派格": "stock", +"300484": "stock", +"蓝海华腾": "stock", +"002789": "stock", +"建艺集团": "stock", +"600220": "stock", +"江苏阳光": "stock", +"600588": "stock", +"用友网络": "stock", +"600499": "stock", +"科达制造": "stock", +"835640": "stock", +"富士达": "stock", +"688076": "stock", +"诺泰生物": "stock", +"002972": "stock", +"科安达": "stock", +"300911": "stock", +"亿田智能": "stock", +"870299": "stock", +"灿能电力": "stock", +"605218": "stock", +"伟时电子": "stock", +"600102": "stock", +"莱钢股份": "stock", +"300701": "stock", +"森霸传感": "stock", +"601089": "stock", +"福元医药": "stock", +"600815": "stock", +"厦工股份": "stock", +"002120": "stock", +"韵达股份": "stock", +"600609": "stock", +"金杯汽车": "stock", +"300482": "stock", +"万孚生物": "stock", +"301167": "stock", +"建研设计": "stock", +"600115": "stock", +"中国东航": "stock", +"300214": "stock", +"日科化学": "stock", +"000046": "stock", +"*ST泛海": "stock", +"002869": "stock", +"金溢科技": "stock", +"300527": "stock", +"中船应急": "stock", +"603877": "stock", +"太平鸟": "stock", +"002148": "stock", +"北纬科技": "stock", +"000970": "stock", +"中科三环": "stock", +"300940": "stock", +"南极光": "stock", +"002312": "stock", +"川发龙蟒": "stock", +"605183": "stock", +"确成股份": "stock", +"688776": "stock", +"国光电气": "stock", +"002958": "stock", +"青农商行": "stock", +"688032": "stock", +"禾迈股份": "stock", +"605128": "stock", +"上海沿浦": "stock", +"688663": "stock", +"新风光": "stock", +"301320": "stock", +"豪江智能": "stock", +"300152": "stock", +"新动力": "stock", +"603789": "stock", +"星光农机": "stock", +"301038": "stock", +"深水规院": "stock", +"002071": "stock", +"长城退": "stock", +"603559": "stock", +"ST通脉": "stock", +"000751": "stock", +"锌业股份": "stock", +"002144": "stock", +"宏达高科": "stock", +"833394": "stock", +"民士达": "stock", +"688507": "stock", +"索辰科技": "stock", +"833914": "stock", +"远航精密": "stock", +"000905": "stock", +"厦门港务": "stock", +"836221": "stock", +"易实精密": "stock", +"600565": "stock", +"迪马股份": "stock", +"603598": "stock", +"引力传媒": "stock", +"601369": "stock", +"陕鼓动力": "stock", +"603989": "stock", +"艾华集团": "stock", +"600527": "stock", +"江南高纤": "stock", +"603025": "stock", +"大豪科技": "stock", +"603206": "stock", +"嘉环科技": "stock", +"688336": "stock", +"三生国健": "stock", +"300357": "stock", +"我武生物": "stock", +"688061": "stock", +"灿瑞科技": "stock", +"003043": "stock", +"华亚智能": "stock", +"603903": "stock", +"中持股份": "stock", +"002767": "stock", +"先锋电子": "stock", +"300879": "stock", +"大叶股份": "stock", +"002852": "stock", +"道道全": "stock", +"688285": "stock", +"高铁电气": "stock", +"871478": "stock", +"巨能股份": "stock", +"601988": "stock", +"中国银行": "stock", +"688312": "stock", +"燕麦科技": "stock", +"600112": "stock", +"ST天成": "stock", +"601990": "stock", +"南京证券": "stock", 
+"600397": "stock", +"安源煤业": "stock", +"600348": "stock", +"华阳股份": "stock", +"000583": "stock", +"S*ST托普": "stock", +"301277": "stock", +"新天地": "stock", +"002628": "stock", +"成都路桥": "stock", +"603801": "stock", +"志邦家居": "stock", +"605066": "stock", +"天正电气": "stock", +"600768": "stock", +"宁波富邦": "stock", +"688687": "stock", +"凯因科技": "stock", +"600958": "stock", +"东方证券": "stock", +"002782": "stock", +"可立克": "stock", +"002955": "stock", +"鸿合科技": "stock", +"603882": "stock", +"金域医学": "stock", +"603766": "stock", +"隆鑫通用": "stock", +"002851": "stock", +"麦格米特": "stock", +"600196": "stock", +"复星医药": "stock", +"600158": "stock", +"中体产业": "stock", +"002766": "stock", +"索菱股份": "stock", +"301029": "stock", +"怡合达": "stock", +"300314": "stock", +"戴维医疗": "stock", +"600177": "stock", +"雅戈尔": "stock", +"002246": "stock", +"北化股份": "stock", +"301251": "stock", +"威尔高": "stock", +"600298": "stock", +"安琪酵母": "stock", +"834021": "stock", +"流金科技": "stock", +"002482": "stock", +"*ST广田": "stock", +"002235": "stock", +"安妮股份": "stock", +"301281": "stock", +"科源制药": "stock", +"002985": "stock", +"北摩高科": "stock", +"601388": "stock", +"怡球资源": "stock", +"300042": "stock", +"朗科科技": "stock", +"300099": "stock", +"精准信息": "stock", +"000678": "stock", +"襄阳轴承": "stock", +"600988": "stock", +"赤峰黄金": "stock", +"688359": "stock", +"三孚新科": "stock", +"688229": "stock", +"博睿数据": "stock", +"873576": "stock", +"天力复合": "stock", +"300268": "stock", +"*ST佳沃": "stock", +"300628": "stock", +"亿联网络": "stock", +"603156": "stock", +"养元饮品": "stock", +"601808": "stock", +"中海油服": "stock", +"300217": "stock", +"东方电热": "stock", +"600377": "stock", +"宁沪高速": "stock", +"601886": "stock", +"江河集团": "stock", +"688435": "stock", +"英方软件": "stock", +"002528": "stock", +"英飞拓": "stock", +"300766": "stock", +"每日互动": "stock", +"002956": "stock", +"西麦食品": "stock", +"301373": "stock", +"凌玮科技": "stock", +"301255": "stock", +"通力科技": "stock", +"300315": "stock", +"掌趣科技": "stock", +"600979": "stock", +"广安爱众": "stock", +"603033": "stock", +"三维股份": "stock", +"002368": "stock", +"太极股份": "stock", +"001209": "stock", +"洪兴股份": "stock", +"000056": "stock", +"皇庭国际": "stock", +"600854": "stock", +"春兰股份": "stock", +"605169": "stock", +"洪通燃气": "stock", +"002021": "stock", +"*ST中捷": "stock", +"002128": "stock", +"电投能源": "stock", +"601519": "stock", +"大智慧": "stock", +"300606": "stock", +"金太阳": "stock", +"300421": "stock", +"力星股份": "stock", +"600540": "stock", +"新赛股份": "stock", +"300603": "stock", +"立昂技术": "stock", +"688128": "stock", +"中国电研": "stock", +"300163": "stock", +"先锋新材": "stock", +"600936": "stock", +"广西广电": "stock", +"002158": "stock", +"汉钟精机": "stock", +"301099": "stock", +"雅创电子": "stock", +"600005": "stock", +"武钢股份": "stock", +"600866": "stock", +"星湖科技": "stock", +"605081": "stock", +"太和水": "stock", +"002302": "stock", +"西部建设": "stock", +"002471": "stock", +"中超控股": "stock", +"688353": "stock", +"华盛锂电": "stock", +"301082": "stock", +"久盛电气": "stock", +"002644": "stock", +"佛慈制药": "stock", +"688213": "stock", +"思特威-W": "stock", +"000605": "stock", +"渤海股份": "stock", +"000030": "stock", +"富奥股份": "stock", +"301176": "stock", +"逸豪新材": "stock", +"603826": "stock", +"坤彩科技": "stock", +"002971": "stock", +"和远气体": "stock", +"300231": "stock", +"银信科技": "stock", +"301311": "stock", +"昆船智能": "stock", +"603779": "stock", +"威龙股份": "stock", +"300850": "stock", +"新强联": "stock", +"831087": "stock", +"秋乐种业": "stock", +"688778": "stock", +"厦钨新能": "stock", +"000948": "stock", +"南天信息": "stock", +"600313": "stock", +"农发种业": "stock", +"833580": "stock", +"科创新材": "stock", +"300418": "stock", 
+"昆仑万维": "stock", +"300018": "stock", +"中元股份": "stock", +"000932": "stock", +"华菱钢铁": "stock", +"300651": "stock", +"金陵体育": "stock", +"603093": "stock", +"南华期货": "stock", +"834058": "stock", +"华洋赛车": "stock", +"300702": "stock", +"天宇股份": "stock", +"300408": "stock", +"三环集团": "stock", +"834765": "stock", +"美之高": "stock", +"002415": "stock", +"海康威视": "stock", +"603393": "stock", +"新天然气": "stock", +"300248": "stock", +"新开普": "stock", +"002425": "stock", +"凯撒文化": "stock", +"300305": "stock", +"裕兴股份": "stock", +"605369": "stock", +"拱东医疗": "stock", +"301207": "stock", +"华兰疫苗": "stock", +"600903": "stock", +"贵州燃气": "stock", +"688400": "stock", +"凌云光": "stock", +"603256": "stock", +"宏和科技": "stock", +"600229": "stock", +"城市传媒": "stock", +"688711": "stock", +"宏微科技": "stock", +"603555": "stock", +"ST贵人": "stock", +"300191": "stock", +"潜能恒信": "stock", +"603936": "stock", +"博敏电子": "stock", +"688133": "stock", +"泰坦科技": "stock", +"300526": "stock", +"中潜退": "stock", +"600753": "stock", +"庚星股份": "stock", +"688160": "stock", +"步科股份": "stock", +"300715": "stock", +"凯伦股份": "stock", +"000504": "stock", +"南华生物": "stock", +"000711": "stock", +"*ST京蓝": "stock", +"002948": "stock", +"青岛银行": "stock", +"002742": "stock", +"ST三圣": "stock", +"000712": "stock", +"锦龙股份": "stock", +"603010": "stock", +"万盛股份": "stock", +"832566": "stock", +"梓橦宫": "stock", +"603189": "stock", +"网达软件": "stock", +"605138": "stock", +"盛泰集团": "stock", +"600536": "stock", +"中国软件": "stock", +"300986": "stock", +"志特新材": "stock", +"600516": "stock", +"方大炭素": "stock", +"000777": "stock", +"中核科技": "stock", +"603979": "stock", +"金诚信": "stock", +"300364": "stock", +"中文在线": "stock", +"301125": "stock", +"腾亚精工": "stock", +"300656": "stock", +"民德电子": "stock", +"300497": "stock", +"富祥药业": "stock", +"000955": "stock", +"欣龙控股": "stock", +"300896": "stock", +"爱美客": "stock", +"833346": "stock", +"威贸电子": "stock", +"002889": "stock", +"东方嘉盛": "stock", +"003039": "stock", +"顺控发展": "stock", +"001211": "stock", +"双枪科技": "stock", +"834407": "stock", +"驰诚股份": "stock", +"002776": "stock", +"*ST柏龙": "stock", +"002685": "stock", +"华东重机": "stock", +"838810": "stock", +"春光药装": "stock", +"688556": "stock", +"高测股份": "stock", +"688252": "stock", +"天德钰": "stock", +"600660": "stock", +"福耀玻璃": "stock", +"301013": "stock", +"利和兴": "stock", +"603039": "stock", +"泛微网络": "stock", +"300051": "stock", +"琏升科技": "stock", +"603803": "stock", +"瑞斯康达": "stock", +"000524": "stock", +"岭南控股": "stock", +"603179": "stock", +"新泉股份": "stock", +"600535": "stock", +"天士力": "stock", +"300480": "stock", +"光力科技": "stock", +"600000": "stock", +"浦发银行": "stock", +"688101": "stock", +"三达膜": "stock", +"603315": "stock", +"福鞍股份": "stock", +"300491": "stock", +"通合科技": "stock", +"600806": "stock", +"退市昆机": "stock", +"600080": "stock", +"金花股份": "stock", +"300429": "stock", +"强力新材": "stock", +"688328": "stock", +"深科达": "stock", +"300992": "stock", +"泰福泵业": "stock", +"301206": "stock", +"三元生物": "stock", +"603203": "stock", +"快克智能": "stock", +"301190": "stock", +"善水科技": "stock", +"603998": "stock", +"XD方盛制": "stock", +"001270": "stock", +"铖昌科技": "stock", +"600161": "stock", +"天坛生物": "stock", +"600238": "stock", +"海南椰岛": "stock", +"300976": "stock", +"达瑞电子": "stock", +"002567": "stock", +"唐人神": "stock", +"002475": "stock", +"立讯精密": "stock", +"002382": "stock", +"蓝帆医疗": "stock", +"603138": "stock", +"海量数据": "stock", +"688138": "stock", +"清溢光电": "stock", +"000551": "stock", +"创元科技": "stock", +"002291": "stock", +"遥望科技": "stock", +"600352": "stock", +"浙江龙盛": "stock", +"002662": "stock", +"京威股份": "stock", +"000956": 
"stock", +"中原油气": "stock", +"002252": "stock", +"上海莱士": "stock", +"000785": "stock", +"居然之家": "stock", +"688320": "stock", +"禾川科技": "stock", +"301239": "stock", +"普瑞眼科": "stock", +"603659": "stock", +"璞泰来": "stock", +"300447": "stock", +"全信股份": "stock", +"300779": "stock", +"惠城环保": "stock", +"600704": "stock", +"物产中大": "stock", +"002677": "stock", +"浙江美大": "stock", +"600818": "stock", +"中路股份": "stock", +"603896": "stock", +"寿仙谷": "stock", +"300001": "stock", +"特锐德": "stock", +"002922": "stock", +"伊戈尔": "stock", +"001256": "stock", +"炜冈科技": "stock", +"600897": "stock", +"厦门空港": "stock", +"002695": "stock", +"煌上煌": "stock", +"300008": "stock", +"天海防务": "stock", +"002396": "stock", +"星网锐捷": "stock", +"603221": "stock", +"爱丽家居": "stock", +"300856": "stock", +"科思股份": "stock", +"300212": "stock", +"易华录": "stock", +"600892": "stock", +"大晟文化": "stock", +"002565": "stock", +"顺灏股份": "stock", +"600059": "stock", +"古越龙山": "stock", +"603767": "stock", +"中马传动": "stock", +"688058": "stock", +"宝兰德": "stock", +"430478": "stock", +"峆一药业": "stock", +"688636": "stock", +"智明达": "stock", +"300700": "stock", +"岱勒新材": "stock", +"002751": "stock", +"易尚退": "stock", +"002268": "stock", +"电科网安": "stock", +"688689": "stock", +"银河微电": "stock", +"600071": "stock", +"凤凰光学": "stock", +"300196": "stock", +"长海股份": "stock", +"300720": "stock", +"海川智能": "stock", +"300454": "stock", +"深信服": "stock", +"688275": "stock", +"万润新能": "stock", +"002402": "stock", +"和而泰": "stock", +"002329": "stock", +"皇氏集团": "stock", +"000088": "stock", +"盐田港": "stock", +"002800": "stock", +"ST天顺": "stock", +"603551": "stock", +"奥普家居": "stock", +"000672": "stock", +"上峰水泥": "stock", +"603881": "stock", +"数据港": "stock", +"301068": "stock", +"大地海洋": "stock", +"002658": "stock", +"雪迪龙": "stock", +"600466": "stock", +"*ST蓝光": "stock", +"002736": "stock", +"国信证券": "stock", +"688069": "stock", +"德林海": "stock", +"688707": "stock", +"振华新材": "stock", +"300077": "stock", +"国民技术": "stock", +"300404": "stock", +"博济医药": "stock", +"002380": "stock", +"科远智慧": "stock", +"301056": "stock", +"森赫股份": "stock", +"002524": "stock", +"光正眼科": "stock", +"605376": "stock", +"博迁新材": "stock", +"301316": "stock", +"慧博云通": "stock", +"600831": "stock", +"广电网络": "stock", +"605208": "stock", +"永茂泰": "stock", +"300990": "stock", +"同飞股份": "stock", +"600307": "stock", +"酒钢宏兴": "stock", +"600986": "stock", +"浙文互联": "stock", +"002474": "stock", +"榕基软件": "stock", +"603836": "stock", +"海程邦达": "stock", +"002074": "stock", +"国轩高科": "stock", +"002737": "stock", +"葵花药业": "stock", +"300035": "stock", +"中科电气": "stock", +"603800": "stock", +"道森股份": "stock", +"600756": "stock", +"浪潮软件": "stock", +"301093": "stock", +"华兰股份": "stock", +"688479": "stock", +"友车科技": "stock", +"600382": "stock", +"广东明珠": "stock", +"603933": "stock", +"睿能科技": "stock", +"300172": "stock", +"中电环保": "stock", +"600729": "stock", +"重庆百货": "stock", +"603599": "stock", +"广信股份": "stock", +"688819": "stock", +"天能股份": "stock", +"300861": "stock", +"美畅股份": "stock", +"688366": "stock", +"昊海生科": "stock", +"836077": "stock", +"吉林碳谷": "stock", +"600278": "stock", +"东方创业": "stock", +"300752": "stock", +"隆利科技": "stock", +"002117": "stock", +"东港股份": "stock", +"688716": "stock", +"中研股份": "stock", +"600200": "stock", +"江苏吴中": "stock", +"300448": "stock", +"浩云科技": "stock", +"002712": "stock", +"思美传媒": "stock", +"300798": "stock", +"锦鸡股份": "stock", +"600977": "stock", +"中国电影": "stock", +"603488": "stock", +"展鹏科技": "stock", +"300988": "stock", +"津荣天宇": "stock", +"002023": "stock", +"海特高新": "stock", +"300505": "stock", +"川金诺": "stock", 
+"688282": "stock", +"理工导航": "stock", +"300535": "stock", +"达威股份": "stock", +"300652": "stock", +"雷迪克": "stock", +"603811": "stock", +"诚意药业": "stock", +"301208": "stock", +"中亦科技": "stock", +"603587": "stock", +"地素时尚": "stock", +"002448": "stock", +"中原内配": "stock", +"601628": "stock", +"中国人寿": "stock", +"600674": "stock", +"川投能源": "stock", +"600827": "stock", +"百联股份": "stock", +"000099": "stock", +"中信海直": "stock", +"600202": "stock", +"哈空调": "stock", +"002775": "stock", +"文科园林": "stock", +"603081": "stock", +"大丰实业": "stock", +"603926": "stock", +"铁流股份": "stock", +"000807": "stock", +"云铝股份": "stock", +"600318": "stock", +"新力金融": "stock", +"002345": "stock", +"潮宏基": "stock", +"300500": "stock", +"启迪设计": "stock", +"301355": "stock", +"南王科技": "stock", +"000537": "stock", +"广宇发展": "stock", +"688136": "stock", +"科兴制药": "stock", +"000988": "stock", +"华工科技": "stock", +"688592": "stock", +"司南导航": "stock", +"600668": "stock", +"尖峰集团": "stock", +"300142": "stock", +"沃森生物": "stock", +"002432": "stock", +"九安医疗": "stock", +"300522": "stock", +"世名科技": "stock", +"002688": "stock", +"金河生物": "stock", +"688668": "stock", +"鼎通科技": "stock", +"002741": "stock", +"光华科技": "stock", +"605001": "stock", +"威奥股份": "stock", +"600770": "stock", +"综艺股份": "stock", +"300337": "stock", +"银邦股份": "stock", +"000753": "stock", +"漳州发展": "stock", +"300665": "stock", +"飞鹿股份": "stock", +"002385": "stock", +"大北农": "stock", +"603657": "stock", +"春光科技": "stock", +"873527": "stock", +"夜光明": "stock", +"600259": "stock", +"广晟有色": "stock", +"300425": "stock", +"中建环能": "stock", +"430476": "stock", +"海能技术": "stock", +"600393": "stock", +"ST粤泰": "stock", +"002547": "stock", +"春兴精工": "stock", +"300912": "stock", +"凯龙高科": "stock", +"688277": "stock", +"天智航-U": "stock", +"300614": "stock", +"百川畅银": "stock", +"002770": "stock", +"科迪退": "stock", +"300823": "stock", +"建科机械": "stock", +"832471": "stock", +"美邦科技": "stock", +"002970": "stock", +"锐明技术": "stock", +"603977": "stock", +"国泰集团": "stock", +"002866": "stock", +"传艺科技": "stock", +"688297": "stock", +"中无人机": "stock", +"688322": "stock", +"奥比中光-UW": "stock", +"002993": "stock", +"奥海科技": "stock", +"300908": "stock", +"仲景食品": "stock", +"600168": "stock", +"武汉控股": "stock", +"600973": "stock", +"宝胜股份": "stock", +"300273": "stock", +"和佳退": "stock", +"688511": "stock", +"天微电子": "stock", +"603788": "stock", +"宁波高发": "stock", +"002739": "stock", +"万达电影": "stock", +"300676": "stock", +"华大基因": "stock", +"002883": "stock", +"中设股份": "stock", +"300617": "stock", +"安靠智电": "stock", +"003041": "stock", +"真爱美家": "stock", +"603607": "stock", +"京华激光": "stock", +"603995": "stock", +"甬金股份": "stock", +"002935": "stock", +"天奥电子": "stock", +"000759": "stock", +"中百集团": "stock", +"002412": "stock", +"汉森制药": "stock", +"002787": "stock", +"华源控股": "stock", +"601558": "stock", +"退市锐电": "stock", +"603613": "stock", +"国联股份": "stock", +"300793": "stock", +"佳禾智能": "stock", +"000885": "stock", +"城发环境": "stock", +"600857": "stock", +"宁波中百": "stock", +"600860": "stock", +"京城股份": "stock", +"000005": "stock", +"ST星源": "stock", +"601368": "stock", +"绿城水务": "stock", +"000916": "stock", +"华北高速": "stock", +"000598": "stock", +"兴蓉环境": "stock", +"002091": "stock", +"江苏国泰": "stock", +"000100": "stock", +"TCL科技": "stock", +"301053": "stock", +"远信工业": "stock", +"603508": "stock", +"思维列控": "stock", +"603879": "stock", +"永悦科技": "stock", +"688568": "stock", +"中科星图": "stock", +"603999": "stock", +"读者传媒": "stock", +"601696": "stock", +"中银证券": "stock", +"837821": "stock", +"则成电子": "stock", +"603232": "stock", +"格尔软件": "stock", +"600212": "stock", 
+"绿能慧充": "stock", +"002243": "stock", +"力合科创": "stock", +"603338": "stock", +"浙江鼎力": "stock", +"002152": "stock", +"广电运通": "stock", +"300373": "stock", +"扬杰科技": "stock", +"002073": "stock", +"软控股份": "stock", +"002384": "stock", +"东山精密": "stock", +"603308": "stock", +"应流股份": "stock", +"600759": "stock", +"*ST洲际": "stock", +"688308": "stock", +"欧科亿": "stock", +"600758": "stock", +"辽宁能源": "stock", +"603136": "stock", +"天目湖": "stock", +"300903": "stock", +"科翔股份": "stock", +"688608": "stock", +"恒玄科技": "stock", +"600176": "stock", +"中国巨石": "stock", +"832149": "stock", +"利尔达": "stock", +"002949": "stock", +"华阳国际": "stock", +"300648": "stock", +"星云股份": "stock", +"002530": "stock", +"金财互联": "stock", +"300638": "stock", +"广和通": "stock", +"002655": "stock", +"共达电声": "stock", +"301157": "stock", +"华塑科技": "stock", +"300138": "stock", +"晨光生物": "stock", +"603119": "stock", +"浙江荣泰": "stock", +"002453": "stock", +"华软科技": "stock", +"300219": "stock", +"鸿利智汇": "stock", +"600328": "stock", +"中盐化工": "stock", +"000961": "stock", +"中南建设": "stock", +"002210": "stock", +"飞马国际": "stock", +"605333": "stock", +"沪光股份": "stock", +"605077": "stock", +"华康股份": "stock", +"603808": "stock", +"歌力思": "stock", +"300589": "stock", +"江龙船艇": "stock", +"601949": "stock", +"中国出版": "stock", +"002301": "stock", +"齐心集团": "stock", +"300097": "stock", +"智云股份": "stock", +"833230": "stock", +"欧康医药": "stock", +"600086": "stock", +"退市金钰": "stock", +"605337": "stock", +"李子园": "stock", +"000572": "stock", +"海马汽车": "stock", +"601226": "stock", +"华电重工": "stock", +"300254": "stock", +"仟源医药": "stock", +"830832": "stock", +"齐鲁华信": "stock", +"000835": "stock", +"长动退": "stock", +"600991": "stock", +"广汽长丰": "stock", +"300199": "stock", +"翰宇药业": "stock", +"000612": "stock", +"焦作万方": "stock", +"603727": "stock", +"博迈科": "stock", +"837748": "stock", +"路桥信息": "stock", +"002696": "stock", +"百洋股份": "stock", +"000856": "stock", +"冀东装备": "stock", +"300673": "stock", +"佩蒂股份": "stock", +"300309": "stock", +"吉艾退": "stock", +"603861": "stock", +"白云电器": "stock", +"003000": "stock", +"劲仔食品": "stock", +"603105": "stock", +"芯能科技": "stock", +"301428": "stock", +"世纪恒通": "stock", +"301337": "stock", +"亚华电子": "stock", +"600646": "stock", +"ST国嘉": "stock", +"601766": "stock", +"中国中车": "stock", +"600193": "stock", +"创兴资源": "stock", +"002771": "stock", +"真视通": "stock", +"600432": "stock", +"退市吉恩": "stock", +"300475": "stock", +"香农芯创": "stock", +"002229": "stock", +"鸿博股份": "stock", +"002164": "stock", +"宁波东力": "stock", +"000913": "stock", +"钱江摩托": "stock", +"300545": "stock", +"联得装备": "stock", +"300788": "stock", +"中信出版": "stock", +"000716": "stock", +"黑芝麻": "stock", +"301362": "stock", +"民爆光电": "stock", +"000702": "stock", +"正虹科技": "stock", +"688147": "stock", +"微导纳米": "stock", +"831726": "stock", +"朱老六": "stock", +"000402": "stock", +"金融街": "stock", +"301141": "stock", +"中科磁业": "stock", +"300608": "stock", +"思特奇": "stock", +"000019": "stock", +"深粮控股": "stock", +"300528": "stock", +"幸福蓝海": "stock", +"605358": "stock", +"立昂微": "stock", +"000937": "stock", +"冀中能源": "stock", +"600933": "stock", +"爱柯迪": "stock", +"600901": "stock", +"江苏金租": "stock", +"688093": "stock", +"世华科技": "stock", +"002358": "stock", +"森源电气": "stock", +"600847": "stock", +"万里股份": "stock", +"000600": "stock", +"建投能源": "stock", +"688097": "stock", +"博众精工": "stock", +"600425": "stock", +"青松建化": "stock", +"600771": "stock", +"广誉远": "stock", +"301178": "stock", +"天亿马": "stock", +"836263": "stock", +"中航泰达": "stock", +"605336": "stock", +"帅丰电器": "stock", +"002037": "stock", +"保利联合": "stock", +"002747": "stock", 
+"埃斯顿": "stock", +"000895": "stock", +"双汇发展": "stock", +"301103": "stock", +"何氏眼科": "stock", +"002660": "stock", +"茂硕电源": "stock", +"838163": "stock", +"方大新材": "stock", +"300102": "stock", +"乾照光电": "stock", +"688391": "stock", +"钜泉科技": "stock", +"600813": "stock", +"ST鞍一工": "stock", +"300124": "stock", +"汇川技术": "stock", +"002931": "stock", +"锋龙股份": "stock", +"835670": "stock", +"数字人": "stock", +"002500": "stock", +"山西证券": "stock", +"300519": "stock", +"新光药业": "stock", +"300292": "stock", +"吴通控股": "stock", +"002709": "stock", +"天赐材料": "stock", +"601139": "stock", +"深圳燃气": "stock", +"600068": "stock", +"葛洲坝": "stock", +"600873": "stock", +"梅花生物": "stock", +"601678": "stock", +"滨化股份": "stock", +"600795": "stock", +"国电电力": "stock", +"603171": "stock", +"税友股份": "stock", +"688166": "stock", +"博瑞医药": "stock", +"600736": "stock", +"苏州高新": "stock", +"300792": "stock", +"壹网壹创": "stock", +"600415": "stock", +"小商品城": "stock", +"002667": "stock", +"威领股份": "stock", +"836957": "stock", +"汉维科技": "stock", +"300659": "stock", +"中孚信息": "stock", +"603037": "stock", +"凯众股份": "stock", +"002364": "stock", +"中恒电气": "stock", +"300308": "stock", +"中际旭创": "stock", +"605488": "stock", +"福莱新材": "stock", +"688551": "stock", +"科威尔": "stock", +"300318": "stock", +"博晖创新": "stock", +"600634": "stock", +"退市富控": "stock", +"300961": "stock", +"深水海纳": "stock", +"600610": "stock", +"中毅达": "stock", +"300296": "stock", +"利亚德": "stock", +"603589": "stock", +"口子窖": "stock", +"600689": "stock", +"上海三毛": "stock", +"002506": "stock", +"协鑫集成": "stock", +"600893": "stock", +"航发动力": "stock", +"301219": "stock", +"腾远钴业": "stock", +"688513": "stock", +"苑东生物": "stock", +"002937": "stock", +"兴瑞科技": "stock", +"688523": "stock", +"航天环宇": "stock", +"002317": "stock", +"众生药业": "stock", +"301357": "stock", +"北方长龙": "stock", +"688386": "stock", +"泛亚微透": "stock", +"600172": "stock", +"黄河旋风": "stock", +"600797": "stock", +"浙大网新": "stock", +"603688": "stock", +"石英股份": "stock", +"002616": "stock", +"长青集团": "stock", +"300960": "stock", +"通业科技": "stock", +"000050": "stock", +"深天马A": "stock", +"002263": "stock", +"大东南": "stock", +"603686": "stock", +"福龙马": "stock", +"002835": "stock", +"同为股份": "stock", +"300229": "stock", +"拓尔思": "stock", +"002983": "stock", +"芯瑞达": "stock", +"002436": "stock", +"兴森科技": "stock", +"301283": "stock", +"聚胶股份": "stock", +"002879": "stock", +"长缆科技": "stock", +"002075": "stock", +"沙钢股份": "stock", +"600570": "stock", +"恒生电子": "stock", +"002856": "stock", +"美芝股份": "stock", +"688512": "stock", +"慧智微-U": "stock", +"300312": "stock", +"邦讯退": "stock", +"603663": "stock", +"三祥新材": "stock", +"002722": "stock", +"物产金轮": "stock", +"002881": "stock", +"美格智能": "stock", +"688230": "stock", +"芯导科技": "stock", +"603725": "stock", +"天安新材": "stock", +"605050": "stock", +"福然德": "stock", +"831961": "stock", +"创远信科": "stock", +"603713": "stock", +"密尔克卫": "stock", +"301290": "stock", +"东星医疗": "stock", +"002876": "stock", +"三利谱": "stock", +"300121": "stock", +"阳谷华泰": "stock", +"601963": "stock", +"重庆银行": "stock", +"003008": "stock", +"开普检测": "stock", +"600281": "stock", +"华阳新材": "stock", +"603075": "stock", +"热威股份": "stock", +"600365": "stock", +"ST通葡": "stock", +"301314": "stock", +"科瑞思": "stock", +"300241": "stock", +"瑞丰光电": "stock", +"301060": "stock", +"兰卫医学": "stock", +"605299": "stock", +"舒华体育": "stock", +"003038": "stock", +"鑫铂股份": "stock", +"688233": "stock", +"神工股份": "stock", +"603825": "stock", +"华扬联众": "stock", +"300445": "stock", +"康斯特": "stock", +"300716": "stock", +"泉为科技": "stock", +"300485": "stock", +"赛升药业": "stock", +"603040": 
"stock", +"新坐标": "stock", +"300339": "stock", +"润和软件": "stock", +"000522": "stock", +"白云山A": "stock", +"301019": "stock", +"宁波色母": "stock", +"600585": "stock", +"海螺水泥": "stock", +"301335": "stock", +"天元宠物": "stock", +"300880": "stock", +"迦南智能": "stock", +"688189": "stock", +"南新制药": "stock", +"600966": "stock", +"博汇纸业": "stock", +"833781": "stock", +"瑞奇智造": "stock", +"000793": "stock", +"华闻集团": "stock", +"002730": "stock", +"电光科技": "stock", +"002111": "stock", +"威海广泰": "stock", +"300473": "stock", +"德尔股份": "stock", +"000032": "stock", +"深桑达A": "stock", +"600657": "stock", +"信达地产": "stock", +"002759": "stock", +"天际股份": "stock", +"300468": "stock", +"四方精创": "stock", +"603172": "stock", +"万丰股份": "stock", +"002977": "stock", +"天箭科技": "stock", +"600639": "stock", +"浦东金桥": "stock", +"603506": "stock", +"南都物业": "stock", +"002849": "stock", +"威星智能": "stock", +"833454": "stock", +"同心传动": "stock", +"001330": "stock", +"博纳影业": "stock", +"王": "firstnm", +"李": "firstnm", +"张": "firstnm", +"刘": "firstnm", +"陈": "firstnm", +"杨": "firstnm", +"黄": "firstnm", +"吴": "firstnm", +"赵": "firstnm", +"周": "firstnm", +"徐": "firstnm", +"孙": "firstnm", +"马": "firstnm", +"朱": "firstnm", +"胡": "firstnm", +"林": "firstnm", +"郭": "firstnm", +"何": "firstnm", +"高": "firstnm", +"罗": "firstnm", +"郑": "firstnm", +"梁": "firstnm", +"谢": "firstnm", +"宋": "firstnm", +"唐": "firstnm", +"许": "firstnm", +"邓": "firstnm", +"冯": "firstnm", +"韩": "firstnm", +"曹": "firstnm", +"曾": "firstnm", +"彭": "firstnm", +"肖": "firstnm", +"蔡": "firstnm", +"潘": "firstnm", +"田": "firstnm", +"董": "firstnm", +"袁": "firstnm", +"于": "firstnm", +"余": "firstnm", +"蒋": "firstnm", +"叶": "firstnm", +"杜": "firstnm", +"苏": "firstnm", +"魏": "firstnm", +"程": "firstnm", +"吕": "firstnm", +"丁": "firstnm", +"沈": "firstnm", +"任": "firstnm", +"姚": "firstnm", +"卢": "firstnm", +"钟": "firstnm", +"姜": "firstnm", +"崔": "firstnm", +"谭": "firstnm", +"廖": "firstnm", +"范": "firstnm", +"汪": "firstnm", +"陆": "firstnm", +"金": "firstnm", +"石": "firstnm", +"戴": "firstnm", +"贾": "firstnm", +"韦": "firstnm", +"夏": "firstnm", +"邱": "firstnm", +"方": "firstnm", +"侯": "firstnm", +"邹": "firstnm", +"熊": "firstnm", +"孟": "firstnm", +"秦": "firstnm", +"白": "firstnm", +"毛": "firstnm", +"江": "firstnm", +"闫": "firstnm", +"薛": "firstnm", +"尹": "firstnm", +"付": "firstnm", +"段": "firstnm", +"雷": "firstnm", +"黎": "firstnm", +"史": "firstnm", +"龙": "firstnm", +"钱": "firstnm", +"贺": "firstnm", +"陶": "firstnm", +"顾": "firstnm", +"龚": "firstnm", +"郝": "firstnm", +"邵": "firstnm", +"万": "firstnm", +"严": "firstnm", +"洪": "firstnm", +"赖": "firstnm", +"武": "firstnm", +"傅": "firstnm", +"莫": "firstnm", +"孔": "firstnm", +"汤": "firstnm", +"向": "firstnm", +"常": "firstnm", +"温": "firstnm", +"康": "firstnm", +"施": "firstnm", +"文": "firstnm", +"牛": "firstnm", +"樊": "firstnm", +"葛": "firstnm", +"邢": "firstnm", +"安": "firstnm", +"齐": "firstnm", +"易": "firstnm", +"乔": "firstnm", +"伍": "firstnm", +"庞": "firstnm", +"颜": "firstnm", +"倪": "firstnm", +"庄": "firstnm", +"聂": "firstnm", +"章": "firstnm", +"鲁": "firstnm", +"岳": "firstnm", +"翟": "firstnm", +"申": "firstnm", +"殷": "firstnm", +"詹": "firstnm", +"欧": "firstnm", +"耿": "firstnm", +"关": "firstnm", +"覃": "firstnm", +"兰": "firstnm", +"焦": "firstnm", +"俞": "firstnm", +"左": "firstnm", +"柳": "firstnm", +"甘": "firstnm", +"祝": "firstnm", +"包": "firstnm", +"代": "firstnm", +"宁": "firstnm", +"符": "firstnm", +"阮": "firstnm", +"尚": "firstnm", +"舒": "firstnm", +"纪": "firstnm", +"柯": "firstnm", +"梅": "firstnm", +"童": "firstnm", +"毕": "firstnm", +"凌": "firstnm", +"单": "firstnm", +"季": 
"firstnm", +"成": "firstnm", +"霍": "firstnm", +"苗": "firstnm", +"裴": "firstnm", +"涂": "firstnm", +"谷": "firstnm", +"曲": "firstnm", +"盛": "firstnm", +"冉": "firstnm", +"翁": "firstnm", +"蓝": "firstnm", +"骆": "firstnm", +"路": "firstnm", +"游": "firstnm", +"靳": "firstnm", +"辛": "firstnm", +"管": "firstnm", +"柴": "firstnm", +"蒙": "firstnm", +"鲍": "firstnm", +"华": "firstnm", +"喻": "firstnm", +"祁": "firstnm", +"房": "firstnm", +"蒲": "firstnm", +"滕": "firstnm", +"萧": "firstnm", +"屈": "firstnm", +"饶": "firstnm", +"解": "firstnm", +"牟": "firstnm", +"艾": "firstnm", +"尤": "firstnm", +"时": "firstnm", +"阳": "firstnm", +"阎": "firstnm", +"穆": "firstnm", +"应": "firstnm", +"农": "firstnm", +"司": "firstnm", +"古": "firstnm", +"吉": "firstnm", +"卓": "firstnm", +"车": "firstnm", +"简": "firstnm", +"连": "firstnm", +"缪": "firstnm", +"项": "firstnm", +"麦": "firstnm", +"褚": "firstnm", +"窦": "firstnm", +"娄": "firstnm", +"戚": "firstnm", +"岑": "firstnm", +"党": "firstnm", +"宫": "firstnm", +"景": "firstnm", +"卜": "firstnm", +"费": "firstnm", +"冷": "firstnm", +"晏": "firstnm", +"卫": "firstnm", +"席": "firstnm", +"柏": "firstnm", +"米": "firstnm", +"隋": "firstnm", +"宗": "firstnm", +"桂": "firstnm", +"瞿": "firstnm", +"全": "firstnm", +"苟": "firstnm", +"楼": "firstnm", +"闵": "firstnm", +"佟": "firstnm", +"臧": "firstnm", +"边": "firstnm", +"卞": "firstnm", +"姬": "firstnm", +"邬": "firstnm", +"和": "firstnm", +"师": "firstnm", +"仇": "firstnm", +"栾": "firstnm", +"丘": "firstnm", +"刁": "firstnm", +"沙": "firstnm", +"商": "firstnm", +"寇": "firstnm", +"荣": "firstnm", +"巫": "firstnm", +"郎": "firstnm", +"桑": "firstnm", +"丛": "firstnm", +"甄": "firstnm", +"敖": "firstnm", +"虞": "firstnm", +"仲": "firstnm", +"池": "firstnm", +"巩": "firstnm", +"明": "firstnm", +"佘": "firstnm", +"查": "firstnm", +"麻": "firstnm", +"苑": "firstnm", +"迟": "firstnm", +"邝": "firstnm", +"封": "firstnm", +"官": "firstnm", +"谈": "firstnm", +"鞠": "firstnm", +"匡": "firstnm", +"惠": "firstnm", +"荆": "firstnm", +"乐": "firstnm", +"冀": "firstnm", +"胥": "firstnm", +"郁": "firstnm", +"南": "firstnm", +"班": "firstnm", +"储": "firstnm", +"芦": "firstnm", +"原": "firstnm", +"栗": "firstnm", +"燕": "firstnm", +"楚": "firstnm", +"鄢": "firstnm", +"扬": "firstnm", +"劳": "firstnm", +"谌": "firstnm", +"奚": "firstnm", +"皮": "firstnm", +"蔺": "firstnm", +"粟": "firstnm", +"冼": "firstnm", +"盘": "firstnm", +"满": "firstnm", +"闻": "firstnm", +"厉": "firstnm", +"伊": "firstnm", +"候": "firstnm", +"仝": "firstnm", +"百里": "firstnm", +"淳于": "firstnm", +"澹台": "firstnm", +"第五": "firstnm", +"东方": "firstnm", +"独孤": "firstnm", +"端木": "firstnm", +"段干": "firstnm", +"公孙": "firstnm", +"公西": "firstnm", +"公羊": "firstnm", +"公冶": "firstnm", +"赫连": "firstnm", +"呼延": "firstnm", +"皇甫": "firstnm", +"乐正": "firstnm", +"冷狐": "firstnm", +"令狐": "firstnm", +"刘付": "firstnm", +"刘傅": "firstnm", +"闾丘": "firstnm", +"慕容": "firstnm", +"纳兰": "firstnm", +"南宫": "firstnm", +"南门": "firstnm", +"殴阳": "firstnm", +"濮阳": "firstnm", +"亓官": "firstnm", +"上官": "firstnm", +"申屠": "firstnm", +"司空": "firstnm", +"司寇": "firstnm", +"司马": "firstnm", +"司徒": "firstnm", +"太史": "firstnm", +"太叔": "firstnm", +"拓跋": "firstnm", +"完颜": "firstnm", +"万俟": "firstnm", +"尉迟": "firstnm", +"闻人": "firstnm", +"巫马": "firstnm", +"西门": "firstnm", +"夏侯": "firstnm", +"夏候": "firstnm", +"鲜于": "firstnm", +"轩辕": "firstnm", +"宇文": "firstnm", +"长孙": "firstnm", +"钟离": "firstnm", +"仲孙": "firstnm", +"诸葛": "firstnm", +"颛孙": "firstnm", +"宗政": "firstnm", +"左丘": "firstnm" +} diff --git a/rag/settings.py b/rag/settings.py index 3b4ce1b4ee2edc52bd88af9094bef91baa305c23..50053787bd2c90236716325763a9e3d8f07c71e3 100644 --- 
diff --git a/rag/settings.py b/rag/settings.py
index 3b4ce1b4ee2edc52bd88af9094bef91baa305c23..50053787bd2c90236716325763a9e3d8f07c71e3 100644
--- a/rag/settings.py
+++ b/rag/settings.py
@@ -1,55 +1,55 @@
-#
-# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-import os
-from api.utils import get_base_config, decrypt_database_config
-from api.utils.file_utils import get_project_base_directory
-from api.utils.log_utils import LoggerFactory, getLogger
-
-
-# Server
-RAG_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
-SUBPROCESS_STD_LOG_NAME = "std.log"
-
-ES = get_base_config("es", {})
-MINIO = decrypt_database_config(name="minio")
-try:
-    REDIS = decrypt_database_config(name="redis")
-except Exception as e:
-    REDIS = {}
-    pass
-DOC_MAXIMUM_SIZE = int(os.environ.get("MAX_CONTENT_LENGTH", 128 * 1024 * 1024))
-
-# Logger
-LoggerFactory.set_directory(
-    os.path.join(
-        get_project_base_directory(),
-        "logs",
-        "rag"))
-# {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0}
-LoggerFactory.LEVEL = 30
-
-es_logger = getLogger("es")
-minio_logger = getLogger("minio")
-cron_logger = getLogger("cron_logger")
-cron_logger.setLevel(20)
-chunk_logger = getLogger("chunk_logger")
-database_logger = getLogger("database")
-
-SVR_QUEUE_NAME = "rag_flow_svr_queue"
-SVR_QUEUE_RETENTION = 60*60
-SVR_QUEUE_MAX_LEN = 1024
-SVR_CONSUMER_NAME = "rag_flow_svr_consumer"
-SVR_CONSUMER_GROUP_NAME = "rag_flow_svr_consumer_group"
+#
+# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+from api.utils import get_base_config, decrypt_database_config
+from api.utils.file_utils import get_project_base_directory
+from api.utils.log_utils import LoggerFactory, getLogger
+
+
+# Server
+RAG_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
+SUBPROCESS_STD_LOG_NAME = "std.log"
+
+ES = get_base_config("es", {})
+MINIO = decrypt_database_config(name="minio")
+try:
+    REDIS = decrypt_database_config(name="redis")
+except Exception as e:
+    REDIS = {}
+    pass
+DOC_MAXIMUM_SIZE = int(os.environ.get("MAX_CONTENT_LENGTH", 128 * 1024 * 1024))
+
+# Logger
+LoggerFactory.set_directory(
+    os.path.join(
+        get_project_base_directory(),
+        "logs",
+        "rag"))
+# {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0}
+LoggerFactory.LEVEL = 30
+
+es_logger = getLogger("es")
+minio_logger = getLogger("minio")
+cron_logger = getLogger("cron_logger")
+cron_logger.setLevel(20)
+chunk_logger = getLogger("chunk_logger")
+database_logger = getLogger("database")
+
+SVR_QUEUE_NAME = "rag_flow_svr_queue"
+SVR_QUEUE_RETENTION = 60*60
+SVR_QUEUE_MAX_LEN = 1024
+SVR_CONSUMER_NAME = "rag_flow_svr_consumer"
+SVR_CONSUMER_GROUP_NAME = "rag_flow_svr_consumer_group"
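rag/settings.py centralizes backend configuration: Elasticsearch and MinIO connection blocks, an optional Redis block that silently falls back to `{}` when decryption fails, a 128 MiB upload cap that honors the MAX_CONTENT_LENGTH environment variable, logging at WARNING level (30) with the cron logger lowered to INFO (20), and the Redis task-queue constants. A minimal usage sketch — the import path matches the patch, but the size-check helper itself is a hypothetical example, not code from the repository:

```python
# Illustrative consumer of the settings module above.
from rag.settings import DOC_MAXIMUM_SIZE, SVR_QUEUE_NAME, cron_logger

def accept_upload(blob: bytes) -> bool:
    # DOC_MAXIMUM_SIZE defaults to 128 * 1024 * 1024 bytes but can be raised
    # via the MAX_CONTENT_LENGTH environment variable.
    if len(blob) > DOC_MAXIMUM_SIZE:
        cron_logger.warning(
            "upload rejected: %d > %d bytes", len(blob), DOC_MAXIMUM_SIZE)
        return False
    cron_logger.info("queueing document on %s", SVR_QUEUE_NAME)
    return True
```

For example, running the service with `MAX_CONTENT_LENGTH=268435456` in the environment raises the cap to 256 MiB without a code change.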
+# +import os +from api.utils import get_base_config, decrypt_database_config +from api.utils.file_utils import get_project_base_directory +from api.utils.log_utils import LoggerFactory, getLogger + + +# Server +RAG_CONF_PATH = os.path.join(get_project_base_directory(), "conf") +SUBPROCESS_STD_LOG_NAME = "std.log" + +ES = get_base_config("es", {}) +MINIO = decrypt_database_config(name="minio") +try: + REDIS = decrypt_database_config(name="redis") +except Exception as e: + REDIS = {} + pass +DOC_MAXIMUM_SIZE = int(os.environ.get("MAX_CONTENT_LENGTH", 128 * 1024 * 1024)) + +# Logger +LoggerFactory.set_directory( + os.path.join( + get_project_base_directory(), + "logs", + "rag")) +# {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0} +LoggerFactory.LEVEL = 30 + +es_logger = getLogger("es") +minio_logger = getLogger("minio") +cron_logger = getLogger("cron_logger") +cron_logger.setLevel(20) +chunk_logger = getLogger("chunk_logger") +database_logger = getLogger("database") + +SVR_QUEUE_NAME = "rag_flow_svr_queue" +SVR_QUEUE_RETENTION = 60*60 +SVR_QUEUE_MAX_LEN = 1024 +SVR_CONSUMER_NAME = "rag_flow_svr_consumer" +SVR_CONSUMER_GROUP_NAME = "rag_flow_svr_consumer_group" diff --git a/rag/svr/cache_file_svr.py b/rag/svr/cache_file_svr.py index dcd6778982919c0753a614197b726f36478899e1..caa9ce5303ba89379a5962779c648f479f2be269 100644 --- a/rag/svr/cache_file_svr.py +++ b/rag/svr/cache_file_svr.py @@ -1,59 +1,59 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import random -import time -import traceback - -from api.db.db_models import close_connection -from api.db.services.task_service import TaskService -from rag.settings import cron_logger -from rag.utils.minio_conn import MINIO -from rag.utils.redis_conn import REDIS_CONN - - -def collect(): - doc_locations = TaskService.get_ongoing_doc_name() - print(doc_locations) - if len(doc_locations) == 0: - time.sleep(1) - return - return doc_locations - -def main(): - locations = collect() - if not locations:return - print("TASKS:", len(locations)) - for kb_id, loc in locations: - try: - if REDIS_CONN.is_alive(): - try: - key = "{}/{}".format(kb_id, loc) - if REDIS_CONN.exist(key):continue - file_bin = MINIO.get(kb_id, loc) - REDIS_CONN.transaction(key, file_bin, 12 * 60) - cron_logger.info("CACHE: {}".format(loc)) - except Exception as e: - traceback.print_stack(e) - except Exception as e: - traceback.print_stack(e) - - - -if __name__ == "__main__": - while True: - main() - close_connection() +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import random +import time +import traceback + +from api.db.db_models import close_connection +from api.db.services.task_service import TaskService +from rag.settings import cron_logger +from rag.utils.minio_conn import MINIO +from rag.utils.redis_conn import REDIS_CONN + + +def collect(): + doc_locations = TaskService.get_ongoing_doc_name() + print(doc_locations) + if len(doc_locations) == 0: + time.sleep(1) + return + return doc_locations + +def main(): + locations = collect() + if not locations:return + print("TASKS:", len(locations)) + for kb_id, loc in locations: + try: + if REDIS_CONN.is_alive(): + try: + key = "{}/{}".format(kb_id, loc) + if REDIS_CONN.exist(key):continue + file_bin = MINIO.get(kb_id, loc) + REDIS_CONN.transaction(key, file_bin, 12 * 60) + cron_logger.info("CACHE: {}".format(loc)) + except Exception as e: + traceback.print_stack(e) + except Exception as e: + traceback.print_stack(e) + + + +if __name__ == "__main__": + while True: + main() + close_connection() time.sleep(1) \ No newline at end of file diff --git a/rag/svr/discord_svr.py b/rag/svr/discord_svr.py index 85ec98a7b1e700c4c9fed58e7f26eacc54403ed8..5426826c99ae003dbf725a2132cc2ff93162251a 100644 --- a/rag/svr/discord_svr.py +++ b/rag/svr/discord_svr.py @@ -1,80 +1,80 @@ -# -# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import discord -import requests -import base64 -import asyncio - -URL = '{YOUR_IP_ADDRESS:PORT}/v1/api/completion_aibotk' # Default: https://demo.ragflow.io/v1/api/completion_aibotk - -JSON_DATA = { - "conversation_id": "xxxxxxxxxxxxxxxxxxxxxxxxxxx", # Get conversation id from /api/new_conversation - "Authorization": "ragflow-xxxxxxxxxxxxxxxxxxxxxxxxxxxxx", # RAGFlow Assistant Chat Bot API Key - "word": "" # User question, don't need to initialize -} - -DISCORD_BOT_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxx" #Get DISCORD_BOT_KEY from Discord Application - - -intents = discord.Intents.default() -intents.message_content = True -client = discord.Client(intents=intents) - - -@client.event -async def on_ready(): - print(f'We have logged in as {client.user}') - - -@client.event -async def on_message(message): - if message.author == client.user: - return - - if client.user.mentioned_in(message): - - if len(message.content.split('> ')) == 1: - await message.channel.send("Hi~ How can I help you? 
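The service above pre-warms Redis with MinIO blobs under a `"{kb_id}/{loc}"` key. As a hedged sketch (an assumption about how workers are expected to consume this cache, not code from this PR), the matching read path would be:

```python
# Sketch of the read path that cache_file_svr pre-warms; get_file_bin
# is a hypothetical helper, not a function in this repository.
from rag.utils.minio_conn import MINIO
from rag.utils.redis_conn import REDIS_CONN


def get_file_bin(kb_id: str, loc: str):
    key = "{}/{}".format(kb_id, loc)  # same key scheme as cache_file_svr
    if REDIS_CONN.is_alive():
        cached = REDIS_CONN.get(key)
        if cached:
            # caveat: with decode_responses=True the cached value
            # comes back as str, not bytes
            return cached
    # cache miss: fetch the object from MinIO directly
    return MINIO.get(kb_id, loc)
```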
") - else: - JSON_DATA['word']=message.content.split('> ')[1] - response = requests.post(URL, json=JSON_DATA) - response_data = response.json().get('data', []) - image_bool = False - - for i in response_data: - if i['type'] == 1: - res = i['content'] - if i['type'] == 3: - image_bool = True - image_data = base64.b64decode(i['url']) - with open('tmp_image.png','wb') as file: - file.write(image_data) - image= discord.File('tmp_image.png') - - await message.channel.send(f"{message.author.mention}{res}") - - if image_bool: - await message.channel.send(file=image) - - -loop = asyncio.get_event_loop() - -try: - loop.run_until_complete(client.start(DISCORD_BOT_KEY)) -except KeyboardInterrupt: - loop.run_until_complete(client.close()) -finally: - loop.close() +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import discord +import requests +import base64 +import asyncio + +URL = '{YOUR_IP_ADDRESS:PORT}/v1/api/completion_aibotk' # Default: https://demo.ragflow.io/v1/api/completion_aibotk + +JSON_DATA = { + "conversation_id": "xxxxxxxxxxxxxxxxxxxxxxxxxxx", # Get conversation id from /api/new_conversation + "Authorization": "ragflow-xxxxxxxxxxxxxxxxxxxxxxxxxxxxx", # RAGFlow Assistant Chat Bot API Key + "word": "" # User question, don't need to initialize +} + +DISCORD_BOT_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxx" #Get DISCORD_BOT_KEY from Discord Application + + +intents = discord.Intents.default() +intents.message_content = True +client = discord.Client(intents=intents) + + +@client.event +async def on_ready(): + print(f'We have logged in as {client.user}') + + +@client.event +async def on_message(message): + if message.author == client.user: + return + + if client.user.mentioned_in(message): + + if len(message.content.split('> ')) == 1: + await message.channel.send("Hi~ How can I help you? 
") + else: + JSON_DATA['word']=message.content.split('> ')[1] + response = requests.post(URL, json=JSON_DATA) + response_data = response.json().get('data', []) + image_bool = False + + for i in response_data: + if i['type'] == 1: + res = i['content'] + if i['type'] == 3: + image_bool = True + image_data = base64.b64decode(i['url']) + with open('tmp_image.png','wb') as file: + file.write(image_data) + image= discord.File('tmp_image.png') + + await message.channel.send(f"{message.author.mention}{res}") + + if image_bool: + await message.channel.send(file=image) + + +loop = asyncio.get_event_loop() + +try: + loop.run_until_complete(client.start(DISCORD_BOT_KEY)) +except KeyboardInterrupt: + loop.run_until_complete(client.close()) +finally: + loop.close() diff --git a/rag/utils/redis_conn.py b/rag/utils/redis_conn.py index 7d6dd4655ba45b51fc30dbbd442c142e50fb0643..af78ece3c15d7bb5c4442e65844de2889c267120 100644 --- a/rag/utils/redis_conn.py +++ b/rag/utils/redis_conn.py @@ -1,150 +1,150 @@ -import json - -import redis -import logging -from rag import settings -from rag.utils import singleton - - -class Payload: - def __init__(self, consumer, queue_name, group_name, msg_id, message): - self.__consumer = consumer - self.__queue_name = queue_name - self.__group_name = group_name - self.__msg_id = msg_id - self.__message = json.loads(message['message']) - - def ack(self): - try: - self.__consumer.xack(self.__queue_name, self.__group_name, self.__msg_id) - return True - except Exception as e: - logging.warning("[EXCEPTION]ack" + str(self.__queue_name) + "||" + str(e)) - return False - - def get_message(self): - return self.__message - - -@singleton -class RedisDB: - def __init__(self): - self.REDIS = None - self.config = settings.REDIS - self.__open__() - - def __open__(self): - try: - self.REDIS = redis.StrictRedis(host=self.config["host"].split(":")[0], - port=int(self.config.get("host", ":6379").split(":")[1]), - db=int(self.config.get("db", 1)), - password=self.config.get("password"), - decode_responses=True) - except Exception as e: - logging.warning("Redis can't be connected.") - return self.REDIS - - def health(self): - - self.REDIS.ping() - a, b = 'xx', 'yy' - self.REDIS.set(a, b, 3) - - if self.REDIS.get(a) == b: - return True - - def is_alive(self): - return self.REDIS is not None - - def exist(self, k): - if not self.REDIS: return - try: - return self.REDIS.exists(k) - except Exception as e: - logging.warning("[EXCEPTION]exist" + str(k) + "||" + str(e)) - self.__open__() - - def get(self, k): - if not self.REDIS: return - try: - return self.REDIS.get(k) - except Exception as e: - logging.warning("[EXCEPTION]get" + str(k) + "||" + str(e)) - self.__open__() - - def set_obj(self, k, obj, exp=3600): - try: - self.REDIS.set(k, json.dumps(obj, ensure_ascii=False), exp) - return True - except Exception as e: - logging.warning("[EXCEPTION]set_obj" + str(k) + "||" + str(e)) - self.__open__() - return False - - def set(self, k, v, exp=3600): - try: - self.REDIS.set(k, v, exp) - return True - except Exception as e: - logging.warning("[EXCEPTION]set" + str(k) + "||" + str(e)) - self.__open__() - return False - - def transaction(self, key, value, exp=3600): - try: - pipeline = self.REDIS.pipeline(transaction=True) - pipeline.set(key, value, exp, nx=True) - pipeline.execute() - return True - except Exception as e: - logging.warning("[EXCEPTION]set" + str(key) + "||" + str(e)) - self.__open__() - return False - - def queue_product(self, queue, message, exp=settings.SVR_QUEUE_RETENTION) -> bool: - for _ 
diff --git a/rag/utils/redis_conn.py b/rag/utils/redis_conn.py
index 7d6dd4655ba45b51fc30dbbd442c142e50fb0643..af78ece3c15d7bb5c4442e65844de2889c267120 100644
--- a/rag/utils/redis_conn.py
+++ b/rag/utils/redis_conn.py
@@ -1,150 +1,150 @@
-import json
-
-import redis
-import logging
-from rag import settings
-from rag.utils import singleton
-
-
-class Payload:
-    def __init__(self, consumer, queue_name, group_name, msg_id, message):
-        self.__consumer = consumer
-        self.__queue_name = queue_name
-        self.__group_name = group_name
-        self.__msg_id = msg_id
-        self.__message = json.loads(message['message'])
-
-    def ack(self):
-        try:
-            self.__consumer.xack(self.__queue_name, self.__group_name, self.__msg_id)
-            return True
-        except Exception as e:
-            logging.warning("[EXCEPTION]ack" + str(self.__queue_name) + "||" + str(e))
-        return False
-
-    def get_message(self):
-        return self.__message
-
-
-@singleton
-class RedisDB:
-    def __init__(self):
-        self.REDIS = None
-        self.config = settings.REDIS
-        self.__open__()
-
-    def __open__(self):
-        try:
-            self.REDIS = redis.StrictRedis(host=self.config["host"].split(":")[0],
-                                           port=int(self.config.get("host", ":6379").split(":")[1]),
-                                           db=int(self.config.get("db", 1)),
-                                           password=self.config.get("password"),
-                                           decode_responses=True)
-        except Exception as e:
-            logging.warning("Redis can't be connected.")
-        return self.REDIS
-
-    def health(self):
-
-        self.REDIS.ping()
-        a, b = 'xx', 'yy'
-        self.REDIS.set(a, b, 3)
-
-        if self.REDIS.get(a) == b:
-            return True
-
-    def is_alive(self):
-        return self.REDIS is not None
-
-    def exist(self, k):
-        if not self.REDIS: return
-        try:
-            return self.REDIS.exists(k)
-        except Exception as e:
-            logging.warning("[EXCEPTION]exist" + str(k) + "||" + str(e))
-            self.__open__()
-
-    def get(self, k):
-        if not self.REDIS: return
-        try:
-            return self.REDIS.get(k)
-        except Exception as e:
-            logging.warning("[EXCEPTION]get" + str(k) + "||" + str(e))
-            self.__open__()
-
-    def set_obj(self, k, obj, exp=3600):
-        try:
-            self.REDIS.set(k, json.dumps(obj, ensure_ascii=False), exp)
-            return True
-        except Exception as e:
-            logging.warning("[EXCEPTION]set_obj" + str(k) + "||" + str(e))
-            self.__open__()
-        return False
-
-    def set(self, k, v, exp=3600):
-        try:
-            self.REDIS.set(k, v, exp)
-            return True
-        except Exception as e:
-            logging.warning("[EXCEPTION]set" + str(k) + "||" + str(e))
-            self.__open__()
-        return False
-
-    def transaction(self, key, value, exp=3600):
-        try:
-            pipeline = self.REDIS.pipeline(transaction=True)
-            pipeline.set(key, value, exp, nx=True)
-            pipeline.execute()
-            return True
-        except Exception as e:
-            logging.warning("[EXCEPTION]set" + str(key) + "||" + str(e))
-            self.__open__()
-        return False
-
-    def queue_product(self, queue, message, exp=settings.SVR_QUEUE_RETENTION) -> bool:
-        for _ in range(3):
-            try:
-                payload = {"message": json.dumps(message)}
-                pipeline = self.REDIS.pipeline()
-                pipeline.xadd(queue, payload)
-                pipeline.expire(queue, exp)
-                pipeline.execute()
-                return True
-            except Exception as e:
-                print(e)
-                logging.warning("[EXCEPTION]producer" + str(queue) + "||" + str(e))
-        return False
-
-    def queue_consumer(self, queue_name, group_name, consumer_name, msg_id=b">") -> Payload:
-        try:
-            group_info = self.REDIS.xinfo_groups(queue_name)
-            if not any(e["name"] == group_name for e in group_info):
-                self.REDIS.xgroup_create(
-                    queue_name,
-                    group_name,
-                    id="0",
-                    mkstream=True
-                )
-            args = {
-                "groupname": group_name,
-                "consumername": consumer_name,
-                "count": 1,
-                "block": 10000,
-                "streams": {queue_name: msg_id},
-            }
-            messages = self.REDIS.xreadgroup(**args)
-            if not messages:
-                return None
-            stream, element_list = messages[0]
-            msg_id, payload = element_list[0]
-            res = Payload(self.REDIS, queue_name, group_name, msg_id, payload)
-            return res
-        except Exception as e:
-            if 'key' in str(e):
-                pass
-            else:
-                logging.warning("[EXCEPTION]consumer" + str(queue_name) + "||" + str(e))
-        return None
-
-
-REDIS_CONN = RedisDB()
+import json
+
+import redis
+import logging
+from rag import settings
+from rag.utils import singleton
+
+
+class Payload:
+    def __init__(self, consumer, queue_name, group_name, msg_id, message):
+        self.__consumer = consumer
+        self.__queue_name = queue_name
+        self.__group_name = group_name
+        self.__msg_id = msg_id
+        self.__message = json.loads(message['message'])
+
+    def ack(self):
+        try:
+            self.__consumer.xack(self.__queue_name, self.__group_name, self.__msg_id)
+            return True
+        except Exception as e:
+            logging.warning("[EXCEPTION]ack" + str(self.__queue_name) + "||" + str(e))
+        return False
+
+    def get_message(self):
+        return self.__message
+
+
+@singleton
+class RedisDB:
+    def __init__(self):
+        self.REDIS = None
+        self.config = settings.REDIS
+        self.__open__()
+
+    def __open__(self):
+        try:
+            self.REDIS = redis.StrictRedis(host=self.config["host"].split(":")[0],
+                                           port=int(self.config.get("host", ":6379").split(":")[1]),
+                                           db=int(self.config.get("db", 1)),
+                                           password=self.config.get("password"),
+                                           decode_responses=True)
+        except Exception as e:
+            logging.warning("Redis can't be connected.")
+        return self.REDIS
+
+    def health(self):
+        self.REDIS.ping()
+        a, b = 'xx', 'yy'
+        self.REDIS.set(a, b, 3)
+        # return an explicit boolean instead of None on failure
+        return self.REDIS.get(a) == b
+
+    def is_alive(self):
+        return self.REDIS is not None
+
+    def exist(self, k):
+        if not self.REDIS: return
+        try:
+            return self.REDIS.exists(k)
+        except Exception as e:
+            logging.warning("[EXCEPTION]exist" + str(k) + "||" + str(e))
+            self.__open__()
+
+    def get(self, k):
+        if not self.REDIS: return
+        try:
+            return self.REDIS.get(k)
+        except Exception as e:
+            logging.warning("[EXCEPTION]get" + str(k) + "||" + str(e))
+            self.__open__()
+
+    def set_obj(self, k, obj, exp=3600):
+        try:
+            self.REDIS.set(k, json.dumps(obj, ensure_ascii=False), exp)
+            return True
+        except Exception as e:
+            logging.warning("[EXCEPTION]set_obj" + str(k) + "||" + str(e))
+            self.__open__()
+        return False
+
+    def set(self, k, v, exp=3600):
+        try:
+            self.REDIS.set(k, v, exp)
+            return True
+        except Exception as e:
+            logging.warning("[EXCEPTION]set" + str(k) + "||" + str(e))
+            self.__open__()
+        return False
+
+    def transaction(self, key, value, exp=3600):
+        try:
+            pipeline = self.REDIS.pipeline(transaction=True)
+            pipeline.set(key, value, exp, nx=True)
+            pipeline.execute()
+            return True
+        except Exception as e:
+            logging.warning("[EXCEPTION]transaction" + str(key) + "||" + str(e))
+            self.__open__()
+        return False
+
+    def queue_product(self, queue, message, exp=settings.SVR_QUEUE_RETENTION) -> bool:
+        for _ in range(3):
+            try:
+                payload = {"message": json.dumps(message)}
+                pipeline = self.REDIS.pipeline()
+                pipeline.xadd(queue, payload)
+                pipeline.expire(queue, exp)
+                pipeline.execute()
+                return True
+            except Exception as e:
+                print(e)
+                logging.warning("[EXCEPTION]producer" + str(queue) + "||" + str(e))
+        return False
+
+    def queue_consumer(self, queue_name, group_name, consumer_name, msg_id=b">") -> Payload:
+        try:
+            group_info = self.REDIS.xinfo_groups(queue_name)
+            if not any(e["name"] == group_name for e in group_info):
+                self.REDIS.xgroup_create(
+                    queue_name,
+                    group_name,
+                    id="0",
+                    mkstream=True
+                )
+            args = {
+                "groupname": group_name,
+                "consumername": consumer_name,
+                "count": 1,
+                "block": 10000,
+                "streams": {queue_name: msg_id},
+            }
+            messages = self.REDIS.xreadgroup(**args)
+            if not messages:
+                return None
+            stream, element_list = messages[0]
+            msg_id, payload = element_list[0]
+            res = Payload(self.REDIS, queue_name, group_name, msg_id, payload)
+            return res
+        except Exception as e:
+            if 'key' in str(e):
+                pass
+            else:
+                logging.warning("[EXCEPTION]consumer" + str(queue_name) + "||" + str(e))
+        return None
+
+
+REDIS_CONN = RedisDB()
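A minimal round-trip through the stream helpers above, using only the methods and settings names that appear in this diff (it assumes a reachable Redis configured in `conf`):

```python
# Produce one task message, then consume and acknowledge it.
from rag import settings
from rag.utils.redis_conn import REDIS_CONN

REDIS_CONN.queue_product(settings.SVR_QUEUE_NAME, {"task_id": "demo"})

payload = REDIS_CONN.queue_consumer(
    settings.SVR_QUEUE_NAME,
    settings.SVR_CONSUMER_GROUP_NAME,
    settings.SVR_CONSUMER_NAME,
)
if payload:
    print(payload.get_message())  # {'task_id': 'demo'}
    payload.ack()                 # mark the stream entry as processed
```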
diff --git a/web/.gitignore b/web/.gitignore
index 4cbac851d246e4a77f8a950a5205c792258a712f..dc9be38c3c8644a20bca86e6eee0768ef16db881 100644
--- a/web/.gitignore
+++ b/web/.gitignore
@@ -1,9 +1,9 @@
-/node_modules
-/.env.local
-/.umirc.local.ts
-/config/config.local.ts
-/src/.umi/*
-/src/.umi-production/*
-/src/.umi-test
-/dist
-.swc
+/node_modules
+/.env.local
+/.umirc.local.ts
+/config/config.local.ts
+/src/.umi/*
+/src/.umi-production/*
+/src/.umi-test
+/dist
+.swc
diff --git a/web/.npmrc b/web/.npmrc
index 80b972bd8b32eb7832444e4048526dbee4f3a530..8f46dd2575db36faa55d4973525cef17b4f17a72 100644
--- a/web/.npmrc
+++ b/web/.npmrc
@@ -1,2 +1,2 @@
-registry=https://registry.npmmirror.com/
-
+registry=https://registry.npmmirror.com/
+
diff --git a/web/reducer.js b/web/reducer.js
index a38a1bae259ddc264befaa6d0f4ab09868cb5eae..5a8d5cc6fb9b7f030d7bdc1992b52c7a0e3f96ae 100644
--- a/web/reducer.js
+++ b/web/reducer.js
@@ -1,27 +1,26 @@
-import React, { useReducer } from 'react'
-const CHANGE_LOCALE = 'CHANGE_LOCALE'
-
-const mainContext = React.createContext()
-
-const reducer = (state, action) => {
-  switch (action.type) {
-    case CHANGE_LOCALE:
-      return { ...state, locale: action.locale || 'zh' }
-    default:
-      return state
-  }
-}
-
-const ContextProvider = (props) => {
-  const [state, dispatch] = useReducer(reducer, {
-    locale: 'zh'
-  })
-  return (
-    <mainContext.Provider value={{ state, dispatch }}>
-      {props.children}
-    </mainContext.Provider>
-  )
-}
-
-export { reducer, mainContext, ContextProvider }
-
+import React, { useReducer } from 'react';
+const CHANGE_LOCALE = 'CHANGE_LOCALE';
+
+const mainContext = React.createContext();
+
+const reducer = (state, action) => {
+  switch (action.type) {
+    case CHANGE_LOCALE:
+      return { ...state, locale: action.locale || 'zh' };
+    default:
+      return state;
+  }
+};
+
+const ContextProvider = (props) => {
+  const [state, dispatch] = useReducer(reducer, {
+    locale: 'zh',
+  });
+  return (
+    <mainContext.Provider value={{ state, dispatch }}>
+      {props.children}
+    </mainContext.Provider>
+  );
+};
+
+export { ContextProvider, mainContext, reducer };
diff --git a/web/src/assets/svg/llm/gemini.svg b/web/src/assets/svg/llm/gemini.svg
index 3f06bf3b5a18ba8a5ef42d49639f163afa57c7bf..91b0bf4cfcf477b2b3e15be32549b63e27a11bc6 100644
--- a/web/src/assets/svg/llm/gemini.svg
+++ b/web/src/assets/svg/llm/gemini.svg
@@ -1,114 +1,114 @@
[114 lines of SVG markup for the Gemini logo were stripped during extraction; like the rest of this diff, the old and new hunks appear identical apart from line endings.]
diff --git a/web/src/layouts/index.less b/web/src/layouts/index.less
index 60d35219bb279c26816423c077d75d78f5856f43..342d2df81d6a851222ccdabf1ad53c418062c96d 100644
--- a/web/src/layouts/index.less
+++ b/web/src/layouts/index.less
@@ -1,27 +1,27 @@
-.navs {
-  ul {
-    padding: 0;
-    list-style: none;
-    display: flex;
-  }
-
-  li {
-    margin-right: 1em;
-  }
-}
-
-.layout {
-  height: 100vh;
-}
-
-body {
-  margin: 0;
-}
-
-.divider {
-  margin: 0;
-}
-
-.clickAvailable {
-  cursor: pointer;
-}
+.navs {
+  ul {
+    padding: 0;
+    list-style: none;
+    display: flex;
+  }
+
+  li {
+    margin-right: 1em;
+  }
+}
+
+.layout {
+  height: 100vh;
+}
+
+body {
+  margin: 0;
+}
+
+.divider {
+  margin: 0;
+}
+
+.clickAvailable {
+  cursor: pointer;
+}
diff --git a/web/src/layouts/index.tsx b/web/src/layouts/index.tsx
index ded00fa81b59f1f8c92dc79ed9e4c7bd2d32ed35..a186cb5375d30c40b203430acad728607eb65759 100644
--- a/web/src/layouts/index.tsx
+++ b/web/src/layouts/index.tsx
@@ -1,37 +1,37 @@
-import { Divider, Layout, theme } from 'antd';
-import React from 'react';
-import { Outlet } from 'umi';
-import '../locales/config';
-import Header from './components/header';
-
-import styles from './index.less';
-
-const { Content } = Layout;
-
-const App: React.FC = () => {
-  const {
-    token: { colorBgContainer, borderRadiusLG },
-  } = theme.useToken();
-
-  return (
-    <Layout className={styles.layout}>
-      <Header></Header>
-      <Divider className={styles.divider}></Divider>
-      <Layout>
-        <Content
-          style={{
-            minHeight: 280,
-            background: colorBgContainer,
-            borderRadius: borderRadiusLG,
-            overflowY: 'auto',
-            display: 'flex',
-          }}
-        >
-          <Outlet />
-        </Content>
-      </Layout>
-    </Layout>
-  );
-};
-
-export default App;
+import { Divider, Layout, theme } from 'antd';
+import React from 'react';
+import { Outlet } from 'umi';
+import '../locales/config';
+import Header from './components/header';
+
+import styles from './index.less';
+
+const { Content } = Layout;
+
+const App: React.FC = () => {
+  const {
+    token: { colorBgContainer, borderRadiusLG },
+  } = theme.useToken();
+
+  return (
+    <Layout className={styles.layout}>
+      <Header></Header>
+      <Divider className={styles.divider}></Divider>
+      <Layout>
+        <Content
+          style={{
+            minHeight: 280,
+            background: colorBgContainer,
+            borderRadius: borderRadiusLG,
+            overflowY: 'auto',
+            display: 'flex',
+          }}
+        >
+          <Outlet />
+        </Content>
+      </Layout>
+    </Layout>
+  );
+};
+
+export default App;
diff --git a/web/src/locales/config.ts b/web/src/locales/config.ts
index b95f9dd073ec96e73a57850e9df0715fdf5ca3eb..ba5f05584ca336ec187af171e0d80547f162c724 100644
--- a/web/src/locales/config.ts
+++ b/web/src/locales/config.ts
@@ -1,30 +1,30 @@
-import i18n from 'i18next';
-import LanguageDetector from 'i18next-browser-languagedetector';
-import { initReactI18next } from 'react-i18next';
-
-import translation_en from './en';
-import translation_zh from './zh';
-import translation_zh_traditional from './zh-traditional';
-
-const resources = {
-  en: translation_en,
-  zh: translation_zh,
-  'zh-TRADITIONAL': translation_zh_traditional,
-};
-
-i18n
-  .use(initReactI18next)
-  .use(LanguageDetector)
-  .init({
-    detection: {
-      lookupLocalStorage: 'lng',
-    },
-    supportedLngs: ['en', 'zh', 'zh-TRADITIONAL'],
-    resources,
-    fallbackLng: 'en',
-    interpolation: {
-      escapeValue: false,
-    },
-  });
-
-export default i18n;
+import i18n from 'i18next';
+import LanguageDetector from 'i18next-browser-languagedetector';
+import { initReactI18next } from 'react-i18next';
+
+import translation_en from './en';
+import translation_zh from './zh';
+import translation_zh_traditional from './zh-traditional';
+
+const resources = {
+  en: translation_en,
+  zh: translation_zh,
+  'zh-TRADITIONAL': translation_zh_traditional,
+};
+
+i18n
+  .use(initReactI18next)
+  .use(LanguageDetector)
+  .init({
+    detection: {
+      lookupLocalStorage: 'lng',
+    },
+    supportedLngs: ['en', 'zh', 'zh-TRADITIONAL'],
+    resources,
+    fallbackLng: 'en',
+    interpolation: {
+      escapeValue: false,
+    },
+  });
+
+export default i18n;
diff --git a/web/src/pages/404.jsx b/web/src/pages/404.jsx
index 6a40f34d5be2c6b41527ca0d488b26f358cec137..e55396a473328dac5ab65f94cb6b7bb9601203f9 100644
--- a/web/src/pages/404.jsx
+++ b/web/src/pages/404.jsx
@@ -1,16 +1,19 @@
-import { Button, Result } from 'antd';
-import { history } from 'umi';
-
-const NoFoundPage = () => {
-  return (<Result
-    status="404"
-    title="404"
-    subTitle="Sorry, the page you visited does not exist."
-    extra={<Button type="primary" onClick={() => history.push('/')}>
-      返回主页
-    </Button>}
-  />
-  )
-};
-
-export default NoFoundPage;
+import { Button, Result } from 'antd';
+import { history } from 'umi';
+
+const NoFoundPage = () => {
+  return (
+    <Result
+      status="404"
+      title="404"
+      subTitle="Sorry, the page you visited does not exist."
+      extra={
+        <Button type="primary" onClick={() => history.push('/')}>
+          返回主页
+        </Button>
+      }
+    />
+  );
+};
+
+export default NoFoundPage;
diff --git a/web/src/pages/add-knowledge/components/knowledge-file/index.less b/web/src/pages/add-knowledge/components/knowledge-file/index.less
index fa817e1a8229291ba9e9a7443b6486333c84eaa0..6962a89294cdc67fbaf9369a9e6eddce57c20fa7 100644
--- a/web/src/pages/add-knowledge/components/knowledge-file/index.less
+++ b/web/src/pages/add-knowledge/components/knowledge-file/index.less
@@ -1,54 +1,54 @@
-.datasetWrapper {
-  padding: 30px 30px 0;
-  height: 100%;
-}
-
-.documentTable {
-  tbody {
-    // height: calc(100vh - 508px);
-  }
-}
-
-.filter {
-  height: 32px;
-  display: flex;
-  margin: 10px 0;
-  justify-content: space-between;
-  padding: 24px 0;
-  align-items: center;
-}
-
-.deleteIconWrapper {
-  width: 22px;
-  text-align: center;
-}
-
-.img {
-  height: 24px;
-  width: 24px;
-  display: inline-block;
-  vertical-align: middle;
-}
-
-.column {
-  min-width: 200px;
-}
-
-.toChunks {
-  cursor: pointer;
-}
-
-.pageInputNumber {
-  width: 220px;
-}
-
-.questionIcon {
-  margin-inline-start: 4px;
-  color: rgba(0, 0, 0, 0.45);
-  cursor: help;
-  writing-mode: horizontal-tb;
-}
-
-.nameText {
-  color: #1677ff;
-}
+.datasetWrapper {
+  padding: 30px 30px 0;
+  height: 100%;
+}
+
+.documentTable {
+  tbody {
+    // height: calc(100vh - 508px);
+  }
+}
+
+.filter {
+  height: 32px;
+  display: flex;
+  margin: 10px 0;
+  justify-content: space-between;
+  padding: 24px 0;
+  align-items: center;
+}
+
+.deleteIconWrapper {
+  width: 22px;
+  text-align: center;
+}
+
+.img {
+  height: 24px;
+  width: 24px;
+  display: inline-block;
+  vertical-align: middle;
+}
+
+.column {
+  min-width: 200px;
+}
+
+.toChunks {
+  cursor: pointer;
+}
+
+.pageInputNumber {
+  width: 220px;
+}
+
+.questionIcon {
+  margin-inline-start: 4px;
+  color: rgba(0, 0, 0, 0.45);
+  cursor: help;
+  writing-mode: horizontal-tb;
+}
+
+.nameText {
+  color: #1677ff;
+}
diff --git a/web/src/pages/add-knowledge/components/knowledge-setting/index.less b/web/src/pages/add-knowledge/components/knowledge-setting/index.less
index 7386c3cd5fb08c8f9aad491bbfada7e23f681a06..4889e37762d528eed1d520af5613a50902d53ea6 100644
--- a/web/src/pages/add-knowledge/components/knowledge-setting/index.less
+++ b/web/src/pages/add-knowledge/components/knowledge-setting/index.less
@@ -1,45 +1,45 @@
-.tags {
-  margin-bottom: 24px;
-}
-
-.preset {
-  display: flex;
-  height: 80px;
-  background-color: rgba(0, 0, 0, 0.1);
-  border-radius: 5px;
-  padding: 5px;
-  margin-bottom: 24px;
-
-  .left {
-    flex: 1;
-  }
-
-  .right {
-    width: 100px;
-    border-left: 1px solid rgba(0, 0, 0, 0.4);
-    margin: 10px 0px;
-    padding: 5px;
-  }
-}
-
-.configurationWrapper {
-  padding: 0 52px;
-  .buttonWrapper {
-    text-align: right;
-  }
-  .variableSlider {
-    width: 100%;
-  }
-}
-
-.categoryPanelWrapper {
-  .topTitle {
-    margin-top: 0;
-  }
-  .imageRow {
-    margin-top: 16px;
-  }
-  .image {
-    width: 100%;
-  }
-}
+.tags {
+  margin-bottom: 24px;
+}
+
+.preset {
+  display: flex;
+  height: 80px;
+  background-color: rgba(0, 0, 0, 0.1);
+  border-radius: 5px;
+  padding: 5px;
+  margin-bottom: 24px;
+
+  .left {
+    flex: 1;
+  }
+
+  .right {
+    width: 100px;
+    border-left: 1px solid rgba(0, 0, 0, 0.4);
+    margin: 10px 0px;
+    padding: 5px;
+  }
+}
+
+.configurationWrapper {
+  padding: 0 52px;
+  .buttonWrapper {
+    text-align: right;
+  }
+  .variableSlider {
+    width: 100%;
+  }
+}
+
+.categoryPanelWrapper {
+  .topTitle {
+    margin-top: 0;
+  }
+  .imageRow {
+    margin-top: 16px;
+  }
+  .image {
+    width: 100%;
+  }
+}
diff --git a/web/src/pages/add-knowledge/index.less b/web/src/pages/add-knowledge/index.less
index 122898c4cfe31270e5624893f3504a37b7994c0a..bd8f3ecd37711934da5dd520e3a06f6aaad6c140 100644
--- a/web/src/pages/add-knowledge/index.less
+++ b/web/src/pages/add-knowledge/index.less
@@ -1,19 +1,19 @@
-.container {
-  display: flex;
-  height: 100%;
-  width: 100%;
-  .contentWrapper {
-    flex: 1;
-    overflow-x: auto;
-    height: 100%;
-    background-color: rgba(247, 248, 250, 1);
-    padding: 16px 20px 28px 40px;
-    display: flex;
-    flex-direction: column;
-  }
-  .content {
-    background-color: white;
-    margin-top: 16px;
-    flex: 1;
-  }
-}
+.container {
+  display: flex;
+  height: 100%;
+  width: 100%;
+  .contentWrapper {
+    flex: 1;
+    overflow-x: auto;
+    height: 100%;
+    background-color: rgba(247, 248, 250, 1);
+    padding: 16px 20px 28px 40px;
+    display: flex;
+    flex-direction: column;
+  }
+  .content {
+    background-color: white;
+    margin-top: 16px;
+    flex: 1;
+  }
+}
diff --git a/web/src/pages/knowledge/index.less b/web/src/pages/knowledge/index.less
index a13208101395da48b604a93740c9a20b6f5dbcb2..2479453fa1f48bc70bfde154d7242722d1e967e5 100644
--- a/web/src/pages/knowledge/index.less
+++ b/web/src/pages/knowledge/index.less
@@ -1,50 +1,50 @@
-// @import '~@/less/variable.less';
-
-.knowledge {
-  padding: 48px 0;
-}
-
-.topWrapper {
-  display: flex;
-  justify-content: space-between;
-  align-items: flex-start;
-  padding: 0 60px 72px;
-
-  .title {
-    font-family: Inter;
-    font-size: 30px;
-    font-style: normal;
-    font-weight: @fontWeight600;
-    line-height: 38px;
-    color: rgba(16, 24, 40, 1);
-  }
-  .description {
-    font-family: Inter;
-    font-size: 16px;
-    font-style: normal;
-    font-weight: 400;
-    line-height: 24px;
-    color: rgba(71, 84, 103, 1);
-  }
-
-  .topButton {
-    font-family: Inter;
-    font-size: 14px;
-    font-style: normal;
-    font-weight: @fontWeight600;
-    line-height: 20px;
-  }
-
-  .filterButton {
-    display: flex;
-    align-items: center;
-    .topButton();
-  }
-}
-.knowledgeCardContainer {
-  padding: 0 60px;
-  overflow: auto;
-  .knowledgeEmpty {
-    width: 100%;
-  }
-}
+// @import '~@/less/variable.less';
+
+.knowledge {
+  padding: 48px 0;
+}
+
+.topWrapper {
+  display: flex;
+  justify-content: space-between;
+  align-items: flex-start;
+  padding: 0 60px 72px;
+
+  .title {
+    font-family: Inter;
+    font-size: 30px;
+    font-style: normal;
+    font-weight: @fontWeight600;
+    line-height: 38px;
+    color: rgba(16, 24, 40, 1);
+  }
+  .description {
+    font-family: Inter;
+    font-size: 16px;
+    font-style: normal;
+    font-weight: 400;
+    line-height: 24px;
+    color: rgba(71, 84, 103, 1);
+  }
+
+  .topButton {
+    font-family: Inter;
+    font-size: 14px;
+    font-style: normal;
+    font-weight: @fontWeight600;
+    line-height: 20px;
+  }
+
+  .filterButton {
+    display: flex;
+    align-items: center;
+    .topButton();
+  }
+}
+.knowledgeCardContainer {
+  padding: 0 60px;
+  overflow: auto;
+  .knowledgeEmpty {
+    width: 100%;
+  }
+}
diff --git a/web/src/pages/login/index.less b/web/src/pages/login/index.less
index b64041429a94c26e3e88d7a97934e61dbc94f72a..cafee5c616e9efa2e54e098514870b6c420d86a0 100644
--- a/web/src/pages/login/index.less
+++ b/web/src/pages/login/index.less
@@ -1,106 +1,106 @@
-@import '../../theme/vars';
-
-.loginPage {
-  display: flex;
-  .loginLeft {
-    // width: 610px;
-    width: 40%;
-    background-color: #fff;
-    height: 100vh;
-    display: flex;
-    align-items: center;
-  }
-
-  .leftContainer {
-    width: 60%;
-    padding: 5px, 0px, 5px, 0px;
-    margin: 0 auto;
-  }
-
-  .loginRight {
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    flex: 1;
-    position: relative;
-    &::before {
-      content: ' ';
-      position: absolute;
-      top: 0;
-      bottom: 0;
-      left: 0;
-      right: 0;
-      background-color: rgba(24, 73, 169, 0.6);
-      background-image: url('@/assets/svg/login-background.svg');
-      background-size: cover;
-      background-blend-mode: multiply;
-      filter: blur(3px);
-      background-position: center;
-      z-index: -1;
-    }
-    .white {
-      color: #fff;
-    }
-    .pink {
-      color: #e9d7fe;
-    }
-    .rightPanel {
-      max-width: 670px;
-      .loginTitle {
-        font-size: 68px;
-        font-style: normal;
-        font-weight: 600;
-        line-height: 90px;
-        letter-spacing: -1.44px;
-      }
-      .loginDescription {
-        font-size: 20px;
-        font-style: normal;
-        font-weight: 500;
-        line-height: 30px;
-      }
-      .loginRateNumber {
-        font-size: 16px;
-        font-style: normal;
-        font-weight: 600;
-        line-height: 24px;
-      }
-      .loginRateReviews {
-        font-size: 16px;
-        font-style: normal;
-        font-weight: 500;
-        line-height: 24px;
-      }
-    }
-  }
-
-  .loginTitle {
-    //styleName: Heading/1;
-    font-size: 38px;
-    font-weight: 600;
-    line-height: 46px;
-    letter-spacing: 0em;
-    height: 80px;
-    margin-bottom: 69px;
-
-    // text-align: center;
-    span {
-      font-size: 16px;
-      line-height: 24px;
-
-      color: #000000a6;
-    }
-  }
-
-  @media screen and (max-width: 957px) {
-    .loginLeft {
-      width: 100%;
-      background-color: #fff;
-      height: 100%;
-    }
-
-    .modal {
-      width: 80%;
-    }
-  }
-}
+@import '../../theme/vars';
+
+.loginPage {
+  display: flex;
+  .loginLeft {
+    // width: 610px;
+    width: 40%;
+    background-color: #fff;
+    height: 100vh;
+    display: flex;
+    align-items: center;
+  }
+
+  .leftContainer {
+    width: 60%;
+    padding: 5px 0px 5px 0px;
+    margin: 0 auto;
+  }
+
+  .loginRight {
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    flex: 1;
+    position: relative;
+    &::before {
+      content: ' ';
+      position: absolute;
+      top: 0;
+      bottom: 0;
+      left: 0;
+      right: 0;
+      background-color: rgba(24, 73, 169, 0.6);
+      background-image: url('@/assets/svg/login-background.svg');
+      background-size: cover;
+      background-blend-mode: multiply;
+      filter: blur(3px);
+      background-position: center;
+      z-index: -1;
+    }
+    .white {
+      color: #fff;
+    }
+    .pink {
+      color: #e9d7fe;
+    }
+    .rightPanel {
+      max-width: 670px;
+      .loginTitle {
+        font-size: 68px;
+        font-style: normal;
+        font-weight: 600;
+        line-height: 90px;
+        letter-spacing: -1.44px;
+      }
+      .loginDescription {
+        font-size: 20px;
+        font-style: normal;
+        font-weight: 500;
+        line-height: 30px;
+      }
+      .loginRateNumber {
+        font-size: 16px;
+        font-style: normal;
+        font-weight: 600;
+        line-height: 24px;
+      }
+      .loginRateReviews {
+        font-size: 16px;
+        font-style: normal;
+        font-weight: 500;
+        line-height: 24px;
+      }
+    }
+  }
+
+  .loginTitle {
+    //styleName: Heading/1;
+    font-size: 38px;
+    font-weight: 600;
+    line-height: 46px;
+    letter-spacing: 0em;
+    height: 80px;
+    margin-bottom: 69px;
+
+    // text-align: center;
+    span {
+      font-size: 16px;
+      line-height: 24px;
+
+      color: #000000a6;
+    }
+  }
+
+  @media screen and (max-width: 957px) {
+    .loginLeft {
+      width: 100%;
+      background-color: #fff;
+      height: 100%;
+    }
+
+    .modal {
+      width: 80%;
+    }
+  }
+}
diff --git a/web/src/theme/theme.ts b/web/src/theme/theme.ts
index 8bbdf13fb2c3ed77146129d152ddb412bb7269f2..b09928fbb54876723c2223ee349849a05c15fadb 100644
--- a/web/src/theme/theme.ts
+++ b/web/src/theme/theme.ts
@@ -1,11 +1,11 @@
-module.exports = {
-  'primary-color': '#338AFF', // '#338AFF',
-  'border-radius-base': '4px',
-  // 'menu-dark-color': '',
-  // 'menu-dark-danger-color': '',
-  'menu-dark-bg': '#092140',
-  'menu-dark-item-active-bg': '#092140'
-
-  // 'menu-dark-arrow-color': '',
-  // 'menu-dark-inline-submenu-bg': '',
-};
+module.exports = {
+  'primary-color': '#338AFF', // '#338AFF',
+  'border-radius-base': '4px',
+  // 'menu-dark-color': '',
+  // 'menu-dark-danger-color': '',
+  'menu-dark-bg': '#092140',
+  'menu-dark-item-active-bg': '#092140',
+
+  // 'menu-dark-arrow-color': '',
+  // 'menu-dark-inline-submenu-bg': '',
+};
diff --git a/web/src/theme/vars.less b/web/src/theme/vars.less
index 3c0cd7096e3da34f73e18afefc88c8f9cc88bc99..6a00f075fbac6d3e05243447c9115670c9a1ebcc 100644
--- a/web/src/theme/vars.less
+++ b/web/src/theme/vars.less
@@ -1,9 +1,9 @@
-@header-height: 64px;
-@menu-width: 200px;
-@menu-small-width: 83px;
-@layout-bg: #f2f3f6;
-@logo-font-size: 22px;
-@border-color: #d9d9d9;
-@dashboard-desc-color: #9d9fa2;
-@primary-color: #338aff;
-@primary-color-light: rgba(21, 65, 255, 0.5);
+@header-height: 64px;
+@menu-width: 200px;
+@menu-small-width: 83px;
+@layout-bg: #f2f3f6;
+@logo-font-size: 22px;
+@border-color: #d9d9d9;
+@dashboard-desc-color: #9d9fa2;
+@primary-color: #338aff;
+@primary-color-light: rgba(21, 65, 255, 0.5);
diff --git a/web/src/utils/date.ts b/web/src/utils/date.ts
index 93472d25bee79c080f38be6725f6e09a53a9f943..5da36dc6430f443f38223f259fb6edd1139be2c8 100644
--- a/web/src/utils/date.ts
+++ b/web/src/utils/date.ts
@@ -1,20 +1,20 @@
-import dayjs from 'dayjs';
-
-export function today() {
-  return formatDate(dayjs());
-}
-
-export function lastDay() {
-  return formatDate(dayjs().subtract(1, 'days'));
-}
-
-export function lastWeek() {
-  return formatDate(dayjs().subtract(1, 'weeks'));
-}
-
-export function formatDate(date: any) {
-  if (!date) {
-    return '';
-  }
-  return dayjs(date).format('DD/MM/YYYY HH:mm:ss');
-}
+import dayjs from 'dayjs';
+
+export function today() {
+  return formatDate(dayjs());
+}
+
+export function lastDay() {
+  return formatDate(dayjs().subtract(1, 'days'));
+}
+
+export function lastWeek() {
+  return formatDate(dayjs().subtract(1, 'weeks'));
+}
+
+export function formatDate(date: any) {
+  if (!date) {
+    return '';
+  }
+  return dayjs(date).format('DD/MM/YYYY HH:mm:ss');
+}
diff --git a/web/src/utils/index.ts b/web/src/utils/index.ts
index d0cdcef726387586977d1d4da5fe105e89a8a0f9..69cb76cc9c5999519e60de7e29adaddd623cdb58 100644
--- a/web/src/utils/index.ts
+++ b/web/src/utils/index.ts
@@ -1,30 +1,30 @@
-/**
- * @param {String} url
- * @param {Boolean} isNoCaseSensitive whether to ignore case
- * @return {Object}
- */
-// import numeral from 'numeral';
-
-import { Base64 } from 'js-base64';
-import JSEncrypt from 'jsencrypt';
-
-export const getWidth = () => {
-  return { width: window.innerWidth };
-};
-export const rsaPsw = (password: string) => {
-  const pub =
-    '-----BEGIN PUBLIC KEY-----MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArq9XTUSeYr2+N1h3Afl/z8Dse/2yD0ZGrKwx+EEEcdsBLca9Ynmx3nIB5obmLlSfmskLpBo0UACBmB5rEjBp2Q2f3AG3Hjd4B+gNCG6BDaawuDlgANIhGnaTLrIqWrrcm4EMzJOnAOI1fgzJRsOOUEfaS318Eq9OVO3apEyCCt0lOQK6PuksduOjVxtltDav+guVAA068NrPYmRNabVKRNLJpL8w4D44sfth5RvZ3q9t+6RTArpEtc5sh5ChzvqPOzKGMXW83C95TxmXqpbK6olN4RevSfVjEAgCydH6HN6OhtOQEcnrU97r9H0iZOWwbw3pVrZiUkuRD1R56Wzs2wIDAQAB-----END PUBLIC KEY-----';
-  const encryptor = new JSEncrypt();
-
-  encryptor.setPublicKey(pub);
-
-  return encryptor.encrypt(Base64.encode(password));
-};
-
-export default {
-  getWidth,
-  rsaPsw,
-};
-
-export const getFileExtension = (filename: string) =>
-  filename.slice(filename.lastIndexOf('.') + 1).toLowerCase();
+/**
+ * @param {String} url
+ * @param {Boolean} isNoCaseSensitive whether to ignore case
+ * @return {Object}
+ */
+// import numeral from 'numeral';
+
+import { Base64 } from 'js-base64';
+import JSEncrypt from 'jsencrypt';
+
+export const getWidth = () => {
+  return { width: window.innerWidth };
+};
+export const rsaPsw = (password: string) => {
+  const pub =
+    '-----BEGIN PUBLIC KEY-----MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArq9XTUSeYr2+N1h3Afl/z8Dse/2yD0ZGrKwx+EEEcdsBLca9Ynmx3nIB5obmLlSfmskLpBo0UACBmB5rEjBp2Q2f3AG3Hjd4B+gNCG6BDaawuDlgANIhGnaTLrIqWrrcm4EMzJOnAOI1fgzJRsOOUEfaS318Eq9OVO3apEyCCt0lOQK6PuksduOjVxtltDav+guVAA068NrPYmRNabVKRNLJpL8w4D44sfth5RvZ3q9t+6RTArpEtc5sh5ChzvqPOzKGMXW83C95TxmXqpbK6olN4RevSfVjEAgCydH6HN6OhtOQEcnrU97r9H0iZOWwbw3pVrZiUkuRD1R56Wzs2wIDAQAB-----END PUBLIC KEY-----';
+  const encryptor = new JSEncrypt();
+
+  encryptor.setPublicKey(pub);
+
+  return encryptor.encrypt(Base64.encode(password));
+};
+
+export default {
+  getWidth,
+  rsaPsw,
+};
+
+export const getFileExtension = (filename: string) =>
+  filename.slice(filename.lastIndexOf('.') + 1).toLowerCase();
diff --git a/web/tsconfig.json b/web/tsconfig.json
index 332e4a9a70d58957b638ac1d133ee9894b34be93..824e6cc8df0f53499dd1959ce5f32aff2195b27c 100644
--- a/web/tsconfig.json
+++ b/web/tsconfig.json
@@ -1,6 +1,4 @@
-{
-  "extends": "./src/.umi/tsconfig.json",
-  "@@/*": [
-    "src/.umi/*",
-  ],
-}
\ No newline at end of file
+{
+  "extends": "./src/.umi/tsconfig.json",
+  "@@/*": ["src/.umi/*"],
+}
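For scripting a login against the same backend from Python, here is a hedged counterpart to `rsaPsw` in web/src/utils/index.ts. Two assumptions are baked in: JSEncrypt's default PKCS#1 v1.5 padding, and that the public key string above has been saved as a normal wrapped PEM file named `login_pub.pem` (a hypothetical path, not part of the repository).

```python
# Hedged Python equivalent of rsaPsw (web/src/utils/index.ts).
# Assumes PKCS#1 v1.5 padding (JSEncrypt's default) and the same public
# key stored as a wrapped PEM in login_pub.pem (hypothetical filename).
import base64

from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import padding


def rsa_psw(password: str) -> str:
    with open("login_pub.pem", "rb") as f:
        pub = serialization.load_pem_public_key(f.read())
    # The frontend Base64-encodes the password before RSA-encrypting it,
    # then JSEncrypt returns the ciphertext Base64-encoded.
    ciphertext = pub.encrypt(
        base64.b64encode(password.encode()), padding.PKCS1v15()
    )
    return base64.b64encode(ciphertext).decode()
```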