Fix errors detected by Ruff (#3918)
Browse files
### What problem does this PR solve?
Fix errors detected by Ruff
### Type of change
- [x] Refactoring
This view is limited to 50 files because it contains too many changes. See the raw diff.
- agent/canvas.py +23 -13
- agent/component/__init__.py +70 -0
- agent/component/base.py +10 -5
- agent/component/categorize.py +8 -5
- agent/component/deepl.py +0 -1
- agent/component/exesql.py +4 -2
- agent/component/generate.py +24 -12
- agent/component/rewrite.py +2 -1
- agent/component/switch.py +10 -7
- agent/component/template.py +2 -1
- agent/test/client.py +2 -1
- api/apps/api_app.py +8 -5
- api/apps/canvas_app.py +4 -4
- api/apps/chunk_app.py +2 -1
- api/apps/conversation_app.py +4 -2
- api/apps/dialog_app.py +4 -2
- api/apps/document_app.py +4 -3
- api/apps/llm_app.py +4 -2
- api/apps/sdk/agent.py +1 -1
- api/apps/sdk/doc.py +2 -3
- api/apps/sdk/session.py +8 -4
- api/apps/user_app.py +1 -1
- api/db/db_models.py +1 -1
- api/db/init_data.py +1 -1
- api/db/services/__init__.py +5 -4
- api/db/services/api_service.py +2 -1
- api/db/services/canvas_service.py +1 -3
- api/db/services/common_service.py +1 -1
- api/db/services/dialog_service.py +24 -14
- api/db/services/document_service.py +2 -1
- api/db/services/file2document_service.py +2 -2
- api/db/services/file_service.py +4 -2
- api/db/services/llm_service.py +2 -1
- api/db/services/task_service.py +66 -34
- api/db/services/user_service.py +1 -1
- api/ragflow_server.py +3 -3
- api/utils/api_utils.py +0 -1
- api/validation.py +1 -1
- deepdoc/parser/__init__.py +13 -1
- deepdoc/parser/excel_parser.py +12 -8
- deepdoc/parser/html_parser.py +1 -1
- deepdoc/parser/json_parser.py +1 -1
- deepdoc/parser/pdf_parser.py +9 -8
- deepdoc/parser/resume/__init__.py +60 -19
- deepdoc/parser/resume/entities/corporations.py +47 -21
- deepdoc/parser/resume/entities/degrees.py +20 -16
- deepdoc/parser/resume/entities/industries.py +684 -679
- deepdoc/parser/resume/entities/regions.py +758 -748
- deepdoc/parser/resume/entities/schools.py +28 -17
- deepdoc/parser/resume/step_two.py +202 -106
    	
        agent/canvas.py
    CHANGED
    
    | @@ -133,7 +133,8 @@ class Canvas(ABC): | |
| 133 | 
             
                        "components": {}
         | 
| 134 | 
             
                    }
         | 
| 135 | 
             
                    for k in self.dsl.keys():
         | 
| 136 | 
            -
                        if k in ["components"]: | 
|  | |
| 137 | 
             
                        dsl[k] = deepcopy(self.dsl[k])
         | 
| 138 |  | 
| 139 | 
             
                    for k, cpn in self.components.items():
         | 
| @@ -158,7 +159,8 @@ class Canvas(ABC): | |
| 158 |  | 
| 159 | 
             
                def get_compnent_name(self, cid):
         | 
| 160 | 
             
                    for n in self.dsl["graph"]["nodes"]:
         | 
| 161 | 
            -
                        if cid == n["id"]: | 
|  | |
| 162 | 
             
                    return ""
         | 
| 163 |  | 
| 164 | 
             
                def run(self, **kwargs):
         | 
| @@ -173,7 +175,8 @@ class Canvas(ABC): | |
| 173 | 
             
                        if kwargs.get("stream"):
         | 
| 174 | 
             
                            for an in ans():
         | 
| 175 | 
             
                                yield an
         | 
| 176 | 
            -
                        else: | 
|  | |
| 177 | 
             
                        return
         | 
| 178 |  | 
| 179 | 
             
                    if not self.path:
         | 
| @@ -188,7 +191,8 @@ class Canvas(ABC): | |
| 188 | 
             
                    def prepare2run(cpns):
         | 
| 189 | 
             
                        nonlocal ran, ans
         | 
| 190 | 
             
                        for c in cpns:
         | 
| 191 | 
            -
                            if self.path[-1] and c == self.path[-1][-1]: | 
|  | |
| 192 | 
             
                            cpn = self.components[c]["obj"]
         | 
| 193 | 
             
                            if cpn.component_name == "Answer":
         | 
| 194 | 
             
                                self.answer.append(c)
         | 
| @@ -197,7 +201,8 @@ class Canvas(ABC): | |
| 197 | 
             
                                if c not in without_dependent_checking:
         | 
| 198 | 
             
                                    cpids = cpn.get_dependent_components()
         | 
| 199 | 
             
                                    if any([cc not in self.path[-1] for cc in cpids]):
         | 
| 200 | 
            -
                                        if c not in waiting: | 
|  | |
| 201 | 
             
                                        continue
         | 
| 202 | 
             
                                yield "*'{}'* is running...🕞".format(self.get_compnent_name(c))
         | 
| 203 | 
             
                                ans = cpn.run(self.history, **kwargs)
         | 
| @@ -211,10 +216,12 @@ class Canvas(ABC): | |
| 211 | 
             
                        logging.debug(f"Canvas.run: {ran} {self.path}")
         | 
| 212 | 
             
                        cpn_id = self.path[-1][ran]
         | 
| 213 | 
             
                        cpn = self.get_component(cpn_id)
         | 
| 214 | 
            -
                        if not cpn["downstream"]: | 
|  | |
| 215 |  | 
| 216 | 
             
                        loop = self._find_loop()
         | 
| 217 | 
            -
                        if loop: | 
|  | |
| 218 |  | 
| 219 | 
             
                        if cpn["obj"].component_name.lower() in ["switch", "categorize", "relevant"]:
         | 
| 220 | 
             
                            switch_out = cpn["obj"].output()[1].iloc[0, 0]
         | 
| @@ -283,19 +290,22 @@ class Canvas(ABC): | |
| 283 |  | 
| 284 | 
             
                def _find_loop(self, max_loops=6):
         | 
| 285 | 
             
                    path = self.path[-1][::-1]
         | 
| 286 | 
            -
                    if len(path) < 2: | 
|  | |
| 287 |  | 
| 288 | 
             
                    for i in range(len(path)):
         | 
| 289 | 
             
                        if path[i].lower().find("answer") >= 0:
         | 
| 290 | 
             
                            path = path[:i]
         | 
| 291 | 
             
                            break
         | 
| 292 |  | 
| 293 | 
            -
                    if len(path) < 2: | 
|  | |
| 294 |  | 
| 295 | 
            -
                    for  | 
| 296 | 
            -
                        pat = ",".join(path[0: | 
| 297 | 
             
                        path_str = ",".join(path)
         | 
| 298 | 
            -
                        if len(pat) >= len(path_str): | 
|  | |
| 299 | 
             
                        loop = max_loops
         | 
| 300 | 
             
                        while path_str.find(pat) == 0 and loop >= 0:
         | 
| 301 | 
             
                            loop -= 1
         | 
| @@ -303,7 +313,7 @@ class Canvas(ABC): | |
| 303 | 
             
                                return False
         | 
| 304 | 
             
                            path_str = path_str[len(pat)+1:]
         | 
| 305 | 
             
                        if loop < 0:
         | 
| 306 | 
            -
                            pat = " => ".join([p.split(":")[0] for p in path[0: | 
| 307 | 
             
                            return pat + " => " + pat
         | 
| 308 |  | 
| 309 | 
             
                    return False
         | 
|  | |
| 133 | 
             
                        "components": {}
         | 
| 134 | 
             
                    }
         | 
| 135 | 
             
                    for k in self.dsl.keys():
         | 
| 136 | 
            +
                        if k in ["components"]:
         | 
| 137 | 
            +
                            continue
         | 
| 138 | 
             
                        dsl[k] = deepcopy(self.dsl[k])
         | 
| 139 |  | 
| 140 | 
             
                    for k, cpn in self.components.items():
         | 
|  | |
| 159 |  | 
| 160 | 
             
                def get_compnent_name(self, cid):
         | 
| 161 | 
             
                    for n in self.dsl["graph"]["nodes"]:
         | 
| 162 | 
            +
                        if cid == n["id"]:
         | 
| 163 | 
            +
                            return n["data"]["name"]
         | 
| 164 | 
             
                    return ""
         | 
| 165 |  | 
| 166 | 
             
                def run(self, **kwargs):
         | 
|  | |
| 175 | 
             
                        if kwargs.get("stream"):
         | 
| 176 | 
             
                            for an in ans():
         | 
| 177 | 
             
                                yield an
         | 
| 178 | 
            +
                        else:
         | 
| 179 | 
            +
                            yield ans
         | 
| 180 | 
             
                        return
         | 
| 181 |  | 
| 182 | 
             
                    if not self.path:
         | 
|  | |
| 191 | 
             
                    def prepare2run(cpns):
         | 
| 192 | 
             
                        nonlocal ran, ans
         | 
| 193 | 
             
                        for c in cpns:
         | 
| 194 | 
            +
                            if self.path[-1] and c == self.path[-1][-1]:
         | 
| 195 | 
            +
                                continue
         | 
| 196 | 
             
                            cpn = self.components[c]["obj"]
         | 
| 197 | 
             
                            if cpn.component_name == "Answer":
         | 
| 198 | 
             
                                self.answer.append(c)
         | 
|  | |
| 201 | 
             
                                if c not in without_dependent_checking:
         | 
| 202 | 
             
                                    cpids = cpn.get_dependent_components()
         | 
| 203 | 
             
                                    if any([cc not in self.path[-1] for cc in cpids]):
         | 
| 204 | 
            +
                                        if c not in waiting:
         | 
| 205 | 
            +
                                            waiting.append(c)
         | 
| 206 | 
             
                                        continue
         | 
| 207 | 
             
                                yield "*'{}'* is running...🕞".format(self.get_compnent_name(c))
         | 
| 208 | 
             
                                ans = cpn.run(self.history, **kwargs)
         | 
|  | |
| 216 | 
             
                        logging.debug(f"Canvas.run: {ran} {self.path}")
         | 
| 217 | 
             
                        cpn_id = self.path[-1][ran]
         | 
| 218 | 
             
                        cpn = self.get_component(cpn_id)
         | 
| 219 | 
            +
                        if not cpn["downstream"]:
         | 
| 220 | 
            +
                            break
         | 
| 221 |  | 
| 222 | 
             
                        loop = self._find_loop()
         | 
| 223 | 
            +
                        if loop:
         | 
| 224 | 
            +
                            raise OverflowError(f"Too much loops: {loop}")
         | 
| 225 |  | 
| 226 | 
             
                        if cpn["obj"].component_name.lower() in ["switch", "categorize", "relevant"]:
         | 
| 227 | 
             
                            switch_out = cpn["obj"].output()[1].iloc[0, 0]
         | 
|  | |
| 290 |  | 
| 291 | 
             
                def _find_loop(self, max_loops=6):
         | 
| 292 | 
             
                    path = self.path[-1][::-1]
         | 
| 293 | 
            +
                    if len(path) < 2:
         | 
| 294 | 
            +
                        return False
         | 
| 295 |  | 
| 296 | 
             
                    for i in range(len(path)):
         | 
| 297 | 
             
                        if path[i].lower().find("answer") >= 0:
         | 
| 298 | 
             
                            path = path[:i]
         | 
| 299 | 
             
                            break
         | 
| 300 |  | 
| 301 | 
            +
                    if len(path) < 2:
         | 
| 302 | 
            +
                        return False
         | 
| 303 |  | 
| 304 | 
            +
                    for loc in range(2, len(path) // 2):
         | 
| 305 | 
            +
                        pat = ",".join(path[0:loc])
         | 
| 306 | 
             
                        path_str = ",".join(path)
         | 
| 307 | 
            +
                        if len(pat) >= len(path_str):
         | 
| 308 | 
            +
                            return False
         | 
| 309 | 
             
                        loop = max_loops
         | 
| 310 | 
             
                        while path_str.find(pat) == 0 and loop >= 0:
         | 
| 311 | 
             
                            loop -= 1
         | 
|  | |
| 313 | 
             
                                return False
         | 
| 314 | 
             
                            path_str = path_str[len(pat)+1:]
         | 
| 315 | 
             
                        if loop < 0:
         | 
| 316 | 
            +
                            pat = " => ".join([p.split(":")[0] for p in path[0:loc]])
         | 
| 317 | 
             
                            return pat + " => " + pat
         | 
| 318 |  | 
| 319 | 
             
                    return False
         | 
    	
        agent/component/__init__.py
    CHANGED
    
    | @@ -39,3 +39,73 @@ def component_class(class_name): | |
| 39 | 
             
                m = importlib.import_module("agent.component")
         | 
| 40 | 
             
                c = getattr(m, class_name)
         | 
| 41 | 
             
                return c
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 39 | 
             
                m = importlib.import_module("agent.component")
         | 
| 40 | 
             
                c = getattr(m, class_name)
         | 
| 41 | 
             
                return c
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            __all__ = [
         | 
| 44 | 
            +
                "Begin",
         | 
| 45 | 
            +
                "BeginParam",
         | 
| 46 | 
            +
                "Generate",
         | 
| 47 | 
            +
                "GenerateParam",
         | 
| 48 | 
            +
                "Retrieval",
         | 
| 49 | 
            +
                "RetrievalParam",
         | 
| 50 | 
            +
                "Answer",
         | 
| 51 | 
            +
                "AnswerParam",
         | 
| 52 | 
            +
                "Categorize",
         | 
| 53 | 
            +
                "CategorizeParam",
         | 
| 54 | 
            +
                "Switch",
         | 
| 55 | 
            +
                "SwitchParam",
         | 
| 56 | 
            +
                "Relevant",
         | 
| 57 | 
            +
                "RelevantParam",
         | 
| 58 | 
            +
                "Message",
         | 
| 59 | 
            +
                "MessageParam",
         | 
| 60 | 
            +
                "RewriteQuestion",
         | 
| 61 | 
            +
                "RewriteQuestionParam",
         | 
| 62 | 
            +
                "KeywordExtract",
         | 
| 63 | 
            +
                "KeywordExtractParam",
         | 
| 64 | 
            +
                "Concentrator",
         | 
| 65 | 
            +
                "ConcentratorParam",
         | 
| 66 | 
            +
                "Baidu",
         | 
| 67 | 
            +
                "BaiduParam",
         | 
| 68 | 
            +
                "DuckDuckGo",
         | 
| 69 | 
            +
                "DuckDuckGoParam",
         | 
| 70 | 
            +
                "Wikipedia",
         | 
| 71 | 
            +
                "WikipediaParam",
         | 
| 72 | 
            +
                "PubMed",
         | 
| 73 | 
            +
                "PubMedParam",
         | 
| 74 | 
            +
                "ArXiv",
         | 
| 75 | 
            +
                "ArXivParam",
         | 
| 76 | 
            +
                "Google",
         | 
| 77 | 
            +
                "GoogleParam",
         | 
| 78 | 
            +
                "Bing",
         | 
| 79 | 
            +
                "BingParam",
         | 
| 80 | 
            +
                "GoogleScholar",
         | 
| 81 | 
            +
                "GoogleScholarParam",
         | 
| 82 | 
            +
                "DeepL",
         | 
| 83 | 
            +
                "DeepLParam",
         | 
| 84 | 
            +
                "GitHub",
         | 
| 85 | 
            +
                "GitHubParam",
         | 
| 86 | 
            +
                "BaiduFanyi",
         | 
| 87 | 
            +
                "BaiduFanyiParam",
         | 
| 88 | 
            +
                "QWeather",
         | 
| 89 | 
            +
                "QWeatherParam",
         | 
| 90 | 
            +
                "ExeSQL",
         | 
| 91 | 
            +
                "ExeSQLParam",
         | 
| 92 | 
            +
                "YahooFinance",
         | 
| 93 | 
            +
                "YahooFinanceParam",
         | 
| 94 | 
            +
                "WenCai",
         | 
| 95 | 
            +
                "WenCaiParam",
         | 
| 96 | 
            +
                "Jin10",
         | 
| 97 | 
            +
                "Jin10Param",
         | 
| 98 | 
            +
                "TuShare",
         | 
| 99 | 
            +
                "TuShareParam",
         | 
| 100 | 
            +
                "AkShare",
         | 
| 101 | 
            +
                "AkShareParam",
         | 
| 102 | 
            +
                "Crawler",
         | 
| 103 | 
            +
                "CrawlerParam",
         | 
| 104 | 
            +
                "Invoke",
         | 
| 105 | 
            +
                "InvokeParam",
         | 
| 106 | 
            +
                "Template",
         | 
| 107 | 
            +
                "TemplateParam",
         | 
| 108 | 
            +
                "Email",
         | 
| 109 | 
            +
                "EmailParam",
         | 
| 110 | 
            +
                "component_class"
         | 
| 111 | 
            +
            ]
         | 
    	
        agent/component/base.py
    CHANGED
    
    | @@ -428,7 +428,8 @@ class ComponentBase(ABC): | |
| 428 | 
             
                def output(self, allow_partial=True) -> Tuple[str, Union[pd.DataFrame, partial]]:
         | 
| 429 | 
             
                    o = getattr(self._param, self._param.output_var_name)
         | 
| 430 | 
             
                    if not isinstance(o, partial) and not isinstance(o, pd.DataFrame):
         | 
| 431 | 
            -
                        if not isinstance(o, list): | 
|  | |
| 432 | 
             
                        o = pd.DataFrame(o)
         | 
| 433 |  | 
| 434 | 
             
                    if allow_partial or not isinstance(o, partial):
         | 
| @@ -440,7 +441,8 @@ class ComponentBase(ABC): | |
| 440 | 
             
                    for oo in o():
         | 
| 441 | 
             
                        if not isinstance(oo, pd.DataFrame):
         | 
| 442 | 
             
                            outs = pd.DataFrame(oo if isinstance(oo, list) else [oo])
         | 
| 443 | 
            -
                        else: | 
|  | |
| 444 | 
             
                    return self._param.output_var_name, outs
         | 
| 445 |  | 
| 446 | 
             
                def reset(self):
         | 
| @@ -482,13 +484,15 @@ class ComponentBase(ABC): | |
| 482 | 
             
                                outs.append(pd.DataFrame([{"content": q["value"]}]))
         | 
| 483 | 
             
                        if outs:
         | 
| 484 | 
             
                            df = pd.concat(outs, ignore_index=True)
         | 
| 485 | 
            -
                            if "content" in df: | 
|  | |
| 486 | 
             
                            return df
         | 
| 487 |  | 
| 488 | 
             
                    upstream_outs = []
         | 
| 489 |  | 
| 490 | 
             
                    for u in reversed_cpnts[::-1]:
         | 
| 491 | 
            -
                        if self.get_component_name(u) in ["switch", "concentrator"]: | 
|  | |
| 492 | 
             
                        if self.component_name.lower() == "generate" and self.get_component_name(u) == "retrieval":
         | 
| 493 | 
             
                            o = self._canvas.get_component(u)["obj"].output(allow_partial=False)[1]
         | 
| 494 | 
             
                            if o is not None:
         | 
| @@ -532,7 +536,8 @@ class ComponentBase(ABC): | |
| 532 | 
             
                    reversed_cpnts.extend(self._canvas.path[-1])
         | 
| 533 |  | 
| 534 | 
             
                    for u in reversed_cpnts[::-1]:
         | 
| 535 | 
            -
                        if self.get_component_name(u) in ["switch", "answer"]: | 
|  | |
| 536 | 
             
                        return self._canvas.get_component(u)["obj"].output()[1]
         | 
| 537 |  | 
| 538 | 
             
                @staticmethod
         | 
|  | |
| 428 | 
             
                def output(self, allow_partial=True) -> Tuple[str, Union[pd.DataFrame, partial]]:
         | 
| 429 | 
             
                    o = getattr(self._param, self._param.output_var_name)
         | 
| 430 | 
             
                    if not isinstance(o, partial) and not isinstance(o, pd.DataFrame):
         | 
| 431 | 
            +
                        if not isinstance(o, list):
         | 
| 432 | 
            +
                            o = [o]
         | 
| 433 | 
             
                        o = pd.DataFrame(o)
         | 
| 434 |  | 
| 435 | 
             
                    if allow_partial or not isinstance(o, partial):
         | 
|  | |
| 441 | 
             
                    for oo in o():
         | 
| 442 | 
             
                        if not isinstance(oo, pd.DataFrame):
         | 
| 443 | 
             
                            outs = pd.DataFrame(oo if isinstance(oo, list) else [oo])
         | 
| 444 | 
            +
                        else:
         | 
| 445 | 
            +
                            outs = oo
         | 
| 446 | 
             
                    return self._param.output_var_name, outs
         | 
| 447 |  | 
| 448 | 
             
                def reset(self):
         | 
|  | |
| 484 | 
             
                                outs.append(pd.DataFrame([{"content": q["value"]}]))
         | 
| 485 | 
             
                        if outs:
         | 
| 486 | 
             
                            df = pd.concat(outs, ignore_index=True)
         | 
| 487 | 
            +
                            if "content" in df:
         | 
| 488 | 
            +
                                df = df.drop_duplicates(subset=['content']).reset_index(drop=True)
         | 
| 489 | 
             
                            return df
         | 
| 490 |  | 
| 491 | 
             
                    upstream_outs = []
         | 
| 492 |  | 
| 493 | 
             
                    for u in reversed_cpnts[::-1]:
         | 
| 494 | 
            +
                        if self.get_component_name(u) in ["switch", "concentrator"]:
         | 
| 495 | 
            +
                            continue
         | 
| 496 | 
             
                        if self.component_name.lower() == "generate" and self.get_component_name(u) == "retrieval":
         | 
| 497 | 
             
                            o = self._canvas.get_component(u)["obj"].output(allow_partial=False)[1]
         | 
| 498 | 
             
                            if o is not None:
         | 
|  | |
| 536 | 
             
                    reversed_cpnts.extend(self._canvas.path[-1])
         | 
| 537 |  | 
| 538 | 
             
                    for u in reversed_cpnts[::-1]:
         | 
| 539 | 
            +
                        if self.get_component_name(u) in ["switch", "answer"]:
         | 
| 540 | 
            +
                            continue
         | 
| 541 | 
             
                        return self._canvas.get_component(u)["obj"].output()[1]
         | 
| 542 |  | 
| 543 | 
             
                @staticmethod
         | 
    	
        agent/component/categorize.py
    CHANGED
    
    | @@ -34,15 +34,18 @@ class CategorizeParam(GenerateParam): | |
| 34 | 
             
                    super().check()
         | 
| 35 | 
             
                    self.check_empty(self.category_description, "[Categorize] Category examples")
         | 
| 36 | 
             
                    for k, v in self.category_description.items():
         | 
| 37 | 
            -
                        if not k: | 
| 38 | 
            -
             | 
|  | |
|  | |
| 39 |  | 
| 40 | 
             
                def get_prompt(self):
         | 
| 41 | 
             
                    cate_lines = []
         | 
| 42 | 
             
                    for c, desc in self.category_description.items():
         | 
| 43 | 
            -
                        for  | 
| 44 | 
            -
                            if not  | 
| 45 | 
            -
             | 
|  | |
| 46 | 
             
                    descriptions = []
         | 
| 47 | 
             
                    for c, desc in self.category_description.items():
         | 
| 48 | 
             
                        if desc.get("description"):
         | 
|  | |
| 34 | 
             
                    super().check()
         | 
| 35 | 
             
                    self.check_empty(self.category_description, "[Categorize] Category examples")
         | 
| 36 | 
             
                    for k, v in self.category_description.items():
         | 
| 37 | 
            +
                        if not k:
         | 
| 38 | 
            +
                            raise ValueError("[Categorize] Category name can not be empty!")
         | 
| 39 | 
            +
                        if not v.get("to"):
         | 
| 40 | 
            +
                            raise ValueError(f"[Categorize] 'To' of category {k} can not be empty!")
         | 
| 41 |  | 
| 42 | 
             
                def get_prompt(self):
         | 
| 43 | 
             
                    cate_lines = []
         | 
| 44 | 
             
                    for c, desc in self.category_description.items():
         | 
| 45 | 
            +
                        for line in desc.get("examples", "").split("\n"):
         | 
| 46 | 
            +
                            if not line:
         | 
| 47 | 
            +
                                continue
         | 
| 48 | 
            +
                            cate_lines.append("Question: {}\tCategory: {}".format(line, c))
         | 
| 49 | 
             
                    descriptions = []
         | 
| 50 | 
             
                    for c, desc in self.category_description.items():
         | 
| 51 | 
             
                        if desc.get("description"):
         | 
    	
        agent/component/deepl.py
    CHANGED
    
    | @@ -14,7 +14,6 @@ | |
| 14 | 
             
            #  limitations under the License.
         | 
| 15 | 
             
            #
         | 
| 16 | 
             
            from abc import ABC
         | 
| 17 | 
            -
            import re
         | 
| 18 | 
             
            from agent.component.base import ComponentBase, ComponentParamBase
         | 
| 19 | 
             
            import deepl
         | 
| 20 |  | 
|  | |
| 14 | 
             
            #  limitations under the License.
         | 
| 15 | 
             
            #
         | 
| 16 | 
             
            from abc import ABC
         | 
|  | |
| 17 | 
             
            from agent.component.base import ComponentBase, ComponentParamBase
         | 
| 18 | 
             
            import deepl
         | 
| 19 |  | 
    	
        agent/component/exesql.py
    CHANGED
    
    | @@ -46,8 +46,10 @@ class ExeSQLParam(ComponentParamBase): | |
| 46 | 
             
                    self.check_empty(self.password, "Database password")
         | 
| 47 | 
             
                    self.check_positive_integer(self.top_n, "Number of records")
         | 
| 48 | 
             
                    if self.database == "rag_flow":
         | 
| 49 | 
            -
                        if self.host == "ragflow-mysql": | 
| 50 | 
            -
             | 
|  | |
|  | |
| 51 |  | 
| 52 |  | 
| 53 | 
             
            class ExeSQL(ComponentBase, ABC):
         | 
|  | |
| 46 | 
             
                    self.check_empty(self.password, "Database password")
         | 
| 47 | 
             
                    self.check_positive_integer(self.top_n, "Number of records")
         | 
| 48 | 
             
                    if self.database == "rag_flow":
         | 
| 49 | 
            +
                        if self.host == "ragflow-mysql":
         | 
| 50 | 
            +
                            raise ValueError("The host is not accessible.")
         | 
| 51 | 
            +
                        if self.password == "infini_rag_flow":
         | 
| 52 | 
            +
                            raise ValueError("The host is not accessible.")
         | 
| 53 |  | 
| 54 |  | 
| 55 | 
             
            class ExeSQL(ComponentBase, ABC):
         | 
    	
        agent/component/generate.py
    CHANGED
    
    | @@ -51,11 +51,16 @@ class GenerateParam(ComponentParamBase): | |
| 51 |  | 
| 52 | 
             
                def gen_conf(self):
         | 
| 53 | 
             
                    conf = {}
         | 
| 54 | 
            -
                    if self.max_tokens > 0: | 
| 55 | 
            -
             | 
| 56 | 
            -
                    if self. | 
| 57 | 
            -
             | 
| 58 | 
            -
                    if self. | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 59 | 
             
                    return conf
         | 
| 60 |  | 
| 61 |  | 
| @@ -83,7 +88,8 @@ class Generate(ComponentBase): | |
| 83 | 
             
                    recall_docs = []
         | 
| 84 | 
             
                    for i in idx:
         | 
| 85 | 
             
                        did = retrieval_res.loc[int(i), "doc_id"]
         | 
| 86 | 
            -
                        if did in doc_ids: continue
         | 
|  | |
| 87 | 
             
                        doc_ids.add(did)
         | 
| 88 | 
             
                        recall_docs.append({"doc_id": did, "doc_name": retrieval_res.loc[int(i), "docnm_kwd"]})
         | 
| 89 |  | 
| @@ -108,7 +114,8 @@ class Generate(ComponentBase): | |
| 108 | 
             
                    retrieval_res = []
         | 
| 109 | 
             
                    self._param.inputs = []
         | 
| 110 | 
             
                    for para in self._param.parameters:
         | 
| 111 | 
            -
                        if not para.get("component_id"): continue
         | 
|  | |
| 112 | 
             
                        component_id = para["component_id"].split("@")[0]
         | 
| 113 | 
             
                        if para["component_id"].lower().find("@") >= 0:
         | 
| 114 | 
             
                            cpn_id, key = para["component_id"].split("@")
         | 
| @@ -142,7 +149,8 @@ class Generate(ComponentBase): | |
| 142 |  | 
| 143 | 
             
                    if retrieval_res:
         | 
| 144 | 
             
                        retrieval_res = pd.concat(retrieval_res, ignore_index=True)
         | 
| 145 | 
            -
                    else: retrieval_res = pd.DataFrame([])
         | 
|  | |
| 146 |  | 
| 147 | 
             
                    for n, v in kwargs.items():
         | 
| 148 | 
             
                        prompt = re.sub(r"\{%s\}" % re.escape(n), str(v).replace("\\", " "), prompt)
         | 
| @@ -164,9 +172,11 @@ class Generate(ComponentBase): | |
| 164 | 
             
                        return pd.DataFrame([res])
         | 
| 165 |  | 
| 166 | 
             
                    msg = self._canvas.get_history(self._param.message_history_window_size)
         | 
| 167 | 
            -
                    if len(msg) < 1: msg.append({"role": "user", "content": ""})
         | 
|  | |
| 168 | 
             
                    _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97))
         | 
| 169 | 
            -
                    if len(msg) < 2: msg.append({"role": "user", "content": ""})
         | 
|  | |
| 170 | 
             
                    ans = chat_mdl.chat(msg[0]["content"], msg[1:], self._param.gen_conf())
         | 
| 171 |  | 
| 172 | 
             
                    if self._param.cite and "content_ltks" in retrieval_res.columns and "vector" in retrieval_res.columns:
         | 
| @@ -185,9 +195,11 @@ class Generate(ComponentBase): | |
| 185 | 
             
                        return
         | 
| 186 |  | 
| 187 | 
             
                    msg = self._canvas.get_history(self._param.message_history_window_size)
         | 
| 188 | 
            -
                    if len(msg) < 1: msg.append({"role": "user", "content": ""})
         | 
|  | |
| 189 | 
             
                    _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97))
         | 
| 190 | 
            -
                    if len(msg) < 2: msg.append({"role": "user", "content": ""})
         | 
|  | |
| 191 | 
             
                    answer = ""
         | 
| 192 | 
             
                    for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf()):
         | 
| 193 | 
             
                        res = {"content": ans, "reference": []}
         | 
|  | |
| 51 |  | 
| 52 | 
             
                def gen_conf(self):
         | 
| 53 | 
             
                    conf = {}
         | 
| 54 | 
            +
                    if self.max_tokens > 0:
         | 
| 55 | 
            +
                        conf["max_tokens"] = self.max_tokens
         | 
| 56 | 
            +
                    if self.temperature > 0:
         | 
| 57 | 
            +
                        conf["temperature"] = self.temperature
         | 
| 58 | 
            +
                    if self.top_p > 0:
         | 
| 59 | 
            +
                        conf["top_p"] = self.top_p
         | 
| 60 | 
            +
                    if self.presence_penalty > 0:
         | 
| 61 | 
            +
                        conf["presence_penalty"] = self.presence_penalty
         | 
| 62 | 
            +
                    if self.frequency_penalty > 0:
         | 
| 63 | 
            +
                        conf["frequency_penalty"] = self.frequency_penalty
         | 
| 64 | 
             
                    return conf
         | 
| 65 |  | 
| 66 |  | 
|  | |
| 88 | 
             
                    recall_docs = []
         | 
| 89 | 
             
                    for i in idx:
         | 
| 90 | 
             
                        did = retrieval_res.loc[int(i), "doc_id"]
         | 
| 91 | 
            +
                        if did in doc_ids:
         | 
| 92 | 
            +
                            continue
         | 
| 93 | 
             
                        doc_ids.add(did)
         | 
| 94 | 
             
                        recall_docs.append({"doc_id": did, "doc_name": retrieval_res.loc[int(i), "docnm_kwd"]})
         | 
| 95 |  | 
|  | |
| 114 | 
             
                    retrieval_res = []
         | 
| 115 | 
             
                    self._param.inputs = []
         | 
| 116 | 
             
                    for para in self._param.parameters:
         | 
| 117 | 
            +
                        if not para.get("component_id"):
         | 
| 118 | 
            +
                            continue
         | 
| 119 | 
             
                        component_id = para["component_id"].split("@")[0]
         | 
| 120 | 
             
                        if para["component_id"].lower().find("@") >= 0:
         | 
| 121 | 
             
                            cpn_id, key = para["component_id"].split("@")
         | 
|  | |
| 149 |  | 
| 150 | 
             
                    if retrieval_res:
         | 
| 151 | 
             
                        retrieval_res = pd.concat(retrieval_res, ignore_index=True)
         | 
| 152 | 
            +
                    else:
         | 
| 153 | 
            +
                        retrieval_res = pd.DataFrame([])
         | 
| 154 |  | 
| 155 | 
             
                    for n, v in kwargs.items():
         | 
| 156 | 
             
                        prompt = re.sub(r"\{%s\}" % re.escape(n), str(v).replace("\\", " "), prompt)
         | 
|  | |
| 172 | 
             
                        return pd.DataFrame([res])
         | 
| 173 |  | 
| 174 | 
             
                    msg = self._canvas.get_history(self._param.message_history_window_size)
         | 
| 175 | 
            +
                    if len(msg) < 1:
         | 
| 176 | 
            +
                        msg.append({"role": "user", "content": ""})
         | 
| 177 | 
             
                    _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97))
         | 
| 178 | 
            +
                    if len(msg) < 2:
         | 
| 179 | 
            +
                        msg.append({"role": "user", "content": ""})
         | 
| 180 | 
             
                    ans = chat_mdl.chat(msg[0]["content"], msg[1:], self._param.gen_conf())
         | 
| 181 |  | 
| 182 | 
             
                    if self._param.cite and "content_ltks" in retrieval_res.columns and "vector" in retrieval_res.columns:
         | 
|  | |
| 195 | 
             
                        return
         | 
| 196 |  | 
| 197 | 
             
                    msg = self._canvas.get_history(self._param.message_history_window_size)
         | 
| 198 | 
            +
                    if len(msg) < 1:
         | 
| 199 | 
            +
                        msg.append({"role": "user", "content": ""})
         | 
| 200 | 
             
                    _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(chat_mdl.max_length * 0.97))
         | 
| 201 | 
            +
                    if len(msg) < 2:
         | 
| 202 | 
            +
                        msg.append({"role": "user", "content": ""})
         | 
| 203 | 
             
                    answer = ""
         | 
| 204 | 
             
                    for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf()):
         | 
| 205 | 
             
                        res = {"content": ans, "reference": []}
         | 
    	
        agent/component/rewrite.py
    CHANGED
    
    | @@ -95,7 +95,8 @@ class RewriteQuestion(Generate, ABC): | |
| 95 | 
             
                    hist = self._canvas.get_history(4)
         | 
| 96 | 
             
                    conv = []
         | 
| 97 | 
             
                    for m in hist:
         | 
| 98 | 
            -
                        if m["role"] not in ["user", "assistant"]: continue
         | 
|  | |
| 99 | 
             
                        conv.append("{}: {}".format(m["role"].upper(), m["content"]))
         | 
| 100 | 
             
                    conv = "\n".join(conv)
         | 
| 101 |  | 
|  | |
| 95 | 
             
                    hist = self._canvas.get_history(4)
         | 
| 96 | 
             
                    conv = []
         | 
| 97 | 
             
                    for m in hist:
         | 
| 98 | 
            +
                        if m["role"] not in ["user", "assistant"]:
         | 
| 99 | 
            +
                            continue
         | 
| 100 | 
             
                        conv.append("{}: {}".format(m["role"].upper(), m["content"]))
         | 
| 101 | 
             
                    conv = "\n".join(conv)
         | 
| 102 |  | 
    	
        agent/component/switch.py
    CHANGED
    
    | @@ -41,7 +41,8 @@ class SwitchParam(ComponentParamBase): | |
| 41 | 
             
                def check(self):
         | 
| 42 | 
             
                    self.check_empty(self.conditions, "[Switch] conditions")
         | 
| 43 | 
             
                    for cond in self.conditions:
         | 
| 44 | 
            -
                        if not cond["to"]: raise ValueError("[Switch] 'To' can not be empty!")
         | 
|  | |
| 45 |  | 
| 46 |  | 
| 47 | 
             
            class Switch(ComponentBase, ABC):
         | 
| @@ -51,7 +52,8 @@ class Switch(ComponentBase, ABC): | |
| 51 | 
             
                    res = []
         | 
| 52 | 
             
                    for cond in self._param.conditions:
         | 
| 53 | 
             
                        for item in cond["items"]:
         | 
| 54 | 
            -
                            if not item["cpn_id"]: continue
         | 
|  | |
| 55 | 
             
                            if item["cpn_id"].find("begin") >= 0:
         | 
| 56 | 
             
                                continue
         | 
| 57 | 
             
                            cid = item["cpn_id"].split("@")[0]
         | 
| @@ -63,7 +65,8 @@ class Switch(ComponentBase, ABC): | |
| 63 | 
             
                    for cond in self._param.conditions:
         | 
| 64 | 
             
                        res = []
         | 
| 65 | 
             
                        for item in cond["items"]:
         | 
| 66 | 
            -
                            if not item["cpn_id"]: continue
         | 
|  | |
| 67 | 
             
                            cid = item["cpn_id"].split("@")[0]
         | 
| 68 | 
             
                            if item["cpn_id"].find("@") > 0:
         | 
| 69 | 
             
                                cpn_id, key = item["cpn_id"].split("@")
         | 
| @@ -107,22 +110,22 @@ class Switch(ComponentBase, ABC): | |
| 107 | 
             
                    elif operator == ">":
         | 
| 108 | 
             
                        try:
         | 
| 109 | 
             
                            return True if float(input) > float(value) else False
         | 
| 110 | 
            -
                        except Exception as e:
         | 
| 111 | 
             
                            return True if input > value else False
         | 
| 112 | 
             
                    elif operator == "<":
         | 
| 113 | 
             
                        try:
         | 
| 114 | 
             
                            return True if float(input) < float(value) else False
         | 
| 115 | 
            -
                        except Exception as e:
         | 
| 116 | 
             
                            return True if input < value else False
         | 
| 117 | 
             
                    elif operator == "≥":
         | 
| 118 | 
             
                        try:
         | 
| 119 | 
             
                            return True if float(input) >= float(value) else False
         | 
| 120 | 
            -
                        except Exception as e:
         | 
| 121 | 
             
                            return True if input >= value else False
         | 
| 122 | 
             
                    elif operator == "≤":
         | 
| 123 | 
             
                        try:
         | 
| 124 | 
             
                            return True if float(input) <= float(value) else False
         | 
| 125 | 
            -
                        except Exception as e:
         | 
| 126 | 
             
                            return True if input <= value else False
         | 
| 127 |  | 
| 128 | 
             
                    raise ValueError('Not supported operator' + operator)
         | 
|  | |
| 41 | 
             
                def check(self):
         | 
| 42 | 
             
                    self.check_empty(self.conditions, "[Switch] conditions")
         | 
| 43 | 
             
                    for cond in self.conditions:
         | 
| 44 | 
            +
                        if not cond["to"]:
         | 
| 45 | 
            +
                            raise ValueError("[Switch] 'To' can not be empty!")
         | 
| 46 |  | 
| 47 |  | 
| 48 | 
             
            class Switch(ComponentBase, ABC):
         | 
|  | |
| 52 | 
             
                    res = []
         | 
| 53 | 
             
                    for cond in self._param.conditions:
         | 
| 54 | 
             
                        for item in cond["items"]:
         | 
| 55 | 
            +
                            if not item["cpn_id"]:
         | 
| 56 | 
            +
                                continue
         | 
| 57 | 
             
                            if item["cpn_id"].find("begin") >= 0:
         | 
| 58 | 
             
                                continue
         | 
| 59 | 
             
                            cid = item["cpn_id"].split("@")[0]
         | 
|  | |
| 65 | 
             
                    for cond in self._param.conditions:
         | 
| 66 | 
             
                        res = []
         | 
| 67 | 
             
                        for item in cond["items"]:
         | 
| 68 | 
            +
                            if not item["cpn_id"]:
         | 
| 69 | 
            +
                                continue
         | 
| 70 | 
             
                            cid = item["cpn_id"].split("@")[0]
         | 
| 71 | 
             
                            if item["cpn_id"].find("@") > 0:
         | 
| 72 | 
             
                                cpn_id, key = item["cpn_id"].split("@")
         | 
|  | |
| 110 | 
             
                    elif operator == ">":
         | 
| 111 | 
             
                        try:
         | 
| 112 | 
             
                            return True if float(input) > float(value) else False
         | 
| 113 | 
            +
                        except Exception:
         | 
| 114 | 
             
                            return True if input > value else False
         | 
| 115 | 
             
                    elif operator == "<":
         | 
| 116 | 
             
                        try:
         | 
| 117 | 
             
                            return True if float(input) < float(value) else False
         | 
| 118 | 
            +
                        except Exception:
         | 
| 119 | 
             
                            return True if input < value else False
         | 
| 120 | 
             
                    elif operator == "≥":
         | 
| 121 | 
             
                        try:
         | 
| 122 | 
             
                            return True if float(input) >= float(value) else False
         | 
| 123 | 
            +
                        except Exception:
         | 
| 124 | 
             
                            return True if input >= value else False
         | 
| 125 | 
             
                    elif operator == "≤":
         | 
| 126 | 
             
                        try:
         | 
| 127 | 
             
                            return True if float(input) <= float(value) else False
         | 
| 128 | 
            +
                        except Exception:
         | 
| 129 | 
             
                            return True if input <= value else False
         | 
| 130 |  | 
| 131 | 
             
                    raise ValueError('Not supported operator' + operator)
         | 
    	
        agent/component/template.py
    CHANGED
    
    | @@ -47,7 +47,8 @@ class Template(ComponentBase): | |
| 47 |  | 
| 48 | 
             
                    self._param.inputs = []
         | 
| 49 | 
             
                    for para in self._param.parameters:
         | 
| 50 | 
            -
                        if not para.get("component_id"): continue
         | 
|  | |
| 51 | 
             
                        component_id = para["component_id"].split("@")[0]
         | 
| 52 | 
             
                        if para["component_id"].lower().find("@") >= 0:
         | 
| 53 | 
             
                            cpn_id, key = para["component_id"].split("@")
         | 
|  | |
| 47 |  | 
| 48 | 
             
                    self._param.inputs = []
         | 
| 49 | 
             
                    for para in self._param.parameters:
         | 
| 50 | 
            +
                        if not para.get("component_id"):
         | 
| 51 | 
            +
                            continue
         | 
| 52 | 
             
                        component_id = para["component_id"].split("@")[0]
         | 
| 53 | 
             
                        if para["component_id"].lower().find("@") >= 0:
         | 
| 54 | 
             
                            cpn_id, key = para["component_id"].split("@")
         | 
    	
        agent/test/client.py
    CHANGED
    
    | @@ -43,6 +43,7 @@ if __name__ == '__main__': | |
| 43 | 
             
                    else:
         | 
| 44 | 
             
                        print(ans["content"])
         | 
| 45 |  | 
| 46 | 
            -
                    if DEBUG: print(canvas.path)
         | 
|  | |
| 47 | 
             
                    question = input("\n==================== User =====================\n> ")
         | 
| 48 | 
             
                    canvas.add_user_input(question)
         | 
|  | |
| 43 | 
             
                    else:
         | 
| 44 | 
             
                        print(ans["content"])
         | 
| 45 |  | 
| 46 | 
            +
                    if DEBUG:
         | 
| 47 | 
            +
                        print(canvas.path)
         | 
| 48 | 
             
                    question = input("\n==================== User =====================\n> ")
         | 
| 49 | 
             
                    canvas.add_user_input(question)
         | 
    	
        api/apps/api_app.py
    CHANGED
    
    | @@ -142,7 +142,6 @@ def set_conversation(): | |
| 142 | 
             
                if not objs:
         | 
| 143 | 
             
                    return get_json_result(
         | 
| 144 | 
             
                        data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
         | 
| 145 | 
            -
                req = request.json
         | 
| 146 | 
             
                try:
         | 
| 147 | 
             
                    if objs[0].source == "agent":
         | 
| 148 | 
             
                        e, cvs = UserCanvasService.get_by_id(objs[0].dialog_id)
         | 
| @@ -188,7 +187,8 @@ def completion(): | |
| 188 | 
             
                e, conv = API4ConversationService.get_by_id(req["conversation_id"])
         | 
| 189 | 
             
                if not e:
         | 
| 190 | 
             
                    return get_data_error_result(message="Conversation not found!")
         | 
| 191 | 
            -
                if "quote" not in req: req["quote"] = False
         | 
|  | |
| 192 |  | 
| 193 | 
             
                msg = []
         | 
| 194 | 
             
                for m in req["messages"]:
         | 
| @@ -197,7 +197,8 @@ def completion(): | |
| 197 | 
             
                    if m["role"] == "assistant" and not msg:
         | 
| 198 | 
             
                        continue
         | 
| 199 | 
             
                    msg.append(m)
         | 
| 200 | 
            -
                if not msg[-1].get("id"): msg[-1]["id"] = get_uuid()
         | 
|  | |
| 201 | 
             
                message_id = msg[-1]["id"]
         | 
| 202 |  | 
| 203 | 
             
                def fillin_conv(ans):
         | 
| @@ -674,11 +675,13 @@ def completion_faq(): | |
| 674 | 
             
                e, conv = API4ConversationService.get_by_id(req["conversation_id"])
         | 
| 675 | 
             
                if not e:
         | 
| 676 | 
             
                    return get_data_error_result(message="Conversation not found!")
         | 
| 677 | 
            -
                if "quote" not in req: req["quote"] = True
         | 
|  | |
| 678 |  | 
| 679 | 
             
                msg = []
         | 
| 680 | 
             
                msg.append({"role": "user", "content": req["word"]})
         | 
| 681 | 
            -
                if not msg[-1].get("id"): msg[-1]["id"] = get_uuid()
         | 
|  | |
| 682 | 
             
                message_id = msg[-1]["id"]
         | 
| 683 |  | 
| 684 | 
             
                def fillin_conv(ans):
         | 
|  | |
| 142 | 
             
                if not objs:
         | 
| 143 | 
             
                    return get_json_result(
         | 
| 144 | 
             
                        data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR)
         | 
|  | |
| 145 | 
             
                try:
         | 
| 146 | 
             
                    if objs[0].source == "agent":
         | 
| 147 | 
             
                        e, cvs = UserCanvasService.get_by_id(objs[0].dialog_id)
         | 
|  | |
| 187 | 
             
                e, conv = API4ConversationService.get_by_id(req["conversation_id"])
         | 
| 188 | 
             
                if not e:
         | 
| 189 | 
             
                    return get_data_error_result(message="Conversation not found!")
         | 
| 190 | 
            +
                if "quote" not in req:
         | 
| 191 | 
            +
                    req["quote"] = False
         | 
| 192 |  | 
| 193 | 
             
                msg = []
         | 
| 194 | 
             
                for m in req["messages"]:
         | 
|  | |
| 197 | 
             
                    if m["role"] == "assistant" and not msg:
         | 
| 198 | 
             
                        continue
         | 
| 199 | 
             
                    msg.append(m)
         | 
| 200 | 
            +
                if not msg[-1].get("id"):
         | 
| 201 | 
            +
                    msg[-1]["id"] = get_uuid()
         | 
| 202 | 
             
                message_id = msg[-1]["id"]
         | 
| 203 |  | 
| 204 | 
             
                def fillin_conv(ans):
         | 
|  | |
| 675 | 
             
                e, conv = API4ConversationService.get_by_id(req["conversation_id"])
         | 
| 676 | 
             
                if not e:
         | 
| 677 | 
             
                    return get_data_error_result(message="Conversation not found!")
         | 
| 678 | 
            +
                if "quote" not in req:
         | 
| 679 | 
            +
                    req["quote"] = True
         | 
| 680 |  | 
| 681 | 
             
                msg = []
         | 
| 682 | 
             
                msg.append({"role": "user", "content": req["word"]})
         | 
| 683 | 
            +
                if not msg[-1].get("id"):
         | 
| 684 | 
            +
                    msg[-1]["id"] = get_uuid()
         | 
| 685 | 
             
                message_id = msg[-1]["id"]
         | 
| 686 |  | 
| 687 | 
             
                def fillin_conv(ans):
         | 
    	
        api/apps/canvas_app.py
    CHANGED
    
    | @@ -13,10 +13,8 @@ | |
| 13 | 
             
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
             
            #  limitations under the License.
         | 
| 15 | 
             
            #
         | 
| 16 | 
            -
            import logging
         | 
| 17 | 
             
            import json
         | 
| 18 | 
             
            import traceback
         | 
| 19 | 
            -
            from functools import partial
         | 
| 20 | 
             
            from flask import request, Response
         | 
| 21 | 
             
            from flask_login import login_required, current_user
         | 
| 22 | 
             
            from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
         | 
| @@ -60,7 +58,8 @@ def rm(): | |
| 60 | 
             
            def save():
         | 
| 61 | 
             
                req = request.json
         | 
| 62 | 
             
                req["user_id"] = current_user.id
         | 
| 63 | 
            -
                if not isinstance(req["dsl"], str): req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
         | 
|  | |
| 64 |  | 
| 65 | 
             
                req["dsl"] = json.loads(req["dsl"])
         | 
| 66 | 
             
                if "id" not in req:
         | 
| @@ -153,7 +152,8 @@ def run(): | |
| 153 | 
             
                    return resp
         | 
| 154 |  | 
| 155 | 
             
                for answer in canvas.run(stream=False):
         | 
| 156 | 
            -
                    if answer.get("running_status"): continue
         | 
|  | |
| 157 | 
             
                    final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
         | 
| 158 | 
             
                    canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
         | 
| 159 | 
             
                    if final_ans.get("reference"):
         | 
|  | |
| 13 | 
             
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
             
            #  limitations under the License.
         | 
| 15 | 
             
            #
         | 
|  | |
| 16 | 
             
            import json
         | 
| 17 | 
             
            import traceback
         | 
|  | |
| 18 | 
             
            from flask import request, Response
         | 
| 19 | 
             
            from flask_login import login_required, current_user
         | 
| 20 | 
             
            from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
         | 
|  | |
| 58 | 
             
            def save():
         | 
| 59 | 
             
                req = request.json
         | 
| 60 | 
             
                req["user_id"] = current_user.id
         | 
| 61 | 
            +
                if not isinstance(req["dsl"], str):
         | 
| 62 | 
            +
                    req["dsl"] = json.dumps(req["dsl"], ensure_ascii=False)
         | 
| 63 |  | 
| 64 | 
             
                req["dsl"] = json.loads(req["dsl"])
         | 
| 65 | 
             
                if "id" not in req:
         | 
|  | |
| 152 | 
             
                    return resp
         | 
| 153 |  | 
| 154 | 
             
                for answer in canvas.run(stream=False):
         | 
| 155 | 
            +
                    if answer.get("running_status"):
         | 
| 156 | 
            +
                        continue
         | 
| 157 | 
             
                    final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
         | 
| 158 | 
             
                    canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
         | 
| 159 | 
             
                    if final_ans.get("reference"):
         | 
    	
        api/apps/chunk_app.py
    CHANGED
    
    | @@ -237,7 +237,8 @@ def create(): | |
| 237 | 
             
                    e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
         | 
| 238 | 
             
                    if not e:
         | 
| 239 | 
             
                        return get_data_error_result(message="Knowledgebase not found!")
         | 
| 240 | 
            -
                    if kb.pagerank: d["pagerank_fea"] = kb.pagerank
         | 
|  | |
| 241 |  | 
| 242 | 
             
                    embd_id = DocumentService.get_embd_id(req["doc_id"])
         | 
| 243 | 
             
                    embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING.value, embd_id)
         | 
|  | |
| 237 | 
             
                    e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
         | 
| 238 | 
             
                    if not e:
         | 
| 239 | 
             
                        return get_data_error_result(message="Knowledgebase not found!")
         | 
| 240 | 
            +
                    if kb.pagerank:
         | 
| 241 | 
            +
                        d["pagerank_fea"] = kb.pagerank
         | 
| 242 |  | 
| 243 | 
             
                    embd_id = DocumentService.get_embd_id(req["doc_id"])
         | 
| 244 | 
             
                    embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING.value, embd_id)
         | 
    	
        api/apps/conversation_app.py
    CHANGED
    
    | @@ -281,10 +281,12 @@ def thumbup(): | |
| 281 | 
             
                    if req["message_id"] == msg.get("id", "") and msg.get("role", "") == "assistant":
         | 
| 282 | 
             
                        if up_down:
         | 
| 283 | 
             
                            msg["thumbup"] = True
         | 
| 284 | 
            -
                            if "feedback" in msg: del msg["feedback"]
         | 
|  | |
| 285 | 
             
                        else:
         | 
| 286 | 
             
                            msg["thumbup"] = False
         | 
| 287 | 
            -
                            if feedback: msg["feedback"] = feedback
         | 
|  | |
| 288 | 
             
                        break
         | 
| 289 |  | 
| 290 | 
             
                ConversationService.update_by_id(conv["id"], conv)
         | 
|  | |
| 281 | 
             
                    if req["message_id"] == msg.get("id", "") and msg.get("role", "") == "assistant":
         | 
| 282 | 
             
                        if up_down:
         | 
| 283 | 
             
                            msg["thumbup"] = True
         | 
| 284 | 
            +
                            if "feedback" in msg:
         | 
| 285 | 
            +
                                del msg["feedback"]
         | 
| 286 | 
             
                        else:
         | 
| 287 | 
             
                            msg["thumbup"] = False
         | 
| 288 | 
            +
                            if feedback:
         | 
| 289 | 
            +
                                msg["feedback"] = feedback
         | 
| 290 | 
             
                        break
         | 
| 291 |  | 
| 292 | 
             
                ConversationService.update_by_id(conv["id"], conv)
         | 
    	
        api/apps/dialog_app.py
    CHANGED
    
    | @@ -37,10 +37,12 @@ def set_dialog(): | |
| 37 | 
             
                top_n = req.get("top_n", 6)
         | 
| 38 | 
             
                top_k = req.get("top_k", 1024)
         | 
| 39 | 
             
                rerank_id = req.get("rerank_id", "")
         | 
| 40 | 
            -
                if not rerank_id: | 
|  | |
| 41 | 
             
                similarity_threshold = req.get("similarity_threshold", 0.1)
         | 
| 42 | 
             
                vector_similarity_weight = req.get("vector_similarity_weight", 0.3)
         | 
| 43 | 
            -
                if vector_similarity_weight is None: | 
|  | |
| 44 | 
             
                llm_setting = req.get("llm_setting", {})
         | 
| 45 | 
             
                default_prompt = {
         | 
| 46 | 
             
                    "system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
         | 
|  | |
| 37 | 
             
                top_n = req.get("top_n", 6)
         | 
| 38 | 
             
                top_k = req.get("top_k", 1024)
         | 
| 39 | 
             
                rerank_id = req.get("rerank_id", "")
         | 
| 40 | 
            +
                if not rerank_id:
         | 
| 41 | 
            +
                    req["rerank_id"] = ""
         | 
| 42 | 
             
                similarity_threshold = req.get("similarity_threshold", 0.1)
         | 
| 43 | 
             
                vector_similarity_weight = req.get("vector_similarity_weight", 0.3)
         | 
| 44 | 
            +
                if vector_similarity_weight is None:
         | 
| 45 | 
            +
                    vector_similarity_weight = 0.3
         | 
| 46 | 
             
                llm_setting = req.get("llm_setting", {})
         | 
| 47 | 
             
                default_prompt = {
         | 
| 48 | 
             
                    "system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
         | 
    	
        api/apps/document_app.py
    CHANGED
    
    | @@ -13,7 +13,6 @@ | |
| 13 | 
             
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
             
            #  limitations under the License
         | 
| 15 | 
             
            #
         | 
| 16 | 
            -
            import json
         | 
| 17 | 
             
            import os.path
         | 
| 18 | 
             
            import pathlib
         | 
| 19 | 
             
            import re
         | 
| @@ -90,7 +89,8 @@ def web_crawl(): | |
| 90 | 
             
                    raise LookupError("Can't find this knowledgebase!")
         | 
| 91 |  | 
| 92 | 
             
                blob = html2pdf(url)
         | 
| 93 | 
            -
                if not blob: | 
|  | |
| 94 |  | 
| 95 | 
             
                root_folder = FileService.get_root_folder(current_user.id)
         | 
| 96 | 
             
                pf_id = root_folder["id"]
         | 
| @@ -290,7 +290,8 @@ def change_status(): | |
| 290 | 
             
            def rm():
         | 
| 291 | 
             
                req = request.json
         | 
| 292 | 
             
                doc_ids = req["doc_id"]
         | 
| 293 | 
            -
                if isinstance(doc_ids, str): | 
|  | |
| 294 |  | 
| 295 | 
             
                for doc_id in doc_ids:
         | 
| 296 | 
             
                    if not DocumentService.accessible4deletion(doc_id, current_user.id):
         | 
|  | |
| 13 | 
             
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
             
            #  limitations under the License
         | 
| 15 | 
             
            #
         | 
|  | |
| 16 | 
             
            import os.path
         | 
| 17 | 
             
            import pathlib
         | 
| 18 | 
             
            import re
         | 
|  | |
| 89 | 
             
                    raise LookupError("Can't find this knowledgebase!")
         | 
| 90 |  | 
| 91 | 
             
                blob = html2pdf(url)
         | 
| 92 | 
            +
                if not blob:
         | 
| 93 | 
            +
                    return server_error_response(ValueError("Download failure."))
         | 
| 94 |  | 
| 95 | 
             
                root_folder = FileService.get_root_folder(current_user.id)
         | 
| 96 | 
             
                pf_id = root_folder["id"]
         | 
|  | |
| 290 | 
             
            def rm():
         | 
| 291 | 
             
                req = request.json
         | 
| 292 | 
             
                doc_ids = req["doc_id"]
         | 
| 293 | 
            +
                if isinstance(doc_ids, str):
         | 
| 294 | 
            +
                    doc_ids = [doc_ids]
         | 
| 295 |  | 
| 296 | 
             
                for doc_id in doc_ids:
         | 
| 297 | 
             
                    if not DocumentService.accessible4deletion(doc_id, current_user.id):
         | 
    	
        api/apps/llm_app.py
    CHANGED
    
    | @@ -351,8 +351,10 @@ def list_app(): | |
| 351 |  | 
| 352 | 
             
                    llm_set = set([m["llm_name"] + "@" + m["fid"] for m in llms])
         | 
| 353 | 
             
                    for o in objs:
         | 
| 354 | 
            -
                        if not o.api_key: | 
| 355 | 
            -
             | 
|  | |
|  | |
| 356 | 
             
                        llms.append({"llm_name": o.llm_name, "model_type": o.model_type, "fid": o.llm_factory, "available": True})
         | 
| 357 |  | 
| 358 | 
             
                    res = {}
         | 
|  | |
| 351 |  | 
| 352 | 
             
                    llm_set = set([m["llm_name"] + "@" + m["fid"] for m in llms])
         | 
| 353 | 
             
                    for o in objs:
         | 
| 354 | 
            +
                        if not o.api_key:
         | 
| 355 | 
            +
                            continue
         | 
| 356 | 
            +
                        if o.llm_name + "@" + o.llm_factory in llm_set:
         | 
| 357 | 
            +
                            continue
         | 
| 358 | 
             
                        llms.append({"llm_name": o.llm_name, "model_type": o.model_type, "fid": o.llm_factory, "available": True})
         | 
| 359 |  | 
| 360 | 
             
                    res = {}
         | 
    	
        api/apps/sdk/agent.py
    CHANGED
    
    | @@ -14,7 +14,7 @@ | |
| 14 | 
             
            #  limitations under the License.
         | 
| 15 | 
             
            #
         | 
| 16 |  | 
| 17 | 
            -
            from api.db.services.canvas_service import  | 
| 18 | 
             
            from api.utils.api_utils import get_error_data_result, token_required
         | 
| 19 | 
             
            from api.utils.api_utils import get_result
         | 
| 20 | 
             
            from flask import request
         | 
|  | |
| 14 | 
             
            #  limitations under the License.
         | 
| 15 | 
             
            #
         | 
| 16 |  | 
| 17 | 
            +
            from api.db.services.canvas_service import UserCanvasService
         | 
| 18 | 
             
            from api.utils.api_utils import get_error_data_result, token_required
         | 
| 19 | 
             
            from api.utils.api_utils import get_result
         | 
| 20 | 
             
            from flask import request
         | 
    	
        api/apps/sdk/doc.py
    CHANGED
    
    | @@ -41,7 +41,6 @@ from api.utils.api_utils import construct_json_result, get_parser_config | |
| 41 | 
             
            from rag.nlp import search
         | 
| 42 | 
             
            from rag.utils import rmSpace
         | 
| 43 | 
             
            from rag.utils.storage_factory import STORAGE_IMPL
         | 
| 44 | 
            -
            import os
         | 
| 45 |  | 
| 46 | 
             
            MAXIMUM_OF_UPLOADING_FILES = 256
         | 
| 47 |  | 
| @@ -976,12 +975,12 @@ def add_chunk(tenant_id, dataset_id, document_id): | |
| 976 | 
             
                if not req.get("content"):
         | 
| 977 | 
             
                    return get_error_data_result(message="`content` is required")
         | 
| 978 | 
             
                if "important_keywords" in req:
         | 
| 979 | 
            -
                    if  | 
| 980 | 
             
                        return get_error_data_result(
         | 
| 981 | 
             
                            "`important_keywords` is required to be a list"
         | 
| 982 | 
             
                        )
         | 
| 983 | 
             
                if "questions" in req:
         | 
| 984 | 
            -
                    if  | 
| 985 | 
             
                        return get_error_data_result(
         | 
| 986 | 
             
                            "`questions` is required to be a list"
         | 
| 987 | 
             
                        )
         | 
|  | |
| 41 | 
             
            from rag.nlp import search
         | 
| 42 | 
             
            from rag.utils import rmSpace
         | 
| 43 | 
             
            from rag.utils.storage_factory import STORAGE_IMPL
         | 
|  | |
| 44 |  | 
| 45 | 
             
            MAXIMUM_OF_UPLOADING_FILES = 256
         | 
| 46 |  | 
|  | |
| 975 | 
             
                if not req.get("content"):
         | 
| 976 | 
             
                    return get_error_data_result(message="`content` is required")
         | 
| 977 | 
             
                if "important_keywords" in req:
         | 
| 978 | 
            +
                    if not isinstance(req["important_keywords"], list):
         | 
| 979 | 
             
                        return get_error_data_result(
         | 
| 980 | 
             
                            "`important_keywords` is required to be a list"
         | 
| 981 | 
             
                        )
         | 
| 982 | 
             
                if "questions" in req:
         | 
| 983 | 
            +
                    if not isinstance(req["questions"], list):
         | 
| 984 | 
             
                        return get_error_data_result(
         | 
| 985 | 
             
                            "`questions` is required to be a list"
         | 
| 986 | 
             
                        )
         | 
    	
        api/apps/sdk/session.py
    CHANGED
    
    | @@ -143,8 +143,10 @@ def completion(tenant_id, chat_id): | |
| 143 | 
             
                }
         | 
| 144 | 
             
                conv.message.append(question)
         | 
| 145 | 
             
                for m in conv.message:
         | 
| 146 | 
            -
                    if m["role"] == "system": | 
| 147 | 
            -
             | 
|  | |
|  | |
| 148 | 
             
                    msg.append(m)
         | 
| 149 | 
             
                message_id = msg[-1].get("id")
         | 
| 150 | 
             
                e, dia = DialogService.get_by_id(conv.dialog_id)
         | 
| @@ -267,7 +269,8 @@ def agent_completion(tenant_id, agent_id): | |
| 267 | 
             
                    if m["role"] == "assistant" and not msg:
         | 
| 268 | 
             
                        continue
         | 
| 269 | 
             
                    msg.append(m)
         | 
| 270 | 
            -
                if not msg[-1].get("id"): | 
|  | |
| 271 | 
             
                message_id = msg[-1]["id"]
         | 
| 272 |  | 
| 273 | 
             
                stream = req.get("stream", True)
         | 
| @@ -361,7 +364,8 @@ def agent_completion(tenant_id, agent_id): | |
| 361 | 
             
                    return resp
         | 
| 362 |  | 
| 363 | 
             
                for answer in canvas.run(stream=False):
         | 
| 364 | 
            -
                    if answer.get("running_status"): | 
|  | |
| 365 | 
             
                    final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
         | 
| 366 | 
             
                    canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
         | 
| 367 | 
             
                    if final_ans.get("reference"):
         | 
|  | |
| 143 | 
             
                }
         | 
| 144 | 
             
                conv.message.append(question)
         | 
| 145 | 
             
                for m in conv.message:
         | 
| 146 | 
            +
                    if m["role"] == "system":
         | 
| 147 | 
            +
                        continue
         | 
| 148 | 
            +
                    if m["role"] == "assistant" and not msg:
         | 
| 149 | 
            +
                        continue
         | 
| 150 | 
             
                    msg.append(m)
         | 
| 151 | 
             
                message_id = msg[-1].get("id")
         | 
| 152 | 
             
                e, dia = DialogService.get_by_id(conv.dialog_id)
         | 
|  | |
| 269 | 
             
                    if m["role"] == "assistant" and not msg:
         | 
| 270 | 
             
                        continue
         | 
| 271 | 
             
                    msg.append(m)
         | 
| 272 | 
            +
                if not msg[-1].get("id"):
         | 
| 273 | 
            +
                    msg[-1]["id"] = get_uuid()
         | 
| 274 | 
             
                message_id = msg[-1]["id"]
         | 
| 275 |  | 
| 276 | 
             
                stream = req.get("stream", True)
         | 
|  | |
| 364 | 
             
                    return resp
         | 
| 365 |  | 
| 366 | 
             
                for answer in canvas.run(stream=False):
         | 
| 367 | 
            +
                    if answer.get("running_status"):
         | 
| 368 | 
            +
                        continue
         | 
| 369 | 
             
                    final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
         | 
| 370 | 
             
                    canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
         | 
| 371 | 
             
                    if final_ans.get("reference"):
         | 
    	
        api/apps/user_app.py
    CHANGED
    
    | @@ -330,7 +330,7 @@ def user_info_from_github(access_token): | |
| 330 | 
             
                    headers=headers,
         | 
| 331 | 
             
                ).json()
         | 
| 332 | 
             
                user_info["email"] = next(
         | 
| 333 | 
            -
                    (email for email in email_info if email["primary"] | 
| 334 | 
             
                )["email"]
         | 
| 335 | 
             
                return user_info
         | 
| 336 |  | 
|  | |
| 330 | 
             
                    headers=headers,
         | 
| 331 | 
             
                ).json()
         | 
| 332 | 
             
                user_info["email"] = next(
         | 
| 333 | 
            +
                    (email for email in email_info if email["primary"]), None
         | 
| 334 | 
             
                )["email"]
         | 
| 335 | 
             
                return user_info
         | 
| 336 |  | 
    	
        api/db/db_models.py
    CHANGED
    
    | @@ -130,7 +130,7 @@ def is_continuous_field(cls: typing.Type) -> bool: | |
| 130 | 
             
                for p in cls.__bases__:
         | 
| 131 | 
             
                    if p in CONTINUOUS_FIELD_TYPE:
         | 
| 132 | 
             
                        return True
         | 
| 133 | 
            -
                    elif p  | 
| 134 | 
             
                        if is_continuous_field(p):
         | 
| 135 | 
             
                            return True
         | 
| 136 | 
             
                else:
         | 
|  | |
| 130 | 
             
                for p in cls.__bases__:
         | 
| 131 | 
             
                    if p in CONTINUOUS_FIELD_TYPE:
         | 
| 132 | 
             
                        return True
         | 
| 133 | 
            +
                    elif p is not Field and p is not object:
         | 
| 134 | 
             
                        if is_continuous_field(p):
         | 
| 135 | 
             
                            return True
         | 
| 136 | 
             
                else:
         | 
    	
        api/db/init_data.py
    CHANGED
    
    | @@ -170,7 +170,7 @@ def add_graph_templates(): | |
| 170 | 
             
                        cnvs = json.load(open(os.path.join(dir, fnm), "r"))
         | 
| 171 | 
             
                        try:
         | 
| 172 | 
             
                            CanvasTemplateService.save(**cnvs)
         | 
| 173 | 
            -
                        except:
         | 
| 174 | 
             
                            CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
         | 
| 175 | 
             
                    except Exception:
         | 
| 176 | 
             
                        logging.exception("Add graph templates error: ")
         | 
|  | |
| 170 | 
             
                        cnvs = json.load(open(os.path.join(dir, fnm), "r"))
         | 
| 171 | 
             
                        try:
         | 
| 172 | 
             
                            CanvasTemplateService.save(**cnvs)
         | 
| 173 | 
            +
                        except Exception:
         | 
| 174 | 
             
                            CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
         | 
| 175 | 
             
                    except Exception:
         | 
| 176 | 
             
                        logging.exception("Add graph templates error: ")
         | 
    	
        api/db/services/__init__.py
    CHANGED
    
    | @@ -15,13 +15,14 @@ | |
| 15 | 
             
            #
         | 
| 16 | 
             
            import pathlib
         | 
| 17 | 
             
            import re
         | 
| 18 | 
            -
            from .user_service import UserService
         | 
| 19 |  | 
| 20 |  | 
| 21 | 
             
            def duplicate_name(query_func, **kwargs):
         | 
| 22 | 
             
                fnm = kwargs["name"]
         | 
| 23 | 
             
                objs = query_func(**kwargs)
         | 
| 24 | 
            -
                if not objs: | 
|  | |
| 25 | 
             
                ext = pathlib.Path(fnm).suffix #.jpg
         | 
| 26 | 
             
                nm = re.sub(r"%s$"%ext, "", fnm)
         | 
| 27 | 
             
                r = re.search(r"\(([0-9]+)\)$", nm)
         | 
| @@ -31,8 +32,8 @@ def duplicate_name(query_func, **kwargs): | |
| 31 | 
             
                    nm = re.sub(r"\([0-9]+\)$", "", nm)
         | 
| 32 | 
             
                c += 1
         | 
| 33 | 
             
                nm = f"{nm}({c})"
         | 
| 34 | 
            -
                if ext: | 
|  | |
| 35 |  | 
| 36 | 
             
                kwargs["name"] = nm
         | 
| 37 | 
             
                return duplicate_name(query_func, **kwargs)
         | 
| 38 | 
            -
             | 
|  | |
| 15 | 
             
            #
         | 
| 16 | 
             
            import pathlib
         | 
| 17 | 
             
            import re
         | 
| 18 | 
            +
            from .user_service import UserService as UserService
         | 
| 19 |  | 
| 20 |  | 
| 21 | 
             
            def duplicate_name(query_func, **kwargs):
         | 
| 22 | 
             
                fnm = kwargs["name"]
         | 
| 23 | 
             
                objs = query_func(**kwargs)
         | 
| 24 | 
            +
                if not objs:
         | 
| 25 | 
            +
                    return fnm
         | 
| 26 | 
             
                ext = pathlib.Path(fnm).suffix #.jpg
         | 
| 27 | 
             
                nm = re.sub(r"%s$"%ext, "", fnm)
         | 
| 28 | 
             
                r = re.search(r"\(([0-9]+)\)$", nm)
         | 
|  | |
| 32 | 
             
                    nm = re.sub(r"\([0-9]+\)$", "", nm)
         | 
| 33 | 
             
                c += 1
         | 
| 34 | 
             
                nm = f"{nm}({c})"
         | 
| 35 | 
            +
                if ext:
         | 
| 36 | 
            +
                    nm += f"{ext}"
         | 
| 37 |  | 
| 38 | 
             
                kwargs["name"] = nm
         | 
| 39 | 
             
                return duplicate_name(query_func, **kwargs)
         | 
|  | 
    	
        api/db/services/api_service.py
    CHANGED
    
    | @@ -64,7 +64,8 @@ class API4ConversationService(CommonService): | |
| 64 | 
             
                @classmethod
         | 
| 65 | 
             
                @DB.connection_context()
         | 
| 66 | 
             
                def stats(cls, tenant_id, from_date, to_date, source=None):
         | 
| 67 | 
            -
                    if len(to_date) == 10: | 
|  | |
| 68 | 
             
                    return cls.model.select(
         | 
| 69 | 
             
                        cls.model.create_date.truncate("day").alias("dt"),
         | 
| 70 | 
             
                        peewee.fn.COUNT(
         | 
|  | |
| 64 | 
             
                @classmethod
         | 
| 65 | 
             
                @DB.connection_context()
         | 
| 66 | 
             
                def stats(cls, tenant_id, from_date, to_date, source=None):
         | 
| 67 | 
            +
                    if len(to_date) == 10:
         | 
| 68 | 
            +
                        to_date += " 23:59:59"
         | 
| 69 | 
             
                    return cls.model.select(
         | 
| 70 | 
             
                        cls.model.create_date.truncate("day").alias("dt"),
         | 
| 71 | 
             
                        peewee.fn.COUNT(
         | 
    	
        api/db/services/canvas_service.py
    CHANGED
    
    | @@ -13,9 +13,7 @@ | |
| 13 | 
             
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
             
            #  limitations under the License.
         | 
| 15 | 
             
            #
         | 
| 16 | 
            -
            from  | 
| 17 | 
            -
            import peewee
         | 
| 18 | 
            -
            from api.db.db_models import DB, API4Conversation, APIToken, Dialog, CanvasTemplate, UserCanvas
         | 
| 19 | 
             
            from api.db.services.common_service import CommonService
         | 
| 20 |  | 
| 21 |  | 
|  | |
| 13 | 
             
            #  See the License for the specific language governing permissions and
         | 
| 14 | 
             
            #  limitations under the License.
         | 
| 15 | 
             
            #
         | 
| 16 | 
            +
            from api.db.db_models import DB, CanvasTemplate, UserCanvas
         | 
|  | |
|  | |
| 17 | 
             
            from api.db.services.common_service import CommonService
         | 
| 18 |  | 
| 19 |  | 
    	
        api/db/services/common_service.py
    CHANGED
    
    | @@ -115,7 +115,7 @@ class CommonService: | |
| 115 | 
             
                    try:
         | 
| 116 | 
             
                        obj = cls.model.query(id=pid)[0]
         | 
| 117 | 
             
                        return True, obj
         | 
| 118 | 
            -
                    except Exception | 
| 119 | 
             
                        return False, None
         | 
| 120 |  | 
| 121 | 
             
                @classmethod
         | 
|  | |
| 115 | 
             
                    try:
         | 
| 116 | 
             
                        obj = cls.model.query(id=pid)[0]
         | 
| 117 | 
             
                        return True, obj
         | 
| 118 | 
            +
                    except Exception:
         | 
| 119 | 
             
                        return False, None
         | 
| 120 |  | 
| 121 | 
             
                @classmethod
         | 
    	
        api/db/services/dialog_service.py
    CHANGED
    
    | @@ -106,15 +106,15 @@ def message_fit_in(msg, max_length=4000): | |
| 106 | 
             
                    return c, msg
         | 
| 107 |  | 
| 108 | 
             
                ll = num_tokens_from_string(msg_[0]["content"])
         | 
| 109 | 
            -
                 | 
| 110 | 
            -
                if ll / (ll +  | 
| 111 | 
             
                    m = msg_[0]["content"]
         | 
| 112 | 
            -
                    m = encoder.decode(encoder.encode(m)[:max_length -  | 
| 113 | 
             
                    msg[0]["content"] = m
         | 
| 114 | 
             
                    return max_length, msg
         | 
| 115 |  | 
| 116 | 
             
                m = msg_[1]["content"]
         | 
| 117 | 
            -
                m = encoder.decode(encoder.encode(m)[:max_length -  | 
| 118 | 
             
                msg[1]["content"] = m
         | 
| 119 | 
             
                return max_length, msg
         | 
| 120 |  | 
| @@ -257,7 +257,8 @@ def chat(dialog, messages, stream=True, **kwargs): | |
| 257 | 
             
                        idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
         | 
| 258 | 
             
                        recall_docs = [
         | 
| 259 | 
             
                            d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
         | 
| 260 | 
            -
                        if not recall_docs: | 
|  | |
| 261 | 
             
                        kbinfos["doc_aggs"] = recall_docs
         | 
| 262 |  | 
| 263 | 
             
                        refs = deepcopy(kbinfos)
         | 
| @@ -433,13 +434,15 @@ def relevant(tenant_id, llm_id, question, contents: list): | |
| 433 | 
             
                    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
         | 
| 434 | 
             
                    No other words needed except 'yes' or 'no'.
         | 
| 435 | 
             
                """
         | 
| 436 | 
            -
                if not contents: | 
|  | |
| 437 | 
             
                contents = "Documents: \n" + "   - ".join(contents)
         | 
| 438 | 
             
                contents = f"Question: {question}\n" + contents
         | 
| 439 | 
             
                if num_tokens_from_string(contents) >= chat_mdl.max_length - 4:
         | 
| 440 | 
             
                    contents = encoder.decode(encoder.encode(contents)[:chat_mdl.max_length - 4])
         | 
| 441 | 
             
                ans = chat_mdl.chat(prompt, [{"role": "user", "content": contents}], {"temperature": 0.01})
         | 
| 442 | 
            -
                if ans.lower().find("yes") >= 0: | 
|  | |
| 443 | 
             
                return False
         | 
| 444 |  | 
| 445 |  | 
| @@ -481,8 +484,10 @@ Requirements: | |
| 481 | 
             
                ]
         | 
| 482 | 
             
                _, msg = message_fit_in(msg, chat_mdl.max_length)
         | 
| 483 | 
             
                kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
         | 
| 484 | 
            -
                if isinstance(kwd, tuple): | 
| 485 | 
            -
             | 
|  | |
|  | |
| 486 | 
             
                return kwd
         | 
| 487 |  | 
| 488 |  | 
| @@ -508,8 +513,10 @@ Requirements: | |
| 508 | 
             
                ]
         | 
| 509 | 
             
                _, msg = message_fit_in(msg, chat_mdl.max_length)
         | 
| 510 | 
             
                kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
         | 
| 511 | 
            -
                if isinstance(kwd, tuple): | 
| 512 | 
            -
             | 
|  | |
|  | |
| 513 | 
             
                return kwd
         | 
| 514 |  | 
| 515 |  | 
| @@ -520,7 +527,8 @@ def full_question(tenant_id, llm_id, messages): | |
| 520 | 
             
                    chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id)
         | 
| 521 | 
             
                conv = []
         | 
| 522 | 
             
                for m in messages:
         | 
| 523 | 
            -
                    if m["role"] not in ["user", "assistant"]: | 
|  | |
| 524 | 
             
                    conv.append("{}: {}".format(m["role"].upper(), m["content"]))
         | 
| 525 | 
             
                conv = "\n".join(conv)
         | 
| 526 | 
             
                today = datetime.date.today().isoformat()
         | 
| @@ -581,7 +589,8 @@ Output: What's the weather in Rochester on {tomorrow}? | |
| 581 |  | 
| 582 |  | 
| 583 | 
             
            def tts(tts_mdl, text):
         | 
| 584 | 
            -
                if not tts_mdl or not text: | 
|  | |
| 585 | 
             
                bin = b""
         | 
| 586 | 
             
                for chunk in tts_mdl.tts(text):
         | 
| 587 | 
             
                    bin += chunk
         | 
| @@ -641,7 +650,8 @@ def ask(question, kb_ids, tenant_id): | |
| 641 | 
             
                    idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
         | 
| 642 | 
             
                    recall_docs = [
         | 
| 643 | 
             
                        d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
         | 
| 644 | 
            -
                    if not recall_docs: | 
|  | |
| 645 | 
             
                    kbinfos["doc_aggs"] = recall_docs
         | 
| 646 | 
             
                    refs = deepcopy(kbinfos)
         | 
| 647 | 
             
                    for c in refs["chunks"]:
         | 
|  | |
| 106 | 
             
                    return c, msg
         | 
| 107 |  | 
| 108 | 
             
                ll = num_tokens_from_string(msg_[0]["content"])
         | 
| 109 | 
            +
                ll2 = num_tokens_from_string(msg_[-1]["content"])
         | 
| 110 | 
            +
                if ll / (ll + ll2) > 0.8:
         | 
| 111 | 
             
                    m = msg_[0]["content"]
         | 
| 112 | 
            +
                    m = encoder.decode(encoder.encode(m)[:max_length - ll2])
         | 
| 113 | 
             
                    msg[0]["content"] = m
         | 
| 114 | 
             
                    return max_length, msg
         | 
| 115 |  | 
| 116 | 
             
                m = msg_[1]["content"]
         | 
| 117 | 
            +
                m = encoder.decode(encoder.encode(m)[:max_length - ll2])
         | 
| 118 | 
             
                msg[1]["content"] = m
         | 
| 119 | 
             
                return max_length, msg
         | 
| 120 |  | 
|  | |
| 257 | 
             
                        idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
         | 
| 258 | 
             
                        recall_docs = [
         | 
| 259 | 
             
                            d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
         | 
| 260 | 
            +
                        if not recall_docs:
         | 
| 261 | 
            +
                            recall_docs = kbinfos["doc_aggs"]
         | 
| 262 | 
             
                        kbinfos["doc_aggs"] = recall_docs
         | 
| 263 |  | 
| 264 | 
             
                        refs = deepcopy(kbinfos)
         | 
|  | |
| 434 | 
             
                    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
         | 
| 435 | 
             
                    No other words needed except 'yes' or 'no'.
         | 
| 436 | 
             
                """
         | 
| 437 | 
            +
                if not contents:
         | 
| 438 | 
            +
                    return False
         | 
| 439 | 
             
                contents = "Documents: \n" + "   - ".join(contents)
         | 
| 440 | 
             
                contents = f"Question: {question}\n" + contents
         | 
| 441 | 
             
                if num_tokens_from_string(contents) >= chat_mdl.max_length - 4:
         | 
| 442 | 
             
                    contents = encoder.decode(encoder.encode(contents)[:chat_mdl.max_length - 4])
         | 
| 443 | 
             
                ans = chat_mdl.chat(prompt, [{"role": "user", "content": contents}], {"temperature": 0.01})
         | 
| 444 | 
            +
                if ans.lower().find("yes") >= 0:
         | 
| 445 | 
            +
                    return True
         | 
| 446 | 
             
                return False
         | 
| 447 |  | 
| 448 |  | 
|  | |
| 484 | 
             
                ]
         | 
| 485 | 
             
                _, msg = message_fit_in(msg, chat_mdl.max_length)
         | 
| 486 | 
             
                kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
         | 
| 487 | 
            +
                if isinstance(kwd, tuple):
         | 
| 488 | 
            +
                    kwd = kwd[0]
         | 
| 489 | 
            +
                if kwd.find("**ERROR**") >=0:
         | 
| 490 | 
            +
                    return ""
         | 
| 491 | 
             
                return kwd
         | 
| 492 |  | 
| 493 |  | 
|  | |
| 513 | 
             
                ]
         | 
| 514 | 
             
                _, msg = message_fit_in(msg, chat_mdl.max_length)
         | 
| 515 | 
             
                kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2})
         | 
| 516 | 
            +
                if isinstance(kwd, tuple):
         | 
| 517 | 
            +
                    kwd = kwd[0]
         | 
| 518 | 
            +
                if kwd.find("**ERROR**") >= 0:
         | 
| 519 | 
            +
                    return ""
         | 
| 520 | 
             
                return kwd
         | 
| 521 |  | 
| 522 |  | 
|  | |
| 527 | 
             
                    chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id)
         | 
| 528 | 
             
                conv = []
         | 
| 529 | 
             
                for m in messages:
         | 
| 530 | 
            +
                    if m["role"] not in ["user", "assistant"]:
         | 
| 531 | 
            +
                        continue
         | 
| 532 | 
             
                    conv.append("{}: {}".format(m["role"].upper(), m["content"]))
         | 
| 533 | 
             
                conv = "\n".join(conv)
         | 
| 534 | 
             
                today = datetime.date.today().isoformat()
         | 
|  | |
| 589 |  | 
| 590 |  | 
| 591 | 
             
            def tts(tts_mdl, text):
         | 
| 592 | 
            +
                if not tts_mdl or not text:
         | 
| 593 | 
            +
                    return
         | 
| 594 | 
             
                bin = b""
         | 
| 595 | 
             
                for chunk in tts_mdl.tts(text):
         | 
| 596 | 
             
                    bin += chunk
         | 
|  | |
| 650 | 
             
                    idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
         | 
| 651 | 
             
                    recall_docs = [
         | 
| 652 | 
             
                        d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
         | 
| 653 | 
            +
                    if not recall_docs:
         | 
| 654 | 
            +
                        recall_docs = kbinfos["doc_aggs"]
         | 
| 655 | 
             
                    kbinfos["doc_aggs"] = recall_docs
         | 
| 656 | 
             
                    refs = deepcopy(kbinfos)
         | 
| 657 | 
             
                    for c in refs["chunks"]:
         | 
    	
        api/db/services/document_service.py
    CHANGED
    
    | @@ -532,7 +532,8 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id): | |
| 532 | 
             
                        try:
         | 
| 533 | 
             
                            mind_map = json.dumps(mindmap([c["content_with_weight"] for c in docs if c["doc_id"] == doc_id]).output,
         | 
| 534 | 
             
                                                  ensure_ascii=False, indent=2)
         | 
| 535 | 
            -
                            if len(mind_map) < 32: | 
|  | |
| 536 | 
             
                            cks.append({
         | 
| 537 | 
             
                                "id": get_uuid(),
         | 
| 538 | 
             
                                "doc_id": doc_id,
         | 
|  | |
| 532 | 
             
                        try:
         | 
| 533 | 
             
                            mind_map = json.dumps(mindmap([c["content_with_weight"] for c in docs if c["doc_id"] == doc_id]).output,
         | 
| 534 | 
             
                                                  ensure_ascii=False, indent=2)
         | 
| 535 | 
            +
                            if len(mind_map) < 32:
         | 
| 536 | 
            +
                                raise Exception("Few content: " + mind_map)
         | 
| 537 | 
             
                            cks.append({
         | 
| 538 | 
             
                                "id": get_uuid(),
         | 
| 539 | 
             
                                "doc_id": doc_id,
         | 
    	
        api/db/services/file2document_service.py
    CHANGED
    
    | @@ -20,7 +20,7 @@ from api.db.db_models import DB | |
| 20 | 
             
            from api.db.db_models import File, File2Document
         | 
| 21 | 
             
            from api.db.services.common_service import CommonService
         | 
| 22 | 
             
            from api.db.services.document_service import DocumentService
         | 
| 23 | 
            -
            from api.utils import current_timestamp, datetime_format | 
| 24 |  | 
| 25 |  | 
| 26 | 
             
            class File2DocumentService(CommonService):
         | 
| @@ -63,7 +63,7 @@ class File2DocumentService(CommonService): | |
| 63 | 
             
                def update_by_file_id(cls, file_id, obj):
         | 
| 64 | 
             
                    obj["update_time"] = current_timestamp()
         | 
| 65 | 
             
                    obj["update_date"] = datetime_format(datetime.now())
         | 
| 66 | 
            -
                    num = cls.model.update(obj).where(cls.model.id == file_id).execute()
         | 
| 67 | 
             
                    e, obj = cls.get_by_id(cls.model.id)
         | 
| 68 | 
             
                    return obj
         | 
| 69 |  | 
|  | |
| 20 | 
             
            from api.db.db_models import File, File2Document
         | 
| 21 | 
             
            from api.db.services.common_service import CommonService
         | 
| 22 | 
             
            from api.db.services.document_service import DocumentService
         | 
| 23 | 
            +
            from api.utils import current_timestamp, datetime_format
         | 
| 24 |  | 
| 25 |  | 
| 26 | 
             
            class File2DocumentService(CommonService):
         | 
|  | |
| 63 | 
             
                def update_by_file_id(cls, file_id, obj):
         | 
| 64 | 
             
                    obj["update_time"] = current_timestamp()
         | 
| 65 | 
             
                    obj["update_date"] = datetime_format(datetime.now())
         | 
| 66 | 
            +
                    # num = cls.model.update(obj).where(cls.model.id == file_id).execute()
         | 
| 67 | 
             
                    e, obj = cls.get_by_id(cls.model.id)
         | 
| 68 | 
             
                    return obj
         | 
| 69 |  | 
    	
        api/db/services/file_service.py
    CHANGED
    
    | @@ -85,7 +85,8 @@ class FileService(CommonService): | |
| 85 | 
             
                           .join(Document, on=(File2Document.document_id == Document.id))
         | 
| 86 | 
             
                           .join(Knowledgebase, on=(Knowledgebase.id == Document.kb_id))
         | 
| 87 | 
             
                           .where(cls.model.id == file_id))
         | 
| 88 | 
            -
                    if not kbs: | 
|  | |
| 89 | 
             
                    kbs_info_list = []
         | 
| 90 | 
             
                    for kb in list(kbs.dicts()):
         | 
| 91 | 
             
                        kbs_info_list.append({"kb_id": kb['id'], "kb_name": kb['name']})
         | 
| @@ -304,7 +305,8 @@ class FileService(CommonService): | |
| 304 | 
             
                @classmethod
         | 
| 305 | 
             
                @DB.connection_context()
         | 
| 306 | 
             
                def add_file_from_kb(cls, doc, kb_folder_id, tenant_id):
         | 
| 307 | 
            -
                    for _ in File2DocumentService.get_by_document_id(doc["id"]): | 
|  | |
| 308 | 
             
                    file = {
         | 
| 309 | 
             
                        "id": get_uuid(),
         | 
| 310 | 
             
                        "parent_id": kb_folder_id,
         | 
|  | |
| 85 | 
             
                           .join(Document, on=(File2Document.document_id == Document.id))
         | 
| 86 | 
             
                           .join(Knowledgebase, on=(Knowledgebase.id == Document.kb_id))
         | 
| 87 | 
             
                           .where(cls.model.id == file_id))
         | 
| 88 | 
            +
                    if not kbs:
         | 
| 89 | 
            +
                        return []
         | 
| 90 | 
             
                    kbs_info_list = []
         | 
| 91 | 
             
                    for kb in list(kbs.dicts()):
         | 
| 92 | 
             
                        kbs_info_list.append({"kb_id": kb['id'], "kb_name": kb['name']})
         | 
|  | |
| 305 | 
             
                @classmethod
         | 
| 306 | 
             
                @DB.connection_context()
         | 
| 307 | 
             
                def add_file_from_kb(cls, doc, kb_folder_id, tenant_id):
         | 
| 308 | 
            +
                    for _ in File2DocumentService.get_by_document_id(doc["id"]):
         | 
| 309 | 
            +
                        return
         | 
| 310 | 
             
                    file = {
         | 
| 311 | 
             
                        "id": get_uuid(),
         | 
| 312 | 
             
                        "parent_id": kb_folder_id,
         | 
    	
        api/db/services/llm_service.py
    CHANGED
    
    | @@ -107,7 +107,8 @@ class TenantLLMService(CommonService): | |
| 107 |  | 
| 108 | 
             
                    model_config = cls.get_api_key(tenant_id, mdlnm)
         | 
| 109 | 
             
                    mdlnm, fid = TenantLLMService.split_model_name_and_factory(mdlnm)
         | 
| 110 | 
            -
                    if model_config: | 
|  | |
| 111 | 
             
                    if not model_config:
         | 
| 112 | 
             
                        if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]:
         | 
| 113 | 
             
                            llm = LLMService.query(llm_name=mdlnm) if not fid else LLMService.query(llm_name=mdlnm, fid=fid)
         | 
|  | |
| 107 |  | 
| 108 | 
             
                    model_config = cls.get_api_key(tenant_id, mdlnm)
         | 
| 109 | 
             
                    mdlnm, fid = TenantLLMService.split_model_name_and_factory(mdlnm)
         | 
| 110 | 
            +
                    if model_config:
         | 
| 111 | 
            +
                        model_config = model_config.to_dict()
         | 
| 112 | 
             
                    if not model_config:
         | 
| 113 | 
             
                        if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]:
         | 
| 114 | 
             
                            llm = LLMService.query(llm_name=mdlnm) if not fid else LLMService.query(llm_name=mdlnm, fid=fid)
         | 
    	
        api/db/services/task_service.py
    CHANGED
    
    | @@ -57,28 +57,33 @@ class TaskService(CommonService): | |
| 57 | 
             
                        Tenant.img2txt_id,
         | 
| 58 | 
             
                        Tenant.asr_id,
         | 
| 59 | 
             
                        Tenant.llm_id,
         | 
| 60 | 
            -
                        cls.model.update_time | 
| 61 | 
            -
                     | 
| 62 | 
            -
             | 
| 63 | 
            -
                        . | 
| 64 | 
            -
                        .join( | 
|  | |
|  | |
| 65 | 
             
                        .where(cls.model.id == task_id)
         | 
|  | |
| 66 | 
             
                    docs = list(docs.dicts())
         | 
| 67 | 
            -
                    if not docs: | 
|  | |
| 68 |  | 
| 69 | 
             
                    msg = "\nTask has been received."
         | 
| 70 | 
            -
                    prog = random.random() / 10.
         | 
| 71 | 
             
                    if docs[0]["retry_count"] >= 3:
         | 
| 72 | 
             
                        msg = "\nERROR: Task is abandoned after 3 times attempts."
         | 
| 73 | 
             
                        prog = -1
         | 
| 74 |  | 
| 75 | 
            -
                    cls.model.update( | 
| 76 | 
            -
             | 
| 77 | 
            -
             | 
| 78 | 
            -
             | 
| 79 | 
            -
             | 
| 80 |  | 
| 81 | 
            -
                    if docs[0]["retry_count"] >= 3: | 
|  | |
| 82 |  | 
| 83 | 
             
                    return docs[0]
         | 
| 84 |  | 
| @@ -86,21 +91,44 @@ class TaskService(CommonService): | |
| 86 | 
             
                @DB.connection_context()
         | 
| 87 | 
             
                def get_ongoing_doc_name(cls):
         | 
| 88 | 
             
                    with DB.lock("get_task", -1):
         | 
| 89 | 
            -
                        docs =  | 
| 90 | 
            -
                             | 
| 91 | 
            -
             | 
| 92 | 
            -
                             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 93 | 
             
                            .where(
         | 
| 94 | 
             
                                Document.status == StatusEnum.VALID.value,
         | 
| 95 | 
             
                                Document.run == TaskStatus.RUNNING.value,
         | 
| 96 | 
             
                                ~(Document.type == FileType.VIRTUAL.value),
         | 
| 97 | 
             
                                cls.model.progress < 1,
         | 
| 98 | 
            -
                                cls.model.create_time >= current_timestamp() - 1000 * 600
         | 
| 99 | 
             
                            )
         | 
|  | |
| 100 | 
             
                        docs = list(docs.dicts())
         | 
| 101 | 
            -
                        if not docs: | 
| 102 | 
            -
             | 
| 103 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 104 |  | 
| 105 | 
             
                @classmethod
         | 
| 106 | 
             
                @DB.connection_context()
         | 
| @@ -118,28 +146,30 @@ class TaskService(CommonService): | |
| 118 | 
             
                def update_progress(cls, id, info):
         | 
| 119 | 
             
                    if os.environ.get("MACOS"):
         | 
| 120 | 
             
                        if info["progress_msg"]:
         | 
| 121 | 
            -
                            cls.model.update( | 
| 122 | 
            -
                                cls.model. | 
|  | |
| 123 | 
             
                        if "progress" in info:
         | 
| 124 | 
             
                            cls.model.update(progress=info["progress"]).where(
         | 
| 125 | 
            -
                                cls.model.id == id | 
|  | |
| 126 | 
             
                        return
         | 
| 127 |  | 
| 128 | 
             
                    with DB.lock("update_progress", -1):
         | 
| 129 | 
             
                        if info["progress_msg"]:
         | 
| 130 | 
            -
                            cls.model.update( | 
| 131 | 
            -
                                cls.model. | 
|  | |
| 132 | 
             
                        if "progress" in info:
         | 
| 133 | 
             
                            cls.model.update(progress=info["progress"]).where(
         | 
| 134 | 
            -
                                cls.model.id == id | 
|  | |
| 135 |  | 
| 136 |  | 
| 137 | 
             
            def queue_tasks(doc: dict, bucket: str, name: str):
         | 
| 138 | 
             
                def new_task():
         | 
| 139 | 
            -
                    return {
         | 
| 140 | 
            -
             | 
| 141 | 
            -
                        "doc_id": doc["id"]
         | 
| 142 | 
            -
                    }
         | 
| 143 | 
             
                tsks = []
         | 
| 144 |  | 
| 145 | 
             
                if doc["type"] == FileType.PDF.value:
         | 
| @@ -150,8 +180,8 @@ def queue_tasks(doc: dict, bucket: str, name: str): | |
| 150 | 
             
                    if doc["parser_id"] == "paper":
         | 
| 151 | 
             
                        page_size = doc["parser_config"].get("task_page_size", 22)
         | 
| 152 | 
             
                    if doc["parser_id"] in ["one", "knowledge_graph"] or not do_layout:
         | 
| 153 | 
            -
                        page_size = 10 | 
| 154 | 
            -
                    page_ranges = doc["parser_config"].get("pages") or [(1, 10 | 
| 155 | 
             
                    for s, e in page_ranges:
         | 
| 156 | 
             
                        s -= 1
         | 
| 157 | 
             
                        s = max(0, s)
         | 
| @@ -177,4 +207,6 @@ def queue_tasks(doc: dict, bucket: str, name: str): | |
| 177 | 
             
                DocumentService.begin2parse(doc["id"])
         | 
| 178 |  | 
| 179 | 
             
                for t in tsks:
         | 
| 180 | 
            -
                    assert REDIS_CONN.queue_product( | 
|  | |
|  | 
|  | |
| 57 | 
             
                        Tenant.img2txt_id,
         | 
| 58 | 
             
                        Tenant.asr_id,
         | 
| 59 | 
             
                        Tenant.llm_id,
         | 
| 60 | 
            +
                        cls.model.update_time,
         | 
| 61 | 
            +
                    ]
         | 
| 62 | 
            +
                    docs = (
         | 
| 63 | 
            +
                        cls.model.select(*fields)
         | 
| 64 | 
            +
                        .join(Document, on=(cls.model.doc_id == Document.id))
         | 
| 65 | 
            +
                        .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id))
         | 
| 66 | 
            +
                        .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))
         | 
| 67 | 
             
                        .where(cls.model.id == task_id)
         | 
| 68 | 
            +
                    )
         | 
| 69 | 
             
                    docs = list(docs.dicts())
         | 
| 70 | 
            +
                    if not docs:
         | 
| 71 | 
            +
                        return None
         | 
| 72 |  | 
| 73 | 
             
                    msg = "\nTask has been received."
         | 
| 74 | 
            +
                    prog = random.random() / 10.0
         | 
| 75 | 
             
                    if docs[0]["retry_count"] >= 3:
         | 
| 76 | 
             
                        msg = "\nERROR: Task is abandoned after 3 times attempts."
         | 
| 77 | 
             
                        prog = -1
         | 
| 78 |  | 
| 79 | 
            +
                    cls.model.update(
         | 
| 80 | 
            +
                        progress_msg=cls.model.progress_msg + msg,
         | 
| 81 | 
            +
                        progress=prog,
         | 
| 82 | 
            +
                        retry_count=docs[0]["retry_count"] + 1,
         | 
| 83 | 
            +
                    ).where(cls.model.id == docs[0]["id"]).execute()
         | 
| 84 |  | 
| 85 | 
            +
                    if docs[0]["retry_count"] >= 3:
         | 
| 86 | 
            +
                        return None
         | 
| 87 |  | 
| 88 | 
             
                    return docs[0]
         | 
| 89 |  | 
|  | |
| 91 | 
             
                @DB.connection_context()
         | 
| 92 | 
             
                def get_ongoing_doc_name(cls):
         | 
| 93 | 
             
                    with DB.lock("get_task", -1):
         | 
| 94 | 
            +
                        docs = (
         | 
| 95 | 
            +
                            cls.model.select(
         | 
| 96 | 
            +
                                *[Document.id, Document.kb_id, Document.location, File.parent_id]
         | 
| 97 | 
            +
                            )
         | 
| 98 | 
            +
                            .join(Document, on=(cls.model.doc_id == Document.id))
         | 
| 99 | 
            +
                            .join(
         | 
| 100 | 
            +
                                File2Document,
         | 
| 101 | 
            +
                                on=(File2Document.document_id == Document.id),
         | 
| 102 | 
            +
                                join_type=JOIN.LEFT_OUTER,
         | 
| 103 | 
            +
                            )
         | 
| 104 | 
            +
                            .join(
         | 
| 105 | 
            +
                                File,
         | 
| 106 | 
            +
                                on=(File2Document.file_id == File.id),
         | 
| 107 | 
            +
                                join_type=JOIN.LEFT_OUTER,
         | 
| 108 | 
            +
                            )
         | 
| 109 | 
             
                            .where(
         | 
| 110 | 
             
                                Document.status == StatusEnum.VALID.value,
         | 
| 111 | 
             
                                Document.run == TaskStatus.RUNNING.value,
         | 
| 112 | 
             
                                ~(Document.type == FileType.VIRTUAL.value),
         | 
| 113 | 
             
                                cls.model.progress < 1,
         | 
| 114 | 
            +
                                cls.model.create_time >= current_timestamp() - 1000 * 600,
         | 
| 115 | 
             
                            )
         | 
| 116 | 
            +
                        )
         | 
| 117 | 
             
                        docs = list(docs.dicts())
         | 
| 118 | 
            +
                        if not docs:
         | 
| 119 | 
            +
                            return []
         | 
| 120 | 
            +
             | 
| 121 | 
            +
                        return list(
         | 
| 122 | 
            +
                            set(
         | 
| 123 | 
            +
                                [
         | 
| 124 | 
            +
                                    (
         | 
| 125 | 
            +
                                        d["parent_id"] if d["parent_id"] else d["kb_id"],
         | 
| 126 | 
            +
                                        d["location"],
         | 
| 127 | 
            +
                                    )
         | 
| 128 | 
            +
                                    for d in docs
         | 
| 129 | 
            +
                                ]
         | 
| 130 | 
            +
                            )
         | 
| 131 | 
            +
                        )
         | 
| 132 |  | 
| 133 | 
             
                @classmethod
         | 
| 134 | 
             
                @DB.connection_context()
         | 
|  | |
| 146 | 
             
                def update_progress(cls, id, info):
         | 
| 147 | 
             
                    if os.environ.get("MACOS"):
         | 
| 148 | 
             
                        if info["progress_msg"]:
         | 
| 149 | 
            +
                            cls.model.update(
         | 
| 150 | 
            +
                                progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]
         | 
| 151 | 
            +
                            ).where(cls.model.id == id).execute()
         | 
| 152 | 
             
                        if "progress" in info:
         | 
| 153 | 
             
                            cls.model.update(progress=info["progress"]).where(
         | 
| 154 | 
            +
                                cls.model.id == id
         | 
| 155 | 
            +
                            ).execute()
         | 
| 156 | 
             
                        return
         | 
| 157 |  | 
| 158 | 
             
                    with DB.lock("update_progress", -1):
         | 
| 159 | 
             
                        if info["progress_msg"]:
         | 
| 160 | 
            +
                            cls.model.update(
         | 
| 161 | 
            +
                                progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]
         | 
| 162 | 
            +
                            ).where(cls.model.id == id).execute()
         | 
| 163 | 
             
                        if "progress" in info:
         | 
| 164 | 
             
                            cls.model.update(progress=info["progress"]).where(
         | 
| 165 | 
            +
                                cls.model.id == id
         | 
| 166 | 
            +
                            ).execute()
         | 
| 167 |  | 
| 168 |  | 
| 169 | 
             
            def queue_tasks(doc: dict, bucket: str, name: str):
         | 
| 170 | 
             
                def new_task():
         | 
| 171 | 
            +
                    return {"id": get_uuid(), "doc_id": doc["id"]}
         | 
| 172 | 
            +
             | 
|  | |
|  | |
| 173 | 
             
                tsks = []
         | 
| 174 |  | 
| 175 | 
             
                if doc["type"] == FileType.PDF.value:
         | 
|  | |
| 180 | 
             
                    if doc["parser_id"] == "paper":
         | 
| 181 | 
             
                        page_size = doc["parser_config"].get("task_page_size", 22)
         | 
| 182 | 
             
                    if doc["parser_id"] in ["one", "knowledge_graph"] or not do_layout:
         | 
| 183 | 
            +
                        page_size = 10**9
         | 
| 184 | 
            +
                    page_ranges = doc["parser_config"].get("pages") or [(1, 10**5)]
         | 
| 185 | 
             
                    for s, e in page_ranges:
         | 
| 186 | 
             
                        s -= 1
         | 
| 187 | 
             
                        s = max(0, s)
         | 
|  | |
| 207 | 
             
                DocumentService.begin2parse(doc["id"])
         | 
| 208 |  | 
| 209 | 
             
                for t in tsks:
         | 
| 210 | 
            +
                    assert REDIS_CONN.queue_product(
         | 
| 211 | 
            +
                        SVR_QUEUE_NAME, message=t
         | 
| 212 | 
            +
                    ), "Can't access Redis. Please check the Redis' status."
         | 
    	
        api/db/services/user_service.py
    CHANGED
    
    | @@ -22,7 +22,7 @@ from api.db import UserTenantRole | |
| 22 | 
             
            from api.db.db_models import DB, UserTenant
         | 
| 23 | 
             
            from api.db.db_models import User, Tenant
         | 
| 24 | 
             
            from api.db.services.common_service import CommonService
         | 
| 25 | 
            -
            from api.utils import get_uuid,  | 
| 26 | 
             
            from api.db import StatusEnum
         | 
| 27 |  | 
| 28 |  | 
|  | |
| 22 | 
             
            from api.db.db_models import DB, UserTenant
         | 
| 23 | 
             
            from api.db.db_models import User, Tenant
         | 
| 24 | 
             
            from api.db.services.common_service import CommonService
         | 
| 25 | 
            +
            from api.utils import get_uuid, current_timestamp, datetime_format
         | 
| 26 | 
             
            from api.db import StatusEnum
         | 
| 27 |  | 
| 28 |  | 
    	
        api/ragflow_server.py
    CHANGED
    
    | @@ -21,10 +21,7 @@ | |
| 21 | 
             
            import logging
         | 
| 22 | 
             
            import os
         | 
| 23 | 
             
            from api.utils.log_utils import initRootLogger
         | 
| 24 | 
            -
            LOG_LEVELS = os.environ.get("LOG_LEVELS", "")
         | 
| 25 | 
            -
            initRootLogger("ragflow_server", LOG_LEVELS)
         | 
| 26 |  | 
| 27 | 
            -
            import os
         | 
| 28 | 
             
            import signal
         | 
| 29 | 
             
            import sys
         | 
| 30 | 
             
            import time
         | 
| @@ -44,6 +41,9 @@ from api.versions import get_ragflow_version | |
| 44 | 
             
            from api.utils import show_configs
         | 
| 45 | 
             
            from rag.settings import print_rag_settings
         | 
| 46 |  | 
|  | |
|  | |
|  | |
| 47 |  | 
| 48 | 
             
            def update_progress():
         | 
| 49 | 
             
                while True:
         | 
|  | |
| 21 | 
             
            import logging
         | 
| 22 | 
             
            import os
         | 
| 23 | 
             
            from api.utils.log_utils import initRootLogger
         | 
|  | |
|  | |
| 24 |  | 
|  | |
| 25 | 
             
            import signal
         | 
| 26 | 
             
            import sys
         | 
| 27 | 
             
            import time
         | 
|  | |
| 41 | 
             
            from api.utils import show_configs
         | 
| 42 | 
             
            from rag.settings import print_rag_settings
         | 
| 43 |  | 
| 44 | 
            +
            LOG_LEVELS = os.environ.get("LOG_LEVELS", "")
         | 
| 45 | 
            +
            initRootLogger("ragflow_server", LOG_LEVELS)
         | 
| 46 | 
            +
             | 
| 47 |  | 
| 48 | 
             
            def update_progress():
         | 
| 49 | 
             
                while True:
         | 
    	
        api/utils/api_utils.py
    CHANGED
    
    | @@ -36,7 +36,6 @@ from werkzeug.http import HTTP_STATUS_CODES | |
| 36 | 
             
            from api.db.db_models import APIToken
         | 
| 37 | 
             
            from api import settings
         | 
| 38 |  | 
| 39 | 
            -
            from api import settings
         | 
| 40 | 
             
            from api.utils import CustomJSONEncoder, get_uuid
         | 
| 41 | 
             
            from api.utils import json_dumps
         | 
| 42 | 
             
            from api.constants import REQUEST_WAIT_SEC, REQUEST_MAX_WAIT_SEC
         | 
|  | |
| 36 | 
             
            from api.db.db_models import APIToken
         | 
| 37 | 
             
            from api import settings
         | 
| 38 |  | 
|  | |
| 39 | 
             
            from api.utils import CustomJSONEncoder, get_uuid
         | 
| 40 | 
             
            from api.utils import json_dumps
         | 
| 41 | 
             
            from api.constants import REQUEST_WAIT_SEC, REQUEST_MAX_WAIT_SEC
         | 
    	
        api/validation.py
    CHANGED
    
    | @@ -45,5 +45,5 @@ try: | |
| 45 | 
             
                pool = Pool(processes=1)
         | 
| 46 | 
             
                thread = pool.apply_async(download_nltk_data)
         | 
| 47 | 
             
                binary = thread.get(timeout=60)
         | 
| 48 | 
            -
            except Exception | 
| 49 | 
             
                print('\x1b[6;37;41m WARNING \x1b[0m' + "Downloading NLTK data failure.", flush=True)
         | 
|  | |
| 45 | 
             
                pool = Pool(processes=1)
         | 
| 46 | 
             
                thread = pool.apply_async(download_nltk_data)
         | 
| 47 | 
             
                binary = thread.get(timeout=60)
         | 
| 48 | 
            +
            except Exception:
         | 
| 49 | 
             
                print('\x1b[6;37;41m WARNING \x1b[0m' + "Downloading NLTK data failure.", flush=True)
         | 
    	
        deepdoc/parser/__init__.py
    CHANGED
    
    | @@ -18,4 +18,16 @@ from .ppt_parser import RAGFlowPptParser as PptParser | |
| 18 | 
             
            from .html_parser import RAGFlowHtmlParser as HtmlParser
         | 
| 19 | 
             
            from .json_parser import RAGFlowJsonParser as JsonParser
         | 
| 20 | 
             
            from .markdown_parser import RAGFlowMarkdownParser as MarkdownParser
         | 
| 21 | 
            -
            from .txt_parser import RAGFlowTxtParser as TxtParser
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 18 | 
             
            from .html_parser import RAGFlowHtmlParser as HtmlParser
         | 
| 19 | 
             
            from .json_parser import RAGFlowJsonParser as JsonParser
         | 
| 20 | 
             
            from .markdown_parser import RAGFlowMarkdownParser as MarkdownParser
         | 
| 21 | 
            +
            from .txt_parser import RAGFlowTxtParser as TxtParser
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            __all__ = [
         | 
| 24 | 
            +
                "PdfParser",
         | 
| 25 | 
            +
                "PlainParser",
         | 
| 26 | 
            +
                "DocxParser",
         | 
| 27 | 
            +
                "ExcelParser",
         | 
| 28 | 
            +
                "PptParser",
         | 
| 29 | 
            +
                "HtmlParser",
         | 
| 30 | 
            +
                "JsonParser",
         | 
| 31 | 
            +
                "MarkdownParser",
         | 
| 32 | 
            +
                "TxtParser",
         | 
| 33 | 
            +
            ]
         | 
    	
        deepdoc/parser/excel_parser.py
    CHANGED
    
    | @@ -29,7 +29,8 @@ class RAGFlowExcelParser: | |
| 29 | 
             
                    for sheetname in wb.sheetnames:
         | 
| 30 | 
             
                        ws = wb[sheetname]
         | 
| 31 | 
             
                        rows = list(ws.rows)
         | 
| 32 | 
            -
                        if not rows: | 
|  | |
| 33 |  | 
| 34 | 
             
                        tb_rows_0 = "<tr>"
         | 
| 35 | 
             
                        for t in list(rows[0]):
         | 
| @@ -40,7 +41,9 @@ class RAGFlowExcelParser: | |
| 40 | 
             
                            tb = ""
         | 
| 41 | 
             
                            tb += f"<table><caption>{sheetname}</caption>"
         | 
| 42 | 
             
                            tb += tb_rows_0
         | 
| 43 | 
            -
                            for r in list( | 
|  | |
|  | |
| 44 | 
             
                                tb += "<tr>"
         | 
| 45 | 
             
                                for i, c in enumerate(r):
         | 
| 46 | 
             
                                    if c.value is None:
         | 
| @@ -62,20 +65,21 @@ class RAGFlowExcelParser: | |
| 62 | 
             
                    for sheetname in wb.sheetnames:
         | 
| 63 | 
             
                        ws = wb[sheetname]
         | 
| 64 | 
             
                        rows = list(ws.rows)
         | 
| 65 | 
            -
                        if not rows: | 
|  | |
| 66 | 
             
                        ti = list(rows[0])
         | 
| 67 | 
             
                        for r in list(rows[1:]):
         | 
| 68 | 
            -
                             | 
| 69 | 
             
                            for i, c in enumerate(r):
         | 
| 70 | 
             
                                if not c.value:
         | 
| 71 | 
             
                                    continue
         | 
| 72 | 
             
                                t = str(ti[i].value) if i < len(ti) else ""
         | 
| 73 | 
             
                                t += (":" if t else "") + str(c.value)
         | 
| 74 | 
            -
                                 | 
| 75 | 
            -
                             | 
| 76 | 
             
                            if sheetname.lower().find("sheet") < 0:
         | 
| 77 | 
            -
                                 | 
| 78 | 
            -
                            res.append( | 
| 79 | 
             
                    return res
         | 
| 80 |  | 
| 81 | 
             
                @staticmethod
         | 
|  | |
| 29 | 
             
                    for sheetname in wb.sheetnames:
         | 
| 30 | 
             
                        ws = wb[sheetname]
         | 
| 31 | 
             
                        rows = list(ws.rows)
         | 
| 32 | 
            +
                        if not rows:
         | 
| 33 | 
            +
                            continue
         | 
| 34 |  | 
| 35 | 
             
                        tb_rows_0 = "<tr>"
         | 
| 36 | 
             
                        for t in list(rows[0]):
         | 
|  | |
| 41 | 
             
                            tb = ""
         | 
| 42 | 
             
                            tb += f"<table><caption>{sheetname}</caption>"
         | 
| 43 | 
             
                            tb += tb_rows_0
         | 
| 44 | 
            +
                            for r in list(
         | 
| 45 | 
            +
                                rows[1 + chunk_i * chunk_rows : 1 + (chunk_i + 1) * chunk_rows]
         | 
| 46 | 
            +
                            ):
         | 
| 47 | 
             
                                tb += "<tr>"
         | 
| 48 | 
             
                                for i, c in enumerate(r):
         | 
| 49 | 
             
                                    if c.value is None:
         | 
|  | |
| 65 | 
             
                    for sheetname in wb.sheetnames:
         | 
| 66 | 
             
                        ws = wb[sheetname]
         | 
| 67 | 
             
                        rows = list(ws.rows)
         | 
| 68 | 
            +
                        if not rows:
         | 
| 69 | 
            +
                            continue
         | 
| 70 | 
             
                        ti = list(rows[0])
         | 
| 71 | 
             
                        for r in list(rows[1:]):
         | 
| 72 | 
            +
                            fields = []
         | 
| 73 | 
             
                            for i, c in enumerate(r):
         | 
| 74 | 
             
                                if not c.value:
         | 
| 75 | 
             
                                    continue
         | 
| 76 | 
             
                                t = str(ti[i].value) if i < len(ti) else ""
         | 
| 77 | 
             
                                t += (":" if t else "") + str(c.value)
         | 
| 78 | 
            +
                                fields.append(t)
         | 
| 79 | 
            +
                            line = "; ".join(fields)
         | 
| 80 | 
             
                            if sheetname.lower().find("sheet") < 0:
         | 
| 81 | 
            +
                                line += " ——" + sheetname
         | 
| 82 | 
            +
                            res.append(line)
         | 
| 83 | 
             
                    return res
         | 
| 84 |  | 
| 85 | 
             
                @staticmethod
         | 
    	
        deepdoc/parser/html_parser.py
    CHANGED
    
    | @@ -36,7 +36,7 @@ class RAGFlowHtmlParser: | |
| 36 |  | 
| 37 | 
             
                @classmethod
         | 
| 38 | 
             
                def parser_txt(cls, txt):
         | 
| 39 | 
            -
                    if  | 
| 40 | 
             
                        raise TypeError("txt type should be str!")
         | 
| 41 | 
             
                    html_doc = readability.Document(txt)
         | 
| 42 | 
             
                    title = html_doc.title()
         | 
|  | |
| 36 |  | 
| 37 | 
             
                @classmethod
         | 
| 38 | 
             
                def parser_txt(cls, txt):
         | 
| 39 | 
            +
                    if not isinstance(txt, str):
         | 
| 40 | 
             
                        raise TypeError("txt type should be str!")
         | 
| 41 | 
             
                    html_doc = readability.Document(txt)
         | 
| 42 | 
             
                    title = html_doc.title()
         | 
    	
        deepdoc/parser/json_parser.py
    CHANGED
    
    | @@ -22,7 +22,7 @@ class RAGFlowJsonParser: | |
| 22 | 
             
                    txt = binary.decode(encoding, errors="ignore")
         | 
| 23 | 
             
                    json_data = json.loads(txt)
         | 
| 24 | 
             
                    chunks = self.split_json(json_data, True)   
         | 
| 25 | 
            -
                    sections = [json.dumps( | 
| 26 | 
             
                    return sections
         | 
| 27 |  | 
| 28 | 
             
                @staticmethod
         | 
|  | |
| 22 | 
             
                    txt = binary.decode(encoding, errors="ignore")
         | 
| 23 | 
             
                    json_data = json.loads(txt)
         | 
| 24 | 
             
                    chunks = self.split_json(json_data, True)   
         | 
| 25 | 
            +
                    sections = [json.dumps(line, ensure_ascii=False) for line in chunks if line]
         | 
| 26 | 
             
                    return sections
         | 
| 27 |  | 
| 28 | 
             
                @staticmethod
         | 
    	
        deepdoc/parser/pdf_parser.py
    CHANGED
    
    | @@ -752,7 +752,7 @@ class RAGFlowPdfParser: | |
| 752 | 
             
                                "x1": np.max([b["x1"] for b in bxs]),
         | 
| 753 | 
             
                                "bottom": np.max([b["bottom"] for b in bxs]) - ht
         | 
| 754 | 
             
                            }
         | 
| 755 | 
            -
                            louts = [ | 
| 756 | 
             
                            ii = Recognizer.find_overlapped(b, louts, naive=True)
         | 
| 757 | 
             
                            if ii is not None:
         | 
| 758 | 
             
                                b = louts[ii]
         | 
| @@ -763,7 +763,8 @@ class RAGFlowPdfParser: | |
| 763 | 
             
                                        "layoutno", "")))
         | 
| 764 |  | 
| 765 | 
             
                            left, top, right, bott = b["x0"], b["top"], b["x1"], b["bottom"]
         | 
| 766 | 
            -
                            if right < left: | 
|  | |
| 767 | 
             
                            poss.append((pn + self.page_from, left, right, top, bott))
         | 
| 768 | 
             
                            return self.page_images[pn] \
         | 
| 769 | 
             
                                .crop((left * ZM, top * ZM,
         | 
| @@ -845,7 +846,8 @@ class RAGFlowPdfParser: | |
| 845 | 
             
                    top = bx["top"] - self.page_cum_height[pn[0] - 1]
         | 
| 846 | 
             
                    bott = bx["bottom"] - self.page_cum_height[pn[0] - 1]
         | 
| 847 | 
             
                    page_images_cnt = len(self.page_images)
         | 
| 848 | 
            -
                    if pn[-1] - 1 >= page_images_cnt: | 
|  | |
| 849 | 
             
                    while bott * ZM > self.page_images[pn[-1] - 1].size[1]:
         | 
| 850 | 
             
                        bott -= self.page_images[pn[-1] - 1].size[1] / ZM
         | 
| 851 | 
             
                        pn.append(pn[-1] + 1)
         | 
| @@ -889,7 +891,6 @@ class RAGFlowPdfParser: | |
| 889 | 
             
                            nonlocal mh, pw, lines, widths
         | 
| 890 | 
             
                            lines.append(line)
         | 
| 891 | 
             
                            widths.append(width(line))
         | 
| 892 | 
            -
                            width_mean = np.mean(widths)
         | 
| 893 | 
             
                            mmj = self.proj_match(
         | 
| 894 | 
             
                                line["text"]) or line.get(
         | 
| 895 | 
             
                                "layout_type",
         | 
| @@ -994,7 +995,7 @@ class RAGFlowPdfParser: | |
| 994 | 
             
                    else:
         | 
| 995 | 
             
                        self.is_english = False
         | 
| 996 |  | 
| 997 | 
            -
                    st = timer()
         | 
| 998 | 
             
                    for i, img in enumerate(self.page_images_x2):
         | 
| 999 | 
             
                        chars = self.page_chars[i] if not self.is_english else []
         | 
| 1000 | 
             
                        self.mean_height.append(
         | 
| @@ -1028,8 +1029,8 @@ class RAGFlowPdfParser: | |
| 1028 |  | 
| 1029 | 
             
                    self.page_cum_height = np.cumsum(self.page_cum_height)
         | 
| 1030 | 
             
                    assert len(self.page_cum_height) == len(self.page_images) + 1
         | 
| 1031 | 
            -
                    if len(self.boxes) == 0 and zoomin < 9: | 
| 1032 | 
            -
             | 
| 1033 |  | 
| 1034 | 
             
                def __call__(self, fnm, need_image=True, zoomin=3, return_html=False):
         | 
| 1035 | 
             
                    self.__images__(fnm, zoomin)
         | 
| @@ -1168,7 +1169,7 @@ class PlainParser(object): | |
| 1168 | 
             
                    if not self.outlines:
         | 
| 1169 | 
             
                        logging.warning("Miss outlines")
         | 
| 1170 |  | 
| 1171 | 
            -
                    return [( | 
| 1172 |  | 
| 1173 | 
             
                def crop(self, ck, need_position):
         | 
| 1174 | 
             
                    raise NotImplementedError
         | 
|  | |
| 752 | 
             
                                "x1": np.max([b["x1"] for b in bxs]),
         | 
| 753 | 
             
                                "bottom": np.max([b["bottom"] for b in bxs]) - ht
         | 
| 754 | 
             
                            }
         | 
| 755 | 
            +
                            louts = [layout for layout in self.page_layout[pn] if layout["type"] == ltype]
         | 
| 756 | 
             
                            ii = Recognizer.find_overlapped(b, louts, naive=True)
         | 
| 757 | 
             
                            if ii is not None:
         | 
| 758 | 
             
                                b = louts[ii]
         | 
|  | |
| 763 | 
             
                                        "layoutno", "")))
         | 
| 764 |  | 
| 765 | 
             
                            left, top, right, bott = b["x0"], b["top"], b["x1"], b["bottom"]
         | 
| 766 | 
            +
                            if right < left:
         | 
| 767 | 
            +
                                right = left + 1
         | 
| 768 | 
             
                            poss.append((pn + self.page_from, left, right, top, bott))
         | 
| 769 | 
             
                            return self.page_images[pn] \
         | 
| 770 | 
             
                                .crop((left * ZM, top * ZM,
         | 
|  | |
| 846 | 
             
                    top = bx["top"] - self.page_cum_height[pn[0] - 1]
         | 
| 847 | 
             
                    bott = bx["bottom"] - self.page_cum_height[pn[0] - 1]
         | 
| 848 | 
             
                    page_images_cnt = len(self.page_images)
         | 
| 849 | 
            +
                    if pn[-1] - 1 >= page_images_cnt:
         | 
| 850 | 
            +
                        return ""
         | 
| 851 | 
             
                    while bott * ZM > self.page_images[pn[-1] - 1].size[1]:
         | 
| 852 | 
             
                        bott -= self.page_images[pn[-1] - 1].size[1] / ZM
         | 
| 853 | 
             
                        pn.append(pn[-1] + 1)
         | 
|  | |
| 891 | 
             
                            nonlocal mh, pw, lines, widths
         | 
| 892 | 
             
                            lines.append(line)
         | 
| 893 | 
             
                            widths.append(width(line))
         | 
|  | |
| 894 | 
             
                            mmj = self.proj_match(
         | 
| 895 | 
             
                                line["text"]) or line.get(
         | 
| 896 | 
             
                                "layout_type",
         | 
|  | |
| 995 | 
             
                    else:
         | 
| 996 | 
             
                        self.is_english = False
         | 
| 997 |  | 
| 998 | 
            +
                    # st = timer()
         | 
| 999 | 
             
                    for i, img in enumerate(self.page_images_x2):
         | 
| 1000 | 
             
                        chars = self.page_chars[i] if not self.is_english else []
         | 
| 1001 | 
             
                        self.mean_height.append(
         | 
|  | |
| 1029 |  | 
| 1030 | 
             
                    self.page_cum_height = np.cumsum(self.page_cum_height)
         | 
| 1031 | 
             
                    assert len(self.page_cum_height) == len(self.page_images) + 1
         | 
| 1032 | 
            +
                    if len(self.boxes) == 0 and zoomin < 9:
         | 
| 1033 | 
            +
                        self.__images__(fnm, zoomin * 3, page_from, page_to, callback)
         | 
| 1034 |  | 
| 1035 | 
             
                def __call__(self, fnm, need_image=True, zoomin=3, return_html=False):
         | 
| 1036 | 
             
                    self.__images__(fnm, zoomin)
         | 
|  | |
| 1169 | 
             
                    if not self.outlines:
         | 
| 1170 | 
             
                        logging.warning("Miss outlines")
         | 
| 1171 |  | 
| 1172 | 
            +
                    return [(line, "") for line in lines], []
         | 
| 1173 |  | 
| 1174 | 
             
                def crop(self, ck, need_position):
         | 
| 1175 | 
             
                    raise NotImplementedError
         | 
    	
        deepdoc/parser/resume/__init__.py
    CHANGED
    
    | @@ -15,21 +15,42 @@ import datetime | |
| 15 |  | 
| 16 |  | 
| 17 | 
             
            def refactor(cv):
         | 
| 18 | 
            -
                for n in [ | 
| 19 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 20 | 
             
                cv["is_deleted"] = 0
         | 
| 21 | 
            -
                if "basic" not in cv: | 
| 22 | 
            -
             | 
|  | |
|  | |
| 23 |  | 
| 24 | 
            -
                for n in [ | 
| 25 | 
            -
                     | 
| 26 | 
            -
                     | 
| 27 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 28 | 
             
                        del cv[n]
         | 
| 29 | 
             
                        continue
         | 
| 30 | 
             
                    vv = []
         | 
| 31 | 
             
                    for v in cv[n]:
         | 
| 32 | 
            -
                        if "external" in v and v["external"] is not None: | 
|  | |
| 33 | 
             
                        vv.append(v)
         | 
| 34 | 
             
                    cv[n] = {str(i): vv[i] for i in range(len(vv))}
         | 
| 35 |  | 
| @@ -42,24 +63,44 @@ def refactor(cv): | |
| 42 | 
             
                        cv["basic"][t] = cv["basic"][n]
         | 
| 43 | 
             
                        del cv["basic"][n]
         | 
| 44 |  | 
| 45 | 
            -
                work = sorted( | 
| 46 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 47 |  | 
| 48 | 
             
                if work:
         | 
| 49 | 
             
                    cv["basic"]["work_start_time"] = work[0].get("start_time", "")
         | 
| 50 | 
            -
                    cv["basic"]["management_experience"] =  | 
| 51 | 
            -
                         | 
|  | |
|  | |
|  | |
| 52 | 
             
                    cv["basic"]["annual_salary"] = work[-1].get("annual_salary_from", "0")
         | 
| 53 |  | 
| 54 | 
            -
                    for n in [ | 
| 55 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 56 | 
             
                        cv["basic"][n] = work[-1].get(n, "")
         | 
| 57 |  | 
| 58 | 
             
                if edu:
         | 
| 59 | 
             
                    for n in ["school_name", "discipline_name"]:
         | 
| 60 | 
            -
                        if n in edu[-1]: | 
|  | |
| 61 |  | 
| 62 | 
             
                cv["basic"]["updated_at"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         | 
| 63 | 
            -
                if "contact" not in cv: | 
| 64 | 
            -
             | 
| 65 | 
            -
                 | 
|  | |
|  | 
|  | |
| 15 |  | 
| 16 |  | 
| 17 | 
             
            def refactor(cv):
         | 
| 18 | 
            +
                for n in [
         | 
| 19 | 
            +
                    "raw_txt",
         | 
| 20 | 
            +
                    "parser_name",
         | 
| 21 | 
            +
                    "inference",
         | 
| 22 | 
            +
                    "ori_text",
         | 
| 23 | 
            +
                    "use_time",
         | 
| 24 | 
            +
                    "time_stat",
         | 
| 25 | 
            +
                ]:
         | 
| 26 | 
            +
                    if n in cv and cv[n] is not None:
         | 
| 27 | 
            +
                        del cv[n]
         | 
| 28 | 
             
                cv["is_deleted"] = 0
         | 
| 29 | 
            +
                if "basic" not in cv:
         | 
| 30 | 
            +
                    cv["basic"] = {}
         | 
| 31 | 
            +
                if cv["basic"].get("photo2"):
         | 
| 32 | 
            +
                    del cv["basic"]["photo2"]
         | 
| 33 |  | 
| 34 | 
            +
                for n in [
         | 
| 35 | 
            +
                    "education",
         | 
| 36 | 
            +
                    "work",
         | 
| 37 | 
            +
                    "certificate",
         | 
| 38 | 
            +
                    "project",
         | 
| 39 | 
            +
                    "language",
         | 
| 40 | 
            +
                    "skill",
         | 
| 41 | 
            +
                    "training",
         | 
| 42 | 
            +
                ]:
         | 
| 43 | 
            +
                    if n not in cv or cv[n] is None:
         | 
| 44 | 
            +
                        continue
         | 
| 45 | 
            +
                    if isinstance(cv[n], dict):
         | 
| 46 | 
            +
                        cv[n] = [v for _, v in cv[n].items()]
         | 
| 47 | 
            +
                    if not isinstance(cv[n], list):
         | 
| 48 | 
             
                        del cv[n]
         | 
| 49 | 
             
                        continue
         | 
| 50 | 
             
                    vv = []
         | 
| 51 | 
             
                    for v in cv[n]:
         | 
| 52 | 
            +
                        if "external" in v and v["external"] is not None:
         | 
| 53 | 
            +
                            del v["external"]
         | 
| 54 | 
             
                        vv.append(v)
         | 
| 55 | 
             
                    cv[n] = {str(i): vv[i] for i in range(len(vv))}
         | 
| 56 |  | 
|  | |
| 63 | 
             
                        cv["basic"][t] = cv["basic"][n]
         | 
| 64 | 
             
                        del cv["basic"][n]
         | 
| 65 |  | 
| 66 | 
            +
                work = sorted(
         | 
| 67 | 
            +
                    [v for _, v in cv.get("work", {}).items()],
         | 
| 68 | 
            +
                    key=lambda x: x.get("start_time", ""),
         | 
| 69 | 
            +
                )
         | 
| 70 | 
            +
                edu = sorted(
         | 
| 71 | 
            +
                    [v for _, v in cv.get("education", {}).items()],
         | 
| 72 | 
            +
                    key=lambda x: x.get("start_time", ""),
         | 
| 73 | 
            +
                )
         | 
| 74 |  | 
| 75 | 
             
                if work:
         | 
| 76 | 
             
                    cv["basic"]["work_start_time"] = work[0].get("start_time", "")
         | 
| 77 | 
            +
                    cv["basic"]["management_experience"] = (
         | 
| 78 | 
            +
                        "Y"
         | 
| 79 | 
            +
                        if any([w.get("management_experience", "") == "Y" for w in work])
         | 
| 80 | 
            +
                        else "N"
         | 
| 81 | 
            +
                    )
         | 
| 82 | 
             
                    cv["basic"]["annual_salary"] = work[-1].get("annual_salary_from", "0")
         | 
| 83 |  | 
| 84 | 
            +
                    for n in [
         | 
| 85 | 
            +
                        "annual_salary_from",
         | 
| 86 | 
            +
                        "annual_salary_to",
         | 
| 87 | 
            +
                        "industry_name",
         | 
| 88 | 
            +
                        "position_name",
         | 
| 89 | 
            +
                        "responsibilities",
         | 
| 90 | 
            +
                        "corporation_type",
         | 
| 91 | 
            +
                        "scale",
         | 
| 92 | 
            +
                        "corporation_name",
         | 
| 93 | 
            +
                    ]:
         | 
| 94 | 
             
                        cv["basic"][n] = work[-1].get(n, "")
         | 
| 95 |  | 
| 96 | 
             
                if edu:
         | 
| 97 | 
             
                    for n in ["school_name", "discipline_name"]:
         | 
| 98 | 
            +
                        if n in edu[-1]:
         | 
| 99 | 
            +
                            cv["basic"][n] = edu[-1][n]
         | 
| 100 |  | 
| 101 | 
             
                cv["basic"]["updated_at"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         | 
| 102 | 
            +
                if "contact" not in cv:
         | 
| 103 | 
            +
                    cv["contact"] = {}
         | 
| 104 | 
            +
                if not cv["contact"].get("name"):
         | 
| 105 | 
            +
                    cv["contact"]["name"] = cv["basic"].get("name", "")
         | 
| 106 | 
            +
                return cv
         | 
    	
        deepdoc/parser/resume/entities/corporations.py
    CHANGED
    
    | @@ -21,13 +21,18 @@ from . import regions | |
| 21 |  | 
| 22 |  | 
| 23 | 
             
            current_file_path = os.path.dirname(os.path.abspath(__file__))
         | 
| 24 | 
            -
            GOODS = pd.read_csv( | 
|  | |
|  | |
| 25 | 
             
            GOODS["cid"] = GOODS["cid"].astype(str)
         | 
| 26 | 
             
            GOODS = GOODS.set_index(["cid"])
         | 
| 27 | 
            -
            CORP_TKS = json.load( | 
|  | |
|  | |
| 28 | 
             
            GOOD_CORP = json.load(open(os.path.join(current_file_path, "res/good_corp.json"), "r"))
         | 
| 29 | 
             
            CORP_TAG = json.load(open(os.path.join(current_file_path, "res/corp_tag.json"), "r"))
         | 
| 30 |  | 
|  | |
| 31 | 
             
            def baike(cid, default_v=0):
         | 
| 32 | 
             
                global GOODS
         | 
| 33 | 
             
                try:
         | 
| @@ -39,27 +44,41 @@ def baike(cid, default_v=0): | |
| 39 |  | 
| 40 | 
             
            def corpNorm(nm, add_region=True):
         | 
| 41 | 
             
                global CORP_TKS
         | 
| 42 | 
            -
                if not nm or  | 
|  | |
| 43 | 
             
                nm = rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(nm)).lower()
         | 
| 44 | 
             
                nm = re.sub(r"&", "&", nm)
         | 
| 45 | 
             
                nm = re.sub(r"[\(\)()\+'\"\t \*\\【】-]+", " ", nm)
         | 
| 46 | 
            -
                nm = re.sub( | 
| 47 | 
            -
             | 
| 48 | 
            -
                 | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 49 |  | 
| 50 | 
             
                tks = rag_tokenizer.tokenize(nm).split()
         | 
| 51 | 
            -
                reg = [t for i,t in enumerate(tks) if regions.isName(t) and (t != "中国" or i > 0)]
         | 
| 52 | 
             
                nm = ""
         | 
| 53 | 
             
                for t in tks:
         | 
| 54 | 
            -
                    if regions.isName(t) or t in CORP_TKS: | 
| 55 | 
            -
             | 
|  | |
|  | |
| 56 | 
             
                    nm += t
         | 
| 57 |  | 
| 58 | 
             
                r = re.search(r"^([^a-z0-9 \(\)&]{2,})[a-z ]{4,}$", nm.strip())
         | 
| 59 | 
            -
                if r: | 
|  | |
| 60 | 
             
                r = re.search(r"^([a-z ]{3,})[^a-z0-9 \(\)&]{2,}$", nm.strip())
         | 
| 61 | 
            -
                if r: | 
| 62 | 
            -
             | 
|  | |
| 63 |  | 
| 64 |  | 
| 65 | 
             
            def rmNoise(n):
         | 
| @@ -67,33 +86,40 @@ def rmNoise(n): | |
| 67 | 
             
                n = re.sub(r"[,. &()()]+", "", n)
         | 
| 68 | 
             
                return n
         | 
| 69 |  | 
|  | |
| 70 | 
             
            GOOD_CORP = set([corpNorm(rmNoise(c), False) for c in GOOD_CORP])
         | 
| 71 | 
            -
            for c,v in CORP_TAG.items():
         | 
| 72 | 
             
                cc = corpNorm(rmNoise(c), False)
         | 
| 73 | 
             
                if not cc:
         | 
| 74 | 
             
                    logging.debug(c)
         | 
| 75 | 
            -
            CORP_TAG = {corpNorm(rmNoise(c), False):v for c,v in CORP_TAG.items()}
         | 
|  | |
| 76 |  | 
| 77 | 
             
            def is_good(nm):
         | 
| 78 | 
             
                global GOOD_CORP
         | 
| 79 | 
            -
                if nm.find("外派")>=0: | 
|  | |
| 80 | 
             
                nm = rmNoise(nm)
         | 
| 81 | 
             
                nm = corpNorm(nm, False)
         | 
| 82 | 
             
                for n in GOOD_CORP:
         | 
| 83 | 
             
                    if re.match(r"[0-9a-zA-Z]+$", n):
         | 
| 84 | 
            -
                        if n == nm: | 
| 85 | 
            -
             | 
|  | |
|  | |
| 86 | 
             
                return False
         | 
| 87 |  | 
|  | |
| 88 | 
             
            def corp_tag(nm):
         | 
| 89 | 
             
                global CORP_TAG
         | 
| 90 | 
             
                nm = rmNoise(nm)
         | 
| 91 | 
             
                nm = corpNorm(nm, False)
         | 
| 92 | 
             
                for n in CORP_TAG.keys():
         | 
| 93 | 
             
                    if re.match(r"[0-9a-zA-Z., ]+$", n):
         | 
| 94 | 
            -
                        if n == nm: | 
| 95 | 
            -
             | 
| 96 | 
            -
             | 
|  | |
|  | |
| 97 | 
             
                        return CORP_TAG[n]
         | 
| 98 | 
             
                return []
         | 
| 99 | 
            -
             | 
|  | |
| 21 |  | 
| 22 |  | 
| 23 | 
             
            # Directory containing this module; the bundled data files live in its
            # "res/" sub-directory.
            current_file_path = os.path.dirname(os.path.abspath(__file__))


            def _load_json_resource(name):
                """Parse the JSON file ``res/<name>`` located next to this module.

                The file is opened in a ``with`` block so the handle is closed as soon
                as parsing finishes instead of being left to the garbage collector.
                """
                # NOTE(review): UTF-8 is assumed (the JSON interchange encoding); the
                # previous code relied on the platform default encoding -- confirm the
                # resource files are stored as UTF-8.
                with open(os.path.join(current_file_path, "res", name), "r", encoding="utf-8") as f:
                    return json.load(f)


            # Tab-separated table indexed by the stringified "cid" column; missing
            # cells are replaced with 0.
            GOODS = pd.read_csv(
                os.path.join(current_file_path, "res/corp_baike_len.csv"), sep="\t", header=0
            ).fillna(0)
            GOODS["cid"] = GOODS["cid"].astype(str)
            GOODS = GOODS.set_index(["cid"])

            # Tokens that corpNorm() drops from company names.
            CORP_TKS = _load_json_resource("corp.tks.freq.json")
            # Raw company names / name->tag mapping; both are re-keyed by their
            # normalized form further down in this module.
            GOOD_CORP = _load_json_resource("good_corp.json")
            CORP_TAG = _load_json_resource("corp_tag.json")
         | 
| 34 |  | 
| 35 | 
            +
             | 
| 36 | 
             
            def baike(cid, default_v=0):
         | 
| 37 | 
             
                global GOODS
         | 
| 38 | 
             
                try:
         | 
|  | |
| 44 |  | 
| 45 | 
             
            def corpNorm(nm, add_region=True):
                """Normalize a company name into a compact, comparable form.

                Args:
                    nm: Raw company name. Empty values and anything that is not a
                        ``str`` produce ``""``.
                    add_region: When true, ``"(<region>)"`` is appended using the first
                        token of the name that ``regions.isName`` accepts (a leading
                        "中国" token is not used as the region).

                Returns:
                    The normalized name, possibly an empty string.
                """
                # Only non-empty strings can be normalized. The previous guard,
                # `isinstance(nm, str)`, was inverted: it returned "" for every real
                # name and let non-string values fall through to the string calls below.
                if not nm or not isinstance(nm, str):
                    return ""

                # presumably full-width -> half-width and traditional -> simplified
                # conversion; verify against rag_tokenizer.
                nm = rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(nm)).lower()
                # Decode the HTML entity for "&" (replacing "&" with itself was a no-op).
                nm = re.sub(r"&amp;", "&", nm)
                # Collapse brackets, quotes, "+", "*", backslashes, dashes and
                # whitespace into single spaces.
                nm = re.sub(r"[\(\)()\+'\"\t \*\\【】-]+", " ", nm)
                # Cut everything from a dash run or a western legal suffix onwards.
                # count/flags are passed by keyword: passing them positionally is
                # deprecated in recent Python versions and easy to misorder.
                nm = re.sub(
                    r"([—-]+.*| +co\..*|corp\..*| +inc\..*| +ltd.*)",
                    "",
                    nm,
                    count=10000,
                    flags=re.IGNORECASE,
                )
                # Strip one trailing Chinese corporate/generic suffix.
                nm = re.sub(
                    r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$",
                    "",
                    nm,
                    count=10000,
                    flags=re.IGNORECASE,
                )
                # Very short names that do not start with a region name are returned
                # as they are, without tokenization.
                if not nm or (len(nm) < 5 and not regions.isName(nm[0:2])):
                    return nm

                tks = rag_tokenizer.tokenize(nm).split()
                # Region tokens found in the name; "中国" counts only when it is not
                # the very first token.
                reg = [t for i, t in enumerate(tks) if regions.isName(t) and (t != "中国" or i > 0)]

                # Rebuild the name without region tokens and without CORP_TKS tokens.
                nm = ""
                for t in tks:
                    if regions.isName(t) or t in CORP_TKS:
                        continue
                    # Keep a space between two adjacent latin/digit tokens so that
                    # words do not run together.
                    if re.match(r"[0-9a-zA-Z\\,.]+", t) and re.match(r".*[0-9a-zA-Z\,.]+$", nm):
                        nm += " "
                    nm += t

                # Non-latin head followed by a latin tail: keep only the head.
                r = re.search(r"^([^a-z0-9 \(\)&]{2,})[a-z ]{4,}$", nm.strip())
                if r:
                    nm = r.group(1)
                # Latin head followed by a non-latin tail: keep only the head.
                r = re.search(r"^([a-z ]{3,})[^a-z0-9 \(\)&]{2,}$", nm.strip())
                if r:
                    nm = r.group(1)

                region_suffix = "(%s)" % reg[0] if (add_region and reg) else ""
                return nm.strip() + region_suffix
         | 
| 82 |  | 
| 83 |  | 
| 84 | 
             
            def rmNoise(n):
         | 
|  | |
| 86 | 
             
                n = re.sub(r"[,. &()()]+", "", n)
         | 
| 87 | 
             
                return n
         | 
| 88 |  | 
| 89 | 
            +
             | 
| 90 | 
             
            # Re-key both lookup tables by normalized company name (noise removed,
            # no region suffix) so they can be compared with normalized queries.
            GOOD_CORP = {corpNorm(rmNoise(c), False) for c in GOOD_CORP}

            # Single pass over CORP_TAG: each key is normalized once, names that
            # normalize to an empty string are logged for inspection, and the
            # re-keyed mapping is built at the same time (later duplicates overwrite
            # earlier ones, as a dict comprehension would).
            _normalized_corp_tag = {}
            for c, v in CORP_TAG.items():
                cc = corpNorm(rmNoise(c), False)
                if not cc:
                    logging.debug(c)
                _normalized_corp_tag[cc] = v
            CORP_TAG = _normalized_corp_tag
         | 
| 96 | 
            +
             | 
| 97 |  | 
| 98 | 
             
            def is_good(nm):
                """Tell whether company name ``nm`` matches an entry of ``GOOD_CORP``.

                Args:
                    nm: Raw company name.

                Returns:
                    ``False`` when the raw name contains "外派"; otherwise ``True`` if
                    the normalized name equals a purely alphanumeric ``GOOD_CORP``
                    entry or contains any other (non-empty) entry; ``False`` if
                    nothing matches.
                """
                if nm.find("外派") >= 0:
                    return False
                nm = corpNorm(rmNoise(nm), False)
                for n in GOOD_CORP:
                    # An empty entry (a listed name that normalized to "") is a
                    # substring of every string and would mark every company as good.
                    if not n:
                        continue
                    if re.match(r"[0-9a-zA-Z]+$", n):
                        # Alphanumeric-only entries must match exactly; a substring
                        # test would be far too permissive for short latin names.
                        if n == nm:
                            return True
                    elif n in nm:
                        return True
                return False
         | 
| 111 |  | 
| 112 | 
            +
             | 
| 113 | 
             
            def corp_tag(nm):
                """Look up the ``CORP_TAG`` value for company name ``nm``.

                Args:
                    nm: Raw company name.

                Returns:
                    The value stored in ``CORP_TAG`` for the first key that matches
                    the normalized name, or ``[]`` when no key matches. Keys made only
                    of latin letters, digits, ".", "," and spaces must match exactly;
                    any other key matches when it occurs inside the normalized name.
                """
                nm = corpNorm(rmNoise(nm), False)
                for n, tags in CORP_TAG.items():
                    # An empty key (a name that normalized to "") is contained in
                    # every string and made the length ratio below divide by zero.
                    if not n:
                        continue
                    if re.match(r"[0-9a-zA-Z., ]+$", n):
                        if n == nm:
                            return tags
                    elif n in nm:
                        # A key shorter than 3 characters is ignored when the name is
                        # at least twice as long: too weak a signal.
                        if len(n) < 3 and len(nm) / len(n) >= 2:
                            continue
                        return tags
                return []
         | 
|  | 
    	
        deepdoc/parser/resume/entities/degrees.py
    CHANGED
    
    | @@ -11,27 +11,31 @@ | |
| 11 | 
             
            #  limitations under the License.
         | 
| 12 | 
             
            #
         | 
| 13 |  | 
| 14 | 
            -
            TBL = { | 
| 15 | 
            -
            " | 
| 16 | 
            -
            " | 
| 17 | 
            -
            " | 
| 18 | 
            -
            " | 
| 19 | 
            -
            " | 
| 20 | 
            -
            " | 
| 21 | 
            -
            " | 
| 22 | 
            -
            " | 
| 23 | 
            -
            " | 
| 24 | 
            -
            " | 
| 25 | 
            -
            " | 
| 26 | 
            -
            " | 
| 27 | 
            -
            " | 
|  | |
| 28 | 
             
            }
         | 
| 29 |  | 
| 30 | 
            -
            TBL_ = {v:k for k,v in TBL.items()}
         | 
|  | |
| 31 |  | 
| 32 | 
             
            def get_name(id):
         | 
| 33 | 
             
                return TBL.get(str(id), "")
         | 
| 34 |  | 
|  | |
| 35 | 
             
            def get_id(nm):
         | 
| 36 | 
            -
                if not nm: | 
|  | |
| 37 | 
             
                return TBL_.get(nm.upper().strip(), "")
         | 
|  | |
| 11 | 
             
            #  limitations under the License.
         | 
| 12 | 
             
            #
         | 
| 13 |  | 
| 14 | 
            +
            # Degree code (as a string) -> degree label.
            TBL = {
                "94": "EMBA",
                "6": "MBA",
                "95": "MPA",
                "92": "专升本",
                "4": "专科",
                "90": "中专",
                "91": "中技",
                "86": "初中",
                "3": "博士",
                "10": "博士后",
                "1": "本科",
                "2": "硕士",
                "87": "职高",
                "89": "高中",
            }

            # Reverse lookup: degree label -> degree code, in the same entry order.
            TBL_ = dict(zip(TBL.values(), TBL.keys()))
         | 
| 32 | 
            +
             | 
| 33 |  | 
| 34 | 
             
            def get_name(id):
                """Return the label stored in ``TBL`` for degree code ``id``.

                ``id`` is converted with ``str()`` before the lookup, so both ``1`` and
                ``"1"`` work. Unknown codes give ``""``.
                """
                code = str(id)
                if code in TBL:
                    return TBL[code]
                return ""
         | 
| 36 |  | 
| 37 | 
            +
             | 
| 38 | 
             
            def get_id(nm):
                """Return the degree code stored in ``TBL_`` for label ``nm``.

                The label is upper-cased and stripped of surrounding whitespace before
                the lookup. Falsy input and unknown labels give ``""``.
                """
                if nm:
                    label = nm.upper().strip()
                    return TBL_.get(label, "")
                return ""
         | 
    	
        deepdoc/parser/resume/entities/industries.py
    CHANGED
    
    | @@ -11,694 +11,699 @@ | |
| 11 | 
             
            #  limitations under the License.
         | 
| 12 | 
             
            #
         | 
| 13 |  | 
| 14 | 
            -
            TBL = { | 
| 15 | 
            -
            " | 
| 16 | 
            -
            " | 
| 17 | 
            -
            " | 
| 18 | 
            -
            " | 
| 19 | 
            -
            " | 
| 20 | 
            -
            " | 
| 21 | 
            -
            " | 
| 22 | 
            -
            " | 
| 23 | 
            -
            " | 
| 24 | 
            -
            " | 
| 25 | 
            -
            " | 
| 26 | 
            -
            " | 
| 27 | 
            -
            " | 
| 28 | 
            -
            " | 
| 29 | 
            -
            " | 
| 30 | 
            -
            " | 
| 31 | 
            -
            " | 
| 32 | 
            -
            " | 
| 33 | 
            -
            " | 
| 34 | 
            -
            " | 
| 35 | 
            -
            " | 
| 36 | 
            -
            " | 
| 37 | 
            -
            " | 
| 38 | 
            -
            " | 
| 39 | 
            -
            " | 
| 40 | 
            -
            " | 
| 41 | 
            -
            " | 
| 42 | 
            -
            " | 
| 43 | 
            -
            " | 
| 44 | 
            -
            " | 
| 45 | 
            -
            " | 
| 46 | 
            -
            " | 
| 47 | 
            -
            " | 
| 48 | 
            -
            " | 
| 49 | 
            -
            " | 
| 50 | 
            -
            " | 
| 51 | 
            -
            " | 
| 52 | 
            -
            " | 
| 53 | 
            -
            " | 
| 54 | 
            -
            " | 
| 55 | 
            -
            " | 
| 56 | 
            -
            " | 
| 57 | 
            -
            " | 
| 58 | 
            -
            " | 
| 59 | 
            -
            " | 
| 60 | 
            -
            " | 
| 61 | 
            -
            " | 
| 62 | 
            -
            " | 
| 63 | 
            -
            " | 
| 64 | 
            -
            " | 
| 65 | 
            -
            " | 
| 66 | 
            -
            " | 
| 67 | 
            -
            " | 
| 68 | 
            -
            " | 
| 69 | 
            -
            " | 
| 70 | 
            -
            " | 
| 71 | 
            -
            " | 
| 72 | 
            -
            " | 
| 73 | 
            -
            " | 
| 74 | 
            -
            " | 
| 75 | 
            -
            " | 
| 76 | 
            -
            " | 
| 77 | 
            -
            " | 
| 78 | 
            -
            " | 
| 79 | 
            -
            " | 
| 80 | 
            -
            " | 
| 81 | 
            -
            " | 
| 82 | 
            -
            " | 
| 83 | 
            -
            " | 
| 84 | 
            -
            " | 
| 85 | 
            -
            " | 
| 86 | 
            -
            " | 
| 87 | 
            -
            " | 
| 88 | 
            -
            " | 
| 89 | 
            -
            " | 
| 90 | 
            -
            " | 
| 91 | 
            -
            " | 
| 92 | 
            -
            " | 
| 93 | 
            -
            " | 
| 94 | 
            -
            " | 
| 95 | 
            -
            " | 
| 96 | 
            -
            " | 
| 97 | 
            -
            " | 
| 98 | 
            -
            " | 
| 99 | 
            -
            " | 
| 100 | 
            -
            " | 
| 101 | 
            -
            " | 
| 102 | 
            -
            " | 
| 103 | 
            -
            " | 
| 104 | 
            -
            " | 
| 105 | 
            -
            " | 
| 106 | 
            -
            " | 
| 107 | 
            -
            " | 
| 108 | 
            -
            " | 
| 109 | 
            -
            " | 
| 110 | 
            -
            " | 
| 111 | 
            -
            " | 
| 112 | 
            -
            " | 
| 113 | 
            -
            " | 
| 114 | 
            -
            " | 
| 115 | 
            -
            " | 
| 116 | 
            -
            " | 
| 117 | 
            -
            " | 
| 118 | 
            -
            " | 
| 119 | 
            -
            " | 
| 120 | 
            -
            " | 
| 121 | 
            -
            " | 
| 122 | 
            -
            " | 
| 123 | 
            -
            " | 
| 124 | 
            -
            " | 
| 125 | 
            -
            " | 
| 126 | 
            -
            " | 
| 127 | 
            -
            " | 
| 128 | 
            -
            " | 
| 129 | 
            -
            " | 
| 130 | 
            -
            " | 
| 131 | 
            -
            " | 
| 132 | 
            -
            " | 
| 133 | 
            -
            " | 
| 134 | 
            -
            " | 
| 135 | 
            -
            " | 
| 136 | 
            -
            " | 
| 137 | 
            -
            " | 
| 138 | 
            -
            " | 
| 139 | 
            -
            " | 
| 140 | 
            -
            " | 
| 141 | 
            -
            " | 
| 142 | 
            -
            " | 
| 143 | 
            -
            " | 
| 144 | 
            -
            " | 
| 145 | 
            -
            " | 
| 146 | 
            -
            " | 
| 147 | 
            -
            " | 
| 148 | 
            -
            " | 
| 149 | 
            -
            " | 
| 150 | 
            -
            " | 
| 151 | 
            -
            " | 
| 152 | 
            -
            " | 
| 153 | 
            -
            " | 
| 154 | 
            -
            " | 
| 155 | 
            -
            " | 
| 156 | 
            -
            " | 
| 157 | 
            -
            " | 
| 158 | 
            -
            " | 
| 159 | 
            -
            " | 
| 160 | 
            -
            " | 
| 161 | 
            -
            " | 
| 162 | 
            -
            " | 
| 163 | 
            -
            " | 
| 164 | 
            -
            " | 
| 165 | 
            -
            " | 
| 166 | 
            -
            " | 
| 167 | 
            -
            " | 
| 168 | 
            -
            " | 
| 169 | 
            -
            " | 
| 170 | 
            -
            " | 
| 171 | 
            -
            " | 
| 172 | 
            -
            " | 
| 173 | 
            -
            " | 
| 174 | 
            -
            " | 
| 175 | 
            -
            " | 
| 176 | 
            -
            " | 
| 177 | 
            -
            " | 
| 178 | 
            -
            " | 
| 179 | 
            -
            " | 
| 180 | 
            -
            " | 
| 181 | 
            -
            " | 
| 182 | 
            -
            " | 
| 183 | 
            -
            " | 
| 184 | 
            -
            " | 
| 185 | 
            -
            " | 
| 186 | 
            -
            " | 
| 187 | 
            -
            " | 
| 188 | 
            -
            " | 
| 189 | 
            -
            " | 
| 190 | 
            -
            " | 
| 191 | 
            -
            " | 
| 192 | 
            -
            " | 
| 193 | 
            -
            " | 
| 194 | 
            -
            " | 
| 195 | 
            -
            " | 
| 196 | 
            -
            " | 
| 197 | 
            -
            " | 
| 198 | 
            -
            " | 
| 199 | 
            -
            " | 
| 200 | 
            -
            " | 
| 201 | 
            -
            " | 
| 202 | 
            -
            " | 
| 203 | 
            -
            " | 
| 204 | 
            -
            " | 
| 205 | 
            -
            " | 
| 206 | 
            -
            " | 
| 207 | 
            -
            " | 
| 208 | 
            -
            " | 
| 209 | 
            -
            " | 
| 210 | 
            -
            " | 
| 211 | 
            -
            " | 
| 212 | 
            -
            " | 
| 213 | 
            -
            " | 
| 214 | 
            -
            " | 
| 215 | 
            -
            " | 
| 216 | 
            -
            " | 
| 217 | 
            -
            " | 
| 218 | 
            -
            " | 
| 219 | 
            -
            " | 
| 220 | 
            -
            " | 
| 221 | 
            -
            " | 
| 222 | 
            -
            " | 
| 223 | 
            -
            " | 
| 224 | 
            -
            " | 
| 225 | 
            -
            " | 
| 226 | 
            -
            " | 
| 227 | 
            -
            " | 
| 228 | 
            -
            " | 
| 229 | 
            -
            " | 
| 230 | 
            -
            " | 
| 231 | 
            -
            " | 
| 232 | 
            -
            " | 
| 233 | 
            -
            " | 
| 234 | 
            -
            " | 
| 235 | 
            -
            " | 
| 236 | 
            -
            " | 
| 237 | 
            -
            " | 
| 238 | 
            -
            " | 
| 239 | 
            -
            " | 
| 240 | 
            -
            " | 
| 241 | 
            -
            " | 
| 242 | 
            -
            " | 
| 243 | 
            -
            " | 
| 244 | 
            -
            " | 
| 245 | 
            -
            " | 
| 246 | 
            -
            " | 
| 247 | 
            -
            " | 
| 248 | 
            -
            " | 
| 249 | 
            -
            " | 
| 250 | 
            -
            " | 
| 251 | 
            -
            " | 
| 252 | 
            -
            " | 
| 253 | 
            -
            " | 
| 254 | 
            -
            " | 
| 255 | 
            -
            " | 
| 256 | 
            -
            " | 
| 257 | 
            -
            " | 
| 258 | 
            -
            " | 
| 259 | 
            -
            " | 
| 260 | 
            -
            " | 
| 261 | 
            -
            " | 
| 262 | 
            -
            " | 
| 263 | 
            -
            " | 
| 264 | 
            -
            " | 
| 265 | 
            -
            " | 
| 266 | 
            -
            " | 
| 267 | 
            -
            " | 
| 268 | 
            -
            " | 
| 269 | 
            -
            " | 
| 270 | 
            -
            " | 
| 271 | 
            -
            " | 
| 272 | 
            -
            " | 
| 273 | 
            -
            " | 
| 274 | 
            -
            " | 
| 275 | 
            -
            " | 
| 276 | 
            -
            " | 
| 277 | 
            -
            " | 
| 278 | 
            -
            " | 
| 279 | 
            -
            " | 
| 280 | 
            -
            " | 
| 281 | 
            -
            " | 
| 282 | 
            -
            " | 
| 283 | 
            -
            " | 
| 284 | 
            -
            " | 
| 285 | 
            -
            " | 
| 286 | 
            -
            " | 
| 287 | 
            -
            " | 
| 288 | 
            -
            " | 
| 289 | 
            -
            " | 
| 290 | 
            -
            " | 
| 291 | 
            -
            " | 
| 292 | 
            -
            " | 
| 293 | 
            -
            " | 
| 294 | 
            -
            " | 
| 295 | 
            -
            " | 
| 296 | 
            -
            " | 
| 297 | 
            -
            " | 
| 298 | 
            -
            " | 
| 299 | 
            -
            " | 
| 300 | 
            -
            " | 
| 301 | 
            -
            " | 
| 302 | 
            -
            " | 
| 303 | 
            -
            " | 
| 304 | 
            -
            " | 
| 305 | 
            -
            " | 
| 306 | 
            -
            " | 
| 307 | 
            -
            " | 
| 308 | 
            -
            " | 
| 309 | 
            -
            " | 
| 310 | 
            -
            " | 
| 311 | 
            -
            " | 
| 312 | 
            -
            " | 
| 313 | 
            -
            " | 
| 314 | 
            -
            " | 
| 315 | 
            -
            " | 
| 316 | 
            -
            " | 
| 317 | 
            -
            " | 
| 318 | 
            -
            " | 
| 319 | 
            -
            " | 
| 320 | 
            -
            " | 
| 321 | 
            -
            " | 
| 322 | 
            -
            " | 
| 323 | 
            -
            " | 
| 324 | 
            -
            " | 
| 325 | 
            -
            " | 
| 326 | 
            -
            " | 
| 327 | 
            -
            " | 
| 328 | 
            -
            " | 
| 329 | 
            -
            " | 
| 330 | 
            -
            " | 
| 331 | 
            -
            " | 
| 332 | 
            -
            " | 
| 333 | 
            -
            " | 
| 334 | 
            -
            " | 
| 335 | 
            -
            " | 
| 336 | 
            -
            " | 
| 337 | 
            -
            " | 
| 338 | 
            -
            " | 
| 339 | 
            -
            " | 
| 340 | 
            -
            " | 
| 341 | 
            -
            " | 
| 342 | 
            -
            " | 
| 343 | 
            -
            " | 
| 344 | 
            -
            " | 
| 345 | 
            -
            " | 
| 346 | 
            -
            " | 
| 347 | 
            -
            " | 
| 348 | 
            -
            " | 
| 349 | 
            -
            " | 
| 350 | 
            -
            " | 
| 351 | 
            -
            " | 
| 352 | 
            -
            " | 
| 353 | 
            -
            " | 
| 354 | 
            -
            " | 
| 355 | 
            -
            " | 
| 356 | 
            -
            " | 
| 357 | 
            -
            " | 
| 358 | 
            -
            " | 
| 359 | 
            -
            " | 
| 360 | 
            -
            " | 
| 361 | 
            -
            " | 
| 362 | 
            -
            " | 
| 363 | 
            -
            " | 
| 364 | 
            -
            " | 
| 365 | 
            -
            " | 
| 366 | 
            -
            " | 
| 367 | 
            -
            " | 
| 368 | 
            -
            " | 
| 369 | 
            -
            " | 
| 370 | 
            -
            " | 
| 371 | 
            -
            " | 
| 372 | 
            -
            " | 
| 373 | 
            -
            " | 
| 374 | 
            -
            " | 
| 375 | 
            -
            " | 
| 376 | 
            -
            " | 
| 377 | 
            -
            " | 
| 378 | 
            -
            " | 
| 379 | 
            -
            " | 
| 380 | 
            -
            " | 
| 381 | 
            -
            " | 
| 382 | 
            -
            " | 
| 383 | 
            -
            " | 
| 384 | 
            -
            " | 
| 385 | 
            -
            " | 
| 386 | 
            -
            " | 
| 387 | 
            -
            " | 
| 388 | 
            -
            " | 
| 389 | 
            -
            " | 
| 390 | 
            -
            " | 
| 391 | 
            -
            " | 
| 392 | 
            -
            " | 
| 393 | 
            -
            " | 
| 394 | 
            -
            " | 
| 395 | 
            -
            " | 
| 396 | 
            -
            " | 
| 397 | 
            -
            " | 
| 398 | 
            -
            " | 
| 399 | 
            -
            " | 
| 400 | 
            -
            " | 
| 401 | 
            -
            " | 
| 402 | 
            -
            " | 
| 403 | 
            -
            " | 
| 404 | 
            -
            " | 
| 405 | 
            -
            " | 
| 406 | 
            -
            " | 
| 407 | 
            -
            " | 
| 408 | 
            -
            " | 
| 409 | 
            -
            " | 
| 410 | 
            -
            " | 
| 411 | 
            -
            " | 
| 412 | 
            -
            " | 
| 413 | 
            -
            " | 
| 414 | 
            -
            " | 
| 415 | 
            -
            " | 
| 416 | 
            -
            " | 
| 417 | 
            -
            " | 
| 418 | 
            -
            " | 
| 419 | 
            -
            " | 
| 420 | 
            -
            " | 
| 421 | 
            -
            " | 
| 422 | 
            -
            " | 
| 423 | 
            -
            " | 
| 424 | 
            -
            " | 
| 425 | 
            -
            " | 
| 426 | 
            -
            " | 
| 427 | 
            -
            " | 
| 428 | 
            -
            " | 
| 429 | 
            -
            " | 
| 430 | 
            -
            " | 
| 431 | 
            -
            " | 
| 432 | 
            -
            " | 
| 433 | 
            -
            " | 
| 434 | 
            -
            " | 
| 435 | 
            -
            " | 
| 436 | 
            -
            " | 
| 437 | 
            -
            " | 
| 438 | 
            -
            " | 
| 439 | 
            -
            " | 
| 440 | 
            -
            " | 
| 441 | 
            -
            " | 
| 442 | 
            -
            " | 
| 443 | 
            -
            " | 
| 444 | 
            -
            " | 
| 445 | 
            -
            " | 
| 446 | 
            -
            " | 
| 447 | 
            -
            " | 
| 448 | 
            -
            " | 
| 449 | 
            -
            " | 
| 450 | 
            -
            " | 
| 451 | 
            -
            " | 
| 452 | 
            -
            " | 
| 453 | 
            -
            " | 
| 454 | 
            -
            " | 
| 455 | 
            -
            " | 
| 456 | 
            -
            " | 
| 457 | 
            -
            " | 
| 458 | 
            -
            " | 
| 459 | 
            -
            " | 
| 460 | 
            -
            " | 
| 461 | 
            -
            " | 
| 462 | 
            -
            " | 
| 463 | 
            -
            " | 
| 464 | 
            -
            " | 
| 465 | 
            -
            " | 
| 466 | 
            -
            " | 
| 467 | 
            -
            " | 
| 468 | 
            -
            " | 
| 469 | 
            -
            " | 
| 470 | 
            -
            " | 
| 471 | 
            -
            " | 
| 472 | 
            -
            " | 
| 473 | 
            -
            " | 
| 474 | 
            -
            " | 
| 475 | 
            -
            " | 
| 476 | 
            -
            " | 
| 477 | 
            -
            " | 
| 478 | 
            -
            " | 
| 479 | 
            -
            " | 
| 480 | 
            -
            " | 
| 481 | 
            -
            " | 
| 482 | 
            -
            " | 
| 483 | 
            -
            " | 
| 484 | 
            -
            " | 
| 485 | 
            -
            " | 
| 486 | 
            -
            " | 
| 487 | 
            -
            " | 
| 488 | 
            -
            " | 
| 489 | 
            -
            " | 
| 490 | 
            -
            " | 
| 491 | 
            -
            " | 
| 492 | 
            -
            " | 
| 493 | 
            -
            " | 
| 494 | 
            -
            " | 
| 495 | 
            -
            " | 
| 496 | 
            -
            " | 
| 497 | 
            -
            " | 
| 498 | 
            -
            " | 
| 499 | 
            -
            " | 
| 500 | 
            -
            " | 
| 501 | 
            -
            " | 
| 502 | 
            -
            " | 
| 503 | 
            -
            " | 
| 504 | 
            -
            " | 
| 505 | 
            -
            " | 
| 506 | 
            -
            " | 
| 507 | 
            -
            " | 
| 508 | 
            -
            " | 
| 509 | 
            -
            " | 
| 510 | 
            -
            " | 
| 511 | 
            -
            " | 
| 512 | 
            -
            " | 
| 513 | 
            -
            " | 
| 514 | 
            -
            " | 
| 515 | 
            -
            " | 
| 516 | 
            -
            " | 
| 517 | 
            -
            " | 
| 518 | 
            -
            " | 
| 519 | 
            -
            " | 
| 520 | 
            -
            " | 
| 521 | 
            -
            " | 
| 522 | 
            -
            " | 
| 523 | 
            -
            " | 
| 524 | 
            -
            " | 
| 525 | 
            -
            " | 
| 526 | 
            -
            " | 
| 527 | 
            -
            " | 
| 528 | 
            -
            " | 
| 529 | 
            -
            " | 
| 530 | 
            -
            " | 
| 531 | 
            -
            " | 
| 532 | 
            -
            " | 
| 533 | 
            -
            " | 
| 534 | 
            -
            " | 
| 535 | 
            -
            " | 
| 536 | 
            -
            " | 
| 537 | 
            -
            " | 
| 538 | 
            -
            " | 
| 539 | 
            -
            " | 
| 540 | 
            -
            " | 
| 541 | 
            -
            " | 
| 542 | 
            -
            " | 
| 543 | 
            -
            " | 
| 544 | 
            -
            " | 
| 545 | 
            -
            " | 
| 546 | 
            -
            " | 
| 547 | 
            -
            " | 
| 548 | 
            -
            " | 
| 549 | 
            -
            " | 
| 550 | 
            -
            " | 
| 551 | 
            -
            " | 
| 552 | 
            -
            " | 
| 553 | 
            -
            " | 
| 554 | 
            -
            " | 
| 555 | 
            -
            " | 
| 556 | 
            -
            " | 
| 557 | 
            -
            " | 
| 558 | 
            -
            " | 
| 559 | 
            -
            " | 
| 560 | 
            -
            " | 
| 561 | 
            -
            " | 
| 562 | 
            -
            " | 
| 563 | 
            -
            " | 
| 564 | 
            -
            " | 
| 565 | 
            -
            " | 
| 566 | 
            -
            " | 
| 567 | 
            -
            " | 
| 568 | 
            -
            " | 
| 569 | 
            -
            " | 
| 570 | 
            -
            " | 
| 571 | 
            -
            " | 
| 572 | 
            -
            " | 
| 573 | 
            -
            " | 
| 574 | 
            -
            " | 
| 575 | 
            -
            " | 
| 576 | 
            -
            " | 
| 577 | 
            -
            " | 
| 578 | 
            -
            " | 
| 579 | 
            -
            " | 
| 580 | 
            -
            " | 
| 581 | 
            -
            " | 
| 582 | 
            -
            " | 
| 583 | 
            -
            " | 
| 584 | 
            -
            " | 
| 585 | 
            -
            " | 
| 586 | 
            -
            " | 
| 587 | 
            -
            " | 
| 588 | 
            -
            " | 
| 589 | 
            -
            " | 
| 590 | 
            -
            " | 
| 591 | 
            -
            " | 
| 592 | 
            -
            " | 
| 593 | 
            -
            " | 
| 594 | 
            -
            " | 
| 595 | 
            -
            " | 
| 596 | 
            -
            " | 
| 597 | 
            -
            " | 
| 598 | 
            -
            " | 
| 599 | 
            -
            " | 
| 600 | 
            -
            " | 
| 601 | 
            -
            " | 
| 602 | 
            -
            " | 
| 603 | 
            -
            " | 
| 604 | 
            -
            " | 
| 605 | 
            -
            " | 
| 606 | 
            -
            " | 
| 607 | 
            -
            " | 
| 608 | 
            -
            " | 
| 609 | 
            -
            " | 
| 610 | 
            -
            " | 
| 611 | 
            -
            " | 
| 612 | 
            -
            " | 
| 613 | 
            -
            " | 
| 614 | 
            -
            " | 
| 615 | 
            -
            " | 
| 616 | 
            -
            " | 
| 617 | 
            -
            " | 
| 618 | 
            -
            " | 
| 619 | 
            -
            " | 
| 620 | 
            -
            " | 
| 621 | 
            -
            " | 
| 622 | 
            -
            " | 
| 623 | 
            -
            " | 
| 624 | 
            -
            " | 
| 625 | 
            -
            " | 
| 626 | 
            -
            " | 
| 627 | 
            -
            " | 
| 628 | 
            -
            " | 
| 629 | 
            -
            " | 
| 630 | 
            -
            " | 
| 631 | 
            -
            " | 
| 632 | 
            -
            " | 
| 633 | 
            -
            " | 
| 634 | 
            -
            " | 
| 635 | 
            -
            " | 
| 636 | 
            -
            " | 
| 637 | 
            -
            " | 
| 638 | 
            -
            " | 
| 639 | 
            -
            " | 
| 640 | 
            -
            " | 
| 641 | 
            -
            " | 
| 642 | 
            -
            " | 
| 643 | 
            -
            " | 
| 644 | 
            -
            " | 
| 645 | 
            -
            " | 
| 646 | 
            -
            " | 
| 647 | 
            -
            " | 
| 648 | 
            -
            " | 
| 649 | 
            -
            " | 
| 650 | 
            -
            " | 
| 651 | 
            -
            " | 
| 652 | 
            -
            " | 
| 653 | 
            -
            " | 
| 654 | 
            -
            " | 
| 655 | 
            -
            " | 
| 656 | 
            -
            " | 
| 657 | 
            -
            " | 
| 658 | 
            -
            " | 
| 659 | 
            -
            " | 
| 660 | 
            -
            " | 
| 661 | 
            -
            " | 
| 662 | 
            -
            " | 
| 663 | 
            -
            " | 
| 664 | 
            -
            " | 
| 665 | 
            -
            " | 
| 666 | 
            -
            " | 
| 667 | 
            -
            " | 
| 668 | 
            -
            " | 
| 669 | 
            -
            " | 
| 670 | 
            -
            " | 
| 671 | 
            -
            " | 
| 672 | 
            -
            " | 
| 673 | 
            -
            " | 
| 674 | 
            -
            " | 
| 675 | 
            -
            " | 
| 676 | 
            -
            " | 
| 677 | 
            -
            " | 
| 678 | 
            -
            " | 
| 679 | 
            -
            " | 
| 680 | 
            -
            " | 
| 681 | 
            -
            " | 
| 682 | 
            -
            " | 
| 683 | 
            -
            " | 
| 684 | 
            -
            " | 
| 685 | 
            -
            " | 
| 686 | 
            -
            " | 
| 687 | 
            -
            " | 
| 688 | 
            -
            " | 
| 689 | 
            -
            " | 
| 690 | 
            -
            " | 
|  | |
| 691 | 
             
            }
         | 
| 692 |  | 
|  | |
| 693 | 
             
            def get_names(id):
         | 
| 694 | 
             
                id = str(id)
         | 
| 695 | 
             
                nms = []
         | 
| 696 | 
             
                d = TBL.get(id)
         | 
| 697 | 
            -
                if not d: | 
|  | |
| 698 | 
             
                nms.append(d["name"])
         | 
| 699 | 
             
                p = get_names(d["parent"])
         | 
| 700 | 
            -
                if p: | 
|  | |
| 701 | 
             
                return nms
         | 
| 702 |  | 
|  | |
| 703 | 
             
            if __name__ == "__main__":
         | 
| 704 | 
             
                print(get_names("1119"))
         | 
|  | |
| 11 | 
             
            #  limitations under the License.
         | 
| 12 | 
             
            #
         | 
| 13 |  | 
| 14 | 
            +
            TBL = {
         | 
| 15 | 
            +
                "1": {"name": "IT/通信/电子", "parent": "0"},
         | 
| 16 | 
            +
                "2": {"name": "互联网", "parent": "0"},
         | 
| 17 | 
            +
                "3": {"name": "电子商务", "parent": "2"},
         | 
| 18 | 
            +
                "4": {"name": "互联网金融", "parent": "2"},
         | 
| 19 | 
            +
                "5": {"name": "网络游戏", "parent": "2"},
         | 
| 20 | 
            +
                "6": {"name": "社交网络平台", "parent": "2"},
         | 
| 21 | 
            +
                "7": {"name": "视频音乐", "parent": "2"},
         | 
| 22 | 
            +
                "9": {"name": "安全", "parent": "2"},
         | 
| 23 | 
            +
                "10": {"name": "云计算", "parent": "2"},
         | 
| 24 | 
            +
                "12": {"name": "工具类客户端应用", "parent": "2"},
         | 
| 25 | 
            +
                "13": {"name": "互联网广告", "parent": "2"},
         | 
| 26 | 
            +
                "14": {"name": "企业互联网服务", "parent": "2"},
         | 
| 27 | 
            +
                "16": {"name": "在线教育", "parent": "2"},
         | 
| 28 | 
            +
                "17": {"name": "在线医疗", "parent": "2"},
         | 
| 29 | 
            +
                "19": {"name": "B2B", "parent": "3"},
         | 
| 30 | 
            +
                "20": {"name": "B2C", "parent": "3"},
         | 
| 31 | 
            +
                "21": {"name": "C2C", "parent": "3"},
         | 
| 32 | 
            +
                "22": {"name": "生活信息本地化", "parent": "3"},
         | 
| 33 | 
            +
                "23": {"name": "在线旅游", "parent": "2"},
         | 
| 34 | 
            +
                "24": {"name": "第三方支付", "parent": "4"},
         | 
| 35 | 
            +
                "26": {"name": "客户端游戏", "parent": "5"},
         | 
| 36 | 
            +
                "27": {"name": "网页游戏", "parent": "5"},
         | 
| 37 | 
            +
                "28": {"name": "手机游戏", "parent": "5"},
         | 
| 38 | 
            +
                "29": {"name": "微博", "parent": "6"},
         | 
| 39 | 
            +
                "30": {"name": "社交网站", "parent": "6"},
         | 
| 40 | 
            +
                "31": {"name": "在线视频", "parent": "7"},
         | 
| 41 | 
            +
                "32": {"name": "在线音乐", "parent": "7"},
         | 
| 42 | 
            +
                "35": {"name": "企业安全", "parent": "9"},
         | 
| 43 | 
            +
                "36": {"name": "个人安全", "parent": "9"},
         | 
| 44 | 
            +
                "37": {"name": "企业级云服务", "parent": "10"},
         | 
| 45 | 
            +
                "38": {"name": "个人级云服务", "parent": "10"},
         | 
| 46 | 
            +
                "43": {"name": "输入法", "parent": "12"},
         | 
| 47 | 
            +
                "44": {"name": "浏览器", "parent": "12"},
         | 
| 48 | 
            +
                "45": {"name": "词典", "parent": "12"},
         | 
| 49 | 
            +
                "46": {"name": "播放器", "parent": "12"},
         | 
| 50 | 
            +
                "47": {"name": "下载器", "parent": "12"},
         | 
| 51 | 
            +
                "48": {"name": "IM", "parent": "12"},
         | 
| 52 | 
            +
                "49": {"name": "广告服务", "parent": "13"},
         | 
| 53 | 
            +
                "50": {"name": "第三方广告网络平台", "parent": "13"},
         | 
| 54 | 
            +
                "51": {"name": "媒体代理", "parent": "13"},
         | 
| 55 | 
            +
                "52": {"name": "创意代理", "parent": "13"},
         | 
| 56 | 
            +
                "53": {"name": "IT-综合", "parent": "1"},
         | 
| 57 | 
            +
                "71": {"name": "团购", "parent": "3"},
         | 
| 58 | 
            +
                "72": {"name": "地图", "parent": "2"},
         | 
| 59 | 
            +
                "73": {"name": "数据存储", "parent": "2"},
         | 
| 60 | 
            +
                "414": {"name": "计算机软件", "parent": "1"},
         | 
| 61 | 
            +
                "415": {"name": "计算机硬件", "parent": "1"},
         | 
| 62 | 
            +
                "416": {"name": "计算机服务(系统、数据服务、维修)", "parent": "1"},
         | 
| 63 | 
            +
                "417": {"name": "通信/电信/网络设备", "parent": "1"},
         | 
| 64 | 
            +
                "418": {"name": "通信/电信运营、增值服务", "parent": "1"},
         | 
| 65 | 
            +
                "419": {"name": "电子技术/半导体/集成电路", "parent": "1"},
         | 
| 66 | 
            +
                "472": {"name": "P2P网贷", "parent": "4"},
         | 
| 67 | 
            +
                "473": {"name": "互联网理财", "parent": "4"},
         | 
| 68 | 
            +
                "474": {"name": "婚恋", "parent": "6"},
         | 
| 69 | 
            +
                "476": {"name": "虚拟化", "parent": "10"},
         | 
| 70 | 
            +
                "477": {"name": "邮箱", "parent": "12"},
         | 
| 71 | 
            +
                "478": {"name": "商业智能", "parent": "14"},
         | 
| 72 | 
            +
                "479": {"name": "企业建站", "parent": "14"},
         | 
| 73 | 
            +
                "480": {"name": "安防", "parent": "14"},
         | 
| 74 | 
            +
                "481": {"name": "网络营销", "parent": "2"},
         | 
| 75 | 
            +
                "487": {"name": "智能终端", "parent": "2"},
         | 
| 76 | 
            +
                "488": {"name": "移动互联网", "parent": "2"},
         | 
| 77 | 
            +
                "489": {"name": "数字城市", "parent": "2"},
         | 
| 78 | 
            +
                "490": {"name": "大数据", "parent": "2"},
         | 
| 79 | 
            +
                "491": {"name": "互联网人力资源", "parent": "2"},
         | 
| 80 | 
            +
                "492": {"name": "舆情监控", "parent": "2"},
         | 
| 81 | 
            +
                "493": {"name": "移动营销", "parent": "481"},
         | 
| 82 | 
            +
                "494": {"name": "微博营销", "parent": "481"},
         | 
| 83 | 
            +
                "495": {"name": "精准营销", "parent": "481"},
         | 
| 84 | 
            +
                "496": {"name": "海外营销", "parent": "481"},
         | 
| 85 | 
            +
                "497": {"name": "微信营销", "parent": "481"},
         | 
| 86 | 
            +
                "498": {"name": "智能手机", "parent": "487"},
         | 
| 87 | 
            +
                "499": {"name": "可穿戴设备", "parent": "487"},
         | 
| 88 | 
            +
                "500": {"name": "智能电视", "parent": "487"},
         | 
| 89 | 
            +
                "501": {"name": "WAP", "parent": "488"},
         | 
| 90 | 
            +
                "502": {"name": "物联网", "parent": "489"},
         | 
| 91 | 
            +
                "503": {"name": "O2O", "parent": "489"},
         | 
| 92 | 
            +
                "504": {"name": "数字出版", "parent": "489"},
         | 
| 93 | 
            +
                "505": {"name": "搜索", "parent": "2"},
         | 
| 94 | 
            +
                "506": {"name": "垂直搜索", "parent": "505"},
         | 
| 95 | 
            +
                "507": {"name": "无线搜索", "parent": "505"},
         | 
| 96 | 
            +
                "508": {"name": "网页搜索", "parent": "505"},
         | 
| 97 | 
            +
                "509": {"name": "网址导航", "parent": "2"},
         | 
| 98 | 
            +
                "510": {"name": "门户", "parent": "2"},
         | 
| 99 | 
            +
                "511": {"name": "网络文学", "parent": "2"},
         | 
| 100 | 
            +
                "512": {"name": "自媒体", "parent": "2"},
         | 
| 101 | 
            +
                "513": {"name": "金融", "parent": "0"},
         | 
| 102 | 
            +
                "514": {"name": "建筑与房地产", "parent": "0"},
         | 
| 103 | 
            +
                "515": {"name": "专业服务", "parent": "0"},
         | 
| 104 | 
            +
                "516": {"name": "教育培训", "parent": "0"},
         | 
| 105 | 
            +
                "517": {"name": "文化传媒", "parent": "0"},
         | 
| 106 | 
            +
                "518": {"name": "消费品", "parent": "0"},
         | 
| 107 | 
            +
                "519": {"name": "工业", "parent": "0"},
         | 
| 108 | 
            +
                "520": {"name": "交通物流", "parent": "0"},
         | 
| 109 | 
            +
                "521": {"name": "贸易", "parent": "0"},
         | 
| 110 | 
            +
                "522": {"name": "医药", "parent": "0"},
         | 
| 111 | 
            +
                "523": {"name": "医疗器械", "parent": "522"},
         | 
| 112 | 
            +
                "524": {"name": "保健品", "parent": "518"},
         | 
| 113 | 
            +
                "525": {"name": "服务业", "parent": "0"},
         | 
| 114 | 
            +
                "526": {"name": "能源/矿产/环保", "parent": "0"},
         | 
| 115 | 
            +
                "527": {"name": "化工", "parent": "0"},
         | 
| 116 | 
            +
                "528": {"name": "政府", "parent": "0"},
         | 
| 117 | 
            +
                "529": {"name": "公共事业", "parent": "0"},
         | 
| 118 | 
            +
                "530": {"name": "非盈利机构", "parent": "0"},
         | 
| 119 | 
            +
                "531": {"name": "农业", "parent": "1131"},
         | 
| 120 | 
            +
                "532": {"name": "林业", "parent": "1131"},
         | 
| 121 | 
            +
                "533": {"name": "畜牧业", "parent": "1131"},
         | 
| 122 | 
            +
                "534": {"name": "渔业", "parent": "1131"},
         | 
| 123 | 
            +
                "535": {"name": "学术科研", "parent": "0"},
         | 
| 124 | 
            +
                "536": {"name": "零售", "parent": "0"},
         | 
| 125 | 
            +
                "537": {"name": "银行", "parent": "513"},
         | 
| 126 | 
            +
                "538": {"name": "保险", "parent": "513"},
         | 
| 127 | 
            +
                "539": {"name": "证券", "parent": "513"},
         | 
| 128 | 
            +
                "540": {"name": "基金", "parent": "513"},
         | 
| 129 | 
            +
                "541": {"name": "信托", "parent": "513"},
         | 
| 130 | 
            +
                "542": {"name": "担保", "parent": "513"},
         | 
| 131 | 
            +
                "543": {"name": "典当", "parent": "513"},
         | 
| 132 | 
            +
                "544": {"name": "拍卖", "parent": "513"},
         | 
| 133 | 
            +
                "545": {"name": "投资/融资", "parent": "513"},
         | 
| 134 | 
            +
                "546": {"name": "期货", "parent": "513"},
         | 
| 135 | 
            +
                "547": {"name": "房地产开发", "parent": "514"},
         | 
| 136 | 
            +
                "548": {"name": "工程施工", "parent": "514"},
         | 
| 137 | 
            +
                "549": {"name": "建筑设计", "parent": "514"},
         | 
| 138 | 
            +
                "550": {"name": "房地产代理", "parent": "514"},
         | 
| 139 | 
            +
                "551": {"name": "物业管理", "parent": "514"},
         | 
| 140 | 
            +
                "552": {"name": "室内设计", "parent": "514"},
         | 
| 141 | 
            +
                "553": {"name": "装修装潢", "parent": "514"},
         | 
| 142 | 
            +
                "554": {"name": "市政工程", "parent": "514"},
         | 
| 143 | 
            +
                "555": {"name": "工程造价", "parent": "514"},
         | 
| 144 | 
            +
                "556": {"name": "工程监理", "parent": "514"},
         | 
| 145 | 
            +
                "557": {"name": "环境工程", "parent": "514"},
         | 
| 146 | 
            +
                "558": {"name": "园林景观", "parent": "514"},
         | 
| 147 | 
            +
                "559": {"name": "法律", "parent": "515"},
         | 
| 148 | 
            +
                "560": {"name": "人力资源", "parent": "515"},
         | 
| 149 | 
            +
                "561": {"name": "会计", "parent": "1125"},
         | 
| 150 | 
            +
                "562": {"name": "审计", "parent": "515"},
         | 
| 151 | 
            +
                "563": {"name": "检测认证", "parent": "515"},
         | 
| 152 | 
            +
                "565": {"name": "翻译", "parent": "515"},
         | 
| 153 | 
            +
                "566": {"name": "中介", "parent": "515"},
         | 
| 154 | 
            +
                "567": {"name": "咨询", "parent": "515"},
         | 
| 155 | 
            +
                "568": {"name": "外包服务", "parent": "515"},
         | 
| 156 | 
            +
                "569": {"name": "家教", "parent": "516"},
         | 
| 157 | 
            +
                "570": {"name": "早教", "parent": "516"},
         | 
| 158 | 
            +
                "571": {"name": "职业技能培训", "parent": "516"},
         | 
| 159 | 
            +
                "572": {"name": "外语培训", "parent": "516"},
         | 
| 160 | 
            +
                "573": {"name": "设计培训", "parent": "516"},
         | 
| 161 | 
            +
                "574": {"name": "IT培训", "parent": "516"},
         | 
| 162 | 
            +
                "575": {"name": "文艺体育培训", "parent": "516"},
         | 
| 163 | 
            +
                "576": {"name": "学历教育", "parent": "516"},
         | 
| 164 | 
            +
                "577": {"name": "管理培训", "parent": "516"},
         | 
| 165 | 
            +
                "578": {"name": "民办基础教育", "parent": "516"},
         | 
| 166 | 
            +
                "579": {"name": "广告", "parent": "517"},
         | 
| 167 | 
            +
                "580": {"name": "媒体", "parent": "517"},
         | 
| 168 | 
            +
                "581": {"name": "会展", "parent": "517"},
         | 
| 169 | 
            +
                "582": {"name": "公关", "parent": "517"},
         | 
| 170 | 
            +
                "583": {"name": "影视", "parent": "517"},
         | 
| 171 | 
            +
                "584": {"name": "艺术", "parent": "517"},
         | 
| 172 | 
            +
                "585": {"name": "文化传播", "parent": "517"},
         | 
| 173 | 
            +
                "586": {"name": "娱乐", "parent": "517"},
         | 
| 174 | 
            +
                "587": {"name": "体育", "parent": "517"},
         | 
| 175 | 
            +
                "588": {"name": "出版", "parent": "517"},
         | 
| 176 | 
            +
                "589": {"name": "休闲", "parent": "517"},
         | 
| 177 | 
            +
                "590": {"name": "动漫", "parent": "517"},
         | 
| 178 | 
            +
                "591": {"name": "市场推广", "parent": "517"},
         | 
| 179 | 
            +
                "592": {"name": "市场研究", "parent": "517"},
         | 
| 180 | 
            +
                "593": {"name": "食品", "parent": "1129"},
         | 
| 181 | 
            +
                "594": {"name": "饮料", "parent": "1129"},
         | 
| 182 | 
            +
                "595": {"name": "烟草", "parent": "1129"},
         | 
| 183 | 
            +
                "596": {"name": "酒品", "parent": "518"},
         | 
| 184 | 
            +
                "597": {"name": "服饰", "parent": "518"},
         | 
| 185 | 
            +
                "598": {"name": "纺织", "parent": "518"},
         | 
| 186 | 
            +
                "599": {"name": "化妆品", "parent": "1129"},
         | 
| 187 | 
            +
                "600": {"name": "日用品", "parent": "1129"},
         | 
| 188 | 
            +
                "601": {"name": "家电", "parent": "518"},
         | 
| 189 | 
            +
                "602": {"name": "家具", "parent": "518"},
         | 
| 190 | 
            +
                "603": {"name": "办公用品", "parent": "518"},
         | 
| 191 | 
            +
                "604": {"name": "奢侈品", "parent": "518"},
         | 
| 192 | 
            +
                "605": {"name": "珠宝", "parent": "518"},
         | 
| 193 | 
            +
                "606": {"name": "数码产品", "parent": "518"},
         | 
| 194 | 
            +
                "607": {"name": "玩具", "parent": "518"},
         | 
| 195 | 
            +
                "608": {"name": "图书", "parent": "518"},
         | 
| 196 | 
            +
                "609": {"name": "音像", "parent": "518"},
         | 
| 197 | 
            +
                "610": {"name": "钟表", "parent": "518"},
         | 
| 198 | 
            +
                "611": {"name": "箱包", "parent": "518"},
         | 
| 199 | 
            +
                "612": {"name": "母婴", "parent": "518"},
         | 
| 200 | 
            +
                "613": {"name": "营养保健", "parent": "518"},
         | 
| 201 | 
            +
                "614": {"name": "户外用品", "parent": "518"},
         | 
| 202 | 
            +
                "615": {"name": "健身器材", "parent": "518"},
         | 
| 203 | 
            +
                "616": {"name": "乐器", "parent": "518"},
         | 
| 204 | 
            +
                "617": {"name": "汽车用品", "parent": "518"},
         | 
| 205 | 
            +
                "619": {"name": "厨具", "parent": "518"},
         | 
| 206 | 
            +
                "620": {"name": "机械制造", "parent": "519"},
         | 
| 207 | 
            +
                "621": {"name": "流体控制", "parent": "519"},
         | 
| 208 | 
            +
                "622": {"name": "自动化控制", "parent": "519"},
         | 
| 209 | 
            +
                "623": {"name": "仪器仪表", "parent": "519"},
         | 
| 210 | 
            +
                "624": {"name": "航空/航天", "parent": "519"},
         | 
| 211 | 
            +
                "625": {"name": "交通设施", "parent": "519"},
         | 
| 212 | 
            +
                "626": {"name": "工业电子", "parent": "519"},
         | 
| 213 | 
            +
                "627": {"name": "建材", "parent": "519"},
         | 
| 214 | 
            +
                "628": {"name": "五金材料", "parent": "519"},
         | 
| 215 | 
            +
                "629": {"name": "汽车", "parent": "519"},
         | 
| 216 | 
            +
                "630": {"name": "印刷", "parent": "519"},
         | 
| 217 | 
            +
                "631": {"name": "造纸", "parent": "519"},
         | 
| 218 | 
            +
                "632": {"name": "包装", "parent": "519"},
         | 
| 219 | 
            +
                "633": {"name": "原材料及加工", "parent": "519"},
         | 
| 220 | 
            +
                "634": {"name": "物流", "parent": "520"},
         | 
| 221 | 
            +
                "635": {"name": "仓储", "parent": "520"},
         | 
| 222 | 
            +
                "636": {"name": "客运", "parent": "520"},
         | 
| 223 | 
            +
                "637": {"name": "快递", "parent": "520"},
         | 
| 224 | 
            +
                "638": {"name": "化学药", "parent": "522"},
         | 
| 225 | 
            +
                "639": {"name": "中药", "parent": "522"},
         | 
| 226 | 
            +
                "640": {"name": "生物制药", "parent": "522"},
         | 
| 227 | 
            +
                "641": {"name": "兽药", "parent": "522"},
         | 
| 228 | 
            +
                "642": {"name": "农药", "parent": "522"},
         | 
| 229 | 
            +
                "643": {"name": "CRO", "parent": "522"},
         | 
| 230 | 
            +
                "644": {"name": "消毒", "parent": "522"},
         | 
| 231 | 
            +
                "645": {"name": "医药商业", "parent": "522"},
         | 
| 232 | 
            +
                "646": {"name": "医疗服务", "parent": "522"},
         | 
| 233 | 
            +
                "647": {"name": "医疗器械", "parent": "523"},
         | 
| 234 | 
            +
                "648": {"name": "制药设备", "parent": "523"},
         | 
| 235 | 
            +
                "649": {"name": "医用耗材", "parent": "523"},
         | 
| 236 | 
            +
                "650": {"name": "手术器械", "parent": "523"},
         | 
| 237 | 
            +
                "651": {"name": "保健器材", "parent": "524"},
         | 
| 238 | 
            +
                "652": {"name": "性保健品", "parent": "524"},
         | 
| 239 | 
            +
                "653": {"name": "医药保养", "parent": "524"},
         | 
| 240 | 
            +
                "654": {"name": "医用保健", "parent": "524"},
         | 
| 241 | 
            +
                "655": {"name": "酒店", "parent": "525"},
         | 
| 242 | 
            +
                "656": {"name": "餐饮", "parent": "525"},
         | 
| 243 | 
            +
                "657": {"name": "旅游", "parent": "525"},
         | 
| 244 | 
            +
                "658": {"name": "生活服务", "parent": "525"},
         | 
| 245 | 
            +
                "659": {"name": "保健服务", "parent": "525"},
         | 
| 246 | 
            +
                "660": {"name": "运动健身", "parent": "525"},
         | 
| 247 | 
            +
                "661": {"name": "家政服务", "parent": "525"},
         | 
| 248 | 
            +
                "662": {"name": "婚庆服务", "parent": "525"},
         | 
| 249 | 
            +
                "663": {"name": "租赁服务", "parent": "525"},
         | 
| 250 | 
            +
                "664": {"name": "维修服务", "parent": "525"},
         | 
| 251 | 
            +
                "665": {"name": "石油天然气", "parent": "526"},
         | 
| 252 | 
            +
                "666": {"name": "电力", "parent": "526"},
         | 
| 253 | 
            +
                "667": {"name": "新能源", "parent": "526"},
         | 
| 254 | 
            +
                "668": {"name": "水利", "parent": "526"},
         | 
| 255 | 
            +
                "669": {"name": "矿产", "parent": "526"},
         | 
| 256 | 
            +
                "670": {"name": "采掘业", "parent": "526"},
         | 
| 257 | 
            +
                "671": {"name": "冶炼", "parent": "526"},
         | 
| 258 | 
            +
                "672": {"name": "环保", "parent": "526"},
         | 
| 259 | 
            +
                "673": {"name": "无机化工原料", "parent": "527"},
         | 
| 260 | 
            +
                "674": {"name": "有机化工原料", "parent": "527"},
         | 
| 261 | 
            +
                "675": {"name": "精细化学品", "parent": "527"},
         | 
| 262 | 
            +
                "676": {"name": "化工设备", "parent": "527"},
         | 
| 263 | 
            +
                "677": {"name": "化工工程", "parent": "527"},
         | 
| 264 | 
            +
                "678": {"name": "资产管理", "parent": "513"},
         | 
| 265 | 
            +
                "679": {"name": "金融租赁", "parent": "513"},
         | 
| 266 | 
            +
                "680": {"name": "征信及信评机构", "parent": "513"},
         | 
| 267 | 
            +
                "681": {"name": "资产评估机构", "parent": "513"},
         | 
| 268 | 
            +
                "683": {"name": "金融监管机构", "parent": "513"},
         | 
| 269 | 
            +
                "684": {"name": "国际贸易", "parent": "521"},
         | 
| 270 | 
            +
                "685": {"name": "海关", "parent": "521"},
         | 
| 271 | 
            +
                "686": {"name": "购物中心", "parent": "536"},
         | 
| 272 | 
            +
                "687": {"name": "超市", "parent": "536"},
         | 
| 273 | 
            +
                "688": {"name": "便利店", "parent": "536"},
         | 
| 274 | 
            +
                "689": {"name": "专卖店", "parent": "536"},
         | 
| 275 | 
            +
                "690": {"name": "专业店", "parent": "536"},
         | 
| 276 | 
            +
                "691": {"name": "百货店", "parent": "536"},
         | 
| 277 | 
            +
                "692": {"name": "杂货店", "parent": "536"},
         | 
| 278 | 
            +
                "693": {"name": "个人银行", "parent": "537"},
         | 
| 279 | 
            +
                "695": {"name": "私人银行", "parent": "537"},
         | 
| 280 | 
            +
                "696": {"name": "公司银行", "parent": "537"},
         | 
| 281 | 
            +
                "697": {"name": "投资银行", "parent": "537"},
         | 
| 282 | 
            +
                "698": {"name": "政策性银行", "parent": "537"},
         | 
| 283 | 
            +
                "699": {"name": "中央银行", "parent": "537"},
         | 
| 284 | 
            +
                "700": {"name": "人寿险", "parent": "538"},
         | 
| 285 | 
            +
                "701": {"name": "财产险", "parent": "538"},
         | 
| 286 | 
            +
                "702": {"name": "再保险", "parent": "538"},
         | 
| 287 | 
            +
                "703": {"name": "养老险", "parent": "538"},
         | 
| 288 | 
            +
                "704": {"name": "保险代理公司", "parent": "538"},
         | 
| 289 | 
            +
                "705": {"name": "公募基金", "parent": "540"},
         | 
| 290 | 
            +
                "707": {"name": "私募基金", "parent": "540"},
         | 
| 291 | 
            +
                "708": {"name": "第三方理财", "parent": "679"},
         | 
| 292 | 
            +
                "709": {"name": "资产管理公司", "parent": "679"},
         | 
| 293 | 
            +
                "711": {"name": "房产中介", "parent": "566"},
         | 
| 294 | 
            +
                "712": {"name": "职业中介", "parent": "566"},
         | 
| 295 | 
            +
                "713": {"name": "婚姻中介", "parent": "566"},
         | 
| 296 | 
            +
                "714": {"name": "战略咨询", "parent": "567"},
         | 
| 297 | 
            +
                "715": {"name": "投资咨询", "parent": "567"},
         | 
| 298 | 
            +
                "716": {"name": "心理咨询", "parent": "567"},
         | 
| 299 | 
            +
                "717": {"name": "留学移民咨询", "parent": "567"},
         | 
| 300 | 
            +
                "718": {"name": "工商注册代理", "parent": "568"},
         | 
| 301 | 
            +
                "719": {"name": "商标专利代理", "parent": "568"},
         | 
| 302 | 
            +
                "720": {"name": "财务代理", "parent": "568"},
         | 
| 303 | 
            +
                "721": {"name": "工程机械", "parent": "620"},
         | 
| 304 | 
            +
                "722": {"name": "农业机械", "parent": "620"},
         | 
| 305 | 
            +
                "723": {"name": "海工设备", "parent": "620"},
         | 
| 306 | 
            +
                "724": {"name": "包装机械", "parent": "620"},
         | 
| 307 | 
            +
                "725": {"name": "印刷机械", "parent": "620"},
         | 
| 308 | 
            +
                "726": {"name": "数控机床", "parent": "620"},
         | 
| 309 | 
            +
                "727": {"name": "矿山机械", "parent": "620"},
         | 
| 310 | 
            +
                "728": {"name": "水泵", "parent": "621"},
         | 
| 311 | 
            +
                "729": {"name": "管道", "parent": "621"},
         | 
| 312 | 
            +
                "730": {"name": "阀门", "parent": "621"},
         | 
| 313 | 
            +
                "732": {"name": "压缩机", "parent": "621"},
         | 
| 314 | 
            +
                "733": {"name": "集散控制系统", "parent": "622"},
         | 
| 315 | 
            +
                "734": {"name": "远程控制", "parent": "622"},
         | 
| 316 | 
            +
                "735": {"name": "液压系统", "parent": "622"},
         | 
| 317 | 
            +
                "736": {"name": "楼宇智能化", "parent": "622"},
         | 
| 318 | 
            +
                "737": {"name": "飞机制造", "parent": "624"},
         | 
| 319 | 
            +
                "738": {"name": "航空公司", "parent": "624"},
         | 
| 320 | 
            +
                "739": {"name": "发动机", "parent": "624"},
         | 
| 321 | 
            +
                "740": {"name": "复合材料", "parent": "624"},
         | 
| 322 | 
            +
                "741": {"name": "高铁", "parent": "625"},
         | 
| 323 | 
            +
                "742": {"name": "地铁", "parent": "625"},
         | 
| 324 | 
            +
                "743": {"name": "信号传输", "parent": "625"},
         | 
| 325 | 
            +
                "745": {"name": "结构材料", "parent": "627"},
         | 
| 326 | 
            +
                "746": {"name": "装饰材料", "parent": "627"},
         | 
| 327 | 
            +
                "747": {"name": "专用材料", "parent": "627"},
         | 
| 328 | 
            +
                "749": {"name": "经销商集团", "parent": "629"},
         | 
| 329 | 
            +
                "750": {"name": "整车制造", "parent": "629"},
         | 
| 330 | 
            +
                "751": {"name": "汽车零配件", "parent": "629"},
         | 
| 331 | 
            +
                "752": {"name": "外型设计", "parent": "629"},
         | 
| 332 | 
            +
                "753": {"name": "平版印刷", "parent": "630"},
         | 
| 333 | 
            +
                "754": {"name": "凸版印刷", "parent": "630"},
         | 
| 334 | 
            +
                "755": {"name": "凹版印刷", "parent": "630"},
         | 
| 335 | 
            +
                "756": {"name": "孔版印刷", "parent": "630"},
         | 
| 336 | 
            +
                "757": {"name": "印刷用纸", "parent": "631"},
         | 
| 337 | 
            +
                "758": {"name": "书写、制图及复制用纸", "parent": "631"},
         | 
| 338 | 
            +
                "759": {"name": "包装用纸", "parent": "631"},
         | 
| 339 | 
            +
                "760": {"name": "生活、卫生及装饰用纸", "parent": "631"},
         | 
| 340 | 
            +
                "761": {"name": "技术用纸", "parent": "631"},
         | 
| 341 | 
            +
                "762": {"name": "加工纸原纸", "parent": "631"},
         | 
| 342 | 
            +
                "763": {"name": "食品包装", "parent": "632"},
         | 
| 343 | 
            +
                "764": {"name": "医药包装", "parent": "632"},
         | 
| 344 | 
            +
                "765": {"name": "日化包装", "parent": "632"},
         | 
| 345 | 
            +
                "766": {"name": "物流包装", "parent": "632"},
         | 
| 346 | 
            +
                "767": {"name": "礼品包装", "parent": "632"},
         | 
| 347 | 
            +
                "768": {"name": "电子五金包装", "parent": "632"},
         | 
| 348 | 
            +
                "769": {"name": "汽车服务", "parent": "525"},
         | 
| 349 | 
            +
                "770": {"name": "汽车保养", "parent": "769"},
         | 
| 350 | 
            +
                "771": {"name": "租车", "parent": "769"},
         | 
| 351 | 
            +
                "773": {"name": "出租车", "parent": "769"},
         | 
| 352 | 
            +
                "774": {"name": "代驾", "parent": "769"},
         | 
| 353 | 
            +
                "775": {"name": "发电", "parent": "666"},
         | 
| 354 | 
            +
                "777": {"name": "输配电", "parent": "666"},
         | 
| 355 | 
            +
                "779": {"name": "风电", "parent": "667"},
         | 
| 356 | 
            +
                "780": {"name": "光伏/太阳能", "parent": "667"},
         | 
| 357 | 
            +
                "781": {"name": "生物质发电", "parent": "667"},
         | 
| 358 | 
            +
                "782": {"name": "煤化工", "parent": "667"},
         | 
| 359 | 
            +
                "783": {"name": "垃圾发电", "parent": "667"},
         | 
| 360 | 
            +
                "784": {"name": "核电", "parent": "667"},
         | 
| 361 | 
            +
                "785": {"name": "能源矿产", "parent": "669"},
         | 
| 362 | 
            +
                "786": {"name": "金属矿产", "parent": "669"},
         | 
| 363 | 
            +
                "787": {"name": "非金属矿产", "parent": "669"},
         | 
| 364 | 
            +
                "788": {"name": "水气矿产", "parent": "669"},
         | 
| 365 | 
            +
                "789": {"name": "锅炉", "parent": "775"},
         | 
| 366 | 
            +
                "790": {"name": "发电机", "parent": "775"},
         | 
| 367 | 
            +
                "791": {"name": "汽轮机", "parent": "775"},
         | 
| 368 | 
            +
                "792": {"name": "燃机", "parent": "775"},
         | 
| 369 | 
            +
                "793": {"name": "冷却", "parent": "775"},
         | 
| 370 | 
            +
                "794": {"name": "电力设计院", "parent": "775"},
         | 
| 371 | 
            +
                "795": {"name": "高压输配电", "parent": "777"},
         | 
| 372 | 
            +
                "796": {"name": "中压输配电", "parent": "777"},
         | 
| 373 | 
            +
                "797": {"name": "低压输配电", "parent": "777"},
         | 
| 374 | 
            +
                "798": {"name": "继电保护", "parent": "777"},
         | 
| 375 | 
            +
                "799": {"name": "智能电网", "parent": "777"},
         | 
| 376 | 
            +
                "800": {"name": "小学", "parent": "516"},
         | 
| 377 | 
            +
                "801": {"name": "电动车", "parent": "519"},
         | 
| 378 | 
            +
                "802": {"name": "皮具箱包", "parent": "518"},
         | 
| 379 | 
            +
                "803": {"name": "医药制造", "parent": "522"},
         | 
| 380 | 
            +
                "804": {"name": "电器销售", "parent": "536"},
         | 
| 381 | 
            +
                "805": {"name": "塑料制品", "parent": "527"},
         | 
| 382 | 
            +
                "806": {"name": "公益基金会", "parent": "530"},
         | 
| 383 | 
            +
                "807": {"name": "美发服务", "parent": "525"},
         | 
| 384 | 
            +
                "808": {"name": "农业养殖", "parent": "531"},
         | 
| 385 | 
            +
                "809": {"name": "金融服务", "parent": "513"},
         | 
| 386 | 
            +
                "810": {"name": "商业地产综合体", "parent": "514"},
         | 
| 387 | 
            +
                "811": {"name": "美容服务", "parent": "525"},
         | 
| 388 | 
            +
                "812": {"name": "灯饰", "parent": "518"},
         | 
| 389 | 
            +
                "813": {"name": "油墨颜料产品", "parent": "527"},
         | 
| 390 | 
            +
                "814": {"name": "眼镜制造", "parent": "518"},
         | 
| 391 | 
            +
                "815": {"name": "农业生物技术", "parent": "531"},
         | 
| 392 | 
            +
                "816": {"name": "体育用品", "parent": "518"},
         | 
| 393 | 
            +
                "817": {"name": "保健用品", "parent": "524"},
         | 
| 394 | 
            +
                "818": {"name": "化学化工产品", "parent": "527"},
         | 
| 395 | 
            +
                "819": {"name": "饲料", "parent": "531"},
         | 
| 396 | 
            +
                "821": {"name": "保安服务", "parent": "525"},
         | 
| 397 | 
            +
                "822": {"name": "干细胞技术", "parent": "522"},
         | 
| 398 | 
            +
                "824": {"name": "农药化肥", "parent": "527"},
         | 
| 399 | 
            +
                "825": {"name": "卫生洁具", "parent": "518"},
         | 
| 400 | 
            +
                "826": {"name": "体育器材、场馆", "parent": "518"},
         | 
| 401 | 
            +
                "827": {"name": "饲料加工", "parent": "531"},
         | 
| 402 | 
            +
                "828": {"name": "测绘服务", "parent": "529"},
         | 
| 403 | 
            +
                "830": {"name": "金属船舶制造", "parent": "519"},
         | 
| 404 | 
            +
                "831": {"name": "基因工程", "parent": "522"},
         | 
| 405 | 
            +
                "832": {"name": "花卉服务", "parent": "536"},
         | 
| 406 | 
            +
                "833": {"name": "农业种植", "parent": "531"},
         | 
| 407 | 
            +
                "834": {"name": "皮革制品", "parent": "518"},
         | 
| 408 | 
            +
                "835": {"name": "地理信息加工服务", "parent": "529"},
         | 
| 409 | 
            +
                "836": {"name": "机器人", "parent": "519"},
         | 
| 410 | 
            +
                "837": {"name": "礼品", "parent": "518"},
         | 
| 411 | 
            +
                "838": {"name": "理发及美容服务", "parent": "525"},
         | 
| 412 | 
            +
                "839": {"name": "其他清洁服务", "parent": "525"},
         | 
| 413 | 
            +
                "840": {"name": "硅胶材料", "parent": "527"},
         | 
| 414 | 
            +
                "841": {"name": "茶叶销售", "parent": "518"},
         | 
| 415 | 
            +
                "842": {"name": "彩票活动", "parent": "529"},
         | 
| 416 | 
            +
                "843": {"name": "化妆培训", "parent": "516"},
         | 
| 417 | 
            +
                "844": {"name": "鞋业", "parent": "518"},
         | 
| 418 | 
            +
                "845": {"name": "酒店用品", "parent": "518"},
         | 
| 419 | 
            +
                "846": {"name": "复合材料", "parent": "527"},
         | 
| 420 | 
            +
                "847": {"name": "房地产工程建设", "parent": "548"},
         | 
| 421 | 
            +
                "848": {"name": "知识产权服务", "parent": "559"},
         | 
| 422 | 
            +
                "849": {"name": "新型建材", "parent": "627"},
         | 
| 423 | 
            +
                "850": {"name": "企业投资咨询", "parent": "567"},
         | 
| 424 | 
            +
                "851": {"name": "含乳饮料和植物蛋白饮料制造", "parent": "594"},
         | 
| 425 | 
            +
                "852": {"name": "汽车检测设备", "parent": "629"},
         | 
| 426 | 
            +
                "853": {"name": "手机通讯器材", "parent": "417"},
         | 
| 427 | 
            +
                "854": {"name": "环保材料", "parent": "672"},
         | 
| 428 | 
            +
                "855": {"name": "交通设施", "parent": "554"},
         | 
| 429 | 
            +
                "856": {"name": "电子器件", "parent": "419"},
         | 
| 430 | 
            +
                "857": {"name": "啤酒", "parent": "594"},
         | 
| 431 | 
            +
                "858": {"name": "生态旅游", "parent": "657"},
         | 
| 432 | 
            +
                "859": {"name": "自动化设备", "parent": "626"},
         | 
| 433 | 
            +
                "860": {"name": "软件开发", "parent": "414"},
         | 
| 434 | 
            +
                "861": {"name": "葡萄酒销售", "parent": "594"},
         | 
| 435 | 
            +
                "862": {"name": "钢材", "parent": "633"},
         | 
| 436 | 
            +
                "863": {"name": "餐饮培训", "parent": "656"},
         | 
| 437 | 
            +
                "864": {"name": "速冻食品", "parent": "593"},
         | 
| 438 | 
            +
                "865": {"name": "空气环保", "parent": "672"},
         | 
| 439 | 
            +
                "866": {"name": "互联网房地产经纪服务", "parent": "550"},
         | 
| 440 | 
            +
                "867": {"name": "食品添加剂", "parent": "593"},
         | 
| 441 | 
            +
                "868": {"name": "演艺传播", "parent": "585"},
         | 
| 442 | 
            +
                "869": {"name": "信用卡", "parent": "537"},
         | 
| 443 | 
            +
                "870": {"name": "报纸期刊广告", "parent": "579"},
         | 
| 444 | 
            +
                "871": {"name": "摄影", "parent": "525"},
         | 
| 445 | 
            +
                "872": {"name": "手机软件", "parent": "414"},
         | 
| 446 | 
            +
                "873": {"name": "地坪建材", "parent": "627"},
         | 
| 447 | 
            +
                "874": {"name": "企业管理咨询", "parent": "567"},
         | 
| 448 | 
            +
                "875": {"name": "幼儿教育", "parent": "570"},
         | 
| 449 | 
            +
                "876": {"name": "系统集成", "parent": "416"},
         | 
| 450 | 
            +
                "877": {"name": "皮革服饰", "parent": "597"},
         | 
| 451 | 
            +
                "878": {"name": "保健食品", "parent": "593"},
         | 
| 452 | 
            +
                "879": {"name": "叉车", "parent": "620"},
         | 
| 453 | 
            +
                "880": {"name": "厨卫电器", "parent": "601"},
         | 
| 454 | 
            +
                "882": {"name": "地暖设备", "parent": "627"},
         | 
| 455 | 
            +
                "883": {"name": "钢结构制造", "parent": "548"},
         | 
| 456 | 
            +
                "884": {"name": "投影机", "parent": "606"},
         | 
| 457 | 
            +
                "885": {"name": "啤酒销售", "parent": "594"},
         | 
| 458 | 
            +
                "886": {"name": "度假村旅游", "parent": "657"},
         | 
| 459 | 
            +
                "887": {"name": "电力元件设备", "parent": "626"},
         | 
| 460 | 
            +
                "888": {"name": "管理软件", "parent": "414"},
         | 
| 461 | 
            +
                "889": {"name": "轴承", "parent": "628"},
         | 
| 462 | 
            +
                "890": {"name": "餐饮设备", "parent": "656"},
         | 
| 463 | 
            +
                "891": {"name": "肉制品及副产品加工", "parent": "593"},
         | 
| 464 | 
            +
                "892": {"name": "艺术收藏品投资交易", "parent": "584"},
         | 
| 465 | 
            +
                "893": {"name": "净水器", "parent": "601"},
         | 
| 466 | 
            +
                "894": {"name": "进口食品", "parent": "593"},
         | 
| 467 | 
            +
                "895": {"name": "娱乐文化传播", "parent": "585"},
         | 
| 468 | 
            +
                "896": {"name": "文化传播", "parent": "585"},
         | 
| 469 | 
            +
                "897": {"name": "商旅传媒", "parent": "580"},
         | 
| 470 | 
            +
                "898": {"name": "广告设计制作", "parent": "579"},
         | 
| 471 | 
            +
                "899": {"name": "金属丝绳及其制品制造", "parent": "627"},
         | 
| 472 | 
            +
                "900": {"name": "建筑涂料", "parent": "627"},
         | 
| 473 | 
            +
                "901": {"name": "抵押贷款", "parent": "543"},
         | 
| 474 | 
            +
                "902": {"name": "早教", "parent": "570"},
         | 
| 475 | 
            +
                "903": {"name": "电影放映", "parent": "583"},
         | 
| 476 | 
            +
                "904": {"name": "内衣服饰", "parent": "597"},
         | 
| 477 | 
            +
                "905": {"name": "无线网络通信", "parent": "418"},
         | 
| 478 | 
            +
                "906": {"name": "记忆卡", "parent": "415"},
         | 
| 479 | 
            +
                "907": {"name": "女装服饰", "parent": "597"},
         | 
| 480 | 
            +
                "908": {"name": "建筑机械", "parent": "620"},
         | 
| 481 | 
            +
                "909": {"name": "制冷电器", "parent": "601"},
         | 
| 482 | 
            +
                "910": {"name": "通信设备", "parent": "417"},
         | 
| 483 | 
            +
                "911": {"name": "空调设备", "parent": "601"},
         | 
| 484 | 
            +
                "912": {"name": "建筑装饰", "parent": "553"},
         | 
| 485 | 
            +
                "913": {"name": "办公设备", "parent": "603"},
         | 
| 486 | 
            +
                "916": {"name": "数据处理软件", "parent": "414"},
         | 
| 487 | 
            +
                "917": {"name": "葡萄酒贸易", "parent": "594"},
         | 
| 488 | 
            +
                "918": {"name": "通讯器材", "parent": "417"},
         | 
| 489 | 
            +
                "919": {"name": "铜业", "parent": "633"},
         | 
| 490 | 
            +
                "920": {"name": "食堂", "parent": "656"},
         | 
| 491 | 
            +
                "921": {"name": "糖果零食", "parent": "593"},
         | 
| 492 | 
            +
                "922": {"name": "文化艺术传播", "parent": "584"},
         | 
| 493 | 
            +
                "923": {"name": "太阳能电器", "parent": "601"},
         | 
| 494 | 
            +
                "924": {"name": "药品零售", "parent": "645"},
         | 
| 495 | 
            +
                "925": {"name": "果蔬食品", "parent": "593"},
         | 
| 496 | 
            +
                "926": {"name": "文化活动策划", "parent": "585"},
         | 
| 497 | 
            +
                "928": {"name": "汽车广告", "parent": "657"},
         | 
| 498 | 
            +
                "929": {"name": "条码设备", "parent": "630"},
         | 
| 499 | 
            +
                "930": {"name": "建筑石材", "parent": "627"},
         | 
| 500 | 
            +
                "931": {"name": "贵金属", "parent": "545"},
         | 
| 501 | 
            +
                "932": {"name": "体育", "parent": "660"},
         | 
| 502 | 
            +
                "933": {"name": "金融信息服务", "parent": "414"},
         | 
| 503 | 
            +
                "934": {"name": "玻璃建材", "parent": "627"},
         | 
| 504 | 
            +
                "935": {"name": "家教", "parent": "569"},
         | 
| 505 | 
            +
                "936": {"name": "歌舞厅娱乐活动", "parent": "586"},
         | 
| 506 | 
            +
                "937": {"name": "计算机服务器", "parent": "415"},
         | 
| 507 | 
            +
                "938": {"name": "管道", "parent": "627"},
         | 
| 508 | 
            +
                "939": {"name": "婴幼儿服饰", "parent": "597"},
         | 
| 509 | 
            +
                "940": {"name": "热水器", "parent": "601"},
         | 
| 510 | 
            +
                "941": {"name": "计算机及零部件制造", "parent": "415"},
         | 
| 511 | 
            +
                "942": {"name": "钢铁贸易", "parent": "633"},
         | 
| 512 | 
            +
                "944": {"name": "包装材料", "parent": "632"},
         | 
| 513 | 
            +
                "945": {"name": "计算机办公设备", "parent": "603"},
         | 
| 514 | 
            +
                "946": {"name": "白酒", "parent": "594"},
         | 
| 515 | 
            +
                "948": {"name": "发动机", "parent": "620"},
         | 
| 516 | 
            +
                "949": {"name": "快餐服务", "parent": "656"},
         | 
| 517 | 
            +
                "950": {"name": "酒类销售", "parent": "594"},
         | 
| 518 | 
            +
                "951": {"name": "电子产品、机电设备", "parent": "626"},
         | 
| 519 | 
            +
                "952": {"name": "激光设备", "parent": "626"},
         | 
| 520 | 
            +
                "953": {"name": "餐饮策划", "parent": "656"},
         | 
| 521 | 
            +
                "954": {"name": "饮料、食品", "parent": "594"},
         | 
| 522 | 
            +
                "955": {"name": "文化娱乐经纪", "parent": "585"},
         | 
| 523 | 
            +
                "956": {"name": "天然气", "parent": "665"},
         | 
| 524 | 
            +
                "957": {"name": "农副食品", "parent": "593"},
         | 
| 525 | 
            +
                "958": {"name": "艺术表演", "parent": "585"},
         | 
| 526 | 
            +
                "959": {"name": "石膏、水泥制品及类似制品制造", "parent": "627"},
         | 
| 527 | 
            +
                "960": {"name": "橱柜", "parent": "602"},
         | 
| 528 | 
            +
                "961": {"name": "管理培训", "parent": "577"},
         | 
| 529 | 
            +
                "962": {"name": "男装服饰", "parent": "597"},
         | 
| 530 | 
            +
                "963": {"name": "化肥制造", "parent": "675"},
         | 
| 531 | 
            +
                "964": {"name": "童装服饰", "parent": "597"},
         | 
| 532 | 
            +
                "965": {"name": "电源电池", "parent": "626"},
         | 
| 533 | 
            +
                "966": {"name": "家电维修", "parent": "664"},
         | 
| 534 | 
            +
                "967": {"name": "光电子器件", "parent": "419"},
         | 
| 535 | 
            +
                "968": {"name": "旅行社服务", "parent": "657"},
         | 
| 536 | 
            +
                "969": {"name": "电线、电缆制造", "parent": "626"},
         | 
| 537 | 
            +
                "970": {"name": "软件开发、信息系统集成", "parent": "419"},
         | 
| 538 | 
            +
                "971": {"name": "白酒制造", "parent": "594"},
         | 
| 539 | 
            +
                "973": {"name": "甜品服务", "parent": "656"},
         | 
| 540 | 
            +
                "974": {"name": "糕点、面包制造", "parent": "593"},
         | 
| 541 | 
            +
                "975": {"name": "木工机械", "parent": "620"},
         | 
| 542 | 
            +
                "976": {"name": "酒吧服务", "parent": "656"},
         | 
| 543 | 
            +
                "977": {"name": "火腿肠", "parent": "593"},
         | 
| 544 | 
            +
                "978": {"name": "广告策划推广", "parent": "579"},
         | 
| 545 | 
            +
                "979": {"name": "新能源产品和生产装备制造", "parent": "667"},
         | 
| 546 | 
            +
                "980": {"name": "调味品", "parent": "593"},
         | 
| 547 | 
            +
                "981": {"name": "礼仪表演", "parent": "585"},
         | 
| 548 | 
            +
                "982": {"name": "劳务派遣", "parent": "560"},
         | 
| 549 | 
            +
                "983": {"name": "建材零售", "parent": "627"},
         | 
| 550 | 
            +
                "984": {"name": "商品交易中心", "parent": "545"},
         | 
| 551 | 
            +
                "985": {"name": "体育推广", "parent": "585"},
         | 
| 552 | 
            +
                "986": {"name": "茶饮��及其他饮料制造", "parent": "594"},
         | 
| 553 | 
            +
                "987": {"name": "金属建材", "parent": "627"},
         | 
| 554 | 
            +
                "988": {"name": "职业技能培训", "parent": "571"},
         | 
| 555 | 
            +
                "989": {"name": "网吧活动", "parent": "586"},
         | 
| 556 | 
            +
                "990": {"name": "洗衣服务", "parent": "658"},
         | 
| 557 | 
            +
                "991": {"name": "管道工程", "parent": "554"},
         | 
| 558 | 
            +
                "992": {"name": "通信工程", "parent": "417"},
         | 
| 559 | 
            +
                "993": {"name": "电子元器件", "parent": "626"},
         | 
| 560 | 
            +
                "994": {"name": "电子设备", "parent": "419"},
         | 
| 561 | 
            +
                "995": {"name": "茶馆服务", "parent": "656"},
         | 
| 562 | 
            +
                "996": {"name": "旅游开发", "parent": "657"},
         | 
| 563 | 
            +
                "997": {"name": "视频通讯", "parent": "417"},
         | 
| 564 | 
            +
                "998": {"name": "白酒销售", "parent": "594"},
         | 
| 565 | 
            +
                "1000": {"name": "咖啡馆服务", "parent": "656"},
         | 
| 566 | 
            +
                "1001": {"name": "食品零售", "parent": "593"},
         | 
| 567 | 
            +
                "1002": {"name": "健康疗养旅游", "parent": "655"},
         | 
| 568 | 
            +
                "1003": {"name": "粮油食品", "parent": "593"},
         | 
| 569 | 
            +
                "1004": {"name": "儿童教育影视", "parent": "583"},
         | 
| 570 | 
            +
                "1005": {"name": "新能源发电", "parent": "667"},
         | 
| 571 | 
            +
                "1006": {"name": "旅游策划", "parent": "657"},
         | 
| 572 | 
            +
                "1007": {"name": "绘画", "parent": "575"},
         | 
| 573 | 
            +
                "1008": {"name": "方便面及其他方便食品", "parent": "593"},
         | 
| 574 | 
            +
                "1009": {"name": "房地产经纪", "parent": "550"},
         | 
| 575 | 
            +
                "1010": {"name": "母婴家政", "parent": "661"},
         | 
| 576 | 
            +
                "1011": {"name": "居家养老健康服务", "parent": "661"},
         | 
| 577 | 
            +
                "1012": {"name": "文化艺术投资", "parent": "545"},
         | 
| 578 | 
            +
                "1013": {"name": "运动健身", "parent": "660"},
         | 
| 579 | 
            +
                "1014": {"name": "瓶(罐)装饮用水制造", "parent": "594"},
         | 
| 580 | 
            +
                "1015": {"name": "金属门窗", "parent": "627"},
         | 
| 581 | 
            +
                "1016": {"name": "机动车检测", "parent": "563"},
         | 
| 582 | 
            +
                "1017": {"name": "货物运输", "parent": "634"},
         | 
| 583 | 
            +
                "1018": {"name": "服饰专卖", "parent": "690"},
         | 
| 584 | 
            +
                "1019": {"name": "酒店服装", "parent": "597"},
         | 
| 585 | 
            +
                "1020": {"name": "通讯软件", "parent": "417"},
         | 
| 586 | 
            +
                "1021": {"name": "消防工程", "parent": "554"},
         | 
| 587 | 
            +
                "1022": {"name": "嵌入式电子系统", "parent": "419"},
         | 
| 588 | 
            +
                "1023": {"name": "航空票务", "parent": "636"},
         | 
| 589 | 
            +
                "1024": {"name": "电气设备", "parent": "626"},
         | 
| 590 | 
            +
                "1025": {"name": "酒业贸易", "parent": "594"},
         | 
| 591 | 
            +
                "1027": {"name": "其他饮料及冷饮服务", "parent": "656"},
         | 
| 592 | 
            +
                "1028": {"name": "乳制品", "parent": "593"},
         | 
| 593 | 
            +
                "1029": {"name": "新闻期刊出版", "parent": "588"},
         | 
| 594 | 
            +
                "1030": {"name": "水污染治理", "parent": "672"},
         | 
| 595 | 
            +
                "1031": {"name": "谷物食品", "parent": "593"},
         | 
| 596 | 
            +
                "1032": {"name": "数字动漫设计制造服务", "parent": "590"},
         | 
| 597 | 
            +
                "1033": {"name": "医院", "parent": "646"},
         | 
| 598 | 
            +
                "1034": {"name": "旅游广告", "parent": "657"},
         | 
| 599 | 
            +
                "1035": {"name": "办公家具", "parent": "602"},
         | 
| 600 | 
            +
                "1036": {"name": "房地产营销策划", "parent": "550"},
         | 
| 601 | 
            +
                "1037": {"name": "保洁家政", "parent": "661"},
         | 
| 602 | 
            +
                "1038": {"name": "水泥制造", "parent": "627"},
         | 
| 603 | 
            +
                "1039": {"name": "市场研究咨询", "parent": "567"},
         | 
| 604 | 
            +
                "1040": {"name": "驾校", "parent": "571"},
         | 
| 605 | 
            +
                "1041": {"name": "正餐服务", "parent": "656"},
         | 
| 606 | 
            +
                "1043": {"name": "机动车燃油", "parent": "665"},
         | 
| 607 | 
            +
                "1044": {"name": "食品", "parent": "593"},
         | 
| 608 | 
            +
                "1045": {"name": "新能源汽车", "parent": "629"},
         | 
| 609 | 
            +
                "1046": {"name": "手机无线网络推广", "parent": "417"},
         | 
| 610 | 
            +
                "1047": {"name": "环保设备", "parent": "672"},
         | 
| 611 | 
            +
                "1048": {"name": "通讯工程", "parent": "418"},
         | 
| 612 | 
            +
                "1049": {"name": "半导体集成电路", "parent": "419"},
         | 
| 613 | 
            +
                "1050": {"name": "航空服务", "parent": "636"},
         | 
| 614 | 
            +
                "1051": {"name": "电机设备", "parent": "626"},
         | 
| 615 | 
            +
                "1052": {"name": "档案软件", "parent": "414"},
         | 
| 616 | 
            +
                "1053": {"name": "冷链物流服务", "parent": "634"},
         | 
| 617 | 
            +
                "1054": {"name": "小吃服务", "parent": "656"},
         | 
| 618 | 
            +
                "1055": {"name": "水产品加工", "parent": "593"},
         | 
| 619 | 
            +
                "1056": {"name": "图书出版", "parent": "588"},
         | 
| 620 | 
            +
                "1057": {"name": "固体废物治理", "parent": "672"},
         | 
| 621 | 
            +
                "1059": {"name": "坚果食品", "parent": "593"},
         | 
| 622 | 
            +
                "1060": {"name": "广告传媒", "parent": "579"},
         | 
| 623 | 
            +
                "1061": {"name": "电梯", "parent": "622"},
         | 
| 624 | 
            +
                "1062": {"name": "社区医疗与卫生院", "parent": "646"},
         | 
| 625 | 
            +
                "1063": {"name": "广告、印刷包装", "parent": "630"},
         | 
| 626 | 
            +
                "1064": {"name": "婚纱礼服", "parent": "662"},
         | 
| 627 | 
            +
                "1065": {"name": "地毯", "parent": "602"},
         | 
| 628 | 
            +
                "1066": {"name": "互联网物业", "parent": "551"},
         | 
| 629 | 
            +
                "1067": {"name": "跨境电商", "parent": "3"},
         | 
| 630 | 
            +
                "1068": {"name": "信息安全、系统集成", "parent": "9"},
         | 
| 631 | 
            +
                "1069": {"name": "专用汽车制造", "parent": "750"},
         | 
| 632 | 
            +
                "1070": {"name": "商品贸易", "parent": "3"},
         | 
| 633 | 
            +
                "1071": {"name": "墙壁装饰材料", "parent": "746"},
         | 
| 634 | 
            +
                "1072": {"name": "窗帘装饰材料", "parent": "746"},
         | 
| 635 | 
            +
                "1073": {"name": "电子商务、本地生活服务", "parent": "3"},
         | 
| 636 | 
            +
                "1075": {"name": "白酒电子商务", "parent": "3"},
         | 
| 637 | 
            +
                "1076": {"name": "商品贸易、电子商务", "parent": "3"},
         | 
| 638 | 
            +
                "1077": {"name": "木质装饰材料", "parent": "746"},
         | 
| 639 | 
            +
                "1078": {"name": "电子商务、汽车电商交易平台", "parent": "3"},
         | 
| 640 | 
            +
                "1079": {"name": "汽车轮胎", "parent": "751"},
         | 
| 641 | 
            +
                "1080": {"name": "气体压缩机械制造", "parent": "732"},
         | 
| 642 | 
            +
                "1081": {"name": "家装家具电子商务", "parent": "3"},
         | 
| 643 | 
            +
                "1082": {"name": "化妆品电子商务", "parent": "3"},
         | 
| 644 | 
            +
                "1083": {"name": "汽车销售", "parent": "749"},
         | 
| 645 | 
            +
                "1084": {"name": "新闻资讯网站", "parent": "510"},
         | 
| 646 | 
            +
                "1085": {"name": "母婴电商", "parent": "3"},
         | 
| 647 | 
            +
                "1086": {"name": "电商商务、收藏品交易", "parent": "3"},
         | 
| 648 | 
            +
                "1088": {"name": "电子商务、数码产品", "parent": "3"},
         | 
| 649 | 
            +
                "1089": {"name": "二手车交易", "parent": "749"},
         | 
| 650 | 
            +
                "1090": {"name": "游戏制作服务", "parent": "5"},
         | 
| 651 | 
            +
                "1091": {"name": "母婴服务", "parent": "510"},
         | 
| 652 | 
            +
                "1092": {"name": "家具电子商务", "parent": "3"},
         | 
| 653 | 
            +
                "1093": {"name": "汽车配件电子商务", "parent": "3"},
         | 
| 654 | 
            +
                "1094": {"name": "输配电设备", "parent": "777"},
         | 
| 655 | 
            +
                "1095": {"name": "矿山设备", "parent": "727"},
         | 
| 656 | 
            +
                "1096": {"name": "机床机械", "parent": "726"},
         | 
| 657 | 
            +
                "1097": {"name": "农产品电商", "parent": "3"},
         | 
| 658 | 
            +
                "1098": {"name": "陶瓷装饰材料", "parent": "746"},
         | 
| 659 | 
            +
                "1099": {"name": "车载联网设备", "parent": "487"},
         | 
| 660 | 
            +
                "1100": {"name": "汽车销售电子商务", "parent": "3"},
         | 
| 661 | 
            +
                "1101": {"name": "石油设备", "parent": "730"},
         | 
| 662 | 
            +
                "1102": {"name": "智能家居", "parent": "487"},
         | 
| 663 | 
            +
                "1103": {"name": "散热器", "parent": "751"},
         | 
| 664 | 
            +
                "1104": {"name": "电力工程", "parent": "775"},
         | 
| 665 | 
            +
                "1105": {"name": "生鲜电商", "parent": "3"},
         | 
| 666 | 
            +
                "1106": {"name": "互联网数据服务", "parent": "490"},
         | 
| 667 | 
            +
                "1107": {"name": "房车、商务车销售", "parent": "749"},
         | 
| 668 | 
            +
                "1108": {"name": "茶叶电子商务", "parent": "3"},
         | 
| 669 | 
            +
                "1109": {"name": "酒类电子商务", "parent": "3"},
         | 
| 670 | 
            +
                "1110": {"name": "阀门", "parent": "730"},
         | 
| 671 | 
            +
                "1111": {"name": "食品电商", "parent": "3"},
         | 
| 672 | 
            +
                "1112": {"name": "儿童摄影", "parent": "871"},
         | 
| 673 | 
            +
                "1113": {"name": "广告摄影", "parent": "871"},
         | 
| 674 | 
            +
                "1114": {"name": "婚纱摄影", "parent": "871"},
         | 
| 675 | 
            +
                "1115": {"name": "模具制造", "parent": "620"},
         | 
| 676 | 
            +
                "1116": {"name": "汽车模具", "parent": "629"},
         | 
| 677 | 
            +
                "1117": {"name": "认证咨询", "parent": "567"},
         | 
| 678 | 
            +
                "1118": {"name": "数字视觉制作服务", "parent": "590"},
         | 
| 679 | 
            +
                "1119": {"name": "牙科及医疗器械", "parent": "646"},
         | 
| 680 | 
            +
                "1120": {"name": "猎头招聘", "parent": "560"},
         | 
| 681 | 
            +
                "1121": {"name": "家居", "parent": "518"},
         | 
| 682 | 
            +
                "1122": {"name": "收藏品", "parent": "518"},
         | 
| 683 | 
            +
                "1123": {"name": "首饰", "parent": "518"},
         | 
| 684 | 
            +
                "1124": {"name": "工艺品", "parent": "518"},
         | 
| 685 | 
            +
                "1125": {"name": "财务", "parent": "515"},
         | 
| 686 | 
            +
                "1126": {"name": "税务", "parent": "515"},
         | 
| 687 | 
            +
                "1127": {"name": "分类信息", "parent": "2"},
         | 
| 688 | 
            +
                "1128": {"name": "宠物", "parent": "0"},
         | 
| 689 | 
            +
                "1129": {"name": "快消品", "parent": "518"},
         | 
| 690 | 
            +
                "1130": {"name": "人工智能", "parent": "2"},
         | 
| 691 | 
            +
                "1131": {"name": "农/林/牧/渔", "parent": "0"},
         | 
| 692 | 
             
            }
         | 
| 693 |  | 
| 694 | 
            +
             | 
| 695 | 
             
            def get_names(id):
                """Return the name of entry ``id`` followed by its ancestors' names.

                The lookup starts at the requested key in ``TBL`` and follows each
                entry's ``parent`` reference, stopping at the first key that is not
                present in the table (or that maps to an empty entry).

                Args:
                    id: Key of the starting entry. It is converted with ``str``
                        before the lookup, so ``1119`` and ``"1119"`` are equivalent.

                Returns:
                    A list of names ordered from the requested entry outwards to its
                    most distant known ancestor; ``[]`` when ``id`` is not in ``TBL``.
                """
                names = []
                visited = set()
                key = str(id)
                # Walk iteratively and remember the keys already seen: a malformed
                # table in which an entry is (directly or indirectly) its own parent
                # then terminates instead of recursing until RecursionError.
                while key not in visited:
                    visited.add(key)
                    entry = TBL.get(key)
                    if not entry:
                        break
                    names.append(entry["name"])
                    key = str(entry["parent"])
                return names
         | 
| 706 |  | 
| 707 | 
            +
             | 
| 708 | 
             
            if __name__ == "__main__":
                # Manual smoke check: print the name chain for one sample entry.
                sample_chain = get_names("1119")
                print(sample_chain)
         | 
    	
        deepdoc/parser/resume/entities/regions.py
    CHANGED
    
    | @@ -10,766 +10,776 @@ | |
| 10 | 
             
            #  See the License for the specific language governing permissions and
         | 
| 11 | 
             
            #  limitations under the License.
         | 
| 12 | 
             
            #
         | 
|  | |
| 13 |  | 
| 14 | 
             
            TBL = {
         | 
| 15 | 
            -
            "2":{"name":"北京","parent":"1"},
         | 
| 16 | 
            -
            "3":{"name":"天津","parent":"1"},
         | 
| 17 | 
            -
            "4":{"name":"河北","parent":"1"},
         | 
| 18 | 
            -
            "5":{"name":"山西","parent":"1"},
         | 
| 19 | 
            -
            "6":{"name":"内蒙古","parent":"1"},
         | 
| 20 | 
            -
            "7":{"name":"辽宁","parent":"1"},
         | 
| 21 | 
            -
            "8":{"name":"吉林","parent":"1"},
         | 
| 22 | 
            -
            "9":{"name":"黑龙江","parent":"1"},
         | 
| 23 | 
            -
            "10":{"name":"上海","parent":"1"},
         | 
| 24 | 
            -
            "11":{"name":"江苏","parent":"1"},
         | 
| 25 | 
            -
            "12":{"name":"浙江","parent":"1"},
         | 
| 26 | 
            -
            "13":{"name":"安徽","parent":"1"},
         | 
| 27 | 
            -
            "14":{"name":"福建","parent":"1"},
         | 
| 28 | 
            -
            "15":{"name":"江西","parent":"1"},
         | 
| 29 | 
            -
            "16":{"name":"山东","parent":"1"},
         | 
| 30 | 
            -
            "17":{"name":"河南","parent":"1"},
         | 
| 31 | 
            -
            "18":{"name":"湖北","parent":"1"},
         | 
| 32 | 
            -
            "19":{"name":"湖南","parent":"1"},
         | 
| 33 | 
            -
            "20":{"name":"广东","parent":"1"},
         | 
| 34 | 
            -
            "21":{"name":"广西","parent":"1"},
         | 
| 35 | 
            -
            "22":{"name":"海南","parent":"1"},
         | 
| 36 | 
            -
            "23":{"name":"重庆","parent":"1"},
         | 
| 37 | 
            -
            "24":{"name":"四川","parent":"1"},
         | 
| 38 | 
            -
            "25":{"name":"贵州","parent":"1"},
         | 
| 39 | 
            -
            "26":{"name":"云南","parent":"1"},
         | 
| 40 | 
            -
            "27":{"name":"西藏","parent":"1"},
         | 
| 41 | 
            -
            "28":{"name":"陕西","parent":"1"},
         | 
| 42 | 
            -
            "29":{"name":"甘肃","parent":"1"},
         | 
| 43 | 
            -
            "30":{"name":"青海","parent":"1"},
         | 
| 44 | 
            -
            "31":{"name":"宁夏","parent":"1"},
         | 
| 45 | 
            -
            "32":{"name":"新疆","parent":"1"},
         | 
| 46 | 
            -
            "33":{"name":"北京市","parent":"2"},
         | 
| 47 | 
            -
            "34":{"name":"天津市","parent":"3"},
         | 
| 48 | 
            -
            "35":{"name":"石家庄市","parent":"4"},
         | 
| 49 | 
            -
            "36":{"name":"唐山市","parent":"4"},
         | 
| 50 | 
            -
            "37":{"name":"秦皇岛市","parent":"4"},
         | 
| 51 | 
            -
            "38":{"name":"邯郸市","parent":"4"},
         | 
| 52 | 
            -
            "39":{"name":"邢台市","parent":"4"},
         | 
| 53 | 
            -
            "40":{"name":"保定市","parent":"4"},
         | 
| 54 | 
            -
            "41":{"name":"张家口市","parent":"4"},
         | 
| 55 | 
            -
            "42":{"name":"承德市","parent":"4"},
         | 
| 56 | 
            -
            "43":{"name":"沧州市","parent":"4"},
         | 
| 57 | 
            -
            "44":{"name":"廊坊市","parent":"4"},
         | 
| 58 | 
            -
            "45":{"name":"衡水市","parent":"4"},
         | 
| 59 | 
            -
            "46":{"name":"太原市","parent":"5"},
         | 
| 60 | 
            -
            "47":{"name":"大同市","parent":"5"},
         | 
| 61 | 
            -
            "48":{"name":"阳泉市","parent":"5"},
         | 
| 62 | 
            -
            "49":{"name":"长治市","parent":"5"},
         | 
| 63 | 
            -
            "50":{"name":"晋城市","parent":"5"},
         | 
| 64 | 
            -
            "51":{"name":"朔州市","parent":"5"},
         | 
| 65 | 
            -
            "52":{"name":"晋中市","parent":"5"},
         | 
| 66 | 
            -
            "53":{"name":"运城市","parent":"5"},
         | 
| 67 | 
            -
            "54":{"name":"忻州市","parent":"5"},
         | 
| 68 | 
            -
            "55":{"name":"临汾市","parent":"5"},
         | 
| 69 | 
            -
            "56":{"name":"吕梁市","parent":"5"},
         | 
| 70 | 
            -
            "57":{"name":"呼和浩特市","parent":"6"},
         | 
| 71 | 
            -
            "58":{"name":"包头市","parent":"6"},
         | 
| 72 | 
            -
            "59":{"name":"乌海市","parent":"6"},
         | 
| 73 | 
            -
            "60":{"name":"赤峰市","parent":"6"},
         | 
| 74 | 
            -
            "61":{"name":"通辽市","parent":"6"},
         | 
| 75 | 
            -
            "62":{"name":"鄂尔多斯市","parent":"6"},
         | 
| 76 | 
            -
            "63":{"name":"呼伦贝尔市","parent":"6"},
         | 
| 77 | 
            -
            "64":{"name":"巴彦淖尔市","parent":"6"},
         | 
| 78 | 
            -
            "65":{"name":"乌兰察布市","parent":"6"},
         | 
| 79 | 
            -
            "66":{"name":"兴安盟","parent":"6"},
         | 
| 80 | 
            -
            "67":{"name":"锡林郭勒盟","parent":"6"},
         | 
| 81 | 
            -
            "68":{"name":"阿拉善盟","parent":"6"},
         | 
| 82 | 
            -
            "69":{"name":"沈阳市","parent":"7"},
         | 
| 83 | 
            -
            "70":{"name":"大连市","parent":"7"},
         | 
| 84 | 
            -
            "71":{"name":"鞍山市","parent":"7"},
         | 
| 85 | 
            -
            "72":{"name":"抚顺市","parent":"7"},
         | 
| 86 | 
            -
            "73":{"name":"本溪市","parent":"7"},
         | 
| 87 | 
            -
            "74":{"name":"丹东市","parent":"7"},
         | 
| 88 | 
            -
            "75":{"name":"锦州市","parent":"7"},
         | 
| 89 | 
            -
            "76":{"name":"营口市","parent":"7"},
         | 
| 90 | 
            -
            "77":{"name":"阜新市","parent":"7"},
         | 
| 91 | 
            -
            "78":{"name":"辽阳市","parent":"7"},
         | 
| 92 | 
            -
            "79":{"name":"盘锦市","parent":"7"},
         | 
| 93 | 
            -
            "80":{"name":"铁岭市","parent":"7"},
         | 
| 94 | 
            -
            "81":{"name":"朝阳市","parent":"7"},
         | 
| 95 | 
            -
            "82":{"name":"葫芦岛市","parent":"7"},
         | 
| 96 | 
            -
            "83":{"name":"长春市","parent":"8"},
         | 
| 97 | 
            -
            "84":{"name":"吉林市","parent":"8"},
         | 
| 98 | 
            -
            "85":{"name":"四平市","parent":"8"},
         | 
| 99 | 
            -
            "86":{"name":"辽源市","parent":"8"},
         | 
| 100 | 
            -
            "87":{"name":"通化市","parent":"8"},
         | 
| 101 | 
            -
            "88":{"name":"白山市","parent":"8"},
         | 
| 102 | 
            -
            "89":{"name":"松原市","parent":"8"},
         | 
| 103 | 
            -
            "90":{"name":"白城市","parent":"8"},
         | 
| 104 | 
            -
            "91":{"name":"延边朝鲜族自治州","parent":"8"},
         | 
| 105 | 
            -
            "92":{"name":"哈尔滨市","parent":"9"},
         | 
| 106 | 
            -
            "93":{"name":"齐齐哈尔市","parent":"9"},
         | 
| 107 | 
            -
            "94":{"name":"鸡西市","parent":"9"},
         | 
| 108 | 
            -
            "95":{"name":"鹤岗市","parent":"9"},
         | 
| 109 | 
            -
            "96":{"name":"双鸭山市","parent":"9"},
         | 
| 110 | 
            -
            "97":{"name":"大庆市","parent":"9"},
         | 
| 111 | 
            -
            "98":{"name":"伊春市","parent":"9"},
         | 
| 112 | 
            -
            "99":{"name":"佳木斯市","parent":"9"},
         | 
| 113 | 
            -
            "100":{"name":"七台河市","parent":"9"},
         | 
| 114 | 
            -
            "101":{"name":"牡丹江市","parent":"9"},
         | 
| 115 | 
            -
            "102":{"name":"黑河市","parent":"9"},
         | 
| 116 | 
            -
            "103":{"name":"绥化市","parent":"9"},
         | 
| 117 | 
            -
            "104":{"name":"大兴安岭地区","parent":"9"},
         | 
| 118 | 
            -
            "105":{"name":"上海市","parent":"10"},
         | 
| 119 | 
            -
            "106":{"name":"南京市","parent":"11"},
         | 
| 120 | 
            -
            "107":{"name":"无锡市","parent":"11"},
         | 
| 121 | 
            -
            "108":{"name":"徐州市","parent":"11"},
         | 
| 122 | 
            -
            "109":{"name":"常州市","parent":"11"},
         | 
| 123 | 
            -
            "110":{"name":"苏州市","parent":"11"},
         | 
| 124 | 
            -
            "111":{"name":"南通市","parent":"11"},
         | 
| 125 | 
            -
            "112":{"name":"连云港市","parent":"11"},
         | 
| 126 | 
            -
            "113":{"name":"淮安市","parent":"11"},
         | 
| 127 | 
            -
            "114":{"name":"盐城市","parent":"11"},
         | 
| 128 | 
            -
            "115":{"name":"扬州市","parent":"11"},
         | 
| 129 | 
            -
            "116":{"name":"镇江市","parent":"11"},
         | 
| 130 | 
            -
            "117":{"name":"泰州市","parent":"11"},
         | 
| 131 | 
            -
            "118":{"name":"宿迁市","parent":"11"},
         | 
| 132 | 
            -
            "119":{"name":"杭州市","parent":"12"},
         | 
| 133 | 
            -
            "120":{"name":"宁波市","parent":"12"},
         | 
| 134 | 
            -
            "121":{"name":"温州市","parent":"12"},
         | 
| 135 | 
            -
            "122":{"name":"嘉兴市","parent":"12"},
         | 
| 136 | 
            -
            "123":{"name":"湖州市","parent":"12"},
         | 
| 137 | 
            -
            "124":{"name":"绍兴市","parent":"12"},
         | 
| 138 | 
            -
            "125":{"name":"金华市","parent":"12"},
         | 
| 139 | 
            -
            "126":{"name":"衢州市","parent":"12"},
         | 
| 140 | 
            -
            "127":{"name":"舟山市","parent":"12"},
         | 
| 141 | 
            -
            "128":{"name":"台州市","parent":"12"},
         | 
| 142 | 
            -
            "129":{"name":"丽水市","parent":"12"},
         | 
| 143 | 
            -
            "130":{"name":"合肥市","parent":"13"},
         | 
| 144 | 
            -
            "131":{"name":"芜湖市","parent":"13"},
         | 
| 145 | 
            -
            "132":{"name":"蚌埠市","parent":"13"},
         | 
| 146 | 
            -
            "133":{"name":"淮南市","parent":"13"},
         | 
| 147 | 
            -
            "134":{"name":"马鞍山市","parent":"13"},
         | 
| 148 | 
            -
            "135":{"name":"淮北市","parent":"13"},
         | 
| 149 | 
            -
            "136":{"name":"铜陵市","parent":"13"},
         | 
| 150 | 
            -
            "137":{"name":"安庆市","parent":"13"},
         | 
| 151 | 
            -
            "138":{"name":"黄山市","parent":"13"},
         | 
| 152 | 
            -
            "139":{"name":"滁州市","parent":"13"},
         | 
| 153 | 
            -
            "140":{"name":"阜阳市","parent":"13"},
         | 
| 154 | 
            -
            "141":{"name":"宿州市","parent":"13"},
         | 
| 155 | 
            -
            "143":{"name":"六安市","parent":"13"},
         | 
| 156 | 
            -
            "144":{"name":"亳州市","parent":"13"},
         | 
| 157 | 
            -
            "145":{"name":"池州市","parent":"13"},
         | 
| 158 | 
            -
            "146":{"name":"宣城市","parent":"13"},
         | 
| 159 | 
            -
            "147":{"name":"福州市","parent":"14"},
         | 
| 160 | 
            -
            "148":{"name":"厦门市","parent":"14"},
         | 
| 161 | 
            -
            "149":{"name":"莆田市","parent":"14"},
         | 
| 162 | 
            -
            "150":{"name":"三明市","parent":"14"},
         | 
| 163 | 
            -
            "151":{"name":"泉州市","parent":"14"},
         | 
| 164 | 
            -
            "152":{"name":"漳州市","parent":"14"},
         | 
| 165 | 
            -
            "153":{"name":"南平市","parent":"14"},
         | 
| 166 | 
            -
            "154":{"name":"龙岩市","parent":"14"},
         | 
| 167 | 
            -
            "155":{"name":"宁德市","parent":"14"},
         | 
| 168 | 
            -
            "156":{"name":"南昌市","parent":"15"},
         | 
| 169 | 
            -
            "157":{"name":"景德镇市","parent":"15"},
         | 
| 170 | 
            -
            "158":{"name":"萍乡市","parent":"15"},
         | 
| 171 | 
            -
            "159":{"name":"九江市","parent":"15"},
         | 
| 172 | 
            -
            "160":{"name":"新余市","parent":"15"},
         | 
| 173 | 
            -
            "161":{"name":"鹰潭市","parent":"15"},
         | 
| 174 | 
            -
            "162":{"name":"赣州市","parent":"15"},
         | 
| 175 | 
            -
            "163":{"name":"吉安市","parent":"15"},
         | 
| 176 | 
            -
            "164":{"name":"宜春市","parent":"15"},
         | 
| 177 | 
            -
            "165":{"name":"抚州市","parent":"15"},
         | 
| 178 | 
            -
            "166":{"name":"上饶市","parent":"15"},
         | 
| 179 | 
            -
            "167":{"name":"济南市","parent":"16"},
         | 
| 180 | 
            -
            "168":{"name":"青岛市","parent":"16"},
         | 
| 181 | 
            -
            "169":{"name":"淄博市","parent":"16"},
         | 
| 182 | 
            -
            "170":{"name":"枣庄市","parent":"16"},
         | 
| 183 | 
            -
            "171":{"name":"东营市","parent":"16"},
         | 
| 184 | 
            -
            "172":{"name":"烟台市","parent":"16"},
         | 
| 185 | 
            -
            "173":{"name":"潍坊市","parent":"16"},
         | 
| 186 | 
            -
            "174":{"name":"济宁市","parent":"16"},
         | 
| 187 | 
            -
            "175":{"name":"泰安市","parent":"16"},
         | 
| 188 | 
            -
            "176":{"name":"威海市","parent":"16"},
         | 
| 189 | 
            -
            "177":{"name":"日照市","parent":"16"},
         | 
| 190 | 
            -
            "179":{"name":"临沂市","parent":"16"},
         | 
| 191 | 
            -
            "180":{"name":"德州市","parent":"16"},
         | 
| 192 | 
            -
            "181":{"name":"聊城市","parent":"16"},
         | 
| 193 | 
            -
            "182":{"name":"滨州市","parent":"16"},
         | 
| 194 | 
            -
            "183":{"name":"菏泽市","parent":"16"},
         | 
| 195 | 
            -
            "184":{"name":"郑州市","parent":"17"},
         | 
| 196 | 
            -
            "185":{"name":"开封市","parent":"17"},
         | 
| 197 | 
            -
            "186":{"name":"洛阳市","parent":"17"},
         | 
| 198 | 
            -
            "187":{"name":"平顶山市","parent":"17"},
         | 
| 199 | 
            -
            "188":{"name":"安阳市","parent":"17"},
         | 
| 200 | 
            -
            "189":{"name":"鹤壁市","parent":"17"},
         | 
| 201 | 
            -
            "190":{"name":"新乡市","parent":"17"},
         | 
| 202 | 
            -
            "191":{"name":"焦作市","parent":"17"},
         | 
| 203 | 
            -
            "192":{"name":"濮阳市","parent":"17"},
         | 
| 204 | 
            -
            "193":{"name":"许昌市","parent":"17"},
         | 
| 205 | 
            -
            "194":{"name":"漯河市","parent":"17"},
         | 
| 206 | 
            -
            "195":{"name":"三门峡市","parent":"17"},
         | 
| 207 | 
            -
            "196":{"name":"南阳市","parent":"17"},
         | 
| 208 | 
            -
            "197":{"name":"商丘市","parent":"17"},
         | 
| 209 | 
            -
            "198":{"name":"信阳市","parent":"17"},
         | 
| 210 | 
            -
            "199":{"name":"周口市","parent":"17"},
         | 
| 211 | 
            -
            "200":{"name":"驻马店市","parent":"17"},
         | 
| 212 | 
            -
            "201":{"name":"武汉市","parent":"18"},
         | 
| 213 | 
            -
            "202":{"name":"黄石市","parent":"18"},
         | 
| 214 | 
            -
            "203":{"name":"十堰市","parent":"18"},
         | 
| 215 | 
            -
            "204":{"name":"宜昌市","parent":"18"},
         | 
| 216 | 
            -
            "205":{"name":"襄阳市","parent":"18"},
         | 
| 217 | 
            -
            "206":{"name":"鄂州市","parent":"18"},
         | 
| 218 | 
            -
            "207":{"name":"荆门市","parent":"18"},
         | 
| 219 | 
            -
            "208":{"name":"孝感市","parent":"18"},
         | 
| 220 | 
            -
            "209":{"name":"荆州市","parent":"18"},
         | 
| 221 | 
            -
            "210":{"name":"黄冈市","parent":"18"},
         | 
| 222 | 
            -
            "211":{"name":"咸宁市","parent":"18"},
         | 
| 223 | 
            -
            "212":{"name":"随州市","parent":"18"},
         | 
| 224 | 
            -
            "213":{"name":"恩施土家族苗族自治州","parent":"18"},
         | 
| 225 | 
            -
            "215":{"name":"长沙市","parent":"19"},
         | 
| 226 | 
            -
            "216":{"name":"株洲市","parent":"19"},
         | 
| 227 | 
            -
            "217":{"name":"湘潭市","parent":"19"},
         | 
| 228 | 
            -
            "218":{"name":"衡阳市","parent":"19"},
         | 
| 229 | 
            -
            "219":{"name":"邵阳市","parent":"19"},
         | 
| 230 | 
            -
            "220":{"name":"岳阳市","parent":"19"},
         | 
| 231 | 
            -
            "221":{"name":"常德市","parent":"19"},
         | 
| 232 | 
            -
            "222":{"name":"张家界市","parent":"19"},
         | 
| 233 | 
            -
            "223":{"name":"益阳市","parent":"19"},
         | 
| 234 | 
            -
            "224":{"name":"郴州市","parent":"19"},
         | 
| 235 | 
            -
            "225":{"name":"永州市","parent":"19"},
         | 
| 236 | 
            -
            "226":{"name":"怀化市","parent":"19"},
         | 
| 237 | 
            -
            "227":{"name":"娄底市","parent":"19"},
         | 
| 238 | 
            -
            "228":{"name":"湘西土家族苗族自治州","parent":"19"},
         | 
| 239 | 
            -
            "229":{"name":"广州市","parent":"20"},
         | 
| 240 | 
            -
            "230":{"name":"韶关市","parent":"20"},
         | 
| 241 | 
            -
            "231":{"name":"深圳市","parent":"20"},
         | 
| 242 | 
            -
            "232":{"name":"珠海市","parent":"20"},
         | 
| 243 | 
            -
            "233":{"name":"汕头市","parent":"20"},
         | 
| 244 | 
            -
            "234":{"name":"佛山市","parent":"20"},
         | 
| 245 | 
            -
            "235":{"name":"江门市","parent":"20"},
         | 
| 246 | 
            -
            "236":{"name":"湛江市","parent":"20"},
         | 
| 247 | 
            -
            "237":{"name":"茂名市","parent":"20"},
         | 
| 248 | 
            -
            "238":{"name":"肇庆市","parent":"20"},
         | 
| 249 | 
            -
            "239":{"name":"惠州市","parent":"20"},
         | 
| 250 | 
            -
            "240":{"name":"梅州市","parent":"20"},
         | 
| 251 | 
            -
            "241":{"name":"汕尾市","parent":"20"},
         | 
| 252 | 
            -
            "242":{"name":"河源市","parent":"20"},
         | 
| 253 | 
            -
            "243":{"name":"阳江市","parent":"20"},
         | 
| 254 | 
            -
            "244":{"name":"清远市","parent":"20"},
         | 
| 255 | 
            -
            "245":{"name":"东莞市","parent":"20"},
         | 
| 256 | 
            -
            "246":{"name":"中山市","parent":"20"},
         | 
| 257 | 
            -
            "247":{"name":"潮州市","parent":"20"},
         | 
| 258 | 
            -
            "248":{"name":"揭阳市","parent":"20"},
         | 
| 259 | 
            -
            "249":{"name":"云浮市","parent":"20"},
         | 
| 260 | 
            -
            "250":{"name":"南宁市","parent":"21"},
         | 
| 261 | 
            -
            "251":{"name":"柳州市","parent":"21"},
         | 
| 262 | 
            -
            "252":{"name":"桂林市","parent":"21"},
         | 
| 263 | 
            -
            "253":{"name":"梧州市","parent":"21"},
         | 
| 264 | 
            -
            "254":{"name":"北海市","parent":"21"},
         | 
| 265 | 
            -
            "255":{"name":"防城港市","parent":"21"},
         | 
| 266 | 
            -
            "256":{"name":"钦州市","parent":"21"},
         | 
| 267 | 
            -
            "257":{"name":"贵港市","parent":"21"},
         | 
| 268 | 
            -
            "258":{"name":"玉林市","parent":"21"},
         | 
| 269 | 
            -
            "259":{"name":"百色市","parent":"21"},
         | 
| 270 | 
            -
            "260":{"name":"贺州市","parent":"21"},
         | 
| 271 | 
            -
            "261":{"name":"河池市","parent":"21"},
         | 
| 272 | 
            -
            "262":{"name":"来宾市","parent":"21"},
         | 
| 273 | 
            -
            "263":{"name":"崇左市","parent":"21"},
         | 
| 274 | 
            -
            "264":{"name":"海口市","parent":"22"},
         | 
| 275 | 
            -
            "265":{"name":"三亚市","parent":"22"},
         | 
| 276 | 
            -
            "267":{"name":"重庆市","parent":"23"},
         | 
| 277 | 
            -
            "268":{"name":"成都市","parent":"24"},
         | 
| 278 | 
            -
            "269":{"name":"自贡市","parent":"24"},
         | 
| 279 | 
            -
            "270":{"name":"攀枝花市","parent":"24"},
         | 
| 280 | 
            -
            "271":{"name":"泸州市","parent":"24"},
         | 
| 281 | 
            -
            "272":{"name":"德阳市","parent":"24"},
         | 
| 282 | 
            -
            "273":{"name":"绵阳市","parent":"24"},
         | 
| 283 | 
            -
            "274":{"name":"广元市","parent":"24"},
         | 
| 284 | 
            -
            "275":{"name":"遂宁市","parent":"24"},
         | 
| 285 | 
            -
            "276":{"name":"内江市","parent":"24"},
         | 
| 286 | 
            -
            "277":{"name":"乐山市","parent":"24"},
         | 
| 287 | 
            -
            "278":{"name":"南充市","parent":"24"},
         | 
| 288 | 
            -
            "279":{"name":"眉山市","parent":"24"},
         | 
| 289 | 
            -
            "280":{"name":"宜宾市","parent":"24"},
         | 
| 290 | 
            -
            "281":{"name":"广安市","parent":"24"},
         | 
| 291 | 
            -
            "282":{"name":"达州市","parent":"24"},
         | 
| 292 | 
            -
            "283":{"name":"雅安市","parent":"24"},
         | 
| 293 | 
            -
            "284":{"name":"巴中市","parent":"24"},
         | 
| 294 | 
            -
            "285":{"name":"资阳市","parent":"24"},
         | 
| 295 | 
            -
            "286":{"name":"阿坝藏族羌族自治州","parent":"24"},
         | 
| 296 | 
            -
            "287":{"name":"甘孜藏族自治州","parent":"24"},
         | 
| 297 | 
            -
            "288":{"name":"凉山彝族自治州","parent":"24"},
         | 
| 298 | 
            -
            "289":{"name":"贵阳市","parent":"25"},
         | 
| 299 | 
            -
            "290":{"name":"六盘水市","parent":"25"},
         | 
| 300 | 
            -
            "291":{"name":"遵义市","parent":"25"},
         | 
| 301 | 
            -
            "292":{"name":"安顺市","parent":"25"},
         | 
| 302 | 
            -
            "293":{"name":"铜仁市","parent":"25"},
         | 
| 303 | 
            -
            "294":{"name":"黔西南布依族苗族自治州","parent":"25"},
         | 
| 304 | 
            -
            "295":{"name":"毕节市","parent":"25"},
         | 
| 305 | 
            -
            "296":{"name":"黔东南苗族侗族自治州","parent":"25"},
         | 
| 306 | 
            -
            "297":{"name":"黔南布依族苗族自治州","parent":"25"},
         | 
| 307 | 
            -
            "298":{"name":"昆明市","parent":"26"},
         | 
| 308 | 
            -
            "299":{"name":"曲靖市","parent":"26"},
         | 
| 309 | 
            -
            "300":{"name":"玉溪市","parent":"26"},
         | 
| 310 | 
            -
            "301":{"name":"保山市","parent":"26"},
         | 
| 311 | 
            -
            "302":{"name":"昭通市","parent":"26"},
         | 
| 312 | 
            -
            "303":{"name":"丽江市","parent":"26"},
         | 
| 313 | 
            -
            "304":{"name":"普洱市","parent":"26"},
         | 
| 314 | 
            -
            "305":{"name":"临沧市","parent":"26"},
         | 
| 315 | 
            -
            "306":{"name":"楚雄彝族自治州","parent":"26"},
         | 
| 316 | 
            -
            "307":{"name":"红河哈尼族彝族自治州","parent":"26"},
         | 
| 317 | 
            -
            "308":{"name":"文山壮族苗族自治州","parent":"26"},
         | 
| 318 | 
            -
            "309":{"name":"西双版纳傣族自治州","parent":"26"},
         | 
| 319 | 
            -
            "310":{"name":"大理白族自治州","parent":"26"},
         | 
| 320 | 
            -
            "311":{"name":"德宏傣族景颇族自治州","parent":"26"},
         | 
| 321 | 
            -
            "312":{"name":"怒江傈僳族自治州","parent":"26"},
         | 
| 322 | 
            -
            "313":{"name":"迪庆藏族自治州","parent":"26"},
         | 
| 323 | 
            -
            "314":{"name":"拉萨市","parent":"27"},
         | 
| 324 | 
            -
            "315":{"name":"昌都市","parent":"27"},
         | 
| 325 | 
            -
            "316":{"name":"山南市","parent":"27"},
         | 
| 326 | 
            -
            "317":{"name":"日喀则市","parent":"27"},
         | 
| 327 | 
            -
            "318":{"name":"那曲市","parent":"27"},
         | 
| 328 | 
            -
            "319":{"name":"阿里地区","parent":"27"},
         | 
| 329 | 
            -
            "320":{"name":"林芝市","parent":"27"},
         | 
| 330 | 
            -
            "321":{"name":"西安市","parent":"28"},
         | 
| 331 | 
            -
            "322":{"name":"铜川市","parent":"28"},
         | 
| 332 | 
            -
            "323":{"name":"宝鸡市","parent":"28"},
         | 
| 333 | 
            -
            "324":{"name":"咸阳市","parent":"28"},
         | 
| 334 | 
            -
            "325":{"name":"渭南市","parent":"28"},
         | 
| 335 | 
            -
            "326":{"name":"延安市","parent":"28"},
         | 
| 336 | 
            -
            "327":{"name":"汉中市","parent":"28"},
         | 
| 337 | 
            -
            "328":{"name":"榆林市","parent":"28"},
         | 
| 338 | 
            -
            "329":{"name":"安康市","parent":"28"},
         | 
| 339 | 
            -
            "330":{"name":"商洛市","parent":"28"},
         | 
| 340 | 
            -
            "331":{"name":"兰州市","parent":"29"},
         | 
| 341 | 
            -
            "332":{"name":"嘉峪关市","parent":"29"},
         | 
| 342 | 
            -
            "333":{"name":"金昌市","parent":"29"},
         | 
| 343 | 
            -
            "334":{"name":"白银市","parent":"29"},
         | 
| 344 | 
            -
            "335":{"name":"天水市","parent":"29"},
         | 
| 345 | 
            -
            "336":{"name":"武威市","parent":"29"},
         | 
| 346 | 
            -
            "337":{"name":"张掖市","parent":"29"},
         | 
| 347 | 
            -
            "338":{"name":"平凉市","parent":"29"},
         | 
| 348 | 
            -
            "339":{"name":"酒泉市","parent":"29"},
         | 
| 349 | 
            -
            "340":{"name":"庆阳市","parent":"29"},
         | 
| 350 | 
            -
            "341":{"name":"定西市","parent":"29"},
         | 
| 351 | 
            -
            "342":{"name":"陇南市","parent":"29"},
         | 
| 352 | 
            -
            "343":{"name":"临夏回族自治州","parent":"29"},
         | 
| 353 | 
            -
            "344":{"name":"甘南藏族自治州","parent":"29"},
         | 
| 354 | 
            -
            "345":{"name":"西宁市","parent":"30"},
         | 
| 355 | 
            -
            "346":{"name":"海东市","parent":"30"},
         | 
| 356 | 
            -
            "347":{"name":"海北藏族自治州","parent":"30"},
         | 
| 357 | 
            -
            "348":{"name":"黄南藏族自治州","parent":"30"},
         | 
| 358 | 
            -
            "349":{"name":"海南藏族自治州","parent":"30"},
         | 
| 359 | 
            -
            "350":{"name":"果洛藏族自治州","parent":"30"},
         | 
| 360 | 
            -
            "351":{"name":"玉树藏族自治州","parent":"30"},
         | 
| 361 | 
            -
            "352":{"name":"海西蒙古族藏族自治州","parent":"30"},
         | 
| 362 | 
            -
            "353":{"name":"银川市","parent":"31"},
         | 
| 363 | 
            -
            "354":{"name":"石嘴山市","parent":"31"},
         | 
| 364 | 
            -
            "355":{"name":"吴忠市","parent":"31"},
         | 
| 365 | 
            -
            "356":{"name":"固原市","parent":"31"},
         | 
| 366 | 
            -
            "357":{"name":"中卫市","parent":"31"},
         | 
| 367 | 
            -
            "358":{"name":"乌鲁木齐市","parent":"32"},
         | 
| 368 | 
            -
            "359":{"name":"克拉玛依市","parent":"32"},
         | 
| 369 | 
            -
            "360":{"name":"吐鲁番市","parent":"32"},
         | 
| 370 | 
            -
            "361":{"name":"哈密市","parent":"32"},
         | 
| 371 | 
            -
            "362":{"name":"昌吉回族自治州","parent":"32"},
         | 
| 372 | 
            -
            "363":{"name":"博尔塔拉蒙古自治州","parent":"32"},
         | 
| 373 | 
            -
            "364":{"name":"巴音郭楞蒙古自治州","parent":"32"},
         | 
| 374 | 
            -
            "365":{"name":"阿克苏地区","parent":"32"},
         | 
| 375 | 
            -
            "366":{"name":"克孜勒苏柯尔克孜自治州","parent":"32"},
         | 
| 376 | 
            -
            "367":{"name":"喀什地区","parent":"32"},
         | 
| 377 | 
            -
            "368":{"name":"和田地区","parent":"32"},
         | 
| 378 | 
            -
            "369":{"name":"伊犁哈萨克自治州","parent":"32"},
         | 
| 379 | 
            -
            "370":{"name":"塔城地区","parent":"32"},
         | 
| 380 | 
            -
            "371":{"name":"阿勒泰地区","parent":"32"},
         | 
| 381 | 
            -
            "372":{"name":"新疆省直辖行政单位","parent":"32"},
         | 
| 382 | 
            -
            "373":{"name":"可克达拉市","parent":"32"},
         | 
| 383 | 
            -
            "374":{"name":"昆玉市","parent":"32"},
         | 
| 384 | 
            -
            "375":{"name":"胡杨河市","parent":"32"},
         | 
| 385 | 
            -
            "376":{"name":"双河市","parent":"32"},
         | 
| 386 | 
            -
            "3560":{"name":"北票市","parent":"7"},
         | 
| 387 | 
            -
            "3615":{"name":"高州市","parent":"20"},
         | 
| 388 | 
            -
            "3651":{"name":"济源市","parent":"17"},
         | 
| 389 | 
            -
            "3662":{"name":"胶南市","parent":"16"},
         | 
| 390 | 
            -
            "3683":{"name":"老河口市","parent":"18"},
         | 
| 391 | 
            -
            "3758":{"name":"沙河市","parent":"4"},
         | 
| 392 | 
            -
            "3822":{"name":"宜城市","parent":"18"},
         | 
| 393 | 
            -
            "3842":{"name":"枣阳市","parent":"18"},
         | 
| 394 | 
            -
            "3850":{"name":"肇东市","parent":"9"},
         | 
| 395 | 
            -
            "3905":{"name":"澳门","parent":"1"},
         | 
| 396 | 
            -
            "3906":{"name":"澳门","parent":"3905"},
         | 
| 397 | 
            -
            "3907":{"name":"香港","parent":"1"},
         | 
| 398 | 
            -
            "3908":{"name":"香港","parent":"3907"},
         | 
| 399 | 
            -
            "3947":{"name":"仙桃市","parent":"18"},
         | 
| 400 | 
            -
            "3954":{"name":"台湾","parent":"1"},
         | 
| 401 | 
            -
            "3955":{"name":"台湾","parent":"3954"},
         | 
| 402 | 
            -
            "3956":{"name":"海外","parent":"1"},
         | 
| 403 | 
            -
            "3957":{"name":"海外","parent":"3956"},
         | 
| 404 | 
            -
            "3958":{"name":"美国","parent":"3956"},
         | 
| 405 | 
            -
            "3959":{"name":"加拿大","parent":"3956"},
         | 
| 406 | 
            -
            "3961":{"name":"日本","parent":"3956"},
         | 
| 407 | 
            -
            "3962":{"name":"韩国","parent":"3956"},
         | 
| 408 | 
            -
            "3963":{"name":"德国","parent":"3956"},
         | 
| 409 | 
            -
            "3964":{"name":"英国","parent":"3956"},
         | 
| 410 | 
            -
            "3965":{"name":"意大利","parent":"3956"},
         | 
| 411 | 
            -
            "3966":{"name":"西班牙","parent":"3956"},
         | 
| 412 | 
            -
            "3967":{"name":"法国","parent":"3956"},
         | 
| 413 | 
            -
            "3968":{"name":"澳大利亚","parent":"3956"},
         | 
| 414 | 
            -
            "3969":{"name":"东城区","parent":"2"},
         | 
| 415 | 
            -
            "3970":{"name":"西城区","parent":"2"},
         | 
| 416 | 
            -
            "3971":{"name":"崇文区","parent":"2"},
         | 
| 417 | 
            -
            "3972":{"name":"宣武区","parent":"2"},
         | 
| 418 | 
            -
            "3973":{"name":"朝阳区","parent":"2"},
         | 
| 419 | 
            -
            "3974":{"name":"海淀区","parent":"2"},
         | 
| 420 | 
            -
            "3975":{"name":"丰台区","parent":"2"},
         | 
| 421 | 
            -
            "3976":{"name":"石景山区","parent":"2"},
         | 
| 422 | 
            -
            "3977":{"name":"门头沟区","parent":"2"},
         | 
| 423 | 
            -
            "3978":{"name":"房山区","parent":"2"},
         | 
| 424 | 
            -
            "3979":{"name":"通州区","parent":"2"},
         | 
| 425 | 
            -
            "3980":{"name":"顺义区","parent":"2"},
         | 
| 426 | 
            -
            "3981":{"name":"昌平区","parent":"2"},
         | 
| 427 | 
            -
            "3982":{"name":"大兴区","parent":"2"},
         | 
| 428 | 
            -
            "3983":{"name":"平谷区","parent":"2"},
         | 
| 429 | 
            -
            "3984":{"name":"怀柔区","parent":"2"},
         | 
| 430 | 
            -
            "3985":{"name":"密云区","parent":"2"},
         | 
| 431 | 
            -
            "3986":{"name":"延庆区","parent":"2"},
         | 
| 432 | 
            -
            "3987":{"name":"黄浦区","parent":"10"},
         | 
| 433 | 
            -
            "3988":{"name":"徐汇区","parent":"10"},
         | 
| 434 | 
            -
            "3989":{"name":"长宁区","parent":"10"},
         | 
| 435 | 
            -
            "3990":{"name":"静安区","parent":"10"},
         | 
| 436 | 
            -
            "3991":{"name":"普陀区","parent":"10"},
         | 
| 437 | 
            -
            "3992":{"name":"闸北区","parent":"10"},
         | 
| 438 | 
            -
            "3993":{"name":"虹口区","parent":"10"},
         | 
| 439 | 
            -
            "3994":{"name":"杨浦区","parent":"10"},
         | 
| 440 | 
            -
            "3995":{"name":"宝山区","parent":"10"},
         | 
| 441 | 
            -
            "3996":{"name":"闵行区","parent":"10"},
         | 
| 442 | 
            -
            "3997":{"name":"嘉定区","parent":"10"},
         | 
| 443 | 
            -
            "3998":{"name":"浦东新区","parent":"10"},
         | 
| 444 | 
            -
            "3999":{"name":"松江区","parent":"10"},
         | 
| 445 | 
            -
            "4000":{"name":"金山区","parent":"10"},
         | 
| 446 | 
            -
            "4001":{"name":"青浦区","parent":"10"},
         | 
| 447 | 
            -
            "4002":{"name":"奉贤区","parent":"10"},
         | 
| 448 | 
            -
            "4003":{"name":"崇明区","parent":"10"},
         | 
| 449 | 
            -
            "4004":{"name":"和平区","parent":"3"},
         | 
| 450 | 
            -
            "4005":{"name":"河东区","parent":"3"},
         | 
| 451 | 
            -
            "4006":{"name":"河西区","parent":"3"},
         | 
| 452 | 
            -
            "4007":{"name":"南开区","parent":"3"},
         | 
| 453 | 
            -
            "4008":{"name":"红桥区","parent":"3"},
         | 
| 454 | 
            -
            "4009":{"name":"河北区","parent":"3"},
         | 
| 455 | 
            -
            "4010":{"name":"滨海新区","parent":"3"},
         | 
| 456 | 
            -
            "4011":{"name":"东丽区","parent":"3"},
         | 
| 457 | 
            -
            "4012":{"name":"西青区","parent":"3"},
         | 
| 458 | 
            -
            "4013":{"name":"北辰区","parent":"3"},
         | 
| 459 | 
            -
            "4014":{"name":"津南区","parent":"3"},
         | 
| 460 | 
            -
            "4015":{"name":"武清区","parent":"3"},
         | 
| 461 | 
            -
            "4016":{"name":"宝坻区","parent":"3"},
         | 
| 462 | 
            -
            "4017":{"name":"静海区","parent":"3"},
         | 
| 463 | 
            -
            "4018":{"name":"宁河区","parent":"3"},
         | 
| 464 | 
            -
            "4019":{"name":"蓟州区","parent":"3"},
         | 
| 465 | 
            -
            "4020":{"name":"渝中区","parent":"23"},
         | 
| 466 | 
            -
            "4021":{"name":"江北区","parent":"23"},
         | 
| 467 | 
            -
            "4022":{"name":"南岸区","parent":"23"},
         | 
| 468 | 
            -
            "4023":{"name":"沙坪坝区","parent":"23"},
         | 
| 469 | 
            -
            "4024":{"name":"九龙坡区","parent":"23"},
         | 
| 470 | 
            -
            "4025":{"name":"大渡口区","parent":"23"},
         | 
| 471 | 
            -
            "4026":{"name":"渝北区","parent":"23"},
         | 
| 472 | 
            -
            "4027":{"name":"巴南区","parent":"23"},
         | 
| 473 | 
            -
            "4028":{"name":"北碚区","parent":"23"},
         | 
| 474 | 
            -
            "4029":{"name":"万州区","parent":"23"},
         | 
| 475 | 
            -
            "4030":{"name":"黔江区","parent":"23"},
         | 
| 476 | 
            -
            "4031":{"name":"永川区","parent":"23"},
         | 
| 477 | 
            -
            "4032":{"name":" | 
| 478 | 
            -
            "4033":{"name":"江津区","parent":"23"},
         | 
| 479 | 
            -
            "4034":{"name":"合川区","parent":"23"},
         | 
| 480 | 
            -
            "4035":{"name":"双桥区","parent":"23"},
         | 
| 481 | 
            -
            "4036":{"name":"万盛区","parent":"23"},
         | 
| 482 | 
            -
            "4037":{"name":"荣昌区","parent":"23"},
         | 
| 483 | 
            -
            "4038":{"name":"大足区","parent":"23"},
         | 
| 484 | 
            -
            "4039":{"name":"璧山区","parent":"23"},
         | 
| 485 | 
            -
            "4040":{"name":"铜梁区","parent":"23"},
         | 
| 486 | 
            -
            "4041":{"name":"潼南区","parent":"23"},
         | 
| 487 | 
            -
            "4042":{"name":"綦江区","parent":"23"},
         | 
| 488 | 
            -
            "4043":{"name":"忠县","parent":"23"},
         | 
| 489 | 
            -
            "4044":{"name":"开州区","parent":"23"},
         | 
| 490 | 
            -
            "4045":{"name":"云阳县","parent":"23"},
         | 
| 491 | 
            -
            "4046":{"name":"梁平区","parent":"23"},
         | 
| 492 | 
            -
            "4047":{"name":"垫江县","parent":"23"},
         | 
| 493 | 
            -
            "4048":{"name":"丰都县","parent":"23"},
         | 
| 494 | 
            -
            "4049":{"name":"奉节县","parent":"23"},
         | 
| 495 | 
            -
            "4050":{"name":"巫山县","parent":"23"},
         | 
| 496 | 
            -
            "4051":{"name":"巫溪县","parent":"23"},
         | 
| 497 | 
            -
            "4052":{"name":"城口县","parent":"23"},
         | 
| 498 | 
            -
            "4053":{"name":"武隆区","parent":"23"},
         | 
| 499 | 
            -
            "4054":{"name":"石柱土家族自治县","parent":"23"},
         | 
| 500 | 
            -
            "4055":{"name":"秀山土家族苗族自治县","parent":"23"},
         | 
| 501 | 
            -
            "4056":{"name":"酉阳土家族苗族自治县","parent":"23"},
         | 
| 502 | 
            -
            "4057":{"name":"彭水苗族土家族自治县","parent":"23"},
         | 
| 503 | 
            -
            "4058":{"name":"潜江市","parent":"18"},
         | 
| 504 | 
            -
            "4059":{"name":"三沙市","parent":"22"},
         | 
| 505 | 
            -
            "4060":{"name":"石河子市","parent":"32"},
         | 
| 506 | 
            -
            "4061":{"name":"阿拉尔市","parent":"32"},
         | 
| 507 | 
            -
            "4062":{"name":"图木舒克市","parent":"32"},
         | 
| 508 | 
            -
            "4063":{"name":"五家渠市","parent":"32"},
         | 
| 509 | 
            -
            "4064":{"name":"北屯市","parent":"32"},
         | 
| 510 | 
            -
            "4065":{"name":"铁门关市","parent":"32"},
         | 
| 511 | 
            -
            "4066":{"name":"儋州市","parent":"22"},
         | 
| 512 | 
            -
            "4067":{"name":"五指山市","parent":"22"},
         | 
| 513 | 
            -
            "4068":{"name":"文昌市","parent":"22"},
         | 
| 514 | 
            -
            "4069":{"name":"琼海市","parent":"22"},
         | 
| 515 | 
            -
            "4070":{"name":"万宁市","parent":"22"},
         | 
| 516 | 
            -
            "4072":{"name":"定安县","parent":"22"},
         | 
| 517 | 
            -
            "4073":{"name":"屯昌县","parent":"22"},
         | 
| 518 | 
            -
            "4074":{"name":"澄迈县","parent":"22"},
         | 
| 519 | 
            -
            "4075":{"name":"临高县","parent":"22"},
         | 
| 520 | 
            -
            "4076":{"name":"琼中黎族苗族自治县","parent":"22"},
         | 
| 521 | 
            -
            "4077":{"name":"保亭黎族苗族自治县","parent":"22"},
         | 
| 522 | 
            -
            "4078":{"name":"白沙黎族自治县","parent":"22"},
         | 
| 523 | 
            -
            "4079":{"name":"昌江黎族自治县","parent":"22"},
         | 
| 524 | 
            -
            "4080":{"name":"乐东黎族自治县","parent":"22"},
         | 
| 525 | 
            -
            "4081":{"name":"陵水黎族自治县","parent":"22"},
         | 
| 526 | 
            -
            "4082":{"name":"马来西亚","parent":"3956"},
         | 
| 527 | 
            -
            "6047":{"name":"长寿区","parent":"23"},
         | 
| 528 | 
            -
            "6857":{"name":"阿富汗","parent":"3956"},
         | 
| 529 | 
            -
            "6858":{"name":"阿尔巴尼亚","parent":"3956"},
         | 
| 530 | 
            -
            "6859":{"name":"阿尔及利亚","parent":"3956"},
         | 
| 531 | 
            -
            "6860":{"name":"美属萨摩亚","parent":"3956"},
         | 
| 532 | 
            -
            "6861":{"name":"安道尔","parent":"3956"},
         | 
| 533 | 
            -
            "6862":{"name":"安哥拉","parent":"3956"},
         | 
| 534 | 
            -
            "6863":{"name":"安圭拉","parent":"3956"},
         | 
| 535 | 
            -
            "6864":{"name":"南极洲","parent":"3956"},
         | 
| 536 | 
            -
            "6865":{"name":"安提瓜和巴布达","parent":"3956"},
         | 
| 537 | 
            -
            "6866":{"name":"阿根廷","parent":"3956"},
         | 
| 538 | 
            -
            "6867":{"name":"亚美尼亚","parent":"3956"},
         | 
| 539 | 
            -
            "6869":{"name":"奥地利","parent":"3956"},
         | 
| 540 | 
            -
            "6870":{"name":"阿塞拜疆","parent":"3956"},
         | 
| 541 | 
            -
            "6871":{"name":"巴哈马","parent":"3956"},
         | 
| 542 | 
            -
            "6872":{"name":"巴林","parent":"3956"},
         | 
| 543 | 
            -
            "6873":{"name":"孟加拉国","parent":"3956"},
         | 
| 544 | 
            -
            "6874":{"name":"巴巴多斯","parent":"3956"},
         | 
| 545 | 
            -
            "6875":{"name":"白俄罗斯","parent":"3956"},
         | 
| 546 | 
            -
            "6876":{"name":"比利时","parent":"3956"},
         | 
| 547 | 
            -
            "6877":{"name":"伯利兹","parent":"3956"},
         | 
| 548 | 
            -
            "6878":{"name":"贝宁","parent":"3956"},
         | 
| 549 | 
            -
            "6879":{"name":"百慕大","parent":"3956"},
         | 
| 550 | 
            -
            "6880":{"name":"不丹","parent":"3956"},
         | 
| 551 | 
            -
            "6881":{"name":"玻利维亚","parent":"3956"},
         | 
| 552 | 
            -
            "6882":{"name":"波黑","parent":"3956"},
         | 
| 553 | 
            -
            "6883":{"name":"博茨瓦纳","parent":"3956"},
         | 
| 554 | 
            -
            "6884":{"name":"布维岛","parent":"3956"},
         | 
| 555 | 
            -
            "6885":{"name":"巴西","parent":"3956"},
         | 
| 556 | 
            -
            "6886":{"name":"英属印度洋领土","parent":"3956"},
         | 
| 557 | 
            -
            "6887":{"name":"文莱","parent":"3956"},
         | 
| 558 | 
            -
            "6888":{"name":"保加利亚","parent":"3956"},
         | 
| 559 | 
            -
            "6889":{"name":"布基纳法索","parent":"3956"},
         | 
| 560 | 
            -
            "6890":{"name":"布隆迪","parent":"3956"},
         | 
| 561 | 
            -
            "6891":{"name":"柬埔寨","parent":"3956"},
         | 
| 562 | 
            -
            "6892":{"name":"喀麦隆","parent":"3956"},
         | 
| 563 | 
            -
            "6893":{"name":"佛得角","parent":"3956"},
         | 
| 564 | 
            -
            "6894":{"name":"开曼群岛","parent":"3956"},
         | 
| 565 | 
            -
            "6895":{"name":"中非","parent":"3956"},
         | 
| 566 | 
            -
            "6896":{"name":"乍得","parent":"3956"},
         | 
| 567 | 
            -
            "6897":{"name":"智利","parent":"3956"},
         | 
| 568 | 
            -
            "6898":{"name":"圣诞岛","parent":"3956"},
         | 
| 569 | 
            -
            "6899":{"name":"科科斯(基林)群岛","parent":"3956"},
         | 
| 570 | 
            -
            "6900":{"name":"哥伦比亚","parent":"3956"},
         | 
| 571 | 
            -
            "6901":{"name":"科摩罗","parent":"3956"},
         | 
| 572 | 
            -
            "6902":{"name":"刚果(布)","parent":"3956"},
         | 
| 573 | 
            -
            "6903":{"name":"刚果(金)","parent":"3956"},
         | 
| 574 | 
            -
            "6904":{"name":"库克群岛","parent":"3956"},
         | 
| 575 | 
            -
            "6905":{"name":"哥斯达黎加","parent":"3956"},
         | 
| 576 | 
            -
            "6906":{"name":"科特迪瓦","parent":"3956"},
         | 
| 577 | 
            -
            "6907":{"name":"克罗地亚","parent":"3956"},
         | 
| 578 | 
            -
            "6908":{"name":"古巴","parent":"3956"},
         | 
| 579 | 
            -
            "6909":{"name":"塞浦路斯","parent":"3956"},
         | 
| 580 | 
            -
            "6910":{"name":"捷克","parent":"3956"},
         | 
| 581 | 
            -
            "6911":{"name":"丹麦","parent":"3956"},
         | 
| 582 | 
            -
            "6912":{"name":"吉布提","parent":"3956"},
         | 
| 583 | 
            -
            "6913":{"name":"多米尼克","parent":"3956"},
         | 
| 584 | 
            -
            "6914":{"name":"多米尼加共和国","parent":"3956"},
         | 
| 585 | 
            -
            "6915":{"name":"东帝汶","parent":"3956"},
         | 
| 586 | 
            -
            "6916":{"name":"厄瓜多尔","parent":"3956"},
         | 
| 587 | 
            -
            "6917":{"name":"埃及","parent":"3956"},
         | 
| 588 | 
            -
            "6918":{"name":"萨尔瓦多","parent":"3956"},
         | 
| 589 | 
            -
            "6919":{"name":"赤道几内亚","parent":"3956"},
         | 
| 590 | 
            -
            "6920":{"name":"厄立特里亚","parent":"3956"},
         | 
| 591 | 
            -
            "6921":{"name":"爱沙尼亚","parent":"3956"},
         | 
| 592 | 
            -
            "6922":{"name":"埃塞俄比亚","parent":"3956"},
         | 
| 593 | 
            -
            "6923":{"name":"福克兰群岛(马尔维纳斯)","parent":"3956"},
         | 
| 594 | 
            -
            "6924":{"name":"法罗群岛","parent":"3956"},
         | 
| 595 | 
            -
            "6925":{"name":"斐济","parent":"3956"},
         | 
| 596 | 
            -
            "6926":{"name":"芬兰","parent":"3956"},
         | 
| 597 | 
            -
            "6927":{"name":"法属圭亚那","parent":"3956"},
         | 
| 598 | 
            -
            "6928":{"name":"法属波利尼西亚","parent":"3956"},
         | 
| 599 | 
            -
            "6929":{"name":"法属南部领土","parent":"3956"},
         | 
| 600 | 
            -
            "6930":{"name":"加蓬","parent":"3956"},
         | 
| 601 | 
            -
            "6931":{"name":"冈比亚","parent":"3956"},
         | 
| 602 | 
            -
            "6932":{"name":"格鲁吉亚","parent":"3956"},
         | 
| 603 | 
            -
            "6933":{"name":"加纳","parent":"3956"},
         | 
| 604 | 
            -
            "6934":{"name":"直布罗陀","parent":"3956"},
         | 
| 605 | 
            -
            "6935":{"name":"希腊","parent":"3956"},
         | 
| 606 | 
            -
            "6936":{"name":"格陵兰","parent":"3956"},
         | 
| 607 | 
            -
            "6937":{"name":"格林纳达","parent":"3956"},
         | 
| 608 | 
            -
            "6938":{"name":"瓜德罗普","parent":"3956"},
         | 
| 609 | 
            -
            "6939":{"name":"关岛","parent":"3956"},
         | 
| 610 | 
            -
            "6940":{"name":"危地马拉","parent":"3956"},
         | 
| 611 | 
            -
            "6941":{"name":"几内亚","parent":"3956"},
         | 
| 612 | 
            -
            "6942":{"name":"几内亚比绍","parent":"3956"},
         | 
| 613 | 
            -
            "6943":{"name":"圭亚那","parent":"3956"},
         | 
| 614 | 
            -
            "6944":{"name":"海地","parent":"3956"},
         | 
| 615 | 
            -
            "6945":{"name":"赫德岛和麦克唐纳岛","parent":"3956"},
         | 
| 616 | 
            -
            "6946":{"name":"洪都拉斯","parent":"3956"},
         | 
| 617 | 
            -
            "6947":{"name":"匈牙利","parent":"3956"},
         | 
| 618 | 
            -
            "6948":{"name":"冰岛","parent":"3956"},
         | 
| 619 | 
            -
            "6949":{"name":"印度","parent":"3956"},
         | 
| 620 | 
            -
            "6950":{"name":"印度尼西亚","parent":"3956"},
         | 
| 621 | 
            -
            "6951":{"name":"伊朗","parent":"3956"},
         | 
| 622 | 
            -
            "6952":{"name":"伊拉克","parent":"3956"},
         | 
| 623 | 
            -
            "6953":{"name":"爱尔兰","parent":"3956"},
         | 
| 624 | 
            -
            "6954":{"name":"以色列","parent":"3956"},
         | 
| 625 | 
            -
            "6955":{"name":"牙买加","parent":"3956"},
         | 
| 626 | 
            -
            "6956":{"name":"约旦","parent":"3956"},
         | 
| 627 | 
            -
            "6957":{"name":"哈萨克斯坦","parent":"3956"},
         | 
| 628 | 
            -
            "6958":{"name":"肯尼亚","parent":"3956"},
         | 
| 629 | 
            -
            "6959":{"name":"基里巴斯","parent":"3956"},
         | 
| 630 | 
            -
            "6960":{"name":"朝鲜","parent":"3956"},
         | 
| 631 | 
            -
            "6961":{"name":"科威特","parent":"3956"},
         | 
| 632 | 
            -
            "6962":{"name":"吉尔吉斯斯坦","parent":"3956"},
         | 
| 633 | 
            -
            "6963":{"name":"老挝","parent":"3956"},
         | 
| 634 | 
            -
            "6964":{"name":"拉脱维亚","parent":"3956"},
         | 
| 635 | 
            -
            "6965":{"name":"黎巴嫩","parent":"3956"},
         | 
| 636 | 
            -
            "6966":{"name":"莱索托","parent":"3956"},
         | 
| 637 | 
            -
            "6967":{"name":"利比里亚","parent":"3956"},
         | 
| 638 | 
            -
            "6968":{"name":"利比亚","parent":"3956"},
         | 
| 639 | 
            -
            "6969":{"name":"列支敦士登","parent":"3956"},
         | 
| 640 | 
            -
            "6970":{"name":"立陶宛","parent":"3956"},
         | 
| 641 | 
            -
            "6971":{"name":"卢森堡","parent":"3956"},
         | 
| 642 | 
            -
            "6972":{"name":"前南马其顿","parent":"3956"},
         | 
| 643 | 
            -
            "6973":{"name":"马达加斯加","parent":"3956"},
         | 
| 644 | 
            -
            "6974":{"name":"马拉维","parent":"3956"},
         | 
| 645 | 
            -
            "6975":{"name":"马尔代夫","parent":"3956"},
         | 
| 646 | 
            -
            "6976":{"name":"马里","parent":"3956"},
         | 
| 647 | 
            -
            "6977":{"name":"马耳他","parent":"3956"},
         | 
| 648 | 
            -
            "6978":{"name":"马绍尔群岛","parent":"3956"},
         | 
| 649 | 
            -
            "6979":{"name":"马提尼克","parent":"3956"},
         | 
| 650 | 
            -
            "6980":{"name":"毛里塔尼亚","parent":"3956"},
         | 
| 651 | 
            -
            "6981":{"name":"毛里求斯","parent":"3956"},
         | 
| 652 | 
            -
            "6982":{"name":"马约特","parent":"3956"},
         | 
| 653 | 
            -
            "6983":{"name":"墨西哥","parent":"3956"},
         | 
| 654 | 
            -
            "6984":{"name":"密克罗尼西亚联邦","parent":"3956"},
         | 
| 655 | 
            -
            "6985":{"name":"摩尔多瓦","parent":"3956"},
         | 
| 656 | 
            -
            "6986":{"name":"摩纳哥","parent":"3956"},
         | 
| 657 | 
            -
            "6987":{"name":"蒙古","parent":"3956"},
         | 
| 658 | 
            -
            "6988":{"name":"蒙特塞拉特","parent":"3956"},
         | 
| 659 | 
            -
            "6989":{"name":"摩洛哥","parent":"3956"},
         | 
| 660 | 
            -
            "6990":{"name":"莫桑比克","parent":"3956"},
         | 
| 661 | 
            -
            "6991":{"name":"缅甸","parent":"3956"},
         | 
| 662 | 
            -
            "6992":{"name":"纳米比亚","parent":"3956"},
         | 
| 663 | 
            -
            "6993":{"name":"瑙鲁","parent":"3956"},
         | 
| 664 | 
            -
            "6994":{"name":"尼泊尔","parent":"3956"},
         | 
| 665 | 
            -
            "6995":{"name":"荷兰","parent":"3956"},
         | 
| 666 | 
            -
            "6996":{"name":"荷属安的列斯","parent":"3956"},
         | 
| 667 | 
            -
            "6997":{"name":"新喀里多尼亚","parent":"3956"},
         | 
| 668 | 
            -
            "6998":{"name":"新西兰","parent":"3956"},
         | 
| 669 | 
            -
            "6999":{"name":"尼加拉瓜","parent":"3956"},
         | 
| 670 | 
            -
            "7000":{"name":"尼日尔","parent":"3956"},
         | 
| 671 | 
            -
            "7001":{"name":"尼日利亚","parent":"3956"},
         | 
| 672 | 
            -
            "7002":{"name":"纽埃","parent":"3956"},
         | 
| 673 | 
            -
            "7003":{"name":"诺福克岛","parent":"3956"},
         | 
| 674 | 
            -
            "7004":{"name":"北马里亚纳","parent":"3956"},
         | 
| 675 | 
            -
            "7005":{"name":"挪威","parent":"3956"},
         | 
| 676 | 
            -
            "7006":{"name":"阿曼","parent":"3956"},
         | 
| 677 | 
            -
            "7007":{"name":"巴基斯坦","parent":"3956"},
         | 
| 678 | 
            -
            "7008":{"name":"帕劳","parent":"3956"},
         | 
| 679 | 
            -
            "7009":{"name":"巴勒斯坦","parent":"3956"},
         | 
| 680 | 
            -
            "7010":{"name":"巴拿马","parent":"3956"},
         | 
| 681 | 
            -
            "7011":{"name":"巴布亚新几内亚","parent":"3956"},
         | 
| 682 | 
            -
            "7012":{"name":"巴拉圭","parent":"3956"},
         | 
| 683 | 
            -
            "7013":{"name":"秘鲁","parent":"3956"},
         | 
| 684 | 
            -
            "7014":{"name":"菲律宾","parent":"3956"},
         | 
| 685 | 
            -
            "7015":{"name":"皮特凯恩群岛","parent":"3956"},
         | 
| 686 | 
            -
            "7016":{"name":"波兰","parent":"3956"},
         | 
| 687 | 
            -
            "7017":{"name":"葡萄牙","parent":"3956"},
         | 
| 688 | 
            -
            "7018":{"name":"波多黎各","parent":"3956"},
         | 
| 689 | 
            -
            "7019":{"name":"卡塔尔","parent":"3956"},
         | 
| 690 | 
            -
            "7020":{"name":"留尼汪","parent":"3956"},
         | 
| 691 | 
            -
            "7021":{"name":"罗马尼亚","parent":"3956"},
         | 
| 692 | 
            -
            "7022":{"name":"俄罗斯联邦","parent":"3956"},
         | 
| 693 | 
            -
            "7023":{"name":"卢旺达","parent":"3956"},
         | 
| 694 | 
            -
            "7024":{"name":"圣赫勒拿","parent":"3956"},
         | 
| 695 | 
            -
            "7025":{"name":"圣基茨和尼维斯","parent":"3956"},
         | 
| 696 | 
            -
            "7026":{"name":"圣卢西亚","parent":"3956"},
         | 
| 697 | 
            -
            "7027":{"name":"圣皮埃尔和密克隆","parent":"3956"},
         | 
| 698 | 
            -
            "7028":{"name":"圣文森特和格林纳丁斯","parent":"3956"},
         | 
| 699 | 
            -
            "7029":{"name":"萨摩亚","parent":"3956"},
         | 
| 700 | 
            -
            "7030":{"name":"圣马力诺","parent":"3956"},
         | 
| 701 | 
            -
            "7031":{"name":"圣多美和普林西比","parent":"3956"},
         | 
| 702 | 
            -
            "7032":{"name":"沙特阿拉伯","parent":"3956"},
         | 
| 703 | 
            -
            "7033":{"name":"塞内加尔","parent":"3956"},
         | 
| 704 | 
            -
            "7034":{"name":"塞舌尔","parent":"3956"},
         | 
| 705 | 
            -
            "7035":{"name":"塞拉利昂","parent":"3956"},
         | 
| 706 | 
            -
            "7036":{"name":"新加坡","parent":"3956"},
         | 
| 707 | 
            -
            "7037":{"name":"斯洛伐克","parent":"3956"},
         | 
| 708 | 
            -
            "7038":{"name":"斯洛文尼亚","parent":"3956"},
         | 
| 709 | 
            -
            "7039":{"name":"所罗门群岛","parent":"3956"},
         | 
| 710 | 
            -
            "7040":{"name":"索马里","parent":"3956"},
         | 
| 711 | 
            -
            "7041":{"name":"南非","parent":"3956"},
         | 
| 712 | 
            -
            "7042":{"name":"南乔治亚岛和南桑德韦奇岛","parent":"3956"},
         | 
| 713 | 
            -
            "7043":{"name":"斯里兰卡","parent":"3956"},
         | 
| 714 | 
            -
            "7044":{"name":"苏丹","parent":"3956"},
         | 
| 715 | 
            -
            "7045":{"name":"苏里南","parent":"3956"},
         | 
| 716 | 
            -
            "7046":{"name":"斯瓦尔巴群岛","parent":"3956"},
         | 
| 717 | 
            -
            "7047":{"name":"斯威士兰","parent":"3956"},
         | 
| 718 | 
            -
            "7048":{"name":"瑞典","parent":"3956"},
         | 
| 719 | 
            -
            "7049":{"name":"瑞士","parent":"3956"},
         | 
| 720 | 
            -
            "7050":{"name":"叙利亚","parent":"3956"},
         | 
| 721 | 
            -
            "7051":{"name":"塔吉克斯坦","parent":"3956"},
         | 
| 722 | 
            -
            "7052":{"name":"坦桑尼亚","parent":"3956"},
         | 
| 723 | 
            -
            "7053":{"name":"泰国","parent":"3956"},
         | 
| 724 | 
            -
            "7054":{"name":"多哥","parent":"3956"},
         | 
| 725 | 
            -
            "7055":{"name":"托克劳","parent":"3956"},
         | 
| 726 | 
            -
            "7056":{"name":"汤加","parent":"3956"},
         | 
| 727 | 
            -
            "7057":{"name":"特立尼达和多巴哥","parent":"3956"},
         | 
| 728 | 
            -
            "7058":{"name":"突尼斯","parent":"3956"},
         | 
| 729 | 
            -
            "7059":{"name":"土耳其","parent":"3956"},
         | 
| 730 | 
            -
            "7060":{"name":"土库曼斯坦","parent":"3956"},
         | 
| 731 | 
            -
            "7061":{"name":"特克斯科斯群岛","parent":"3956"},
         | 
| 732 | 
            -
            "7062":{"name":"图瓦卢","parent":"3956"},
         | 
| 733 | 
            -
            "7063":{"name":"乌干达","parent":"3956"},
         | 
| 734 | 
            -
            "7064":{"name":"乌克兰","parent":"3956"},
         | 
| 735 | 
            -
            "7065":{"name":"阿联酋","parent":"3956"},
         | 
| 736 | 
            -
            "7066":{"name":"美国本土外小岛屿","parent":"3956"},
         | 
| 737 | 
            -
            "7067":{"name":"乌拉圭","parent":"3956"},
         | 
| 738 | 
            -
            "7068":{"name":"乌兹别克斯坦","parent":"3956"},
         | 
| 739 | 
            -
            "7069":{"name":"瓦努阿图","parent":"3956"},
         | 
| 740 | 
            -
            "7070":{"name":"梵蒂冈","parent":"3956"},
         | 
| 741 | 
            -
            "7071":{"name":"委内瑞拉","parent":"3956"},
         | 
| 742 | 
            -
            "7072":{"name":"越南","parent":"3956"},
         | 
| 743 | 
            -
            "7073":{"name":"英属维尔京群岛","parent":"3956"},
         | 
| 744 | 
            -
            "7074":{"name":"美属维尔京群岛","parent":"3956"},
         | 
| 745 | 
            -
            "7075":{"name":"瓦利斯和富图纳","parent":"3956"},
         | 
| 746 | 
            -
            "7076":{"name":"西撒哈拉","parent":"3956"},
         | 
| 747 | 
            -
            "7077":{"name":"也门","parent":"3956"},
         | 
| 748 | 
            -
            "7078":{"name":"南斯拉夫","parent":"3956"},
         | 
| 749 | 
            -
            "7079":{"name":"赞比亚","parent":"3956"},
         | 
| 750 | 
            -
            "7080":{"name":"津巴布韦","parent":"3956"},
         | 
| 751 | 
            -
            "7081":{"name":"塞尔维亚","parent":"3956"},
         | 
| 752 | 
            -
            "7082":{"name":"雄安新区","parent":"4"},
         | 
| 753 | 
            -
            "7084":{"name":"天门市","parent":"18"}
         | 
| 754 | 
             
            }
         | 
| 755 |  | 
| 756 | 
            -
            NM_SET = set([v["name"] for _,v in TBL.items()])
         | 
|  | |
| 757 |  | 
| 758 | 
             
            def get_names(id):
         | 
| 759 | 
            -
                if not id or str(id).lower() == "none": | 
|  | |
| 760 | 
             
                id = str(id)
         | 
| 761 | 
            -
                if not re.match("[0-9]+$", id.strip()): | 
|  | |
| 762 | 
             
                nms = []
         | 
| 763 | 
             
                d = TBL.get(id)
         | 
| 764 | 
            -
                if not d: | 
|  | |
| 765 | 
             
                nms.append(d["name"])
         | 
| 766 | 
             
                p = get_names(d["parent"])
         | 
| 767 | 
            -
                if p: | 
|  | |
| 768 | 
             
                return nms
         | 
| 769 |  | 
| 770 | 
            -
             | 
|  | |
| 771 | 
             
            def isName(nm):
         | 
| 772 | 
            -
                if nm in NM_SET: | 
| 773 | 
            -
             | 
| 774 | 
            -
                if  | 
|  | |
|  | |
|  | |
| 775 | 
             
                return False
         | 
|  | |
| 10 | 
             
            #  See the License for the specific language governing permissions and
         | 
| 11 | 
             
            #  limitations under the License.
         | 
| 12 | 
             
            #
         | 
| 13 | 
            +
            import re
         | 
| 14 |  | 
| 15 | 
             
            TBL = {
         | 
| 16 | 
            +
                "2": {"name": "北京", "parent": "1"},
         | 
| 17 | 
            +
                "3": {"name": "天津", "parent": "1"},
         | 
| 18 | 
            +
                "4": {"name": "河北", "parent": "1"},
         | 
| 19 | 
            +
                "5": {"name": "山西", "parent": "1"},
         | 
| 20 | 
            +
                "6": {"name": "内蒙古", "parent": "1"},
         | 
| 21 | 
            +
                "7": {"name": "辽宁", "parent": "1"},
         | 
| 22 | 
            +
                "8": {"name": "吉林", "parent": "1"},
         | 
| 23 | 
            +
                "9": {"name": "黑龙江", "parent": "1"},
         | 
| 24 | 
            +
                "10": {"name": "上海", "parent": "1"},
         | 
| 25 | 
            +
                "11": {"name": "江苏", "parent": "1"},
         | 
| 26 | 
            +
                "12": {"name": "浙江", "parent": "1"},
         | 
| 27 | 
            +
                "13": {"name": "安徽", "parent": "1"},
         | 
| 28 | 
            +
                "14": {"name": "福建", "parent": "1"},
         | 
| 29 | 
            +
                "15": {"name": "江西", "parent": "1"},
         | 
| 30 | 
            +
                "16": {"name": "山东", "parent": "1"},
         | 
| 31 | 
            +
                "17": {"name": "河南", "parent": "1"},
         | 
| 32 | 
            +
                "18": {"name": "湖北", "parent": "1"},
         | 
| 33 | 
            +
                "19": {"name": "湖南", "parent": "1"},
         | 
| 34 | 
            +
                "20": {"name": "广东", "parent": "1"},
         | 
| 35 | 
            +
                "21": {"name": "广西", "parent": "1"},
         | 
| 36 | 
            +
                "22": {"name": "海南", "parent": "1"},
         | 
| 37 | 
            +
                "23": {"name": "重庆", "parent": "1"},
         | 
| 38 | 
            +
                "24": {"name": "四川", "parent": "1"},
         | 
| 39 | 
            +
                "25": {"name": "贵州", "parent": "1"},
         | 
| 40 | 
            +
                "26": {"name": "云南", "parent": "1"},
         | 
| 41 | 
            +
                "27": {"name": "西藏", "parent": "1"},
         | 
| 42 | 
            +
                "28": {"name": "陕西", "parent": "1"},
         | 
| 43 | 
            +
                "29": {"name": "甘肃", "parent": "1"},
         | 
| 44 | 
            +
                "30": {"name": "青海", "parent": "1"},
         | 
| 45 | 
            +
                "31": {"name": "宁夏", "parent": "1"},
         | 
| 46 | 
            +
                "32": {"name": "新疆", "parent": "1"},
         | 
| 47 | 
            +
                "33": {"name": "北京市", "parent": "2"},
         | 
| 48 | 
            +
                "34": {"name": "天津市", "parent": "3"},
         | 
| 49 | 
            +
                "35": {"name": "石家庄市", "parent": "4"},
         | 
| 50 | 
            +
                "36": {"name": "唐山市", "parent": "4"},
         | 
| 51 | 
            +
                "37": {"name": "秦皇岛市", "parent": "4"},
         | 
| 52 | 
            +
                "38": {"name": "邯郸市", "parent": "4"},
         | 
| 53 | 
            +
                "39": {"name": "邢台市", "parent": "4"},
         | 
| 54 | 
            +
                "40": {"name": "保定市", "parent": "4"},
         | 
| 55 | 
            +
                "41": {"name": "张家口市", "parent": "4"},
         | 
| 56 | 
            +
                "42": {"name": "承德市", "parent": "4"},
         | 
| 57 | 
            +
                "43": {"name": "沧州市", "parent": "4"},
         | 
| 58 | 
            +
                "44": {"name": "廊坊市", "parent": "4"},
         | 
| 59 | 
            +
                "45": {"name": "衡水市", "parent": "4"},
         | 
| 60 | 
            +
                "46": {"name": "太原市", "parent": "5"},
         | 
| 61 | 
            +
                "47": {"name": "大同市", "parent": "5"},
         | 
| 62 | 
            +
                "48": {"name": "阳泉市", "parent": "5"},
         | 
| 63 | 
            +
                "49": {"name": "长治市", "parent": "5"},
         | 
| 64 | 
            +
                "50": {"name": "晋城市", "parent": "5"},
         | 
| 65 | 
            +
                "51": {"name": "朔州市", "parent": "5"},
         | 
| 66 | 
            +
                "52": {"name": "晋中市", "parent": "5"},
         | 
| 67 | 
            +
                "53": {"name": "运城市", "parent": "5"},
         | 
| 68 | 
            +
                "54": {"name": "忻州市", "parent": "5"},
         | 
| 69 | 
            +
                "55": {"name": "临汾市", "parent": "5"},
         | 
| 70 | 
            +
                "56": {"name": "吕梁市", "parent": "5"},
         | 
| 71 | 
            +
                "57": {"name": "呼和浩特市", "parent": "6"},
         | 
| 72 | 
            +
                "58": {"name": "包头市", "parent": "6"},
         | 
| 73 | 
            +
                "59": {"name": "乌海市", "parent": "6"},
         | 
| 74 | 
            +
                "60": {"name": "赤峰市", "parent": "6"},
         | 
| 75 | 
            +
                "61": {"name": "通辽市", "parent": "6"},
         | 
| 76 | 
            +
                "62": {"name": "鄂尔多斯市", "parent": "6"},
         | 
| 77 | 
            +
                "63": {"name": "呼伦贝尔市", "parent": "6"},
         | 
| 78 | 
            +
                "64": {"name": "巴彦淖尔市", "parent": "6"},
         | 
| 79 | 
            +
                "65": {"name": "乌兰察布市", "parent": "6"},
         | 
| 80 | 
            +
                "66": {"name": "兴安盟", "parent": "6"},
         | 
| 81 | 
            +
                "67": {"name": "锡林郭勒盟", "parent": "6"},
         | 
| 82 | 
            +
                "68": {"name": "阿拉善盟", "parent": "6"},
         | 
| 83 | 
            +
                "69": {"name": "沈阳市", "parent": "7"},
         | 
| 84 | 
            +
                "70": {"name": "大连市", "parent": "7"},
         | 
| 85 | 
            +
                "71": {"name": "鞍山市", "parent": "7"},
         | 
| 86 | 
            +
                "72": {"name": "抚顺市", "parent": "7"},
         | 
| 87 | 
            +
                "73": {"name": "本溪市", "parent": "7"},
         | 
| 88 | 
            +
                "74": {"name": "丹东市", "parent": "7"},
         | 
| 89 | 
            +
                "75": {"name": "锦州市", "parent": "7"},
         | 
| 90 | 
            +
                "76": {"name": "营口市", "parent": "7"},
         | 
| 91 | 
            +
                "77": {"name": "阜新市", "parent": "7"},
         | 
| 92 | 
            +
                "78": {"name": "辽阳市", "parent": "7"},
         | 
| 93 | 
            +
                "79": {"name": "盘锦市", "parent": "7"},
         | 
| 94 | 
            +
                "80": {"name": "铁岭市", "parent": "7"},
         | 
| 95 | 
            +
                "81": {"name": "朝阳市", "parent": "7"},
         | 
| 96 | 
            +
                "82": {"name": "葫芦岛市", "parent": "7"},
         | 
| 97 | 
            +
                "83": {"name": "长春市", "parent": "8"},
         | 
| 98 | 
            +
                "84": {"name": "吉林市", "parent": "8"},
         | 
| 99 | 
            +
                "85": {"name": "四平市", "parent": "8"},
         | 
| 100 | 
            +
                "86": {"name": "辽源市", "parent": "8"},
         | 
| 101 | 
            +
                "87": {"name": "通化市", "parent": "8"},
         | 
| 102 | 
            +
                "88": {"name": "白山市", "parent": "8"},
         | 
| 103 | 
            +
                "89": {"name": "松原市", "parent": "8"},
         | 
| 104 | 
            +
                "90": {"name": "白城市", "parent": "8"},
         | 
| 105 | 
            +
                "91": {"name": "延边朝鲜族自治州", "parent": "8"},
         | 
| 106 | 
            +
                "92": {"name": "哈尔滨市", "parent": "9"},
         | 
| 107 | 
            +
                "93": {"name": "齐齐哈尔市", "parent": "9"},
         | 
| 108 | 
            +
                "94": {"name": "鸡西市", "parent": "9"},
         | 
| 109 | 
            +
                "95": {"name": "鹤岗市", "parent": "9"},
         | 
| 110 | 
            +
                "96": {"name": "双鸭山市", "parent": "9"},
         | 
| 111 | 
            +
                "97": {"name": "大庆市", "parent": "9"},
         | 
| 112 | 
            +
                "98": {"name": "伊春市", "parent": "9"},
         | 
| 113 | 
            +
                "99": {"name": "佳木斯市", "parent": "9"},
         | 
| 114 | 
            +
                "100": {"name": "七台河市", "parent": "9"},
         | 
| 115 | 
            +
                "101": {"name": "牡丹江市", "parent": "9"},
         | 
| 116 | 
            +
                "102": {"name": "黑河市", "parent": "9"},
         | 
| 117 | 
            +
                "103": {"name": "绥化市", "parent": "9"},
         | 
| 118 | 
            +
                "104": {"name": "大兴安岭地区", "parent": "9"},
         | 
| 119 | 
            +
                "105": {"name": "上海市", "parent": "10"},
         | 
| 120 | 
            +
                "106": {"name": "南京市", "parent": "11"},
         | 
| 121 | 
            +
                "107": {"name": "无锡市", "parent": "11"},
         | 
| 122 | 
            +
                "108": {"name": "徐州市", "parent": "11"},
         | 
| 123 | 
            +
                "109": {"name": "常州市", "parent": "11"},
         | 
| 124 | 
            +
                "110": {"name": "苏州市", "parent": "11"},
         | 
| 125 | 
            +
                "111": {"name": "南通市", "parent": "11"},
         | 
| 126 | 
            +
                "112": {"name": "连云港市", "parent": "11"},
         | 
| 127 | 
            +
                "113": {"name": "淮安市", "parent": "11"},
         | 
| 128 | 
            +
                "114": {"name": "盐城市", "parent": "11"},
         | 
| 129 | 
            +
                "115": {"name": "扬州市", "parent": "11"},
         | 
| 130 | 
            +
                "116": {"name": "镇江市", "parent": "11"},
         | 
| 131 | 
            +
                "117": {"name": "泰州市", "parent": "11"},
         | 
| 132 | 
            +
                "118": {"name": "宿迁市", "parent": "11"},
         | 
| 133 | 
            +
                "119": {"name": "杭州市", "parent": "12"},
         | 
| 134 | 
            +
                "120": {"name": "宁波市", "parent": "12"},
         | 
| 135 | 
            +
                "121": {"name": "温州市", "parent": "12"},
         | 
| 136 | 
            +
                "122": {"name": "嘉兴市", "parent": "12"},
         | 
| 137 | 
            +
                "123": {"name": "湖州市", "parent": "12"},
         | 
| 138 | 
            +
                "124": {"name": "绍兴市", "parent": "12"},
         | 
| 139 | 
            +
                "125": {"name": "金华市", "parent": "12"},
         | 
| 140 | 
            +
                "126": {"name": "衢州市", "parent": "12"},
         | 
| 141 | 
            +
                "127": {"name": "舟山市", "parent": "12"},
         | 
| 142 | 
            +
                "128": {"name": "台州市", "parent": "12"},
         | 
| 143 | 
            +
                "129": {"name": "丽水市", "parent": "12"},
         | 
| 144 | 
            +
                "130": {"name": "合肥市", "parent": "13"},
         | 
| 145 | 
            +
                "131": {"name": "芜湖市", "parent": "13"},
         | 
| 146 | 
            +
                "132": {"name": "蚌埠市", "parent": "13"},
         | 
| 147 | 
            +
                "133": {"name": "淮南市", "parent": "13"},
         | 
| 148 | 
            +
                "134": {"name": "马鞍山市", "parent": "13"},
         | 
| 149 | 
            +
                "135": {"name": "淮北市", "parent": "13"},
         | 
| 150 | 
            +
                "136": {"name": "铜陵市", "parent": "13"},
         | 
| 151 | 
            +
                "137": {"name": "安庆市", "parent": "13"},
         | 
| 152 | 
            +
                "138": {"name": "黄山市", "parent": "13"},
         | 
| 153 | 
            +
                "139": {"name": "滁州市", "parent": "13"},
         | 
| 154 | 
            +
                "140": {"name": "阜阳市", "parent": "13"},
         | 
| 155 | 
            +
                "141": {"name": "宿州市", "parent": "13"},
         | 
| 156 | 
            +
                "143": {"name": "六安市", "parent": "13"},
         | 
| 157 | 
            +
                "144": {"name": "亳州市", "parent": "13"},
         | 
| 158 | 
            +
                "145": {"name": "池州市", "parent": "13"},
         | 
| 159 | 
            +
                "146": {"name": "宣城市", "parent": "13"},
         | 
| 160 | 
            +
                "147": {"name": "福州市", "parent": "14"},
         | 
| 161 | 
            +
                "148": {"name": "厦门市", "parent": "14"},
         | 
| 162 | 
            +
                "149": {"name": "莆田市", "parent": "14"},
         | 
| 163 | 
            +
                "150": {"name": "三明市", "parent": "14"},
         | 
| 164 | 
            +
                "151": {"name": "泉州市", "parent": "14"},
         | 
| 165 | 
            +
                "152": {"name": "漳州市", "parent": "14"},
         | 
| 166 | 
            +
                "153": {"name": "南平市", "parent": "14"},
         | 
| 167 | 
            +
                "154": {"name": "龙岩市", "parent": "14"},
         | 
| 168 | 
            +
                "155": {"name": "宁德市", "parent": "14"},
         | 
| 169 | 
            +
                "156": {"name": "南昌市", "parent": "15"},
         | 
| 170 | 
            +
                "157": {"name": "景德镇市", "parent": "15"},
         | 
| 171 | 
            +
                "158": {"name": "萍乡市", "parent": "15"},
         | 
| 172 | 
            +
                "159": {"name": "九江市", "parent": "15"},
         | 
| 173 | 
            +
                "160": {"name": "新余市", "parent": "15"},
         | 
| 174 | 
            +
                "161": {"name": "鹰潭市", "parent": "15"},
         | 
| 175 | 
            +
                "162": {"name": "赣州市", "parent": "15"},
         | 
| 176 | 
            +
                "163": {"name": "吉安市", "parent": "15"},
         | 
| 177 | 
            +
                "164": {"name": "宜春市", "parent": "15"},
         | 
| 178 | 
            +
                "165": {"name": "抚州市", "parent": "15"},
         | 
| 179 | 
            +
                "166": {"name": "上饶市", "parent": "15"},
         | 
| 180 | 
            +
                "167": {"name": "济南市", "parent": "16"},
         | 
| 181 | 
            +
                "168": {"name": "青岛市", "parent": "16"},
         | 
| 182 | 
            +
                "169": {"name": "淄博市", "parent": "16"},
         | 
| 183 | 
            +
                "170": {"name": "枣庄市", "parent": "16"},
         | 
| 184 | 
            +
                "171": {"name": "东营市", "parent": "16"},
         | 
| 185 | 
            +
                "172": {"name": "烟台市", "parent": "16"},
         | 
| 186 | 
            +
                "173": {"name": "潍坊市", "parent": "16"},
         | 
| 187 | 
            +
                "174": {"name": "济宁市", "parent": "16"},
         | 
| 188 | 
            +
                "175": {"name": "泰安市", "parent": "16"},
         | 
| 189 | 
            +
                "176": {"name": "威海市", "parent": "16"},
         | 
| 190 | 
            +
                "177": {"name": "日照市", "parent": "16"},
         | 
| 191 | 
            +
                "179": {"name": "临沂市", "parent": "16"},
         | 
| 192 | 
            +
                "180": {"name": "德州市", "parent": "16"},
         | 
| 193 | 
            +
                "181": {"name": "聊城市", "parent": "16"},
         | 
| 194 | 
            +
                "182": {"name": "滨州市", "parent": "16"},
         | 
| 195 | 
            +
                "183": {"name": "菏泽市", "parent": "16"},
         | 
| 196 | 
            +
                "184": {"name": "郑州市", "parent": "17"},
         | 
| 197 | 
            +
                "185": {"name": "开封市", "parent": "17"},
         | 
| 198 | 
            +
                "186": {"name": "洛阳市", "parent": "17"},
         | 
| 199 | 
            +
                "187": {"name": "平顶山市", "parent": "17"},
         | 
| 200 | 
            +
                "188": {"name": "安阳市", "parent": "17"},
         | 
| 201 | 
            +
                "189": {"name": "鹤壁市", "parent": "17"},
         | 
| 202 | 
            +
                "190": {"name": "新乡市", "parent": "17"},
         | 
| 203 | 
            +
                "191": {"name": "焦作市", "parent": "17"},
         | 
| 204 | 
            +
                "192": {"name": "濮阳市", "parent": "17"},
         | 
| 205 | 
            +
                "193": {"name": "许昌市", "parent": "17"},
         | 
| 206 | 
            +
                "194": {"name": "漯河市", "parent": "17"},
         | 
| 207 | 
            +
                "195": {"name": "三门峡市", "parent": "17"},
         | 
| 208 | 
            +
                "196": {"name": "南阳市", "parent": "17"},
         | 
| 209 | 
            +
                "197": {"name": "商丘市", "parent": "17"},
         | 
| 210 | 
            +
                "198": {"name": "信阳市", "parent": "17"},
         | 
| 211 | 
            +
                "199": {"name": "周口市", "parent": "17"},
         | 
| 212 | 
            +
                "200": {"name": "驻马店市", "parent": "17"},
         | 
| 213 | 
            +
                "201": {"name": "武汉市", "parent": "18"},
         | 
| 214 | 
            +
                "202": {"name": "黄石市", "parent": "18"},
         | 
| 215 | 
            +
                "203": {"name": "十堰市", "parent": "18"},
         | 
| 216 | 
            +
                "204": {"name": "宜昌市", "parent": "18"},
         | 
| 217 | 
            +
                "205": {"name": "襄阳市", "parent": "18"},
         | 
| 218 | 
            +
                "206": {"name": "鄂州市", "parent": "18"},
         | 
| 219 | 
            +
                "207": {"name": "荆门市", "parent": "18"},
         | 
| 220 | 
            +
                "208": {"name": "孝感市", "parent": "18"},
         | 
| 221 | 
            +
                "209": {"name": "荆州市", "parent": "18"},
         | 
| 222 | 
            +
                "210": {"name": "黄冈市", "parent": "18"},
         | 
| 223 | 
            +
                "211": {"name": "咸宁市", "parent": "18"},
         | 
| 224 | 
            +
                "212": {"name": "随州市", "parent": "18"},
         | 
| 225 | 
            +
                "213": {"name": "恩施土家族苗族自治州", "parent": "18"},
         | 
| 226 | 
            +
                "215": {"name": "长沙市", "parent": "19"},
         | 
| 227 | 
            +
                "216": {"name": "株洲市", "parent": "19"},
         | 
| 228 | 
            +
                "217": {"name": "湘潭市", "parent": "19"},
         | 
| 229 | 
            +
                "218": {"name": "衡阳市", "parent": "19"},
         | 
| 230 | 
            +
                "219": {"name": "邵阳市", "parent": "19"},
         | 
| 231 | 
            +
                "220": {"name": "岳阳市", "parent": "19"},
         | 
| 232 | 
            +
                "221": {"name": "常德市", "parent": "19"},
         | 
| 233 | 
            +
                "222": {"name": "张家界市", "parent": "19"},
         | 
| 234 | 
            +
                "223": {"name": "益阳市", "parent": "19"},
         | 
| 235 | 
            +
                "224": {"name": "郴州市", "parent": "19"},
         | 
| 236 | 
            +
                "225": {"name": "永州市", "parent": "19"},
         | 
| 237 | 
            +
                "226": {"name": "怀化市", "parent": "19"},
         | 
| 238 | 
            +
                "227": {"name": "娄底市", "parent": "19"},
         | 
| 239 | 
            +
                "228": {"name": "湘西土家族苗族自治州", "parent": "19"},
         | 
| 240 | 
            +
                "229": {"name": "广州市", "parent": "20"},
         | 
| 241 | 
            +
                "230": {"name": "韶关市", "parent": "20"},
         | 
| 242 | 
            +
                "231": {"name": "深圳市", "parent": "20"},
         | 
| 243 | 
            +
                "232": {"name": "珠海市", "parent": "20"},
         | 
| 244 | 
            +
                "233": {"name": "汕头市", "parent": "20"},
         | 
| 245 | 
            +
                "234": {"name": "佛山市", "parent": "20"},
         | 
| 246 | 
            +
                "235": {"name": "江门市", "parent": "20"},
         | 
| 247 | 
            +
                "236": {"name": "湛江市", "parent": "20"},
         | 
| 248 | 
            +
                "237": {"name": "茂名市", "parent": "20"},
         | 
| 249 | 
            +
                "238": {"name": "肇庆市", "parent": "20"},
         | 
| 250 | 
            +
                "239": {"name": "惠州市", "parent": "20"},
         | 
| 251 | 
            +
                "240": {"name": "梅州市", "parent": "20"},
         | 
| 252 | 
            +
                "241": {"name": "汕尾市", "parent": "20"},
         | 
| 253 | 
            +
                "242": {"name": "河源市", "parent": "20"},
         | 
| 254 | 
            +
                "243": {"name": "阳江市", "parent": "20"},
         | 
| 255 | 
            +
                "244": {"name": "清远市", "parent": "20"},
         | 
| 256 | 
            +
                "245": {"name": "东莞市", "parent": "20"},
         | 
| 257 | 
            +
                "246": {"name": "中山市", "parent": "20"},
         | 
| 258 | 
            +
                "247": {"name": "潮州市", "parent": "20"},
         | 
| 259 | 
            +
                "248": {"name": "揭阳市", "parent": "20"},
         | 
| 260 | 
            +
                "249": {"name": "云浮市", "parent": "20"},
         | 
| 261 | 
            +
                "250": {"name": "南宁市", "parent": "21"},
         | 
| 262 | 
            +
                "251": {"name": "柳州市", "parent": "21"},
         | 
| 263 | 
            +
                "252": {"name": "桂林市", "parent": "21"},
         | 
| 264 | 
            +
                "253": {"name": "梧州市", "parent": "21"},
         | 
| 265 | 
            +
                "254": {"name": "北海市", "parent": "21"},
         | 
| 266 | 
            +
                "255": {"name": "防城港市", "parent": "21"},
         | 
| 267 | 
            +
                "256": {"name": "钦州市", "parent": "21"},
         | 
| 268 | 
            +
                "257": {"name": "贵港市", "parent": "21"},
         | 
| 269 | 
            +
                "258": {"name": "玉林市", "parent": "21"},
         | 
| 270 | 
            +
                "259": {"name": "百色市", "parent": "21"},
         | 
| 271 | 
            +
                "260": {"name": "贺州市", "parent": "21"},
         | 
| 272 | 
            +
                "261": {"name": "河池市", "parent": "21"},
         | 
| 273 | 
            +
                "262": {"name": "来宾市", "parent": "21"},
         | 
| 274 | 
            +
                "263": {"name": "崇左市", "parent": "21"},
         | 
| 275 | 
            +
                "264": {"name": "海口市", "parent": "22"},
         | 
| 276 | 
            +
                "265": {"name": "三亚市", "parent": "22"},
         | 
| 277 | 
            +
                "267": {"name": "重庆市", "parent": "23"},
         | 
| 278 | 
            +
                "268": {"name": "成都市", "parent": "24"},
         | 
| 279 | 
            +
                "269": {"name": "自贡市", "parent": "24"},
         | 
| 280 | 
            +
                "270": {"name": "攀枝花市", "parent": "24"},
         | 
| 281 | 
            +
                "271": {"name": "泸州市", "parent": "24"},
         | 
| 282 | 
            +
                "272": {"name": "德阳市", "parent": "24"},
         | 
| 283 | 
            +
                "273": {"name": "绵阳市", "parent": "24"},
         | 
| 284 | 
            +
                "274": {"name": "广元市", "parent": "24"},
         | 
| 285 | 
            +
                "275": {"name": "遂宁市", "parent": "24"},
         | 
| 286 | 
            +
                "276": {"name": "内江市", "parent": "24"},
         | 
| 287 | 
            +
                "277": {"name": "乐山市", "parent": "24"},
         | 
| 288 | 
            +
                "278": {"name": "南充市", "parent": "24"},
         | 
| 289 | 
            +
                "279": {"name": "眉山市", "parent": "24"},
         | 
| 290 | 
            +
                "280": {"name": "宜宾市", "parent": "24"},
         | 
| 291 | 
            +
                "281": {"name": "广安市", "parent": "24"},
         | 
| 292 | 
            +
                "282": {"name": "达州市", "parent": "24"},
         | 
| 293 | 
            +
                "283": {"name": "雅安市", "parent": "24"},
         | 
| 294 | 
            +
                "284": {"name": "巴中市", "parent": "24"},
         | 
| 295 | 
            +
                "285": {"name": "资阳市", "parent": "24"},
         | 
| 296 | 
            +
                "286": {"name": "阿坝藏族羌族自治州", "parent": "24"},
         | 
| 297 | 
            +
                "287": {"name": "甘孜藏族自治州", "parent": "24"},
         | 
| 298 | 
            +
                "288": {"name": "凉山彝族自治州", "parent": "24"},
         | 
| 299 | 
            +
                "289": {"name": "贵阳市", "parent": "25"},
         | 
| 300 | 
            +
                "290": {"name": "六盘水市", "parent": "25"},
         | 
| 301 | 
            +
                "291": {"name": "遵义市", "parent": "25"},
         | 
| 302 | 
            +
                "292": {"name": "安顺市", "parent": "25"},
         | 
| 303 | 
            +
                "293": {"name": "铜仁市", "parent": "25"},
         | 
| 304 | 
            +
                "294": {"name": "黔西南布依族苗族自治州", "parent": "25"},
         | 
| 305 | 
            +
                "295": {"name": "毕节市", "parent": "25"},
         | 
| 306 | 
            +
                "296": {"name": "黔东南苗族侗族自治州", "parent": "25"},
         | 
| 307 | 
            +
                "297": {"name": "黔南布依族苗族自治州", "parent": "25"},
         | 
| 308 | 
            +
                "298": {"name": "昆明市", "parent": "26"},
         | 
| 309 | 
            +
                "299": {"name": "曲靖市", "parent": "26"},
         | 
| 310 | 
            +
                "300": {"name": "玉溪市", "parent": "26"},
         | 
| 311 | 
            +
                "301": {"name": "保山市", "parent": "26"},
         | 
| 312 | 
            +
                "302": {"name": "昭通市", "parent": "26"},
         | 
| 313 | 
            +
                "303": {"name": "丽江市", "parent": "26"},
         | 
| 314 | 
            +
                "304": {"name": "普洱市", "parent": "26"},
         | 
| 315 | 
            +
                "305": {"name": "临沧市", "parent": "26"},
         | 
| 316 | 
            +
                "306": {"name": "楚雄彝族自治州", "parent": "26"},
         | 
| 317 | 
            +
                "307": {"name": "红河哈尼族彝族自治州", "parent": "26"},
         | 
| 318 | 
            +
                "308": {"name": "文山壮族苗族自治州", "parent": "26"},
         | 
| 319 | 
            +
                "309": {"name": "西双版纳傣族自治州", "parent": "26"},
         | 
| 320 | 
            +
                "310": {"name": "大理白族自治州", "parent": "26"},
         | 
| 321 | 
            +
                "311": {"name": "德宏傣族景颇族自治州", "parent": "26"},
         | 
| 322 | 
            +
                "312": {"name": "怒江傈僳族自治州", "parent": "26"},
         | 
| 323 | 
            +
                "313": {"name": "迪庆藏族自治州", "parent": "26"},
         | 
| 324 | 
            +
                "314": {"name": "拉萨市", "parent": "27"},
         | 
| 325 | 
            +
                "315": {"name": "昌都市", "parent": "27"},
         | 
| 326 | 
            +
                "316": {"name": "山南市", "parent": "27"},
         | 
| 327 | 
            +
                "317": {"name": "日喀则市", "parent": "27"},
         | 
| 328 | 
            +
                "318": {"name": "那曲市", "parent": "27"},
         | 
| 329 | 
            +
                "319": {"name": "阿里地区", "parent": "27"},
         | 
| 330 | 
            +
                "320": {"name": "林芝市", "parent": "27"},
         | 
| 331 | 
            +
                "321": {"name": "西安市", "parent": "28"},
         | 
| 332 | 
            +
                "322": {"name": "铜川市", "parent": "28"},
         | 
| 333 | 
            +
                "323": {"name": "宝鸡市", "parent": "28"},
         | 
| 334 | 
            +
                "324": {"name": "咸阳市", "parent": "28"},
         | 
| 335 | 
            +
                "325": {"name": "渭南市", "parent": "28"},
         | 
| 336 | 
            +
                "326": {"name": "延安市", "parent": "28"},
         | 
| 337 | 
            +
                "327": {"name": "汉中市", "parent": "28"},
         | 
| 338 | 
            +
                "328": {"name": "榆林市", "parent": "28"},
         | 
| 339 | 
            +
                "329": {"name": "安康市", "parent": "28"},
         | 
| 340 | 
            +
                "330": {"name": "商洛市", "parent": "28"},
         | 
| 341 | 
            +
                "331": {"name": "兰州市", "parent": "29"},
         | 
| 342 | 
            +
                "332": {"name": "嘉峪关市", "parent": "29"},
         | 
| 343 | 
            +
                "333": {"name": "金昌市", "parent": "29"},
         | 
| 344 | 
            +
                "334": {"name": "白银市", "parent": "29"},
         | 
| 345 | 
            +
                "335": {"name": "天水市", "parent": "29"},
         | 
| 346 | 
            +
                "336": {"name": "武威市", "parent": "29"},
         | 
| 347 | 
            +
                "337": {"name": "张掖市", "parent": "29"},
         | 
| 348 | 
            +
                "338": {"name": "平凉市", "parent": "29"},
         | 
| 349 | 
            +
                "339": {"name": "酒泉市", "parent": "29"},
         | 
| 350 | 
            +
                "340": {"name": "庆阳市", "parent": "29"},
         | 
| 351 | 
            +
                "341": {"name": "定西市", "parent": "29"},
         | 
| 352 | 
            +
                "342": {"name": "陇南市", "parent": "29"},
         | 
| 353 | 
            +
                "343": {"name": "临夏回族自治州", "parent": "29"},
         | 
| 354 | 
            +
                "344": {"name": "甘南藏族自治州", "parent": "29"},
         | 
| 355 | 
            +
                "345": {"name": "西宁市", "parent": "30"},
         | 
| 356 | 
            +
                "346": {"name": "海东市", "parent": "30"},
         | 
| 357 | 
            +
                "347": {"name": "海北藏族自治州", "parent": "30"},
         | 
| 358 | 
            +
                "348": {"name": "黄南藏族自治州", "parent": "30"},
         | 
| 359 | 
            +
                "349": {"name": "海南藏族自治州", "parent": "30"},
         | 
| 360 | 
            +
                "350": {"name": "果洛藏族自治州", "parent": "30"},
         | 
| 361 | 
            +
                "351": {"name": "玉树藏族自治州", "parent": "30"},
         | 
| 362 | 
            +
                "352": {"name": "海西蒙古族藏族自治州", "parent": "30"},
         | 
| 363 | 
            +
                "353": {"name": "银川市", "parent": "31"},
         | 
| 364 | 
            +
                "354": {"name": "石嘴山市", "parent": "31"},
         | 
| 365 | 
            +
                "355": {"name": "吴忠市", "parent": "31"},
         | 
| 366 | 
            +
                "356": {"name": "固原市", "parent": "31"},
         | 
| 367 | 
            +
                "357": {"name": "中卫市", "parent": "31"},
         | 
| 368 | 
            +
                "358": {"name": "乌鲁木齐市", "parent": "32"},
         | 
| 369 | 
            +
                "359": {"name": "克拉玛依市", "parent": "32"},
         | 
| 370 | 
            +
                "360": {"name": "吐鲁番市", "parent": "32"},
         | 
| 371 | 
            +
                "361": {"name": "哈密市", "parent": "32"},
         | 
| 372 | 
            +
                "362": {"name": "昌吉回族自治州", "parent": "32"},
         | 
| 373 | 
            +
                "363": {"name": "博尔塔拉蒙古自治州", "parent": "32"},
         | 
| 374 | 
            +
                "364": {"name": "巴音郭楞蒙古自治州", "parent": "32"},
         | 
| 375 | 
            +
                "365": {"name": "阿克苏地区", "parent": "32"},
         | 
| 376 | 
            +
                "366": {"name": "克孜勒苏柯尔克孜自治州", "parent": "32"},
         | 
| 377 | 
            +
                "367": {"name": "喀什地区", "parent": "32"},
         | 
| 378 | 
            +
                "368": {"name": "和田地区", "parent": "32"},
         | 
| 379 | 
            +
                "369": {"name": "伊犁哈萨克自治州", "parent": "32"},
         | 
| 380 | 
            +
                "370": {"name": "塔城地区", "parent": "32"},
         | 
| 381 | 
            +
                "371": {"name": "阿勒泰地区", "parent": "32"},
         | 
| 382 | 
            +
                "372": {"name": "新疆省直辖行政单位", "parent": "32"},
         | 
| 383 | 
            +
                "373": {"name": "可克达拉市", "parent": "32"},
         | 
| 384 | 
            +
                "374": {"name": "昆玉市", "parent": "32"},
         | 
| 385 | 
            +
                "375": {"name": "胡杨河市", "parent": "32"},
         | 
| 386 | 
            +
                "376": {"name": "双河市", "parent": "32"},
         | 
| 387 | 
            +
                "3560": {"name": "北票市", "parent": "7"},
         | 
| 388 | 
            +
                "3615": {"name": "高州市", "parent": "20"},
         | 
| 389 | 
            +
                "3651": {"name": "济源市", "parent": "17"},
         | 
| 390 | 
            +
                "3662": {"name": "胶南市", "parent": "16"},
         | 
| 391 | 
            +
                "3683": {"name": "老河口市", "parent": "18"},
         | 
| 392 | 
            +
                "3758": {"name": "沙河市", "parent": "4"},
         | 
| 393 | 
            +
                "3822": {"name": "宜城市", "parent": "18"},
         | 
| 394 | 
            +
                "3842": {"name": "枣阳市", "parent": "18"},
         | 
| 395 | 
            +
                "3850": {"name": "肇东市", "parent": "9"},
         | 
| 396 | 
            +
                "3905": {"name": "澳门", "parent": "1"},
         | 
| 397 | 
            +
                "3906": {"name": "澳门", "parent": "3905"},
         | 
| 398 | 
            +
                "3907": {"name": "香港", "parent": "1"},
         | 
| 399 | 
            +
                "3908": {"name": "香港", "parent": "3907"},
         | 
| 400 | 
            +
                "3947": {"name": "仙桃市", "parent": "18"},
         | 
| 401 | 
            +
                "3954": {"name": "台湾", "parent": "1"},
         | 
| 402 | 
            +
                "3955": {"name": "台湾", "parent": "3954"},
         | 
| 403 | 
            +
                "3956": {"name": "海外", "parent": "1"},
         | 
| 404 | 
            +
                "3957": {"name": "海外", "parent": "3956"},
         | 
| 405 | 
            +
                "3958": {"name": "美国", "parent": "3956"},
         | 
| 406 | 
            +
                "3959": {"name": "加拿大", "parent": "3956"},
         | 
| 407 | 
            +
                "3961": {"name": "日本", "parent": "3956"},
         | 
| 408 | 
            +
                "3962": {"name": "韩国", "parent": "3956"},
         | 
| 409 | 
            +
                "3963": {"name": "德国", "parent": "3956"},
         | 
| 410 | 
            +
                "3964": {"name": "英国", "parent": "3956"},
         | 
| 411 | 
            +
                "3965": {"name": "意大利", "parent": "3956"},
         | 
| 412 | 
            +
                "3966": {"name": "西班牙", "parent": "3956"},
         | 
| 413 | 
            +
                "3967": {"name": "法国", "parent": "3956"},
         | 
| 414 | 
            +
                "3968": {"name": "澳大利亚", "parent": "3956"},
         | 
| 415 | 
            +
                "3969": {"name": "东城区", "parent": "2"},
         | 
| 416 | 
            +
                "3970": {"name": "西城区", "parent": "2"},
         | 
| 417 | 
            +
                "3971": {"name": "崇文区", "parent": "2"},
         | 
| 418 | 
            +
                "3972": {"name": "宣武区", "parent": "2"},
         | 
| 419 | 
            +
                "3973": {"name": "朝阳区", "parent": "2"},
         | 
| 420 | 
            +
                "3974": {"name": "海淀区", "parent": "2"},
         | 
| 421 | 
            +
                "3975": {"name": "丰台区", "parent": "2"},
         | 
| 422 | 
            +
                "3976": {"name": "石景山区", "parent": "2"},
         | 
| 423 | 
            +
                "3977": {"name": "门头沟区", "parent": "2"},
         | 
| 424 | 
            +
                "3978": {"name": "房山区", "parent": "2"},
         | 
| 425 | 
            +
                "3979": {"name": "通州区", "parent": "2"},
         | 
| 426 | 
            +
                "3980": {"name": "顺义区", "parent": "2"},
         | 
| 427 | 
            +
                "3981": {"name": "昌平区", "parent": "2"},
         | 
| 428 | 
            +
                "3982": {"name": "大兴区", "parent": "2"},
         | 
| 429 | 
            +
                "3983": {"name": "平谷区", "parent": "2"},
         | 
| 430 | 
            +
                "3984": {"name": "怀柔区", "parent": "2"},
         | 
| 431 | 
            +
                "3985": {"name": "密云区", "parent": "2"},
         | 
| 432 | 
            +
                "3986": {"name": "延庆区", "parent": "2"},
         | 
| 433 | 
            +
                "3987": {"name": "黄浦区", "parent": "10"},
         | 
| 434 | 
            +
                "3988": {"name": "徐汇区", "parent": "10"},
         | 
| 435 | 
            +
                "3989": {"name": "长宁区", "parent": "10"},
         | 
| 436 | 
            +
                "3990": {"name": "静安区", "parent": "10"},
         | 
| 437 | 
            +
                "3991": {"name": "普陀区", "parent": "10"},
         | 
| 438 | 
            +
                "3992": {"name": "闸北区", "parent": "10"},
         | 
| 439 | 
            +
                "3993": {"name": "虹口区", "parent": "10"},
         | 
| 440 | 
            +
                "3994": {"name": "杨浦区", "parent": "10"},
         | 
| 441 | 
            +
                "3995": {"name": "宝山区", "parent": "10"},
         | 
| 442 | 
            +
                "3996": {"name": "闵行区", "parent": "10"},
         | 
| 443 | 
            +
                "3997": {"name": "嘉定区", "parent": "10"},
         | 
| 444 | 
            +
                "3998": {"name": "浦东新区", "parent": "10"},
         | 
| 445 | 
            +
                "3999": {"name": "松江区", "parent": "10"},
         | 
| 446 | 
            +
                "4000": {"name": "金山区", "parent": "10"},
         | 
| 447 | 
            +
                "4001": {"name": "青浦区", "parent": "10"},
         | 
| 448 | 
            +
                "4002": {"name": "奉贤区", "parent": "10"},
         | 
| 449 | 
            +
                "4003": {"name": "崇明区", "parent": "10"},
         | 
| 450 | 
            +
                "4004": {"name": "和平区", "parent": "3"},
         | 
| 451 | 
            +
                "4005": {"name": "河东区", "parent": "3"},
         | 
| 452 | 
            +
                "4006": {"name": "河西区", "parent": "3"},
         | 
| 453 | 
            +
                "4007": {"name": "南开区", "parent": "3"},
         | 
| 454 | 
            +
                "4008": {"name": "红桥区", "parent": "3"},
         | 
| 455 | 
            +
                "4009": {"name": "河北区", "parent": "3"},
         | 
| 456 | 
            +
                "4010": {"name": "滨海新区", "parent": "3"},
         | 
| 457 | 
            +
                "4011": {"name": "东丽区", "parent": "3"},
         | 
| 458 | 
            +
                "4012": {"name": "西青区", "parent": "3"},
         | 
| 459 | 
            +
                "4013": {"name": "北辰区", "parent": "3"},
         | 
| 460 | 
            +
                "4014": {"name": "津南区", "parent": "3"},
         | 
| 461 | 
            +
                "4015": {"name": "武清区", "parent": "3"},
         | 
| 462 | 
            +
                "4016": {"name": "宝坻区", "parent": "3"},
         | 
| 463 | 
            +
                "4017": {"name": "静海区", "parent": "3"},
         | 
| 464 | 
            +
                "4018": {"name": "宁河区", "parent": "3"},
         | 
| 465 | 
            +
                "4019": {"name": "蓟州区", "parent": "3"},
         | 
| 466 | 
            +
                "4020": {"name": "渝中区", "parent": "23"},
         | 
| 467 | 
            +
                "4021": {"name": "江北区", "parent": "23"},
         | 
| 468 | 
            +
                "4022": {"name": "南岸区", "parent": "23"},
         | 
| 469 | 
            +
                "4023": {"name": "沙坪坝区", "parent": "23"},
         | 
| 470 | 
            +
                "4024": {"name": "九龙坡区", "parent": "23"},
         | 
| 471 | 
            +
                "4025": {"name": "大渡口区", "parent": "23"},
         | 
| 472 | 
            +
                "4026": {"name": "渝北区", "parent": "23"},
         | 
| 473 | 
            +
                "4027": {"name": "巴南区", "parent": "23"},
         | 
| 474 | 
            +
                "4028": {"name": "北碚区", "parent": "23"},
         | 
| 475 | 
            +
                "4029": {"name": "万州区", "parent": "23"},
         | 
| 476 | 
            +
                "4030": {"name": "黔江区", "parent": "23"},
         | 
| 477 | 
            +
                "4031": {"name": "永川区", "parent": "23"},
         | 
| 478 | 
            +
                "4032": {"name": "涪陵区", "parent": "23"},
         | 
| 479 | 
            +
                "4033": {"name": "江津区", "parent": "23"},
         | 
| 480 | 
            +
                "4034": {"name": "合川区", "parent": "23"},
         | 
| 481 | 
            +
                "4035": {"name": "双桥区", "parent": "23"},
         | 
| 482 | 
            +
                "4036": {"name": "万盛区", "parent": "23"},
         | 
| 483 | 
            +
                "4037": {"name": "荣昌区", "parent": "23"},
         | 
| 484 | 
            +
                "4038": {"name": "大足区", "parent": "23"},
         | 
| 485 | 
            +
                "4039": {"name": "璧山区", "parent": "23"},
         | 
| 486 | 
            +
                "4040": {"name": "铜梁区", "parent": "23"},
         | 
| 487 | 
            +
                "4041": {"name": "潼南区", "parent": "23"},
         | 
| 488 | 
            +
                "4042": {"name": "綦江区", "parent": "23"},
         | 
| 489 | 
            +
                "4043": {"name": "忠县", "parent": "23"},
         | 
| 490 | 
            +
                "4044": {"name": "开州区", "parent": "23"},
         | 
| 491 | 
            +
                "4045": {"name": "云阳县", "parent": "23"},
         | 
| 492 | 
            +
                "4046": {"name": "梁平区", "parent": "23"},
         | 
| 493 | 
            +
                "4047": {"name": "垫江县", "parent": "23"},
         | 
| 494 | 
            +
                "4048": {"name": "丰都县", "parent": "23"},
         | 
| 495 | 
            +
                "4049": {"name": "奉节县", "parent": "23"},
         | 
| 496 | 
            +
                "4050": {"name": "巫山县", "parent": "23"},
         | 
| 497 | 
            +
                "4051": {"name": "巫溪县", "parent": "23"},
         | 
| 498 | 
            +
                "4052": {"name": "城口县", "parent": "23"},
         | 
| 499 | 
            +
                "4053": {"name": "武隆区", "parent": "23"},
         | 
| 500 | 
            +
                "4054": {"name": "石柱土家族自治县", "parent": "23"},
         | 
| 501 | 
            +
                "4055": {"name": "秀山土家族苗族自治县", "parent": "23"},
         | 
| 502 | 
            +
                "4056": {"name": "酉阳土家族苗族自治县", "parent": "23"},
         | 
| 503 | 
            +
                "4057": {"name": "彭水苗族土家族自治县", "parent": "23"},
         | 
| 504 | 
            +
                "4058": {"name": "潜江市", "parent": "18"},
         | 
| 505 | 
            +
                "4059": {"name": "三沙市", "parent": "22"},
         | 
| 506 | 
            +
                "4060": {"name": "石河子市", "parent": "32"},
         | 
| 507 | 
            +
                "4061": {"name": "阿拉尔市", "parent": "32"},
         | 
| 508 | 
            +
                "4062": {"name": "图木舒克市", "parent": "32"},
         | 
| 509 | 
            +
                "4063": {"name": "五家渠市", "parent": "32"},
         | 
| 510 | 
            +
                "4064": {"name": "北屯市", "parent": "32"},
         | 
| 511 | 
            +
                "4065": {"name": "铁门关市", "parent": "32"},
         | 
| 512 | 
            +
                "4066": {"name": "儋州市", "parent": "22"},
         | 
| 513 | 
            +
                "4067": {"name": "五指山市", "parent": "22"},
         | 
| 514 | 
            +
                "4068": {"name": "文昌市", "parent": "22"},
         | 
| 515 | 
            +
                "4069": {"name": "琼海市", "parent": "22"},
         | 
| 516 | 
            +
                "4070": {"name": "万宁市", "parent": "22"},
         | 
| 517 | 
            +
                "4072": {"name": "定安县", "parent": "22"},
         | 
| 518 | 
            +
                "4073": {"name": "屯昌县", "parent": "22"},
         | 
| 519 | 
            +
                "4074": {"name": "澄迈县", "parent": "22"},
         | 
| 520 | 
            +
                "4075": {"name": "临高县", "parent": "22"},
         | 
| 521 | 
            +
                "4076": {"name": "琼中黎族苗族自治县", "parent": "22"},
         | 
| 522 | 
            +
                "4077": {"name": "保亭黎族苗族自治县", "parent": "22"},
         | 
| 523 | 
            +
                "4078": {"name": "白沙黎族自治县", "parent": "22"},
         | 
| 524 | 
            +
                "4079": {"name": "昌江黎族自治县", "parent": "22"},
         | 
| 525 | 
            +
                "4080": {"name": "乐东黎族自治县", "parent": "22"},
         | 
| 526 | 
            +
                "4081": {"name": "陵水黎族自治县", "parent": "22"},
         | 
| 527 | 
            +
                "4082": {"name": "马来西亚", "parent": "3956"},
         | 
| 528 | 
            +
                "6047": {"name": "长寿区", "parent": "23"},
         | 
| 529 | 
            +
                "6857": {"name": "阿富汗", "parent": "3956"},
         | 
| 530 | 
            +
                "6858": {"name": "阿尔巴尼亚", "parent": "3956"},
         | 
| 531 | 
            +
                "6859": {"name": "阿尔及利亚", "parent": "3956"},
         | 
| 532 | 
            +
                "6860": {"name": "美属萨摩亚", "parent": "3956"},
         | 
| 533 | 
            +
                "6861": {"name": "安道尔", "parent": "3956"},
         | 
| 534 | 
            +
                "6862": {"name": "安哥拉", "parent": "3956"},
         | 
| 535 | 
            +
                "6863": {"name": "安圭拉", "parent": "3956"},
         | 
| 536 | 
            +
                "6864": {"name": "南极洲", "parent": "3956"},
         | 
| 537 | 
            +
                "6865": {"name": "安提瓜和巴布达", "parent": "3956"},
         | 
| 538 | 
            +
                "6866": {"name": "阿根廷", "parent": "3956"},
         | 
| 539 | 
            +
                "6867": {"name": "亚美尼亚", "parent": "3956"},
         | 
| 540 | 
            +
                "6869": {"name": "奥地利", "parent": "3956"},
         | 
| 541 | 
            +
                "6870": {"name": "阿塞拜疆", "parent": "3956"},
         | 
| 542 | 
            +
                "6871": {"name": "巴哈马", "parent": "3956"},
         | 
| 543 | 
            +
                "6872": {"name": "巴林", "parent": "3956"},
         | 
| 544 | 
            +
                "6873": {"name": "孟加拉国", "parent": "3956"},
         | 
| 545 | 
            +
                "6874": {"name": "巴巴多斯", "parent": "3956"},
         | 
| 546 | 
            +
                "6875": {"name": "白俄罗斯", "parent": "3956"},
         | 
| 547 | 
            +
                "6876": {"name": "比利时", "parent": "3956"},
         | 
| 548 | 
            +
                "6877": {"name": "伯利兹", "parent": "3956"},
         | 
| 549 | 
            +
                "6878": {"name": "贝宁", "parent": "3956"},
         | 
| 550 | 
            +
                "6879": {"name": "百慕大", "parent": "3956"},
         | 
| 551 | 
            +
                "6880": {"name": "不丹", "parent": "3956"},
         | 
| 552 | 
            +
                "6881": {"name": "玻利维亚", "parent": "3956"},
         | 
| 553 | 
            +
                "6882": {"name": "波黑", "parent": "3956"},
         | 
| 554 | 
            +
                "6883": {"name": "博茨瓦纳", "parent": "3956"},
         | 
| 555 | 
            +
                "6884": {"name": "布维岛", "parent": "3956"},
         | 
| 556 | 
            +
                "6885": {"name": "巴西", "parent": "3956"},
         | 
| 557 | 
            +
                "6886": {"name": "英属印度洋领土", "parent": "3956"},
         | 
| 558 | 
            +
                "6887": {"name": "文莱", "parent": "3956"},
         | 
| 559 | 
            +
                "6888": {"name": "保加利亚", "parent": "3956"},
         | 
| 560 | 
            +
                "6889": {"name": "布基纳法索", "parent": "3956"},
         | 
| 561 | 
            +
                "6890": {"name": "布隆迪", "parent": "3956"},
         | 
| 562 | 
            +
                "6891": {"name": "柬埔寨", "parent": "3956"},
         | 
| 563 | 
            +
                "6892": {"name": "喀麦隆", "parent": "3956"},
         | 
| 564 | 
            +
                "6893": {"name": "佛得角", "parent": "3956"},
         | 
| 565 | 
            +
                "6894": {"name": "开曼群岛", "parent": "3956"},
         | 
| 566 | 
            +
                "6895": {"name": "中非", "parent": "3956"},
         | 
| 567 | 
            +
                "6896": {"name": "乍得", "parent": "3956"},
         | 
| 568 | 
            +
                "6897": {"name": "智利", "parent": "3956"},
         | 
| 569 | 
            +
                "6898": {"name": "圣诞岛", "parent": "3956"},
         | 
| 570 | 
            +
                "6899": {"name": "科科斯(基林)群岛", "parent": "3956"},
         | 
| 571 | 
            +
                "6900": {"name": "哥伦比亚", "parent": "3956"},
         | 
| 572 | 
            +
                "6901": {"name": "科摩罗", "parent": "3956"},
         | 
| 573 | 
            +
                "6902": {"name": "刚果(布)", "parent": "3956"},
         | 
| 574 | 
            +
                "6903": {"name": "刚果(金)", "parent": "3956"},
         | 
| 575 | 
            +
                "6904": {"name": "库克群岛", "parent": "3956"},
         | 
| 576 | 
            +
                "6905": {"name": "哥斯达黎加", "parent": "3956"},
         | 
| 577 | 
            +
                "6906": {"name": "科特迪瓦", "parent": "3956"},
         | 
| 578 | 
            +
                "6907": {"name": "克罗地亚", "parent": "3956"},
         | 
| 579 | 
            +
                "6908": {"name": "古巴", "parent": "3956"},
         | 
| 580 | 
            +
                "6909": {"name": "塞浦路斯", "parent": "3956"},
         | 
| 581 | 
            +
                "6910": {"name": "捷克", "parent": "3956"},
         | 
| 582 | 
            +
                "6911": {"name": "丹麦", "parent": "3956"},
         | 
| 583 | 
            +
                "6912": {"name": "吉布提", "parent": "3956"},
         | 
| 584 | 
            +
                "6913": {"name": "多米尼克", "parent": "3956"},
         | 
| 585 | 
            +
                "6914": {"name": "多米尼加共和国", "parent": "3956"},
         | 
| 586 | 
            +
                "6915": {"name": "东帝汶", "parent": "3956"},
         | 
| 587 | 
            +
                "6916": {"name": "厄瓜多尔", "parent": "3956"},
         | 
| 588 | 
            +
                "6917": {"name": "埃及", "parent": "3956"},
         | 
| 589 | 
            +
                "6918": {"name": "萨尔瓦多", "parent": "3956"},
         | 
| 590 | 
            +
                "6919": {"name": "赤道几内亚", "parent": "3956"},
         | 
| 591 | 
            +
                "6920": {"name": "厄立特里亚", "parent": "3956"},
         | 
| 592 | 
            +
                "6921": {"name": "爱沙尼亚", "parent": "3956"},
         | 
| 593 | 
            +
                "6922": {"name": "埃塞俄比亚", "parent": "3956"},
         | 
| 594 | 
            +
                "6923": {"name": "福克兰群岛(马尔维纳斯)", "parent": "3956"},
         | 
| 595 | 
            +
                "6924": {"name": "法罗群岛", "parent": "3956"},
         | 
| 596 | 
            +
                "6925": {"name": "斐济", "parent": "3956"},
         | 
| 597 | 
            +
                "6926": {"name": "芬兰", "parent": "3956"},
         | 
| 598 | 
            +
                "6927": {"name": "法属圭亚那", "parent": "3956"},
         | 
| 599 | 
            +
                "6928": {"name": "法属波利尼西亚", "parent": "3956"},
         | 
| 600 | 
            +
                "6929": {"name": "法属南部领土", "parent": "3956"},
         | 
| 601 | 
            +
                "6930": {"name": "加蓬", "parent": "3956"},
         | 
| 602 | 
            +
                "6931": {"name": "冈比亚", "parent": "3956"},
         | 
| 603 | 
            +
                "6932": {"name": "格鲁吉亚", "parent": "3956"},
         | 
| 604 | 
            +
                "6933": {"name": "加纳", "parent": "3956"},
         | 
| 605 | 
            +
                "6934": {"name": "直布罗陀", "parent": "3956"},
         | 
| 606 | 
            +
                "6935": {"name": "希腊", "parent": "3956"},
         | 
| 607 | 
            +
                "6936": {"name": "格陵兰", "parent": "3956"},
         | 
| 608 | 
            +
                "6937": {"name": "格林纳达", "parent": "3956"},
         | 
| 609 | 
            +
                "6938": {"name": "瓜德罗普", "parent": "3956"},
         | 
| 610 | 
            +
                "6939": {"name": "关岛", "parent": "3956"},
         | 
| 611 | 
            +
                "6940": {"name": "危地马拉", "parent": "3956"},
         | 
| 612 | 
            +
                "6941": {"name": "几内亚", "parent": "3956"},
         | 
| 613 | 
            +
                "6942": {"name": "几内亚比绍", "parent": "3956"},
         | 
| 614 | 
            +
                "6943": {"name": "圭亚那", "parent": "3956"},
         | 
| 615 | 
            +
                "6944": {"name": "海地", "parent": "3956"},
         | 
| 616 | 
            +
                "6945": {"name": "赫德岛和麦克唐纳岛", "parent": "3956"},
         | 
| 617 | 
            +
                "6946": {"name": "洪都拉斯", "parent": "3956"},
         | 
| 618 | 
            +
                "6947": {"name": "匈牙利", "parent": "3956"},
         | 
| 619 | 
            +
                "6948": {"name": "冰岛", "parent": "3956"},
         | 
| 620 | 
            +
                "6949": {"name": "印度", "parent": "3956"},
         | 
| 621 | 
            +
                "6950": {"name": "印度尼西亚", "parent": "3956"},
         | 
| 622 | 
            +
                "6951": {"name": "伊朗", "parent": "3956"},
         | 
| 623 | 
            +
                "6952": {"name": "伊拉克", "parent": "3956"},
         | 
| 624 | 
            +
                "6953": {"name": "爱尔兰", "parent": "3956"},
         | 
| 625 | 
            +
                "6954": {"name": "以色列", "parent": "3956"},
         | 
| 626 | 
            +
                "6955": {"name": "牙买加", "parent": "3956"},
         | 
| 627 | 
            +
                "6956": {"name": "约旦", "parent": "3956"},
         | 
| 628 | 
            +
                "6957": {"name": "哈萨克斯坦", "parent": "3956"},
         | 
| 629 | 
            +
                "6958": {"name": "肯尼亚", "parent": "3956"},
         | 
| 630 | 
            +
                "6959": {"name": "基里巴斯", "parent": "3956"},
         | 
| 631 | 
            +
                "6960": {"name": "朝鲜", "parent": "3956"},
         | 
| 632 | 
            +
                "6961": {"name": "科威特", "parent": "3956"},
         | 
| 633 | 
            +
                "6962": {"name": "吉尔吉斯斯坦", "parent": "3956"},
         | 
| 634 | 
            +
                "6963": {"name": "老挝", "parent": "3956"},
         | 
| 635 | 
            +
                "6964": {"name": "拉脱维亚", "parent": "3956"},
         | 
| 636 | 
            +
                "6965": {"name": "黎巴嫩", "parent": "3956"},
         | 
| 637 | 
            +
                "6966": {"name": "莱索托", "parent": "3956"},
         | 
| 638 | 
            +
                "6967": {"name": "利比里亚", "parent": "3956"},
         | 
| 639 | 
            +
                "6968": {"name": "利比亚", "parent": "3956"},
         | 
| 640 | 
            +
                "6969": {"name": "列支敦士登", "parent": "3956"},
         | 
| 641 | 
            +
                "6970": {"name": "立陶宛", "parent": "3956"},
         | 
| 642 | 
            +
                "6971": {"name": "卢森堡", "parent": "3956"},
         | 
| 643 | 
            +
                "6972": {"name": "前南马其顿", "parent": "3956"},
         | 
| 644 | 
            +
                "6973": {"name": "马达加斯加", "parent": "3956"},
         | 
| 645 | 
            +
                "6974": {"name": "马拉维", "parent": "3956"},
         | 
| 646 | 
            +
                "6975": {"name": "马尔代夫", "parent": "3956"},
         | 
| 647 | 
            +
                "6976": {"name": "马里", "parent": "3956"},
         | 
| 648 | 
            +
                "6977": {"name": "马耳他", "parent": "3956"},
         | 
| 649 | 
            +
                "6978": {"name": "马绍尔群岛", "parent": "3956"},
         | 
| 650 | 
            +
                "6979": {"name": "马提尼克", "parent": "3956"},
         | 
| 651 | 
            +
                "6980": {"name": "毛里塔尼亚", "parent": "3956"},
         | 
| 652 | 
            +
                "6981": {"name": "毛里求斯", "parent": "3956"},
         | 
| 653 | 
            +
                "6982": {"name": "马约特", "parent": "3956"},
         | 
| 654 | 
            +
                "6983": {"name": "墨西哥", "parent": "3956"},
         | 
| 655 | 
            +
                "6984": {"name": "密克罗尼西亚联邦", "parent": "3956"},
         | 
| 656 | 
            +
                "6985": {"name": "摩尔多瓦", "parent": "3956"},
         | 
| 657 | 
            +
                "6986": {"name": "摩纳哥", "parent": "3956"},
         | 
| 658 | 
            +
                "6987": {"name": "蒙古", "parent": "3956"},
         | 
| 659 | 
            +
                "6988": {"name": "蒙特塞拉特", "parent": "3956"},
         | 
| 660 | 
            +
                "6989": {"name": "摩洛哥", "parent": "3956"},
         | 
| 661 | 
            +
                "6990": {"name": "莫桑比克", "parent": "3956"},
         | 
| 662 | 
            +
                "6991": {"name": "缅甸", "parent": "3956"},
         | 
| 663 | 
            +
                "6992": {"name": "纳米比亚", "parent": "3956"},
         | 
| 664 | 
            +
                "6993": {"name": "瑙鲁", "parent": "3956"},
         | 
| 665 | 
            +
                "6994": {"name": "尼泊尔", "parent": "3956"},
         | 
| 666 | 
            +
                "6995": {"name": "荷兰", "parent": "3956"},
         | 
| 667 | 
            +
                "6996": {"name": "荷属安的列斯", "parent": "3956"},
         | 
| 668 | 
            +
                "6997": {"name": "新喀里多尼亚", "parent": "3956"},
         | 
| 669 | 
            +
                "6998": {"name": "新西兰", "parent": "3956"},
         | 
| 670 | 
            +
                "6999": {"name": "尼加拉瓜", "parent": "3956"},
         | 
| 671 | 
            +
                "7000": {"name": "尼日尔", "parent": "3956"},
         | 
| 672 | 
            +
                "7001": {"name": "尼日利亚", "parent": "3956"},
         | 
| 673 | 
            +
                "7002": {"name": "纽埃", "parent": "3956"},
         | 
| 674 | 
            +
                "7003": {"name": "诺福克岛", "parent": "3956"},
         | 
| 675 | 
            +
                "7004": {"name": "北马里亚纳", "parent": "3956"},
         | 
| 676 | 
            +
                "7005": {"name": "挪威", "parent": "3956"},
         | 
| 677 | 
            +
                "7006": {"name": "阿曼", "parent": "3956"},
         | 
| 678 | 
            +
                "7007": {"name": "巴基斯坦", "parent": "3956"},
         | 
| 679 | 
            +
                "7008": {"name": "帕劳", "parent": "3956"},
         | 
| 680 | 
            +
                "7009": {"name": "巴勒斯坦", "parent": "3956"},
         | 
| 681 | 
            +
                "7010": {"name": "巴拿马", "parent": "3956"},
         | 
| 682 | 
            +
                "7011": {"name": "巴布亚新几内亚", "parent": "3956"},
         | 
| 683 | 
            +
                "7012": {"name": "巴拉圭", "parent": "3956"},
         | 
| 684 | 
            +
                "7013": {"name": "秘鲁", "parent": "3956"},
         | 
| 685 | 
            +
                "7014": {"name": "菲律宾", "parent": "3956"},
         | 
| 686 | 
            +
                "7015": {"name": "皮特凯恩群岛", "parent": "3956"},
         | 
| 687 | 
            +
                "7016": {"name": "波兰", "parent": "3956"},
         | 
| 688 | 
            +
                "7017": {"name": "葡萄牙", "parent": "3956"},
         | 
| 689 | 
            +
                "7018": {"name": "波多黎各", "parent": "3956"},
         | 
| 690 | 
            +
                "7019": {"name": "卡塔尔", "parent": "3956"},
         | 
| 691 | 
            +
                "7020": {"name": "留尼汪", "parent": "3956"},
         | 
| 692 | 
            +
                "7021": {"name": "罗马尼亚", "parent": "3956"},
         | 
| 693 | 
            +
                "7022": {"name": "俄罗斯联邦", "parent": "3956"},
         | 
| 694 | 
            +
                "7023": {"name": "卢旺达", "parent": "3956"},
         | 
| 695 | 
            +
                "7024": {"name": "圣赫勒拿", "parent": "3956"},
         | 
| 696 | 
            +
                "7025": {"name": "圣基茨和尼维斯", "parent": "3956"},
         | 
| 697 | 
            +
                "7026": {"name": "圣卢西亚", "parent": "3956"},
         | 
| 698 | 
            +
                "7027": {"name": "圣皮埃尔和密克隆", "parent": "3956"},
         | 
| 699 | 
            +
                "7028": {"name": "圣文森特和格林纳丁斯", "parent": "3956"},
         | 
| 700 | 
            +
                "7029": {"name": "萨摩亚", "parent": "3956"},
         | 
| 701 | 
            +
                "7030": {"name": "圣马力诺", "parent": "3956"},
         | 
| 702 | 
            +
                "7031": {"name": "圣多美和普林西比", "parent": "3956"},
         | 
| 703 | 
            +
                "7032": {"name": "沙特阿拉伯", "parent": "3956"},
         | 
| 704 | 
            +
                "7033": {"name": "塞内加尔", "parent": "3956"},
         | 
| 705 | 
            +
                "7034": {"name": "塞舌尔", "parent": "3956"},
         | 
| 706 | 
            +
                "7035": {"name": "塞拉利昂", "parent": "3956"},
         | 
| 707 | 
            +
                "7036": {"name": "新加坡", "parent": "3956"},
         | 
| 708 | 
            +
                "7037": {"name": "斯洛伐克", "parent": "3956"},
         | 
| 709 | 
            +
                "7038": {"name": "斯洛文尼亚", "parent": "3956"},
         | 
| 710 | 
            +
                "7039": {"name": "所罗门群岛", "parent": "3956"},
         | 
| 711 | 
            +
                "7040": {"name": "索马里", "parent": "3956"},
         | 
| 712 | 
            +
                "7041": {"name": "南非", "parent": "3956"},
         | 
| 713 | 
            +
                "7042": {"name": "南乔治亚岛和南桑德韦奇岛", "parent": "3956"},
         | 
| 714 | 
            +
                "7043": {"name": "斯里兰卡", "parent": "3956"},
         | 
| 715 | 
            +
                "7044": {"name": "苏丹", "parent": "3956"},
         | 
| 716 | 
            +
                "7045": {"name": "苏里南", "parent": "3956"},
         | 
| 717 | 
            +
                "7046": {"name": "斯瓦尔巴群岛", "parent": "3956"},
         | 
| 718 | 
            +
                "7047": {"name": "斯威士兰", "parent": "3956"},
         | 
| 719 | 
            +
                "7048": {"name": "瑞典", "parent": "3956"},
         | 
| 720 | 
            +
                "7049": {"name": "瑞士", "parent": "3956"},
         | 
| 721 | 
            +
                "7050": {"name": "叙利亚", "parent": "3956"},
         | 
| 722 | 
            +
                "7051": {"name": "塔吉克斯坦", "parent": "3956"},
         | 
| 723 | 
            +
                "7052": {"name": "坦桑尼亚", "parent": "3956"},
         | 
| 724 | 
            +
                "7053": {"name": "泰国", "parent": "3956"},
         | 
| 725 | 
            +
                "7054": {"name": "多哥", "parent": "3956"},
         | 
| 726 | 
            +
                "7055": {"name": "托克劳", "parent": "3956"},
         | 
| 727 | 
            +
                "7056": {"name": "汤加", "parent": "3956"},
         | 
| 728 | 
            +
                "7057": {"name": "特立尼达和多巴哥", "parent": "3956"},
         | 
| 729 | 
            +
                "7058": {"name": "突尼斯", "parent": "3956"},
         | 
| 730 | 
            +
                "7059": {"name": "土耳其", "parent": "3956"},
         | 
| 731 | 
            +
                "7060": {"name": "土库曼斯坦", "parent": "3956"},
         | 
| 732 | 
            +
                "7061": {"name": "特克斯科斯群岛", "parent": "3956"},
         | 
| 733 | 
            +
                "7062": {"name": "图瓦卢", "parent": "3956"},
         | 
| 734 | 
            +
                "7063": {"name": "乌干达", "parent": "3956"},
         | 
| 735 | 
            +
                "7064": {"name": "乌克兰", "parent": "3956"},
         | 
| 736 | 
            +
                "7065": {"name": "阿联酋", "parent": "3956"},
         | 
| 737 | 
            +
                "7066": {"name": "美国本土外小岛屿", "parent": "3956"},
         | 
| 738 | 
            +
                "7067": {"name": "乌拉圭", "parent": "3956"},
         | 
| 739 | 
            +
                "7068": {"name": "乌兹别克斯坦", "parent": "3956"},
         | 
| 740 | 
            +
                "7069": {"name": "瓦努阿图", "parent": "3956"},
         | 
| 741 | 
            +
                "7070": {"name": "梵蒂冈", "parent": "3956"},
         | 
| 742 | 
            +
                "7071": {"name": "委内瑞拉", "parent": "3956"},
         | 
| 743 | 
            +
                "7072": {"name": "越南", "parent": "3956"},
         | 
| 744 | 
            +
                "7073": {"name": "英属维尔京群岛", "parent": "3956"},
         | 
| 745 | 
            +
                "7074": {"name": "美属维尔京群岛", "parent": "3956"},
         | 
| 746 | 
            +
                "7075": {"name": "瓦利斯和富图纳", "parent": "3956"},
         | 
| 747 | 
            +
                "7076": {"name": "西撒哈拉", "parent": "3956"},
         | 
| 748 | 
            +
                "7077": {"name": "也门", "parent": "3956"},
         | 
| 749 | 
            +
                "7078": {"name": "南斯拉夫", "parent": "3956"},
         | 
| 750 | 
            +
                "7079": {"name": "赞比亚", "parent": "3956"},
         | 
| 751 | 
            +
                "7080": {"name": "津巴布韦", "parent": "3956"},
         | 
| 752 | 
            +
                "7081": {"name": "塞尔维亚", "parent": "3956"},
         | 
| 753 | 
            +
                "7082": {"name": "雄安新区", "parent": "4"},
         | 
| 754 | 
            +
                "7084": {"name": "天门市", "parent": "18"},
         | 
| 755 | 
             
            }
         | 
| 756 |  | 
| 757 | 
            +
            # Every region name in TBL, kept as a set for constant-time membership tests.
            NM_SET = {entry["name"] for entry in TBL.values()}
         | 
| 758 | 
            +
             | 
| 759 |  | 
| 760 | 
             
            def get_names(id):
                """Resolve a region id to its name followed by its ancestors' names.

                Args:
                    id: A key of ``TBL`` (str or int), or any other value.

                Returns:
                    A list of names, starting with the region itself and walking up
                    the ``parent`` links. An empty list is returned for a falsy id,
                    the string "none" (any case), or an id missing from ``TBL``.
                    A non-numeric value is returned unchanged as a one-element list.
                """
                names = []
                visited = set()
                current = id
                while current and str(current).lower() != "none":
                    current = str(current)
                    # Validate and look up the same, stripped key; previously the
                    # stripped text was validated but the raw text was looked up,
                    # so whitespace-padded ids never resolved.
                    key = current.strip()
                    if not re.match("[0-9]+$", key):
                        # Not an id at all: hand the value back untouched.
                        names.append(current)
                        break
                    if key in visited:
                        # Defensive stop in case the parent links ever form a cycle.
                        break
                    visited.add(key)
                    entry = TBL.get(key)
                    if not entry:
                        break
                    names.append(entry["name"])
                    current = entry["parent"]
                return names
         | 
| 775 |  | 
| 776 | 
            +
             | 
| 777 | 
            +
             | 
| 778 | 
             
            def isName(nm):
                """Return True when ``nm`` matches a known region name.

                Three forms are tried in order: the text as given, the text with "市"
                appended, and the text with a trailing "省" or "自治区" suffix
                (optionally preceded by 回族/壮族/维吾尔) removed.
                """
                return (
                    nm in NM_SET
                    or nm + "市" in NM_SET
                    or re.sub(r"(省|(回族|壮族|维吾尔)*自治区)$", "", nm) in NM_SET
                )
         | 
    	
        deepdoc/parser/resume/entities/schools.py
    CHANGED
    
    | @@ -16,8 +16,11 @@ import json | |
| 16 | 
             
            import re
         | 
| 17 | 
             
            import copy
         | 
| 18 | 
             
            import pandas as pd
         | 
|  | |
| 19 | 
             
            current_file_path = os.path.dirname(os.path.abspath(__file__))
         | 
| 20 | 
            -
            TBL = pd.read_csv( | 
|  | |
|  | |
| 21 | 
             
            TBL["name_en"] = TBL["name_en"].map(lambda x: x.lower().strip())
         | 
| 22 | 
             
            GOOD_SCH = json.load(open(os.path.join(current_file_path, "res/good_sch.json"), "r"))
         | 
| 23 | 
             
            GOOD_SCH = set([re.sub(r"[,. &()()]+", "", c) for c in GOOD_SCH])
         | 
| @@ -26,14 +29,15 @@ GOOD_SCH = set([re.sub(r"[,. &()()]+", "", c) for c in GOOD_SCH]) | |
| 26 | 
             
            def loadRank(fnm):
         | 
| 27 | 
             
                global TBL
         | 
| 28 | 
             
                TBL["rank"] = 1000000
         | 
| 29 | 
            -
                with open(fnm, "r", encoding= | 
| 30 | 
             
                    while True:
         | 
| 31 | 
            -
                         | 
| 32 | 
            -
                        if not  | 
| 33 | 
            -
             | 
|  | |
| 34 | 
             
                        try:
         | 
| 35 | 
            -
                            nm,rk =  | 
| 36 | 
            -
                            #assert len(TBL[((TBL.name_cn == nm) | (TBL.name_en == nm))]),f"<{nm}>"
         | 
| 37 | 
             
                            TBL.loc[((TBL.name_cn == nm) | (TBL.name_en == nm)), "rank"] = rk
         | 
| 38 | 
             
                        except Exception:
         | 
| 39 | 
             
                            pass
         | 
| @@ -44,27 +48,35 @@ loadRank(os.path.join(current_file_path, "res/school.rank.csv")) | |
| 44 |  | 
| 45 | 
             
            def split(txt):
         | 
| 46 | 
             
                tks = []
         | 
| 47 | 
            -
                for t in re.sub(r"[ \t]+", " ",txt).split():
         | 
| 48 | 
            -
                    if  | 
| 49 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
| 50 | 
             
                        tks[-1] = tks[-1] + " " + t
         | 
| 51 | 
            -
                    else: | 
|  | |
| 52 | 
             
                return tks
         | 
| 53 |  | 
| 54 |  | 
| 55 | 
             
            def select(nm):
         | 
| 56 | 
             
                global TBL
         | 
| 57 | 
            -
                if not nm: | 
| 58 | 
            -
             | 
|  | |
|  | |
| 59 | 
             
                nm = split(nm)[0]
         | 
| 60 | 
             
                nm = str(nm).lower().strip()
         | 
| 61 | 
             
                nm = re.sub(r"[((][^()()]+[))]", "", nm.lower())
         | 
| 62 | 
             
                nm = re.sub(r"(^the |[,.&()();;·]+|^(英国|美国|瑞士))", "", nm)
         | 
| 63 | 
             
                nm = re.sub(r"大学.*学院", "大学", nm)
         | 
| 64 | 
             
                tbl = copy.deepcopy(TBL)
         | 
| 65 | 
            -
                tbl["hit_alias"] = tbl["alias"].map(lambda x:nm in set(x.split("+")))
         | 
| 66 | 
            -
                res = tbl[((tbl.name_cn == nm) | (tbl.name_en == nm) |  | 
| 67 | 
            -
                if res.empty: | 
|  | |
| 68 |  | 
| 69 | 
             
                return json.loads(res.to_json(orient="records"))[0]
         | 
| 70 |  | 
| @@ -74,4 +86,3 @@ def is_good(nm): | |
| 74 | 
             
                nm = re.sub(r"[((][^()()]+[))]", "", nm.lower())
         | 
| 75 | 
             
                nm = re.sub(r"[''`‘’“”,. &()();;]+", "", nm)
         | 
| 76 | 
             
                return nm in GOOD_SCH
         | 
| 77 | 
            -
             | 
|  | |
| 16 | 
             
            import re
         | 
| 17 | 
             
            import copy
         | 
| 18 | 
             
            import pandas as pd
         | 
| 19 | 
            +
             | 
| 20 | 
             
            current_file_path = os.path.dirname(os.path.abspath(__file__))
            # School table: tab-separated file with a header row; missing cells become "".
            TBL = pd.read_csv(
                os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0
            ).fillna("")
            # English names are compared in lower case without surrounding whitespace.
            TBL["name_en"] = TBL["name_en"].map(lambda x: x.lower().strip())
            # Read the JSON list inside a context manager so the handle is closed, and
            # decode it as UTF-8 explicitly (as loadRank does) instead of relying on
            # the platform default encoding.
            with open(
                os.path.join(current_file_path, "res/good_sch.json"), "r", encoding="utf-8"
            ) as _good_sch_file:
                GOOD_SCH = json.load(_good_sch_file)
            # Store names with punctuation and spaces removed for loose matching.
            GOOD_SCH = {re.sub(r"[,. &()()]+", "", c) for c in GOOD_SCH}
         | 
|  | |
| 29 | 
             
            def loadRank(fnm):
                """Fill the ``rank`` column of ``TBL`` from a comma-separated file.

                Every row is first given the placeholder rank 1000000. Each line of
                ``fnm`` is then read as ``name,rank``; rows whose ``name_cn`` or
                ``name_en`` equals the name receive that rank. Lines that cannot be
                processed are skipped.

                Args:
                    fnm: Path of the UTF-8 encoded rank file.
                """
                global TBL
                TBL["rank"] = 1000000
                with open(fnm, "r", encoding="utf-8") as rank_file:
                    for raw in rank_file:
                        fields = raw.strip("\n").split(",")
                        try:
                            school, rank = fields[0].strip(), int(fields[1])
                            matches = (TBL.name_cn == school) | (TBL.name_en == school)
                            TBL.loc[matches, "rank"] = rank
                        except Exception:
                            # Best effort: a malformed line must not abort the load.
                            pass
         | 
|  | |
| 48 |  | 
| 49 | 
             
            def split(txt):
                """Split text on whitespace, keeping runs of Latin words together.

                A token that starts with an ASCII letter is appended to the previous
                token (joined by one space) when that previous token ends with an
                ASCII letter. Every other token starts a new entry.

                Args:
                    txt: The string to tokenize.

                Returns:
                    The list of merged tokens; empty for empty or whitespace-only text.
                """
                tks = []
                # str.split() without arguments already collapses whitespace runs, so
                # no separate normalisation pass is needed.
                for t in txt.split():
                    if tks and re.match(r".*[a-zA-Z]$", tks[-1]) and re.match(r"[a-zA-Z]", t):
                        tks[-1] = tks[-1] + " " + t
                    else:
                        tks.append(t)
                return tks
         | 
| 62 |  | 
| 63 |  | 
| 64 | 
             
            def select(nm):
                """Look up a school in ``TBL`` by Chinese name, English name or alias.

                The name is reduced to the first token produced by ``split``, lower-cased,
                and cleaned by the regular expressions below before it is compared with
                the ``name_cn``, ``name_en`` and "+"-separated ``alias`` columns.

                Args:
                    nm: A school name, or a list whose first element is the name.

                Returns:
                    The first matching row as a dict (including a ``hit_alias`` flag),
                    or None when the input is empty or nothing matches.
                """
                if not nm:
                    return None
                if isinstance(nm, list):
                    nm = str(nm[0])
                tokens = split(nm)
                if not tokens:
                    # Whitespace-only input yields no tokens; indexing [0] would raise.
                    return None
                nm = str(tokens[0]).lower().strip()
                nm = re.sub(r"[((][^()()]+[))]", "", nm)
                nm = re.sub(r"(^the |[,.&()();;·]+|^(英国|美国|瑞士))", "", nm)
                nm = re.sub(r"大学.*学院", "大学", nm)
                # Build the row mask on the shared table directly rather than
                # deep-copying the whole table on every call.
                hit_alias = TBL["alias"].map(lambda aliases: nm in aliases.split("+"))
                matched = (TBL.name_cn == nm) | (TBL.name_en == nm) | hit_alias
                res = TBL[matched]
                if res.empty:
                    return None

                # Keep the hit_alias field in the returned record, as before.
                res = res.assign(hit_alias=hit_alias[matched])
                return json.loads(res.to_json(orient="records"))[0]
         | 
| 82 |  | 
|  | |
| 86 | 
             
                nm = re.sub(r"[((][^()()]+[))]", "", nm.lower())
         | 
| 87 | 
             
                nm = re.sub(r"[''`‘’“”,. &()();;]+", "", nm)
         | 
| 88 | 
             
                return nm in GOOD_SCH
         | 
|  | 
    	
        deepdoc/parser/resume/step_two.py
    CHANGED
    
    | @@ -25,7 +25,8 @@ from xpinyin import Pinyin | |
| 25 | 
             
            from contextlib import contextmanager
         | 
| 26 |  | 
| 27 |  | 
| 28 | 
            -
            class TimeoutException(Exception): | 
|  | |
| 29 |  | 
| 30 |  | 
| 31 | 
             
            @contextmanager
         | 
| @@ -50,8 +51,10 @@ def rmHtmlTag(line): | |
| 50 |  | 
| 51 |  | 
| 52 | 
             
            def highest_degree(dg):
         | 
| 53 | 
            -
                if not dg: | 
| 54 | 
            -
             | 
|  | |
|  | |
| 55 | 
             
                m = {"初中": 0, "高中": 1, "中专": 2, "大专": 3, "专升本": 4, "本科": 5, "硕士": 6, "博士": 7, "博士后": 8}
         | 
| 56 | 
             
                return sorted([(d, m.get(d, -1)) for d in dg], key=lambda x: x[1] * -1)[0][0]
         | 
| 57 |  | 
| @@ -68,10 +71,12 @@ def forEdu(cv): | |
| 68 | 
             
                for ii, n in enumerate(sorted(cv["education_obj"], key=lambda x: x.get("start_time", "3"))):
         | 
| 69 | 
             
                    e = {}
         | 
| 70 | 
             
                    if n.get("end_time"):
         | 
| 71 | 
            -
                        if n["end_time"] > edu_end_dt: | 
|  | |
| 72 | 
             
                        try:
         | 
| 73 | 
             
                            dt = n["end_time"]
         | 
| 74 | 
            -
                            if re.match(r"[0-9]{9,}", dt): | 
|  | |
| 75 | 
             
                            y, m, d = getYMD(dt)
         | 
| 76 | 
             
                            ed_dt.append(str(y))
         | 
| 77 | 
             
                            e["end_dt_kwd"] = str(y)
         | 
| @@ -80,7 +85,8 @@ def forEdu(cv): | |
| 80 | 
             
                    if n.get("start_time"):
         | 
| 81 | 
             
                        try:
         | 
| 82 | 
             
                            dt = n["start_time"]
         | 
| 83 | 
            -
                            if re.match(r"[0-9]{9,}", dt): | 
|  | |
| 84 | 
             
                            y, m, d = getYMD(dt)
         | 
| 85 | 
             
                            st_dt.append(str(y))
         | 
| 86 | 
             
                            e["start_dt_kwd"] = str(y)
         | 
| @@ -89,13 +95,20 @@ def forEdu(cv): | |
| 89 |  | 
| 90 | 
             
                    r = schools.select(n.get("school_name", ""))
         | 
| 91 | 
             
                    if r:
         | 
| 92 | 
            -
                        if str(r.get("type", "")) == "1": | 
| 93 | 
            -
             | 
| 94 | 
            -
                        if str(r.get(" | 
| 95 | 
            -
             | 
| 96 | 
            -
                        if str(r.get(" | 
| 97 | 
            -
             | 
| 98 | 
            -
                        if r.get(" | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 99 |  | 
| 100 | 
             
                    if n.get("school_name") and isinstance(n["school_name"], str):
         | 
| 101 | 
             
                        sch.append(re.sub(r"(211|985|重点大学|[,&;;-])", "", n["school_name"]))
         | 
| @@ -106,22 +119,25 @@ def forEdu(cv): | |
| 106 | 
             
                        maj.append(n["discipline_name"])
         | 
| 107 | 
             
                        e["major_kwd"] = n["discipline_name"]
         | 
| 108 |  | 
| 109 | 
            -
                    if not n.get("degree") and "985" in fea and not first_fea: | 
|  | |
| 110 |  | 
| 111 | 
             
                    if n.get("degree"):
         | 
| 112 | 
             
                        d = degrees.get_name(n["degree"])
         | 
| 113 | 
            -
                        if d: | 
| 114 | 
            -
             | 
| 115 | 
            -
             | 
| 116 | 
            -
             | 
| 117 | 
            -
             | 
| 118 | 
            -
             | 
| 119 |  | 
| 120 | 
             
                        # for first degree
         | 
| 121 | 
             
                        if not fdeg and d in ["中专", "专升本", "专科", "本科", "大专"]:
         | 
| 122 | 
             
                            fdeg = [d]
         | 
| 123 | 
            -
                            if n.get("school_name"): | 
| 124 | 
            -
             | 
|  | |
|  | |
| 125 | 
             
                            first_fea = copy.deepcopy(fea)
         | 
| 126 |  | 
| 127 | 
             
                    edu_nst.append(e)
         | 
| @@ -140,16 +156,26 @@ def forEdu(cv): | |
| 140 | 
             
                else:
         | 
| 141 | 
             
                    cv["sch_rank_kwd"].append("一般学校")
         | 
| 142 |  | 
| 143 | 
            -
                if edu_nst: | 
| 144 | 
            -
             | 
| 145 | 
            -
                if  | 
| 146 | 
            -
             | 
| 147 | 
            -
                if  | 
| 148 | 
            -
             | 
| 149 | 
            -
                if  | 
| 150 | 
            -
             | 
| 151 | 
            -
                if  | 
| 152 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 153 | 
             
                if deg:
         | 
| 154 | 
             
                    if "本科" in deg and "专科" in deg:
         | 
| 155 | 
             
                        deg.append("专升本")
         | 
| @@ -158,8 +184,10 @@ def forEdu(cv): | |
| 158 | 
             
                    cv["highest_degree_kwd"] = highest_degree(deg)
         | 
| 159 | 
             
                if edu_end_dt:
         | 
| 160 | 
             
                    try:
         | 
| 161 | 
            -
                        if re.match(r"[0-9]{9,}", edu_end_dt): | 
| 162 | 
            -
             | 
|  | |
|  | |
| 163 | 
             
                        y, m, d = getYMD(edu_end_dt)
         | 
| 164 | 
             
                        cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
         | 
| 165 | 
             
                    except Exception as e:
         | 
| @@ -171,7 +199,8 @@ def forEdu(cv): | |
| 171 | 
             
                            or not cv.get("degree_kwd"):
         | 
| 172 | 
             
                        for c in sch:
         | 
| 173 | 
             
                            if schools.is_good(c):
         | 
| 174 | 
            -
                                if "tag_kwd" not in cv: | 
|  | |
| 175 | 
             
                                cv["tag_kwd"].append("好学校")
         | 
| 176 | 
             
                                cv["tag_kwd"].append("好学历")
         | 
| 177 | 
             
                                break
         | 
| @@ -180,28 +209,39 @@ def forEdu(cv): | |
| 180 | 
             
                        any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \
         | 
| 181 | 
             
                            or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \
         | 
| 182 | 
             
                            or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]):
         | 
| 183 | 
            -
                        if "tag_kwd" not in cv: | 
| 184 | 
            -
             | 
| 185 | 
            -
             | 
| 186 | 
            -
             | 
| 187 | 
            -
             | 
| 188 | 
            -
                if cv.get(" | 
| 189 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 190 |  | 
| 191 | 
             
                return cv
         | 
| 192 |  | 
| 193 |  | 
| 194 | 
             
            def forProj(cv):
         | 
| 195 | 
            -
                if not cv.get("project_obj"): | 
|  | |
| 196 |  | 
| 197 | 
             
                pro_nms, desc = [], []
         | 
| 198 | 
             
                for i, n in enumerate(
         | 
| 199 | 
            -
                        sorted(cv.get("project_obj", []), key=lambda x: str(x.get("updated_at", "")) if  | 
| 200 | 
             
                               reverse=True)):
         | 
| 201 | 
            -
                    if n.get("name"): | 
| 202 | 
            -
             | 
| 203 | 
            -
                    if n.get(" | 
| 204 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
| 205 |  | 
| 206 | 
             
                if pro_nms:
         | 
| 207 | 
             
                    # cv["pro_nms_tks"] = rag_tokenizer.tokenize(" ".join(pro_nms))
         | 
| @@ -233,15 +273,16 @@ def forWork(cv): | |
| 233 | 
             
                work_st_tm = ""
         | 
| 234 | 
             
                corp_tags = []
         | 
| 235 | 
             
                for i, n in enumerate(
         | 
| 236 | 
            -
                        sorted(cv.get("work_obj", []), key=lambda x: str(x.get("start_time", "")) if  | 
| 237 | 
             
                               reverse=True)):
         | 
| 238 | 
            -
                    if  | 
| 239 | 
             
                        try:
         | 
| 240 | 
             
                            n = json_loads(n)
         | 
| 241 | 
             
                        except Exception:
         | 
| 242 | 
             
                            continue
         | 
| 243 |  | 
| 244 | 
            -
                    if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm): | 
|  | |
| 245 | 
             
                    for c in flds:
         | 
| 246 | 
             
                        if not n.get(c) or str(n[c]) == '0':
         | 
| 247 | 
             
                            fea[c].append("")
         | 
| @@ -262,14 +303,18 @@ def forWork(cv): | |
| 262 | 
             
                        fea[c].append(rmHtmlTag(str(n[c]).lower()))
         | 
| 263 |  | 
| 264 | 
             
                    y, m, d = getYMD(n.get("start_time"))
         | 
| 265 | 
            -
                    if not y or not m: | 
|  | |
| 266 | 
             
                    st = "%s-%02d-%02d" % (y, int(m), int(d))
         | 
| 267 | 
             
                    latest_job_tm = st
         | 
| 268 |  | 
| 269 | 
             
                    y, m, d = getYMD(n.get("end_time"))
         | 
| 270 | 
            -
                    if (not y or not m) and i > 0: | 
| 271 | 
            -
             | 
| 272 | 
            -
                    if not y or not m: | 
|  | |
|  | |
|  | |
| 273 | 
             
                    ed = "%s-%02d-%02d" % (y, int(m), int(d))
         | 
| 274 |  | 
| 275 | 
             
                    try:
         | 
| @@ -279,22 +324,28 @@ def forWork(cv): | |
| 279 |  | 
| 280 | 
             
                    if n.get("scale"):
         | 
| 281 | 
             
                        r = re.search(r"^([0-9]+)", str(n["scale"]))
         | 
| 282 | 
            -
                        if r: | 
|  | |
| 283 |  | 
| 284 | 
             
                if goodcorp:
         | 
| 285 | 
            -
                    if "tag_kwd" not in cv: | 
|  | |
| 286 | 
             
                    cv["tag_kwd"].append("好公司")
         | 
| 287 | 
             
                if goodcorp_:
         | 
| 288 | 
            -
                    if "tag_kwd" not in cv: | 
|  | |
| 289 | 
             
                    cv["tag_kwd"].append("好公司(曾)")
         | 
| 290 |  | 
| 291 | 
             
                if corp_tags:
         | 
| 292 | 
            -
                    if "tag_kwd" not in cv: | 
|  | |
| 293 | 
             
                    cv["tag_kwd"].extend(corp_tags)
         | 
| 294 | 
             
                    cv["corp_tag_kwd"] = [c for c in corp_tags if re.match(r"(综合|行业)", c)]
         | 
| 295 |  | 
| 296 | 
            -
                if latest_job_tm: | 
| 297 | 
            -
             | 
|  | |
|  | |
| 298 |  | 
| 299 | 
             
                if fea["position_name"]:
         | 
| 300 | 
             
                    cv["position_name_tks"] = rag_tokenizer.tokenize(fea["position_name"][0])
         | 
| @@ -317,18 +368,23 @@ def forWork(cv): | |
| 317 | 
             
                    cv["responsibilities_ltks"] = rag_tokenizer.tokenize(fea["responsibilities"][0])
         | 
| 318 | 
             
                    cv["resp_ltks"] = rag_tokenizer.tokenize(" ".join(fea["responsibilities"][1:]))
         | 
| 319 |  | 
| 320 | 
            -
                if fea["subordinates_count"]: | 
|  | |
| 321 | 
             
                                                                           re.match(r"[^0-9]+$", str(i))]
         | 
| 322 | 
            -
                if fea["subordinates_count"]: | 
|  | |
| 323 |  | 
| 324 | 
            -
                if  | 
| 325 | 
            -
             | 
|  | |
|  | |
| 326 | 
             
                for i in cv.get("corporation_id", []):
         | 
| 327 | 
             
                    cv["baike_flt"] = max(corporations.baike(i), cv["baike_flt"] if "baike_flt" in cv else 0)
         | 
| 328 |  | 
| 329 | 
             
                if work_st_tm:
         | 
| 330 | 
             
                    try:
         | 
| 331 | 
            -
                        if re.match(r"[0-9]{9,}", work_st_tm): | 
|  | |
| 332 | 
             
                        y, m, d = getYMD(work_st_tm)
         | 
| 333 | 
             
                        cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
         | 
| 334 | 
             
                    except Exception as e:
         | 
| @@ -339,28 +395,37 @@ def forWork(cv): | |
| 339 | 
             
                    cv["dua_flt"] = np.mean(duas)
         | 
| 340 | 
             
                    cv["cur_dua_int"] = duas[0]
         | 
| 341 | 
             
                    cv["job_num_int"] = len(duas)
         | 
| 342 | 
            -
                if scales: | 
|  | |
| 343 | 
             
                return cv
         | 
| 344 |  | 
| 345 |  | 
| 346 | 
             
            def turnTm2Dt(b):
         | 
| 347 | 
            -
                if not b: | 
|  | |
| 348 | 
             
                b = str(b).strip()
         | 
| 349 | 
            -
                if re.match(r"[0-9]{10,}", b): | 
|  | |
| 350 | 
             
                return b
         | 
| 351 |  | 
| 352 |  | 
| 353 | 
             
            def getYMD(b):
         | 
| 354 | 
             
                y, m, d = "", "", "01"
         | 
| 355 | 
            -
                if not b: | 
|  | |
| 356 | 
             
                b = turnTm2Dt(b)
         | 
| 357 | 
            -
                if re.match(r"[0-9]{4}", b): | 
|  | |
| 358 | 
             
                r = re.search(r"[0-9]{4}.?([0-9]{1,2})", b)
         | 
| 359 | 
            -
                if r: | 
|  | |
| 360 | 
             
                r = re.search(r"[0-9]{4}.?[0-9]{,2}.?([0-9]{1,2})", b)
         | 
| 361 | 
            -
                if r: | 
| 362 | 
            -
             | 
| 363 | 
            -
                if not  | 
|  | |
|  | |
|  | |
| 364 | 
             
                return (y, m, d)
         | 
| 365 |  | 
| 366 |  | 
| @@ -369,7 +434,8 @@ def birth(cv): | |
| 369 | 
             
                    cv["integerity_flt"] *= 0.9
         | 
| 370 | 
             
                    return cv
         | 
| 371 | 
             
                y, m, d = getYMD(cv["birth"])
         | 
| 372 | 
            -
                if not m or not y: | 
|  | |
| 373 | 
             
                b = "%s-%02d-%02d" % (y, int(m), int(d))
         | 
| 374 | 
             
                cv["birth_dt"] = b
         | 
| 375 | 
             
                cv["birthday_kwd"] = "%02d%02d" % (int(m), int(d))
         | 
| @@ -380,7 +446,8 @@ def birth(cv): | |
| 380 |  | 
| 381 | 
             
            def parse(cv):
         | 
| 382 | 
             
                for k in cv.keys():
         | 
| 383 | 
            -
                    if cv[k] == '\\N': | 
|  | |
| 384 | 
             
                # cv = cv.asDict()
         | 
| 385 | 
             
                tks_fld = ["address", "corporation_name", "discipline_name", "email", "expect_city_names",
         | 
| 386 | 
             
                           "expect_industry_name", "expect_position_name", "industry_name", "industry_names", "name",
         | 
| @@ -402,9 +469,12 @@ def parse(cv): | |
| 402 |  | 
| 403 | 
             
                rmkeys = []
         | 
| 404 | 
             
                for k in cv.keys():
         | 
| 405 | 
            -
                    if cv[k] is None: | 
| 406 | 
            -
             | 
| 407 | 
            -
             | 
|  | |
|  | |
|  | |
| 408 |  | 
| 409 | 
             
                integerity = 0.
         | 
| 410 | 
             
                flds_num = 0.
         | 
| @@ -414,7 +484,8 @@ def parse(cv): | |
| 414 | 
             
                    flds_num += len(flds)
         | 
| 415 | 
             
                    for f in flds:
         | 
| 416 | 
             
                        v = str(cv.get(f, ""))
         | 
| 417 | 
            -
                        if len(v) > 0 and v != '0' and v != '[]': | 
|  | |
| 418 |  | 
| 419 | 
             
                hasValues(tks_fld)
         | 
| 420 | 
             
                hasValues(small_tks_fld)
         | 
| @@ -433,7 +504,8 @@ def parse(cv): | |
| 433 | 
             
                                 (r"[ ()\(\)人/·0-9-]+", ""),
         | 
| 434 | 
             
                                 (r".*(元|规模|于|=|北京|上海|至今|中国|工资|州|shanghai|强|餐饮|融资|职).*", "")]:
         | 
| 435 | 
             
                        cv["corporation_type"] = re.sub(p, r, cv["corporation_type"], 1000, re.IGNORECASE)
         | 
| 436 | 
            -
                    if len(cv["corporation_type"]) < 2: | 
|  | |
| 437 |  | 
| 438 | 
             
                if cv.get("political_status"):
         | 
| 439 | 
             
                    for p, r in [
         | 
| @@ -441,9 +513,11 @@ def parse(cv): | |
| 441 | 
             
                        (r".*(无党派|公民).*", "群众"),
         | 
| 442 | 
             
                        (r".*团员.*", "团员")]:
         | 
| 443 | 
             
                        cv["political_status"] = re.sub(p, r, cv["political_status"])
         | 
| 444 | 
            -
                    if not re.search(r"[党团群]", cv["political_status"]): | 
|  | |
| 445 |  | 
| 446 | 
            -
                if cv.get("phone"): | 
|  | |
| 447 |  | 
| 448 | 
             
                keys = list(cv.keys())
         | 
| 449 | 
             
                for k in keys:
         | 
| @@ -454,9 +528,11 @@ def parse(cv): | |
| 454 | 
             
                            cv[k] = [a for _, a in cv[k].items()]
         | 
| 455 | 
             
                            nms = []
         | 
| 456 | 
             
                            for n in cv[k]:
         | 
| 457 | 
            -
                                if  | 
|  | |
| 458 | 
             
                                n["name"] = re.sub(r"((442)|\t )", "", n["name"]).strip().lower()
         | 
| 459 | 
            -
                                if not n["name"]: | 
|  | |
| 460 | 
             
                                nms.append(n["name"])
         | 
| 461 | 
             
                            if nms:
         | 
| 462 | 
             
                                t = k[:-4]
         | 
| @@ -469,15 +545,18 @@ def parse(cv): | |
| 469 | 
             
                    # tokenize fields
         | 
| 470 | 
             
                    if k in tks_fld:
         | 
| 471 | 
             
                        cv[f"{k}_tks"] = rag_tokenizer.tokenize(cv[k])
         | 
| 472 | 
            -
                        if k in small_tks_fld: | 
|  | |
| 473 |  | 
| 474 | 
             
                    # keyword fields
         | 
| 475 | 
            -
                    if k in kwd_fld: | 
|  | |
| 476 | 
             
                                                       for n in re.split(r"[\t,,;;. ]",
         | 
| 477 | 
             
                                                                         re.sub(r"([^a-zA-Z])[ ]+([^a-zA-Z ])", r"\1,\2", cv[k])
         | 
| 478 | 
             
                                                                         ) if n]
         | 
| 479 |  | 
| 480 | 
            -
                    if k in num_fld and cv.get(k): | 
|  | |
| 481 |  | 
| 482 | 
             
                cv["email_kwd"] = cv.get("email_tks", "").replace(" ", "")
         | 
| 483 | 
             
                # for name field
         | 
| @@ -501,10 +580,12 @@ def parse(cv): | |
| 501 | 
             
                    cv["name_py_pref0_tks"] = ""
         | 
| 502 | 
             
                    cv["name_py_pref_tks"] = ""
         | 
| 503 | 
             
                    for py in PY.get_pinyins(nm[:20], ''):
         | 
| 504 | 
            -
                        for i in range(2, len(py) + 1): | 
|  | |
| 505 | 
             
                    for py in PY.get_pinyins(nm[:20], ' '):
         | 
| 506 | 
             
                        py = py.split()
         | 
| 507 | 
            -
                        for i in range(1, len(py) + 1): | 
|  | |
| 508 |  | 
| 509 | 
             
                    cv["name_kwd"] = name
         | 
| 510 | 
             
                    cv["name_pinyin_kwd"] = PY.get_pinyins(nm[:20], ' ')[:3]
         | 
| @@ -526,22 +607,30 @@ def parse(cv): | |
| 526 | 
             
                    cv["updated_at_dt"] = cv["updated_at"].strftime('%Y-%m-%d %H:%M:%S')
         | 
| 527 | 
             
                else:
         | 
| 528 | 
             
                    y, m, d = getYMD(str(cv.get("updated_at", "")))
         | 
| 529 | 
            -
                    if not y: | 
| 530 | 
            -
             | 
| 531 | 
            -
                    if not  | 
|  | |
|  | |
|  | |
| 532 | 
             
                    cv["updated_at_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
         | 
| 533 | 
             
                    # long text tokenize
         | 
| 534 |  | 
| 535 | 
            -
                if cv.get("responsibilities"): | 
|  | |
| 536 |  | 
| 537 | 
             
                # for yes or no field
         | 
| 538 | 
             
                fea = []
         | 
| 539 | 
             
                for f, y, n in is_fld:
         | 
| 540 | 
            -
                    if f not in cv: | 
| 541 | 
            -
             | 
| 542 | 
            -
                    if cv[f] == ' | 
|  | |
|  | |
|  | |
| 543 |  | 
| 544 | 
            -
                if fea: | 
|  | |
| 545 |  | 
| 546 | 
             
                cv = forEdu(cv)
         | 
| 547 | 
             
                cv = forProj(cv)
         | 
| @@ -550,9 +639,11 @@ def parse(cv): | |
| 550 |  | 
| 551 | 
             
                cv["corp_proj_sch_deg_kwd"] = [c for c in cv.get("corp_tag_kwd", [])]
         | 
| 552 | 
             
                for i in range(len(cv["corp_proj_sch_deg_kwd"])):
         | 
| 553 | 
            -
                    for j in cv.get("sch_rank_kwd", []): | 
|  | |
| 554 | 
             
                for i in range(len(cv["corp_proj_sch_deg_kwd"])):
         | 
| 555 | 
            -
                    if cv.get("highest_degree_kwd"): | 
|  | |
| 556 |  | 
| 557 | 
             
                try:
         | 
| 558 | 
             
                    if not cv.get("work_exp_flt") and cv.get("work_start_time"):
         | 
| @@ -565,17 +656,21 @@ def parse(cv): | |
| 565 | 
             
                            cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
         | 
| 566 | 
             
                except Exception as e:
         | 
| 567 | 
             
                    logging.exception("parse {} ==> {}".format(e, cv.get("work_start_time")))
         | 
| 568 | 
            -
                if "work_exp_flt" not in cv and cv.get("work_experience", 0): | 
|  | |
| 569 |  | 
| 570 | 
             
                keys = list(cv.keys())
         | 
| 571 | 
             
                for k in keys:
         | 
| 572 | 
            -
                    if not re.search(r"_(fea|tks|nst|dt|int|flt|ltks|kwd|id)$", k): | 
|  | |
| 573 | 
             
                for k in cv.keys():
         | 
| 574 | 
            -
                    if not re.search("_(kwd|id)$", k) or  | 
|  | |
| 575 | 
             
                    cv[k] = list(set([re.sub("(市)$", "", str(n)) for n in cv[k] if n not in ['中国', '0']]))
         | 
| 576 | 
             
                keys = [k for k in cv.keys() if re.search(r"_feas*$", k)]
         | 
| 577 | 
             
                for k in keys:
         | 
| 578 | 
            -
                    if cv[k] <= 0: | 
|  | |
| 579 |  | 
| 580 | 
             
                cv["tob_resume_id"] = str(cv["tob_resume_id"])
         | 
| 581 | 
             
                cv["id"] = cv["tob_resume_id"]
         | 
| @@ -592,5 +687,6 @@ def dealWithInt64(d): | |
| 592 | 
             
                if isinstance(d, list):
         | 
| 593 | 
             
                    d = [dealWithInt64(t) for t in d]
         | 
| 594 |  | 
| 595 | 
            -
                if isinstance(d, np.integer): | 
|  | |
| 596 | 
             
                return d
         | 
|  | |
| 25 | 
             
            from contextlib import contextmanager
         | 
| 26 |  | 
| 27 |  | 
| 28 | 
            +
            class TimeoutException(Exception):
                """Plain ``Exception`` subclass that adds no state or behaviour of its own.

                NOTE(review): presumably raised by a timeout helper elsewhere in this
                module -- confirm against the callers.
                """
         | 
| 30 |  | 
| 31 |  | 
| 32 | 
             
            @contextmanager
         | 
|  | |
| 51 |  | 
| 52 |  | 
| 53 | 
             
            def highest_degree(dg):
                """Return the highest-ranked degree name found in ``dg``.

                Args:
                    dg: A single degree name (``str``) or an iterable of degree names.
                        Falsy input (``None``, ``""``, ``[]``) yields ``""``.

                Returns:
                    The element of ``dg`` with the greatest rank in the table below.
                    Names that are not in the table rank lowest (-1). When several
                    elements share the top rank, the one that appears first wins.
                """
                if not dg:
                    return ""
                if isinstance(dg, str):
                    dg = [dg]
                rank = {"初中": 0, "高中": 1, "中专": 2, "大专": 3, "专升本": 4, "本科": 5, "硕士": 6, "博士": 7, "博士后": 8}
                # max() yields the first maximal element, i.e. exactly what a stable
                # descending sort followed by [0] selected, without sorting the input.
                # default="" keeps an empty (but truthy) iterable from raising.
                return max(dg, key=lambda name: rank.get(name, -1), default="")
         | 
| 60 |  | 
|  | |
| 71 | 
             
                for ii, n in enumerate(sorted(cv["education_obj"], key=lambda x: x.get("start_time", "3"))):
         | 
| 72 | 
             
                    e = {}
         | 
| 73 | 
             
                    if n.get("end_time"):
         | 
| 74 | 
            +
                        if n["end_time"] > edu_end_dt:
         | 
| 75 | 
            +
                            edu_end_dt = n["end_time"]
         | 
| 76 | 
             
                        try:
         | 
| 77 | 
             
                            dt = n["end_time"]
         | 
| 78 | 
            +
                            if re.match(r"[0-9]{9,}", dt):
         | 
| 79 | 
            +
                                dt = turnTm2Dt(dt)
         | 
| 80 | 
             
                            y, m, d = getYMD(dt)
         | 
| 81 | 
             
                            ed_dt.append(str(y))
         | 
| 82 | 
             
                            e["end_dt_kwd"] = str(y)
         | 
|  | |
| 85 | 
             
                    if n.get("start_time"):
         | 
| 86 | 
             
                        try:
         | 
| 87 | 
             
                            dt = n["start_time"]
         | 
| 88 | 
            +
                            if re.match(r"[0-9]{9,}", dt):
         | 
| 89 | 
            +
                                dt = turnTm2Dt(dt)
         | 
| 90 | 
             
                            y, m, d = getYMD(dt)
         | 
| 91 | 
             
                            st_dt.append(str(y))
         | 
| 92 | 
             
                            e["start_dt_kwd"] = str(y)
         | 
|  | |
| 95 |  | 
| 96 | 
             
                    r = schools.select(n.get("school_name", ""))
         | 
| 97 | 
             
                    if r:
         | 
| 98 | 
            +
                        if str(r.get("type", "")) == "1":
         | 
| 99 | 
            +
                            fea.append("211")
         | 
| 100 | 
            +
                        if str(r.get("type", "")) == "2":
         | 
| 101 | 
            +
                            fea.append("211")
         | 
| 102 | 
            +
                        if str(r.get("is_abroad", "")) == "1":
         | 
| 103 | 
            +
                            fea.append("留学")
         | 
| 104 | 
            +
                        if str(r.get("is_double_first", "")) == "1":
         | 
| 105 | 
            +
                            fea.append("双一流")
         | 
| 106 | 
            +
                        if str(r.get("is_985", "")) == "1":
         | 
| 107 | 
            +
                            fea.append("985")
         | 
| 108 | 
            +
                        if str(r.get("is_world_known", "")) == "1":
         | 
| 109 | 
            +
                            fea.append("海外知名")
         | 
| 110 | 
            +
                        if r.get("rank") and cv["school_rank_int"] > r["rank"]:
         | 
| 111 | 
            +
                            cv["school_rank_int"] = r["rank"]
         | 
| 112 |  | 
| 113 | 
             
                    if n.get("school_name") and isinstance(n["school_name"], str):
         | 
| 114 | 
             
                        sch.append(re.sub(r"(211|985|重点大学|[,&;;-])", "", n["school_name"]))
         | 
|  | |
| 119 | 
             
                        maj.append(n["discipline_name"])
         | 
| 120 | 
             
                        e["major_kwd"] = n["discipline_name"]
         | 
| 121 |  | 
| 122 | 
            +
                    if not n.get("degree") and "985" in fea and not first_fea:
         | 
| 123 | 
            +
                        n["degree"] = "1"
         | 
| 124 |  | 
| 125 | 
             
                    if n.get("degree"):
         | 
| 126 | 
             
                        d = degrees.get_name(n["degree"])
         | 
| 127 | 
            +
                        if d:
         | 
| 128 | 
            +
                            e["degree_kwd"] = d
         | 
| 129 | 
            +
                        if d == "本科" and ("专科" in deg or "专升本" in deg or "中专" in deg or "大专" in deg or re.search(r"(成人|自考|自学考试)", n.get("school_name",""))):
         | 
| 130 | 
            +
                            d = "专升本"
         | 
| 131 | 
            +
                        if d:
         | 
| 132 | 
            +
                            deg.append(d)
         | 
| 133 |  | 
| 134 | 
             
                        # for first degree
         | 
| 135 | 
             
                        if not fdeg and d in ["中专", "专升本", "专科", "本科", "大专"]:
         | 
| 136 | 
             
                            fdeg = [d]
         | 
| 137 | 
            +
                            if n.get("school_name"):
         | 
| 138 | 
            +
                                fsch = [n["school_name"]]
         | 
| 139 | 
            +
                            if n.get("discipline_name"):
         | 
| 140 | 
            +
                                fmaj = [n["discipline_name"]]
         | 
| 141 | 
             
                            first_fea = copy.deepcopy(fea)
         | 
| 142 |  | 
| 143 | 
             
                    edu_nst.append(e)
         | 
|  | |
| 156 | 
             
                else:
         | 
| 157 | 
             
                    cv["sch_rank_kwd"].append("一般学校")
         | 
| 158 |  | 
| 159 | 
            +
                if edu_nst:
         | 
| 160 | 
            +
                    cv["edu_nst"] = edu_nst
         | 
| 161 | 
            +
                if fea:
         | 
| 162 | 
            +
                    cv["edu_fea_kwd"] = list(set(fea))
         | 
| 163 | 
            +
                if first_fea:
         | 
| 164 | 
            +
                    cv["edu_first_fea_kwd"] = list(set(first_fea))
         | 
| 165 | 
            +
                if maj:
         | 
| 166 | 
            +
                    cv["major_kwd"] = maj
         | 
| 167 | 
            +
                if fsch:
         | 
| 168 | 
            +
                    cv["first_school_name_kwd"] = fsch
         | 
| 169 | 
            +
                if fdeg:
         | 
| 170 | 
            +
                    cv["first_degree_kwd"] = fdeg
         | 
| 171 | 
            +
                if fmaj:
         | 
| 172 | 
            +
                    cv["first_major_kwd"] = fmaj
         | 
| 173 | 
            +
                if st_dt:
         | 
| 174 | 
            +
                    cv["edu_start_kwd"] = st_dt
         | 
| 175 | 
            +
                if ed_dt:
         | 
| 176 | 
            +
                    cv["edu_end_kwd"] = ed_dt
         | 
| 177 | 
            +
                if ed_dt:
         | 
| 178 | 
            +
                    cv["edu_end_int"] = max([int(t) for t in ed_dt])
         | 
| 179 | 
             
                if deg:
         | 
| 180 | 
             
                    if "本科" in deg and "专科" in deg:
         | 
| 181 | 
             
                        deg.append("专升本")
         | 
|  | |
| 184 | 
             
                    cv["highest_degree_kwd"] = highest_degree(deg)
         | 
| 185 | 
             
                if edu_end_dt:
         | 
| 186 | 
             
                    try:
         | 
| 187 | 
            +
                        if re.match(r"[0-9]{9,}", edu_end_dt):
         | 
| 188 | 
            +
                            edu_end_dt = turnTm2Dt(edu_end_dt)
         | 
| 189 | 
            +
                        if edu_end_dt.strip("\n") == "至今":
         | 
| 190 | 
            +
                            edu_end_dt = cv.get("updated_at_dt", str(datetime.date.today()))
         | 
| 191 | 
             
                        y, m, d = getYMD(edu_end_dt)
         | 
| 192 | 
             
                        cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
         | 
| 193 | 
             
                    except Exception as e:
         | 
|  | |
| 199 | 
             
                            or not cv.get("degree_kwd"):
         | 
| 200 | 
             
                        for c in sch:
         | 
| 201 | 
             
                            if schools.is_good(c):
         | 
| 202 | 
            +
                                if "tag_kwd" not in cv:
         | 
| 203 | 
            +
                                    cv["tag_kwd"] = []
         | 
| 204 | 
             
                                cv["tag_kwd"].append("好学校")
         | 
| 205 | 
             
                                cv["tag_kwd"].append("好学历")
         | 
| 206 | 
             
                                break
         | 
|  | |
| 209 | 
             
                        any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \
         | 
| 210 | 
             
                            or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \
         | 
| 211 | 
             
                            or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]):
         | 
| 212 | 
            +
                        if "tag_kwd" not in cv:
         | 
| 213 | 
            +
                            cv["tag_kwd"] = []
         | 
| 214 | 
            +
                        if "好学历" not in cv["tag_kwd"]:
         | 
| 215 | 
            +
                            cv["tag_kwd"].append("好学历")
         | 
| 216 | 
            +
             | 
| 217 | 
            +
                if cv.get("major_kwd"):
         | 
| 218 | 
            +
                    cv["major_tks"] = rag_tokenizer.tokenize(" ".join(maj))
         | 
| 219 | 
            +
                if cv.get("school_name_kwd"):
         | 
| 220 | 
            +
                    cv["school_name_tks"] = rag_tokenizer.tokenize(" ".join(sch))
         | 
| 221 | 
            +
                if cv.get("first_school_name_kwd"):
         | 
| 222 | 
            +
                    cv["first_school_name_tks"] = rag_tokenizer.tokenize(" ".join(fsch))
         | 
| 223 | 
            +
                if cv.get("first_major_kwd"):
         | 
| 224 | 
            +
                    cv["first_major_tks"] = rag_tokenizer.tokenize(" ".join(fmaj))
         | 
| 225 |  | 
| 226 | 
             
                return cv
         | 
| 227 |  | 
| 228 |  | 
| 229 | 
             
            def forProj(cv):
         | 
| 230 | 
            +
                if not cv.get("project_obj"):
         | 
| 231 | 
            +
                    return cv
         | 
| 232 |  | 
| 233 | 
             
                pro_nms, desc = [], []
         | 
| 234 | 
             
                for i, n in enumerate(
         | 
| 235 | 
            +
                        sorted(cv.get("project_obj", []), key=lambda x: str(x.get("updated_at", "")) if isinstance(x, dict) else "",
         | 
| 236 | 
             
                               reverse=True)):
         | 
| 237 | 
            +
                    if n.get("name"):
         | 
| 238 | 
            +
                        pro_nms.append(n["name"])
         | 
| 239 | 
            +
                    if n.get("describe"):
         | 
| 240 | 
            +
                        desc.append(str(n["describe"]))
         | 
| 241 | 
            +
                    if n.get("responsibilities"):
         | 
| 242 | 
            +
                        desc.append(str(n["responsibilities"]))
         | 
| 243 | 
            +
                    if n.get("achivement"):
         | 
| 244 | 
            +
                        desc.append(str(n["achivement"]))
         | 
| 245 |  | 
| 246 | 
             
                if pro_nms:
         | 
| 247 | 
             
                    # cv["pro_nms_tks"] = rag_tokenizer.tokenize(" ".join(pro_nms))
         | 
|  | |
| 273 | 
             
                work_st_tm = ""
         | 
| 274 | 
             
                corp_tags = []
         | 
| 275 | 
             
                for i, n in enumerate(
         | 
| 276 | 
            +
                        sorted(cv.get("work_obj", []), key=lambda x: str(x.get("start_time", "")) if isinstance(x, dict) else "",
         | 
| 277 | 
             
                               reverse=True)):
         | 
| 278 | 
            +
                    if isinstance(n, str):
         | 
| 279 | 
             
                        try:
         | 
| 280 | 
             
                            n = json_loads(n)
         | 
| 281 | 
             
                        except Exception:
         | 
| 282 | 
             
                            continue
         | 
| 283 |  | 
| 284 | 
            +
                    if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm):
         | 
| 285 | 
            +
                        work_st_tm = n["start_time"]
         | 
| 286 | 
             
                    for c in flds:
         | 
| 287 | 
             
                        if not n.get(c) or str(n[c]) == '0':
         | 
| 288 | 
             
                            fea[c].append("")
         | 
|  | |
| 303 | 
             
                        fea[c].append(rmHtmlTag(str(n[c]).lower()))
         | 
| 304 |  | 
| 305 | 
             
                    y, m, d = getYMD(n.get("start_time"))
         | 
| 306 | 
            +
                    if not y or not m:
         | 
| 307 | 
            +
                        continue
         | 
| 308 | 
             
                    st = "%s-%02d-%02d" % (y, int(m), int(d))
         | 
| 309 | 
             
                    latest_job_tm = st
         | 
| 310 |  | 
| 311 | 
             
                    y, m, d = getYMD(n.get("end_time"))
         | 
| 312 | 
            +
                    if (not y or not m) and i > 0:
         | 
| 313 | 
            +
                        continue
         | 
| 314 | 
            +
                    if not y or not m or int(y) > 2022:
         | 
| 315 | 
            +
                        y, m, d = getYMD(str(n.get("updated_at", "")))
         | 
| 316 | 
            +
                    if not y or not m:
         | 
| 317 | 
            +
                        continue
         | 
| 318 | 
             
                    ed = "%s-%02d-%02d" % (y, int(m), int(d))
         | 
| 319 |  | 
| 320 | 
             
                    try:
         | 
|  | |
| 324 |  | 
| 325 | 
             
                    if n.get("scale"):
         | 
| 326 | 
             
                        r = re.search(r"^([0-9]+)", str(n["scale"]))
         | 
| 327 | 
            +
                        if r:
         | 
| 328 | 
            +
                            scales.append(int(r.group(1)))
         | 
| 329 |  | 
| 330 | 
             
                if goodcorp:
         | 
| 331 | 
            +
                    if "tag_kwd" not in cv:
         | 
| 332 | 
            +
                        cv["tag_kwd"] = []
         | 
| 333 | 
             
                    cv["tag_kwd"].append("好公司")
         | 
| 334 | 
             
                if goodcorp_:
         | 
| 335 | 
            +
                    if "tag_kwd" not in cv:
         | 
| 336 | 
            +
                        cv["tag_kwd"] = []
         | 
| 337 | 
             
                    cv["tag_kwd"].append("好公司(曾)")
         | 
| 338 |  | 
| 339 | 
             
                if corp_tags:
         | 
| 340 | 
            +
                    if "tag_kwd" not in cv:
         | 
| 341 | 
            +
                        cv["tag_kwd"] = []
         | 
| 342 | 
             
                    cv["tag_kwd"].extend(corp_tags)
         | 
| 343 | 
             
                    cv["corp_tag_kwd"] = [c for c in corp_tags if re.match(r"(综合|行业)", c)]
         | 
| 344 |  | 
| 345 | 
            +
                if latest_job_tm:
         | 
| 346 | 
            +
                    cv["latest_job_dt"] = latest_job_tm
         | 
| 347 | 
            +
                if fea["corporation_id"]:
         | 
| 348 | 
            +
                    cv["corporation_id"] = fea["corporation_id"]
         | 
| 349 |  | 
| 350 | 
             
                if fea["position_name"]:
         | 
| 351 | 
             
                    cv["position_name_tks"] = rag_tokenizer.tokenize(fea["position_name"][0])
         | 
|  | |
| 368 | 
             
                    cv["responsibilities_ltks"] = rag_tokenizer.tokenize(fea["responsibilities"][0])
         | 
| 369 | 
             
                    cv["resp_ltks"] = rag_tokenizer.tokenize(" ".join(fea["responsibilities"][1:]))
         | 
| 370 |  | 
| 371 | 
            +
                if fea["subordinates_count"]:
         | 
| 372 | 
            +
                    fea["subordinates_count"] = [int(i) for i in fea["subordinates_count"] if
         | 
| 373 | 
             
                                                                           re.match(r"[^0-9]+$", str(i))]
         | 
| 374 | 
            +
                if fea["subordinates_count"]:
         | 
| 375 | 
            +
                    cv["max_sub_cnt_int"] = np.max(fea["subordinates_count"])
         | 
| 376 |  | 
| 377 | 
            +
                if isinstance(cv.get("corporation_id"), int):
         | 
| 378 | 
            +
                    cv["corporation_id"] = [str(cv["corporation_id"])]
         | 
| 379 | 
            +
                if not cv.get("corporation_id"):
         | 
| 380 | 
            +
                    cv["corporation_id"] = []
         | 
| 381 | 
             
                for i in cv.get("corporation_id", []):
         | 
| 382 | 
             
                    cv["baike_flt"] = max(corporations.baike(i), cv["baike_flt"] if "baike_flt" in cv else 0)
         | 
| 383 |  | 
| 384 | 
             
                if work_st_tm:
         | 
| 385 | 
             
                    try:
         | 
| 386 | 
            +
                        if re.match(r"[0-9]{9,}", work_st_tm):
         | 
| 387 | 
            +
                            work_st_tm = turnTm2Dt(work_st_tm)
         | 
| 388 | 
             
                        y, m, d = getYMD(work_st_tm)
         | 
| 389 | 
             
                        cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
         | 
| 390 | 
             
                    except Exception as e:
         | 
|  | |
| 395 | 
             
                    cv["dua_flt"] = np.mean(duas)
         | 
| 396 | 
             
                    cv["cur_dua_int"] = duas[0]
         | 
| 397 | 
             
                    cv["job_num_int"] = len(duas)
         | 
| 398 | 
            +
                if scales:
         | 
| 399 | 
            +
                    cv["scale_flt"] = np.max(scales)
         | 
| 400 | 
             
                return cv
         | 
| 401 |  | 
| 402 |  | 
| 403 | 
             
            def turnTm2Dt(b):
                """Return *b* as stripped text, expanding a leading epoch timestamp.

                Falsy input (None, "", 0, ...) yields None. Any other value is
                converted with ``str`` and stripped. When the resulting text
                starts with ten or more digits, its first ten characters are read
                as epoch seconds and rendered as local time in the form
                ``%Y-%m-%d %H:%M:%S``; longer digit runs (e.g. millisecond
                timestamps) are therefore truncated to seconds. All other text is
                returned unchanged apart from the stripping.
                """
                if not b:
                    return None

                text = str(b).strip()
                # Only text that *starts* with 10+ digits is treated as a timestamp.
                if re.match(r"[0-9]{10,}", text) is None:
                    return text

                epoch_seconds = int(text[:10])
                return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(epoch_seconds))
         | 
| 410 |  | 
| 411 |  | 
| 412 | 
             
            def getYMD(b):
                """Extract a ``(year, month, day)`` triple from a loosely formatted date.

                Args:
                    b: A date-like value. It is normalised through ``turnTm2Dt``
                        before parsing, so anything that helper accepts is accepted
                        here.

                Returns:
                    A tuple ``(y, m, d)``:

                    * ``y`` is an ``int`` when the text starts with four digits,
                      otherwise ``""``.
                    * ``m`` and ``d`` are digit strings. For falsy input the result
                      is ``("", "", "01")``. For any other input a missing or
                      out-of-range month or day is replaced by ``"1"``, so ``m`` is
                      never empty once *b* is non-empty.
                """
                y, m, d = "", "", "01"
                if not b:
                    return (y, m, d)

                b = turnTm2Dt(b)

                # The year is only recognised at the very start of the text.
                if re.match(r"[0-9]{4}", b):
                    y = int(b[:4])

                # Month: one or two digits after the year and an optional separator.
                month_hit = re.search(r"[0-9]{4}.?([0-9]{1,2})", b)
                if month_hit:
                    m = month_hit.group(1)

                # Day: require at least one month digit and a non-digit separator
                # before the day. The previous pattern made both optional, so for a
                # value without a day (e.g. "2020-12") the regex backtracked into
                # the month and reported its last digit as the day.
                day_hit = re.search(r"[0-9]{4}.?[0-9]{1,2}[^0-9]([0-9]{1,2})", b)
                if day_hit:
                    d = day_hit.group(1)

                # Clamp impossible values back to the first day / first month.
                if not d or int(d) == 0 or int(d) > 31:
                    d = "1"
                if not m or int(m) > 12 or int(m) < 1:
                    m = "1"
                return (y, m, d)
         | 
| 430 |  | 
| 431 |  | 
|  | |
| 434 | 
             
                    cv["integerity_flt"] *= 0.9
         | 
| 435 | 
             
                    return cv
         | 
| 436 | 
             
                y, m, d = getYMD(cv["birth"])
         | 
| 437 | 
            +
                if not m or not y:
         | 
| 438 | 
            +
                    return cv
         | 
| 439 | 
             
                b = "%s-%02d-%02d" % (y, int(m), int(d))
         | 
| 440 | 
             
                cv["birth_dt"] = b
         | 
| 441 | 
             
                cv["birthday_kwd"] = "%02d%02d" % (int(m), int(d))
         | 
|  | |
| 446 |  | 
| 447 | 
             
            def parse(cv):
         | 
| 448 | 
             
                for k in cv.keys():
         | 
| 449 | 
            +
                    if cv[k] == '\\N':
         | 
| 450 | 
            +
                        cv[k] = ''
         | 
| 451 | 
             
                # cv = cv.asDict()
         | 
| 452 | 
             
                tks_fld = ["address", "corporation_name", "discipline_name", "email", "expect_city_names",
         | 
| 453 | 
             
                           "expect_industry_name", "expect_position_name", "industry_name", "industry_names", "name",
         | 
|  | |
| 469 |  | 
| 470 | 
             
                rmkeys = []
         | 
| 471 | 
             
                for k in cv.keys():
         | 
| 472 | 
            +
                    if cv[k] is None:
         | 
| 473 | 
            +
                        rmkeys.append(k)
         | 
| 474 | 
            +
                    if (isinstance(cv[k], list) or isinstance(cv[k], str)) and len(cv[k]) == 0:
         | 
| 475 | 
            +
                        rmkeys.append(k)
         | 
| 476 | 
            +
                for k in rmkeys:
         | 
| 477 | 
            +
                    del cv[k]
         | 
| 478 |  | 
| 479 | 
             
                integerity = 0.
         | 
| 480 | 
             
                flds_num = 0.
         | 
|  | |
| 484 | 
             
                    flds_num += len(flds)
         | 
| 485 | 
             
                    for f in flds:
         | 
| 486 | 
             
                        v = str(cv.get(f, ""))
         | 
| 487 | 
            +
                        if len(v) > 0 and v != '0' and v != '[]':
         | 
| 488 | 
            +
                            integerity += 1
         | 
| 489 |  | 
| 490 | 
             
                hasValues(tks_fld)
         | 
| 491 | 
             
                hasValues(small_tks_fld)
         | 
|  | |
| 504 | 
             
                                 (r"[ ()\(\)人/·0-9-]+", ""),
         | 
| 505 | 
             
                                 (r".*(元|规模|于|=|北京|上海|至今|中国|工资|州|shanghai|强|餐饮|融资|职).*", "")]:
         | 
| 506 | 
             
                        cv["corporation_type"] = re.sub(p, r, cv["corporation_type"], 1000, re.IGNORECASE)
         | 
| 507 | 
            +
                    if len(cv["corporation_type"]) < 2:
         | 
| 508 | 
            +
                        del cv["corporation_type"]
         | 
| 509 |  | 
| 510 | 
             
                if cv.get("political_status"):
         | 
| 511 | 
             
                    for p, r in [
         | 
|  | |
| 513 | 
             
                        (r".*(无党派|公民).*", "群众"),
         | 
| 514 | 
             
                        (r".*团员.*", "团员")]:
         | 
| 515 | 
             
                        cv["political_status"] = re.sub(p, r, cv["political_status"])
         | 
| 516 | 
            +
                    if not re.search(r"[党团群]", cv["political_status"]):
         | 
| 517 | 
            +
                        del cv["political_status"]
         | 
| 518 |  | 
| 519 | 
            +
                if cv.get("phone"):
         | 
| 520 | 
            +
                    cv["phone"] = re.sub(r"^0*86([0-9]{11})", r"\1", re.sub(r"[^0-9]+", "", cv["phone"]))
         | 
| 521 |  | 
| 522 | 
             
                keys = list(cv.keys())
         | 
| 523 | 
             
                for k in keys:
         | 
|  | |
| 528 | 
             
                            cv[k] = [a for _, a in cv[k].items()]
         | 
| 529 | 
             
                            nms = []
         | 
| 530 | 
             
                            for n in cv[k]:
         | 
| 531 | 
            +
                                if not isinstance(n, dict) or "name" not in n or not n.get("name"):
         | 
| 532 | 
            +
                                    continue
         | 
| 533 | 
             
                                n["name"] = re.sub(r"((442)|\t )", "", n["name"]).strip().lower()
         | 
| 534 | 
            +
                                if not n["name"]:
         | 
| 535 | 
            +
                                    continue
         | 
| 536 | 
             
                                nms.append(n["name"])
         | 
| 537 | 
             
                            if nms:
         | 
| 538 | 
             
                                t = k[:-4]
         | 
|  | |
| 545 | 
             
                    # tokenize fields
         | 
| 546 | 
             
                    if k in tks_fld:
         | 
| 547 | 
             
                        cv[f"{k}_tks"] = rag_tokenizer.tokenize(cv[k])
         | 
| 548 | 
            +
                        if k in small_tks_fld:
         | 
| 549 | 
            +
                            cv[f"{k}_sm_tks"] = rag_tokenizer.tokenize(cv[f"{k}_tks"])
         | 
| 550 |  | 
| 551 | 
             
                    # keyword fields
         | 
| 552 | 
            +
                    if k in kwd_fld:
         | 
| 553 | 
            +
                        cv[f"{k}_kwd"] = [n.lower()
         | 
| 554 | 
             
                                                       for n in re.split(r"[\t,,;;. ]",
         | 
| 555 | 
             
                                                                         re.sub(r"([^a-zA-Z])[ ]+([^a-zA-Z ])", r"\1,\2", cv[k])
         | 
| 556 | 
             
                                                                         ) if n]
         | 
| 557 |  | 
| 558 | 
            +
                    if k in num_fld and cv.get(k):
         | 
| 559 | 
            +
                        cv[f"{k}_int"] = cv[k]
         | 
| 560 |  | 
| 561 | 
             
                cv["email_kwd"] = cv.get("email_tks", "").replace(" ", "")
         | 
| 562 | 
             
                # for name field
         | 
|  | |
| 580 | 
             
                    cv["name_py_pref0_tks"] = ""
         | 
| 581 | 
             
                    cv["name_py_pref_tks"] = ""
         | 
| 582 | 
             
                    for py in PY.get_pinyins(nm[:20], ''):
         | 
| 583 | 
            +
                        for i in range(2, len(py) + 1):
         | 
| 584 | 
            +
                            cv["name_py_pref_tks"] += " " + py[:i]
         | 
| 585 | 
             
                    for py in PY.get_pinyins(nm[:20], ' '):
         | 
| 586 | 
             
                        py = py.split()
         | 
| 587 | 
            +
                        for i in range(1, len(py) + 1):
         | 
| 588 | 
            +
                            cv["name_py_pref0_tks"] += " " + "".join(py[:i])
         | 
| 589 |  | 
| 590 | 
             
                    cv["name_kwd"] = name
         | 
| 591 | 
             
                    cv["name_pinyin_kwd"] = PY.get_pinyins(nm[:20], ' ')[:3]
         | 
|  | |
| 607 | 
             
                    cv["updated_at_dt"] = cv["updated_at"].strftime('%Y-%m-%d %H:%M:%S')
         | 
| 608 | 
             
                else:
         | 
| 609 | 
             
                    y, m, d = getYMD(str(cv.get("updated_at", "")))
         | 
| 610 | 
            +
                    if not y:
         | 
| 611 | 
            +
                        y = "2012"
         | 
| 612 | 
            +
                    if not m:
         | 
| 613 | 
            +
                        m = "01"
         | 
| 614 | 
            +
                    if not d:
         | 
| 615 | 
            +
                        d = "01"
         | 
| 616 | 
             
                    cv["updated_at_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
         | 
| 617 | 
             
                    # long text tokenize
         | 
| 618 |  | 
| 619 | 
            +
                if cv.get("responsibilities"):
         | 
| 620 | 
            +
                    cv["responsibilities_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(cv["responsibilities"]))
         | 
| 621 |  | 
| 622 | 
             
                # for yes or no field
         | 
| 623 | 
             
                fea = []
         | 
| 624 | 
             
                for f, y, n in is_fld:
         | 
| 625 | 
            +
                    if f not in cv:
         | 
| 626 | 
            +
                        continue
         | 
| 627 | 
            +
                    if cv[f] == '是':
         | 
| 628 | 
            +
                        fea.append(y)
         | 
| 629 | 
            +
                    if cv[f] == '否':
         | 
| 630 | 
            +
                        fea.append(n)
         | 
| 631 |  | 
| 632 | 
            +
                if fea:
         | 
| 633 | 
            +
                    cv["tag_kwd"] = fea
         | 
| 634 |  | 
| 635 | 
             
                cv = forEdu(cv)
         | 
| 636 | 
             
                cv = forProj(cv)
         | 
|  | |
| 639 |  | 
| 640 | 
             
                cv["corp_proj_sch_deg_kwd"] = [c for c in cv.get("corp_tag_kwd", [])]
         | 
| 641 | 
             
                for i in range(len(cv["corp_proj_sch_deg_kwd"])):
         | 
| 642 | 
            +
                    for j in cv.get("sch_rank_kwd", []):
         | 
| 643 | 
            +
                        cv["corp_proj_sch_deg_kwd"][i] += "+" + j
         | 
| 644 | 
             
                for i in range(len(cv["corp_proj_sch_deg_kwd"])):
         | 
| 645 | 
            +
                    if cv.get("highest_degree_kwd"):
         | 
| 646 | 
            +
                        cv["corp_proj_sch_deg_kwd"][i] += "+" + cv["highest_degree_kwd"]
         | 
| 647 |  | 
| 648 | 
             
                try:
         | 
| 649 | 
             
                    if not cv.get("work_exp_flt") and cv.get("work_start_time"):
         | 
|  | |
| 656 | 
             
                            cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
         | 
| 657 | 
             
                except Exception as e:
         | 
| 658 | 
             
                    logging.exception("parse {} ==> {}".format(e, cv.get("work_start_time")))
         | 
| 659 | 
            +
                if "work_exp_flt" not in cv and cv.get("work_experience", 0):
         | 
| 660 | 
            +
                    cv["work_exp_flt"] = int(cv["work_experience"]) / 12.
         | 
| 661 |  | 
| 662 | 
             
                keys = list(cv.keys())
         | 
| 663 | 
             
                for k in keys:
         | 
| 664 | 
            +
                    if not re.search(r"_(fea|tks|nst|dt|int|flt|ltks|kwd|id)$", k):
         | 
| 665 | 
            +
                        del cv[k]
         | 
| 666 | 
             
                for k in cv.keys():
         | 
| 667 | 
            +
                    if not re.search("_(kwd|id)$", k) or not isinstance(cv[k], list):
         | 
| 668 | 
            +
                        continue
         | 
| 669 | 
             
                    cv[k] = list(set([re.sub("(市)$", "", str(n)) for n in cv[k] if n not in ['中国', '0']]))
         | 
| 670 | 
             
                keys = [k for k in cv.keys() if re.search(r"_feas*$", k)]
         | 
| 671 | 
             
                for k in keys:
         | 
| 672 | 
            +
                    if cv[k] <= 0:
         | 
| 673 | 
            +
                        del cv[k]
         | 
| 674 |  | 
| 675 | 
             
                cv["tob_resume_id"] = str(cv["tob_resume_id"])
         | 
| 676 | 
             
                cv["id"] = cv["tob_resume_id"]
         | 
|  | |
| 687 | 
             
                if isinstance(d, list):
         | 
| 688 | 
             
                    d = [dealWithInt64(t) for t in d]
         | 
| 689 |  | 
| 690 | 
            +
                if isinstance(d, np.integer):
         | 
| 691 | 
            +
                    d = int(d)
         | 
| 692 | 
             
                return d
         |