extraction sqlquery (#4027)
Browse files
Clone of https://github.com/infiniflow/ragflow/pull/4023
Improve the information extraction: most LLMs return their results in markdown
format, i.e. ```sql <query> ```
- agent/component/exesql.py +17 -3
    	
        agent/component/exesql.py
    CHANGED
    
    | @@ -20,7 +20,7 @@ import pymysql | |
| 20 | 
             
            import psycopg2
         | 
| 21 | 
             
            from agent.component.base import ComponentBase, ComponentParamBase
         | 
| 22 | 
             
            import pyodbc
         | 
| 23 | 
            -
             | 
| 24 |  | 
| 25 | 
             
            class ExeSQLParam(ComponentParamBase):
         | 
| 26 | 
             
                """
         | 
| @@ -65,13 +65,26 @@ class ExeSQL(ComponentBase, ABC): | |
| 65 | 
             
                    self._loop += 1
         | 
| 66 |  | 
| 67 | 
             
                    ans = self.get_input()
         | 
|  | |
|  | |
| 68 | 
             
                    ans = "".join([str(a) for a in ans["content"]]) if "content" in ans else ""
         | 
| 69 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 70 | 
             
                    ans = re.sub(r';.*?SELECT ', '; SELECT ', ans, flags=re.IGNORECASE)
         | 
| 71 | 
             
                    ans = re.sub(r';[^;]*$', r';', ans)
         | 
| 72 | 
             
                    if not ans:
         | 
| 73 | 
             
                        raise Exception("SQL statement not found!")
         | 
| 74 |  | 
|  | |
| 75 | 
             
                    if self._param.db_type in ["mysql", "mariadb"]:
         | 
| 76 | 
             
                        db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host,
         | 
| 77 | 
             
                                             port=self._param.port, password=self._param.password)
         | 
| @@ -96,11 +109,12 @@ class ExeSQL(ComponentBase, ABC): | |
| 96 | 
             
                        if not single_sql:
         | 
| 97 | 
             
                            continue
         | 
| 98 | 
             
                        try:
         | 
|  | |
| 99 | 
             
                            cursor.execute(single_sql)
         | 
| 100 | 
             
                            if cursor.rowcount == 0:
         | 
| 101 | 
             
                                sql_res.append({"content": "\nTotal: 0\n No record in the database!"})
         | 
| 102 | 
             
                                continue
         | 
| 103 | 
            -
                            single_res = pd.DataFrame([i for i in cursor.fetchmany( | 
| 104 | 
             
                            single_res.columns = [i[0] for i in cursor.description]
         | 
| 105 | 
             
                            sql_res.append({"content": "\nTotal: " + str(cursor.rowcount) + "\n" + single_res.to_markdown()})
         | 
| 106 | 
             
                        except Exception as e:
         | 
|  | |
| 20 | 
             
            import psycopg2
         | 
| 21 | 
             
            from agent.component.base import ComponentBase, ComponentParamBase
         | 
| 22 | 
             
            import pyodbc
         | 
| 23 | 
            +
            import logging
         | 
| 24 |  | 
| 25 | 
             
            class ExeSQLParam(ComponentParamBase):
         | 
| 26 | 
             
                """
         | 
|  | |
| 65 | 
             
                    self._loop += 1
         | 
| 66 |  | 
| 67 | 
             
                    ans = self.get_input()
         | 
| 68 | 
            +
                  
         | 
| 69 | 
            +
             | 
| 70 | 
             
                    ans = "".join([str(a) for a in ans["content"]]) if "content" in ans else ""
         | 
| 71 | 
            +
                    if self._param.db_type == 'mssql':
         | 
| 72 | 
            +
                        # improve the information extraction, most llm return results in markdown format ```sql query ```
         | 
| 73 | 
            +
                        match = re.search(r"```sql\s*(.*?)\s*```", ans, re.DOTALL)
         | 
| 74 | 
            +
                        if match:
         | 
| 75 | 
            +
                            ans = match.group(1)  # Query content
         | 
| 76 | 
            +
                            print(ans)
         | 
| 77 | 
            +
                        else:
         | 
| 78 | 
            +
                            print("no markdown")
         | 
| 79 | 
            +
                        ans = re.sub(r'^.*?SELECT ', 'SELECT ', (ans), flags=re.IGNORECASE)
         | 
| 80 | 
            +
                    else:
         | 
| 81 | 
            +
                        ans = re.sub(r'^.*?SELECT ', 'SELECT ', repr(ans), flags=re.IGNORECASE)
         | 
| 82 | 
             
                    ans = re.sub(r';.*?SELECT ', '; SELECT ', ans, flags=re.IGNORECASE)
         | 
| 83 | 
             
                    ans = re.sub(r';[^;]*$', r';', ans)
         | 
| 84 | 
             
                    if not ans:
         | 
| 85 | 
             
                        raise Exception("SQL statement not found!")
         | 
| 86 |  | 
| 87 | 
            +
                    logging.info("db_type: ",self._param.db_type)
         | 
| 88 | 
             
                    if self._param.db_type in ["mysql", "mariadb"]:
         | 
| 89 | 
             
                        db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host,
         | 
| 90 | 
             
                                             port=self._param.port, password=self._param.password)
         | 
|  | |
| 109 | 
             
                        if not single_sql:
         | 
| 110 | 
             
                            continue
         | 
| 111 | 
             
                        try:
         | 
| 112 | 
            +
                            logging.info("single_sql: ",single_sql)
         | 
| 113 | 
             
                            cursor.execute(single_sql)
         | 
| 114 | 
             
                            if cursor.rowcount == 0:
         | 
| 115 | 
             
                                sql_res.append({"content": "\nTotal: 0\n No record in the database!"})
         | 
| 116 | 
             
                                continue
         | 
| 117 | 
            +
                            single_res = pd.DataFrame([i for i in cursor.fetchmany(self._param.top_n)])
         | 
| 118 | 
             
                            single_res.columns = [i[0] for i in cursor.description]
         | 
| 119 | 
             
                            sql_res.append({"content": "\nTotal: " + str(cursor.rowcount) + "\n" + single_res.to_markdown()})
         | 
| 120 | 
             
                        except Exception as e:
         |