Kevin Hu
		
	committed on
		
		
					Commit 
							
							·
						
						74b229a
	
1
								Parent(s):
							
							35dcaff
								
Catch the exception while parsing pptx. (#4202)
Browse files
### What problem does this PR solve?
#4189
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
    	
        deepdoc/parser/ppt_parser.py
    CHANGED
    
    | @@ -10,7 +10,7 @@ | |
| 10 | 
             
            #  See the License for the specific language governing permissions and
         | 
| 11 | 
             
            #  limitations under the License.
         | 
| 12 | 
             
            #
         | 
| 13 | 
            -
             | 
| 14 | 
             
            from io import BytesIO
         | 
| 15 | 
             
            from pptx import Presentation
         | 
| 16 |  | 
| @@ -53,9 +53,12 @@ class RAGFlowPptParser(object): | |
| 53 | 
             
                        texts = []
         | 
| 54 | 
             
                        for shape in sorted(
         | 
| 55 | 
             
                                slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left)):
         | 
| 56 | 
            -
                             | 
| 57 | 
            -
             | 
| 58 | 
            -
                                 | 
|  | |
|  | |
|  | |
| 59 | 
             
                        txts.append("\n".join(texts))
         | 
| 60 |  | 
| 61 | 
             
                    return txts
         | 
|  | |
| 10 | 
             
            #  See the License for the specific language governing permissions and
         | 
| 11 | 
             
            #  limitations under the License.
         | 
| 12 | 
             
            #
         | 
| 13 | 
            +
            import logging
         | 
| 14 | 
             
            from io import BytesIO
         | 
| 15 | 
             
            from pptx import Presentation
         | 
| 16 |  | 
|  | |
| 53 | 
             
                        texts = []
         | 
| 54 | 
             
                        for shape in sorted(
         | 
| 55 | 
             
                                slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left)):
         | 
| 56 | 
            +
                            try:
         | 
| 57 | 
            +
                                txt = self.__extract(shape)
         | 
| 58 | 
            +
                                if txt:
         | 
| 59 | 
            +
                                    texts.append(txt)
         | 
| 60 | 
            +
                            except Exception as e:
         | 
| 61 | 
            +
                                logging.exception(e)
         | 
| 62 | 
             
                        txts.append("\n".join(texts))
         | 
| 63 |  | 
| 64 | 
             
                    return txts
         |