Kevin Hu
committed on
Commit
·
74b229a
1
Parent(s):
35dcaff
Catch the exception while parsing pptx. (#4202)
Browse files
### What problem does this PR solve?
#4189
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
deepdoc/parser/ppt_parser.py
CHANGED
|
@@ -10,7 +10,7 @@
|
|
| 10 |
# See the License for the specific language governing permissions and
|
| 11 |
# limitations under the License.
|
| 12 |
#
|
| 13 |
-
|
| 14 |
from io import BytesIO
|
| 15 |
from pptx import Presentation
|
| 16 |
|
|
@@ -53,9 +53,12 @@ class RAGFlowPptParser(object):
|
|
| 53 |
texts = []
|
| 54 |
for shape in sorted(
|
| 55 |
slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left)):
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
| 59 |
txts.append("\n".join(texts))
|
| 60 |
|
| 61 |
return txts
|
|
|
|
| 10 |
# See the License for the specific language governing permissions and
|
| 11 |
# limitations under the License.
|
| 12 |
#
|
| 13 |
+
import logging
|
| 14 |
from io import BytesIO
|
| 15 |
from pptx import Presentation
|
| 16 |
|
|
|
|
| 53 |
texts = []
|
| 54 |
for shape in sorted(
|
| 55 |
slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left)):
|
| 56 |
+
try:
|
| 57 |
+
txt = self.__extract(shape)
|
| 58 |
+
if txt:
|
| 59 |
+
texts.append(txt)
|
| 60 |
+
except Exception as e:
|
| 61 |
+
logging.exception(e)
|
| 62 |
txts.append("\n".join(texts))
|
| 63 |
|
| 64 |
return txts
|