H
committed on
Commit
·
202a53b
1
Parent(s):
ac8ea20
Fix component PubMed (#2195)
Browse files
### What problem does this PR solve?
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- agent/component/pubmed.py +11 -15
agent/component/pubmed.py
CHANGED
@@ -15,6 +15,7 @@
|
|
15 |
#
|
16 |
from abc import ABC
|
17 |
from Bio import Entrez
|
|
|
18 |
import pandas as pd
|
19 |
import xml.etree.ElementTree as ET
|
20 |
from agent.settings import DEBUG
|
@@ -47,21 +48,16 @@ class PubMed(ComponentBase, ABC):
|
|
47 |
try:
|
48 |
Entrez.email = self._param.email
|
49 |
pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
|
50 |
-
pubmedcnt = ET.fromstring(
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
url_tmp = '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find("MedlineCitation").find(
|
61 |
-
"PMID").text + '">' + '</a>'
|
62 |
-
abstract_tmp = '\nAbstract:' + child.find("MedlineCitation").find("Article").find("Abstract").find(
|
63 |
-
"AbstractText").text
|
64 |
-
pubmed_res.append({"content": title_tmp + url_tmp + abstract_tmp})
|
65 |
except Exception as e:
|
66 |
return PubMed.be_output("**ERROR**: " + str(e))
|
67 |
|
|
|
15 |
#
|
16 |
from abc import ABC
|
17 |
from Bio import Entrez
|
18 |
+
import re
|
19 |
import pandas as pd
|
20 |
import xml.etree.ElementTree as ET
|
21 |
from agent.settings import DEBUG
|
|
|
48 |
try:
|
49 |
Entrez.email = self._param.email
|
50 |
pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
|
51 |
+
pubmedcnt = ET.fromstring(re.sub(r'<(/?)b>|<(/?)i>', '', Entrez.efetch(db='pubmed', id=",".join(pubmedids),
|
52 |
+
retmode="xml").read().decode(
|
53 |
+
"utf-8")))
|
54 |
+
pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
|
55 |
+
"ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find(
|
56 |
+
"MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + (
|
57 |
+
child.find("MedlineCitation").find("Article").find("Abstract").find(
|
58 |
+
"AbstractText").text if child.find("MedlineCitation").find(
|
59 |
+
"Article").find("Abstract") else "No abstract available")} for child in
|
60 |
+
pubmedcnt.findall("PubmedArticle")]
|
|
|
|
|
|
|
|
|
|
|
61 |
except Exception as e:
|
62 |
return PubMed.be_output("**ERROR**: " + str(e))
|
63 |
|