H committed on
Commit
202a53b
·
1 Parent(s): ac8ea20

Fix component PubMed (#2195)

Browse files

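### What problem does this PR solve?

Fixes parsing of PubMed `efetch` results in the PubMed agent component. Inline `<b>`/`<i>` markup is now stripped from the returned XML before it is parsed, and articles without an abstract yield "No abstract available" instead of causing the whole request to end in an error.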

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Files changed (1) hide show
  1. agent/component/pubmed.py +11 -15
agent/component/pubmed.py CHANGED
@@ -15,6 +15,7 @@
15
  #
16
  from abc import ABC
17
  from Bio import Entrez
 
18
  import pandas as pd
19
  import xml.etree.ElementTree as ET
20
  from agent.settings import DEBUG
@@ -47,21 +48,16 @@ class PubMed(ComponentBase, ABC):
47
  try:
48
  Entrez.email = self._param.email
49
  pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
50
- pubmedcnt = ET.fromstring(
51
- Entrez.efetch(db='pubmed', id=",".join(pubmedids), retmode="xml").read().decode("utf-8"))
52
- pubmed_res = []
53
- for child in pubmedcnt.findall("PubmedArticle"):
54
- if child.find("MedlineCitation").find("Article").find("ArticleTitle").text:
55
- title_tmp = 'Title:' + child.find("MedlineCitation").find("Article").find("ArticleTitle").text
56
- else:
57
- title_tmp = 'Title:' + "".join(
58
- [childtitle.text for childtitle in
59
- child.find("MedlineCitation").find("Article").find("ArticleTitle")])
60
- url_tmp = '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find("MedlineCitation").find(
61
- "PMID").text + '">' + '</a>'
62
- abstract_tmp = '\nAbstract:' + child.find("MedlineCitation").find("Article").find("Abstract").find(
63
- "AbstractText").text
64
- pubmed_res.append({"content": title_tmp + url_tmp + abstract_tmp})
65
  except Exception as e:
66
  return PubMed.be_output("**ERROR**: " + str(e))
67
 
 
15
  #
16
  from abc import ABC
17
  from Bio import Entrez
18
+ import re
19
  import pandas as pd
20
  import xml.etree.ElementTree as ET
21
  from agent.settings import DEBUG
 
48
  try:
49
  Entrez.email = self._param.email
50
  pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=ans))['IdList']
51
+ pubmedcnt = ET.fromstring(re.sub(r'<(/?)b>|<(/?)i>', '', Entrez.efetch(db='pubmed', id=",".join(pubmedids),
52
+ retmode="xml").read().decode(
53
+ "utf-8")))
54
+ pubmed_res = [{"content": 'Title:' + child.find("MedlineCitation").find("Article").find(
55
+ "ArticleTitle").text + '\nUrl:<a href=" https://pubmed.ncbi.nlm.nih.gov/' + child.find(
56
+ "MedlineCitation").find("PMID").text + '">' + '</a>\n' + 'Abstract:' + (
57
+ child.find("MedlineCitation").find("Article").find("Abstract").find(
58
+ "AbstractText").text if child.find("MedlineCitation").find(
59
+ "Article").find("Abstract") else "No abstract available")} for child in
60
+ pubmedcnt.findall("PubmedArticle")]
 
 
 
 
 
61
  except Exception as e:
62
  return PubMed.be_output("**ERROR**: " + str(e))
63