I am new to Python and would like to extract abstracts from PubMed using the Entrez module from the Biopython (Bio) package.
I got esearch to give me my UIDs (stored in my_list_ges), and I can also download an entry using efetch.
However, the result is a list of dictionaries, and although the entries look like dictionaries, I cannot access them the way I expected:
# Set the e-mail attribute on Entrez (placeholder address shown here).
Entrez.email= "[email protected]"
# Fetch the entry for the first UID in my_list_ges from the "pubmed" database as XML.
handle=Entrez.efetch(db="pubmed",id=my_list_ges[0],rettype="null",retmode="xml")
# Parse the response; per the TypeError quoted below, the parsed result is a list.
record = Entrez.read(handle)
# This is the failing line: a list cannot be indexed with the string "Abstract".
abstract=record["Abstract"]
# Not reached when the line above raises.
handle.close()
The result is a TypeError:
TypeError: list indices must be integers, not str
And I get a KeyError when trying to retrieve 'Abstract' from the first record:
>>> record[0]["Abstract"]
KeyError: 'Abstract'
This is weird, because in the result of the esearch I could easily access my UIDs with a dictionary key.
The structure of record[0] is:
{u'MedlineCitation': DictElement({
u'OtherID': [],
u'OtherAbstract': [],
u'CitationSubset': ['IM'],
u'KeywordList': [],
u'DateCreated': {u'Month': '03', u'Day': '17', u'Year': '2016'},
u'SpaceFlightMission': [],
u'GeneralNote': [],
u'Article':
DictElement({
u'ArticleDate': [
DictElement({u'Month': '03', u'Day': '16', u'Year': '2016'}, attributes={u'DateType': u'Electronic'})],
u'Pagination': {u'MedlinePgn': 'e0151666'},
u'AuthorList': ListElement([
DictElement({
u'LastName': "O'Neill",
u'Initials': 'KE',
u'Identifier': [],
u'AffiliationInfo': [{
u'Affiliation': 'MRC Centre for Regenerative Medicine, Institute for Stem Cell Research, School of Biological Sciences, University of Edinburgh, SCRM Building, 5 Little France Drive, Edinburgh, EH16 4UU, UK.',
u'Identifier': []}],
u'ForeName': 'Kathy E'
}, attributes={u'ValidYN': u'Y'}),
DictElement({
u'LastName': 'Bredenkamp',
u'Initials': 'N', u'Identifier': [],
u'AffiliationInfo': [{
u'Affiliation': 'MRC Centre for Regenerative Medicine, Institute for Stem Cell Research, School of Biological Sciences, University of Edinburgh, SCRM Building, 5 Little France Drive, Edinburgh, EH16 4UU, UK.',
u'Identifier': []}],
u'ForeName': 'Nicholas'}, attributes={u'ValidYN': u'Y'}),
DictElement({
u'LastName': 'Tischner',
u'Initials': 'C',
u'Identifier': [],
u'AffiliationInfo': [{
u'Affiliation': 'MRC Centre for Regenerative Medicine, Institute for Stem Cell Research, School of Biological Sciences, University of Edinburgh, SCRM Building, 5 Little France Drive, Edinburgh, EH16 4UU, UK.',
u'Identifier': []}],
u'ForeName': 'Christin'}, attributes={u'ValidYN': u'Y'}),
DictElement({
u'LastName': 'Vaidya',
u'Initials': 'HJ',
u'Identifier': [],
u'AffiliationInfo': [{
u'Affiliation': 'MRC Centre for Regenerative Medicine, Institute for Stem Cell Research, School of Biological Sciences, University of Edinburgh, SCRM Building, 5 Little France Drive, Edinburgh, EH16 4UU, UK.',
u'Identifier': []}],
u'ForeName': 'Harsh J'}, attributes={u'ValidYN': u'Y'}),
DictElement({
u'LastName': 'Stenhouse',
u'Initials': 'FH',
u'Identifier': [],
u'AffiliationInfo': [{
u'Affiliation': 'MRC Centre for Regenerative Medicine, Institute for Stem Cell Research, School of Biological Sciences, University of Edinburgh, SCRM Building, 5 Little France Drive, Edinburgh, EH16 4UU, UK.',
u'Identifier': []}], u'ForeName': 'Frances H'}, attributes={u'ValidYN': u'Y'}),
DictElement({
u'LastName': 'Peddie',
u'Initials': 'CD',
u'Identifier': [],
u'AffiliationInfo': [{
u'Affiliation': 'MRC Centre for Regenerative Medicine, Institute for Stem Cell Research, School of Biological Sciences, University of Edinburgh, SCRM Building, 5 Little France Drive, Edinburgh, EH16 4UU, UK.',
u'Identifier': []}],
u'ForeName': 'C Diana'}, attributes={u'ValidYN': u'Y'}),
DictElement({
u'LastName': 'Nowell',
u'Initials': 'CS',
u'Identifier': [],
u'AffiliationInfo': [{
u'Affiliation': 'MRC Centre for Regenerative Medicine, Institute for Stem Cell Research, School of Biological Sciences, University of Edinburgh, SCRM Building, 5 Little France Drive, Edinburgh, EH16 4UU, UK.',
u'Identifier': []}],
u'ForeName': 'Craig S'}, attributes={u'ValidYN': u'Y'}),
DictElement({
u'LastName': 'Gaskell',
u'Initials': 'T',
u'Identifier': [],
u'AffiliationInfo': [{
u'Affiliation': 'MRC Centre for Regenerative Medicine, Institute for Stem Cell Research, School of Biological Sciences, University of Edinburgh, SCRM Building, 5 Little France Drive, Edinburgh, EH16 4UU, UK.',
u'Identifier': []}],
u'ForeName': 'Terri'}, attributes={u'ValidYN': u'Y'}),
DictElement({
u'LastName': 'Blackburn',
u'Initials': 'CC',
u'Identifier': [],
u'AffiliationInfo': [{
u'Affiliation': 'MRC Centre for Regenerative Medicine, Institute for Stem Cell Research, School of Biological Sciences, University of Edinburgh, SCRM Building, 5 Little France Drive, Edinburgh, EH16 4UU, UK.',
u'Identifier': []}], u'ForeName': 'C Clare'}, attributes={u'ValidYN': u'Y'})],
attributes={u'Type': u'authors', u'CompleteYN': u'Y'}),
u'Language': ['eng'],
u'PublicationTypeList': [StringElement('Journal Article', attributes={u'UI': u'D016428'})],
u'Journal': {
u'ISSN': StringElement('1932-6203', attributes={u'IssnType': u'Electronic'}),
u'ISOAbbreviation': 'PLoS ONE',
u'JournalIssue': DictElement({
u'Volume': '11',
u'Issue': '3',
u'PubDate': {u'Year': '2016'}}, attributes={u'CitedMedium': u'Internet'}),
u'Title': 'PloS one'},
u'ArticleTitle': 'Foxn1 Is Dynamically Regulated in Thymic Epithelial Cells during Embryogenesis and at the Onset of Thymic Involution.',
u'ELocationID': [StringElement('10.1371/journal.pone.0151666', attributes={u'ValidYN': u'Y', u'EIdType': u'doi'})],
u'Abstract': {u'AbstractText': ['--Unnecessarily long abstract removed --']}}, attributes={u'PubModel': u'Electronic-eCollection'}),
u'PMID': StringElement('26983083', attributes={u'Version': u'1'}),
u'MedlineJournalInfo': {
u'MedlineTA': 'PLoS One',
u'Country': 'United States',
u'NlmUniqueID': '101285081',
u'ISSNLinking': '1932-6203'}}, attributes={u'Owner': u'NLM', u'Status': u'In-Data-Review'}),
u'PubmedData': {
u'ArticleIdList': [
StringElement('10.1371/journal.pone.0151666', attributes={u'IdType': u'doi'}),
StringElement('PONE-D-15-47173', attributes={u'IdType': u'pii'}),
StringElement('26983083', attributes={u'IdType': u'pubmed'})],
u'PublicationStatus': 'epublish',
u'History': [
DictElement({u'Month': '', u'Day': '', u'Year': '2016'}, attributes={u'PubStatus': u'ecollection'}),
DictElement({u'Month': '10', u'Day': '28', u'Year': '2015'}, attributes={u'PubStatus': u'received'}),
DictElement({u'Month': '3', u'Day': '2', u'Year': '2016'}, attributes={u'PubStatus': u'accepted'}),
DictElement({u'Month': '3', u'Day': '16', u'Year': '2016'}, attributes={u'PubStatus': u'epublish'}),
DictElement({u'Minute': '0', u'Month': '3', u'Day': '17', u'Hour': '6', u'Year': '2016'}, attributes={u'PubStatus': u'entrez'}),
DictElement({u'Minute': '0', u'Month': '3', u'Day': '18', u'Hour': '6', u'Year': '2016'}, attributes={u'PubStatus': u'pubmed'}),
DictElement({u'Minute': '0', u'Month': '3', u'Day': '18', u'Hour': '6', u'Year': '2016'}, attributes={u'PubStatus': u'medline'})]}
}
I find it is much easier to return a Medline record and parse that. I insert my full working code for a related query: query = "Tischner[AU] Cortex-specific down-regulation". The key point in the code below is that the fetch_rec() function uses rettype='Medline', retmode='text', and main() then parses the resulting records using BioPython's Medline module.
from StringIO import StringIO
from Bio import Entrez, Medline
def search_medline(query, email):
    """Run a PubMed search and return the parsed esearch result.

    Args:
        query: Search term passed to ``Entrez.esearch`` as ``term``.
        email: Value assigned to ``Entrez.email`` before the request is made.

    Returns:
        The object produced by ``Entrez.read`` for the search response.
        Callers in this file index it with ``'IdList'``, ``'WebEnv'`` and
        ``'QueryKey'``.

    Raises:
        Any exception raised by ``Entrez.esearch`` or ``Entrez.read``
        propagates unchanged.
    """
    Entrez.email = email
    # usehistory='y' is requested so the result can later be referenced
    # through its 'WebEnv' / 'QueryKey' entries (see fetch_rec).
    search = Entrez.esearch(db='pubmed', term=query, usehistory='y')
    try:
        # Parse inside the try so the handle is closed even if parsing fails.
        return Entrez.read(search)
    finally:
        search.close()
def fetch_rec(rec_id, entrez_handle):
    """Download one PubMed record as Medline-formatted text.

    Args:
        rec_id: Identifier of the record to fetch, passed to ``Entrez.efetch``
            as ``id``.
        entrez_handle: Parsed search result providing the ``'WebEnv'`` and
            ``'QueryKey'`` entries forwarded to ``Entrez.efetch``.

    Returns:
        Whatever ``read()`` on the efetch handle returns (the raw record
        text requested with ``rettype='Medline', retmode='text'``).

    Raises:
        KeyError: If ``entrez_handle`` lacks ``'WebEnv'`` or ``'QueryKey'``.
        Any exception raised by ``Entrez.efetch`` or by reading the handle
        propagates unchanged.
    """
    fetch_handle = Entrez.efetch(db='pubmed', id=rec_id,
                                 rettype='Medline', retmode='text',
                                 webenv=entrez_handle['WebEnv'],
                                 query_key=entrez_handle['QueryKey'])
    try:
        return fetch_handle.read()
    finally:
        # The original never closed this handle; release it on every path.
        fetch_handle.close()
def main(query, email):
    """Print the abstract of every PubMed record matching ``query``.

    Each ID in the search result's ``'IdList'`` is fetched individually,
    parsed with ``Medline.read``, and its ``'AB'`` field is printed.
    Records without an ``'AB'`` field are skipped silently.

    Args:
        query: Search term forwarded to ``search_medline``.
        email: E-mail value forwarded to ``search_medline``.
    """
    search_result = search_medline(query, email)
    for pmid in search_result['IdList']:
        medline_text = fetch_rec(pmid, search_result)
        # Medline.read is given a file-like object, so wrap the fetched text.
        parsed = Medline.read(StringIO(medline_text))
        if 'AB' not in parsed:
            continue
        print(parsed['AB'])
if __name__ == '__main__':
    # Example search; change the query to adapt the script to another search.
    query = "Tischner[AU] Cortex-specific down-regulation"
    # Placeholder address; it is handed to main() and set on Entrez there.
    email = "[email protected]"
    main(query=query, email=email)
It will print out the abstract you seek, and by changing the query parameter this script may be adapted to any search. There are more efficient ways of extracting large numbers of records, but for a small search this will do.
I don't know much about what the "right" thing to do in this situation is (I am not familiar with Biopython), but the reason you are getting the KeyError is that the 'Abstract' key is nested inside the 'Article' dictionary, which is itself inside the 'MedlineCitation' dictionary:
record[0]['MedlineCitation']['Article']['Abstract']
Should give you something like:
{'AbstractText': ['--Unnecessarily long abstract removed --']}
If you like our work, you can donate to us via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With