# papers_missing_doi.py
# Prints every ModelDB model_id together with the DOI of its paper, or None when the model page has no DOI link.
import re
import urllib2
from contextlib import closing
from urllib2 import urlopen

from bs4 import BeautifulSoup, Comment
# Page that lists every model, and the URL template of a single model's page.
ALL_MODELS_URL = 'https://senselab.med.yale.edu/ModelDB/ListByModelName.asp?c=19&lin=-1'
MODEL_URL = 'http://senselab.med.yale.edu/modeldb/ShowModel.asp?model=%d'

# Captures the numeric id in links such as 'ShowModel.asp?model=1234&file=...'.
# Matching the digits directly avoids int() failures when the link carries
# further query parameters or a fragment after the id.
MODEL_LINK_RE = re.compile(r'ShowModel\.asp\?model=(\d+)')

# URL prefixes of the DOI resolver; whatever follows one of them is the DOI.
DOI_PREFIXES = (
    'http://dx.doi.org/',
    'https://dx.doi.org/',
    'http://doi.org/',
    'https://doi.org/',
)


def model_id_from_href(href):
    """Return the integer model id embedded in a ShowModel link.

    href may be None (an <a> element without an href attribute). Returns
    None when href is None or does not contain 'ShowModel.asp?model=<digits>'.
    """
    if href is None:
        return None
    match = MODEL_LINK_RE.search(href)
    if match is None:
        return None
    return int(match.group(1))


def doi_from_href(href):
    """Return the DOI carried by a DOI-resolver link, or None.

    A link counts as a DOI link when it starts with one of DOI_PREFIXES and
    has a non-empty remainder; that remainder is returned unchanged.
    """
    if href is None:
        return None
    for prefix in DOI_PREFIXES:
        if href.startswith(prefix):
            # A bare resolver URL with nothing after it is not a DOI.
            return href[len(prefix):] or None
    return None


def fetch_hrefs(url):
    """Download url and return the href of every <a> element on the page.

    Entries are None for anchors that have no href attribute.
    """
    # urllib2 responses are not context managers, hence closing().
    with closing(urlopen(url)) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html5lib')
    return [link.get('href') for link in soup.find_all('a')]


def main():
    """Print each model id with the DOI found on its page, or None."""
    # get list of all models
    print("Retrieving model ids")
    found_ids = set()
    for href in fetch_hrefs(ALL_MODELS_URL):
        model_id = model_id_from_href(href)
        if model_id is not None:
            found_ids.add(model_id)
    # The set drops repeated links to the same model, so every model page is
    # downloaded and reported once.
    all_models = sorted(found_ids)
    print(all_models)

    # this dict will hold whether (doi) or not (None) a model_id has a paper
    # with a doi
    model_id_doi = {}
    print("finding doi's or not:")
    for model_id in all_models:
        # load the modeldb entry
        paper_doi = None
        for href in fetch_hrefs(MODEL_URL % model_id):
            doi = doi_from_href(href)
            if doi is not None:
                # When a page has several DOI links, the last one is kept.
                paper_doi = doi
                print("*** found a doi: " + paper_doi)
        model_id_doi[model_id] = paper_doi

    print("model_id: doi")
    for model_id in all_models:
        print(repr(model_id) + ": " + repr(model_id_doi[model_id]))


if __name__ == "__main__":
    main()