from urllib2 import urlopen
import urllib2
from bs4 import BeautifulSoup
import cPickle
from matplotlib import pyplot

# Fetch the ModelDB listing page and collect the id of every linked model.
neuron_models_html = urlopen('http://senselab.med.yale.edu/modeldb/ListByModelName.asp?c=19&lin=-1').read()
neuron_models_soup = BeautifulSoup(neuron_models_html, 'html5lib')

# Every anchor's href (None when the attribute is absent).
anchor_targets = (anchor.get('href') for anchor in neuron_models_soup.find_all('a'))

# Keep only links to a model page; the id is the text after the first '='.
neuron_models = sorted(
    int(target.split('=')[1])
    for target in anchor_targets
    if target is not None and 'ShowModel.asp?model=' in target
)

# Filled in by analyze_zip():
#   zip_lengths maps model id -> size in bytes of the downloaded zip
#   no_zip lists the model ids whose download raised an HTTP error
zip_lengths = {}
no_zip = []

def analyze_zip(model_id):
    """Download the zip archive for one model and record its size.

    On success the archive length in bytes is stored in the module-level
    ``zip_lengths`` dict under ``model_id``. If the server answers with an
    HTTP error, ``model_id`` is appended to the module-level ``no_zip``
    list instead.

    Args:
        model_id: Integer model id, substituted into the download URL.

    Returns:
        True if the archive was downloaded, False if the request raised
        ``urllib2.HTTPError``.
    """
    url = 'http://senselab.med.yale.edu/modeldb/eavBinDown.asp?o=%d&a=23&mime=application/zip' % model_id

    # Only the request itself is guarded: that is where HTTPError is raised.
    try:
        response = urlopen(url)
    except urllib2.HTTPError:
        no_zip.append(model_id)
        return False

    # Close the connection explicitly instead of leaving it to the garbage
    # collector; this function is called once per model in a long loop.
    try:
        archive_bytes = response.read()
    finally:
        response.close()

    zip_lengths[model_id] = len(archive_bytes)
    return True

# Download every model's archive; results accumulate in zip_lengths / no_zip.
for current_model in neuron_models:
    analyze_zip(current_model)

# Persist both result containers as a single pickled dict.
pickled_results = cPickle.dumps({'zip_lengths': zip_lengths, 'no_zip': no_zip})
with open('zip_lengths.txt', 'w') as f:
    f.write(pickled_results)

# Histogram of archive sizes; the title reports how many models had no zip.
bin_count = 50
dropped_count = len(no_zip)
pyplot.hist(zip_lengths.values(), bin_count)
pyplot.title('%d dropped because no zip' % dropped_count)
pyplot.xlabel('Zip length (bytes)')
pyplot.ylabel('Count')
pyplot.show()