####################################################################### # UTILITIES.JL # Functions for analysis of data, I/O, plotting, etc. # ####################################################################### using DataFrames, CSV, Plots, PyCall, LsqFit, LaTeXStrings, Statistics, DataStructures, FourierAnalysis np = pyimport("numpy") ss = pyimport("scipy.stats") ####################################################################### # I/O Functions ####################################################################### """ create_directories_if_not_exist() Creates necessary directories if they don't yet exist. """ function create_directories_if_not_exist() if !("figures" ∈ readdir()) mkdir("figures") end for d ∈ ["raster-plots", "pattern-separation", "voltage-tracings", "fi-curves", "lfp"] if !(d ∈ readdir("figures")) mkdir("figures/"*d) end end end """ load_spike_files(patterns, model_label, neuron_ids; neurons_per_pattern, data_dir) Loads spike time files and concatenates them. File names from simulation are structured as follows: - "DGsp-{p}-{neurons_per_pattern}-{model_label}.txt" - "StimIn-{p}-{neurons_per_pattern}-{model_label}.txt" The `neuron_ids` object is a `Dict` which includes lower and upper bound ranges on neuron IDs: ``` neuron_ids = Dict( "DG" => [0, 500], "BC" => [500,506], "MC" => [506, 521], "HIPP" => [521, 527] ) ``` """ function load_spike_files( patterns::Union{UnitRange{Int64}, Vector{Int64}}, model_label::String, neuron_ids::Dict; neurons_per_pattern::Int64=6, # data_dir::String="data/dgnetwork/") df = DataFrame() for p ∈ patterns spike_fname = data_dir*"DGsp-"*string(p)*"-"*string(neurons_per_pattern)*"-"*model_label*".txt" stims_fname = data_dir*"StimIn-"*string(p)*"-"*string(neurons_per_pattern)*"-"*model_label*".txt" spikes = CSV.read(spike_fname, delim="\t", header=0, DataFrame) stimin = CSV.read(stims_fname, delim="\t", header=0, DataFrame) if size(spikes, 1) > 0 rename!(spikes, ["Time", "Neuron"]) spikes[:,"Population"] .= "" spikes[:,"Pattern"] .= p spikes[:,"NeuronsPerPattern"] .= neurons_per_pattern spikes[:,"Model"] .= model_label for k ∈ keys(neuron_ids) lb, ub = neuron_ids[k] spikes[lb .<= spikes[:, "Neuron"] .< ub, "Population"] .= k end df = [df; spikes] end if size(stimin, 1) > 0 rename!(stimin, ["Time", "Neuron"]) stimin[:,"Population"] .= "PP" stimin[:,"Pattern"] .= p stimin[:,"NeuronsPerPattern"] .= neurons_per_pattern stimin[:,"Model"] .= model_label df = [df; stimin] end end return df end ####################################################################### # FUNCTIONS FOR STATISTICAL ANALYSIS ####################################################################### """ spike_train_correlation(spike_train1, spike_train2, n_neurons, delta, n_bins, duration) Computes the similarity score for two spike trains using the Pearson correlation coefficient. This function was copied from that in Yim et al. (2015), but modified in a few ways. We added documentation, switched to using DataFrames (for clarity/brevity of code), and computed the correlation coefficient using the `scipy.pearsonr` function. """ function spike_train_correlation( spike_train1::DataFrame, spike_train2::DataFrame, n_neurons::Int64; delta::Int64=20, n_bins::Int64=1, duration::Float64=2000.0) # Create kernel tri_rng = collect(1:round(Int64, delta/n_bins)) triangular_kernel = [tri_rng; reverse(tri_rng)[2:end]]' # Create bins for histograms bins = collect(1:n_bins:(duration + n_bins)) x = zeros(round(Int64, n_neurons * duration / n_bins)) y = zeros(round(Int64, n_neurons * duration / n_bins)) for i ∈ 1:n_neurons s1 = spike_train1[spike_train1.Neuron .== (i-1), "Time"] s2 = spike_train2[spike_train2.Neuron .== (i-1), "Time"] timeseries1, _ = np.histogram(s1, bins) timeseries2, _ = np.histogram(s2, bins) idx_low = round(Int64, (i-1)*duration/n_bins + 1) idx_high = round(Int64, i*duration/n_bins) x[idx_low:idx_high] = np.convolve(timeseries1, triangular_kernel, "same") y[idx_low:idx_high] = np.convolve(timeseries2, triangular_kernel, "same") end return ss.pearsonr(x, y) end """ pattern_separation_curve(spike_times, n_pp_cells, n_granule_cells; delta, n_bins, duration) Computes pairwise correlations between input and output patterns """ function pattern_separation_curve( spike_times::DataFrame, n_pp_cells::Int64, n_granule_cells::Int64; delta::Int64=20, n_bins::Int64=1, duration::Float64=2000.0) out = DataFrame() n_patterns = unique(spike_times.Pattern) |> length for i ∈ 0:(n_patterns - 2) for j ∈ i:(n_patterns-1) pp_a = spike_times[(spike_times.Population .== "PP") .& (spike_times.Pattern .== i), ["Neuron", "Time"]] pp_b = spike_times[(spike_times.Population .== "PP") .& (spike_times.Pattern .== j), ["Neuron", "Time"]] r_in, p_in = spike_train_correlation(pp_a, pp_b, n_pp_cells; delta=delta, n_bins=n_bins, duration=duration) gc_a = spike_times[(spike_times.Population .== "DG") .& (spike_times.Pattern .== i), ["Neuron", "Time"]] gc_b = spike_times[(spike_times.Population .== "DG") .& (spike_times.Pattern .== j), ["Neuron", "Time"]] r_out, p_out = spike_train_correlation(gc_a, gc_b, n_granule_cells; delta=delta, n_bins=n_bins, duration=duration) out_ij = DataFrame( "Pattern A"=>[i], "Pattern B"=>[j], "Input Correlation"=>[r_in], "Output Correlation"=>[r_out], "Input Correlation p-Value"=>[p_in], "Output Correlation p-Value"=>[p_out], ) out = [out; out_ij] end end return out end """ fit_power_law(x, y) Fits `a + (1-a)*(x^b)` to the pattern separation data. """ function fit_power_law(x::Vector, y::Vector) # Clip for tractability x = max.(x, 0.0001) y = max.(y, 0.0001) @. model(x, w) = w[1] + (1-w[1])*(x^w[2]) res = curve_fit(model, x, y, [0., 1.]) a, b = res.param f(x) = a + (1-a)*(x^b) return f end function compute_auc(x::Vector, y::Vector) # Clip for tractability x = max.(x, 0.0001) y = max.(y, 0.0001) @. model(x, w) = w[1] + (1-w[1])*(x^w[2]) res = curve_fit(model, x, y, [0., 1.]) a, b = res.param auc = - ((2*a*b) - b + 1)/(2*(1+b)) return auc end ####################################################################### # FUNCTIONS FOR PLOTTING ####################################################################### """ raster_plot(spikes; xlims, xlab, ylab) Returns a raster plot """ function raster_plot( spikes::DataFrame; xlims::Vector=[0,2000], xlab::String="Time (ms)", ylab::String="Neuron ID" ) p = plot(xlab=xlab, ylab=ylab, xlims=xlims, yticks=false) p = scatter!(spikes[:,1], spikes[:,2], markershape=:vline, label=nothing, c=:black, msc=:black, msa=1, ma=1, markerstrokewidth=1) return p end