%%% post-hoc analysis of best clustered binsize, run after
%%% cluster_words_winsize
clear all
load('first_stage_analysis')


%---------------- firing stats of whole network --------------------------
firing_stats

%------------------ effect of changing analysis parameters ---------------
% can check effect of changing thetaH on the best one!!
% MAKE SURE YOU REGENERATE hc BEFORE DOING THIS....
HE = pdist(scount{btest}','hamming');
hc = squareform(HE);    % turn it into matrix - with zeroes on diagonal

thetaHvec = 0.05:0.05:0.3;
rtnd = cell(numel(thetaHvec),1);
Sc_thetaH = cell(numel(thetaHvec),1);
CmplxH = zeros(numel(thetaHvec),1);

for i = 1:numel(thetaHvec)
    i
    Ac = (hc <= thetaHvec(i));    % smallest error
    dgrs = sum(Ac);
    delN = find(dgrs < 2);
    rtnd{i} = setdiff(idxs,delN);    %% index back into Rn to recover original index of MSN...

    % get reduced version (all nodes degree at least 2 before reduction...)
    Acs = Ac(rtnd{i},rtnd{i});
    edgs(i) = sum(sum(Acs)); nds(i) = numel(rtnd{i});
    if nds(i) > nlimit & edgs(i) > log(nds(i))   % giant component phase transition 
        % only group if there are enough nodes and  edges to make it worthwhile...
        
        [Sc_thetaH{i},Uc] = multileadevsplit(Acs) ;% ,'refine'); % do refine step later when got time...
        thetaHngrps(i) = max(Sc_thetaH{i});
        siz = [];
        for j = 1:thetaHngrps(i)
            siz = [siz numel(find(Sc_thetaH{i} == j))];   
        end
        thetaHgrpsizes{i} = siz;
    else
        Sc_thetaH{i} = [];  % nothing to group!
        thetaHngrps(i) = 0;
        thetaHgrpsizes{i} = [];
    end
    %%% Hamming distance within groups vs whole retained network for
    %%% modularity method
    Hamming_all_H(i) = median(pdist(scount{btest}(:,rtnd{i})','hamming'));
    for j=1:thetaHngrps(i)
        cts = [scount{btest}(:,rtnd{i}(Sc_thetaH{i}==j))];
        Hamming_H{i}(j) = median(pdist(cts','hamming'));
    end

    % note that complexity measure now has same Hdiff all the time, so
    % changes only with changes in numberof groups and proportion of neuron
    if thetaHngrps(i)
        CmplxH(i) = sum(thetaHgrpsizes{i})/Nms * thetaHngrps(i)  * hdiff(btest);
    end
end

% and threshold group size of interest
grpthresh = round(Nms * 0.01);
thetaHngrpsT = zeros(numel(thetaHngrps),1);
for i = 1:numel(thetaHngrps)
    thetaHngrpsT(i) = sum(thetaHgrpsizes{i} >= grpthresh);
end

%%%% plot out some results...
figure(99), clf
subplot(211), plot(thetaHvec,thetaHngrps,'+-'), ylabel('# grps'), xlabel('Threshold')
subplot(212), plot(thetaHvec,CmplxH,'+-'), ylabel('\beta'),  xlabel('Threshold')
%plot(thetaHvec,thetaHngrpsT,'k+-')

% find most groups
% bH = find(thetaHngrps == max(thetaHngrps)); 
% bH = find(thetaHngrpsT == max(thetaHngrpsT));
bH = find(CmplxH == max(CmplxH));
if numel(bH) > 1 bH = bH(1); end
%bH = 2;

ScH = Sc_thetaH{bH};
RH = rtnd{bH};
Ct = scount{btest};

number_of_groups = thetaHngrps(bH)

grp_membersH = {};
for loop = 1:thetaHngrps(bH)
   grp_membersH{loop} = RH(ScH==loop); 
end

bins = 0:binsize(btest):simT;
sctsH = zeros(numel(bins),thetaHngrps(bH));
figure(103), clf
for loop = 1:thetaHngrps(bH)
    cts = [Ct(:,RH(ScH==loop))]';
    sctsH(:,loop) = [sum(cts)./thetaHgrpsizes{bH}(loop)]';
    subplot(max(ScH),1,loop), bar(bins,sctsH(:,loop),'histc')
    % subplot(max(ScH),1,loop), bar(smcts(:,loop))
    title('Summed spike count for graph-method')
end

figure(104), clf, hold on, %title('Grouped raster by graph-method (group 1 at the top)')
% full colour version
clrs = colormap; [rw cl] = size(clrs); clrstep = floor(rw / thetaHngrps(bH));
% alternating black and grey version
%clrs = repmat([0 0 0; 0.5 0.5 0.5],thetaHngrps(bH),1); clrstep = 1;

ctr = numel(RH);
for loop = 1:thetaHngrps(bH)
    inds = RH(ScH==loop);
    %inds = RH(ScH == grp_seq_hand(loop));
    stgrp = [];
    ixgrp = [];
    for i = 1:numel(inds)
        indst = find(MSspks(:,1) == inds(i));
        st = MSspks(indst,:);
        % plot(st(:,2), ones(length(st),1)+ctr,'.','Color',clrs(clrstep*loop,:));
        % plot(st(:,2), ones(length(st),1)+ctr,'.');
        
        stgrp = [stgrp; st(:,2)];
        ixgrp = [ixgrp; ones(numel(st(:,1)),1)+ctr];
        ctr = ctr-1;
    end
    plot(stgrp, ixgrp,'.','Color',clrs(clrstep*loop,:));
    %fname1 = ['ph_stgrp' num2str(loop) '.txt'];  fname2 = ['ph_ixgrp' num2str(loop) '.txt']; 
    %save(fname1,'stgrp','-ascii'); save(fname2,'ixgrp','-ascii');
end
% axis off

%%% plot unsorted raster to drive home how good the clustering is...
figure(105), clf, hold on, %title('Ungrouped raster of all clustered cells')
for loop = 1:numel(rtnd{bH})
    ixs = find(MSspks(:,1) == rtnd{bH}(loop));
    plot(MSspks(ixs,2),ones(numel(ixs),1)*loop,'k.');
end
% axis off

%%%% now go through and get index into best stuff from this analysis

%------------------ firing properties of each cluster --------------------
%%% get firing rates and spectra of spike trains in clusters....
grprates = cell(thetaHngrps(bH),1);
for loop = 1:thetaHngrps(bH)
   inds = grp_membersH{loop};
   for i = 1:thetaHgrpsizes{bH}(loop)
      currix = find(MSspks(:,1) == MSidxs(i));
      ts = MSspks(currix,2)*1e-3; % in seconds
      % difficult to compute spectrum cos of so few spikes in each cell...
   end
   grprates{loop} = MSrate(inds);
   grpmedian(loop) = median(MSrate(inds));
end



% find clusters that are antagonisitc
cxy = corrcoef(sctsH); % compute correlation coefficient between total spike counts...

% firing sequences?? Here looking at sequences of *silence* 
% 1. by min active vector in bin
TsctsH = sctsH(1:end-1,:);
min_in_bin = min(TsctsH');    % find min proportion active in each bin
grp_seq = zeros(numel(min_in_bin),1);
for i = 1:numel(min_in_bin)
    [x,grp_seq(i)] = ind2sub(size(TsctsH),find(TsctsH == min_in_bin(i),1));
end

% 2. by all vectors below some threshold in bin
grp_seq2 = cell(numel(min_in_bin),1);
for i = 1:numel(min_in_bin)
    grp_seq2{i} = find(TsctsH(i,:) < 0.2); 
end

%---------------- reconstruct correlation graph of best theta
Ac = (hc <= thetaHvec(bH));    % smallest error

% get reduced version (all nodes degree at least 2 before reduction...)
Acs = Ac(rtnd{bH},rtnd{bH});
Kdist = sum(Acs);

%----------------------------- properties of correlation graph  ---------------------------

% a few graph properties
Kmn = mean(Kdist); nds = numel(Kdist); dnsty = sum(Kdist) / (nds*(nds-1));
LR = log(nds) / log(Kmn);
CR = Kmn / nds;

%%----------------------------- do graph analysis of network --------------
%%%% get path-lengths of just MS-MS network using Olaf's code (reachdist.m)
Cmsms = SIMPARAMS.net.Cmsms +1; % was zero-indexed
Cmsms_b = SIMPARAMS.net.Cmsms_b+1; % was zero-indexed
Amsms = zeros(Nms);

% construct adjacency matrix for MS-MS connections
for i = 1:Nms
    % loop round and stick each MSN's connections into matrix....
    cur = Cmsms([Cmsms_b(i):Cmsms_b(i+1)-1]);
    Amsms(i,cur) = 1;
end

% properties
indgr = sum(Amsms);
outdrg = sum(Amsms');
Mmsms = sum(sum(indgr)); mK = Mmsms/Nms;  % approx mean degree...
msms_dnsty = Mmsms / (Nms*(Nms-1));

% get modularity!!!
[Smsms,U] = multileadevsplit(Amsms);

% get path lengths
[R,D] = reachdist(Amsms);
L = sum(sum(D))/Nms^2;    LR = log(Nms) / log(mK);
Ctri = clusttriang(Amsms);  CR = mK / Nms;
[msmsCws,Cws] = clustind(Amsms);
St = (Ctri/CR) / (L/LR);
Sws = (Cws/CR) / (L/LR); 

%--- go through groups and get within group MSN-MSN path length, clustering coeff..
grpL = zeros(thetaHngrps(bH),1);
grpCt = zeros(thetaHngrps(bH),1);
grpMC_WS = zeros(thetaHngrps(bH),1);
grpC_WS = cell(thetaHngrps(bH),1);
grpSCtri = zeros(thetaHngrps(bH),1);
grpSCws = zeros(thetaHngrps(bH),1);

figure(101), clf, hold on
for i = 1:thetaHngrps(bH)
    cur = grp_membersH{i};
    nmem = thetaHgrpsizes{bH}(i);
    curdist = D(cur,cur);
    % grpL(i) = median(curdist(:));
    % path lengths and clustering coefficients
    grpL(i) = sum(sum(D(cur,cur)))/nmem^2;
    grpCt(i) = clusttriang(Amsms(cur,cur));
    [grpC_WS{i},grpMC_WS(i)] = clustind(Amsms(cur,cur));
    % small-world-ness
    grp_m = sum(sum(Amsms(cur,cur)));   % number of edges
    mdeg =grp_m/ nmem;    % i.e. mean number of edges per node 
    % approximate random graphs values
    grpCRa = mdeg / nmem; grpCRb = mdeg / nmem;    % <k> / n
    grpLR = log(nmem) / log(mdeg); % log(n) / log <k>
    % Monte Carlo random graph values... [these groups are very small!!
%     grpLR = 0; grpCRa = 0; grpCRb = 0; nMC = 20;
%     for j = 1:nMC
%         % create random_graph of same number of edges
%         Arand = random_graph(nmem,0,grp_m,'directed');
%         % compute path length
%         [temp,tempD] = reachdist(Arand);
%         grpLR = grpLR + (sum(sum(tempD)) / nmem^2)/nMC;
%         % compute mean clustering coeffs
%         grpCRa = grpCRa + clusttriang(Arand)/nMC;
%         [temp,tempC] = clustind(Arand);
%         grpCRb = grpCRb + tempC/nMC;
%     end
    
    grpSCtri(i) = (grpCt(i)/grpCRa) / (grpL(i)/grpLR);
    grpSCws(i) = (grpMC_WS(i)/grpCRb) / (grpL(i)/grpLR);
    % plot ECDF of clustering coefficient distribution...
    subplot(211), hold on, if ~isempty(grpC_WS{i}) [h,stats] = cdfplot(grpC_WS{i}); end
    set(h,'Color',clrs(clrstep*i,:)); xlabel('x = clustering coefficient')
    % plot histogram of path length distribution in each group
    subplot(212), hold on, plot(hist(curdist(:),max(curdist(:)))./nmem.^2,'+-','Color',clrs(clrstep*i,:));
    xlabel('Path length within group'); ylabel('probability of path length')
end
Hgrps = Hamming{bH}';


%--- go through all anatagonistic group pairs and get path length, clustering
%etc...
c_up = triu(cxy);   % just upper triangular part as correlation matrix is symmetric
ant_pairs = find(c_up <= -0.1);
[r,c] = ind2sub(size(c_up),ant_pairs);
antL_AB = zeros(numel(ant_pairs),1);
antL_BA = zeros(numel(ant_pairs),1);

% colormap = jet;
% clrs = colormap; [rw cl] = size(clrs); antclrstep = floor(rw / numel(ant_pairs));
figure(102), clf, hold on
for i = 1:numel(ant_pairs)
    curA = grp_membersH{r(i)};
    curB = grp_membersH{c(i)};
    nmem = numel(curA)*numel(curB);
    curdistAB = D(curA,curB);
    curdistBA = D(curB,curA);
    % antL(i) = median(curdist(:));
    %antL(i) = sum(sum(D(curR,curC)))/nmem;
    antL_AB(i) = mean(curdistAB(:));
    antL_BA(i) = mean(curdistBA(:));
    plot(hist(curdist(:),max(curdist(:)))./nmem,'+-','Color',clrs(antclrstep*i,:)); %% something broken here??
    xlabel('Path length between groups'); ylabel('probability of path length')
end


%----- construct adjacency matrix for FS-MS connections
Nfs = SIMPARAMS.net.FS.N;
Cfsms = SIMPARAMS.net.Cfsms +1; % was zero-indexed
Cfsms_b = SIMPARAMS.net.Cfsms_b+1; % was zero-indexed

Afsms = zeros(Nfs,Nms);
for i = 1:Nfs
    % loop round and stick each FSN's connections into matrix....
    cur = Cfsms([Cfsms_b(i):Cfsms_b(i+1)-1]);
    Afsms(i,cur) = 1;
end

% check that all MSNs get at least one FS input..
fsmsin = sum(Afsms);

% get FS indexes for each group...
grpFS = zeros(Nfs,thetaHngrps(bH));
grpFSw = zeros(Nfs,thetaHngrps(bH));
for i = 1:thetaHngrps(bH)
    cur = grp_membersH{i};
    grpFS(:,i) = [sum(Afsms(:,cur)')]'./thetaHgrpsizes{bH}(i);
    % weight by firing rate of FS interneuron
    grpFSw(:,i) = (FSrate .* [sum(Afsms(:,cur)')]')./thetaHgrpsizes{bH}(i);
end

% compare this to cxy - correlation coefficient between spike counts?

% construct adjacency matrix for FS-FS connections
Cfsfs = SIMPARAMS.net.Cfsfs +1; % was zero-indexed
Cfsfs_b = SIMPARAMS.net.Cfsfs_b+1; % was zero-indexed
Afsfs = zeros(Nfs);
for i = 1:Nfs
    % loop round and stick each MSN's connections into matrix....
    cur = Cfsfs([Cfsfs_b(i):Cfsfs_b(i+1)-1]);
    Afsfs(i,cur) = 1;
end

%%% Cgapfs indexes into Pgap matrix of connected pairs... 
Pgapfs = SIMPARAMS.net.Pgapfs +1; % was zero-indexed
Agapfs = zeros(Nfs);
for i = 1:length(Pgapfs)
    % loop round and stick each MSN's connections into matrix....
    Agapfs(Pgapfs(i,1),Pgapfs(i,2)) = 1;
    Agapfs(Pgapfs(i,2),Pgapfs(i,1)) = 1;
end

%%% whole network??
Astr = zeros(Nfs+Nms);
Astr(Nfs+1:end,Nfs+1:end) = Amsms;  % add MS MS connections
Afstotal = Afsfs + Agapfs;
Astr(1:Nfs,1:Nfs) = Afstotal;   % Add FS-FS connections; for now, count double connections as two edges
Astr(1:Nfs,Nfs+1:end) = Afsms;  % Add FS-MS connections

% get modularity!!!
[Sstr,U] = multileadevsplit(Astr);

% plot groups in 3D structure... and compute distance from centre
Dims = SIMPARAMS.net.PhysicalDimensions;
centre = Dims/2;
mediandist = zeros(thetaHngrps,1);
cdist = cell(thetaHngrps,1);
figure(106), clf,    
for loop = 1:thetaHngrps(bH)
    %inds = R(Sc==Igrp(loop));
    inds = RH(ScH==loop);
    xyz = [SIMPARAMS.net.MS.Position(inds,1),SIMPARAMS.net.MS.Position(inds,2),SIMPARAMS.net.MS.Position(inds,3)];
    cdiff = xyz - repmat(centre,thetaHgrpsizes{bH}(loop),1);
    cdist{loop} = sqrt(cdiff(:,1).^2 + cdiff(:,2).^2 + cdiff(:,3).^2);
    mediandist(loop) = median(cdist{loop});
    subplot(211),plot3(SIMPARAMS.net.MS.Position(inds,1),SIMPARAMS.net.MS.Position(inds,2),SIMPARAMS.net.MS.Position(inds,3),'.','Color',clrs(clrstep*loop,:))
    grid on, hold on
    subplot(212), hold on, [h,stats] = cdfplot(cdist{loop}); xlabel('x = distance from centre')
    set(h,'Color',clrs(clrstep*loop,:))
end  


save posthoc_analysis