%% Symbolic calculation and numeric simulation for gaussian distriutions with pruning
clear all
close all
%% Parameters
N = 10000000; %number of pair connections
mean_value = 0.5; %mean value of the gaussian distribution of connections
standard_deviation = 1/10; %sqrt of variance. With 1/10 we are considering up to 5*sd. 3*sd is 99.7%
a = 0:0.1:0.9; %pruning values
number_points = size(a,2);
%% Plotting parameters
numericFontSize = 25;
axesFontSize = 30;
lineThickness = 2;
markLine = 1;
markSize = 12;
minv = [ 0, 0, -1, 0];
maxv = [ 1, 2, 1, 1];
n_bins = 200;
bin_dim = (maxv-minv) / n_bins;
%% Parameters and variables for symbolic calculation
syms u v w %symbolic variables
mu = [mean_value, 2*mean_value, 0, 0 ]; %values used to create the functions: single, sum, difference, absolute difference
va = [standard_deviation^2, 2*standard_deviation^2, 2*standard_deviation^2, 2*standard_deviation^2]; %values used to create the functions: single, sum, difference, absolute difference
mu_sym = zeros(1, size(mu,2)); %mean value obtained by symbolic integration
va_sym = zeros(1, size(va,2)); %variance obtained by symbolic integration
mu_bid_sym = zeros(1,2); %mean value obtained by symbolic integration
va_bid_sym = zeros(1,2); %variance obtained by symbolic integration
tol=0.99; %tolerance for the integration of the distribution
%% Printing vector
variable = char('x, y', 'x+y', 'x-y', '|x-y|'); %creates a matrix where each string is spread over a row, every character being an element of the matrix
x_axis_name = char ('w','Z_2','diff','Z_1');
y_axis_name = char ('f(w)','f(Z_2)','f(diff)','f(Z_1)');
figure_name = char ('Gaussian_initialPDF_prune0_1','Gaussian_sumPDF_prune0_1','Gaussian_diffPDF_prune0_1','Gaussian_absdiffPDF_prune0_1');
%% Sampling
for n = 1:number_points
%% Simulation samples
x_unp = normrnd(mean_value, standard_deviation, 1, N) .* (rand(1,N) > a(n));
y_unp = normrnd(mean_value, standard_deviation, 1, N) .* (rand(1,N) > a(n));
x = x_unp((x_unp+y_unp)~=0); %uses logical index to cut away the zero elements from the distributions when they have the same index in x and y (w_{ij}=w_{ji}=0)
y = y_unp((x_unp+y_unp)~=0); %uses logical index to cut away the zero elements from the distributions when they have the same index in x and y (w_{ij}=w_{ji}=0)
distr = [ x; x+y; x-y; abs(x-y)]; %vector of distributions
mean_val = [mean(x), mean(x+y), mean(x-y), mean(abs(x-y))]; %corresponding mean value
variance = [ var(x), var(x+y), var(x-y), var(abs(x-y))]; %corresponding variance
index_zero_x = find(x==0);
index_zero_y = find(y==0);
x_peak = [x(index_zero_x), x(index_zero_y)];
y_peak = [y(index_zero_x), y(index_zero_y)];
mean_peak = mean(x_peak+y_peak);
var_peak = var(x_peak+y_peak);
%% Single distributions
for i = 1:size(distr,1)
sample = minv(i):bin_dim(i):maxv(i);
%---SYMBOLIC
f1 = exp(-((u-mu(1))^2/(2*va(1)))) / sqrt(2*pi*va(1));
f2 = exp(-((u-mu(i))^2/(2*va(i)))) / sqrt(2*pi*va(i));
%Single distribution area check
area_f2 = double(int(f2,minv(i),maxv(i)));
if area_f2 > 1
display('Error, something is going wrong: the integral of a distribution cannot be larger than 1')
elseif area_f2 < tol
f2 = f2 / area_f2;
end
%overall distribution
f = (2*a(n)/(1+a(n)))*f1 + ((1-a(n))/(1+a(n)))*f2;
area = int(f,minv(i),maxv(i));
%area check, normalization and sampling
if double(area) > 1
display('Error, something is going wrong: the integral of a distribution cannot be larger than 1')
elseif double(area) < tol
f = f / double(area);
val = (((2*a(n)/(1+a(n)))*normpdf(sample,mu(1),sqrt(va(1))))+(((1-a(n))/(1+a(n)))*normpdf(sample,mu(i),sqrt(va(i)))/area_f2)) / double(area);
area = int(f,minv(i),maxv(i));
else
val = (((2*a(n)/(1+a(n)))*normpdf(sample,mu(1),sqrt(va(1))))+(((1-a(n))/(1+a(n)))*normpdf(sample,mu(i),sqrt(va(i)))/area_f2));
end
%mean and variance by integration - for the absolute difference they have to be different from the ones used to create the function
mu_sym(i) = int(f*u,minv(i),maxv(i));
va_sym(i) = int(f*(u-mu_sym(i))^2,minv(i),maxv(i));
%---SIMULATION
[counts,bin_location] = hist(distr(i,:),n_bins); %Note: bin_location==sample
bin_dim_hist = ( max(bin_location) - min(bin_location) ) / n_bins;
%print and plot
sprintf('Pruning a = %f. \nVariable: %s \nSimulation with N=%d pair connections: Mean %f. Variance %f. \nSymbolic calculation: Integral of the distribution with mean %f and variance %f: %f', a(n), variable(i,:), N, mean_val(i), variance(i), mu_sym(i), va_sym(i), double(area))
if n==1 || n==2 || n==6
figure;
bar(bin_location, counts/(sum(bin_dim_hist*counts))); %simulation --- Note: sum(bin_dim(i)*counts) is no longer equal to bin_dim*N because of the pruning
hold on
if i == 1
h = plot(sample,(1-a(n))*val); %theoretical
else
h = plot(sample,val); %theoretical
end
set(h, 'color', 'k', 'LineWidth', lineThickness);
set(gca,'fontsize',numericFontSize);
xlabel(x_axis_name(i, find(x_axis_name(i,:) ~= ' ')),'fontsize',axesFontSize);
ylabel(y_axis_name(i, find(y_axis_name(i,:) ~= ' ')),'fontsize',axesFontSize);
axis([minv(i)-0.5 maxv(i)+0.5 0 ceil(max( counts/(sum(bin_dim_hist*counts)) ))]);
colormap(gray);
caxis([-1 1.4]);
end
if (n == 2) && (i ~= 3)
print(gcf, '-depsc2', '-loose', figure_name(i, find(figure_name(i,:) ~= ' '))); % Print the figure in eps (first option) and uncropped (second object)
end
end
%% Joint distributions-bivariate gaussian
samplex = minv(2):bin_dim(2):maxv(2); %first distribution is the sum
sampley = minv(4):bin_dim(4):maxv(4); %second distribution is or the difference or the absolute difference
if n==1
%covariance and correlation from the simulation
covariance = sum((distr(2,:)-mean_val(2)).*(distr(4,:)-mean_val(4)))/N;
corr = covariance/(sqrt(variance(2))*sqrt(variance(4)));
sigma = [va(2), corr*sqrt(va(2))*sqrt(va(4)); corr*sqrt(va(2))*sqrt(va(4)), va(4)];
%---SYMBOLIC
g_un = exp( - (((v-mu(2))^2/(va(2))) + ((w-mu(4))^2/(va(4))) - 2*corr*(v-mu(2))*(w-mu(4))/(sqrt(va(2))*sqrt(va(4))) ) / (2*(1-(corr^2))) ) / ( 2*pi*sqrt (va(2)*va(4)*(1-(corr^2))) );
%g_un = exp(-((v-mu(2))^2/(2*va(2)))-((w-mu(4))^2/(2*va(4))))/(2*pi*sqrt(va(2)*va(4)));
area_bid = int((int(g_un,v,minv(2),maxv(2))),w,minv(4),maxv(4));
%area check, normalization and sampling
if double(area_bid)>1
display('Error, something is going wrong: the integral of a distribution cannot be larger than 1')
elseif double(area_bid)<tol
g_un = g_un/double(area_bid);
[X1,X2] = meshgrid(samplex,sampley);
val = mvnpdf([X1(:) X2(:)], [mu(2),mu(4)], sigma)/double(area_bid); %Note: mvnpdf returns a vector evaluating the PDF at each row of [X1(:) X2(:)]
val = reshape(val,length(samplex),length(sampley));
%area = int((int(g_un,v,minv(2),maxv(2))),w,minv(4),maxv(4));
else
[X1,X2] = meshgrid(samplex,sampley);
val = mvnpdf([X1(:) X2(:)], [mu(2),mu(4)], sigma); %Note: mvnpdf returns a vector evaluating the PDF at each row of [X1(:) X2(:)]
val = reshape(val,length(samplex),length(sampley));
end
%mean and variance by integration - for the absolute difference they have to be different from the ones used to create the function
mu_bid_sym(1,1) = int((int(g_un*v,v,minv(2),maxv(2))),w,minv(4),maxv(4));
mu_bid_sym(1,2) = int((int(g_un*w,v,minv(2),maxv(2))),w,minv(4),maxv(4));
va_bid_sym(1,1) = int((int(g_un*(v-mu_bid_sym(1,1))^2,v,minv(2),maxv(2))),w,minv(4),maxv(4));
va_bid_sym(1,2) = int((int(g_un*(w-mu_bid_sym(1,2))^2,v,minv(2),maxv(2))),w,minv(4),maxv(4));
else
g2 = 0;
g = ((1-a(n))/(1+a(n)))*g_un + (2*a(n)/(1+a(n)))*g2;
[X1,X2] = meshgrid(samplex,sampley);
val = ((1-a(n))/(1+a(n)))*mvnpdf([X1(:) X2(:)], [mu(2),mu(4)], sigma)/double(area_bid); %Note: mvnpdf returns a vector evaluating the PDF at each row of [X1(:) X2(:)]
val = reshape(val,length(samplex),length(sampley));
%area = int((int(g,v,minv(2),maxv(2))),w,minv(4),maxv(4));
end
%---SIMULATION
grid{1} = samplex;
grid{2} = sampley;
%[bidim_counts,Cell_location] = hist3([(distr(2,:))', (distr(i,:))'], grid);
[bidim_counts,Cell_location] = hist3([(distr(2,:))', (distr(i,:))'], [n_bins, n_bins]);
bin_dim_hist_1 = ( max(Cell_location{1}) - min(Cell_location{1}) ) / n_bins;
bin_dim_hist_2 = ( max(Cell_location{2}) - min(Cell_location{2}) ) / n_bins;
% %---PEAK DISTRIBUTION (without pruning)
% %simulation
% [bidim_counts_peak,Cell_location_peak] = hist3([(x_peak+y_peak)', (abs(x_peak-y_peak))'], grid);
% surf(Cell_location_peak{1},Cell_location_peak{2},(bidim_counts_peak/max(max(bidim_counts_peak)))'/(2*pi*va(2))); %(2*pi*va(2)) is the normalization from theory
% hold on
%
% %theory
% mu_peak = [0.5 0.5];
% va_peak = [(va(2)), 0; 0, (va(4))]; %covariance matrix
% X_peak = mvnrnd(mu_peak,va_peak,10000000);
% [counts_peak,Cell_peak] = hist3(X_peak, grid);
% mesh(Cell_peak{1},Cell_peak{2},(counts_peak)'/(sum(sum(bin_dim(2)*bin_dim(4)*counts_peak))));
% axis([0 1 0 1 0 10]);
%print and plot
sprintf('Pruning a = %f. \nVariables: %s, %s \nSimulation with N=%d pair connections: Mean %f, %f. Variance %f, %f. \nSymbolic calculation: Integral of the distribution with mean %f, %f and variance %f, %f: %f', a(n), variable(2,:), variable(4,:), N, mean_val(2), mean_val(4), variance(2), variance(4), mu_bid_sym(1,1), mu_bid_sym(1,2), va_bid_sym(1,1), va_bid_sym(1,2), double(area))
if n==1 || n==2 || n==6
figure;
mesh(Cell_location{1},Cell_location{2},bidim_counts'/(sum(sum(bin_dim_hist_1*bin_dim_hist_2*bidim_counts)))) %simulation
hold on
surf(samplex,sampley,val); %theoretical
set(gca,'fontsize',numericFontSize);
axis([0 2 0 1]);
%xlabel('Z_2','fontsize',axesFontSize);
%ylabel('Z_1','fontsize',axesFontSize);
%zlabel('f(Z_1,Z_2)','fontsize',axesFontSize);
view([130 14]);
colormap(gray);
caxis([0 140]);
end
if (n == 2)
print(gcf, '-depsc2', '-loose', 'Gaussian_jointPDF_prune0_1'); % Print the figure in eps (first option) and uncropped (second object)
end
end
%%%%%-----Checking
%Simulation. Verify that all the points of the distribution lie in the range [0,1]x[abs(x-y),2-abs(x-y)] --- Few points can lie outside, those that in the formers distributions x and y are outside [0,1]
k=0;
m=0;
for i=1:size(distr,2)
if and(((distr(2,i))>=(distr(4,i))),((distr(2,i))<=2-(distr(4,i))))
k=k+1;
else
m=m+1;
%display(i)
end
end
%Symbolic. Verify that the integral in the range [0,1]x[abs(x-y),2-abs(x-y)] is equal to 1
check = int((int(g_un,v,w,(2-w))),w,0,1);
%Print
sprintf('Checking the joint distribution f(x+y,|x-y|) in the definition interval [0,1]x[|x-y|,2-|x-y|]. \nSimulation. Number of points of the distribution outside: %d out of %f. \nSymbolic calculation. Area of the joint distribution: %f', m, N, double(check))