%% Symbolic calculation and numeric simulation for gaussian distriutions with no pruning
clear all
close all
%% Parameters
N = 10000000; %number of pair connections
mean_value = 0.5; %mean value of the gaussian distribution of connections
standard_deviation = 1/10; %sqrt of variance. With 1/10 we are considering up to 5*sd. 3*sd is 99.7%
%% Plotting parameters
numericFontSize = 25;
axesFontSize = 30;
lineThickness = 2;
markLine = 1;
markSize = 12;
minv = [ 0, 0, -1, 0];
maxv = [ 1, 2, 1, 1];
n_bins = 200;
bin_dim = (maxv-minv) / n_bins;
%% Parameters and variables for symbolic calculation
syms u v w %symbolic variables
mu = [mean_value, 2*mean_value, 0, 0 ]; %values used to create the functions: single, sum, difference, absolute difference
va = [standard_deviation^2, 2*standard_deviation^2, 2*standard_deviation^2, 2*standard_deviation^2]; %values used to create the functions: single, sum, difference, absolute difference
mu_sym = zeros(1, size(mu,2)); %mean value obtained by symbolic integration
va_sym = zeros(1, size(va,2)); %variance obtained by symbolic integration
mu_bid_sym = zeros(2); %mean value obtained by symbolic integration
va_bid_sym = zeros(2); %variance obtained by symbolic integration
tol=0.99; %tolerance for the integration of the distribution
%% Simulation samples
x = normrnd(mean_value, standard_deviation, 1, N);
y = normrnd(mean_value, standard_deviation, 1, N);
distr = [ x; x+y; x-y; abs(x-y)]; %vector of distributions
mean_val = [mean(x), mean(x+y), mean(x-y), mean(abs(x-y))]; %corresponding mean value
variance = [ var(x), var(x+y), var(x-y), var(abs(x-y))]; %corresponding variance
%% Printing vectors
variable = char('x, y', 'x+y', 'x-y', '|x-y|'); %creates a matrix where each string is spread over a row, every character being an element of the matrix
x_axis_name = char ('w','Z_2','diff','Z_1');
y_axis_name = char ('f(w)','f(Z_2)','f(diff)','f(Z_1)');
figure_name = char ('Gaussian_initialPDF_noprune','Gaussian_sumPDF_noprune','Gaussian_diffPDF_noprune','Gaussian_absdiffPDF_noprune');
%% Single distributions
for i = 1:size(distr,1)
sample = minv(i):bin_dim(i):maxv(i);
%---SYMBOLIC
f = exp(-((u-mu(i))^2/(2*va(i)))) / sqrt(2*pi*va(i));
area = int(f,minv(i),maxv(i));
%area check, normalization and sampling
if double(area) > 1
display('Error, something is going wrong: the integral of a distribution cannot be larger than 1')
elseif double(area) < tol
f = f / double(area);
val = normpdf(sample, mu(i), sqrt(va(i))) / double(area);
area = int(f,minv(i),maxv(i));
else
val = normpdf(sample, mu(i), sqrt(va(i)));
end
%mean and variance by integration - for the absolute difference they have to be different from the ones used to create the function
mu_sym(i) = int(f*u,minv(i),maxv(i));
va_sym(i) = int(f*(u-mu_sym(i))^2,minv(i),maxv(i));
%---SIMULATION
[counts,bin_location] = hist(distr(i,:),n_bins); %Note: bin_location==sample
bin_dim_hist = ( max(bin_location) - min(bin_location) ) / n_bins;
%---PRINT & PLOT
sprintf('Variable: %s \nSimulation with N=%d pair connections: Mean %f. Variance %f. \nSymbolic calculation: Integral of the distribution with mean %f and variance %f: %f', variable(i,:), N, mean_val(i), variance(i), mu_sym(i), va_sym(i), double(area))
%double(area) converts sym to a variable double, so it calculates the value and it can be displayed. Otherwise I could also type vpa(area,6) directly on a sym object: it evaluates the numerical expression
figure(i);
bar(bin_location, counts/(sum(bin_dim_hist*counts))); %simulation --- Note: sum(bin_dim(i)*counts)==bin_dim*N
hold on
h = plot(sample,val); %theoretical
set(h, 'color', 'k', 'LineWidth', lineThickness);
set(gca,'fontsize',numericFontSize);
xlabel(x_axis_name(i, find(x_axis_name(i,:) ~= ' ')),'fontsize',axesFontSize);
ylabel(y_axis_name(i, find(y_axis_name(i,:) ~= ' ')),'fontsize',axesFontSize);
axis([minv(i)-0.5 maxv(i)+0.5 0 ceil(max( counts/(sum(bin_dim_hist*counts)) ))]);
colormap(gray);
caxis([-1 1.4]);
if i ~= 3
print(gcf, '-depsc2', '-loose', figure_name(i, find(figure_name(i,:) ~= ' '))); % Print the figure in eps (first option) and uncropped (second object)
end
end
%% Joint distributions-bivariate gaussian
for i = 3:4
samplex = minv(2):bin_dim(2):maxv(2); %first distribution is the sum
sampley = minv(i):bin_dim(i):maxv(i); %second distribution is either the difference or the absolute difference
%covariance and correlation from the simulation: the two variables are not independents
covariance = sum ((distr(2,:)-mean_val(2)).*(distr(i,:)-mean_val(i))) / N;
corr = covariance / (sqrt(variance(2))*sqrt(variance(i)));
sigma = [va(2), corr*sqrt(va(2))*sqrt(va(i)); corr*sqrt(va(2))*sqrt(va(i)), va(i)];
%---SYMBOLIC
f = exp( - (((v-mu(2))^2/(va(2))) + ((w-mu(i))^2/(va(i))) - 2*corr*(v-mu(2))*(w-mu(i))/(sqrt(va(2))*sqrt(va(i))) ) / (2*(1-(corr^2))) ) / ( 2*pi*sqrt (va(2)*va(i)*(1-(corr^2))) ); %bivariate depedent
%f = exp(-((v-mu(2))^2/(2*va(2)))-((w-mu(i))^2/(2*va(i))))/(2*pi*sqrt(va(2)*va(i))); %bivariate independent
area = int((int(f,v,minv(2),maxv(2))),w,minv(i),maxv(i));
%area check, normalization and sampling
if double(area) > 1
display('Error, something is going wrong: the integral of a distribution cannot be larger than 1')
elseif double(area) < tol
f = f / double(area);
[X1,X2] = meshgrid(samplex,sampley); %Generates the grid from the two vectors
val = mvnpdf([X1(:) X2(:)], [mu(2),mu(i)], sigma) / double(area); %Note: mvnpdf returns a vector evaluating the PDF at each row of [X1(:) X2(:)]
val = reshape(val,length(samplex),length(sampley));
area = int((int(f,v,minv(2),maxv(2))),w,minv(i),maxv(i));
else
[X1,X2] = meshgrid(samplex,sampley);
val = mvnpdf([X1(:) X2(:)], [mu(2),mu(i)], sigma); %Note: mvnpdf returns a vector evaluating the PDF at each row of [X1(:) X2(:)]
val = reshape(val,length(samplex),length(sampley));
end
%mean and variance by integration - for the absolute difference they have to be different from the ones used to create the function
mu_bid_sym(i-2,1) = int((int(f*v,v,minv(2),maxv(2))),w,minv(i),maxv(i));
mu_bid_sym(i-2,2) = int((int(f*w,v,minv(2),maxv(2))),w,minv(i),maxv(i));
va_bid_sym(i-2,1) = int((int(f*(v-mu_bid_sym(i-2,1))^2,v,minv(2),maxv(2))),w,minv(i),maxv(i));
va_bid_sym(i-2,2) = int((int(f*(w-mu_bid_sym(i-2,2))^2,v,minv(2),maxv(2))),w,minv(i),maxv(i));
%---SIMULATION
grid{1} = samplex;
grid{2} = sampley;
[bidim_counts,Cell_location] = hist3([(distr(2,:))', (distr(i,:))'], grid);
%---PRINT AND PLOT
sprintf('Variables: %s, %s \nSimulation with N=%d pair connections: Mean %f, %f. Variance %f, %f. \nSymbolic calculation: Integral of the distribution with mean %f, %f and variance %f, %f: %f', variable(2,:), variable(i,:), N, mean_val(2), mean_val(i), variance(2), variance(i), mu_bid_sym(i-2,1), mu_bid_sym(i-2,2), va_bid_sym(i-2,1), va_bid_sym(i-2,2), double(area))
%double(area) converts sym to a variable double, so it calculates the value and it can be displayed. Otherwise I could also type vpa(area,6) directly on a sym object: it evaluates the numerical expression
figure;
mesh(Cell_location{1},Cell_location{2},bidim_counts'/(sum(sum(bin_dim(2)*bin_dim(i)*bidim_counts)))) %simulation
hold on
surf(samplex,sampley,val); %theoretical
set(gca,'fontsize',numericFontSize);
%xlabel('Z_2','fontsize',axesFontSize);
%ylabel('Z_1','fontsize',axesFontSize);
%zlabel('f(Z_1,Z_2)','fontsize',axesFontSize);
axis([0 2 0 1]);
colormap(gray);
caxis([-1 20]);
view([130 14]);
if i == 4
print(gcf, '-depsc2', '-loose', 'Gaussian_jointPDF_noprune'); % Print the figure in eps (first option) and uncropped (second object)
end
end
%% Check the validity of the restriction in [0,1]
%Verify that all the points of the distribution lie in the range [0,1]x[abs(x-y),2-abs(x-y)] and that the integral in this range is 1
%Few points can lie outside, those that in the initial distributions x and y are outside [0,1]
%---SIMULATION
k=0; %counts the points inside
m=0; %counts the points outside
for i=1:size(distr,2)
if and(((distr(2,i))>=(distr(4,i))),((distr(2,i))<=2-(distr(4,i))))
k=k+1;
else
m=m+1;
end
end
%---SYMBOLIC
area_check = int((int(f,v,w,(2-w))),w,0,1);
%---PRINT
sprintf('Checking the joint distribution f(x+y,|x-y|) in the definition interval [0,1]x[|x-y|,2-|x-y|]. \nSimulation. Number of points of the distribution outside: %d out of %f. \nSymbolic calculation. Area of the joint distribution: %f', m, N, double(area_check))