function [weightedMean,weightedStdOfMean,weightedStdOfSample] = weightedStats(data, weightsOrSigma,sw)
%wStats calculates weighted statistics (mean, stdev of mean) for a list of inputs with corresponding weights or {std}
%
%SYNOPSIS [weightedMean,weightedStd] = weightedStats(data, weightsOrSigma,sw)
%
%INPUT data: vector of imput values.
%
% weightsOrSigma: weights or standardDeviations of the input data
% sw (opt): switch, either 'w' or {'s'}
%
% -> if either data or weights are matrices, the computation is done
% column-wise
%
%OUTPUT weightedMean: mean of the data weighted with weights (rowVector)
% weightedStdOfMean: sigma1 = sqrt[sum_i{(yi-mw)^2/sigmai^2}/((n-1)*sum_i{1/sigmai^2})]
% which is the general weighted std OF THE MEAN (not the sample, i.e. it is divided by sqrt(n))
% weightedStdOfSample = weightedStdOfMean * sqrt(n)
%
% CAUTION: according to www-gsi-vms.gsi.de/eb/people/wolle/buch/error.ps, if
% the uncertainity of the data points is much larger than the
% difference between them, sigma1 underestimates the "true" sigma.
% Hence, sigma2 = sqrt[1/sum_i{1/sigmai^2}] should be used. In
% general, the true sigma is to be max(sigma1,sigma2)
%
%reference: Taschenbuch der Mathematik, p. 815
%
%c: 06/03 jonas
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%test input: count input arguments
if nargin < 2 | isempty(weightsOrSigma) | isempty(data)
error('not enough or empty input arguments!')
end
weights = weightsOrSigma;
%test input: data size
sDat = size(data);
sIS = size(weights);
if any(sDat ~= sIS)
%one is a matrix and the other a vector, or bad user input
if sDat(1) ~= sIS(1)
if sDat(1) == sIS(2) & sDat(2) == sIS(1)
%bad user input: badly arranged vectors: make col-vectors
if sDat(1) == 1
data = data';
else
weights = weights';
end
else
%bad user input: fatal
error('bad input data size: if you want to specify a vector and a matrix for input, use a column-vector!')
end
else
%one's a vector, the other a matrix
if sDat(2) == 1
%make data a matrix
data = data*ones(1,sIS(2));
elseif sIS(2) == 1
%make weights a matrix
weights = weights*ones(1,sDat(2));
else
%bad input
error('bad input data size: specify either two matrices of equal size or a matrix and a vector or two vectors')
end
end
else
if sDat(1) == 1
%make both col vectors
data = data';
weights = weights';
end
end
%get # of data points
numRows = size(data,1);
numCols = size(data,2);
%calculate weights if necessary
if nargin == 2 | ~(strcmp(sw,'w')|strcmp(sw,'s'))
sw = 's';
end
if strcmp(sw,'s')
%w = 1/sigma^2
if any(weights == 0)
warning('WEIGHTEDSTATS:SigmaIsZero','At least one sigma == 0; set to eps');
weights = max(weights,eps);
end
%assign weight 1 to the measurement with smallest error
weights = (repmat(min(weights,[],1),numRows,1)./weights).^2;
end
%make sure the weights are positive
weights = abs(weights);
%calc weightedMean : each dataPoint is multiplied by the corresponding weight, the sum is divided
%by the sum of the weights
sumWeights = nansum(weights,1);
weightedMean = nansum(weights.*data,1)./sumWeights;
%---calc weightedStd---
squareDiffs = (data-repmat(weightedMean,numRows,1)).^2;
weightedSSQ = nansum(squareDiffs.*weights,1);
switch sw
case 'w'
%weighted mean : each squared difference from mean is weighted and divided by
%the number of non-zero weights http://www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weightsd.pdf
%get divisor (nnz is not defined for matrices)
for i=numCols:-1:1
% set NaN-weights to 0
nanWeights = isnan(weights(:,i));
weights(nanWeights,i) = 0;
nnzw = nnz(weights(:,i));
divisor(1,i) = (nnzw-1)/nnzw*sumWeights(i);
end
%assign output
sigma = sqrt(weightedSSQ./divisor);
weightedStdOfSample = sigma;
weightedStdOfMean = sigma/sqrt(nnzw);
case 's'
%calculate sigma1 = sqrt[sum_i{(yi-mw)^2/sigmai^2}/((n-1)*sum_i{1/sigmai^2})]
%which is the general weighted std OF THE MEAN (not the sample, i.e. it is divided by sqrt(n))
%
%CAUTION: according to www-gsi-vms.gsi.de/eb/people/wolle/buch/error.ps, if
%the uncertainity of the data points is much larger than the
%difference between them, sigma1 underestimates the "true" sigma.
%Hence, sigma2 = sqrt[1/sum_i{1/sigmai^2}] should be used. In
%general, the true sigma is to be max(sigma1,sigma2)
%sigma1. Correct number of observations
numRows = sum(~isnan(data) & ~isnan(weights),1);
divisor = (numRows-1).*sumWeights;
sigma1 = sqrt(weightedSSQ./divisor);
%sigma2
%sigma2 = sqrt(1/sumWeights);
%assign output
%weightedStdOfMean = max(sigma1,sigma2);
weightedStdOfMean = sigma1;
weightedStdOfSample = sigma1.*sqrt(numRows);
end