function npd = ksize(npd,type,varargin)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% KSIZE Find optimal kernel size for kernel density estimates
%
% Q=KSIZE(P) returns a KDE with the same points and type as "P", but whose
% bandwidth has been determined by one of a number of data-based selection methods.
% Optional arguments:
% KSIZE(P,'type' [,options]) where 'type' is one of:
% 'unif' or 'lcv' -- (default) spherical, uniform likelihd cross-valid. search (1d)
% 'local' [,P0] -- lcv optimization, preprocessed to be proportional to P0 [1xNpts]
% (Default: P0 = sqrt(N)-nearest nbr distance of each point)
% 'rot' -- (fast) "Rule of Thumb" asymptotic estimator (Silverman)
% 'msp' -- (fast) "Maximal Smoothing Principle" (close to ROT) (Terrel)
% 'hall' or 'hsjm' -- plug-in estimator of Hall,Sheather,Jones,Marron (91)
% 'maxmin' -- ad-hoc method: sqrt of max of nearest-nbr distances
%
% Ending the string with 'p' (e.g. 'lcvp') causes the data to be normalized by its
% variance before finding the bandwidth (and transformed back afterwards)
% This is useful when the scales of the dimensions are very different.
%
% See also: kde, adjustBW
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright (C) 2003 Alexander Ihler; distributable under GPL -- see README.txt
Nd = npd.D; Np = npd.N;
if (Np==1) npd = kde(getPoints(npd),0,getWeights(npd),getType(npd)); end;
if (nargin<2) type='lcv'; end;
if (isa(type,'double')), ks = type; % Passed a value, not a type?
if (length(ks)==1) ks = ks + zeros(Nd,1); end;
npd = kde(getPoints(npd),type,getWeights(npd),getType(npd));
return;
end;
type = lower(type);
if (type(end)=='p')
stddv = covar(npd,0); % pre-equalize variances for
npd = rescale(npd, 1./stddv); % any 1-d calculations
end;
switch lower(type) %%%% KERNEL SIZE SELECTION METHODS %%%%%
%%%% LEAST-SQUARES CROSS-VALIDATION %%%%
case {'lscv','lscvp'}, % least-squares cross-validation
ks = ksizeLSCV(npd);
%%%% LIKELIHOOD CROSS-VALIDATION BASED %%%%
case {'lcv','unif','lcvp','unifp'}, % (uniform) likelihood cross-validation
[minm,maxm] = neighborMinMax(npd);
npd = kde(getPoints(npd),(minm+maxm)/2,getWeights(npd),getType(npd));
ks = golden(npd,@nLOO_LL,2*minm/(minm+maxm),1,2*maxm/(minm+maxm),1e-2);
ks = ks * (minm+maxm)/2;
case {'local','localp'}, % local likelihood cross-val
if (nargin < 3)
[prop,minm,maxm] = neighborDistance(npd,sqrt(getNpts(npd)));
else prop = varargin{1}; [minm,maxm] = neighborMinMax(npd);
end;
prop = prop / mean(prop);
npd = kde(getPoints(npd),(minm+maxm)/2 * prop,getWeights(npd),getType(npd));
ks = golden(npd,@nLOO_LL,2*minm/(minm+maxm),1,2*maxm/(minm+maxm),1e-2);
ks = ks * (minm+maxm)/2 * prop;
%%%% STANDARD-DEVIATION BASED %%%%
case {'rot'}, % "Rule of Thumb" (stddev-based)
ks = ksizeROT(npd);
case {'msp'}, % "Maximal Smoothing Principle"
ks = ksizeMSP(npd); % same as ROT but different constants
%%%% "PLUG-IN" BASED ON APPROX MISE ASYMPTOTICS %%%%
case {'hall','hsjm'},
ks = ksizeHall(npd);
case {'maxmin'}, % Maximum of the minimum neighbor distance
[nn,prop] = knn(npd,getPoints(npd),1+1);
ks = sqrt(max(prop));
end;
if (type(end)=='p')
ks = repmat(ks,[Nd,1]./[size(ks,1),1]);
ks = ks .* repmat(stddv,size(ks)./size(stddv)); % fix up prev. equalization
npd = rescale(npd, stddv); % and changed npd points
end;
npd = kde(getPoints(npd),ks,getWeights(npd),getType(npd));
function H = nLOO_LL(alpha,npd)
if (nargin < 2) error('ksize: LOO_LL: Error! Too few arguments'); end;
if (npd.type == 0) alpha = alpha.^2; end;
npd.bandwidth = npd.bandwidth * alpha;
H = entropy(npd,'lvout');
npd.bandwidth = npd.bandwidth / alpha;
function [prop,minm,maxm] = neighborDistance(npd,Nnear)
% if (exist('knn'))
[nn,prop] = knn(npd,getPoints(npd),round(Nnear)+1);
[minm, maxm] = neighborMinMax(npd);
% else
% points = getPoints(npd);
% [N1,N2] = size(points);
% X1 = repmat(points,[1,1,N2]);
% X2 = permute(repmat(points,[1,1,N2]),[1,3,2]);
% dist= permute(sum( (X1-X2).^2,1),[2,3,1]);
% dist= sqrt(sort(dist,1));
% prop= dist(round(Nnear),:);
% [minm,maxm] = neighborMinMax(npd);
% end;
function [minm,maxm] = neighborMinMax(npd)
maxm = sqrt(sum( (2*npd.ranges(:,1)).^2) );
minm = min(sqrt(sum( (2*npd.ranges(:,1:npd.N-1)).^2 ,1)),[],2);
minm = max(minm,1e-6);