function a_ranked_db = rankMatching(db, crit_db, props)
% rankMatching - Create a ranking db of row distances of db to given criterion db.
%
% Usage:
% a_ranked_db = rankMatching(db, crit_db, props)
%
% Parameters:
% db: A tests_db to rank.
% crit_db: A tests_db object holding the match criterion tests and stds.
% props: A structure with any optional properties.
% limitSTD: Truncate error values at this many STDs.
% tolerateNaNs: Multiplied by 3xSTD to replace NaN values. 0 means
% to skip NaNs in the distance calculation, scaling sum of
% errors by number of non-NaN entries. Negative values
% accepted; -1 means -3xSTDs error (default=+1).
% testWeights: Structure array associating tests and multiplicative weights.
% restoreWeights: Reverse the testWeights application after
% calculating distances.
% topRows: If given, only return this many of the top rows.
% useMahal: Use the Mahalonobis distance from the covariance
% matrix in crit_db.
%
% Returns:
% a_ranked_db: A ranked_db object.
%
% Example:
% Select a target row from mesured distribution (e.g., experimental data):
% >> a_crit_db = matchingRow(exp_db, 12);
% Rank another database by comparing to selected row:
% >> a_ranked_db = rankMatching(orig_db, a_crit_db);
% Look at top ranked rows:
% >> displayRows(a_ranked_db(1:5, :))
%
% Description:
% The crit_db parameter can be created with the matchingRow
% method. TestWeights modify the importance of each measure. Rows containing
% NaNs can be removed using noNaNRows before calling rankMatching.
% Warning: existing RowIndex column in db will be replaced with the
% ranking RowIndex used for joinOriginal.
%
% See also: matchingRow, joinOriginal, tests_db, noNaNRows
%
% $Id$
%
% Author: Cengiz Gunay <cgunay@emory.edu>, 2004/12/08
% Copyright (c) 2007-14 Cengiz Gunay <cengique@users.sf.net>.
% This work is licensed under the Academic Free License ("AFL")
% v. 3.0. To view a copy of this license, please look at the COPYING
% file distributed with this software or visit
% http://opensource.org/licenses/afl-3.0.php.
if ~ exist('props', 'var')
props = struct([]);
end
% Initialize row indices and delete existing RowIndex column
if isfield(db.col_idx, 'RowIndex')
db = delColumns(db, 'RowIndex');
% Using the existing RowIndex causes bug with all subsequent
% joinOriginal calls
%row_db = onlyRowsTests(db, ':', 'RowIndex');
%row_index = row_db.data;
end
row_index = (1:dbsize(db, 1))';
crit_tests = fieldnames(crit_db.col_idx);
if isa(crit_db, 'params_tests_db')
% If params_tests_db ignore params
crit_tests = {crit_tests{(get(crit_db, 'num_params') + 1):dbsize(crit_db, 2)}};
end
itemIndices = strmatch('ItemIndex', crit_tests);
% Strip out the NeuronId, RowIndex, and ItemIndex columns from criterion db
crit_tests = setdiff(crit_tests, {'NeuronId', 'RowIndex', crit_tests{itemIndices}});
% Use only non-NaN values available in the criterion
crit_row_db = onlyRowsTests(crit_db, 1, crit_tests);
nonnans = ~ isnan(crit_row_db.data);
crit_tests = crit_tests(nonnans);
% Vectorize diff over cols and do the diff^2
%####
% Replicate 1st row of crit_db
first_row = onlyRowsTests(crit_db, 1, crit_tests);
% Weigh according to 2nd row of crit_db
second_row = onlyRowsTests(crit_db, 2, crit_tests);
second_row_data = second_row.data;
if isfield(props, 'testWeights')
tests_weights = props.testWeights;
elseif isfield(crit_db.props, 'testWeights')
tests_weights = crit_db.props.testWeights;
end
if exist('tests_weights', 'var')
[weighted_tests_names weights_order] = ...
intersect(fieldnames(tests_weights), crit_tests);
if ~ isempty(weighted_tests_names)
weights_cell = struct2cell(tests_weights);
weights_tests = tests2cols(second_row, weighted_tests_names);
second_row_data(weights_tests) = ...
second_row_data(weights_tests) ./ cell2mat(weights_cell(weights_order)');
end
end
% Filter relevant columns, subtract from db and weight
% (do all at once without using temporary variables to save memory)
% Measure differences
wghd_data = get(onlyRowsTests(db, ':', crit_tests), 'data') - (ones(dbsize(db, 1), 1) * first_row.data);
% Replicate STD row for all
second_row_matx = ones(dbsize(db, 1), 1) * second_row_data;
% Look for NaN & Inf values
nans = isnan(wghd_data) | isinf(wghd_data);
% penalize NaNs by replacing them with 3 (meaning 3 STDs difference).
wghd_data(nans) = getFieldDefault(props, 'tolerateNaNs', 1) * 3 * second_row_matx(nans);
% if NaNs were ignored by skipping, count the non-NaN values to normalize
% the SS below.
if isfield(props, 'useMahal')
% Use Mahalonobis distance that factors in the covariations between measures
wghd_data = wghd_data * inv(get(onlyRowsTests(crit_db.props.cov, crit_tests, crit_tests), 'data'));
else
% normalized scale-invariant distance by dividing with STDs
% (equivalent to Mahalonobis distance if measures were independent)
wghd_data = wghd_data ./ second_row_matx;
end
if isfield(props, 'limitSTD')
% limit any measure to this many STDs max
exceedingSTDs = abs(wghd_data) > props.limitSTD;
wghd_data(exceedingSTDs) = sign(wghd_data(exceedingSTDs)) * props.limitSTD;
end
% Sum of absolute error: distance measure
ss_data = abs(wghd_data);
if ~ isfield(props, 'tolerateNaNs') || props.tolerateNaNs ~= 0
ss_data = sum(ss_data, 2) ./ size(ss_data, 2);
else
ss_data = sum(ss_data, 2) ./ sum(~nans, 2); % Sum distances and take average of non-NaNs
end
% clear those no longer needed
clear second_row_matx;
% restore original measure errors before weighting
if isfield(props, 'restoreWeights') && exist('tests_weights', 'var')
if ~ isempty(weighted_tests_names)
wghd_data(:, weights_tests) = ...
wghd_data(:, weights_tests) ./ (ones(size(wghd_data, 1), 1) * ...
cell2mat(weights_cell(weights_order)'));
end
end
% Ignore NaN rows (note: there must be non NaN rows after the above anyway!)
nans = isnan(ss_data) | isinf(ss_data);
ss_data = ss_data(~nans);
row_index = row_index(~nans);
wghd_data = wghd_data(~nans, :);
% put all into one variable to free the memory for the next step
% Sort increasing with distances
wghd_data = sortrows([wghd_data, ss_data, row_index], size(wghd_data, 2) + 1);
% Prune rest, if requested
if isfield(props, 'topRows')
wghd_data = wghd_data(1:props.topRows, :);
end
% Create a ranked_db with distances and row indices, sorted with distances
a_ranked_db = ranked_db(wghd_data, ...
{crit_tests{:}, 'Distance', 'RowIndex'}, db, crit_db, ...
[ lower(get(db, 'id')) ' ranked to ' ...
lower(get(crit_db, 'id')) ], props);