function [Abasis, Abasisi, Asubs] = getLinearIndependentCell(A,ignore_constant_shift)
% [Abasis, Abasisi, Asubs] = getLinearIndependentCell(A,ignore_constant_shift)
%
%
% Purpose: Takes in a matrix or cell array and identifies a
% subset of independent columns. It also clusters subsets of columns
% that are dependent upon each other.
%
%
% Details: If input is a matrix, behavior is exacly the same as
% getLinearIndependent. That is, for numeric inputs, "dependence" is
% determined based on linear dependence (e.g. If col2 = 2*col1, then col1 and
% col2 would be considered dependent. For non-numeric inputs, columns are
% automatically considered independent unless they are equivalent (see
% examples).
%
% Usage:
% [Abasis, Abasisi, Asubs] = getLinearIndependentCell(A)
% [Abasis, Abasisi, Asubs] = getLinearIndependentCell(A,ignore_constant_shift)
%
% Inputs:
% A: Input matrix or cell array. If a matrix is supplied, behavor is the
% same as getLinearIndependent.
%
% Inputs (Optional):
% ignore_constant_shift: Flag (true / false [default]) for ignoring a constant term
% in determining independence (e.g. if col2 = 10-col1, col1 and col2 will
% be grouped together if true; otherwise separately if false).
%
% Outputs:
% Abasis: The set of linearly independent basis vectors in A
%
% Abasisi: Index locations of original basis vectors in A, such
% that Abasis = A(:,Abasisi).
%
% Asub: Cell array with one element for each basis vector in A. Each cell
% in Asub identifies clusters of columns in the original matrix A that
% share linear dependence.
%
% % Example:
%
% A = {'a','b','c';'a','b','c';'d','e','f';1,2,3;1,2,5;5,3,@plot;5,3,@plot;2,4,6}';
%
% % A =
% % 'a' 'a' 'd' [1] [1] [ 5] [ 5] [2]
% % 'b' 'b' 'e' [2] [2] [ 3] [ 3] [4]
% % 'c' 'c' 'f' [3] [5] @plot @plot [6]
%
% [Abasis, Abasisi, Asubs] = getLinearIndependentCell(A)
%
% % Results:
% %
% % % % % % % % % % Abasis - the subset of basis columns % % % % % % % % %
% %
% % horzcat(Abasis{:})
% % 3x5 cell array
% % [1] [1] 'a' 'd' [ 5]
% % [2] [2] 'b' 'e' [ 3]
% % [3] [5] 'c' 'f' @plot
% %
% % % % % Abasisi - the indices of these columns in the original matrix % % %
% %
% % Abasisi =
% % 1x5 cell array
% % [4] [5] [1] [3] [6]
% %
% % % % % % % % % % Asubs - subsets ofl linearly dependent columns % % % % % % % % %
% %
% % Asubs =
% % 1x5 cell array
% % [1x2 double] [5] [1x2 double] [3] [1x2 double]
% % Asubs{1} : [4, 8]
% % Asubs{2} : [5]
% % Asubs{3} : [1, 2]
% % Asubs{4} : [3]
% % Asubs{5} : [6, 7]
%
%
% Submodules: getLinearIndependent, uniqueCellGeneralized, iscellnum
%
% Author: David Stanley, Boston University, 2017
%
% See also: getLinearIndependent, rref, unique
%%Code%%
if nargin < 2
ignore_constant_shift = false;
end
% Make sure working with a cell array initially
if isnumeric(A); A = num2cell(A); end
% Identify all columns that are numeric, cell str, or others
sz = size(A);
numeric_cols = false(1,sz(2));
cellstr_cols = false(1,sz(2));
unknown_cols = false(1,sz(2));
for j = 1:sz(2)
if iscellnum(A(:,j)); numeric_cols(j) = true;
elseif iscellstr(A(:,j)); cellstr_cols(j) = true;
else; unknown_cols(j) = true;
end
end
% ID clustering within numeric columns. Check for any form of linear
% dependence
if any(numeric_cols)
Anum = cell2mat(A(:,numeric_cols));
[~,~,covaried1] = getLinearIndependent(Anum,ignore_constant_shift);
for i = 1:length(covaried1); covaried1{i} = convert_submat_indices_to_fullmat_indices(covaried1{i},find(numeric_cols)); end
else
covaried1 = {};
end
% ID clustering within non-numeric columns
ind = cellstr_cols | unknown_cols; % The getLICellOnly function can handle any type of content (not just strings); it relies on the isequal() function to determine equality
if any(ind)
Aother = A(:,ind);
covaried2 = getLICellOnly(Aother);
for i = 1:length(covaried2); covaried2{i} = convert_submat_indices_to_fullmat_indices(covaried2{i},find(ind)); end
else
covaried2 = {};
end
Asubs=vertcat(covaried1(:),covaried2(:))';
Abasisi = {};
Abasis = {};
for i = 1:length(Asubs)
Abasisi{i} = Asubs{i}(1);
Abasis{i} = A(:,Abasisi{i});
end
end
% function covaried = idCovariedNumerics_Princomp(vary_numerics)
% % This method needs some modification to get it working. I believe
% % that using reduced row-echelon form will be simpler.
% [coef,score,latent] = princomp(zscore(vary_numerics));
%
% % Identify independent principle components in vary_params
% thresh = 0.01;
% coef = abs(coef) > thresh; % Threshold columns of vary_params for being governed by each principle component. Each column in coef denotes a principle component, and denotes which columns in vary_params are associated with that pc (1 for yes, 0 for no)
% latent = abs(latent) > thresh;
%
% pc = find(latent);
%
% covaried = cell(1,length(pc));
% for i = 1:length(pc)
% covaried{i} = find(coef(:,i));
% end
%
% end
function inds_out = convert_submat_indices_to_fullmat_indices(inds_in,mapping)
inds_out = mapping(inds_in);
end
function covaried = getLICellOnly(vary_cell)
A = nestCellColumns(vary_cell);
[C,ia,ic] = uniqueCellGeneralized(A);
covaried = cell(1,length(C));
for i = 1:length(C)
covaried{i} = find(ic == i);
end
end
function out = nestCellColumns(in)
if ~ismatrix(in); error('input must be of size NxM'); end
if ~iscell(in); error('input must be a cell'); end
sz = size(in);
for j = 1:sz(2)
out{j} = in(:,j)';
end
end