function [obj2, ro] = subset(obj,varargin)
% Purpose: get subset of data based on indicies for numeric or regular
%          expression for cellstring. also gets used by valSubset to index numerics 
%          based on values instead of indicies.
%
% Inputs:
%   varargin: each argument corresponds to each axis.
%       1) [] or ':' for all indicies
%       2) regular expression string for cellstring axis values
%       3) numeric array or cellnum of indicies for numeric or cellnum axis
%          values
%   Optional: 'numericsAsValuesFlag' - see MDD.valSubset
%
% Outputs:
%   obj2: object with subset of data
%   ro:  if regular expressions are used, contains regular expressions and 
%        results of regexp 'start' indicies.

debug_mode = false;

% Check for numericsAsValuesFlag
if ischar(varargin{end}) && strcmp(varargin{end}, 'numericsAsValuesFlag')
    numericsAsValuesFlag = true; % tells subset to use numerics as values
    varargin(end) = []; % remove string
%     nargin = nargin - 1; % reduce num input arguments
else
    numericsAsValuesFlag = false;
end


% Get params to validate that "selection" is the right size
selection = varargin(:);

% First, make sure that number of selections supplied matches the number of 
% dimensions of obj. (E.g. length(selection) == ndims(obj) must be true) 
[obj, selection] = fix_selection(obj,selection,numericsAsValuesFlag);    % Note: In certain circumstances, this function modifies obj and obj.data!

% Now that selection is properly formatted, call the core subset function
[obj2, ro] = subset_core(obj,selection, numericsAsValuesFlag);

if debug_mode
    warning('Implement this');
end

end



function [obj, selection] = fix_selection(obj,selection,numericsAsValuesFlag)
% This function makes sure that the number of selections supplied by the
% user matches the number of axes in obj. If not, they do not match, it
% attempts to correct (e.g. thorugh assuming linear indices) or throws an
% error if this fails.

Ns = length(selection);
Na = length(obj.axis_pr);
Nd = ndims(obj.data_pr);

% Handle vector data
if Ns == 1 && isvector(obj.data_pr)
    if isrow(obj.data_pr)
        selection(1:2) = {[], selection{1}};
    else % iscolumn
        selection(1:2) = {selection{1}, []};
    end
end

% Fix selection if improperly specified
Ns = length(selection);
if Ns < Na && Ns >= Nd              %     Nd <= Ns < Na     (more axes than selections, but still enough to fully query obj.data)
    % Fill out selection with empties if too short. For example, say
    % size(obj.data_pr) is MxNx1x1. The user might enter a selection with
    % length(selection) = 2, such as selection = {[A],[B]}. In this
    % case, we convert it to selection = {[A],[B],[],[]}.
    selection2 = repmat({[]},1,Na);
    selection2(1:Ns) = selection;
    selection = selection2;
    clear selection2


elseif Ns > Na                  % Na < Ns       (Ns too large)
    % Trim back selection if too long. If size(obj.data_pr) is MxN,
    % it is okay for selection to be {5,3,Y,Z} as long as Y and Z
    % are either empty (:) or ones. If they are anything else,
    % return error!
    % Make sure extra selection entries are either "1" or []
    selection_extra = selection(Na+1:end);
    are_empties = cellfun(@isempty,selection_extra);
    are_ones = cellfun(@(s) isequal(s,1), selection_extra);
    are_colons = strcmp(selection_extra,':');
    if ~all(are_empties | are_ones | are_colons)      % If any of the extra selections are NOT either empty or one's...
        error(['Index exceeds dimensions of ' class(obj) '.data']);
    end
    selection = selection(1:Na);
elseif (Ns < Nd)            % Fewer selections than there are dimensions in obj.data. In this case, use linear indexing.
    % In this case we will assume that the
    % LAST selection supplied is a linear index and represents all
    % remaining dimensions. This is how MATLAB handles
    % linear indexing of normal matrices as well. For example, try the
    % following: 
    %   temp = 1:125;
    %   temp = reshape(temp,[5,5,5]);
    %   temp(5,5,2)
    %   temp(5,10)
    %   temp(50)
    % MDD will follow this behavior.
    
    % First, run obj.subset() as normal for everything but the LAST
    % selection supplied. For the last selection and the remaining unspecified
    % dimensions, take everything. We will deal with these later. 
    % If Ns == 1, then just keep obj as is.
    if Ns > 1
        selection_part = repmat({':'},1,Na);
        selection_part(1:Ns-1) = selection(1:Ns-1);
        [obj, ro] = subset_core(obj,selection_part, numericsAsValuesFlag);
        
        % Mark the entries in selection we've already completed with selection_part
        % with a colon operators, since they are done.
        selection(1:Ns-1) = repmat({':'},1,Ns-1);
    else
        % Unchanged
    end
    
    sl = selection{Ns};    % The linear portion of selection
    
    % Now, we deal with the linear index portion of the selection. Two
    % cases: 1) sl is numeric and numericsAsValuesFlag is false, in which 
    % case we will use linear indexing.
    % 2) Otherwise, we just replicate sl and use it for all remaining axes
    % (this might error!)
    if (isnumeric(sl) || islogical(sl)) && ~numericsAsValuesFlag
        
        sz_dat = size(obj.data_pr);
        Ndat = numel(obj.data_pr);
        dim_dat = ndims(obj.data_pr);
        
        inds = linearize_inds(selection,sz_dat,Ndat);
        
        % Get rid of all except the chosen data
        if iscell(obj.data_pr)
            data_new = cell(Ndat,1);
        else
            data_new = zeros(Ndat,1);
        end
        data_new(inds) = obj.data_pr(inds);
        data_new = reshape(data_new,sz_dat);
        obj.data_pr = data_new;         % data_new will have the same dimensions as obj.data
                                        % But will have zeros / empties
                                        % for all entries except the selected data.
                                        
        % Finally, figure out the new set of subscripts that are still used in
        % obj.data_pr. Select only these.
        if islogical(sl); sl = find(sl); end     % (ind2sub, later, produces undesired behavior when feeding logical indices...)
        [subs{Ns:Nd}] = ind2sub(sz_dat(Ns:Nd),sl);       

        selection_new = repmat({':'},1,Na);     % Take everything by default
        for i = Ns:Nd
            selection_new{i} = unique(subs{i}); % For the linearized dimensions, figure out which indices to keep; drop everything else.
        end

        selection = selection_new;
        
    else 
        selection_new = repmat({':'},1,Na);     % Take everything by default
        selection_new(Ns:Nd) = repmat({sl},1,Nd-Ns+1);              % Use selection(end) for all remaining unspecified dimensions.
        %warning(['Number of inputs supplied is less than dimensionality of ' class(obj) '.data and selection(end) cannot be linearized.']);
        %fprintf(['Assuming all remaining inputs are equal to selection(end) = ' num2str(sl) '\n']);
        selection = selection_new;
    end

end


% If Ns is still wrong dimensions, return error
Ns = length(selection);
if Ns ~= Na
    error(['Number of inputs must match dimensionality of ' class(obj) '.data']);
end


end


function [selection, ro] = selection2sub(obj,selection,numericsAsValuesFlag)
% Each cell in selection can contain strings, numerics, logical indices,
% etc. This converts them all to numerical subscripts (e.g. subscripts
% as defined in MATLAB's ind2sub function)

ro = {};

Ns = length(selection);
selection0 = selection;         % Store original selections

% Replace any ':' entries in selection with []. Empty
% entries code for taking all entries along a dimension; ':' is
% an alias for this.
for i = 1:length(selection)
    if ischar(selection{i})
        if strcmp(selection{i},':')
            selection{i} = [];
        end
    end
end


axClasses = getclass_obj_axis_values(obj);

% Convert selection to index if using numeric values
if numericsAsValuesFlag
    for i = 1:Ns
        if isnumeric(selection{i})
            thisAxVals = obj.axis_pr(i).values;
            if strcmp('cellnum', axClasses{i})
                thisAxVals = [thisAxVals{:}]; % convert to numeric array
            end
            if ~isempty(selection{i})
                % Find which axis value intersects with the values provided
                % in selection.
                [~, temp] = intersect(thisAxVals, selection{i});
                
                % If no matches, throw out an error
                if isempty(temp)
                    if length(selection{i}) > 1
                        selection_str = ['{' num2str(selection{i}) '}'];
                    else
                        selection_str = num2str(selection{i});
                    end
                    error(['Queried values ' selection_str ' not found in axis #' num2str(i) ': ' obj.axis_pr(i).name '.']);
                end
                selection{i} = temp;
            end
            
        end
    end
end

re = '([\d.]*\s*[<>=]{0,2})\s*[a-z_A-Z]?\s*([<>=]{0,2}\s*[\d.]*)';
for i = 1:Ns
    if ischar(selection{i})
        if strcmp(axClasses{i}, 'cellstr')
            % Convert selection to index if using regular expressions
            ro{i}(1,:) = obj.axis_pr(i).values;
            [selection{i} ro{i}(2,:)] = MDD.regex_lookup(obj.axis_pr(i).values, selection{i});
        elseif numericsAsValuesFlag || strcmp(axClasses{i}, 'numeric')
            % Convert expression on vals for numerics to indicies
            
            thisAxVals = obj.axis_pr(i).values;
            if strcmp('cellnum', axClasses{i})
                thisAxVals = [thisAxVals{:}]; % convert to numeric array
            end
            
            % Parse expression
            expr = selection{i};
            tokens = regexpi(expr, re, 'tokens');
            tokens = tokens{1}; % enter outer cell
            tokens = regexprep(tokens, '\s',''); % remove whitespace
            tokens = tokens(~cellfun(@isempty, tokens)); % remove empty tokens
            if ~all(cellfun(@isempty, cellfun(@str2num, tokens,'UniformOutput',0)))
                tokens = {[tokens{:}]}; % cat since single expression got split up
            end
            lhsBool = isstrprop(cellfun(@(x) x(1), tokens, 'uniform', false), 'digit');
            lhsBool = [lhsBool{:}];
            
            % check LHS and RHS for up to 2 tokens
            parsedSelection = true(size(thisAxVals));
            for k=1:length(tokens)
                thisExpr = tokens{k};
                if lhsBool(k)
                    parsedSelection = parsedSelection & eval(['(' thisExpr 'thisAxVals)']);
                else
                    parsedSelection = parsedSelection & eval(['(thisAxVals' thisExpr ')']);
                end
            end
            
            % Return error message if no matching axis values found
            if all(parsedSelection == 0)
                error(['Query' selection0{i} ' found no matching axis values.'])
            end
            
            % Get index locations
            [~, selection{i}] = intersect(thisAxVals, thisAxVals(parsedSelection));
        end
    end
end

% Make sure that size of selection doesnt exceed size of data
sz = size(obj);
for i = 1:Ns
    if selection{i} > sz(i); error('Selection index exceeds dimensions'); end
end

end

function [obj2, ro] = subset_core(obj,selection, numericsAsValuesFlag)

sz = size(obj);
Ns = length(selection);

% First, converts selection (which can be in any of the accepted formats)
% to being MATLAB subscripts (as in ind2sub)
[subs, ro] = selection2sub(obj,selection,numericsAsValuesFlag);

% Now that we are dealing with subscripts, we can proceed to slice up the
% data and the axes

% Initialize
obj2 = obj;             % Create new class of same type as original
obj2 = obj2.reset;
obj2.meta = obj.meta;

% First update each axis with the corresponding selection and
% convert empty cells to code for full range.
for i = 1:Ns
    
    if isempty(subs{i})
        subs{i} = 1:sz(i);
    end
    
    obj2.axis_pr(i) = obj.axis_pr(i);       % Import axis information
    obj2.axis_pr(i).values = obj.axis_pr(i).values(subs{i});   % Overwrite values field; leave everything else the same.
end

% Update the data
obj2.data_pr = obj.data_pr(subs{:});

% Corrects number of axes. The above code automatically
% converts obj.data_pr from MxNx1x1 to MxN, whereas axis will stay
% as MxNx1x1 (e.g. length of 4). Thus, fixAxes corrects this.
% This should no longer be necessary!!!
% obj2 = fixAxes(obj2);

end


function inds = linearize_inds(selection,sz_dat,Ndat)

    
    master_inds = 1:Ndat;
    master_inds = reshape(master_inds,sz_dat);
    inds = master_inds(selection{:});
    inds = inds(:);

end