function [data,studyinfo] = ImportData(file,varargin)
%% [data,studyinfo]=ImportData(data_file,'option1',value1,...)
% data=ImportData(data_file)
% Purpose: load data into DynaSim formatted data structure.
% Inputs:
%   data_file - one of:
%     - data file name in accepted format (.mat or .csv)
%     - cell array of data file names (files that do not exist are skipped)
%     - study directory name
%     - studyinfo structure (any structure with a study_dir field; the
%       up-to-date studyinfo is retrieved with CheckStudyinfo)
%   options (key/value pairs) -
%     'verbose_flag',1,{0,1},... % whether to print each file name while loading a study
%     'process_id',[],[],... % process identifier for loading studyinfo if necessary
%     'time_limits',[],[],... % [beg,end] ms (see NOTE 2)
%     'variables',[],[],... % name or cell array of matrix names (see NOTE 2)
% Outputs:
%   data - DynaSim data structure (structure array when several data sets
%     are loaded; [] when a study or file list has no existing data files):
%     data.labels           : list of state variables and monitors recorded
%     data.(state_variables): state variable data matrix [time x cells]
%     data.(monitors)       : monitor data matrix [time x cells]
%     data.time             : time vector [time x 1]
%     data.simulator_options: simulator options used to generate simulated data
%     data.model            : model used to generate simulated data
%     [data.varied]         : list of varied model components
%     [data.results]        : list of derived data sets created by post-processing
%   studyinfo - DynaSim studyinfo structure (see CheckStudyinfo) when the
%     input was a study directory or studyinfo structure; [] otherwise
% 
% NOTE 1: CSV file structure
%   assumes CSV file contains data organized according to output from
%   WriteDynaSimSolver: time points along rows; state variables and 
%   monitors are columns; first column is time vector; next columns are
%   state variables; final columns are monitors. first row has headers
%   for each column. if a population has more than one cell, different
%   cells are sequential columns with same header repeated for each cell.
% 
% NOTE 2: DynaSim data exported to MAT-files are HDF-compatible. To obtain
% partial data sets without having to load the entire file, use ImportData
% with options 'time_limits' and/or 'variables'. Alternatively, the entire
% data set can be loaded using ImportData with default options, then
% subsets extracted using SelectData with appropriate options.
% 
% Examples:
% 1) full data set
% data=ImportData('data.mat'); % load single data set
% data=ImportData(studyinfo); % load all data sets in studyinfo.study_dir
% 2) partial data set with HDF-style loading
% data=ImportData('data.mat','variables','pop1_v','time_limits',[1000 4000])
% 
% See also: SimulateModel, ExportData, CheckData, SelectData

% todo: specify subsets to return in terms of varied parameters, time_limits, ROIs, etc
% possible format for specifying range_varied:
% {'E','gNa',[.1 .3]; 'I->E','tauI',[15 25]; 'I','mechanism_list','+iM'}
% idea: achieve by calling function SelectData() at end of this function.

% Check inputs
options=CheckOptions(varargin,{...
  'verbose_flag',1,{0,1},...
  'process_id',[],[],... % process identifier for loading studyinfo if necessary
  'time_limits',[],[],...
  'variables',[],[],...
  },false);

if ischar(options.variables)
  % allow a single variable name to be passed as a plain string
  options.variables={options.variables};
end
% check if input is a study directory; if so, wrap it in a structure so
% that it is handled by the studyinfo branch below
if ischar(file) && isdir(file) % study directory
  study_dir=file;
  clear file
  file.study_dir=study_dir;
end
if isstruct(file) && isfield(file,'study_dir')
  % "file" is a studyinfo structure.
  % retrieve most up-to-date studyinfo structure from studyinfo.mat file
  studyinfo=CheckStudyinfo(file.study_dir,'process_id',options.process_id);
  % get list of data_files from studyinfo; keep only those that exist as files
  data_files={studyinfo.simulations.data_file};
  success=cellfun(@exist,data_files)==2;
  data_files=data_files(success);
  sim_info=studyinfo.simulations(success);
  % load each data set recursively
  keyvals=Options2Keyval(options);
  num_files=length(data_files);
  if num_files==0
    % nothing to load: return empty data rather than leaving the output
    % argument unassigned
    warning('ImportData:noDataFiles','no existing data files found for study in %s; returning empty data.',file.study_dir);
    data=[];
    return;
  end
  for i=1:num_files
    if options.verbose_flag
      fprintf('loading file %g/%g: %s\n',i,num_files,data_files{i});
    end
    tmp_data=ImportData(data_files{i},keyvals{:});
    num_sets_per_file=length(tmp_data);
    if ~isfield(tmp_data,'varied')
      % add varied info
      % this is necessary here when loading .csv data lacking metadata
      % (deal is used so the field is added to every element when tmp_data
      % is a structure array; plain dot assignment requires a scalar struct)
      [tmp_data.varied]=deal({});
      modifications=sim_info(i).modifications;
      for j=1:size(modifications,1)
        % field name for this varied component: <column 1>_<column 2>
        varied=[modifications{j,1} '_' modifications{j,2}];
        for k=1:num_sets_per_file
          tmp_data(k).varied{end+1}=varied;
          tmp_data(k).(varied)=modifications{j,3};
        end
      end
    end
    % store this data
    if i==1
      % preallocate full data array based on first data file
      % (the offsets assume every file holds as many data sets as the first)
      total_num_sets=num_sets_per_file*num_files;
      set_indices=0:num_sets_per_file:total_num_sets-1;
      data(1:total_num_sets)=tmp_data(1);
    end
    % replace i-th set of data sets by these data sets
    data(set_indices(i)+(1:num_sets_per_file))=tmp_data;
  end
  return;
else
  studyinfo=[];
end
% check if input is a list of data files (todo: eliminate duplicate code by
% combining with the above recursive loading for studyinfo data_files)
if iscellstr(file)
  % keep only the listed files that exist
  data_files=file;
  success=cellfun(@exist,data_files)==2;
  data_files=data_files(success);
  if isempty(data_files)
    % nothing to load: return empty data rather than leaving the output
    % argument unassigned
    warning('ImportData:noDataFiles','none of the listed data files exist; returning empty data.');
    data=[];
    return;
  end
  keyvals=Options2Keyval(options);
  % load each data set recursively
  for i=1:length(data_files)
    tmp_data=ImportData(data_files{i},keyvals{:});
    % store this data
    if i==1
      % preallocate full data array based on first data file
      data(1:length(data_files))=tmp_data;
    else
      % replace i-th data element by this data set
      data(i)=tmp_data;
    end
  end
  return;
end

if ischar(file)
  % single data file: choose the loader from the file extension
  [~,~,ext]=fileparts(file);
  switch lower(ext)
    case '.mat'
      % MAT-file contains data fields as separate variables (-v7.3 for HDF)
      if isempty(options.time_limits) && isempty(options.variables)
        % load full data set
        data=load(file);
        % if file only contains a structure called 'data' then return that
        if isfield(data,'data') && length(fieldnames(data))==1
          data=data.data;
        end
      else
        % load partial data set
        % use matfile() to load HDF subsets given varargin options...
        obj=matfile(file); % MAT-file object
        varlist=who(obj); % variables stored in mat-file
        labels=obj.labels; % list of state variables and monitors
        if iscellstr(options.variables) % restrict variables to load
          labels=labels(ismember(labels,options.variables));
        end
        % reconstruct the stored time vector from the simulator options
        % (tspan and dt, then downsampling) instead of reading it from file
        simulator_options=obj.simulator_options;
        time=(simulator_options.tspan(1):simulator_options.dt:simulator_options.tspan(2))';
        time=time(1:simulator_options.downsample_factor:length(time));
        if ~isempty(options.time_limits)
          % determine time indices to load
          time_indices=nearest(time,options.time_limits(1)):nearest(time,options.time_limits(2));
        else
          % load all time points
          time_indices=1:length(time);
        end
        % create DynaSim data structure:
        data=[];
        data.labels=labels;
        % load state variables and monitors (selected rows only)
        for i=1:length(labels)
          data.(labels{i})=obj.(labels{i})(time_indices,:);
        end
        data.time=time(time_indices);
        data.simulator_options=simulator_options;
        if ismember('model',varlist)
          data.model=obj.model;
        end
        if ismember('varied',varlist)
          % copy the list of varied components and the value of each one
          varied=obj.varied;
          data.varied=varied;
          for i=1:length(varied)
            data.(varied{i})=obj.(varied{i});
          end
        end
        if ismember('results',varlist)
          results=obj.results;
          if iscellstr(options.variables)
            results=results(ismember(results,options.variables));
          end
          data.results=results;
          % load results (indexed by the same rows as the state variables)
          for i=1:length(results)
            data.(results{i})=obj.(results{i})(time_indices,:);
          end
        end
      end
    case '.csv'
      % assumes CSV file contains data organized according to output from WriteDynaSimSolver:
      data=ImportCSV(file);
      if ~(isempty(options.time_limits) && isempty(options.variables))
        % limit to select subsets
        data=SelectData(data,varargin{:}); % todo: create SelectData()
      end
    otherwise
      error('file type not recognized. ImportData currently supports DynaSim data structure in MAT file, data values in CSV file.');
  end
else
  % fail with a clear message instead of an unassigned-output error
  error('input not recognized. ImportData accepts a data file name, cell array of data file names, study directory, or studyinfo structure.');
end