Home > data-utility > main_compare_variables_among_subjects.m

main_compare_variables_among_subjects

PURPOSE ^

SYNOPSIS ^

function [] = main_compare_variables_among_subjects(subject_list, data_types, module_name, all_variables_list, dir_name, is_verbose, args)

DESCRIPTION ^

 This function performs data validation by computing several summary
 statistics for each of the four data types. It goes through all the
 variables one by one and generates a report, which is saved as a set of
 report files (one CSV file per data type).
 
 INPUT
  subject_list     subject ID list
  data_types       list of data structure, four types of data types are
                   supported: 'cont', 'cevent', 'cstream', 'event'
  module_name      a string with the module name, e.g., 'motion'; it is used to
                   specify which module you are interested in. Defaults to
                   all modules if you don't provide this parameter or it is empty.
  all_variables_list: optional parameter, the list of variables to be
                   extracted, it can be a cell of variable names, or a csv file 
                   containing the list of variable names.
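  dir_name         a string indicating where to save the result files.
                   Default is the current directory.
  is_verbose       whether detailed progress will be displayed in the console.
                   If a struct is passed here instead, it is used as args and
                   progress display is turned off.
  args             optional struct passed on to the statistics functions;
                   if omitted, args.grouping is set to 'trial_cat'.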

 Example:   
   sub_list = list_subjects(14);
   data_types = {'event','cont','cevent','cstream'}; 
   module_name = ''; % it can be 'motion', 'vision', 'inhand' and etc.
   % all_variable_list = {}; % empty means that list all the variables for
                             % the subject list; it can also be:
   % all_variable_list = 'var_names.csv'; 
   all_variable_list = {'cevent_inhand_child'}; 
 
    dir_name = '/ein/scratch/Zeth/variable_check';
   is_verbose = 1;  % 1 -- display progress; 0 - mute

   main_compare_variables_among_subjects(sub_list, data_types, module_name, all_variable_list, dir_name, is_verbose)
 
   ...
   checking done for variable event_motion_rot_head_moving_child
   checking done for variable event_motion_rot_head_moving_parent
   checking done for data type event, results saved in /ein/scratch/Zeth/variable_check/exp14_compare_variables_event.csv

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

function [] = main_compare_variables_among_subjects(subject_list, data_types, module_name, all_variables_list, dir_name, is_verbose, args)
% MAIN_COMPARE_VARIABLES_AMONG_SUBJECTS  Per-subject variable statistics as CSV reports.
%
% For every data type in DATA_TYPES this function goes through the
% variables of that type one by one, computes per-subject statistics for
% the subjects that have the variable, and stacks the resulting tables
% (separated by a blank row) into one CSV report file per data type.
% Subjects that do not have a variable are listed with the text
% 'no such variable exist for this subject'.
%
% INPUT
%  subject_list     vector of subject IDs
%  data_types       cell array of data type names; four data types are
%                   supported: 'cont', 'cevent', 'cstream', 'event'
%  module_name      a string with the module name, e.g. 'motion', used to
%                   select the module of interest. If omitted or empty, all
%                   modules are used. A leading and a trailing '_' are added
%                   when missing.
%  all_variables_list  optional; the list of variables to check. It can be
%                   a cell of variable names or a csv file containing the
%                   variable names. If omitted or empty, the list is
%                   obtained from list_complete_variables.
%  dir_name         a string indicating where to save the result files.
%                   Default is the current directory.
%  is_verbose       whether detailed progress is displayed in the console.
%                   If a struct is passed here instead, it is used as ARGS
%                   and progress display is turned off.
%  args             optional struct passed on to the *_cal_stats functions.
%                   If omitted, args.grouping is set to 'trial_cat'. The
%                   field args.sub_list is overwritten for every variable
%                   with the subjects that have that variable.
%
% OUTPUT
%  None. One file per data type is written to
%    <dir_name>/exp<exp ids>_compare<module_name>variables_<data_type>_<M-D_YYYY_h-m>.csv
%
% Example:
%   sub_list = list_subjects(14);
%   data_types = {'event','cont','cevent','cstream'};
%   module_name = ''; % it can be 'motion', 'vision', 'inhand' etc.
%   % all_variable_list = {}; % empty means: use all the variables of the
%                             % subject list; it can also be:
%   % all_variable_list = 'var_names.csv';
%   all_variable_list = {'cevent_inhand_child'};
%
%   dir_name = '/ein/scratch/Zeth/variable_check';
%   is_verbose = 1;  % 1 -- display progress; 0 -- mute
%
%   main_compare_variables_among_subjects(sub_list, data_types, ...
%       module_name, all_variable_list, dir_name, is_verbose)
%
%   ...
%   checking done for variable event_motion_rot_head_moving_child
%   checking done for variable event_motion_rot_head_moving_parent
%   checking done for data type event, results saved in /ein/scratch/Zeth/variable_check/exp14_compare_variables_event_<timestamp>.csv

if nargin < 2
    help main_compare_variables_among_subjects
    error('You should provide subject list and data structure list.');
end

if nargin < 3 || isempty(module_name)
    % '_' acts as the "all modules" pattern
    module_name = '_';
else
    % add '_' to the start and end of module_name if necessary
    if module_name(1) ~= '_'
        module_name = ['_' module_name];
    end
    if module_name(end) ~= '_'
        module_name = [module_name '_'];
    end
end

if nargin < 4 || isempty(all_variables_list)
    all_variables_list = list_complete_variables(subject_list, data_types, module_name);
else
    all_variables_list = list_variables_by_module(all_variables_list, module_name, data_types);
end

if nargin < 5 || isempty(dir_name)
    % if the user doesn't give a directory name, use the current directory
    dir_name = cd;
end

if ~exist('is_verbose', 'var')
    is_verbose = false;
elseif isstruct(is_verbose)
    % a struct in the is_verbose slot is interpreted as args
    args = is_verbose;
    is_verbose = false;
end

GROUPING = 'trial_cat';

if ~exist('args', 'var')
    args.grouping = GROUPING;
end

data_type_num = length(data_types);
% NOTE(review): round() maps IDs whose last two digits are >= 50 to the
% next experiment number (e.g. 1450 -> 15); confirm whether floor() is
% intended for the subject ID scheme in use.
exp_list = unique(round(subject_list/100));
% column copy of the subject IDs, used for building the report tables
subject_col = subject_list(:);

for data_type_index = 1:data_type_num
    data_type = data_types{data_type_index};
    % variable names of this data type
    listing = all_variables_list{:, data_type_index};

    if isempty(listing)
        fprintf('No %s type variables found.\n\n', data_type);
        continue
    end

    csv_cells_by_type = {};

    for var_index = 1:numel(listing)
        variable_name = listing{var_index};

        % logical column mask: which subjects have this variable.
        % logical() guards against has_variable returning numeric 0/1,
        % which could not be used as an index mask.
        has_variable_list = logical(arrayfun( ...
            @(subject_id) has_variable(subject_id, variable_name), ...
            subject_col));

        sub_list_valid = subject_list(has_variable_list);
        args.sub_list = sub_list_valid;
        valid_ids = subject_col(has_variable_list);

        chunks = get_variable_by_grouping('sub', ...
            sub_list_valid, variable_name, GROUPING);

        switch data_type
            case 'cont'
                results = cont_cal_stats(chunks, args);

                data_mat = [ ...
                    valid_ids ...
                    round_to_hundredths(results.individual_mean) ...
                    round_to_hundredths(results.individual_std) ...
                    round_to_hundredths(results.individual_median) ...
                    round_to_hundredths(results.individual_min) ...
                    round_to_hundredths(results.individual_max) ...
                    round_to_hundredths(results.individual_nonnan)];

                csv_cell_by_var = build_report_block(variable_name, ...
                    subject_col, has_variable_list, data_mat, ...
                    {'sub_id', 'mean', 'std', 'median', 'min', 'max', 'nonnan'}, ...
                    'hist->', ...
                    round_to_hundredths(results.individual_hist), ...
                    results.hist_bins);

            case 'cstream'
                results = cstream_cal_stats(chunks, args);

                % keep only the non-zero category values
                cat_idx = find(results.categories);
                categories = results.categories(cat_idx);

                data_mat = [ ...
                    valid_ids ...
                    round_to_hundredths(results.individual_prop)];

                csv_cell_by_var = build_report_block(variable_name, ...
                    subject_col, has_variable_list, data_mat, ...
                    {'sub_id', 'prop'}, ...
                    'prop by cat->', ...
                    round_to_hundredths(results.individual_prop_by_cat(:, cat_idx)), ...
                    categories);

            case 'cevent'
                results = cevent_cal_stats(chunks, args);

                % keep only the non-zero category values
                cat_idx = find(results.categories);
                categories = results.categories(cat_idx);

                data_mat = [ ...
                    valid_ids ...
                    results.individual_number ...
                    round_to_hundredths(results.individual_median_dur) ...
                    round_to_hundredths(results.individual_mean_dur) ...
                    round_to_hundredths(results.individual_std_dur) ...
                    round_to_hundredths(results.individual_switches_freq)];

                csv_cell_by_var = build_report_block(variable_name, ...
                    subject_col, has_variable_list, data_mat, ...
                    {'sub_id', 'num_cevents', 'median_dur', 'mean_dur', ...
                    'std_dur', 'switches freq'}, ...
                    'num by cat->', ...
                    results.individual_number_by_cat(:, cat_idx), ...
                    categories);

            case 'event'
                results = event_cal_stats(chunks, args);

                data_mat = [ ...
                    valid_ids ...
                    results.individual_number ...
                    round_to_hundredths(results.individual_median_dur) ...
                    round_to_hundredths(results.individual_mean_dur) ...
                    round_to_hundredths(results.individual_std_dur)];

                % events have no per-category / histogram columns
                csv_cell_by_var = build_report_block(variable_name, ...
                    subject_col, has_variable_list, data_mat, ...
                    {'sub_id', 'num_events', 'median_dur', 'mean_dur', 'std_dur'}, ...
                    '', [], []);

            otherwise
                error('Invalid data type!');
        end

        csv_cells_by_type = append_report_block(csv_cells_by_type, csv_cell_by_var);

        if is_verbose
            fprintf('checking done for variable %s\n', variable_name);
        end
    end

    % time stamp: month-day_year_hour-minute
    tmp_clock = clock;
    tmp_clock_str = sprintf('%d-%d_%d_%d-%d', tmp_clock(2:3), tmp_clock(1), tmp_clock(4:5));

    % exp_list(:)' forces a row so that num2str returns a single line of
    % text regardless of the orientation of subject_list
    file_name = fullfile(dir_name, ...
        sprintf('exp%s_compare%svariables_%s_%s.csv', ...
        num2str(exp_list(:)'), module_name, data_type, tmp_clock_str));

    cell2csv(file_name, csv_cells_by_type);

    if is_verbose
        fprintf('checking done for data type %s, results saved in %s\n\n', ...
            data_type, file_name);
    end
end


function rounded = round_to_hundredths(values)
% Round VALUES to two decimal places.
rounded = round(values*100)/100;


function block = build_report_block(variable_name, subject_col, has_var, data_mat, stat_labels, extra_label, extra_mat, extra_headers)
% Build the CSV cell table of one variable.
%
%  variable_name  name written into the first column of every row
%  subject_col    column vector with all subject IDs
%  has_var        logical column mask, true for subjects having the variable
%  data_mat       one row per subject having the variable; first column is
%                 the subject ID, the remaining columns are the statistics
%  stat_labels    cell row with one header label per column of data_mat
%  extra_label    header of the separator column that precedes the extra
%                 columns; '' means that there are no extra columns
%  extra_mat      extra columns (histogram / per-category values), one row
%                 per subject having the variable
%  extra_headers  header values of the extra columns
%
% Layout: column 1 holds variable_name, then the data_mat columns, then
% (if extra_label is given) an empty column below extra_label followed by
% the extra columns. Row 1 is the header row.
MISSING_MSG = 'no such variable exist for this subject';

subject_num = numel(subject_col);
stat_num = size(data_mat, 2);
has_extra = ~isempty(extra_label);

if has_extra
    % +1 for the empty column below the extra_label header
    column_num = stat_num + 1 + size(extra_mat, 2);
else
    column_num = stat_num;
end

data_cell = cell(subject_num, column_num);

% subjects without the variable only get their ID and a message
missing = ~has_var;
if any(missing)
    data_cell(missing, 1) = num2cell(subject_col(missing));
    data_cell(missing, 2) = {MISSING_MSG};
end

data_cell(has_var, 1:stat_num) = num2cell(data_mat);
if has_extra
    data_cell(has_var, stat_num+2:end) = num2cell(extra_mat);
end

block = cell(subject_num + 1, column_num + 1);
block(:, 1) = {variable_name};
block(1, 2:stat_num+1) = stat_labels;
if has_extra
    block{1, stat_num+2} = extra_label;
    block(1, stat_num+3:end) = num2cell(extra_headers(:)');
end
block(2:end, 2:end) = data_cell;


function combined = append_report_block(existing, block)
% Stack BLOCK below EXISTING with one blank row in between. The narrower
% of the two tables is padded with empty cells on the right. When EXISTING
% is empty the result starts with a blank row.
existing_rows = size(existing, 1);
existing_cols = size(existing, 2);

combined = cell(existing_rows + size(block, 1) + 1, ...
    max(existing_cols, size(block, 2)));
combined(1:existing_rows, 1:existing_cols) = existing;
combined(existing_rows+2:end, 1:size(block, 2)) = block;

Generated on Wed 24-May-2017 00:00:56 by m2html © 2005