Home > NoiseTools > nt_index.m

nt_index

PURPOSE ^

[status,p]=nt_index(name,p,forceUpdate) - index data files & directories

SYNOPSIS ^

function [status,p]=nt_index(name,p,forceUpdate)

DESCRIPTION ^

[status,p]=nt_index(name,p,forceUpdate) - index data files & directories

  status: 1: needed indexing, 0: didn't, -1: failed
  p: parameter structure

  name: name(s) of file(s) or directory to index
  p: parameters
  forceUpdate: if true force indexing [default: false]

 NoiseTools

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

function [status,p]=nt_index(name,p,forceUpdate)
%[status,p]=nt_index(name,p,forceUpdate) - index data files & directories
%
%  status: 1: needed indexing, 0: didn't, -1: failed
%  p: parameter structure
%
%  name: name of file or directory to index (string)
%  p: parameters
%  forceUpdate: if true force indexing [default: false]
%
% The index of <dir>/<item> is saved in <dir>/nt_idx/<item>.idx as two
% variables: 'hh' (info about the file or directory) and 'ii' (data index).
% A directory is indexed by recursing into its items and then aggregating
% the contents of their index files.
% Called without 'name', returns default parameters and status=-1.
%
% NoiseTools
nt_greetings;

if nargin<3 || isempty(forceUpdate); forceUpdate=0; end
if nargin<2||isempty(p) % set default parameters
    p=[];
    p.scale=1000;
    if nargin >= 1; p.name=name; end
end
if nargin<1 || isempty(name)
    p.name=[];
    status=-1;
    return;  % just return default parameters
end

status=-1; % failed by default
updateFlag=0;   % don't update unless necessary
if forceUpdate; updateFlag=1; end

% parse 'name' into path, etc., check it for various issues
if ~ischar(name); error('name should be a string'); end
avoid=['[',1:31, 127,']'];  % character class matching any control character
if ~isempty(regexp(name,avoid,'once'))
    disp('bad character in file name, skip:'); disp(['   >',name,'<']);
    return;
end
if strcmp(name,'.'); name=pwd; end  % strcmp: '==' would also match '..'
if numel(name)>1 && name(end)=='/'; name=name(1:end-1); end
[PATHSTR,NAME,EXT]=fileparts(name);
if strcmp(EXT,'.idx')               % fileparts returns the extension with its dot
    disp(['warning: ', name, ' might be index file']);
end
if isempty(PATHSTR)             % interpret relative to current directory
    name=[pwd,filesep,name];    % need full path to safely use 'exist'
end
[PATHSTR,NAME,EXT]=fileparts(name);
if 2==exist(name)
    d=dir(name);
    filename=d.name;            % --> same case as file system
    PATHSTR=cd(cd(PATHSTR));    % --> same case as file system
    name=[PATHSTR,filesep,filename];
elseif 7==exist(name)
    name=cd(cd(name));          % --> same case as file system
    [PATHSTR,NAME,EXT]=fileparts(name);
else
    disp(name);
    error('...is neither file nor directory');
end


hhh=[]; % this structure will contain info about this file or directory
iii=[]; % this structure will contain the data index
hhh.name=name;
hhh.time_indexed=now;
hhh.failed=0; % OK by default

% test whether we're processing a file or a directory
if 2==exist(name)
    hhh.isdir=0;
elseif 7==exist(name)
    hhh.isdir=1;
else
    disp(name);
    error('...is neither file nor directory');
end

% special case:
% CTF data are stored as directory, pretend it's a file
isCTF = numel(name)>=3 && strcmp(name(end-2:end), '.ds');
if isCTF
    hhh.isdir=0;
end

% create an index directory if it doesn't exist
idxDir=[PATHSTR,filesep,'nt_idx'];
if 7 ~= exist(idxDir)
    disp(['creating index directory ', idxDir]);
    mkdir (idxDir);
    updateFlag=1;
end

% check if there is already an up-to-date index file for 'name'
idxName=[idxDir,filesep,NAME,EXT,'.idx'];
hhh.idxName=idxName;
if 2~=exist(idxName); updateFlag=1; end     % no index file yet
if exist(idxName) && (dateModified(idxName) < dateModified(name)) % out of date
    updateFlag=1;
end

%{
Processing depends on whether 'name' is a file or a directory.
If a file, we calculate statistics to index the data within that file.
If a directory, we aggregate statistics on its files and subdirectories.
%}

if hhh.isdir % directory

    disp([name,filesep]);

    % check that 'name' matches a name listed in parent directory (catch upper/lowercase inconsistencies)
    d=dir(PATHSTR);
    OKflag=0;
    for iFile=1:numel(d)
        if strcmp(d(iFile).name,[NAME,EXT])
            OKflag=1;
        end
    end
    if ~OKflag; error(['''', NAME,EXT, ''' does not match real file name']); end

    % list items in this directory
    d=dir(name);
    iGood=ones(numel(d),1);
    nskip=0;
    for k=1:numel(iGood)       % weed out irrelevant/bad files
        if strcmp(d(k).name,'.') || strcmp(d(k).name,'..')  % me & parent dirs
            iGood(k)=0;
        elseif d(k).name(1)=='.'                           % files starting with '.'
            iGood(k)=0; nskip = nskip+1;
            disp(['skip, starts with ''.'': ',name,filesep,d(k).name]);
        end
        if strcmp(d(k).name,'nt_idx')                     % index directory
            iGood(k)=0; nskip=nskip+1;
        end
        if any(d(k).name<33) || any(d(k).name==127)      % files with bad chars in names
            iGood(k)=0; nskip=nskip+1;
            disp(['skip, bad char in name: ',name,filesep,d(k).name]);
        end
        if isempty(d(k).date)                            % no date field ==> invalid file (soft link?)
            iGood(k)=0; nskip=nskip+1;
            disp(['skip, invalid file: ',name,filesep,d(k).name]);
        end
    end
    d=d(iGood~=0);
    nfiles=numel(d);

    % recursively index files in this directory
    for iFile=1:nfiles
        if d(iFile).name(end)=='/'
            disp(d)
        end
        if 1==nt_index([name,filesep,d(iFile).name],p,forceUpdate) % recurse
            updateFlag=1;  % one of my files updated, update me too
        end
    end

    % purge my index directory of any orphan index files (no associated data file)
    dd=dir(idxDir);
    iGood2=ones(numel(dd),1);
    for k=1:numel(iGood2)
        if dd(k).name(1)=='.'; iGood2(k)=0; end             % files starting with '.'
        if strcmp(dd(k).name,'nt_idx'); iGood2(k)=0; end    % index directory
    end
    dd=dd(iGood2~=0);
    for iFile=1:numel(dd)
        [~,NAME2]=fileparts(dd(iFile).name);                % index file name minus its last extension
        theFile=[PATHSTR,filesep,NAME2];                    % associated data file
        if 2~=exist(theFile) ...        % neither file...
                && 7~=exist(theFile)    % nor directory...
            disp(['>',dd(iFile).name,'<'])
            disp([theFile, ' not found, ']);
            disp(['deleting orphan index file ',[idxDir,filesep,dd(iFile).name]]);
            delete([idxDir,filesep,dd(iFile).name]);
        end
    end

    % all the files in this directory & subdirectories are now checked/updated

    % merge info about this directory, files, & subdirectories into index
    if updateFlag

        % info about this directory
        hhh.dir=d;                  % my directory structure, excluding bad files

        % init aggregated statistics
        hhh.nfiles=uint64(1);   % number of files
        hhh.ndata=0;            % number of data files
        hhh.nbad=0;             % number of bad files
        hhh.nskip=nskip;        % number of files skipped
        hhh.ndirs=uint64(1);    % number of directories
        hhh.bytes=uint64(0);    % number of bytes
        hhh.ntypes=[];          % list with number of files of each type
        hhh.depth=1;            % depth of hierarchy (1=leaf)

        % init info variables for each file/directory in this directory
        hhh.filelist.bytes=zeros(nfiles,1,'uint64');       % bytes (file) or total bytes (directory)
        hhh.filelist.isdir=nan(nfiles,1);                  % directory?
        hhh.filelist.nfiles=zeros(nfiles,1,'uint64');      % number of files (including files in subdirectories)

        % visit each file or directory, aggregate information
        for iFile=1:nfiles

            % get info from this file's index file to save time
            child=load('-mat',[name,filesep,'nt_idx',filesep,d(iFile).name,'.idx'], 'hh');
            hh=child.hh;

            % info specific to this file/directory
            hhh.filelist.bytes(iFile)=hh.bytes;
            hhh.filelist.isdir(iFile)=hh.isdir;
            hhh.filelist.nfiles(iFile)=hh.nfiles;

            % aggregate info (each quantity is added exactly once)
            hhh.bytes=hhh.bytes+hh.bytes;
            hhh.ndirs=hhh.ndirs+hh.ndirs;
            hhh.ndata=hhh.ndata+hh.ndata;
            hhh.nbad=hhh.nbad+hh.nbad;
            hhh.nfiles=hhh.nfiles+hh.nfiles;
            hhh.nskip=hhh.nskip+hh.nskip;
            hhh.depth=max(hhh.depth,1+hh.depth);

            % aggregate counts of each file type
            % Type names may be nested ('matlab.x'); isfield and dynamic field
            % names cannot address those, so go through getfield/setfield.
            types=myfieldnamesr(hh.ntypes);
            for iType=1:numel(types)
                parts=regexp(types{iType},'\.','split');
                count=getfield(hh.ntypes,parts{:});
                try
                    count=getfield(hhh.ntypes,parts{:})+count;
                catch
                    % this type has not been tallied yet: start from the child's count
                end
                if isempty(hhh.ntypes); hhh.ntypes=struct(); end
                hhh.ntypes=setfield(hhh.ntypes,parts{:},count);
            end

        end

        % merge the indexes of files & subdirectories to create an aggregate index
        % ('name' rather than [PATHSTR,filesep,NAME], which would drop the
        % extension of a directory whose name contains a dot)
        iii=merge_file_indexes(d,name);

    end % if updateflag

else  % file

    %disp(name)
    hhh.isdata=0;

    if isCTF % intercept CTF data: the data proper are in <name>/<basename>.meg4
        [~,b] = fileparts(name);
        name=[name,filesep,b,'.meg4'];
    end

    if updateFlag

        % info common to all files
        hhh.nfiles=uint64(1); % just me
        d=dir(name);
        hhh.bytes=uint64(d.bytes);
        hhh.sr=[];
        hhh.depth=0;

        % default values:
        hhh.ndirs=uint64(0);
        hhh.nbad=0;
        hhh.ndata=0;
        hhh.nskip=0;

        % determine file type
        [isdata,type]=filetype(name);
        hhh.isdata=isdata;
        hhh.type=type;

        % set field in hhh.ntypes
        fixedtype=strrep(type,':','___'); % biosig uses ':' in type names
        try
            eval(['hhh.ntypes.',fixedtype,'=1;']);
        catch
            disp(['hhh.ntypes.',fixedtype,'=1;']);
            disp(name);
            disp(type);
            warning('eval failed');
        end

        % data: read it
        if hhh.isdata
            x=[];
            hhh.size=[];
            hhh.originalsize=[]; % before reshape/transpose
            hhh.ndata=1;
            [~,b,c]=fileparts(type);
            if strcmp(b,'matlab')
                % read matlab variable from file
                variable_name=c(2:end); % was coded as extension to type
                x=readmatlab(name,variable_name);
            elseif strcmp(type,'unknown') || strcmp(type,'matlab_non_numeric')
                % shouldn't happen
                error('!');
            else
                % some data file, try to read with biosig
                h=[];
                try
                    h=sopen(name);
                    hhh.sr=h.SampleRate;
                catch ME
                    hhh.failed=1;
                    disp(name);
                    warning('...sopen failed');
                    disp(ME);
                end
                if ~isempty(h)  % only read/close what was actually opened
                    try
                        x=sread(h);
                    catch ME
                        hhh.failed=1;
                        disp(name)
                        disp(ME);
                        warning('...sread failed');
                        x=[];   % give up on this file, leave its data index empty
                    end
                    sclose(h);
                end
            end

            % transpose if appropriate (this is a kludge)
            hhh.originalsize=size(x);
            if ndims(x)>2
                % more than 2 dims: merge all but the last dimension
                sizes=size(x);
                x=reshape(x,prod(sizes(1:end-1)),sizes(end));
                disp(['reshape -->', num2str(size(x))]);
            end
            if size(x,1)<size(x,2)
                % wider than tall: transpose
                x=x';
                disp(['transpose --> ',num2str(size(x))]);
            end
            hhh.size=size(x);
            nt_whoss;

            if ~isempty(x)
                % calculate index
                dsratio=100;
                iii.card=[]; iii.min=[]; iii.max=[]; iii.mean=[]; iii.ssq=[];
                iii=nt_idx(x,dsratio,iii);
            end % else iii==[]
        end
    end
end

if updateFlag
    status=1;
    hh=hhh; ii=iii;
    save(idxName, 'hh','ii');
    disp(idxName)
else
    status=0;
end
end % function [status,p]=nt_index(name,p,forceUpdate)
0355 
function ii=index(x,p)
%ii=index(x,p) - min/max index of data over consecutive blocks of samples
%
%  ii: structure with fields nsamples, nchans, scale, p, min, max
%
%  x: data matrix (samples X channels)
%  p: parameter structure; p.scale is the number of samples per block
%
% ii.min and ii.max have one row per block and one column per channel.
% Samples left over after the last full block are summarized in one
% additional final row.
if ndims(x)>2; error('!'); end
[ii.nsamples,ii.nchans]=size(x);
ii.scale=p.scale;
ii.p=p;
nblocks=floor(ii.nsamples/p.scale);
x_extra=x(nblocks*p.scale+1:end,:);     % incomplete last block, possibly empty
x=reshape(x(1:nblocks*p.scale,:),[p.scale,nblocks,ii.nchans]);
% reshape (instead of squeeze + transpose) so that the result is blocks X channels
% whatever the number of blocks and channels, and can take the extra row below
ii.min=reshape(min(x,[],1),[nblocks,ii.nchans]);
ii.max=reshape(max(x,[],1),[nblocks,ii.nchans]);
if ~isempty(x_extra)
    ii.min=[ii.min;min(x_extra,[],1)];
    ii.max=[ii.max;max(x_extra,[],1)];
end
end
0375 
function date=dateModified(name)
%date=dateModified(name) - modification date of a file or directory
%
%  date: the 'datenum' field that dir reports for 'name'
%
%  name: path of the file or directory; must include a directory part
[parentDir,baseName,extension]=fileparts(name);
if isempty(parentDir); error('!'); end
date=[];
switch exist(name)
    case 2
        % a file: dir on the file itself returns its own entry
        listing=dir(name);
        date=listing.datenum;
    case 7
        % a directory: dir on it would list its contents, so find its
        % entry in the listing of the parent directory instead
        listing=dir(parentDir);
        hit=find(strcmp({listing.name},[baseName,extension]),1);
        if ~isempty(hit)
            date=listing(hit).datenum;
        end
    otherwise
        disp(name)
        error('!');
end
if isempty(date)
    % no matching entry in the parent directory
    disp(['>',name,'<']);
    error('!');
end
end % function date=dateModified(name)
0402 
0403         
0404 
function [isdata,type]=filetype(name)
%[isdata,type]=filetype(name) - try to guess file type and whether it's data
%
%  isdata: 1 if the file is taken to contain data, 0 otherwise
%  type: one of
%        'empty'               zero-byte file
%        extension, no dot     file skipped on the basis of its extension (e.g. 'pdf')
%        'matlab.<variable>'   mat file; <variable> is its largest numeric variable
%        'matlab_non_numeric'  mat file without numeric variables
%        TYPE field of the header returned by sopen, for other files
%        'unknown'             could not be opened or identified
%        []                    whos failed on a mat file
%
%  name: path of the file to examine
EXTENSIONS_TO_SKIP={'.idx', '.zip','.txt','.pdf','.doc','.docx','.ppt','.pptx','.xls','.html','.rtf',...
    '.jpg', '.png', '.tif','.tiff','.js', '.md', '.m', '.py', '.rar', '.wav', '.eps', '.pdfsync',...
    '.avi', '.PDF', '.gz', '.zip'};
[PATHSTR,NAME,EXT]=fileparts(name);
isdata=0; type='unknown'; % default
d=dir(name);
if d.bytes==0
    type='empty';
elseif ~isempty(EXT) && any(strcmpi(EXT,EXTENSIONS_TO_SKIP))
    isdata=0; type=lower(EXT); type=type(2:end); % intercept common types
    disp(['skip (extension): ',name])
else
    fid=fopen(name);
    if fid<0
        % unreadable file: report it and keep the defaults (not data, 'unknown')
        disp(name);
        warning('... fopen failed');
        return
    end
    firstbytes=fread(fid,8,'uchar');
    fclose(fid);
    if (~isempty(EXT) && strcmp(EXT,'.mat')) || (numel(firstbytes)>=4 && all(firstbytes(1:4)'=='MATL'))
        % matlab file
        try
            s=whos('-file',name);
        catch ME
            disp(name);
            disp('... whos failed');
            disp(ME)
            type=[]; return
        end
        % find which variables are numeric
        numerics={'double','single','int64','int32','int16','int8'};
        candidates=find(ismember({s.class},numerics));
        if isempty(candidates)
            % no numeric variables
            isdata=0; type='matlab_non_numeric';
        else
            % some variables are numeric, choose the biggest one
            % (only numeric variables compete, so a large non-numeric
            % variable can never be selected)
            nelements=zeros(numel(candidates),1);
            for k=1:numel(candidates)
                nelements(k)=prod(s(candidates(k)).size);
            end
            [~,k]=max(nelements);
            biggest=candidates(k);
            isdata=1; type=['matlab.',s(biggest).name];
            disp(name);
            disp(['mat file, multiple numeric variables, chosing: ''', s(biggest).name, ''', size:',num2str(s(biggest).size)]);
        end
%     elseif strcmp(char(firstbytes(2:8))','BIOSEMI')
%         isdata=1; type='biosemi_bdf';
    else    % hand over to biosig
        try
            h=sopen(name);
            type=h.TYPE;
            sclose(h);
        catch ME
            disp(name);
            warning('... sopen failed');
            disp(ME);
            type='unknown'; return
        end
        if strcmp(type,'unknown')
            isdata=0;
        else
            isdata=1;
        end
    end
end
end % function [isdata,type]=filetype(name)
0477     
function x=readmatlab(name,varname)
%x=readmatlab(name,varname) - read one variable from a matlab file
%
%  x: value of the variable
%
%  name: path of the file (read as mat format whatever its extension)
%  varname: name of the variable to read
%
% The variable is loaded into a structure rather than into the workspace,
% so a file variable called 'name', 'varname' or 'x' cannot overwrite the
% locals of this function, and no eval is needed.
contents=load('-mat',name,varname);
if ~isfield(contents,varname)
    error(['variable ''',varname,''' not found in ',name]);
end
x=contents.(varname);
end % function x=readmatlab(name,varname)
0483 
0484 
% recursively collect the field names of a structure at all depths
function s=myfieldnamesr(x)
%s=myfieldnamesr(x) - field names of x, including those of nested structures
%
%  s: cell array of names; a field of a nested structure is listed as
%     'parent.child' (the nested structure itself is not listed);
%     [] if x is not a structure
%
%  x: structure to explore
if ~isstruct(x)
    s=[];
    return
end
s={};
topNames=fieldnames(x);
for k=1:numel(topNames)
    thisName=topNames{k};
    value=getfield(x,thisName);
    if ~isstruct(value)
        % leaf: list the field itself
        s=[s,thisName];
    else
        % nested structure: list its leaves, prefixed with this field's name
        nested=myfieldnamesr(value);
        for j=1:numel(nested)
            s=[s,[thisName,'.',nested{j}]];
        end
    end
end
end % function s=myfieldnamesr(x)
0502 
0503 
% visit index files of all files in 'd', merge into aggregate index
function iii=merge_file_indexes(d, dname)
%iii=merge_file_indexes(d,dname) - concatenate the data indexes of the items of a directory
%
%  iii: structure with one field per statistic found in the individual
%       indexes; each field stacks the rows of that statistic from every
%       item, in the order of 'd'. Statistics with fewer columns than the
%       widest one are padded with zeros on the right.
%       [] if no item has a data index.
%
%  d: structure array with a field 'name' (as returned by dir)
%  dname: directory that contains the items; the index of each item is
%         read from <dname>/nt_idx/<name>.idx (variable 'ii')
    iii=[];
    % load all indexes into cell array
    nfiles=numel(d);
    all_indexes=cell(nfiles,1);
    for iFile=1:nfiles
        % load into a structure: if the file has no 'ii' the entry stays
        % empty, instead of silently reusing the index of the previous file
        loaded=load('-mat',[dname,filesep,'nt_idx',filesep,d(iFile).name,'.idx'], 'ii');
        if isfield(loaded,'ii')
            all_indexes{iFile}=loaded.ii;
        end
    end
    % estimate total size of statistics
    statNrows=[]; % structure: total number of rows of each statistic
    statNcols=[]; % structure: largest number of columns of each statistic
    for iFile=1:nfiles
        ii=all_indexes{iFile};
        if isempty(ii); continue; end
        statNames=fieldnames(ii);
        for iField=1:numel(statNames)
            statName=statNames{iField};
            if ~isfield(statNcols,statName)
                statNrows.(statName)=0;
                statNcols.(statName)=0;
            end
            [nrows,ncols]=size(ii.(statName));
            statNrows.(statName)=statNrows.(statName)+nrows;
            statNcols.(statName)=max(statNcols.(statName),ncols);
        end
    end
    if isempty(statNcols)
        return; % no indexes to summarize
    end
    % allocate the aggregate statistics, and a row counter for each
    statNames=fieldnames(statNcols);
    iCounter=[];
    for iName=1:numel(statNames)
        statName=statNames{iName};
        iii.(statName)=zeros(statNrows.(statName),statNcols.(statName));
        iCounter.(statName)=0;
    end
    % copy the statistics of each item below those of the previous items
    for iFile=1:nfiles
        ii=all_indexes{iFile};
        for iName=1:numel(statNames)
            statName=statNames{iName};
            if isfield(ii,statName)
                tmp=ii.(statName);
                offset=iCounter.(statName);
                iii.(statName)(offset+(1:size(tmp,1)),1:size(tmp,2))=tmp;
                iCounter.(statName)=offset+size(tmp,1);
            end
        end
    end
end % function iii=merge_file_indexes(d, dname)
0560 
0561            
0562         
0563         
0564             
0565

Generated on Sat 29-Apr-2023 17:15:46 by m2html © 2005