Home > NoiseTools > nt_index.m

nt_index

PURPOSE ^

[status,p]=nt_index(name,p,forceUpdate) - index data files & directories

SYNOPSIS ^

function [status,p]=nt_index(name,p,forceUpdate);

DESCRIPTION ^

[status,p]=nt_index(name,p,forceUpdate) - index data files & directories

  status: 1: needed indexing, 0: didn't, -1: failed
  p: parameter structure

  name: name(s) of file(s) or directory to index
  p: parameters
  forceUpdate: if true force indexing [default: false]

 NoiseTools

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function [status,p]=nt_index(name,p,forceUpdate);
0002 %[status,p]=nt_index(name,p,forceUpdate) - index data files & directories
0003 %
0004 %  status: 1: needed indexing, 0: didn't, -1: failed
0005 %  p: parameter structure
0006 %
0007 %  name: name of file or directory to index (string; directories are indexed recursively)
0008 %  p: parameters (p.scale: samples per min/max block [default: 100])
0009 %  forceUpdate: if true force indexing [default: false]
0010 %  Index files are saved as <parent directory>/nt_idx/<item name>.idx
0011 % NoiseTools
0012 nt_greetings;
0013 
0014 if nargin<3 || isempty(forceUpdate); forceUpdate=0; end 
0015 if nargin<2||isempty(p); % set default parameters
0016     p=[];
0017     p.scale=100;
0018     if nargin >= 1; p.name=name; end    
0019 end
0020 if nargin<1 || isempty(name);
0021     p.name=[];
0022     status=-1;
0023     return;  % just return default parameters
0024 end
0025 
0026 status=-1; % failed by default
0027 updateFlag=0;   % don't update unless necessary
0028 if forceUpdate; updateFlag=1; end
0029 
0030 % check 'name'
0031 if ~ischar(name); error('name should be a string'); end
0032 avoid=['[',1:31, 127,']'];
0033 if regexp(name,avoid); 
0034     disp('bad character in file name, skip:'); disp(['   >',name,'<']); 
0035     return; 
0036 end
0037 if strcmp(name,'.'); name=pwd; end % strcmp: an elementwise == would also match '..'
0038 if numel(name)>1 && name(end)=='/'; name=name(1:end-1); end % remove trailing slash (but keep a bare '/')
0039 [PATHSTR,NAME,EXT]=fileparts(name);
0040 if strcmp(EXT,'.idx'); % fileparts returns the extension with its leading dot
0041     disp(['warning: ', name, ' might be index file']); 
0042 end
0043 if isempty(PATHSTR); % interpret relative to current directory
0044     name=[pwd,filesep,name]; % full path, safe to use 'exist'
0045 end
0046 [PATHSTR,NAME,EXT]=fileparts(name); 
0047 if 2==exist(name) 
0048     d=dir(name);
0049     filename=d.name;            % match case to file system
0050     PATHSTR=cd(cd(PATHSTR));    % match case to file system
0051     name=[PATHSTR,filesep,filename];
0052 elseif 7==exist(name)
0053     name=cd(cd(name));          % match case to file system
0054     [PATHSTR,NAME,EXT]=fileparts(name); 
0055 else
0056     disp(name);
0057     error('...is neither file nor directory');
0058 end
0059 
0060 
0061 hhh=[]; % structure with info about this file or directory
0062 iii=[]; % structure with data indices
0063 hhh.name=name;
0064 hhh.time_indexed=now;
0065 
0066 % file or directory?
0067 if 2==exist(name) 
0068     hhh.isdir=0;
0069 elseif 7==exist(name)
0070     hhh.isdir=1;
0071 else
0072     disp(name);
0073     error('...is neither file nor directory');
0074 end
0075 
0076 % intercept CTF data (directory name ends with '.ds'): handled as a single data file
0077 if numel(name)>=3 && strcmp(name(end-2:end), '.ds');
0078     hhh.isdir=0;
0079 end
0080         
0081 hhh.failed=0;
0082 hhh.isdata=0;
0083 
0084 % index directory (lives next to the indexed item, in its parent directory)
0085 idxDir=[PATHSTR,filesep,'nt_idx'];
0086 if 7 ~= exist(idxDir); 
0087     disp(['creating index directory ', idxDir]);
0088     mkdir (idxDir);
0089     updateFlag=1;
0090 end
0091 
0092 % index file
0093 idxName=[idxDir,filesep,NAME,EXT,'.idx'];
0094 hhh.idxName=idxName;
0095 if 2~=exist(idxName); updateFlag=1; end % '~2==exist(...)' would parse as '(~2)==exist(...)'
0096 
0097 % is my index older than me?
0098 if exist(idxName) && (dateModified(idxName) < dateModified(name))
0099     updateFlag=1;
0100 end
0101 
0102 if hhh.isdir % directory (else file)
0103         
0104     disp([name,filesep]);
0105     
0106     % check that 'name' is consistent with name in parent directory
0107     d=dir(PATHSTR);
0108     OKflag=0;
0109     for iFile=1:numel(d)
0110         if strcmp(d(iFile).name,[NAME,EXT]);
0111             OKflag=1;
0112         end
0113     end
0114     if ~OKflag; error(['''', NAME,EXT, ''' does not match real file name']); end
0115     
0116 
0117     % list my files/directories
0118     d=dir(name);
0119     iGood=ones(numel(d),1);
0120     nskip=0;
0121     for k=1:numel(iGood);       % weed out irrelevant/bad files
0122         if strcmp(d(k).name,'.') || strcmp(d(k).name,'..')  % me & parent dirs
0123             iGood(k)=0; 
0124         elseif d(k).name(1)=='.';                               % files starting with '.'
0125             iGood(k)=0; nskip = nskip+1; 
0126             disp(['skip, starts with ''.'': ',name,filesep,d(k).name]);
0127         end;                              
0128         if strcmp(d(k).name,'nt_idx');                      % index directory
0129             iGood(k)=0; nskip=nskip+1; 
0130         end      
0131         if any(d(k).name<33) || any(d(k).name==127) ;       % files with bad chars in names
0132             iGood(k)=0; nskip=nskip+1;
0133             disp(['skip, bad char in name: ',name,filesep,d(k).name]);
0134         end 
0135         if isempty(d(k).date);                              % invalid file (soft link?)
0136             iGood(k)=0; nskip=nskip+1; 
0137             disp(['skip, invalid file: ',name,filesep,d(k).name]);
0138         end 
0139     end
0140     d=d(iGood~=0);
0141     nfiles=numel(d);
0142     
0143     % check / index all my files
0144     for iFile=1:nfiles
0145         if d(iFile).name(end)=='/';
0146             disp(d)
0147         end
0148         if 1==nt_index([name,filesep,d(iFile).name],p,forceUpdate);
0149             updateFlag=1; % one of my files updated, update me too
0150         end        
0151     end   
0152     
0153     % purge the index directory that holds my own index (idxDir) of orphan files
0154     dd=dir(idxDir);
0155     iGood2=ones(numel(dd),1);
0156     for k=1:numel(iGood2);      % weed out irrelevant stuff
0157         if dd(k).name(1)=='.'; iGood2(k)=0; end;            % files starting with '.'
0158         if strcmp(dd(k).name,'nt_idx'); iGood2(k)=0; end    % index directory
0159     end
0160     dd=dd(iGood2~=0);
0161     iGood3=ones(numel(dd),1);
0162     for iFile=1:numel(iGood3)
0163         [~,NAME2,EXT2]=fileparts(dd(iFile).name);           % name of index file
0164         theFile=[PATHSTR,filesep,NAME2];                    % file pointed by index
0165         if 2~=exist(theFile) ...    % neither file...
0166                 && 7~=exist(theFile) % nor directory...
0167             disp(['>',dd(iFile).name,'<'])
0168             delete([idxDir,filesep,dd(iFile).name]);
0169             disp([theFile, ' not found, ']);
0170             disp(['deleting orphan file ',[idxDir,filesep,dd(iFile).name]]);
0171             iGood3(iFile)=0;
0172         end
0173     end
0174     dd=dd(iGood3~=0);
0175     
0176     % all my files are now checked/updated
0177         
0178     % other checks?
0179     
0180     % merge info about me & my files into my index
0181     if updateFlag
0182         
0183         % info about me
0184         hhh.dir=d;              % my directory excluding bad files
0185                 
0186         % compile info about my files/directories
0187         hhh.myfiles.bytes=zeros(nfiles,1,'uint64');       % bytes (file) or total bytes (directory)
0188         hhh.myfiles.isdir=nan(nfiles,1);                  % directory?
0189         hhh.myfiles.nfiles=zeros(nfiles,1,'uint64');      % number of files (including files in subdirectories)
0190         hhh.nfiles=uint64(1);       % me
0191         hhh.ndirs=uint64(1);        % me
0192         hhh.bytes=uint64(0);
0193         hhh.nskip=nskip;  
0194         hhh.ndata=0; 
0195         hhh.nbad=0;
0196         hhh.ntypes=[];
0197         hhh.depth=1;
0198         for iFile=1:nfiles
0199             load('-mat',[name,filesep,'nt_idx',filesep,d(iFile).name,'.idx']);  % loads hh, ii
0200             % aggregate info for me & subdirectories
0201             hhh.bytes=hhh.bytes+hh.bytes;
0202             hhh.ndirs=hhh.ndirs+hh.ndirs;
0203             hhh.ndata=hhh.ndata+hh.ndata;
0204             hhh.nbad=hhh.nbad+hh.nbad;
0205             hhh.nfiles=hhh.nfiles+hh.nfiles;   
0206             % note: bytes are accumulated exactly once per child (above)
0207             hhh.nskip=hhh.nskip+hh.nskip;
0208             % higher resolution data for my files
0209             hhh.myfiles.bytes(iFile)=hh.bytes;
0210             hhh.myfiles.isdir(iFile)=hh.isdir;
0211             hhh.myfiles.nfiles(iFile)=hh.nfiles;
0212             % aggregate counts of file types
0213             types=myfieldnamesr(hh.ntypes);
0214             for iType=1:numel(types)
0215                 try   % add to existing counter; works for nested names ('matlab.x') that isfield cannot test
0216                     eval(['hhh.ntypes.',types{iType},'=hhh.ntypes.',types{iType},'+hh.ntypes.',types{iType},';']);
0217                 catch % no such counter yet: create it
0218                     eval(['hhh.ntypes.',types{iType},'=hh.ntypes.',types{iType},';']);
0219                 end
0220             end
0221             hhh.depth=max(hhh.depth,1+hh.depth);
0222            % TBD: compile data info (ii)
0223         end
0224                 
0225         hhh.date=dateModified(name);
0226         
0227     end % if updateflag
0228     
0229         
0230 else  % file
0231     
0232     disp(name)
0233     
0234     if numel(name)>=3 && strcmp(name(end-2:end), '.ds'); % intercept CTF data
0235         [a,b,c] = fileparts(name);
0236         name=[name,filesep,b,'.meg4'];
0237     end
0238         
0239     if updateFlag
0240         
0241         hhh.nfiles=uint64(1); % just me
0242         d=dir(name);
0243         hhh.bytes=uint64(d.bytes);
0244         hhh.date=d.date;
0245         hhh.sr=[];
0246         hhh.depth=0;
0247         % needed for dirs:
0248         hhh.ndirs=uint64(0); 
0249         hhh.nbad=0; 
0250         hhh.ndata=0;
0251         hhh.nskip=0;
0252         if d.bytes==0; % empty, don't bother
0253             hhh.isdata=0;
0254             hhh.type='empty';
0255             hhh.ntypes.empty=1;
0256         else
0257 
0258             % check file type
0259             hhh.ext=EXT;
0260             [isdata,type]=filetype(name);
0261             
0262             type=strrep(type,':','___'); % biosig uses ':' in type names
0263 
0264             hhh.type=type;
0265             try
0266                 eval(['hhh.ntypes.',type,'=1;']);
0267             catch
0268                 disp(['hhh.ntypes.',type,'=1;']);
0269                 disp(name);
0270                 warning('eval failed');
0271             end
0272             
0273             hhh.isdata=isdata;
0274 
0275             hhh.size=[];   
0276             hhh.originalsize=[]; % before reshape/transpose
0277 
0278             if hhh.isdata
0279                 hhh.ndata=1;
0280                 [a,b,c]=fileparts(type);
0281                 x=[]; h=[];  % h stays empty unless sopen succeeds
0282                 if strcmp(b,'matlab');
0283                     x=readmatlab(name,c(2:end));
0284                 elseif strcmp(type,'unknown') || strcmp(type,'matlab_non_numeric')
0285                     error('!');
0286                 else
0287                     try
0288                         h=sopen(name);
0289                         hhh.sr=h.SampleRate;
0290                     catch ME
0291                         hhh.failed=1;
0292                         disp(name);
0293                         warning('...sopen failed');
0294                         disp(ME);
0295                     end
0296                     if ~isempty(h) % read & close only what was actually opened
0297                         try
0298                             x=sread(h);
0299                         catch ME
0300                             hhh.failed=1; disp(name); disp(ME);
0301                             warning('...sread failed');
0302                         end
0303                         sclose(h);
0304                     end
0305                 end
0306                 hhh.originalsize=size(x);
0307                 if ndims(x)>2;
0308                     sizes=size(x);
0309                     x=reshape(x,prod(sizes(1:end-1)),sizes(end));
0310                     disp(['reshape -->', num2str(size(x))]);
0311                 end
0312                 if size(x,1)<size(x,2); 
0313                     x=x'; 
0314                     disp(['transpose --> ',num2str(size(x))]);
0315                 end
0316                 hhh.size=size(x);
0317                 nt_whoss;
0318 
0319                 if ~isempty(x)
0320                     % calculate index
0321                     if ndims(x)>2; 
0322                         disp(name); 
0323                         disp(size(x));
0324                         error('!'); 
0325 
0326                     end
0327                     iii=index(x,p);
0328                 end % else iii==[]
0329             end
0330         end
0331     end
0332 end   
0333 
0334 if updateFlag
0335     status=1;
0336     hh=hhh; ii=iii;
0337     save(idxName, 'hh','ii');
0338 else 
0339     status=0;
0340 end
0341 
0342 
0343 function ii=index(x,p)
0344 % index data: per-channel min & max of x (samples X channels) over consecutive blocks of p.scale samples
0345 if ndims(x)>2; error('!'); end
0346 [ii.nsamples,ii.nchans]=size(x);
0347 ii.scale=p.scale;
0348 ii.p=p;
0349 npairs=floor(ii.nsamples/p.scale);          % number of complete blocks
0350 x_extra=x(npairs*p.scale+1:end,:);          % leftover samples (incomplete last block)
0351 x=x(1:npairs*p.scale,:);
0352 x=reshape(x,[p.scale,npairs,ii.nchans]);
0353 ii.min=reshape(min(x,[],1),[npairs,ii.nchans]); % reshape (not squeeze) keeps blocks X channels when npairs==1 or nchans==1
0354 ii.max=reshape(max(x,[],1),[npairs,ii.nchans]);
0355 if ~isempty(x_extra);
0356     ii.min=[ii.min;min(x_extra,[],1)];
0357     ii.max=[ii.max;max(x_extra,[],1)];
0358 end
0359 
0360 function date=dateModified(name)
0361 % datenum of last modification of a file or directory ('name' must include a path part)
0362 [parentDir,base,ext]=fileparts(name);
0363 if isempty(parentDir); error('!'); end
0364 date=[];
0365 switch exist(name)
0366     case 2 % a file: its own directory entry carries the date
0367         entry=dir(name);
0368         date=entry.datenum;
0369     case 7 % a directory: look for my entry in the parent's listing
0370         listing=dir(parentDir);
0371         for k=1:numel(listing)
0372             if strcmp(listing(k).name,[base,ext]);
0373                 date=listing(k).datenum;
0374                 break
0375             end
0376         end
0377     otherwise
0378         disp(name)
0379         error('!');
0380 end
0381 if isempty(date); 
0382     disp(['>',name,'<']);
0383     error('!'); 
0384 end
0385 %date=datenum(date);
0386         
0387 
0388 function [isdata,type,readwith]=filetype(name)
0389 % try to guess type and whether it's data (isdata: 1 if readable data, type: type name, 'unknown' if undetermined)
0390 EXTENSIONS_TO_SKIP={'.idx', '.zip','.txt','.pdf','.doc','.docx','.pptx','.xls','.html','.rtf',...
0391     '.jpg', '.tif','.tiff','.js', '.md', '.m', '.py', '.rar', '.wav'};
0392 [PATHSTR,NAME,EXT]=fileparts(name);
0393 isdata=0; type='unknown'; readwith=[]; % defaults (readwith is declared as output but not determined here)
0394 if ~isempty(EXT) && any(strcmp(lower(EXT),EXTENSIONS_TO_SKIP))
0395     isdata=0; type=lower(EXT); type=type(2:end); % intercept common types
0396     disp(['skip (extension): ',name])
0397 else
0398     fid=fopen(name); firstbytes=[];
0399     if fid>=0; firstbytes=fread(fid,8,'uchar'); fclose(fid); end
0400     % (if the file could not be opened, firstbytes stays empty)
0401     if ~isempty(EXT) && strcmp(EXT,'.mat') || (numel(firstbytes)>=4 && all(firstbytes(1:4)'=='MATL')) % matlab file
0402         try
0403             s=whos('-file',name);
0404         catch ME
0405             disp(name);
0406             disp('... whos failed');
0407             disp(ME)
0408             type='unknown'; return % same outcome as a failed sopen below
0409         end
0410         % find which variables are numeric
0411         numerics={'double','single','int64','int32','int16','int8'};
0412         matrix=strcmp(repmat({s.class},numel(numerics),1), ...
0413             repmat(numerics',1,numel(s))); 
0414         idx=find(any(matrix));
0415         if isempty(idx);
0416             isdata=0; type='matlab_non_numeric';
0417         else                    % one or more numeric variables: choose the one with most elements
0418             sizes=zeros(numel(idx),1);
0419             for iVar=1:numel(idx);
0420                 sizes(iVar)=prod(s(idx(iVar)).size);
0421             end
0422             [~,k]=max(sizes); biggest=idx(k);   % index into s of the largest numeric variable
0423             isdata=1; type=['matlab.',s(biggest).name];
0424             disp(['mat file, multiple numeric variables, chosing: ''', s(biggest).name, ''', size:',num2str(s(biggest).size)]);
0425         end
0426 %     elseif strcmp(char(firstbytes(2:8))','BIOSEMI')
0427 %         isdata=1; type='biosemi_bdf';
0428     else    % hand over to biosig
0429         try
0430             h=sopen(name);
0431             type=h.TYPE;
0432             sclose(h);
0433         catch ME
0434             disp(name);
0435             warning('... sopen failed');
0436             disp(ME);
0437             type='unknown'; return
0438         end                
0439         if strcmp(type,'unknown'); 
0440             isdata=0; 
0441         else 
0442             isdata=1;
0443         end
0444     end
0445 end
0446     
0447 function x=readmatlab(name,varname)
0448 % read variable 'varname' from matlab file 'name' (loaded into a struct so it cannot overwrite local variables)
0449 loaded=load('-mat',name,varname);
0450 x=loaded.(varname);
0451 
0452 
0453 function s=myfieldnamesr(x)
0454 % recursive field names of struct x; fields of nested structs are listed as 'parent.child'
0455 s=[]; if ~isstruct(x); return; end  % non-struct input yields []
0456 s={}; top=fieldnames(x);
0457 for k=1:numel(top)
0458     value=getfield(x,top{k});
0459     if isstruct(value)
0460         nested=myfieldnamesr(value);
0461         for j=1:numel(nested)
0462             s{end+1}=[top{k},'.',nested{j}];
0463         end
0464     else
0465         s{end+1}=top{k};
0466     end
0467 end
0468 
0469         
0470         
0471         
0472     
0473            
0474         
0475         
0476             
0477

Generated on Thu 30-Nov-2017 17:26:18 by m2html © 2005