function [status,p]=nt_index(name,p,forceUpdate)
%[status,p]=nt_index(name,p,forceUpdate) - index a data file or a directory tree
%
%  status: 1: index was (re)written, 0: existing index is up to date,
%          -1: nothing indexed (no name given, or bad character in name)
%  p: parameter structure (returned as given, or with defaults filled in)
%
%  name: path of the file or directory to index ('.' is the current folder)
%  p: parameters [default: p.scale=1000, p.name=name]
%  forceUpdate: if nonzero, rebuild the index even if it looks current [default: 0]
%
% The index of 'name' is written in MAT format to
%   <parent of name>/nt_idx/<last component of name>.idx
% and holds two variables:
%   hh: header information (name, sizes, counts, file types, ...)
%   ii: statistics returned by nt_idx for a data file, or the concatenation
%       of the children's statistics for a directory.
%
% A directory is indexed recursively: every child is indexed first, then the
% children's headers are aggregated.  A name ending in '.ds' is treated as a
% single file whose data live in '<name>/<base>.meg4'.

nt_greetings;

% ---- argument defaults ----
if nargin<3 || isempty(forceUpdate); forceUpdate=0; end
if nargin<2 || isempty(p)
    p=[];
    p.scale=1000;
    if nargin>=1; p.name=name; end
end
if nargin<1 || isempty(name)
    p.name=[];
    status=-1;
    return;
end

status=-1;
updateFlag=0;
if forceUpdate; updateFlag=1; end

% ---- validate and normalise the name ----
if ~ischar(name); error('name should be a string'); end
avoid=['[',1:31, 127,']']; % character class of control characters
if ~isempty(regexp(name,avoid,'once'))
    disp('bad character in file name, skip:'); disp([' >',name,'<']);
    return;
end
if strcmp(name,'.'); name=pwd; end  % strcmp: '==' would also match '..'
if numel(name)>1 && name(end)=='/'; name=name(1:end-1); end
[PATHSTR,NAME,EXT]=fileparts(name); %#ok<ASGLU>
if strcmp(EXT,'.idx')               % fileparts returns the extension with its dot
    disp(['warning: ', name, ' might be index file']);
end
if isempty(PATHSTR)
    name=[pwd,filesep,name];
end
[PATHSTR,NAME,EXT]=fileparts(name);

% Resolve to an absolute path.  cd(x) returns the folder that was current
% before the call, so cd(cd(x)) yields the resolved form of x and restores
% the working folder.
kind=exist(name); %#ok<EXIST>
if 2==kind
    d=dir(name);
    filename=d.name;                % name as reported by the file system
    PATHSTR=cd(cd(PATHSTR));
    name=[PATHSTR,filesep,filename];
    isDirectory=0;
elseif 7==kind
    name=cd(cd(name));
    [PATHSTR,NAME,EXT]=fileparts(name);
    isDirectory=1;
else
    disp(name);
    error('...is neither file nor directory');
end

% ---- header and index structures ----
hhh=[];
iii=[];
hhh.name=name;
hhh.time_indexed=now;
hhh.failed=0;
hhh.isdir=isDirectory;

% a '.ds' directory is handled as one file
isDs = numel(name)>=3 && strcmp(name(end-2:end), '.ds');
if isDs
    hhh.isdir=0;
end

% ---- index directory (sibling of 'name') ----
idxDir=[PATHSTR,filesep,'nt_idx'];
if 7 ~= exist(idxDir) %#ok<EXIST>
    disp(['creating index directory ', idxDir]);
    mkdir (idxDir);
    updateFlag=1;
end

% ---- index file: update if missing or older than 'name' ----
idxName=[idxDir,filesep,NAME,EXT,'.idx'];
hhh.idxName=idxName;
if 2~=exist(idxName) %#ok<EXIST>
    updateFlag=1;
elseif dateModified(idxName) < dateModified(name)
    updateFlag=1;
end

% Processing depends on whether 'name' is a file or a directory.
% If a file, we calculate statistics to index the data within that file.
% If a directory, we aggregate statistics on its files and subdirectories.

if hhh.isdir

    disp([name,filesep]);

    % the last path component must match an entry of the parent listing exactly
    siblings=dir(PATHSTR);
    if ~any(strcmp({siblings.name},[NAME,EXT]))
        error(['''', NAME,EXT, ''' does not match real file name']);
    end

    % list children, dropping the ones we do not index; each skipped entry
    % is counted once even if it matches several rules
    d=dir(name);
    keep=true(numel(d),1);
    nskip=0;
    for k=1:numel(d)
        entry=d(k).name;
        if strcmp(entry,'.') || strcmp(entry,'..')
            keep(k)=false;
        elseif entry(1)=='.'
            keep(k)=false; nskip=nskip+1;
            disp(['skip, starts with ''.'': ',name,filesep,entry]);
        elseif strcmp(entry,'nt_idx')
            keep(k)=false; nskip=nskip+1;
        elseif any(entry<33) || any(entry==127)
            keep(k)=false; nskip=nskip+1;
            disp(['skip, bad char in name: ',name,filesep,entry]);
        elseif isempty(d(k).date)
            keep(k)=false; nskip=nskip+1;
            disp(['skip, invalid file: ',name,filesep,entry]);
        end
    end
    d=d(keep);
    nfiles=numel(d);

    % index every child; if any child was updated, so are we
    for iFile=1:nfiles
        if 1==nt_index([name,filesep,d(iFile).name],p,forceUpdate)
            updateFlag=1;
        end
    end

    % remove index files in idxDir whose target no longer exists;
    % only '*.idx' entries are considered, other files are left alone
    dd=dir(idxDir);
    for k=1:numel(dd)
        idxFile=dd(k).name;
        if idxFile(1)=='.' || strcmp(idxFile,'nt_idx'); continue; end
        [~,NAME2,EXT2]=fileparts(idxFile);
        if ~strcmp(EXT2,'.idx'); continue; end
        theFile=[PATHSTR,filesep,NAME2];
        if 2~=exist(theFile) && 7~=exist(theFile) %#ok<EXIST>
            disp(['>',idxFile,'<'])
            disp([theFile, ' not found, ']);
            disp(['deleting orphan index file ',[idxDir,filesep,idxFile]]);
            delete([idxDir,filesep,idxFile]);
        end
    end

    if updateFlag

        % directory listing
        hhh.dir=d;

        % totals, seeded with this directory's own contribution
        hhh.nfiles=uint64(1);
        hhh.ndata=0;
        hhh.nbad=0;
        hhh.nskip=nskip;
        hhh.ndirs=uint64(1);
        hhh.bytes=uint64(0);
        hhh.ntypes=[];
        hhh.depth=1;

        % per-child summary
        hhh.filelist.bytes=zeros(nfiles,1,'uint64');
        hhh.filelist.isdir=nan(nfiles,1);
        hhh.filelist.nfiles=zeros(nfiles,1,'uint64');

        for iFile=1:nfiles

            % load into a struct so the file cannot overwrite local variables
            loaded=load('-mat',[name,filesep,'nt_idx',filesep,d(iFile).name,'.idx'], 'hh');
            hh=loaded.hh;

            hhh.filelist.bytes(iFile)=hh.bytes;
            hhh.filelist.isdir(iFile)=hh.isdir;
            hhh.filelist.nfiles(iFile)=hh.nfiles;

            hhh.bytes=hhh.bytes+hh.bytes;   % added once per child
            hhh.ndirs=hhh.ndirs+hh.ndirs;
            hhh.ndata=hhh.ndata+hh.ndata;
            hhh.nbad=hhh.nbad+hh.nbad;
            hhh.nfiles=hhh.nfiles+hh.nfiles;
            hhh.nskip=hhh.nskip+hh.nskip;
            hhh.depth=max(hhh.depth,1+hh.depth);

            % sum per-type counts; names may be nested ('matlab.varname')
            if isfield(hh,'ntypes')
                types=myfieldnamesr(hh.ntypes);
                for iType=1:numel(types)
                    parts=regexp(types{iType},'\.','split');
                    hhh.ntypes=nt_index_add_count(hhh.ntypes,parts,getfield(hh.ntypes,parts{:}));
                end
            end

        end

        % children's indexes live in <name>/nt_idx
        iii=merge_file_indexes(d,name);

    end

else

    hhh.isdata=0;

    % for a '.ds' directory the data are in <name>/<base>.meg4
    if isDs
        [~,dsBase]=fileparts(name);
        name=[name,filesep,dsBase,'.meg4'];
    end

    if updateFlag

        hhh.nfiles=uint64(1);
        d=dir(name);
        hhh.bytes=uint64(d.bytes);
        hhh.sr=[];
        hhh.depth=0;

        hhh.ndirs=uint64(0);
        hhh.nbad=0;
        hhh.ndata=0;
        hhh.nskip=0;

        [isdata,type]=filetype(name);
        if ~ischar(type) || isempty(type); type='unknown'; end
        hhh.isdata=isdata;
        hhh.type=type;

        % record the type as a (possibly nested) field with count 1
        % NOTE(review): eval on a string derived from file contents; it is
        % confined to the try block but should eventually be replaced.
        fixedtype=strrep(type,':','___');
        try
            eval(['hhh.ntypes.',fixedtype,'=1;']);
        catch
            disp(['hhh.ntypes.',fixedtype,'=1;']);
            disp(name);
            disp(type);
            warning('eval failed');
        end

        if hhh.isdata
            x=[];
            hhh.size=[];
            hhh.originalsize=[];
            hhh.ndata=1;
            [a,b,c]=fileparts(type); %#ok<ASGLU>
            if strcmp(b,'matlab')
                % type is 'matlab.<variable name>'
                variable_name=c(2:end);
                x=readmatlab(name,variable_name);
            elseif strcmp(type,'unknown') || strcmp(type,'matlab_non_numeric')
                % these types should never be flagged as data
                error('!');
            else
                % any other type was identified by sopen
                h=[];
                try
                    h=sopen(name);
                    hhh.sr=h.SampleRate;
                catch ME
                    hhh.failed=1;
                    disp(name);
                    warning('...sopen failed');
                    disp(ME);
                end
                if ~isempty(h)      % only read what was successfully opened
                    try
                        x=sread(h);
                    catch ME
                        hhh.failed=1;
                        disp(name)
                        disp(ME);
                        warning('...sread failed');
                    end
                    sclose(h);
                end
            end

            % bring the data to 2D with the longer dimension first
            hhh.originalsize=size(x);
            if ndims(x)>2
                sizes=size(x);
                x=reshape(x,prod(sizes(1:end-1)),sizes(end));
                disp(['reshape -->', num2str(size(x))]);
            end
            if size(x,1)<size(x,2)
                x=x.';
                disp(['transpose --> ',num2str(size(x))]);
            end
            hhh.size=size(x);
            nt_whoss;

            if ~isempty(x)
                dsratio=100;
                iii.card=[]; iii.min=[]; iii.max=[]; iii.mean=[]; iii.ssq=[];
                iii=nt_idx(x,dsratio,iii);
            end
        end
    end
end

if updateFlag
    status=1;
    hh=hhh; ii=iii; %#ok<NASGU>
    save(idxName, 'hh','ii');
    disp(idxName)
else
    status=0;
end
end

function counts=nt_index_add_count(counts,parts,n)
% Add n to the count stored at the nested field path 'parts' (cell array of
% field names) of struct 'counts', creating the field if it does not exist.
if ~isstruct(counts); counts=struct(); end
node=counts;
present=true;
for k=1:numel(parts)
    if isstruct(node) && isfield(node,parts{k})
        node=node.(parts{k});
    else
        present=false;
        break;
    end
end
if present && isnumeric(node)
    n=node+n;
end
counts=setfield(counts,parts{:},n);
end
0355
function ii=index(x,p)
%ii=index(x,p) - min and max of x over consecutive chunks of p.scale rows
%
%  ii: struct with fields nsamples, nchans, scale, p, min, max
%      min and max have one row per chunk and one column per column of x;
%      leftover rows that do not fill a chunk contribute one final row
%
%  x: 2D matrix (rows are samples, columns are channels)
%  p: struct with field 'scale' (chunk length in rows)
if ndims(x)>2; error('!'); end
[ii.nsamples,ii.nchans]=size(x);
ii.scale=p.scale;
ii.p=p;
npairs=floor(ii.nsamples/p.scale);          % number of complete chunks
x_extra=x(npairs*p.scale+1:end,:);          % rows left over after the last chunk
x=x(1:npairs*p.scale,:);
x=reshape(x,[p.scale,npairs,ii.nchans]);
% explicit reshape keeps the result [npairs x nchans] whatever the sizes;
% squeeze followed by transpose only gave that shape for a single channel
ii.min=reshape(min(x,[],1),[npairs,ii.nchans]);
ii.max=reshape(max(x,[],1),[npairs,ii.nchans]);
if ~isempty(x_extra)
    ii.min=[ii.min;min(x_extra,[],1)];
    ii.max=[ii.max;max(x_extra,[],1)];
end
end
0375
function date=dateModified(name)
%date=dateModified(name) - modification date (datenum) of a file or directory
%
%  name: path including a directory part; an error is raised if the
%        directory part is empty, if name is neither file nor directory,
%        or if no date can be found
%
% For a file the date comes from dir(name).  For a directory, dir(name)
% would list its content, so the date is looked up in the parent listing.
[parentDir,base,ext]=fileparts(name);
if isempty(parentDir); error('!'); end
date=[];
switch exist(name) %#ok<EXIST>
    case 2
        info=dir(name);
        date=info.datenum;
    case 7
        listing=dir(parentDir);
        hit=find(strcmp({listing.name},[base,ext]),1);
        if ~isempty(hit)
            date=listing(hit).datenum;
        end
    otherwise
        disp(name)
        error('!');
end
if isempty(date)
    disp(['>',name,'<']);
    error('!');
end
end
0402
0403
0404
function [isdata,type]=filetype(name)
%[isdata,type]=filetype(name) - decide whether a file holds indexable data
%
%  isdata: 1 if the file is considered data, 0 otherwise
%  type: 'empty' for a zero-byte file,
%        the lower-case extension (without dot) for skipped extensions,
%        'matlab.<variable>' for a MAT file with a numeric variable,
%        'matlab_non_numeric' for a MAT file without one,
%        the TYPE field returned by sopen for other files,
%        'unknown' if the file could not be opened or identified
%
%  name: path of an existing file
EXTENSIONS_TO_SKIP={'.idx', '.zip','.txt','.pdf','.doc','.docx','.ppt','.pptx','.xls','.html','.rtf',...
    '.jpg', '.png', '.tif','.tiff','.js', '.md', '.m', '.py', '.rar', '.wav', '.eps', '.pdfsync',...
    '.avi', '.PDF', '.gz', '.zip'};
[~,~,EXT]=fileparts(name);
isdata=0; type='unknown';
d=dir(name);
if d.bytes==0
    type='empty';
elseif ~isempty(EXT) && any(strcmpi(EXT,EXTENSIONS_TO_SKIP))
    type=lower(EXT); type=type(2:end);
    disp(['skip (extension): ',name])
else
    fid=fopen(name);
    if fid<0
        % unreadable file: report it as unknown rather than crash in fread
        disp(name);
        warning('... fopen failed');
        return
    end
    firstbytes=fread(fid,8,'uchar');
    fclose(fid);
    if strcmp(EXT,'.mat') || (numel(firstbytes)>=4 && all(firstbytes(1:4)'=='MATL'))
        % MAT file, by extension or by the first four bytes
        try
            s=whos('-file',name);
        catch ME
            disp(name);
            disp('... whos failed');
            disp(ME)
            type='unknown'; return
        end

        numerics={'double','single','int64','int32','int16','int8'};
        isNumeric=ismember({s.class},numerics);

        if ~any(isNumeric)
            % no numeric variable (or no variable at all)
            isdata=0; type='matlab_non_numeric';
        else
            % choose the numeric variable with the most elements;
            % non-numeric variables get -1 so they can never win
            sizes=-ones(numel(s),1);
            for iVariable=1:numel(s)
                if isNumeric(iVariable)
                    sizes(iVariable)=prod(s(iVariable).size);
                end
            end
            [~,biggest]=max(sizes);
            isdata=1; type=['matlab.',s(biggest).name];
            disp(name);
            disp(['mat file, multiple numeric variables, chosing: ''', s(biggest).name, ''', size:',num2str(s(biggest).size)]);
        end

    else
        % anything else: let sopen identify the format
        try
            h=sopen(name);
            type=h.TYPE;
            sclose(h);
        catch ME
            disp(name);
            warning('... sopen failed');
            disp(ME);
            type='unknown'; return
        end
        if strcmp(type,'unknown')
            isdata=0;
        else
            isdata=1;
        end
    end
end
end
0477
function x=readmatlab(name,varname)
%x=readmatlab(name,varname) - read one variable from a MAT file
%
%  x: value of the variable
%
%  name: path of the MAT file
%  varname: name of the variable to read
%
% The variable is loaded into a struct rather than into the workspace, so a
% stored variable called 'name', 'varname' or 'x' cannot overwrite the
% locals, and no eval is needed.
loaded=load('-mat',name,varname);
x=loaded.(varname);
end
0483
0484
0485
function s=myfieldnamesr(x)
%s=myfieldnamesr(x) - recursive list of field names of a struct
%
%  s: row cell array of names; a field that is itself a struct is replaced
%     by its own fields prefixed with '<field>.'.  [] if x is not a struct.
%
%  x: struct to inspect
if ~isstruct(x)
    s=[];
    return;
end
topNames=fieldnames(x);
s={};
for k=1:numel(topNames)
    thisName=topNames{k};
    value=getfield(x,thisName); %#ok<GFLD>
    if ~isa(value,'struct')
        % leaf: keep the name as is
        s=[s,thisName]; %#ok<AGROW>
        continue;
    end
    % nested struct: recurse and prefix every returned name
    nested=myfieldnamesr(value);
    for j=1:numel(nested)
        s=[s,[thisName,'.',char(nested(j))]]; %#ok<AGROW>
    end
end
end
0502
0503
0504
function iii=merge_file_indexes(d, dname)
%iii=merge_file_indexes(d,dname) - concatenate the indexes of a directory's files
%
%  iii: struct with one field per statistic found in any file's index; the
%       files' matrices are stacked row-wise in the order of d, and padded
%       with zero columns up to the widest one.  [] if no statistic is found.
%
%  d: struct array with field 'name', one element per file
%  dname: directory holding the files; indexes are read from
%         <dname>/nt_idx/<name>.idx (variable 'ii')
iii=[];

% read all indexes; load into a struct so that a file without 'ii' yields
% an empty entry instead of silently reusing the previous file's value
nfiles=numel(d);
all_indexes=cell(nfiles,1);
for iFile=1:nfiles
    loaded=load('-mat',[dname,filesep,'nt_idx',filesep,d(iFile).name,'.idx'], 'ii');
    if isfield(loaded,'ii')
        all_indexes{iFile}=loaded.ii;
    end
end

% first pass: total rows and maximum columns of each statistic
statNrows=struct();
statNcols=struct();
for iFile=1:nfiles
    ii=all_indexes{iFile};
    if isempty(ii) || ~isstruct(ii); continue; end
    statNames=fieldnames(ii);
    for iField=1:numel(statNames)
        statName=statNames{iField};
        if ~isfield(statNcols,statName)
            statNrows.(statName)=0;
            statNcols.(statName)=0;
        end
        [nrows,ncols]=size(ii.(statName));
        statNrows.(statName)=statNrows.(statName)+nrows;
        statNcols.(statName)=max(statNcols.(statName),ncols);
    end
end
statNames=fieldnames(statNcols);
if isempty(statNames)
    return;
end

% allocate the merged matrices and a row counter per statistic
iCounter=struct();
for iName=1:numel(statNames)
    statName=statNames{iName};
    iii.(statName)=zeros(statNrows.(statName),statNcols.(statName));
    iCounter.(statName)=0;
end

% second pass: copy each file's block below the previous ones
for iFile=1:nfiles
    ii=all_indexes{iFile};
    for iName=1:numel(statNames)
        statName=statNames{iName};
        if isfield(ii,statName)
            tmp=ii.(statName);
            offset=iCounter.(statName);
            iii.(statName)(offset+(1:size(tmp,1)),1:size(tmp,2))=tmp;
            iCounter.(statName)=offset+size(tmp,1);
        end
    end
end
end
0560
0561
0562
0563
0564
0565