Home > NoiseTools > nt_idxx.m

nt_idxx

PURPOSE ^

nt_idxx(fname,p) - create an index file to summarize large data file

SYNOPSIS ^

function [bstats,wstats,cstats,sstats]=nt_idxx(fname,p)

DESCRIPTION ^

nt_idxx(fname,p) - create an index file to summarize large data file

 Usage:
   nt_idx(fname): calculate index structs, store in index file
   nt_idx(fname,p): override default parameters
   [bstats,wstats,cstats,sstats]=nt_idx(fname,...): return index structs:
     bstats: basic stats (size, etc.)
     wstats: waveform (min, max, mean, std)
     cstats: covariance
     sstats: psd

 Parameters:
  fname: name of data file to index
  p: struct to pass parameters:
    p.iname: name of index file to create [default: [PATH, 'idxx/', NAME, EXT, '.idxx']
    p.nstats: target number of stats samples [default: 1000 unless p.dsr set]
    p.dsr: downsampling ratio of wave stats
    p.dsrcov: downsampling ratio of covariance relative to wave [default: don't calculate cov]
    p.dsrpsd: downsampling ratio of PSD relative to wave [default: don't calculate psd]
    p.channels_to_keep: ignore other channels
    p.nfft: fft size for psd [default: 1024]
    p.nlin: number of full-resolution spectral bins [default: 32]
    p.chunksize: size of chunks to read from disk [default: 500000, 0 -->all]
    p.reader: header/data reader [default: 'BioSig']
    p.preproc: preprocessing function handle [default: none]
    p.quiet: suppress messages [default: false];

 Reader p.reader can be either 'FieldTrip' or 'BioSig' or a user-specified
 reader 'reader' callable as:
   [header,data]=reader(fname, chans, bouds)
 where bounds=[firstsample last sample], chans is a list of channels
 starting from 1, header.Fs contains the sample rate, and data has 
 dimensions samples X channels
 
 If p.preproc is provided, it will be called as:
    [y,w]=p.preproc(x);
 where x and y are time X channels data matrices (resp. before and after
 preprocessing), and w is a weight array (time X 1 or time X channels)
 indicating bad data. Examples:
    p.preproc=@(x) deal(max(min(x,10),-10),[]); % clip, no weight
    p.preproc=@(x) deal(max(min(x,10),-10),all(x==max(min(x,10),-10),2)); % clipping
    p.preproc=@(x) deal(max(min(x,10),-10),x==max(min(x,10),-10)); % clipping per channel

 NoiseTools

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function [bstats,wstats,cstats,sstats]=nt_idxx(fname,p)
0002 %nt_idxx(fname,p) - create an index file to summarize large data file
0003 %
0004 % Usage:
0005 %   nt_idx(fname): calculate index structs, store in index file
0006 %   nt_idx(fname,p): override default parameters
0007 %   [bstats,wstats,cstats,sstats]=nt_idx(fname,...): return index structs:
0008 %     bstats: basic stats (size, etc.)
0009 %     wstats: waveform (min, max, mean, std)
0010 %     cstats: covariance
0011 %     sstats: psd
0012 %
0013 % Parameters:
0014 %  fname: name of data file to index
0015 %  p: struct to pass parameters:
0016 %    p.iname: name of index file to create [default: [PATH, 'idxx/', NAME, EXT, '.idxx']
0017 %    p.nstats: target number of stats samples [default: 1000 unless p.dsr set]
0018 %    p.dsr: downsampling ratio of wave stats
0019 %    p.dsrcov: downsampling ratio of covariance relative to wave [default: don't calculate cov]
0020 %    p.dsrpsd: downsampling ratio of PSD relative to wave [default: don't calculate psd]
0021 %    p.channels_to_keep: ignore other channels
0022 %    p.nfft: fft size for psd [default: 1024]
0023 %    p.nlin: number of full-resolution spectral bins [default: 32]
0024 %    p.chunksize: size of chunks to read from disk [default: 500000, 0 -->all]
0025 %    p.reader: header/data reader [default: 'BioSig']
0026 %    p.preproc: preprocessing function handle [default: none]
0027 %    p.quiet: suppress messages [default: false];
0028 %
0029 % Reader p.reader can be either 'FieldTrip' or 'BioSig' or a user-specified
0030 % reader 'reader' callable as:
0031 %   [header,data]=reader(fname, chans, bouds)
0032 % where bounds=[firstsample last sample], chans is a list of channels
0033 % starting from 1, header.Fs contains the sample rate, and data has
0034 % dimensions samples X channels
0035 %
0036 % If p.preproc is provided, it will be called as:
0037 %    [y,w]=p.preproc(x);
0038 % where x and y are time X channels data matrices (resp. before and after
0039 % preprocessing), and w is a weight array (time X 1 or time X channels)
0040 % indicating bad data. Examples:
0041 %    p.preproc=@(x) deal(max(min(x,10),-10),[]); % clip, no weight
0042 %    p.preproc=@(x) deal(max(min(x,10),-10),all(x==max(min(x,10),-10),2)); % clipping
0043 %    p.preproc=@(x) deal(max(min(x,10),-10),x==max(min(x,10),-10)); % clipping per channel
0044 %
0045 % NoiseTools
0046 nt_greetings;
0047 
0048 if nargin<1; error('!'); end
0049 if nargin<2 ; p=[]; end
0050 
0051 % defaults;
0052 if ~isfield(p,'iname')||isempty(p.iname); p.iname=[]; end % default assigned later
0053 if ~isfield(p,'dsr'); p.dsr=[]; end % block size unspecified
0054 if ~isfield(p,'nstats'); p.nstats=[]; end % number of stats unspecified
0055 if ~isfield(p,'dsrcov'); p.dsrcov=[]; end % don't calculate covariance
0056 if ~isfield(p,'dsrpsd'); p.dsrpsd=[]; end % don't calculate PSD
0057 if ~isfield(p,'channels_to_keep')||isempty(p.channels_to_keep); p.channels_to_keep=[]; end % all
0058 if ~isfield(p,'nfft')||isempty(p.nfft); p.nfft=4096; end
0059 if ~isfield(p,'nlin')||isempty(p.nlin); p.nlin=16; end
0060 if ~isfield(p,'chunksize')||isempty(p.chunksize); p.chunksize=500000; end
0061 if ~isfield(p,'reader')||isempty(p.reader); p.reader='BioSig'; end
0062 if ~isfield(p,'preproc'); p.preproc=[]; end
0063 if ~isfield(p,'quiet'); p.quiet=0; end
0064 
0065 % check for data file
0066 if ~exist(fname,'FILE');
0067     disp(fname);
0068     disp('not found');
0069     error('!');
0070 end
0071 
0072 % check for readers
0073 if strcmp(p.reader,'FieldTrip')
0074     if 2~=exist('ft_read_data'); disp('You must download FieldTrip from www.fieldtriptoolbox.org'); return; end
0075 elseif strcmp(p.reader,'BioSig')
0076     if 2~=exist('sopen'); disp('You must download BioSig from biosig.sourceforge.net'); return; end
0077 else
0078     if ~isa(p.reader,'function_handle'); error('function not found'); end
0079 end
0080 
0081 % use separate structs to make it easy to read just one kind of stats from file
0082 bstats=[]; % index structure for basic stats
0083 wstats=[]; % index structure for waveform
0084 cstats=[]; % index structure for covariance
0085 sstats=[]; % index structure for spectrogram
0086 
0087 % read header, store info in bstats
0088 if strcmp(p.reader,'FieldTrip')
0089     h=ft_read_header(fname);
0090     bstats.sr=h.Fs;
0091     bstats.nsamples=h.nSamples;
0092     bstats.nchans=h.nChans;
0093     bstats.label=h.label; 
0094 elseif strcmp(p.reader,'BioSig')
0095     if p.quiet
0096         evalc('h=sopen(fname)'); % suppress pesky verbose output
0097     else
0098         h=sopen(fname);
0099     end 
0100     bstats.sr=h.SampleRate;
0101     %if h.NRec>1; error('!'); end % need to figure out how to handle, if needed
0102     bstats.nsamples=h.SPR*h.NRec;
0103     idx=find(~strcmp(h.Label,'EDF Annotations'));
0104     bstats.nchans=numel(idx);
0105     bstats.label=h.Label(idx);
0106     sclose(h);
0107 else
0108     h=p.reader(fname);
0109     bstats.sr=h.sr;
0110     bstats.nsamples=h.nsamples;
0111     bstats.nchans=h.nchans;
0112     bstats.label=h.label;
0113 end
0114 bstats.header=h;
0115 bstats.fname=fname; 
0116 bstats.p=p;
0117 bstats.now=now;
0118 
0119 % store code of this function and of caller function
0120 bstats.nt_idxx.txt=evalc(['type ', mfilename]);
0121 stack=dbstack;
0122 if numel(stack)>1
0123     bstats.caller.file=stack(2).name;
0124     try
0125         txt=evalc(['type ', stack(2).name]); 
0126     catch
0127         warning('could not list caller function');
0128     end
0129     bstats.caller.txt=txt;
0130 end
0131 
0132 % channels
0133 if isempty(p.channels_to_keep) 
0134     p.channels_to_keep=1:bstats.nchans;
0135 end
0136 
0137 if any(p.channels_to_keep>bstats.nchans); error('!'); end
0138 bstats.nchans=numel(p.channels_to_keep);
0139 
0140 % idetermine time resolution of statistics
0141 if isempty(p.dsr) 
0142     % DSR unspecified, specify nstats
0143     if isempty(p.nstats)
0144         % set to default
0145         p.nstats=1000;
0146     end
0147     bstats.dsr=ceil(bstats.nsamples/p.nstats);
0148     nwstats=ceil(bstats.nsamples/bstats.dsr);
0149 else
0150     % DSR specified
0151     if ~isempty(p.nstats)
0152         % ntats also specify: try to ensure minimum nstats
0153         if ceil(bstats.nsamples/p.dsr)<p.nstats
0154             p.dsr=ceil(bstats.nsamples/p.nstats);
0155         end
0156     end
0157     bstats.dsr=p.dsr;
0158     nwstats=ceil(bstats.nsamples/bstats.dsr);
0159 end
0160 
0161 % allocate basic stats arrays:
0162 nwstats=ceil(bstats.nsamples/bstats.dsr); % total number of blocks for basic stats
0163 wstats.nwstats=nwstats;
0164 wstats.min=zeros(nwstats,bstats.nchans);
0165 wstats.max=zeros(nwstats,bstats.nchans); 
0166 wstats.mean=zeros(nwstats,bstats.nchans);
0167 wstats.rms=zeros(nwstats,bstats.nchans);
0168 
0169 % allocate weight matrix if needed
0170 % if ~isempty(p.preproc)
0171 %     % test to determine if it returns a weight matrix, and if so what size
0172 %     [~,w]=p.preproc(randn(100,10));
0173 %     clf; drawnow;
0174 %     if ~isempty(w)
0175 %         if size(w,2)==1
0176 %             wstats.w=zeros(nwstats,1);
0177 %         else
0178 %             wstats.w=zeros(nwstats,bstats.nchans);
0179 %         end
0180 %     end
0181 % end
0182 
0183 % size of chunk to read from disk, multiple of block size
0184 p.chunksize=floor(p.chunksize/bstats.dsr)*bstats.dsr; 
0185 
0186 % allocate covariance array
0187 if ~isempty(p.dsrcov)
0188     blksizecov=p.dsrcov*bstats.dsr;
0189     tmp=log2(p.dsrcov);
0190     assert(tmp==round(tmp), ...
0191         'p.dsrcov should be power of 2');
0192     ncstats=ceil(bstats.nsamples/blksizecov);
0193     cstats.cov=zeros(ncstats,bstats.nchans,bstats.nchans);
0194     cstats.blksizecov=blksizecov;
0195     p.chunksize=floor(p.chunksize/blksizecov)*blksizecov;
0196 end
0197 
0198 % allocate psd array
0199 if ~isempty(p.dsrpsd) 
0200     blksizepsd=p.dsrpsd*bstats.dsr;
0201     if blksizepsd < p.nfft; 
0202         disp([blksizepsd,p.nfft]);
0203         error('!'); 
0204     end;
0205     tmp=log2(p.dsrpsd);
0206     assert(tmp==round(tmp), ...
0207         'p.dsrpsd should be power of 2');
0208     nsstats=ceil(bstats.nsamples/blksizepsd);
0209     
0210     %{
0211     Matrix to transform linear frequency  to semilog frequency.
0212     The first nlin samples are full resolution, the next nlin/2 half
0213     resolution (spectral bins grouped by 2), the next nlin/2 quarter
0214     resolution, etc.
0215     %}
0216     M={};
0217     if p.nlin~=2^log2(p.nlin); error('!'); end
0218     M{1}=eye(p.nlin+1);
0219     for iBlk=1:log2(p.nfft/p.nlin)-1
0220         for k=1:p.nlin/2 
0221             MM{k}=ones(2^iBlk,1)/2^iBlk; 
0222         end
0223         M{iBlk+1}=blkdiag(MM{:});        
0224     end
0225     M=blkdiag(M{:});
0226     sstats.M=M;
0227 
0228     % scaled frequency axis:
0229     f0=(0:p.nfft/2)/p.nfft; % normalized frequency of fft
0230     f=f0*M; f=f./sum(M);
0231     sstats.f=f;
0232     
0233 %    figure(1); clf; plot(f); pause
0234 
0235     sstats.psd=zeros(nsstats,bstats.nchans,size(M,2));
0236     sstats.nfft=p.nfft;
0237     sstats.blksizepsd=blksizepsd;
0238     
0239     % ensure chunksize is multiple of blksepsd
0240     p.chunksize=floor(p.chunksize/blksizepsd)*blksizepsd;
0241 end
0242 
0243 
0244 foffset=0;
0245 boffset=0;
0246 coffset=0;
0247 soffset=0;
0248 
0249 % read and index;
0250 while true
0251     
0252     %if file_offset>=i.nsamples; break; end
0253     if ~p.quiet; 
0254         disp([foffset, bstats.nsamples]); 
0255         disp([num2str(foffset/bstats.nsamples*100), '%']);
0256     end
0257     
0258     % read chunk from disk
0259     tic
0260     begsample=foffset+1;
0261     endsample=min(foffset+p.chunksize,bstats.nsamples);
0262     if endsample<=begsample; error('!'); end
0263     if strcmp(p.reader,'FieldTrip')
0264         error('!');
0265         x=ft_read_data(fname, 'begsample',begsample,'endsample',endsample);
0266         x=x'; % --> time X channels
0267         x=x(:,p.channels_to_keep);
0268     elseif strcmp(p.reader,'BioSig')
0269         if p.quiet
0270             evalc('h=sopen(fname,''r'',p.channels_to_keep)');% suppress pesky verbose output
0271         else
0272             h=sopen(fname,'r',p.channels_to_keep);
0273         end
0274         NoS=(endsample-begsample-1)/bstats.sr;
0275         StartPos=(begsample-1)/bstats.sr;
0276         x=sread(h,NoS,StartPos);
0277         sclose(h);
0278     else
0279         [~,x]=p.reader(fname,p.channels_to_keep,[begsample endsample]);
0280     end
0281     if ~p.quiet; toc; end
0282     
0283     disp(size(x))
0284     
0285     % apply preprocessing
0286     if ~isempty(p.preproc)
0287         disp('preproc:'); tic;
0288         [x,w]=p.preproc(x);
0289         toc;
0290     else
0291         w=[];
0292     end
0293     if ~isempty(w); error('TBD'); end
0294     
0295     figure(1); clf; plot(x); drawnow;
0296     
0297     % fold chunk into blocks
0298     nblocks=floor(size(x,1)/bstats.dsr); % number of blocks in this chunk
0299     xb=x(1:nblocks*bstats.dsr,:); % chunk
0300     xb=reshape(xb,[bstats.dsr,nblocks,bstats.nchans]); % block matrix
0301 
0302     % time series of waveform statistics
0303     tic;
0304     wstats.min(boffset+(1:nblocks),:)=min(xb,[],1);
0305     wstats.max(boffset+(1:nblocks),:)=max(xb,[],1);
0306     wstats.mean(boffset+(1:nblocks),:)=mean(xb,1);
0307     wstats.rms(boffset+(1:nblocks),:)=sqrt(mean(xb.^2,1));
0308     boffset=boffset+nblocks; 
0309     if ~p.quiet; toc; end
0310     
0311     figure(2); clf; plot([wstats.min, wstats.max]); drawnow
0312     
0313     
0314     % extra bit at end of file?
0315     if size(x,1)>nblocks*bstats.dsr
0316         tmp=x(nblocks*bstats.dsr+1:end,:);
0317         wstats.min(boffset+1,:)=min(tmp,[],1);
0318         wstats.max(boffset+1,:)=max(tmp,[],1);
0319         wstats.mean(boffset+1,:)=mean(tmp,1);
0320         wstats.rms(boffset+1,:)=sqrt(mean(tmp.^2,1));
0321     end
0322     
0323     foffset=foffset+nblocks*bstats.dsr; % advance pointer in file
0324 
0325     % time series of covariance matrices
0326     if ~isempty(cstats) && isfield(cstats, 'cov')
0327         nblocks=floor(size(x,1)/blksizecov); % number of blocks
0328         xb=x(1:nblocks*blksizecov,:);        
0329         xb=reshape(xb,[blksizecov, nblocks, bstats.nchans]);
0330         for iBlock=1:nblocks
0331             tmp=squeeze(xb(:,iBlock,:));
0332             tmp=nt_demean(tmp);
0333             cstats.cov(coffset+iBlock,:,:) = tmp'*tmp;
0334         end
0335         coffset=coffset+size(xb,2);
0336         if size(x,1)>nblocks*blksizecov
0337             tmp=x(nblocks*blksizecov+1:end,:);
0338             tmp=nt_demean(tmp);
0339             cstats.cov(coffset+1,:,:)=tmp'*tmp;
0340         end              
0341     end
0342       
0343     % time series of power spectra
0344     if ~isempty(sstats) && isfield(sstats, 'psd')
0345         nblocks=floor(size(x,1)/blksizepsd); % number of blocks in chunk
0346         xb=x(1:nblocks*blksizepsd,:);        
0347         xb=reshape(xb,[blksizepsd, nblocks, bstats.nchans]);
0348         for iBlock=1:nblocks
0349             tmp=squeeze(xb(:,iBlock,:));    
0350             tmp=nt_demean(tmp);
0351             sstats.psd(soffset+iBlock,:,:) = pwelch(tmp, p.nfft, 'power')'*M;
0352         end
0353         soffset=soffset+size(xb,2);
0354         
0355         disp([soffset size(sstats.psd,1)]);
0356         
0357         if size(x,1)>nblocks*blksizepsd
0358             tmp=x(nblocks*blksizepsd+1:end,:);
0359             tmp=nt_demean(tmp);
0360             if size(tmp,1)<p.nfft; break; end
0361             sstats.psd(soffset+1,:,:) = pwelch(tmp, p.nfft, 'power')'*M;
0362         end              
0363     end
0364     
0365     if ~p.quiet; nt_whoss; end
0366     %disp([num2str(foffset/h.nSamples*100), '%']);
0367 %     disp([num2str(foffset), '/', num2str(h.nSamples), ' (', num2str(foffset/h.nSamples*100), '%)']);
0368 %     disp([boffset, coffset, soffset]);
0369     
0370     if endsample>=bstats.nsamples; break; end;
0371 end
0372    
0373 if ~nargout
0374     if isempty(p.iname)
0375         [FILEPATH,NAME,EXT]=fileparts(fname);
0376         if isempty(FILEPATH); FILEPATH=pwd; end
0377         if ~exist([FILEPATH,filesep,'idxx'], 'dir')
0378             mkdir([FILEPATH,filesep,'idxx']);
0379         end        
0380         p.iname=[FILEPATH,filesep,'idxx',filesep,NAME,EXT,'.idxx'];
0381     end
0382     wstats.min=nt_double2int(wstats.min); 
0383     wstats.max=nt_double2int(wstats.max);
0384     wstats.mean=nt_double2int(wstats.mean);
0385     wstats.rms=nt_double2int(wstats.rms);
0386     save(p.iname, 'bstats', 'wstats','cstats', 'sstats','-v7.3');
0387     clear bstats wstats cstats sstats;
0388 end
0389 
0390 
0391     
0392

Generated on Sat 29-Apr-2023 17:15:46 by m2html © 2005