Description of nt

0001  function [y,yy]=nt_inpaint(x,w)
0002 %function y=nt_inpaint(x,w) - weighted interpolation based on correlation structure
0003 %
0004 %  y: interpolated data (only over w==0, a la STAR)
0005 %  yy: interpolated data (all times, a la SNS)
0006 %
0007 %  x: data matrix
0008 %  w: weight vector or matrix
0009 %
0010 %
0011 % Noisetools.
0012 nt_greetings;
0013 
0014 PCA_THRESH=10^-15;
0015 NSMOOTH=0;
0016 thresh=0.5;
0017 nClosest=min(20,size(x,2)-1);
0018 %nClosest=40;
0019 
0020 if nargin<1; error('!'); end
0021 if nargin<2||isempty(w); w=ones(size(x)); end
0022 if ndims(x)>2; error('!'); end
0023 if ~all(size(x)==size(w)); error('!'); end
0024 [nsamples,nchan]=size(x);
0025 
0026 %{
0027 We have multichannel data x and a multichannel weighting function w (0 or
0028 1). There are many configurations of valid/invalid channels to consider.
0029 List them.
0030 %}
0031 
0032 [ww,nOccurrences,iBack]=patternDict(w); % all patterns of good/bad channels
0033 nPatterns=size(ww,1);
0034 disp(size(ww));
0035 
0036 %{
0037 Now we have a list of all the different weight patterns: ww. The
0038 vector iBack indicates which data samples fit each pattern: w = ww(iBack,:).
0039 %}
0040      
0041 %{
0042 Find which channels are 'neighbors' in terms of covariance.
0043 %}
0044 
0045 % weighted covariance matrix to determine which channels are close
0046 x0=x;
0047 [x,save_mean]=nt_demean(x,w); 
0048 [x,save_amp]=nt_normcol(x,w);
0049 xx=x.*w;
0050 c=(xx'*xx) ./ (w'*w); clear xx;
0051 c=abs(c); 
0052 sims=c+10*eye(size(c)); % make sure self always scores highest so we can skip it
0053 
0054 y=x;  
0055 
0056 %{
0057 We now have a matrix indicating proximity between channels. 
0058 %}
0059 
0060 %{
0061 For each channel, we calculate the projection matrix on the the subspace spanned 
0062 by other *valid* channels.  There are as many projection matrices as patterns 
0063 of valid/invalid channels.  Each projection matrix is estimated on data samples for
0064 which iChan is valid, and can be used to reconstruct data samples for which it is 
0065 invalid.
0066 %}
0067 
0068 for iChan=1:nchan
0069 
0070     %{
0071     We want to avoid having to consider all patterns of valid/unvalid 
0072     other channels. For that we'll group patterns. 
0073     First we order the other channels by decreasing similarity, putting
0074     invalid samples last. This needs to be done for each pattern.
0075     %}
0076 
0077     sim=sims(iChan,:);              % list of similarities with all other channels
0078     sim=repmat(sim,nPatterns,1);    % replicate so each pattern has own list
0079     sim((~ww))=0;                   % for each list, give bad channels a low score
0080     [~,closest]=sort(abs(sim),2,'descend');     % sort each list by decreasing similarity
0081     for k=1:size(closest,1);
0082         closest(k,find(sim(k,closest(k,:))==0))=0;     % mark bad channels as 0
0083     end
0084     for k=1:size(closest,1);
0085         if closest(k,1)==iChan; 
0086             closest(k,1:size(closest,2)-1) = closest(k,2:end); % skip first entry of list if same as iChan
0087         else
0088             % iChan was bad so not first
0089         end
0090     end
0091     closest=closest(:,1:end-1); % last not valid if first skipped
0092     
0093     %{
0094     We now have, for each pattern, a list of channels closest to iChan. 
0095     There are a lot of different patterns, so we merge those for which the nClosest 
0096     channels are the same.
0097     %}
0098     
0099     % group patterns for which the nClosest most similar channels are the same
0100     [C,IA,IC]=unique(closest(:,1:nClosest),'rows');
0101     iBack2=IC(iBack);       % maps each pattern to the data that fit it
0102     
0103     %{
0104     We now have a smaller array C of reduced patterns. The
0105     vector iBack2 indicates which data samples correspond to each pattern.
0106     %}
0107     
0108     %{
0109     For some patterns, iChan is valid throughout.  We can skip these.
0110     For others, only a few samples are invalid. To save time w can skip these
0111     too and fix them later using serial interpolation.
0112     %}
0113     
0114     toFix=[];
0115     NSKIP=2;
0116     www=ones(size(x,1),1);
0117     for iPattern=1:size(C,1)
0118         %if any(~w(find(iBack2==iPattern),iChan)); toFix=[toFix,iPattern]; end
0119         mySamples=find(iBack2==iPattern); 
0120         mySamples=mySamples(find(~w(mySamples,iChan)));
0121         if numel(mySamples)<=NSKIP
0122             www(mySamples)=0;
0123         else
0124             toFix=[toFix,iPattern];
0125         end
0126     end
0127     C=C(toFix,:);
0128            
0129     for iPattern=1:size(C,1)
0130 
0131         %{
0132         Estimate matrix to project iChan on the other channels listed in this
0133         pattern. 
0134         %}
0135 
0136         oChan=C(iPattern,:);
0137         oChan(find(oChan==0))=[]; % exclude bad channels
0138 
0139         % samples corresponding to this pattern
0140         mySamples=find(iBack2==toFix(iPattern)); 
0141         mySamples=mySamples(find(~w(mySamples,iChan)));
0142 %        mySamples=find(iBack2==iPattern);
0143                 
0144         % select data for which iChan *and* oChan are valid
0145         iBothValid=all(w(:,[iChan,oChan]),2);        
0146         xxx=x(iBothValid, [iChan,oChan]);  
0147         
0148         %if size(xxx,1)<8000; disp([iChan, iPattern]); disp(size(xxx,1)); end
0149 
0150 
0151         %%% --> we should be able to avoid this situation
0152         if isempty(xxx); 
0153             disp([iChan, iPattern]); disp('empty'); 
0154             continue; % we won't estimate or fix anything
0155         end
0156         
0157         % calculate covariance matrix
0158         mn=mean(xxx,1);
0159         xxx=nt_demean(xxx); % remove mean first
0160         ccc=xxx'*xxx;
0161 
0162         % PCA other channels to remove weak dimensions
0163         [topcs,eigenvalues]=nt_pcarot(ccc(2:end,2:end));
0164         idx=find(eigenvalues/max(eigenvalues) > PCA_THRESH); % discard weak dims
0165         topcs=topcs(:,idx);
0166 
0167         % projection matrix
0168         b=ccc(1,2:end)*topcs / (topcs'*ccc(2:end,2:end)*topcs); 
0169         
0170         %{
0171         We now have the projection matrix to project channel iChan on channels oChan,
0172         applicable to samples corresponding to this pattern.  We can use it
0173         to fix samples for which iChan is invalid.
0174         %}
0175         
0176         y(mySamples,iChan) = ...
0177             (x(mySamples,oChan) - repmat(mn(2:end),numel(mySamples),1))... % first remove mean of other chans...
0178             *(topcs*b') ...
0179             + mn(1); % ... then restore mean of this channel
0180         
0181     end
0182     
0183     %{
0184     Now we fix the isolated samples that we skipped using serial interpolation.
0185     %}
0186     MAXGAPSIZE=100;
0187     y(:,iChan)=fillgap(y(:,iChan),www,MAXGAPSIZE);
0188     
0189 end
0190 
0191 
0192 y=bsxfun(@times,y,save_amp); % restore the initial ampitude
0193 y=bsxfun(@plus,y,save_mean); % restore the initial mean
0194 
0195 yy=y; 
0196 y(w~=0) = x0(w~=0); % don't touch valid parts
0197 
0198 if ~nargout
0199     % plot, don't return values
0200     disp(nt_wpwr(y)/nt_wpwr(x));
0201     figure(11); clf;
0202     subplot 311; plot(x0); title('raw');
0203     subplot 312; plot(y); title('projected on other channels')
0204     subplot 313; plot(w); ylim([-.1 1.1]); title('weight');
0205     clear w y
0206 end
0207 
0208 function [y,w]=fillgap(x,w,maxGapSize)
0209 %y=fillgap(x,w,maxGapSize) - fill gaps using simple interpolation
0210 %
0211 %  y: interpolated data
0212 %
0213 %  x: data to interpolate
0214 %  w: weighting function
0215 %  maxGapSize: largest expected gap size
0216 if nargin<2; error('!'); end
0217 if nargin<3||isempty(maxGapSize); maxGapSize=1; end
0218 if size(x,2)>1; error('!'); end
0219 if size(x) ~= size(w); error('!'); end
0220 y=x;
0221 if all(w); return; end
0222 % simple case size one
0223 iToFix=1+find(~w(2:end-1)&w(1:end-2)&w(3:end)); 
0224 y(iToFix)=(y(iToFix-1)+y(iToFix+1))/2;
0225 w(iToFix)=1; 
0226 % general case size > 1
0227 iStart=find(w(1:end-2) & ~w(2:end-1));  % position preceding gap
0228 iStop=find(~w(1:end-1) & w(2:end));     % last position in gap
0229 if isempty(iStart)||isempty(iStop); return; end
0230 if iStop(1)<iStart(1);
0231     iStop=iStop(2:end);                 % ignore gap at beginning
0232 end
0233 iStart=iStart(1:numel(iStop));          % ignore gap at end
0234 for gapSize=2:maxGapSize
0235     idx=find(iStop-iStart==gapSize);
0236     for k=1:gapSize
0237         % interpolate between samples on either side of gap
0238         y(iStart(idx)+k) = ( y(iStart(idx)) * (gapSize-k+1) + y(iStart(idx)+gapSize+1) * k ) / (gapSize+1);
0239         w(iStart(idx)+k) = 1;
0240     end
0241 end
0242 
0243 
0244 % create a dictionary of weight patterns
0245 function [ww,nOccurrences,iBack]=patternDict(w)
0246 % ww: dictionary of patterns
0247 % nOccurrences: number of times each pattern occurred
0248 % iBack: index to reconstruct input from dictionary
0249 [ww,~,IC,nOccurrences]=nt_unique(w,'rows');
0250 [nOccurrences,iSort]=sort(nOccurrences, 'descend'); % sort by decreasing number
0251 [~,iReverse]=sort(iSort); %
0252 ww=ww(iSort,:); % same order for patterns, w = ww(iReverse1(IC),:)
0253 iBack=iReverse(IC); % w = ww(iBack,:)
0254 
0255 %%% TEST %%%
0256 if 0
0257     x0=sin(2*pi*(1:10000)'*(1:5)/10000);
0258     x=x0*randn(5,10)+1;
0259     x(1:4000,1)=x(1:4000,1)+0.3*randn(size(x(1:4000,1)));
0260     w=ones(size(x));  
0261     w(1:4000,1)=0; w(4001:6000,3)=0; %w(6001:7000,4)=0; w(7001:8000,5)=0; w(8001:9000,6)=0; w(9001:10000,7)=0;
0262     %w=randn(size(w))>0;
0263     %b=nt_regw(x(:,1),x(:,2:end),w(:,1)); y=x(:,2:end)*b;
0264     y=nt_inpaint(x,w);
0265     figure(2); clf
0266     subplot 211; plot([x(:,1),y(:,1)]); legend('raw','clean'); title('one channel'); subplot 212; plot(x(:,1)-y(:,1)); title('raw-clean');
0267 end
0268 if 0
0269     x0=sin(2*pi*(1:10000)'*(1:3)/10000);
0270     x=x0*nt_normcol(randn(3,5));
0271     w=ones(size(x));
0272     x(1:1000,1)=100; w(1:1000,1)=0;
0273     x(2001:3000,1)=100; w(2001:3000,1)=0;
0274     x(1:2000,2)=100; w(1:2000,2)=0;
0275     x=x+0.1*randn(size(x));
0276     [y,yy]=nt_inpaint(x,w);
0277     figure(1); clf
0278     subplot 311; plot(x); title('raw');  subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0279 end
0280 if 0
0281     N=3;
0282     nchans=50;
0283     x=zeros(1100,N);
0284     for k=1:N
0285         x(:,k)=sin(2*pi*k*(1:1100)'/1100);
0286     end
0287     x=x*randn(N,nchans);
0288     x=nt_normcol(x) + 0*randn(size(x));
0289     xx=x;
0290     w=ones(size(x));
0291     for k=1:nchans
0292         xx(k*20+(1:10),k)=100;
0293         w(k*20+(1:10),k)=0;
0294     end
0295     [y,yy]=nt_inpaint(xx,w);
0296     figure(1); clf;
0297     subplot 411; plot(x); title('orig'); subplot 412; plot(xx); title('w glitches'); 
0298     subplot 413; plot(y); title('clean'); subplot 414; plot(x-y); title('diff w orig');
0299 end
0300 if 0
0301     N=10;
0302     nchans=20;
0303     nsamples=1100;
0304     x=zeros(nsamples,N);
0305     for k=1:N
0306         x(:,k)=sin(2*pi*k*(1:nsamples)'/nsamples);
0307     end
0308     x=x*randn(N,nchans);
0309 %    x=x+1*randn(size(x)); % add noise
0310     xx=x;
0311     w=ones(size(x));
0312     for k=1:nchans
0313         xx(500+k*20+(1:40),k)=100;
0314         w(500+k*20+(1:40),k)=0;
0315     end
0316     [y]=nt_inpaint(xx,w);
0317     figure(1); clf;
0318     subplot 411; plot(x); title('original'); 
0319     subplot 412; plot(xx); title ('with glitches');
0320     subplot 413; plot (y); title ('fixed'); 
0321     subplot 414; plot(x-y); title ('error');
0322 end
0323 if 0
0324     [p,x]=nt_read_data('/data/meg/theoldmanandthesea/eeg/mc/MC_aespa_speech_45.mat'); x=x'; x=x(:,1:128); x=x(0+(1:10000),:);
0325     %[p,x]=nt_read_data('/data/meg/arzounian/ADC_DA_140521_p20/ADC_DA_140521_p20_01_calib'); x=x'; x=x(1:10000,:);
0326     
0327     x=nt_demean(x);
0328     [x,w]=nt_detrend(x,10);   
0329     profile on; y=nt_inpaint(x,w); profile report;
0330     figure(1); clf
0331     subplot 311; plot(x); title('raw');  subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0332     figure(2); clf
0333     ch=35;subplot 311; plot([x(:,ch),y(:,ch)]); subplot 312; plot(x(:,ch)-y(:,ch)); subplot 313; plot(w(:,ch), '.-');
0334 end
0335 
0336
nt_inpaint

PURPOSE

SYNOPSIS

DESCRIPTION

CROSS-REFERENCE INFORMATION

SUBFUNCTIONS

SOURCE CODE