Home > NoiseTools > nt_inpaint_old.m

nt_inpaint_old

PURPOSE ^

y=nt_inpaint(x,w) - interpolate missing samples of matrix

SYNOPSIS ^

function y=nt_inpaint(x,w)

DESCRIPTION ^

y=nt_inpaint(x,w) - interpolate missing samples of matrix

  y: interpolated data

  x: data to interpolate
  w: weight matrix

 Missing samples (w==0) of a channel are replaced by the projection of
 that channel on the subspace spanned by intact channels, based on
 covariance estimates from intact data.
 
 See nt_detrend, nt_star.

 NoiseTools.

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function y=nt_inpaint(x,w)
0002 %y=nt_inpaint(x,w) - interpolate missing samples of matrix
0003 %
0004 %  y: interpolated data
0005 %
0006 %  x: data to interpolate
0007 %  w: weight matrix
0008 %
0009 % Missing samples (w==0) of a channel are replaced by the projection of
0010 % that channel on the subspace spanned by intact channels, based on
0011 % covariance estimates from intact data.
0012 %
0013 % See nt_detrend, nt_star.
0014 %
0015 % NoiseTools.
0016 nt_greetings;
0017 
0018 % Flexible parameters
0019 PCA_THRESH=10^-25;
0020 minProp=0.5; % minimum proportion of data samples for covariance computation
0021 
0022 %{
0023 We estimate the weighted covariance matrix of the data, then project
0024 invalid (weight=0) samples on the subspace spanned by the valid (weight=1)
0025 samples.
0026 
0027 The main obstacle is computational, as there are a very large number of
0028 configurations of valid / invalid channels to cater to.  
0029 %}
0030 
0031 %% check parameters, check/fix sizes
0032 if nargin<2; error('!'); end
0033 
0034 if any(size(w) ~= size(x)); error('w must be of same size as x'); end
0035 if any(w ~= 0 & w ~= 1); error('weights must be 0 or 1'); end
0036 if any(0 == sum(w,2)); warning('for some samples all channels have zero weight'); end
0037 x=nt_unfold(x);
0038 w=nt_unfold(w);
0039 
0040 y=x;
0041 
0042 for iChan=1:size(x,2)
0043     if 0; fprintf('Fixing channel %d \n',iChan); end
0044     
0045     % Any artifacts ?
0046     badSamples=~w(:,iChan);
0047     if all(~badSamples); continue; end
0048     minDur=sum(w(:,iChan))*minProp;   
0049            
0050     % Find best data to compute covariance
0051     iOtherChannels=setdiff(1:size(x,2),iChan);      % channels on which to project
0052     iOthersAllGood=all(w(:,[iChan, iOtherChannels]),2);      % good samples common to all other channels
0053     nGoodPerChannel=sum(w(iOthersAllGood,:));       % number of good samples on each channel
0054     while sum(iOthersAllGood) < minDur
0055         [~, iWorst] = min(nGoodPerChannel(iOtherChannels));
0056         iOtherChannels(iWorst)=[];                  % discard channel with fewest good samples
0057         iOthersAllGood=all(w(:,[iChan, iOtherChannels]),2);
0058         nGoodPerChannel=sum(w(iOthersAllGood,:));      
0059     end    
0060     x=nt_demean(x,iOthersAllGood);
0061     mn=y-x; 
0062     cc=nt_cov(x(iOthersAllGood,:)); % covariance over good samples
0063       
0064     % Copy data to fix
0065     wBad=w(badSamples,:);
0066     xBad=x(badSamples,:);
0067     yBad=y(badSamples,:);
0068     
0069     % Get weights with sparse pattern dictionary for interporlation
0070     wBad(:,~ismember(1:size(x,2),iOtherChannels))=0; % Put weight of unusable channels to 0 to decrease pattern dictionary size.
0071     [~, wwSparse,~,iBackSparse]=sparsePatternDict(wBad);
0072     iBadPatternsSparse=find(~wwSparse(:,iChan));    
0073     
0074     % Browse patterns and fix using appropriate channels
0075     for iPattern=iBadPatternsSparse'
0076         iGood=find(wwSparse(iPattern,:) & ismember(1:size(x,2),iOtherChannels)); % indices of channels to use
0077         inPattern=iBackSparse==iPattern; % indices of samples to fix
0078         
0079         [V,D]=eig(cc(iGood,iGood)); V=real(V); D=real(D);
0080         topcs=V(:,D/max(D) > PCA_THRESH); % discard weak dims
0081         b=cc(iChan,iGood)*topcs/(topcs'*cc(iGood,iGood)*topcs); % projection matrix
0082         xx=(xBad(inPattern,iGood));
0083         yBad(inPattern,iChan)=xx*(topcs*b')+mn(inPattern,iChan); % projection
0084     end % of bad patterns loop
0085     
0086     % Inject fixed data back
0087     y(badSamples,:)=yBad;
0088         
0089 end % of channel loop
0090 
0091 if ~nargout
0092     figure(1); clf
0093     subplot 211; 
0094     plot(x)
0095     subplot 212; 
0096     plot(y)
0097     clear y w;
0098 end
0099 
0100 % Create parcimonious pattern dictionary
0101 function [w, ww,nOccurrences,iBack]=sparsePatternDict(w)
0102 
0103 %Parameters
0104 ratio1=1/100; % maximum iteration ratio
0105 ratio2=1/100; % minimum efficiency ratio
0106 minThresh=size(w,2)/3; % minimum number of good channels to keep in each pattern
0107 
0108 % Initialize before while loop
0109 [ww, nOccurrences, iBack]=patternDict(w);
0110 iSimPatterns=1:size(ww,1);
0111 maxIterNum=size(ww,1)*ratio1; % maximum number of iterations allowed
0112 minEff=size(ww,1)*ratio2; % minimum efficiency required: number of grouped pattern / dictionary size
0113 iIter=1;
0114 
0115 % Group patterns until it has taken too much time or is not efficient enough
0116 while iIter<=maxIterNum && numel(iSimPatterns)> minEff 
0117     
0118     % Compute similarity metric
0119 %     sim=2*(ww-0.5)*2*(ww'-0.5); % number of common bad channels AND common good channels
0120     sim=ww*ww'; % number of common good channels only
0121     sim_noDiag=reshape(sim(~logical(diag(ones(size(ww,1),1)))),size(ww,1)-1,size(ww,1)); % values excluding diagonal
0122     
0123     % Compute threshold for number of common good channels across grouped
0124     % patterns
0125     weight=(iIter-1)/maxIterNum;
0126     nThresh=max(minThresh,...
0127         (mean((1+weight)*sim_noDiag(:))+(1-weight)*max(sim_noDiag(:)))/2);
0128 
0129     % Pick the pattern with the highest number of similar patterns
0130     [~, iPat]=max(sum(sim>=nThresh));  
0131  
0132     % Find most similar patterns
0133     iSimPatterns=find(sim(iPat,:)>=nThresh); % indices of similar patterns
0134     
0135     % Change all to unique pattern with common good channels only
0136     inPat=ismember(iBack, iSimPatterns);
0137     w(inPat,:)=repmat(all(ww(iSimPatterns,:),1),sum(inPat),1); 
0138     
0139     % Update pattern dictionary
0140     [ww,nOccurrences,iBack]=patternDict(w); 
0141     if 0; fprintf('%dth iteration, nDiffThresh=%g, %d patterns regrouped, %d patterns in dictionary\n',...
0142         iIter, nThresh, numel(iSimPatterns), size(ww,1)); end
0143     
0144     % Update other parameters
0145     maxIterNum=min(maxIterNum,size(ww,1)*ratio1+iIter); % maximum number of iterations allowed
0146     minEff=size(ww,1)*ratio2; % minimum efficiency required: number of grouped pattern / dictionary size
0147     iIter=iIter+1;    
0148     
0149 end % of while loop
0150 
0151 % create a dictionary of weight patterns
0152 function [ww,nOccurrences,iBack]=patternDict(w)
0153 % ww: dictionary of patterns
0154 % nOccurrences: number of times each pattern occurred
0155 % iBack: index to reconstruct input from dictionary
0156 [ww,~,IC,nOccurrences]=nt_unique(w,'rows');
0157 [nOccurrences,iSort]=sort(nOccurrences, 'descend'); % sort by decreasing number
0158 [~,iReverse]=sort(iSort); %
0159 ww=ww(iSort,:); % same order for patterns, w = ww(iReverse1(IC),:)
0160 iBack=iReverse(IC); % w = ww(iBack,:)
0161 
0162 %%% TEST %%%
0163 if 0
0164     x0=sin(2*pi*(1:10000)'*(1:10)/10000);
0165     x=x0*randn(10,20)+1;
0166     x(1:4000,1)=x(1:4000,1)+0.3*randn(size(x(1:4000,1)));
0167     w=ones(size(x));  
0168     w(1:4000,1)=0; w(4001:6000,3)=0; w(6000:7000,4)=0; w(7000:8000,5)=0; w(8000:9000,6)=0; w(9000:10000,7)=0;
0169     %w=randn(size(w))>0;
0170     %b=nt_regw(x(:,1),x(:,2:end),w(:,1)); y=x(:,2:end)*b;
0171     y=nt_inpaint(x,w);
0172     figure(2); clf
0173     subplot 211; plot([x(:,1),y(:,1)]); legend('raw','clean'); title('one channel'); subplot 212; plot(x(:,1)-y(:,1)); title('raw-clean');
0174 end
0175 if 0
0176     x=randn(10000,10);
0177     w=ones(size(x));
0178     y=nt_inpaint(x,w);
0179     figure(1); clf
0180     subplot 311; plot(x); title('raw');  subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0181 end
0182 if 0
0183     x0=sin(2*pi*(1:10000)'*(1:10)/10000);
0184     x=x0*randn(10,20)+1;
0185     w=ones(size(x));
0186     x(1:1000,1:2)=100; w(1:1000,1:2)=0;
0187     y=nt_inpaint(x,w);
0188     figure(1); clf
0189     subplot 311; plot(x); title('raw');  subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0190 end
0191 if 0
0192     N=30;
0193     nchans=50;
0194     x=zeros(1100,N);
0195     for k=1:N
0196         x(:,k)=sin(2*pi*k*(1:1100)'/1100);
0197     end
0198     x=x*randn(N,50);
0199     xx=x;
0200     w=ones(size(x));
0201     for k=1:nchans
0202         xx(k*20+(1:20),k)=100;
0203         w(k*20+(1:20),k)=0;
0204     end
0205     [y]=nt_inpaint(xx,w);
0206     figure(1); clf;
0207     subplot 311; plot(x); subplot 312; plot(xx); subplot 313; plot(y);
0208 end
0209 if 0
0210     [p,x]=nt_read_data('/data/meg/theoldmanandthesea/eeg/mc/MC_aespa_speech_45.mat'); x=x'; x=x(:,1:128); x=x(1:10000,:);
0211     %[p,x]=nt_read_data('/data/meg/arzounian/ADC_DA_140521_p20/ADC_DA_140521_p20_01_calib'); x=x'; x=x(1:10000,:);
0212     
0213     x=nt_demean(x);
0214     [x,w]=nt_detrend(x,10);   
0215     profile on; y=nt_inpaint(x,w); profile report;
0216     figure(1); clf
0217     subplot 311; plot(x); title('raw');  subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0218     figure(2); clf
0219     ch=35;subplot 311; plot([x(:,ch),y(:,ch)]); subplot 312; plot(x(:,ch)-y(:,ch)); subplot 313; plot(w(:,ch), '.-');
0220 end
0221 
0222

Generated on Tue 09-May-2017 13:19:57 by m2html © 2005