Home > NoiseTools > nt_inpaint.m

nt_inpaint

PURPOSE ^

y=nt_inpaint(x,w) - interpolate missing samples of matrix

SYNOPSIS ^

function y=nt_inpaint(x,w)

DESCRIPTION ^

y=nt_inpaint(x,w) - interpolate missing samples of matrix

  y: interpolated data

  x: data to interpolate
  w: weight matrix

 Missing samples (w==0) of a channel are replaced by the projection of
 that channel on the subspace spanned by intact channels, based on
 covariance estimates from intact data.
 
 See nt_detrend, nt_star.

 NoiseTools.

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function y=nt_inpaint(x,w)
0002 %y=nt_inpaint(x,w) - interpolate missing samples of matrix
0003 %
0004 %  y: interpolated data
0005 %
0006 %  x: data to interpolate
0007 %  w: weight matrix
0008 %
0009 % Missing samples (w==0) of a channel are replaced by the projection of
0010 % that channel on the subspace spanned by intact channels, based on
0011 % covariance estimates from intact data.
0012 %
0013 % See nt_detrend, nt_star.
0014 %
0015 % NoiseTools.
0016 nt_greetings;
0017 
0018 % Flexible parameters
0019 PCA_THRESH=10^-25;
0020 minProp=0.5; % minimum proportion of data samples for covariance computation
0021 
0022 %{
0023 We estimate the weighted covariance matrix of the data, then project
0024 invalid (weight=0) samples on the subspace spanned by the valid (weight=1)
0025 samples.
0026 
0027 The main obstacle is computational, as there are a very large number of
0028 configurations of valid / invalid channels to cater to.  
0029 %}
0030 
0031 %% check parameters, check/fix sizes
0032 if nargin<2; error('!'); end
0033 
0034 if any(size(w) ~= size(x)); error('w must be of same size as x'); end
0035 if any(w ~= 0 & w ~= 1); error('weights must be 0 or 1'); end
0036 if any(0 == sum(w,2)); warning('for some samples all channels have zero weight'); end
0037 x=nt_unfold(x);
0038 w=nt_unfold(w);
0039 
0040 y=x;
0041 
0042 for iChan=1:size(x,2)
0043     if 0; fprintf('Fixing channel %d \n',iChan); end
0044     
0045     % Any artifacts ?
0046     badSamples=~w(:,iChan);
0047     if all(~badSamples); continue; end
0048     minDur=sum(w(:,iChan))*minProp;   
0049            
0050     % Find best data to compute covariance
0051     iOtherChannels=setdiff(1:size(x,2),iChan);      % channels on which to project
0052     iOthersAllGood=all(w(:,[iChan, iOtherChannels]),2);      % good samples common to all other channels
0053     nGoodPerChannel=sum(w(iOthersAllGood,:));       % number of good samples on each channel
0054     while sum(iOthersAllGood) < minDur
0055         [~, iWorst] = min(nGoodPerChannel(iOtherChannels));
0056         iOtherChannels(iWorst)=[];                  % discard channel with fewest good samples
0057         iOthersAllGood=all(w(:,[iChan, iOtherChannels]),2);
0058         nGoodPerChannel=sum(w(iOthersAllGood,:));      
0059     end    
0060     x=nt_demean(x,iOthersAllGood);
0061     mn=y-x; 
0062     cc=nt_cov(x(iOthersAllGood,:)); % covariance over good samples
0063       
0064     % Copy data to fix
0065     wBad=w(badSamples,:);
0066     xBad=x(badSamples,:);
0067     yBad=y(badSamples,:);
0068     
0069     % Get weights with sparse pattern dictionary for interporlation
0070     wBad(:,~ismember(1:size(x,2),iOtherChannels))=0; % Put weight of unusable channels to 0 to decrease pattern dictionary size.
0071     [~, wwSparse,~,iBackSparse]=sparsePatternDict(wBad);
0072     iBadPatternsSparse=find(~wwSparse(:,iChan));    
0073     
0074     % Browse patterns and fix using appropriate channels
0075     for iPattern=iBadPatternsSparse'
0076         iGood=find(wwSparse(iPattern,:) & ismember(1:size(x,2),iOtherChannels)); % indices of channels to use
0077         inPattern=iBackSparse==iPattern; % indices of samples to fix
0078         
0079         [V,D]=eig(cc(iGood,iGood)); V=real(V); D=real(D);
0080         topcs=V(:,D/max(D) > PCA_THRESH); % discard weak dims
0081         b=cc(iChan,iGood)*topcs/(topcs'*cc(iGood,iGood)*topcs); % projection matrix
0082         xx=(xBad(inPattern,iGood));
0083         yBad(inPattern,iChan)=xx*(topcs*b')+mn(inPattern,iChan); % projection
0084     end % of bad patterns loop
0085     
0086     % Inject fixed data back
0087     y(badSamples,:)=yBad;
0088         
0089 end % of channel loop
0090 
0091 % Create parcimonious pattern dictionary
0092 function [w, ww,nOccurrences,iBack]=sparsePatternDict(w)
0093 
0094 %Parameters
0095 ratio1=1/100; % maximum iteration ratio
0096 ratio2=1/100; % minimum efficiency ratio
0097 minThresh=size(w,2)/3; % minimum number of good channels to keep in each pattern
0098 
0099 % Initialize before while loop
0100 [ww, nOccurrences, iBack]=patternDict(w);
0101 iSimPatterns=1:size(ww,1);
0102 maxIterNum=size(ww,1)*ratio1; % maximum number of iterations allowed
0103 minEff=size(ww,1)*ratio2; % minimum efficiency required: number of grouped pattern / dictionary size
0104 iIter=1;
0105 
0106 % Group patterns until it has taken too much time or is not efficient enough
0107 while iIter<=maxIterNum && numel(iSimPatterns)> minEff 
0108     
0109     % Compute similarity metric
0110 %     sim=2*(ww-0.5)*2*(ww'-0.5); % number of common bad channels AND common good channels
0111     sim=ww*ww'; % number of common good channels only
0112     sim_noDiag=reshape(sim(~logical(diag(ones(size(ww,1),1)))),size(ww,1)-1,size(ww,1)); % values excluding diagonal
0113     
0114     % Compute threshold for number of common good channels across grouped
0115     % patterns
0116     weight=(iIter-1)/maxIterNum;
0117     nThresh=max(minThresh,...
0118         (mean((1+weight)*sim_noDiag(:))+(1-weight)*max(sim_noDiag(:)))/2);
0119 
0120     % Pick the pattern with the highest number of similar patterns
0121     [~, iPat]=max(sum(sim>=nThresh));  
0122  
0123     % Find most similar patterns
0124     iSimPatterns=find(sim(iPat,:)>=nThresh); % indices of similar patterns
0125     
0126     % Change all to unique pattern with common good channels only
0127     inPat=ismember(iBack, iSimPatterns);
0128     w(inPat,:)=repmat(all(ww(iSimPatterns,:),1),sum(inPat),1); 
0129     
0130     % Update pattern dictionary
0131     [ww,nOccurrences,iBack]=patternDict(w); 
0132     if 0; fprintf('%dth iteration, nDiffThresh=%g, %d patterns regrouped, %d patterns in dictionary\n',...
0133         iIter, nThresh, numel(iSimPatterns), size(ww,1)); end
0134     
0135     % Update other parameters
0136     maxIterNum=min(maxIterNum,size(ww,1)*ratio1+iIter); % maximum number of iterations allowed
0137     minEff=size(ww,1)*ratio2; % minimum efficiency required: number of grouped pattern / dictionary size
0138     iIter=iIter+1;    
0139     
0140 end % of while loop
0141 
0142 % create a dictionary of weight patterns
0143 function [ww,nOccurrences,iBack]=patternDict(w)
0144 % ww: dictionary of patterns
0145 % nOccurrences: number of times each pattern occurred
0146 % iBack: index to reconstruct input from dictionary
0147 [ww,~,IC,nOccurrences]=nt_unique(w,'rows');
0148 [nOccurrences,iSort]=sort(nOccurrences, 'descend'); % sort by decreasing number
0149 [~,iReverse]=sort(iSort); %
0150 ww=ww(iSort,:); % same order for patterns, w = ww(iReverse1(IC),:)
0151 iBack=iReverse(IC); % w = ww(iBack,:)
0152 
0153 %%% TEST %%%
0154 if 0
0155     x0=sin(2*pi*(1:10000)'*(1:10)/10000);
0156     x=x0*randn(10,20)+1;
0157     x(1:4000,1)=x(1:4000,1)+0.3*randn(size(x(1:4000,1)));
0158     w=ones(size(x));  
0159     w(1:4000,1)=0; w(4001:6000,3)=0; w(6000:7000,4)=0; w(7000:8000,5)=0; w(8000:9000,6)=0; w(9000:10000,7)=0;
0160     %w=randn(size(w))>0;
0161     %b=nt_regw(x(:,1),x(:,2:end),w(:,1)); y=x(:,2:end)*b;
0162     y=nt_inpaint(x,w);
0163     figure(2); clf
0164     subplot 211; plot([x(:,1),y(:,1)]); legend('raw','clean'); title('one channel'); subplot 212; plot(x(:,1)-y(:,1)); title('raw-clean');
0165 end
0166 if 0
0167     x=randn(10000,10);
0168     w=ones(size(x));
0169     y=nt_inpaint(x,w);
0170     figure(1); clf
0171     subplot 311; plot(x); title('raw');  subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0172 end
0173 if 0
0174     x0=sin(2*pi*(1:10000)'*(1:10)/10000);
0175     x=x0*randn(10,20)+1;
0176     w=ones(size(x));
0177     x(1:1000,1:2)=100; w(1:1000,1:2)=0;
0178     y=nt_inpaint(x,w);
0179     figure(1); clf
0180     subplot 311; plot(x); title('raw');  subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0181 end
0182 if 0
0183     [p,x]=nt_read_data('/data/meg/theoldmanandthesea/eeg/mc/MC_aespa_speech_45.mat'); x=x'; x=x(:,1:128); x=x(1:10000,:);
0184     %[p,x]=nt_read_data('/data/meg/arzounian/ADC_DA_140521_p20/ADC_DA_140521_p20_01_calib'); x=x'; x=x(1:10000,:);
0185     
0186     x=nt_demean(x);
0187     [x,w]=nt_detrend(x,10);   
0188     profile on; y=nt_inpaint(x,w); profile report;
0189     figure(1); clf
0190     subplot 311; plot(x); title('raw');  subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0191     figure(2); clf
0192     ch=35;subplot 311; plot([x(:,ch),y(:,ch)]); subplot 312; plot(x(:,ch)-y(:,ch)); subplot 313; plot(w(:,ch), '.-');
0193 end
0194 
0195

Generated on Mon 27-Feb-2017 15:36:07 by m2html © 2005