0001 function [y,yy]=nt_inpaint(x,w)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 nt_greetings;
0013
0014 PCA_THRESH=10^-15;
0015 NSMOOTH=0;
0016 thresh=0.5;
0017 nClosest=min(20,size(x,2)-1);
0018
0019
0020 if nargin<1; error('!'); end
0021 if nargin<2||isempty(w); w=ones(size(x)); end
0022 if ndims(x)>2; error('!'); end
0023 if ~all(size(x)==size(w)); error('!'); end
0024 [nsamples,nchan]=size(x);
0025
0026
0027 We have multichannel data x and a multichannel weighting function w (0 or
0028 1). There are many configurations of valid/invalid channels to consider.
0029 List them.
0030
0031
0032 [ww,nOccurrences,iBack]=patternDict(w);
0033 nPatterns=size(ww,1);
0034 disp(size(ww));
0035
0036
0037 Now we have a list of all the different weight patterns: ww. The
0038 vector iBack indicates which data samples fit each pattern: w = ww(iBack,:).
0039
0040
0041
0042 Find which channels are 'neighbors' in terms of covariance.
0043
0044
0045
0046 x0=x;
0047 [x,save_mean]=nt_demean(x,w);
0048 [x,save_amp]=nt_normcol(x,w);
0049 xx=x.*w;
0050 c=(xx'*xx) ./ (w'*w); clear xx;
0051 c=abs(c);
0052 sims=c+10*eye(size(c));
0053
0054 y=x;
0055
0056
0057 We now have a matrix indicating proximity between channels.
0058
0059
0060
0061 For each channel, we calculate the projection matrix on the the subspace spanned
0062 by other *valid* channels. There are as many projection matrices as patterns
0063 of valid/invalid channels. Each projection matrix is estimated on data samples for
0064 which iChan is valid, and can be used to reconstruct data samples for which it is
0065 invalid.
0066
0067
0068 for iChan=1:nchan
0069
0070
0071 We want to avoid having to consider all patterns of valid/unvalid
0072 other channels. For that we'll group patterns.
0073 First we order the other channels by decreasing similarity, putting
0074 invalid samples last. This needs to be done for each pattern.
0075
0076
0077 sim=sims(iChan,:);
0078 sim=repmat(sim,nPatterns,1);
0079 sim((~ww))=0;
0080 [~,closest]=sort(abs(sim),2,'descend');
0081 for k=1:size(closest,1);
0082 closest(k,find(sim(k,closest(k,:))==0))=0;
0083 end
0084 for k=1:size(closest,1);
0085 if closest(k,1)==iChan;
0086 closest(k,1:size(closest,2)-1) = closest(k,2:end);
0087 else
0088
0089 end
0090 end
0091 closest=closest(:,1:end-1);
0092
0093
0094 We now have, for each pattern, a list of channels closest to iChan.
0095 There are a lot of different patterns, so we merge those for which the nClosest
0096 channels are the same.
0097
0098
0099
0100 [C,IA,IC]=unique(closest(:,1:nClosest),'rows');
0101 iBack2=IC(iBack);
0102
0103
0104 We now have a smaller array C of reduced patterns. The
0105 vector iBack2 indicates which data samples correspond to each pattern.
0106
0107
0108
0109 For some patterns, iChan is valid throughout. We can skip these.
0110 For others, only a few samples are invalid. To save time w can skip these
0111 too and fix them later using serial interpolation.
0112
0113
0114 toFix=[];
0115 NSKIP=2;
0116 www=ones(size(x,1),1);
0117 for iPattern=1:size(C,1)
0118
0119 mySamples=find(iBack2==iPattern);
0120 mySamples=mySamples(find(~w(mySamples,iChan)));
0121 if numel(mySamples)<=NSKIP
0122 www(mySamples)=0;
0123 else
0124 toFix=[toFix,iPattern];
0125 end
0126 end
0127 C=C(toFix,:);
0128
0129 for iPattern=1:size(C,1)
0130
0131
0132 Estimate matrix to project iChan on the other channels listed in this
0133 pattern.
0134
0135
0136 oChan=C(iPattern,:);
0137 oChan(find(oChan==0))=[];
0138
0139
0140 mySamples=find(iBack2==toFix(iPattern));
0141 mySamples=mySamples(find(~w(mySamples,iChan)));
0142
0143
0144
0145 iBothValid=all(w(:,[iChan,oChan]),2);
0146 xxx=x(iBothValid, [iChan,oChan]);
0147
0148
0149
0150
0151
0152 if isempty(xxx);
0153 disp([iChan, iPattern]); disp('empty');
0154 continue;
0155 end
0156
0157
0158 mn=mean(xxx,1);
0159 xxx=nt_demean(xxx);
0160 ccc=xxx'*xxx;
0161
0162
0163 [topcs,eigenvalues]=nt_pcarot(ccc(2:end,2:end));
0164 idx=find(eigenvalues/max(eigenvalues) > PCA_THRESH);
0165 topcs=topcs(:,idx);
0166
0167
0168 b=ccc(1,2:end)*topcs / (topcs'*ccc(2:end,2:end)*topcs);
0169
0170
0171 We now have the projection matrix to project channel iChan on channels oChan,
0172 applicable to samples corresponding to this pattern. We can use it
0173 to fix samples for which iChan is invalid.
0174
0175
0176 y(mySamples,iChan) = ...
0177 (x(mySamples,oChan) - repmat(mn(2:end),numel(mySamples),1))...
0178 *(topcs*b') ...
0179 + mn(1);
0180
0181 end
0182
0183
0184 Now we fix the isolated samples that we skipped using serial interpolation.
0185
0186 MAXGAPSIZE=100;
0187 y(:,iChan)=fillgap(y(:,iChan),www,MAXGAPSIZE);
0188
0189 end
0190
0191
0192 y=bsxfun(@times,y,save_amp);
0193 y=bsxfun(@plus,y,save_mean);
0194
0195 yy=y;
0196 y(w~=0) = x0(w~=0);
0197
0198 if ~nargout
0199
0200 disp(nt_wpwr(y)/nt_wpwr(x));
0201 figure(11); clf;
0202 subplot 311; plot(x0); title('raw');
0203 subplot 312; plot(y); title('projected on other channels')
0204 subplot 313; plot(w); ylim([-.1 1.1]); title('weight');
0205 clear w y
0206 end
0207
0208 function [y,w]=fillgap(x,w,maxGapSize)
0209
0210
0211
0212
0213
0214
0215
0216 if nargin<2; error('!'); end
0217 if nargin<3||isempty(maxGapSize); maxGapSize=1; end
0218 if size(x,2)>1; error('!'); end
0219 if size(x) ~= size(w); error('!'); end
0220 y=x;
0221 if all(w); return; end
0222
0223 iToFix=1+find(~w(2:end-1)&w(1:end-2)&w(3:end));
0224 y(iToFix)=(y(iToFix-1)+y(iToFix+1))/2;
0225 w(iToFix)=1;
0226
0227 iStart=find(w(1:end-2) & ~w(2:end-1));
0228 iStop=find(~w(1:end-1) & w(2:end));
0229 if isempty(iStart)||isempty(iStop); return; end
0230 if iStop(1)<iStart(1);
0231 iStop=iStop(2:end);
0232 end
0233 iStart=iStart(1:numel(iStop));
0234 for gapSize=2:maxGapSize
0235 idx=find(iStop-iStart==gapSize);
0236 for k=1:gapSize
0237
0238 y(iStart(idx)+k) = ( y(iStart(idx)) * (gapSize-k+1) + y(iStart(idx)+gapSize+1) * k ) / (gapSize+1);
0239 w(iStart(idx)+k) = 1;
0240 end
0241 end
0242
0243
0244
0245 function [ww,nOccurrences,iBack]=patternDict(w)
0246
0247
0248
0249 [ww,~,IC,nOccurrences]=nt_unique(w,'rows');
0250 [nOccurrences,iSort]=sort(nOccurrences, 'descend');
0251 [~,iReverse]=sort(iSort);
0252 ww=ww(iSort,:);
0253 iBack=iReverse(IC);
0254
0255
0256 if 0
0257 x0=sin(2*pi*(1:10000)'*(1:5)/10000);
0258 x=x0*randn(5,10)+1;
0259 x(1:4000,1)=x(1:4000,1)+0.3*randn(size(x(1:4000,1)));
0260 w=ones(size(x));
0261 w(1:4000,1)=0; w(4001:6000,3)=0;
0262
0263
0264 y=nt_inpaint(x,w);
0265 figure(2); clf
0266 subplot 211; plot([x(:,1),y(:,1)]); legend('raw','clean'); title('one channel'); subplot 212; plot(x(:,1)-y(:,1)); title('raw-clean');
0267 end
0268 if 0
0269 x0=sin(2*pi*(1:10000)'*(1:3)/10000);
0270 x=x0*nt_normcol(randn(3,5));
0271 w=ones(size(x));
0272 x(1:1000,1)=100; w(1:1000,1)=0;
0273 x(2001:3000,1)=100; w(2001:3000,1)=0;
0274 x(1:2000,2)=100; w(1:2000,2)=0;
0275 x=x+0.1*randn(size(x));
0276 [y,yy]=nt_inpaint(x,w);
0277 figure(1); clf
0278 subplot 311; plot(x); title('raw'); subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0279 end
0280 if 0
0281 N=3;
0282 nchans=50;
0283 x=zeros(1100,N);
0284 for k=1:N
0285 x(:,k)=sin(2*pi*k*(1:1100)'/1100);
0286 end
0287 x=x*randn(N,nchans);
0288 x=nt_normcol(x) + 0*randn(size(x));
0289 xx=x;
0290 w=ones(size(x));
0291 for k=1:nchans
0292 xx(k*20+(1:10),k)=100;
0293 w(k*20+(1:10),k)=0;
0294 end
0295 [y,yy]=nt_inpaint(xx,w);
0296 figure(1); clf;
0297 subplot 411; plot(x); title('orig'); subplot 412; plot(xx); title('w glitches');
0298 subplot 413; plot(y); title('clean'); subplot 414; plot(x-y); title('diff w orig');
0299 end
0300 if 0
0301 N=10;
0302 nchans=20;
0303 nsamples=1100;
0304 x=zeros(nsamples,N);
0305 for k=1:N
0306 x(:,k)=sin(2*pi*k*(1:nsamples)'/nsamples);
0307 end
0308 x=x*randn(N,nchans);
0309
0310 xx=x;
0311 w=ones(size(x));
0312 for k=1:nchans
0313 xx(500+k*20+(1:40),k)=100;
0314 w(500+k*20+(1:40),k)=0;
0315 end
0316 [y]=nt_inpaint(xx,w);
0317 figure(1); clf;
0318 subplot 411; plot(x); title('original');
0319 subplot 412; plot(xx); title ('with glitches');
0320 subplot 413; plot (y); title ('fixed');
0321 subplot 414; plot(x-y); title ('error');
0322 end
0323 if 0
0324 [p,x]=nt_read_data('/data/meg/theoldmanandthesea/eeg/mc/MC_aespa_speech_45.mat'); x=x'; x=x(:,1:128); x=x(0+(1:10000),:);
0325
0326
0327 x=nt_demean(x);
0328 [x,w]=nt_detrend(x,10);
0329 profile on; y=nt_inpaint(x,w); profile report;
0330 figure(1); clf
0331 subplot 311; plot(x); title('raw'); subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0332 figure(2); clf
0333 ch=35;subplot 311; plot([x(:,ch),y(:,ch)]); subplot 312; plot(x(:,ch)-y(:,ch)); subplot 313; plot(w(:,ch), '.-');
0334 end
0335
0336