0001 function y=nt_inpaint(x,w)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016 nt_greetings;
0017
0018
0019 PCA_THRESH=10^-25;
0020 minProp=0.5;
0021
0022
0023 We estimate the weighted covariance matrix of the data, then project
0024 invalid (weight=0) samples on the subspace spanned by the valid (weight=1)
0025 samples.
0026
0027 The main obstacle is computational, as there are a very large number of
0028 configurations of valid / invalid channels to cater to.
0029
0030
0031
0032 if nargin<2; error('!'); end
0033
0034 if any(size(w) ~= size(x)); error('w must be of same size as x'); end
0035 if any(w ~= 0 & w ~= 1); error('weights must be 0 or 1'); end
0036 if any(0 == sum(w,2)); warning('for some samples all channels have zero weight'); end
0037 x=nt_unfold(x);
0038 w=nt_unfold(w);
0039
0040 y=x;
0041
0042 for iChan=1:size(x,2)
0043 if 0; fprintf('Fixing channel %d \n',iChan); end
0044
0045
0046 badSamples=~w(:,iChan);
0047 if all(~badSamples); continue; end
0048 minDur=sum(w(:,iChan))*minProp;
0049
0050
0051 iOtherChannels=setdiff(1:size(x,2),iChan);
0052 iOthersAllGood=all(w(:,[iChan, iOtherChannels]),2);
0053 nGoodPerChannel=sum(w(iOthersAllGood,:));
0054 while sum(iOthersAllGood) < minDur
0055 [~, iWorst] = min(nGoodPerChannel(iOtherChannels));
0056 iOtherChannels(iWorst)=[];
0057 iOthersAllGood=all(w(:,[iChan, iOtherChannels]),2);
0058 nGoodPerChannel=sum(w(iOthersAllGood,:));
0059 end
0060 x=nt_demean(x,iOthersAllGood);
0061 mn=y-x;
0062 cc=nt_cov(x(iOthersAllGood,:));
0063
0064
0065 wBad=w(badSamples,:);
0066 xBad=x(badSamples,:);
0067 yBad=y(badSamples,:);
0068
0069
0070 wBad(:,~ismember(1:size(x,2),iOtherChannels))=0;
0071 [~, wwSparse,~,iBackSparse]=sparsePatternDict(wBad);
0072 iBadPatternsSparse=find(~wwSparse(:,iChan));
0073
0074
0075 for iPattern=iBadPatternsSparse'
0076 iGood=find(wwSparse(iPattern,:) & ismember(1:size(x,2),iOtherChannels));
0077 inPattern=iBackSparse==iPattern;
0078
0079 [V,D]=eig(cc(iGood,iGood)); V=real(V); D=real(D);
0080 topcs=V(:,D/max(D) > PCA_THRESH);
0081 b=cc(iChan,iGood)*topcs/(topcs'*cc(iGood,iGood)*topcs);
0082 xx=(xBad(inPattern,iGood));
0083 yBad(inPattern,iChan)=xx*(topcs*b')+mn(inPattern,iChan);
0084 end
0085
0086
0087 y(badSamples,:)=yBad;
0088
0089 end
0090
0091 if ~nargout
0092 figure(1); clf
0093 subplot 211;
0094 plot(x)
0095 subplot 212;
0096 plot(y)
0097 clear y w;
0098 end
0099
0100
0101 function [w, ww,nOccurrences,iBack]=sparsePatternDict(w)
0102
0103
0104 ratio1=1/100;
0105 ratio2=1/100;
0106 minThresh=size(w,2)/3;
0107
0108
0109 [ww, nOccurrences, iBack]=patternDict(w);
0110 iSimPatterns=1:size(ww,1);
0111 maxIterNum=size(ww,1)*ratio1;
0112 minEff=size(ww,1)*ratio2;
0113 iIter=1;
0114
0115
0116 while iIter<=maxIterNum && numel(iSimPatterns)> minEff
0117
0118
0119
0120 sim=ww*ww';
0121 sim_noDiag=reshape(sim(~logical(diag(ones(size(ww,1),1)))),size(ww,1)-1,size(ww,1));
0122
0123
0124
0125 weight=(iIter-1)/maxIterNum;
0126 nThresh=max(minThresh,...
0127 (mean((1+weight)*sim_noDiag(:))+(1-weight)*max(sim_noDiag(:)))/2);
0128
0129
0130 [~, iPat]=max(sum(sim>=nThresh));
0131
0132
0133 iSimPatterns=find(sim(iPat,:)>=nThresh);
0134
0135
0136 inPat=ismember(iBack, iSimPatterns);
0137 w(inPat,:)=repmat(all(ww(iSimPatterns,:),1),sum(inPat),1);
0138
0139
0140 [ww,nOccurrences,iBack]=patternDict(w);
0141 if 0; fprintf('%dth iteration, nDiffThresh=%g, %d patterns regrouped, %d patterns in dictionary\n',...
0142 iIter, nThresh, numel(iSimPatterns), size(ww,1)); end
0143
0144
0145 maxIterNum=min(maxIterNum,size(ww,1)*ratio1+iIter);
0146 minEff=size(ww,1)*ratio2;
0147 iIter=iIter+1;
0148
0149 end
0150
0151
0152 function [ww,nOccurrences,iBack]=patternDict(w)
0153
0154
0155
0156 [ww,~,IC,nOccurrences]=nt_unique(w,'rows');
0157 [nOccurrences,iSort]=sort(nOccurrences, 'descend');
0158 [~,iReverse]=sort(iSort);
0159 ww=ww(iSort,:);
0160 iBack=iReverse(IC);
0161
0162
0163 if 0
0164 x0=sin(2*pi*(1:10000)'*(1:10)/10000);
0165 x=x0*randn(10,20)+1;
0166 x(1:4000,1)=x(1:4000,1)+0.3*randn(size(x(1:4000,1)));
0167 w=ones(size(x));
0168 w(1:4000,1)=0; w(4001:6000,3)=0; w(6000:7000,4)=0; w(7000:8000,5)=0; w(8000:9000,6)=0; w(9000:10000,7)=0;
0169
0170
0171 y=nt_inpaint(x,w);
0172 figure(2); clf
0173 subplot 211; plot([x(:,1),y(:,1)]); legend('raw','clean'); title('one channel'); subplot 212; plot(x(:,1)-y(:,1)); title('raw-clean');
0174 end
0175 if 0
0176 x=randn(10000,10);
0177 w=ones(size(x));
0178 y=nt_inpaint(x,w);
0179 figure(1); clf
0180 subplot 311; plot(x); title('raw'); subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0181 end
0182 if 0
0183 x0=sin(2*pi*(1:10000)'*(1:10)/10000);
0184 x=x0*randn(10,20)+1;
0185 w=ones(size(x));
0186 x(1:1000,1:2)=100; w(1:1000,1:2)=0;
0187 y=nt_inpaint(x,w);
0188 figure(1); clf
0189 subplot 311; plot(x); title('raw'); subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0190 end
0191 if 0
0192 N=30;
0193 nchans=50;
0194 x=zeros(1100,N);
0195 for k=1:N
0196 x(:,k)=sin(2*pi*k*(1:1100)'/1100);
0197 end
0198 x=x*randn(N,50);
0199 xx=x;
0200 w=ones(size(x));
0201 for k=1:nchans
0202 xx(k*20+(1:20),k)=100;
0203 w(k*20+(1:20),k)=0;
0204 end
0205 [y]=nt_inpaint(xx,w);
0206 figure(1); clf;
0207 subplot 311; plot(x); subplot 312; plot(xx); subplot 313; plot(y);
0208 end
0209 if 0
0210 [p,x]=nt_read_data('/data/meg/theoldmanandthesea/eeg/mc/MC_aespa_speech_45.mat'); x=x'; x=x(:,1:128); x=x(1:10000,:);
0211
0212
0213 x=nt_demean(x);
0214 [x,w]=nt_detrend(x,10);
0215 profile on; y=nt_inpaint(x,w); profile report;
0216 figure(1); clf
0217 subplot 311; plot(x); title('raw'); subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0218 figure(2); clf
0219 ch=35;subplot 311; plot([x(:,ch),y(:,ch)]); subplot 312; plot(x(:,ch)-y(:,ch)); subplot 313; plot(w(:,ch), '.-');
0220 end
0221
0222