0001 function y=nt_inpaint(x,w)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016 nt_greetings;
0017
0018
0019 PCA_THRESH=10^-25;
0020 minProp=0.5;
0021
0022
0023 We estimate the weighted covariance matrix of the data, then project
0024 invalid (weight=0) samples on the subspace spanned by the valid (weight=1)
0025 samples.
0026
0027 The main obstacle is computational, as there are a very large number of
0028 configurations of valid / invalid channels to cater to.
0029
0030
0031
0032 if nargin<2; error('!'); end
0033
0034 if any(size(w) ~= size(x)); error('w must be of same size as x'); end
0035 if any(w ~= 0 & w ~= 1); error('weights must be 0 or 1'); end
0036 if any(0 == sum(w,2)); warning('for some samples all channels have zero weight'); end
0037 x=nt_unfold(x);
0038 w=nt_unfold(w);
0039
0040 y=x;
0041
0042 for iChan=1:size(x,2)
0043 if 0; fprintf('Fixing channel %d \n',iChan); end
0044
0045
0046 badSamples=~w(:,iChan);
0047 if all(~badSamples); continue; end
0048 minDur=sum(w(:,iChan))*minProp;
0049
0050
0051 iOtherChannels=setdiff(1:size(x,2),iChan);
0052 iOthersAllGood=all(w(:,[iChan, iOtherChannels]),2);
0053 nGoodPerChannel=sum(w(iOthersAllGood,:));
0054 while sum(iOthersAllGood) < minDur
0055 [~, iWorst] = min(nGoodPerChannel(iOtherChannels));
0056 iOtherChannels(iWorst)=[];
0057 iOthersAllGood=all(w(:,[iChan, iOtherChannels]),2);
0058 nGoodPerChannel=sum(w(iOthersAllGood,:));
0059 end
0060 x=nt_demean(x,iOthersAllGood);
0061 mn=y-x;
0062 cc=nt_cov(x(iOthersAllGood,:));
0063
0064
0065 wBad=w(badSamples,:);
0066 xBad=x(badSamples,:);
0067 yBad=y(badSamples,:);
0068
0069
0070 wBad(:,~ismember(1:size(x,2),iOtherChannels))=0;
0071 [~, wwSparse,~,iBackSparse]=sparsePatternDict(wBad);
0072 iBadPatternsSparse=find(~wwSparse(:,iChan));
0073
0074
0075 for iPattern=iBadPatternsSparse'
0076 iGood=find(wwSparse(iPattern,:) & ismember(1:size(x,2),iOtherChannels));
0077 inPattern=iBackSparse==iPattern;
0078
0079 [V,D]=eig(cc(iGood,iGood)); V=real(V); D=real(D);
0080 topcs=V(:,D/max(D) > PCA_THRESH);
0081 b=cc(iChan,iGood)*topcs/(topcs'*cc(iGood,iGood)*topcs);
0082 xx=(xBad(inPattern,iGood));
0083 yBad(inPattern,iChan)=xx*(topcs*b')+mn(inPattern,iChan);
0084 end
0085
0086
0087 y(badSamples,:)=yBad;
0088
0089 end
0090
0091
0092 function [w, ww,nOccurrences,iBack]=sparsePatternDict(w)
0093
0094
0095 ratio1=1/100;
0096 ratio2=1/100;
0097 minThresh=size(w,2)/3;
0098
0099
0100 [ww, nOccurrences, iBack]=patternDict(w);
0101 iSimPatterns=1:size(ww,1);
0102 maxIterNum=size(ww,1)*ratio1;
0103 minEff=size(ww,1)*ratio2;
0104 iIter=1;
0105
0106
0107 while iIter<=maxIterNum && numel(iSimPatterns)> minEff
0108
0109
0110
0111 sim=ww*ww';
0112 sim_noDiag=reshape(sim(~logical(diag(ones(size(ww,1),1)))),size(ww,1)-1,size(ww,1));
0113
0114
0115
0116 weight=(iIter-1)/maxIterNum;
0117 nThresh=max(minThresh,...
0118 (mean((1+weight)*sim_noDiag(:))+(1-weight)*max(sim_noDiag(:)))/2);
0119
0120
0121 [~, iPat]=max(sum(sim>=nThresh));
0122
0123
0124 iSimPatterns=find(sim(iPat,:)>=nThresh);
0125
0126
0127 inPat=ismember(iBack, iSimPatterns);
0128 w(inPat,:)=repmat(all(ww(iSimPatterns,:),1),sum(inPat),1);
0129
0130
0131 [ww,nOccurrences,iBack]=patternDict(w);
0132 if 0; fprintf('%dth iteration, nDiffThresh=%g, %d patterns regrouped, %d patterns in dictionary\n',...
0133 iIter, nThresh, numel(iSimPatterns), size(ww,1)); end
0134
0135
0136 maxIterNum=min(maxIterNum,size(ww,1)*ratio1+iIter);
0137 minEff=size(ww,1)*ratio2;
0138 iIter=iIter+1;
0139
0140 end
0141
0142
0143 function [ww,nOccurrences,iBack]=patternDict(w)
0144
0145
0146
0147 [ww,~,IC,nOccurrences]=nt_unique(w,'rows');
0148 [nOccurrences,iSort]=sort(nOccurrences, 'descend');
0149 [~,iReverse]=sort(iSort);
0150 ww=ww(iSort,:);
0151 iBack=iReverse(IC);
0152
0153
0154 if 0
0155 x0=sin(2*pi*(1:10000)'*(1:10)/10000);
0156 x=x0*randn(10,20)+1;
0157 x(1:4000,1)=x(1:4000,1)+0.3*randn(size(x(1:4000,1)));
0158 w=ones(size(x));
0159 w(1:4000,1)=0; w(4001:6000,3)=0; w(6000:7000,4)=0; w(7000:8000,5)=0; w(8000:9000,6)=0; w(9000:10000,7)=0;
0160
0161
0162 y=nt_inpaint(x,w);
0163 figure(2); clf
0164 subplot 211; plot([x(:,1),y(:,1)]); legend('raw','clean'); title('one channel'); subplot 212; plot(x(:,1)-y(:,1)); title('raw-clean');
0165 end
0166 if 0
0167 x=randn(10000,10);
0168 w=ones(size(x));
0169 y=nt_inpaint(x,w);
0170 figure(1); clf
0171 subplot 311; plot(x); title('raw'); subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0172 end
0173 if 0
0174 x0=sin(2*pi*(1:10000)'*(1:10)/10000);
0175 x=x0*randn(10,20)+1;
0176 w=ones(size(x));
0177 x(1:1000,1:2)=100; w(1:1000,1:2)=0;
0178 y=nt_inpaint(x,w);
0179 figure(1); clf
0180 subplot 311; plot(x); title('raw'); subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0181 end
0182 if 0
0183 [p,x]=nt_read_data('/data/meg/theoldmanandthesea/eeg/mc/MC_aespa_speech_45.mat'); x=x'; x=x(:,1:128); x=x(1:10000,:);
0184
0185
0186 x=nt_demean(x);
0187 [x,w]=nt_detrend(x,10);
0188 profile on; y=nt_inpaint(x,w); profile report;
0189 figure(1); clf
0190 subplot 311; plot(x); title('raw'); subplot 312; plot(y); title('clean'); subplot 313; plot(x-y); title('raw-clean');
0191 figure(2); clf
0192 ch=35;subplot 311; plot([x(:,ch),y(:,ch)]); subplot 312; plot(x(:,ch)-y(:,ch)); subplot 313; plot(w(:,ch), '.-');
0193 end
0194
0195