Home > NoiseTools > nt_pca_big.m

nt_pca_big

PURPOSE ^

[topcs,pwr,y]=nt_pca_big(x,N,normflag) - approximate PCA of big data

SYNOPSIS ^

function [topcs,pwr,y]=nt_pca_big(x,N,aggregate,normflag)

DESCRIPTION ^

[topcs,pwr,y]=nt_pca_big(x,N,normflag) - approximate PCA of big data

  r: matrix to project to PCs
  pwr: power per PC
  y: PCs

  x: data
  N: chunk size [default: 100]
  aggregate: 'local' [default], 'global', or 'globmean'
  normflag: normalize at each level [default: don't]

 Data are processed recursively in chunks of N columns.  Each chunk is
 submitted to PCA and either N/2 PCs are retained ('local') or 1 PC is
 retained ('global').  If 'globmean' the data are averaged over chunks 
 of nchans/N channels, then PCA

 NoiseTools

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function [topcs,pwr,y]=nt_pca_big(x,N,aggregate,normflag)
0002 %[topcs,pwr,y]=nt_pca_big(x,N,normflag) - approximate PCA of big data
0003 %
0004 %  r: matrix to project to PCs
0005 %  pwr: power per PC
0006 %  y: PCs
0007 %
0008 %  x: data
0009 %  N: chunk size [default: 100]
0010 %  aggregate: 'local' [default], 'global', or 'globmean'
0011 %  normflag: normalize at each level [default: don't]
0012 %
0013 % Data are processed recursively in chunks of N columns.  Each chunk is
0014 % submitted to PCA and either N/2 PCs are retained ('local') or 1 PC is
0015 % retained ('global').  If 'globmean' the data are averaged over chunks
0016 % of nchans/N channels, then PCA
0017 %
0018 % NoiseTools
0019 nt_greetings();
0020 
0021 if nargin<1; error('!'); end
0022 if nargin<2||isempty(N); 
0023     N=100;
0024     disp('defaulting to N=100');
0025 end
0026 if nargin<3||isempty(aggregate); aggregate='local'; end
0027 if nargin<4||isempty(normflag); normflag=0; end
0028 
0029 [nsamples,nchans,ntrials]=size(x); 
0030 x=nt_unfold(x);
0031 
0032 if normflag;
0033     tonorm=1./sqrt(mean(x.^2)); 
0034     tonorm(find(isnan(tonorm)))=0;
0035     x=bsxfun(@times,x,tonorm);
0036 end
0037 if ~ (strcmp(aggregate,'local') || strcmp(aggregate,'global') || strcmp(aggregate,'globmean'))
0038     error('!');
0039 end
0040 
0041 
0042 
0043 if nchans<=N; 
0044     [topcs,pwr]=nt_pca0(x);
0045 else
0046     if strcmp(aggregate,'local')
0047         topcs=zeros(nchans,N);
0048         a=round(nchans/2);
0049         NN=round(N/2);
0050         tp=nt_pca_big(x(:,1:a),N,aggregate,normflag);
0051         [topcs(1:a,1:NN)]=tp(:,1:NN);
0052         tp=nt_pca_big(x(:,a+1:end),N,aggregate,normflag);
0053         [topcs(a+1:end,NN+1:N)]=tp(:,1:(N-NN));
0054         [topcs2,pwr,z]=nt_pca_big(x*topcs,N,aggregate,normflag);
0055         topcs=topcs*topcs2;
0056     elseif strcmp(aggregate,'global') 
0057         nChunks=ceil(nchans/(N));
0058         nKeep=ceil(N/nChunks);
0059         topcs=zeros(nchans,nChunks*nKeep);
0060         for iChunk=1:nChunks
0061             a=(iChunk-1)*N;
0062             idx=(a+1):min(a+N,nchans);
0063             tp=nt_pca_big(x(:,idx),N,aggregate,normflag);
0064             topcs(idx,(iChunk-1)*nKeep+(1:nKeep))=tp(:,1:nKeep);
0065         end
0066         %figure(1); clf; nt_imagescc(topcs); pause
0067         [topcs2,pwr,z]=nt_pca_big(x*topcs,N,aggregate,normflag);
0068         topcs=topcs*topcs2;
0069     else % globmean
0070         nChunks=ceil(nchans/N);
0071         topcs=zeros(nchans,N);
0072         for iChunk=1:nChunks
0073             a=(iChunk-1)*N;
0074             idx=(a+1):min(a+N,nchans);
0075             topcs(idx,iChunk)=1/numel(idx);
0076             %disp(idx)
0077         end
0078         %figure(1); clf; nt_imagescc(topcs); pause
0079         %[topcs2,pwr,z]=nt_pca_big(x*topcs,N,'local',normflag);
0080         [topcs2,pwr,z]=nt_pca0(x*topcs);
0081         topcs=topcs*topcs2;
0082     end
0083         
0084 end
0085 if ~strcmp(aggregate,'globmean') 
0086     if size(topcs,2)>=N; 
0087         topcs=topcs(:,1:N); % truncate to save space
0088         pwr=pwr(1:N);
0089     else
0090         topcs(:,end:N)=0; % should rarely happen
0091         pwr(end:N)=0;
0092     end
0093 end
0094 
0095 %disp([size(topcs)])
0096 
0097 if normflag; topcs=bsxfun(@times,topcs,tonorm'); end
0098 
0099 if nargout>1; y=x*topcs; y=nt_fold(y,nsamples);end
0100 
0101 if ~nargout % don't return, just plot something
0102     y=nt_unfold(y);
0103     semilogy(mean(y.^2), '.-');
0104     xlabel('pc'); ylabel('power');
0105     figure(2); clf
0106     plot(y); xlabel('sample');
0107     figure(3); clf
0108     nt_imagescc(nt_normcol(x'*y)'); 
0109     ylabel('pc'); xlabel('channel');
0110     clear topcs y
0111 end
0112 
0113 
0114 
0115 
0116 
0117 
0118 %% test code
0119 if 0
0120     % basic
0121     x=randn(10000,3000);
0122     N=20; % blocksize
0123     [topcs,pwr,y]=nt_pca_big(x,N,'local');
0124     figure(1); clf; plot(pwr);
0125     [topcs,pwr,y]=nt_pca_big(x,N,'global');
0126     figure(2); clf; plot(pwr);
0127     [topcs,pwr,y]=nt_pca_big(x,N,'globmean');
0128     figure(3); clf; plot(pwr);
0129 end
0130 if 0 
0131     x=sin(2*pi*3*(1:1000)'/1000)*randn(1,1000);
0132     x=2*x + randn(size(x));
0133     N=30; 
0134     [topcs,pwr,y]=nt_pca_big(x,N);
0135     figure(1); plot(pwr);
0136     figure(2); subplot 121; plot(x); subplot 122; plot(x*topcs);
0137 end   
0138 if 0 
0139     nchans=100000;
0140     nnoise=900;
0141     x=repmat(sin(2*pi*3*(1:1000)'/1000), 1, nchans);
0142     x= 0.06*nt_normcol(x) + nt_normcol(randn(size(x,1),nnoise)*randn(nnoise,nchans));
0143     N=200;
0144     [topcs,pwr,y]=nt_pca_big(x,N,'global');
0145     figure(1); plot(pwr);
0146     figure(2); subplot 121; plot(x(:,1:100)); subplot 122; plot(y(:,1));
0147 end   
0148 if 0 
0149     nchans=100000;
0150     nnoise=900;
0151     x=repmat(sin(2*pi*3*(1:1000)'/1000), 1, nchans);
0152     x= 0.01*nt_normcol(x) + nt_normcol(randn(size(x,1),nnoise)*randn(nnoise,nchans));
0153     [topcs,pwr,y]=nt_pca_big(x,100,'globmean');
0154     figure(1); plot(pwr);
0155     figure(2); subplot 121; plot(x(:,1:100)); subplot 122; plot(y(:,1));
0156 end   
0157

Generated on Tue 09-May-2017 13:19:57 by m2html © 2005