Home > NoiseTools > nt_pca_big_old2.m

nt_pca_big_old2

PURPOSE ^

[topcs,pwr,y]=nt_pca_big(x,chunksize,npcs,normflag) - approximate PCA of big data

SYNOPSIS ^

function [topcs,pwr,y]=nt_pca_big(x,chunksize,npcs,aggregate,normflag)

DESCRIPTION ^

[topcs,pwr,y]=nt_pca_big(x,chunksize,npcs,normflag) - approximate PCA of big data

  r: matrix to project to PCs
  pwr: power per PC
  y: PCs

  x: data
  chunksize: chunk size [default: 100]
  npcs: number of PCs to return [default: chunksize]
  aggregate: 'local' [default], 'global', or 'globmean'
  normflag: normalize at each level [default: don't]

 Data are processed recursively in chunks of N columns.  Each chunk is
 submitted to PCA and either N/2 PCs are retained ('local') or 1 PC is
 retained ('global').  If 'globmean' the data are averaged over chunks 
 of nchans/N channels, then PCA

 NoiseTools

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function [topcs,pwr,y]=nt_pca_big(x,chunksize,npcs,aggregate,normflag)
0002 %[topcs,pwr,y]=nt_pca_big(x,chunksize,npcs,normflag) - approximate PCA of big data
0003 %
0004 %  r: matrix to project to PCs
0005 %  pwr: power per PC
0006 %  y: PCs
0007 %
0008 %  x: data
0009 %  chunksize: chunk size [default: 100]
0010 %  npcs: number of PCs to return [default: chunksize]
0011 %  aggregate: 'local' [default], 'global', or 'globmean'
0012 %  normflag: normalize at each level [default: don't]
0013 %
0014 % Data are processed recursively in chunks of N columns.  Each chunk is
0015 % submitted to PCA and either N/2 PCs are retained ('local') or 1 PC is
0016 % retained ('global').  If 'globmean' the data are averaged over chunks
0017 % of nchans/N channels, then PCA
0018 %
0019 % NoiseTools
0020 
0021 nt_greetings();
0022 
0023 %disp([size(x,2),chunksize,npcs])
0024 
0025 if nargin<1; error('!'); end
0026 if nargin<2||isempty(chunksize); 
0027     chunksize=100;
0028     disp('defaulting to chunksize=30');
0029 end
0030 if nargin<3||isempty(npcs); 
0031     npcs=chunksize;
0032     disp('defaulting to npcs=chunksize');
0033 end
0034 if nargin<4||isempty(aggregate); aggregate='local'; end
0035 if nargin<5||isempty(normflag); normflag=0; end
0036 
0037 [nsamples,nchans,ntrials]=size(x); 
0038 x=nt_unfold(x);
0039 
0040 if npcs ~= 2*round(npcs/2);
0041     npcs=2*round(npcs/2);
0042     disp(['npcs -->',num2str(npcs)]);
0043 end
0044 if normflag;
0045     tonorm=1./sqrt(mean(x.^2)); 
0046     tonorm(find(isnan(tonorm)))=0;
0047     x=bsxfun(@times,x,tonorm);
0048 end
0049 if ~ (strcmp(aggregate,'local') || strcmp(aggregate,'global') || strcmp(aggregate,'globmean'))
0050     error('!');
0051 end
0052 
0053 if nchans<=chunksize; 
0054     [topcs,pwr]=nt_pca0(x);
0055 else
0056     if strcmp(aggregate,'local')
0057         topcs=zeros(nchans,npcs);
0058         a=round(nchans/2);
0059         tp=nt_pca_big(x(:,1:a),chunksize,npcs,aggregate,normflag);
0060         topcs(1:a,1:npcs/2)=tp(:,1:npcs/2);
0061         tp=nt_pca_big(x(:,a+1:end),chunksize,npcs,aggregate,normflag);
0062         topcs(a+1:end,npcs/2+1:end)=tp(:,1:npcs/2);
0063         [topcs2,pwr,z]=nt_pca_big(x*topcs,chunksize,npcs,aggregate,normflag);
0064         topcs=topcs*topcs2;
0065     elseif strcmp(aggregate,'global') 
0066         nChunks=ceil(nchans/chunksize);
0067         nKeep=ceil(npcs/nChunks);
0068         topcs=zeros(nchans,npcs);
0069         for iChunk=1:nChunks
0070             a=(iChunk-1)*chunksize;
0071             idx=(a+1):min(a+chunksize,nchans);
0072             tp=nt_pca_big(x(:,idx),chunksize,npcs,aggregate,normflag);
0073             topcs(idx,(iChunk-1)*nKeep+(1:nKeep))=tp(:,1:nKeep);
0074         end
0075         figure(1); clf; nt_imagescc(topcs); pause
0076         [topcs2,pwr,z]=nt_pca_big(x*topcs,chunksize,npcs,aggregate,normflag);
0077         topcs=topcs*topcs2;
0078     else % globmean
0079         nChunks=ceil(nchans/(chunksize));
0080         topcs=zeros(nchans,npcs);
0081         for iChunk=1:nChunks
0082             a=(iChunk-1)*chunksize;
0083             idx=(a+1):min(a+chunksize,nchans);
0084             topcs(idx,iChunk)=1/numel(idx);
0085             %disp(idx)
0086         end
0087         %figure(1); clf; nt_imagescc(topcs);
0088         [topcs2,pwr,z]=nt_pca_big(x*topcs,chunksize,npcs,aggregate,normflag);
0089         topcs=topcs*topcs2;
0090     end
0091         
0092 end
0093 if size(topcs,2)>=npcs; 
0094     topcs=topcs(:,1:npcs); % truncate to save space
0095     pwr=pwr(1:npcs);
0096 else
0097     topcs(:,end:npcs)=0; % should rarely happen
0098     pwr(end:npcs)=0;
0099 end
0100    
0101 %disp([size(topcs)])
0102 
0103 if normflag; topcs=bsxfun(@times,topcs,tonorm'); end
0104 
0105 if nargout>1; y=x*topcs; y=nt_fold(y,nsamples);end
0106 
0107 if ~nargout % don't return, just plot something
0108     y=nt_unfold(y);
0109     semilogy(mean(y.^2), '.-');
0110     xlabel('pc'); ylabel('power');
0111     figure(2); clf
0112     plot(y); xlabel('sample');
0113     figure(3); clf
0114     nt_imagescc(nt_normcol(x'*y)'); 
0115     ylabel('pc'); xlabel('channel');
0116     clear topcs y
0117 end
0118 
0119 
0120 
0121 
0122 
0123 
0124 %% test code
0125 if 0
0126     % basic
0127     x=randn(10000,3000);
0128     N=20; % blocksize
0129     [topcs,pwr,y]=nt_pca_big(x,N,[],'local');
0130     figure(1); clf; plot(pwr);
0131     [topcs,pwr,y]=nt_pca_big(x,N,[],'global');
0132     figure(2); clf; plot(pwr);
0133     [topcs,pwr,y]=nt_pca_big(x,N,[],'globmean');
0134     figure(3); clf; plot(pwr);
0135 end
0136 if 0 
0137     x=sin(2*pi*3*(1:1000)'/1000)*randn(1,1000);
0138     x=2*x + randn(size(x));
0139     N=30; 
0140     [topcs,pwr,y]=nt_pca_big(x,N);
0141     figure(1); plot(pwr);
0142     figure(2); subplot 121; plot(x); subplot 122; plot(x*topcs);
0143 end   
0144 if 0 
0145     nchans=100000;
0146     nnoise=900;
0147     x=repmat(sin(2*pi*3*(1:1000)'/1000), 1, nchans);
0148     x= 0.06*nt_normcol(x) + nt_normcol(randn(size(x,1),nnoise)*randn(nnoise,nchans));
0149     N=200;
0150     [topcs,pwr,y]=nt_pca_big(x,N,'global');
0151     figure(1); plot(pwr);
0152     figure(2); subplot 121; plot(x(:,1:100)); subplot 122; plot(y(:,1));
0153 end   
0154 if 0 
0155     nchans=100000;
0156     nnoise=900;
0157     x=repmat(sin(2*pi*3*(1:1000)'/1000), 1, nchans);
0158     x= 0.01*nt_normcol(x) + nt_normcol(randn(size(x,1),nnoise)*randn(nnoise,nchans));
0159     [topcs,pwr,y]=nt_pca_big(x,100,'globmean');
0160     figure(1); plot(pwr);
0161     figure(2); subplot 121; plot(x(:,1:100)); subplot 122; plot(y(:,1));
0162 end   
0163

Generated on Fri 02-Dec-2016 14:27:04 by m2html © 2005