Home > NoiseTools > nt_cca.m

nt_cca

PURPOSE ^

[A,B,R]=nt_cca(x,y,lags,C,m,thresh) - canonical correlation

SYNOPSIS ^

function [A,B,R]=nt_cca(x,y,lags,C,m,thresh)

DESCRIPTION ^

[A,B,R]=nt_cca(x,y,lags,C,m,thresh) - canonical correlation

  A, B: transform matrices
  R: r scores

  x,y: column matrices
  lags: positive lag means y delayed relative to x
  C: covariance matrix of [x, y]
  m: number of columns of x
  thresh: discard PCs below this 

  Usage 1:
   [A,B,R]=nt_cca(x,y); % CCA of x, y

  Usage 2: 
   [A,B,R]=nt_cca(x,y,lags); % CCA of x, y for each value of lags.
   A positive lag indicates that y is delayed relative to x.

  Usage 3:
   C=[x,y]'*[x,y]; % covariance
   [A,B,R]=nt_cca([],[],[],C,size(x,2)); % CCA of x,y

 Use the third form to handle multiple files or large data
 (covariance C can be calculated chunk-by-chunk). 

 C can be 3-D, which case CCA is derived independently from each page.

 Warning: means of x and y are NOT removed.
 Warning: A, B scaled so that (x*A)^2 and (y*B)^2 are identity matrices (differs from canoncorr).

 See nt_cov_lags, nt_relshift, nt_cov, nt_pca.

 NoiseTools.

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function [A,B,R]=nt_cca(x,y,lags,C,m,thresh)
0002 %[A,B,R]=nt_cca(x,y,lags,C,m,thresh) - canonical correlation
0003 %
0004 %  A, B: transform matrices
0005 %  R: r scores
0006 %
0007 %  x,y: column matrices
0008 %  lags: positive lag means y delayed relative to x
0009 %  C: covariance matrix of [x, y]
0010 %  m: number of columns of x
0011 %  thresh: discard PCs below this
0012 %
0013 %  Usage 1:
0014 %   [A,B,R]=nt_cca(x,y); % CCA of x, y
0015 %
0016 %  Usage 2:
0017 %   [A,B,R]=nt_cca(x,y,lags); % CCA of x, y for each value of lags.
0018 %   A positive lag indicates that y is delayed relative to x.
0019 %
0020 %  Usage 3:
0021 %   C=[x,y]'*[x,y]; % covariance
0022 %   [A,B,R]=nt_cca([],[],[],C,size(x,2)); % CCA of x,y
0023 %
0024 % Use the third form to handle multiple files or large data
0025 % (covariance C can be calculated chunk-by-chunk).
0026 %
0027 % C can be 3-D, which case CCA is derived independently from each page.
0028 %
0029 % Warning: means of x and y are NOT removed.
0030 % Warning: A, B scaled so that (x*A)^2 and (y*B)^2 are identity matrices (differs from canoncorr).
0031 %
0032 % See nt_cov_lags, nt_relshift, nt_cov, nt_pca.
0033 %
0034 % NoiseTools.
0035 
0036 nt_greetings; 
0037 
0038 if ~exist('thresh','var');
0039     thresh=10.^-12; 
0040 end
0041 
0042 if exist('x','var') && ~isempty(x)
0043     % Calculate covariance of [x,y]
0044     if ~exist('y','var'); error('!'); end
0045     if ~exist('lags','var')||isempty('lags'); lags=[0]; end
0046     if numel(lags)==1 && lags==0 && isnumeric(x) && ndims(x)==2; 
0047         C=[x,y]'*[x,y]; % simple case
0048         m=size(x,2); 
0049     else
0050         [C,~,m]=nt_cov_lags(x,y,lags); % lags, multiple trials, etc.
0051     end
0052     [A,B,R]=nt_cca([],[],[],C,m,thresh);
0053     
0054     if nargout==0 
0055         % plot something nice
0056         if length(lags)>1;
0057             figure(1); clf;
0058             plot(R'); title('correlation for each CC'); xlabel('lag'); ylabel('correlation');
0059         end
0060      end
0061     return
0062 end % else keep going
0063 
0064 if ~exist('C','var') || isempty(C) ; error('!'); end
0065 if ~exist('m','var'); error('!'); end
0066 if size(C,1)~=size(C,2); error('!'); end
0067 if ~isempty(x) || ~isempty(y) || ~isempty(lags)  ; error('!'); end
0068 if ndims(C)>3; error('!'); end
0069 
0070 if ndims(C) == 3
0071     % covariance is 3D: do a separate CCA for each page
0072     N=min(m,size(C,1)-m); % note that for some pages there may be fewer than N CCs
0073     A=zeros(m,N,size(C,3));
0074     B=zeros(size(C,1)-m,N,size(C,3));
0075     R=zeros(N,size(C,3));
0076     for k=1:size(C,3);
0077         [AA,BB,RR]=nt_cca([],[],[],C(:,:,k),m);
0078         A(1:size(AA,1),1:size(AA,2),k)=AA;
0079         B(1:size(BB,1),1:size(BB,2),k)=BB;
0080         R(1:size(RR,2),k)=RR;
0081     end
0082     return;
0083 end % else keep going
0084 
0085 
0086 %%
0087 % Calculate CCA given C=[x,y]'*[x,y] and m=size(x,2);
0088 
0089 % sphere x
0090 Cx=C(1:m,1:m);
0091 [V, S] = eig(Cx) ;  
0092 V=real(V); S=real(S);
0093 [E, idx] = sort(diag(S)', 'descend') ;
0094 keep=find(E/max(E)>thresh);
0095 topcs = V(:,idx(keep));
0096 E = E (keep);
0097 EXP=1-10^-12; 
0098 E=E.^EXP; % break symmetry when x and y perfectly correlated (otherwise cols of x*A and y*B are not orthogonal)
0099 A1=topcs*diag(sqrt((1./E)));
0100 
0101 % sphere y
0102 Cy=C(m+1:end,m+1:end);
0103 [V, S] = eig(Cy) ;  
0104 V=real(V); S=real(S);
0105 [E, idx] = sort(diag(S)', 'descend') ;
0106 keep=find(E/max(E)>thresh);
0107 topcs = V(:,idx(keep));
0108 E = E (keep);
0109 E=E.^EXP; %
0110 A2=topcs*diag(sqrt((1./E)));
0111 
0112 % apply sphering matrices to C
0113 AA=zeros( size(A1,1)+size(A2,1), size(A1,2)+size(A2,2) );
0114 AA( 1:size(A1,1), 1:size(A1,2) )=A1;
0115 AA( size(A1,1)+1:end, size(A1,2)+1:end )=A2;
0116 C= AA' * C * AA;
0117 
0118 N=min(size(A1,2),size(A2,2)); % number of canonical components
0119 
0120 % PCA
0121 [V, S] = eig(C) ;
0122 %[V, S] = eigs(C,N) ; % not faster
0123 V=real(V); S=real(S);
0124 [E, idx] = sort(diag(S)', 'descend') ;
0125 topcs = V(:,idx);
0126 
0127 A=A1*topcs(1:size(A1,2),1:N)*sqrt(2);  % why sqrt(2)?...
0128 B=A2*topcs(size(A1,2)+1:end,1:N)*sqrt(2);
0129 R=E(1:N)-1; 
0130 
0131 
0132 %{
0133 Why does it work?
0134 If x and y were uncorrelated, eigenvalues E would be all ones. 
0135 Correlated dimensions (the canonical correlates) should give values E>1, 
0136 i.e. they should map to the first PCs. 
0137 To obtain CCs we just select the first N PCs. 
0138 %}
0139 
0140 %%
0141 
0142 %%
0143 % test code
0144 if 0
0145     % basic
0146     clear
0147     x=randn(10000,20);
0148     y=randn(10000,8);
0149     y(:,1:2)=x(:,1:2); % perfectly correlated
0150     y(:,3:4)=x(:,3:4)+randn(10000,2); % 1/2 correlated
0151     y(:,5:6)=x(:,5:6)+randn(10000,2)*3; % 1/4 correlated
0152     y(:,7:8)=randn(10000,2); % uncorrelated
0153     [A,B,R]=nt_cca(x,y);
0154     figure(1); clf
0155     subplot 321; imagesc(A); title('A');
0156     subplot 322; imagesc(B); title('B');
0157     subplot 323; plot(R, '.-'); title('R')
0158     subplot 324; imagescc((x*A)'*(x*A)); title ('covariance of x*A');
0159     subplot 325; imagescc((y*B)'*(y*B)); title ('covariance of y*B');
0160     subplot 326; imagescc([x*A,y*B]'*[x*A,y*B]); title ('covariance of [x*A,y*B]');
0161 end
0162 
0163 if 0 
0164     % compare with canoncorr
0165     clear
0166     x=randn(1000,11);
0167     y=randn(1000,9);
0168     x=x-repmat(mean(x),size(x,1),1); % center, otherwise result may differ slightly from canoncorr
0169     y=y-repmat(mean(y),size(y,1),1);
0170     [A1,B1,R1]=canoncorr(x,y);
0171     [A2,B2,R2]=nt_cca(x,y);   
0172     A2=A2*sqrt(size(x,1)); % scale like canoncorr
0173     B2=B2*sqrt(size(y,1));
0174     figure(1); clf; 
0175     subplot 211; 
0176     plot([R1' R2']); title('R'); legend({'canoncorr', 'nt_cca'}, 'Interpreter','none'); 
0177     if mean(A1(:,1).*A2(:,1))<0; A2=-A2; end
0178     subplot 212; 
0179     plot(([x*A1(:,1),x*A2(:,1)])); title('first component'); legend({'canoncorr', 'nt_cca'}, 'Interpreter','none'); 
0180     figure(2); clf;set(gcf,'defaulttextinterpreter','none')
0181     subplot 121; 
0182     imagescc([x*A1,y*B1]'*[x*A1,y*B1]); title('canoncorr'); 
0183     subplot 122; 
0184     imagescc([x*A2,y*B2]'*[x*A2,y*B2]); title('nt_cca');
0185 end
0186 
0187 if 0
0188     % time
0189     x=randn(100000,100); 
0190     tic; 
0191     [A,B,R]=nt_cca(x,x); 
0192     disp('nt_cca time: ');
0193     toc    
0194     [A,B,R]=canoncorr(x,x); 
0195     disp('canoncorr time: ');
0196     toc
0197 %     [A,B,R]=cca(x,x);
0198 %     disp('cca time: ');
0199 %     toc
0200 end
0201 
0202 if 0
0203     % lags
0204     x=randn(1000,10);
0205     y=randn(1000,10);
0206     y(:,1:3)=x(:,1:3);
0207     lags=-10:10;
0208     [A1,B1,R1]=nt_cca(x,y,lags);
0209     figure(1); clf
0210     plot(lags,R1'); xlabel('lag'); ylabel('R');
0211 end
0212 
0213 if 0
0214     % what happens if x & y perfectly correlated?
0215     x=randn(1000,10);
0216     y=randn(1000,10); y=x(:,randperm(10)); %+0.000001*y;
0217     [A1,B1,R1]=nt_cca(x,y);
0218     figure(1); clf
0219     imagescc([x*A1,y*B1]'*[x*A1,y*B1]);
0220 end    
0221 
0222 if 0
0223     % x and y are cell arrays
0224     x=randn(1000,10); 
0225     y=randn(1000,10);
0226     xx={x,x,x};  yy={x,y,y};
0227     [A,B,R]=nt_cca(xx,yy);
0228     disp('seems to work...');
0229 end
0230 
0231     
0232

Generated on Sun 11-Dec-2016 18:36:17 by m2html © 2005