Home > NoiseTools > nt_split.m

nt_split

PURPOSE ^

[idx,score_vector,score]=nt_split(x,depth,thresh,guard) - split time series into intervals

SYNOPSIS ^

function [idx,score_vector,score]=nt_split(x,depth,thresh,guard)

DESCRIPTION ^

[idx,score_vector,score]=nt_split(x,depth,thresh,guard) - split time series into intervals

  idx: index at which to split
  score: proportion variance reduced
  score_vector: score as a function of split position
  

  x: data (time * channels)
  depth: recursion depth (we find on the order of 2^depth split points).
  thresh: threshold reduction of variance [default: 0]
  guard: number of samples to avoid at each end (counteracts bias towards splitting near ends)
  

  This routine finds the best place to split a time series.
  

 Examples: 
   nt_split(x);            % find point of largest change
   nt_split(x.^2);         % largest change of variance
   nt_split(nt_xprod(x);   % largest change of covariance
   nt_split(nt_xprod(x,'nodiag'); % same, ignoring variance
   nt_split(nt_xprod(nt_normrow(x),'nodiag'); % same, each slice normalized
   nt_split(x,3);          % recurse 3 times (--> 7 split points)
   nt_split(x,3,10);       % same, but splits are at least 10 points from ends
   test_nt_split;          % test script

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function [idx,score_vector,score]=nt_split(x,depth,thresh,guard)
0002 %[idx,score_vector,score]=nt_split(x,depth,thresh,guard) - split time series into intervals
0003 %
0004 %  idx: index at which to split
0005 %  score: proportion variance reduced
0006 %  score_vector: score as a function of split position
0007 %
0008 %
0009 %  x: data (time * channels)
0010 %  depth: recursion depth (we find on the order of 2^depth split points).
0011 %  thresh: threshold reduction of variance [default: 0]
0012 %  guard: number of samples to avoid at each end (counteracts bias towards splitting near ends)
0013 %
0014 %
0015 %  This routine finds the best place to split a time series.
0016 %
0017 %
0018 % Examples:
0019 %   nt_split(x);            % find point of largest change
0020 %   nt_split(x.^2);         % largest change of variance
0021 %   nt_split(nt_xprod(x);   % largest change of covariance
0022 %   nt_split(nt_xprod(x,'nodiag'); % same, ignoring variance
0023 %   nt_split(nt_xprod(nt_normrow(x),'nodiag'); % same, each slice normalized
0024 %   nt_split(x,3);          % recurse 3 times (--> 7 split points)
0025 %   nt_split(x,3,10);       % same, but splits are at least 10 points from ends
0026 %   test_nt_split;          % test script
0027 
0028 if nargin<2||isempty(depth); depth=1; end
0029 if nargin<3||isempty(thresh); thresh=0; end
0030 if nargin<4||isempty(guard); guard=0; end
0031 if ndims(x)>2; error('!'); end
0032 
0033 
0034 if numel(depth)>1; error('!'); end
0035 
0036 [m,n]=size(x);
0037 if m<2*guard; % don't try to split if too small
0038     idx=[]; score_vector=[]; 
0039     return; 
0040 end
0041 % if m==1; idx=1; return; end
0042 % if m==0; idx=[]; return; end
0043 
0044 x=nt_demean(x);
0045 
0046 %{
0047 For each potential split point we calculate the sum of the per-interval
0048 ssq of first and second interval. This is vectorized using cumsum.
0049 %}
0050 
0051 % to minimize memory requirements code is repeated after flipping:
0052 xxx=x;
0053 first_term = cumsum(xxx.^2) - bsxfun(@times, cumsum(xxx).^2,1./(1:m)');
0054 xxx=flipud(x); 
0055 second_term = cumsum(xxx.^2) - bsxfun(@times, cumsum(xxx).^2, 1./(1:m)'); %clear x
0056 score_vector=first_term+second_term(end:-1:1,:);    % sum per-interval ssqs
0057 score_vector=score_vector*diag(1./sum(xxx.^2));  % normalize each dimension
0058 score_vector=mean(score_vector,2);      % average across dimensions
0059 
0060 % find the sweet spot:
0061 %[score0,idx]=min(score_vector(guard+1:end-guard));  idx=idx+guard;
0062 [score0,idx]=min(score_vector);
0063 %disp(['depth: ',num2str(depth), ', score0: ',num2str(score0)]);
0064 if score0>1-thresh ||... % improvement is not good enough
0065         idx<guard || idx>size(x,1)-guard
0066     idx=[]; 
0067     score_vector=[];
0068 end
0069 
0070 if depth>1 && ~isempty(idx)
0071     [a,sv]=nt_split(x(1:idx,:),depth-1,thresh,guard);
0072     [b,sv]=nt_split(x(idx+1:end,:),depth-1,thresh,guard);
0073     idx=[a,idx,idx+b];
0074     idx=unique(idx);
0075 end
0076 
0077 if nargout>2 || nargout==0;
0078     % score = reduction in variance if mean removed from each segment
0079     ssq_total=sum( (x-repmat(mean(x),size(x,1),1)).^2 );
0080     idx2=unique([1,idx,m]); 
0081     ssq=zeros(1,size(x,2));
0082     for iSegment=1:numel(idx2)-1
0083         xx=x(idx2(iSegment):idx2(iSegment+1),:);
0084         ssq=ssq + sum( (xx - repmat(mean(xx),size(xx,1),1) ).^2 );
0085     end
0086     score=(mean(ssq))./mean(ssq_total);
0087 end
0088 
0089 %disp(['nt_split_nargout: ', num2str(nargout)])
0090 
0091 if nargout==0;
0092     disp(['split at ', num2str(idx)]);
0093     disp(['(%: ', num2str(100*idx/m, '  %.01f'), ')'])
0094     disp(['score: ', num2str(score,  '%.01f')]);
0095     
0096     figure(200);
0097     subplot 211
0098     plot(score_vector);
0099     subplot 212
0100     plot(x); drawnow
0101     nt_mark(idx);
0102     if numel(idx)>1; disp(['smallest interval: ', num2str(min(diff(idx)))]); end
0103     clear idx score score_vector
0104 end
0105   
0106 
0107 % TODO:
0108 % - online "bottom-up" calculation (aggregate rather than split)
0109 
0110 
0111

Generated on Mon 30-Jan-2017 18:59:11 by m2html © 2005