Home > NoiseTools > nt_split.m

nt_split

PURPOSE ^

[idx,score_vector,score]=nt_split(x,depth,thresh,guard) - split time series into intervals

SYNOPSIS ^

function [idx,score_vector,score]=nt_split(x,depth,thresh,guard)

DESCRIPTION ^

[idx,score_vector,score]=nt_split(x,depth,thresh,guard) - split time series into intervals

  idx: index at which to split
  score: proportion variance reduced
  score_vector: score as a function of split position
  

  x: data (time * channels)
  depth: recursion depth (we find on the order of 2^depth split points).
  thresh: threshold reduction of variance [default: 0]
  guard: number of samples to avoid at each end (counteracts bias towards splitting near ends)
  

  This routine finds the best place to split a time series.
  

 Examples: 
   nt_split(x);            % find point of largest change
   nt_split(x.^2);         % largest change of variance
   nt_split(nt_xprod(x);   % largest change of covariance
   nt_split(nt_xprod(x,'nodiag'); % same, ignoring variance
   nt_split(nt_xprod(nt_normrow(x),'nodiag'); % same, each slice normalized
   nt_split(x,3);          % recurse 3 times (--> 7 split points)
   nt_split(x,3,10);       % same, but splits are at least 10 points from ends
   test_nt_split;          % test script

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function [idx,score_vector,score]=nt_split(x,depth,thresh,guard)
0002 %[idx,score_vector,score]=nt_split(x,depth,thresh,guard) - split time series into intervals
0003 %
0004 %  idx: index at which to split
0005 %  score: proportion variance reduced
0006 %  score_vector: score as a function of split position
0007 %
0008 %
0009 %  x: data (time * channels)
0010 %  depth: recursion depth (we find on the order of 2^depth split points).
0011 %  thresh: threshold reduction of variance [default: 0]
0012 %  guard: number of samples to avoid at each end (counteracts bias towards splitting near ends)
0013 %
0014 %
0015 %  This routine finds the best place to split a time series.
0016 %
0017 %
0018 % Examples:
0019 %   nt_split(x);            % find point of largest change
0020 %   nt_split(x.^2);         % largest change of variance
0021 %   nt_split(nt_xprod(x);   % largest change of covariance
0022 %   nt_split(nt_xprod(x,'nodiag'); % same, ignoring variance
0023 %   nt_split(nt_xprod(nt_normrow(x),'nodiag'); % same, each slice normalized
0024 %   nt_split(x,3);          % recurse 3 times (--> 7 split points)
0025 %   nt_split(x,3,10);       % same, but splits are at least 10 points from ends
0026 %   test_nt_split;          % test script
0027 
0028 if nargin<2||isempty(depth); depth=1; end
0029 if nargin<3||isempty(thresh); thresh=0; end
0030 if nargin<4||isempty(guard); guard=0; end
0031 if ndims(x)>2; error('!'); end
0032 
0033 
0034 if numel(depth)>1; error('!'); end
0035 
0036 [m,n]=size(x);
0037 if m<2*guard; idx=[]; return; end
0038 % if m==1; idx=1; return; end
0039 % if m==0; idx=[]; return; end
0040 
0041 x=nt_demean(x);
0042 
0043 %{
0044 For each potential split point we calculate the sum of the per-interval
0045 ssq of first and second interval. This is vectorized using cumsum.
0046 %}
0047 
0048 % to minimize memory requirements code is repeated after flipping:
0049 xxx=x;
0050 first_term = cumsum(xxx.^2) - bsxfun(@times, cumsum(xxx).^2,1./(1:m)');
0051 xxx=flipud(x); 
0052 second_term = cumsum(xxx.^2) - bsxfun(@times, cumsum(xxx).^2, 1./(1:m)'); %clear x
0053 score_vector=first_term+second_term(end:-1:1,:);    % sum per-interval ssqs
0054 score_vector=score_vector*diag(1./sum(xxx.^2));  % normalize each dimension
0055 score_vector=mean(score_vector,2);      % average across dimensions
0056 
0057 % find the sweet spot:
0058 [score0,idx]=min(score_vector(guard+1:end-guard));  idx=idx+guard;
0059 %disp(['depth: ',num2str(depth), ', score0: ',num2str(score0)]);
0060 if score0>1-thresh; % improvement is not good enough
0061     idx=[]; 
0062 end
0063 
0064 if depth>1 && ~isempty(idx)
0065     [a,sv]=nt_split(x(1:idx,:),depth-1,thresh,guard);
0066     [b,sv]=nt_split(x(idx+1:end,:),depth-1,thresh,guard);
0067     idx=[a,idx,idx+b];
0068     idx=unique(idx);
0069 end
0070 
0071 if nargout>2 || nargout==0;
0072     % score = reduction in variance if mean removed from each segment
0073     ssq_total=sum( (x-repmat(mean(x),size(x,1),1)).^2 );
0074     idx2=unique([1,idx,m]); 
0075     ssq=zeros(1,size(x,2));
0076     for iSegment=1:numel(idx2)-1
0077         xx=x(idx2(iSegment):idx2(iSegment+1),:);
0078         ssq=ssq + sum( (xx - repmat(mean(xx),size(xx,1),1) ).^2 );
0079     end
0080     score=(mean(ssq))./mean(ssq_total);
0081 end
0082 
0083 %disp(['nt_split_nargout: ', num2str(nargout)])
0084 
0085 if nargout==0;
0086     disp(['split at ', num2str(idx)]);
0087     disp(['(%: ', num2str(100*idx/m, '  %.01f'), ')'])
0088     disp(['score: ', num2str(score,  '%.01f')]);
0089     
0090     figure(200);
0091     subplot 211
0092     plot(score_vector);
0093     subplot 212
0094     plot(x); drawnow
0095     nt_mark(idx);
0096     if numel(idx)>1; disp(['smallest interval: ', num2str(min(diff(idx)))]); end
0097     clear idx score score_vector
0098 end
0099   
0100 
0101 % TODO:
0102 % - online "bottom-up" calculation (aggregate rather than split)
0103 
0104 
0105

Generated on Mon 28-Nov-2016 20:12:47 by m2html © 2005