Home > NoiseTools > nt_split.m

nt_split

PURPOSE ^

[idx,score_vector,score]=nt_split(x,depth,thresh,guard) - split time series into intervals

SYNOPSIS ^

function [idx,score_vector,score]=nt_split(x,depth,thresh,guard)

DESCRIPTION ^

[idx,score_vector,score]=nt_split(x,depth,thresh,guard) - split time series into intervals

  idx: index at which to split
  score: proportion variance reduced
  score_vector: score as a function of split position
  

  x: data (time * channels)
  depth: recursion depth (we find on the order of 2^depth split points).
  thresh: threshold reduction of variance [default: 0]
  guard: number of samples to avoid at each end (counteracts bias towards splitting near ends)
  

  This routine finds the best place to split a time series.
  

 Examples: 
   nt_split(x);            % find point of largest change
   nt_split(x.^2);         % largest change of variance
   nt_split(nt_xprod(x);   % largest change of covariance
   nt_split(nt_xprod(x,'nodiag'); % same, ignoring variance
   nt_split(nt_xprod(nt_normrow(x),'nodiag'); % same, each slice normalized
   nt_split(x,3);          % recurse 3 times (--> 7 split points)
   nt_split(x,3,10);       % same, but splits are at least 10 points from ends

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function [idx,score_vector,score]=nt_split(x,depth,thresh,guard)
0002 %[idx,score_vector,score]=nt_split(x,depth,thresh,guard) - split time series into intervals
0003 %
0004 %  idx: index at which to split
0005 %  score: proportion variance reduced
0006 %  score_vector: score as a function of split position
0007 %
0008 %
0009 %  x: data (time * channels)
0010 %  depth: recursion depth (we find on the order of 2^depth split points).
0011 %  thresh: threshold reduction of variance [default: 0]
0012 %  guard: number of samples to avoid at each end (counteracts bias towards splitting near ends)
0013 %
0014 %
0015 %  This routine finds the best place to split a time series.
0016 %
0017 %
0018 % Examples:
0019 %   nt_split(x);            % find point of largest change
0020 %   nt_split(x.^2);         % largest change of variance
0021 %   nt_split(nt_xprod(x);   % largest change of covariance
0022 %   nt_split(nt_xprod(x,'nodiag'); % same, ignoring variance
0023 %   nt_split(nt_xprod(nt_normrow(x),'nodiag'); % same, each slice normalized
0024 %   nt_split(x,3);          % recurse 3 times (--> 7 split points)
0025 %   nt_split(x,3,10);       % same, but splits are at least 10 points from ends
0026 
0027 if nargin<2||isempty(depth); depth=1; end
0028 if nargin<3||isempty(thresh); thresh=0; end
0029 if nargin<4||isempty(guard); guard=0; end
0030 if ndims(x)>2; error('!'); end
0031 
0032 
0033 if numel(depth)>1; error('!'); end
0034 
0035 [m,n]=size(x);
0036 if m<2*guard; idx=[]; return; end
0037 % if m==1; idx=1; return; end
0038 % if m==0; idx=[]; return; end
0039 
0040 x=nt_demean(x);
0041 
0042 %{
0043 For each potential split point we calculate the sum of the per-interval
0044 ssq of first and second interval. This is vectorized using cumsum.
0045 %}
0046 
0047 % to minimize memory requirements code is repeated after flipping:
0048 xxx=x;
0049 first_term = cumsum(xxx.^2) - bsxfun(@times, cumsum(xxx).^2,1./(1:m)');
0050 xxx=flipud(x); 
0051 second_term = cumsum(xxx.^2) - bsxfun(@times, cumsum(xxx).^2, 1./(1:m)'); %clear x
0052 score_vector=first_term+second_term(end:-1:1,:);    % sum per-interval ssqs
0053 score_vector=score_vector*diag(1./sum(xxx.^2));  % normalize each dimension
0054 score_vector=mean(score_vector,2);      % average across dimensions
0055 
0056 % find the sweet spot:
0057 [score0,idx]=min(score_vector(guard+1:end-guard));  idx=idx+guard;
0058 disp(['depth: ',num2str(depth), ', score0: ',num2str(score0)]);
0059 if score0>1-thresh; % improvement is not good enough
0060     idx=[]; 
0061 end
0062 
0063 if depth>1 && ~isempty(idx)
0064     [a,sv]=nt_split(x(1:idx,:),depth-1,thresh,guard);
0065     [b,sv]=nt_split(x(idx+1:end,:),depth-1,thresh,guard);
0066     idx=[a,idx,idx+b];
0067     idx=unique(idx);
0068 end
0069 
0070 if nargout>2 || nargout==0;
0071     % score = reduction in variance if mean removed from each segment
0072     ssq_total=sum( (x-repmat(mean(x),size(x,1),1)).^2 );
0073     idx2=unique([1,idx,m]); 
0074     ssq=zeros(1,size(x,2));
0075     for iSegment=1:numel(idx2)-1
0076         xx=x(idx2(iSegment):idx2(iSegment+1),:);
0077         ssq=ssq + sum( (xx - repmat(mean(xx),size(xx,1),1) ).^2 );
0078     end
0079     score=(mean(ssq))./mean(ssq_total);
0080 end
0081 
0082 %disp(['nt_split_nargout: ', num2str(nargout)])
0083 
0084 if nargout==0;
0085     disp(['split at ', num2str(idx)]);
0086     disp(['(%: ', num2str(100*idx/m, '  %.01f'), ')'])
0087     disp(['score: ', num2str(score,  '%.01f')]);
0088     
0089     figure(200);
0090     subplot 211
0091     plot(score_vector);
0092     subplot 212
0093     plot(x); drawnow
0094     nt_mark(idx);
0095     if numel(idx)>1; disp(['smallest interval: ', num2str(min(diff(idx)))]); end
0096     clear idx score score_vector
0097 end
0098   
0099 
0100 % TODO:
0101 % - online "bottom-up" calculation (aggregate rather than split)
0102 
0103 
0104

Generated on Mon 10-Nov-2014 14:40:42 by m2html © 2005