Description of nt

[idx,score_vector,score]=nt_split(x,depth,thresh,guard,minstep) - split time series into intervals

  idx: index at which to split
  score: proportion variance reduced
  score_vector: score as a function of split position
  

  x: data (time * channels)
  depth: recursion depth (we find up to 2^depth-1 split points).
  thresh: if relative variance reduction smaller don't spit [default: 0]
  guard: number of samples to avoid at each end (counteracts bias towards splitting near ends)
  minstep: if absolute local step size smaller don't split
  

  This routine finds the best place to split a time series.
  

 Examples: 
   nt_split(x);            % find point of largest change
   nt_split(x.^2);         % largest change of variance
   nt_split(nt_xprod(x);   % largest change of covariance
   nt_split(nt_xprod(x,'nodiag'); % same, ignoring variance
   nt_split(nt_xprod(nt_normrow(x),'nodiag'); % same, each slice normalized
   nt_split(x,3);          % recurse 3 times (--> 7 split points)
   nt_split(x,3,10);       % same, but splits are at least 10 points from ends
   test_nt_split;          % test script

0001 function [idx,score_vector,score]=nt_split(x,depth,thresh,guard,minstep)
0002 %[idx,score_vector,score]=nt_split(x,depth,thresh,guard,minstep) - split time series into intervals
0003 %
0004 %  idx: index at which to split
0005 %  score: proportion variance reduced
0006 %  score_vector: score as a function of split position
0007 %
0008 %
0009 %  x: data (time * channels)
0010 %  depth: recursion depth (we find up to 2^depth-1 split points).
0011 %  thresh: if relative variance reduction smaller don't spit [default: 0]
0012 %  guard: number of samples to avoid at each end (counteracts bias towards splitting near ends)
0013 %  minstep: if absolute local step size smaller don't split
0014 %
0015 %
0016 %  This routine finds the best place to split a time series.
0017 %
0018 %
0019 % Examples:
0020 %   nt_split(x);            % find point of largest change
0021 %   nt_split(x.^2);         % largest change of variance
0022 %   nt_split(nt_xprod(x);   % largest change of covariance
0023 %   nt_split(nt_xprod(x,'nodiag'); % same, ignoring variance
0024 %   nt_split(nt_xprod(nt_normrow(x),'nodiag'); % same, each slice normalized
0025 %   nt_split(x,3);          % recurse 3 times (--> 7 split points)
0026 %   nt_split(x,3,10);       % same, but splits are at least 10 points from ends
0027 %   test_nt_split;          % test script
0028 
0029 if nargin<2||isempty(depth); depth=1; end
0030 if nargin<3||isempty(thresh); thresh=0; end
0031 if nargin<4||isempty(guard); guard=0; end
0032 if nargin<5; minstep=[]; end
0033 
0034 if ndims(x)>2; error('!'); end
0035 if numel(depth)>1; error('!'); end
0036 
0037 [m,n]=size(x);
0038 if m<2*guard; % don't try to split if too small
0039     idx=[]; score_vector=[]; 
0040     return; 
0041 end
0042 
0043 x=nt_demean(x);
0044 
0045 %{
0046 For each potential split point we calculate the sum of the per-interval
0047 ssq of first and second interval. This is vectorized using cumsum.
0048 %}
0049 
0050 % to minimize memory requirements code is repeated after flipping:
0051 xxx=x;
0052 first_term = cumsum(xxx.^2) - bsxfun(@times, cumsum(xxx).^2,1./(1:m)');
0053 xxx=flipud(x); 
0054 second_term = cumsum(xxx.^2) - bsxfun(@times, cumsum(xxx).^2, 1./(1:m)'); %clear x
0055 score_vector=first_term+second_term(end:-1:1,:);    % sum per-interval ssqs
0056 score_vector=score_vector*diag(1./sum(xxx.^2));     % normalize each dimension
0057 score_vector=mean(score_vector,2);                  % average across dimensions
0058 
0059 % find the sweet spot:
0060 [score0,idx]=min(score_vector);
0061 
0062 if score0>1-thresh ...                          % improvement too small
0063         || idx<guard || idx>size(x,1)-guard     % too close to edge
0064     idx=[]; 
0065     score_vector=[];
0066 end
0067 
0068 if depth>1 && ~isempty(idx)
0069     [a,sv]=nt_split(x(1:idx,:),depth-1,thresh,guard,minstep);
0070     [b,sv]=nt_split(x(idx+1:end,:),depth-1,thresh,guard,minstep);
0071     idx=[a,idx,idx+b];
0072     idx=unique(idx);
0073 end
0074 
0075 % weed out splits with small variance reduction score, or small step size
0076 toWeed=[];
0077 idx=[0, idx, size(x,1)];
0078 for iSplit=1:numel(idx)-2;
0079     a=nt_demean(x(idx(iSplit)+1:idx(iSplit+1),:));
0080     b=nt_demean(x(idx(iSplit+1)+1:idx(iSplit+2),:));
0081     c=nt_demean(x(idx(iSplit)+1:idx(iSplit+2),:));
0082     MARGIN=10;                              % samples, distance on each side at which to sample step size
0083     if (sum(a(:).^2)+sum(b(:).^2)) > (1-thresh)*sum(c(:).^2)...
0084         || (~isempty(minstep) && ...        % local step size is...
0085         mean(abs(x(idx(iSplit+1)-20,:) - x(idx(iSplit+1)+20,:)),2) < minstep); % ...too small
0086         toWeed=[toWeed,iSplit];
0087     end
0088 end
0089 idx(toWeed)=[];
0090 
0091 if depth==6
0092     % refine split positions (sometimes they are off by one or two samples)
0093     idx2=idx;
0094     disp(idx)
0095     for iSplit=1:numel(idx)-2
0096         ii=nt_split( x(idx(iSplit)+1 : idx(iSplit+2)-2), 1); % -2 to avoid influence of next step
0097         disp([idx(iSplit) idx(iSplit+2) ii])
0098         idx2(iSplit+1)=idx(iSplit)+ii;
0099     end
0100     idx=idx2;
0101 end
0102 
0103 idx=idx(2:end-1);
0104 
0105 
0106             
0107 
0108 if nargout>2 || nargout==0;
0109     % score = reduction in variance if mean removed from each segment
0110     ssq_total=sum( (x-repmat(mean(x),size(x,1),1)).^2 );
0111     idx2=unique([1,idx,m]); 
0112     ssq=zeros(1,size(x,2));
0113     for iSegment=1:numel(idx2)-1
0114         xx=x(idx2(iSegment):idx2(iSegment+1),:);
0115         ssq=ssq + sum( (xx - repmat(mean(xx),size(xx,1),1) ).^2 );
0116     end
0117     score=(mean(ssq))./mean(ssq_total);
0118 end
0119 
0120 %disp(['nt_split_nargout: ', num2str(nargout)])
0121 
0122 if nargout==0;
0123     disp(['split at ', num2str(idx)]);
0124     disp(['(%: ', num2str(100*idx/m, '  %.01f'), ')'])
0125     disp(['score: ', num2str(score,  '%.01f')]);
0126     
0127     figure(200);
0128     subplot 211
0129     plot(score_vector);
0130     subplot 212
0131     plot(x); drawnow
0132     nt_mark(idx);
0133     if numel(idx)>1; disp(['smallest interval: ', num2str(min(diff(idx)))]); end
0134     clear idx score score_vector
0135 end
0136 
0137 %disp([depth, idx])
0138 
0139 % TODO:
0140 % - online "bottom-up" calculation (aggregate rather than split)
0141 
0142 
0143

nt_split

PURPOSE

SYNOPSIS

DESCRIPTION

CROSS-REFERENCE INFORMATION

SOURCE CODE