Skip to content

Commit

Permalink
Merge branch 'OperationChanges'
Browse files Browse the repository at this point in the history
  • Loading branch information
benfulcher committed Aug 5, 2021
2 parents 000279b + 5ee824c commit 4911b2a
Show file tree
Hide file tree
Showing 15 changed files with 362 additions and 129 deletions.
16 changes: 10 additions & 6 deletions Database/INP_mops.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ DN_Mean(x,'harm') DN_hmean
DN_Mean(x,'rms') DN_rms
DN_Mean(x,'median') DN_median
DN_Mean(x,'midhinge') DN_midhinge
DN_HistogramMode(x_z,5) DN_HistogramMode_5
DN_HistogramMode(x_z,10) DN_HistogramMode_10
DN_HistogramMode(x_z,20) DN_HistogramMode_20
DN_HistogramMode(x_z,5,true,false) DN_HistogramMode_5
DN_HistogramMode(x_z,10,true,false) DN_HistogramMode_10
DN_HistogramMode(x_z,21,true,false) DN_HistogramMode_21
DN_HistogramMode(abs(x_z),5,true,false) DN_HistogramMode_abs_5
DN_HistogramMode(abs(x_z),10,true,false) DN_HistogramMode_abs_10
DN_HistogramMode(abs(x_z),21,true,false) DN_HistogramMode_abs_21
DN_HistogramAsymmetry(x_z,11,false) DN_HistogramAsymmetry_11
DN_TrimmedMean(x,1) DN_TrimmedMean_1
DN_TrimmedMean(x,5) DN_TrimmedMean_5
DN_TrimmedMean(x,10) DN_TrimmedMean_10
Expand Down Expand Up @@ -870,8 +874,8 @@ DN_CompareKSFit(x,'gamma') DN_CompareKSFit_gamma
DN_CompareKSFit(x,'logn') DN_CompareKSFit_logn
DN_CompareKSFit(x,'wbl') DN_CompareKSFit_wbl
DN_OutlierInclude(x_z,'abs',0.01) DN_OutlierInclude_abs_001
DN_OutlierInclude(x_z,'p',0.01) DN_OutlierInclude_p_001
DN_OutlierInclude(x_z,'n',0.01) DN_OutlierInclude_n_001
DN_OutlierInclude(x_z,'pos',0.01) DN_OutlierInclude_p_001
DN_OutlierInclude(x_z,'neg',0.01) DN_OutlierInclude_n_001
ST_LocalExtrema(x_z,'l',50) ST_LocalExtrema_l50
ST_LocalExtrema(x_z,'l',100) ST_LocalExtrema_l100
ST_LocalExtrema(x_z,'n',25) ST_LocalExtrema_n25
Expand All @@ -881,8 +885,8 @@ EX_MovingThreshold(x_z,0.1,0.1) EX_MovingThreshold_01_01
EX_MovingThreshold(x_z,0.1,0.02) EX_MovingThreshold_01_002
EX_MovingThreshold(x_z,1,0.1) EX_MovingThreshold_1_01
EX_MovingThreshold(x_z,1,0.02) EX_MovingThreshold_1_002
SB_BinaryStats(x_z,'diff') SB_BinaryStats_diff
SB_BinaryStats(x_z,'mean') SB_BinaryStats_mean
SB_BinaryStats(x_z,'diff') SB_BinaryStats_diff
SB_BinaryStats(x_z,'iqr') SB_BinaryStats_iqr
SB_MotifTwo(x_z,'diff') SB_MotifTwo_diff
SB_MotifTwo(x_z,'mean') SB_MotifTwo_mean
Expand Down
142 changes: 81 additions & 61 deletions Database/INP_ops.txt

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions Database/INP_ops_notUsed.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Features that are computed by hctsa but not used

# These are not relevant for stationary processes, but may be relevant for characterizing shorter series
# (for a dataset containing time series all of the same length, these will be identical to the original features
# (that do not have 'norm' appended to their names))
SB_BinaryStats_mean.longstretch1norm SB_BinaryStats_mean_longstretch1norm symbolic,distribution,correlation
SB_BinaryStats_mean.meanstretch1norm SB_BinaryStats_mean_meanstretch1norm symbolic,distribution,correlation
SB_BinaryStats_mean.stdstretch1norm SB_BinaryStats_mean_stdstretch1norm symbolic,distribution,correlation
SB_BinaryStats_mean.longstretch0norm SB_BinaryStats_mean_longstretch0norm symbolic,distribution,correlation
SB_BinaryStats_mean.meanstretch0norm SB_BinaryStats_mean_meanstretch0norm symbolic,distribution,correlation
SB_BinaryStats_mean.stdstretch0norm SB_BinaryStats_mean_stdstretch0norm symbolic,distribution,correlation
SB_BinaryStats_diff.longstretch1norm SB_BinaryStats_diff_longstretch1norm symbolic,distribution,correlation
SB_BinaryStats_diff.longstretch0norm SB_BinaryStats_diff_longstretch0norm symbolic,distribution,correlation
SB_BinaryStats_diff.meanstretch1norm SB_BinaryStats_diff_meanstretch1norm symbolic,distribution,correlation
SB_BinaryStats_diff.meanstretch0norm SB_BinaryStats_diff_meanstretch0norm symbolic,distribution,correlation
SB_BinaryStats_diff.stdstretch1norm SB_BinaryStats_diff_stdstretch1norm symbolic,distribution,correlation
SB_BinaryStats_diff.stdstretch0norm SB_BinaryStats_diff_stdstretch0norm symbolic,distribution,correlation
SB_BinaryStats_iqr.longstretch1norm SB_BinaryStats_iqr_longstretch1norm symbolic,distribution,correlation
SB_BinaryStats_iqr.longstretch0norm SB_BinaryStats_iqr_longstretch0norm symbolic,distribution,correlation
SB_BinaryStats_iqr.meanstretch1norm SB_BinaryStats_iqr_meanstretch1norm symbolic,distribution,correlation
SB_BinaryStats_iqr.meanstretch0norm SB_BinaryStats_iqr_meanstretch0norm symbolic,distribution,correlation
SB_BinaryStats_iqr.stdstretch1norm SB_BinaryStats_iqr_stdstretch1norm symbolic,distribution,correlation
SB_BinaryStats_iqr.stdstretch0norm SB_BinaryStats_iqr_stdstretch0norm symbolic,distribution,correlation
4 changes: 3 additions & 1 deletion Operations/CO_FirstCrossing.m
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
% corrFun, the self-correlation function to measure:
% (i) 'ac': normal linear autocorrelation function. Uses CO_AutoCorr to
% calculate autocorrelations.
% threshold, to cross: e.g., 0, 1/exp(1).
% threshold, to cross. Examples: 0 [first zero crossing], 1/exp(1) [first 1/e crossing].

% ------------------------------------------------------------------------------
% Copyright (C) 2020, Ben D. Fulcher <[email protected]>,
Expand Down Expand Up @@ -60,6 +60,8 @@
switch corrFun
case 'ac'
% Autocorrelation at all time lags
% (computing every lag at once, rather than incrementally, is a little inefficient,
% but the Fourier method likely computes all lags anyway)
corrs = CO_AutoCorr(y,[],'Fourier');
otherwise
error('Unknown correlation function ''%s''',corrFun);
Expand Down
27 changes: 15 additions & 12 deletions Operations/CO_NonlinearAutocorr.m
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
% y -- should be the z-scored time series (Nx1 vector)
% taus -- should be a vector of the time delays as above (mx1 vector)
% e.g., [2] computes <x_i x_{i-2}>
% e.g., [1,2] computes <x_i x_{i-1} x{i-2}>
% e.g., [1,1,3] computes <x_i x_{i-1}^2 x{i-3}>
% e.g., [1,2] computes <x_i x_{i-1} x_{i-2}>
% e.g., [1,1,3] computes <x_i x_{i-1}^2 x_{i-3}>
% e.g., [0,0,1] computes <x_i^3 x_{i-1}>
% doAbs [opt] -- a boolean (0,1) -- if one, takes an absolute value before
% taking the final mean -- useful for an odd number of
% contributions to the sum. Default is to do this for odd
Expand All @@ -27,10 +28,10 @@
% taus vectors) the result will be near zero due to fluctuations
% below the mean; even for highly-correlated signals. (doAbs)
%
% (*) doAbs = 1 is really a different operation that can't be compared with
% the values obtained from taking doAbs = 0 (i.e., for odd lengths
% (*) doAbs = true is really a different operation that can't be compared with
% the values obtained from taking doAbs = false (i.e., for odd lengths
% of taus)
% (*) It can be helpful to look at nlac at each iteration.
% (*) It can be helpful to look at nonlinearAC at each iteration.

% ------------------------------------------------------------------------------
% Copyright (C) 2020, Ben D. Fulcher <[email protected]>,
Expand Down Expand Up @@ -66,27 +67,29 @@
% ------------------------------------------------------------------------------
if nargin < 3 || isempty(doAbs) % use default settings for doAbs
if rem(length(taus),2) == 1
doAbs = 0;
doAbs = false;
else
% Even number of time-lags
doAbs = 1; % take abs, otherwise will be a very small number
doAbs = true; % take abs, otherwise will be a very small number
end
end
%-------------------------------------------------------------------------------

N = length(y); % time-series length
tmax = max(taus); % the maximum delay time
tMax = max(taus); % the maximum delay time

% Compute the autocorrelation sum iteratively
nlac = y(tmax+1:N);
nonlinearAC = y(tMax+1:N);
for i = 1:length(taus)
nlac = nlac.*y(tmax-taus(i)+1:N-taus(i));
nonlinearAC = nonlinearAC .* y(tMax-taus(i)+1:N-taus(i));
end

%-------------------------------------------------------------------------------
% Compute output
if doAbs
out = mean(abs(nlac));
out = mean(abs(nonlinearAC));
else
out = mean(nlac);
out = mean(nonlinearAC);
end

end
93 changes: 93 additions & 0 deletions Operations/DN_HistogramAsymmetry.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
function out = DN_HistogramAsymmetry(y,numBins,doSimple)
% DN_HistogramAsymmetry Measures of distributional asymmetry
%
% Measures the asymmetry of the histogram distribution of the input data vector.
% The strictly positive and strictly negative values of the input are binned
% separately, and the heights and locations of the two histogram modes are
% compared.
%
%---INPUTS:
%
% y, the input data vector (assumed to be z-scored; a warning is issued if not).
% numBins, the number of bins to use in each of the two histograms
%           (default, or if empty: 10).
% doSimple, whether to use a simple binning method (linearly spaced bins), via
%           BF_SimpleBinner; otherwise histcounts is used
%           (default, or if empty: true).
%
%---OUTPUT: a structure with fields:
%
% densityDiff, number of positive values minus number of negative values.
% modeProbPos, height of the tallest bin of the positive-value histogram
%               (counts divided by the number of nonzero values in y).
% modeProbNeg, as modeProbPos, for the negative-value histogram.
% modeDiff, modeProbPos - modeProbNeg.
% posMode, bin center of the tallest positive bin (mean across ties).
% negMode, bin center of the tallest negative bin (mean across ties).
% modeAsymmetry, posMode + negMode (zero when the two modes sit at equal
%               distances on either side of zero).
%
% Mode outputs for a side of the distribution that contains no data are NaN.

% ------------------------------------------------------------------------------
% Copyright (C) 2020, Ben D. Fulcher <[email protected]>,
% <http://www.benfulcher.com>
%
% If you use this code for your research, please cite the following two papers:
%
% (1) B.D. Fulcher and N.S. Jones, "hctsa: A Computational Framework for Automated
% Time-Series Phenotyping Using Massive Feature Extraction", Cell Systems 5: 527 (2017).
% DOI: 10.1016/j.cels.2017.10.001
%
% (2) B.D. Fulcher, M.A. Little, N.S. Jones, "Highly comparative time-series
% analysis: the empirical structure of time series and their methods",
% J. Roy. Soc. Interface 10(83) 20130048 (2013).
% DOI: 10.1098/rsif.2013.0048
%
% This function is free software: you can redistribute it and/or modify it under
% the terms of the GNU General Public License as published by the Free Software
% Foundation, either version 3 of the License, or (at your option) any later
% version.
%
% This program is distributed in the hope that it will be useful, but WITHOUT
% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
% details.
%
% You should have received a copy of the GNU General Public License along with
% this program. If not, see <http://www.gnu.org/licenses/>.
% ------------------------------------------------------------------------------

%-------------------------------------------------------------------------------
% Check inputs and set defaults:
%-------------------------------------------------------------------------------
% (an empty argument is treated the same as an omitted one)
if nargin < 2 || isempty(numBins)
    numBins = 10;
end
if nargin < 3 || isempty(doSimple)
    doSimple = true;
end

%-------------------------------------------------------------------------------
% Check z-score standardization (since it is assumed that positive and negative
% values can be treated separately):
if ~BF_iszscored(y)
    warning('DN_HistogramAsymmetry assumes a z-scored (or standardized) input')
end

%-------------------------------------------------------------------------------
% Compute the histogram mode separately from positive and negative values:
% (both sides are normalized by the same total, the number of nonzero values,
% so that the two mode heights are directly comparable)
numNonZero = sum(y~=0);
[modeProbPos,posMode] = histogramModeOfSide(y(y > 0),numBins,doSimple,numNonZero);
[modeProbNeg,negMode] = histogramModeOfSide(y(y < 0),numBins,doSimple,numNonZero);

%-------------------------------------------------------------------------------
% Histogram counts and overall density differences:
% NOTE(review): densityDiff is a raw count difference (not divided by the number
% of samples), so it scales with the length of y -- confirm that this is intended.
out.densityDiff = sum(y > 0) - sum(y < 0); % measure of asymmetry about the mean
out.modeProbPos = modeProbPos;
out.modeProbNeg = modeProbNeg;
out.modeDiff = out.modeProbPos - out.modeProbNeg;

% Mean position of maximums (if multiple):
out.posMode = posMode;
out.negMode = negMode;
% negMode is a negative number, so the sum measures the imbalance in the
% distances of the two modes from zero:
out.modeAsymmetry = out.posMode + out.negMode;

end

%-------------------------------------------------------------------------------
function [modeProb,modeLocation] = histogramModeOfSide(ySide,numBins,doSimple,numNonZero)
% Height and location of the histogram mode for one side (positive or negative)
% of the data. Returns NaN for both outputs when ySide contains no data.

    if isempty(ySide)
        % No data on this side: binning an empty vector is meaningless
        % (an artefactual mode, or an error), so flag the outputs as undefined.
        modeProb = NaN;
        modeLocation = NaN;
        return
    end

    if doSimple
        [counts,binEdges] = BF_SimpleBinner(ySide,numBins);
    else
        [counts,binEdges] = histcounts(ySide,numBins);
    end

    % Normalize by total (nonzero) counts:
    binProbs = counts/numNonZero;

    % Bin centers from bin edges (valid for row- or column-vector edges):
    binCenters = (binEdges(1:end-1) + binEdges(2:end))/2;

    % Height of the tallest bin, and mean position of the tallest bin(s):
    modeProb = max(binProbs);
    modeLocation = mean(binCenters(binProbs == modeProb));
end
19 changes: 17 additions & 2 deletions Operations/DN_HistogramMode.m
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function out = DN_HistogramMode(y,numBins,doPlot)
function out = DN_HistogramMode(y,numBins,doSimple,doPlot)
% DN_HistogramMode Mode of a data vector.
%
% Measures the mode of the data vector using histograms with a given number
Expand All @@ -8,6 +8,7 @@
%
% y, the input data vector.
% numBins, the number of bins to use in the histogram.
% doSimple, whether to use a simple binning method (linearly spaced bins).
% doPlot, whether to show a plot of what was computed.

% ------------------------------------------------------------------------------
Expand Down Expand Up @@ -39,13 +40,27 @@
% this program. If not, see <http://www.gnu.org/licenses/>.
% ------------------------------------------------------------------------------

%-------------------------------------------------------------------------------
% Check inputs and set defaults:
%-------------------------------------------------------------------------------
if nargin < 2
numBins = 'auto';
end
if nargin < 3
doSimple = true;
end
if nargin < 4
doPlot = false;
end
%-------------------------------------------------------------------------------

% Compute the histogram from the data:
if isnumeric(numBins)
[N,binEdges] = histcounts(y,numBins);
if doSimple
[N,binEdges] = BF_SimpleBinner(y,numBins);
else
[N,binEdges] = histcounts(y,numBins);
end
elseif ischar(numBins)
[N,binEdges] = histcounts(y,'BinMethod',numBins);
else
Expand Down
45 changes: 30 additions & 15 deletions Operations/DN_OutlierInclude.m
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
%
% thresholdHow, the method of how to determine outliers:
% (i) 'abs': outliers are furthest from the mean,
% (ii) 'p': outliers are the greatest positive deviations from the mean, or
% (iii) 'n': outliers are the greatest negative deviations from the mean.
% (ii) 'pos': outliers are the greatest positive deviations from the mean, or
% (iii) 'neg': outliers are the greatest negative deviations from the mean.
%
% inc, the increment to move through (fraction of std if input time series is
% z-scored)
Expand Down Expand Up @@ -72,13 +72,16 @@
doPlot = false; % Plot some outputs

% ------------------------------------------------------------------------------
%% Check Inputs
%% Check inputs and set defaults
% ------------------------------------------------------------------------------
% If time series is all the same value -- ridiculous! ++BF 21/3/2010
if all(y == y(1)) % the whole time series is just a single value
% A constant time series causes issues:
if all(y == y(1))
% This method is not suitable for such time series: return a NaN
fprintf(1,'The time series is a constant!\n');
out = NaN; return % this method is not suitable for such time series: return a NaN
out = NaN;
return
end

% Check z-scored time series
if ~BF_iszscored(y)
warning('The input time series should be z-scored')
Expand All @@ -96,24 +99,29 @@
% ------------------------------------------------------------------------------
%% Initialize thresholds
% ------------------------------------------------------------------------------
% Could be better to just use a fixed number of increments here, from 0 to the max.
% (rather than forcing a fixed inc)
switch thresholdHow
case 'abs' % analyze absolute value deviations
thr = (0:inc:max(abs(y)));
tot = N;
case 'p' % analyze only positive deviations
case 'pos' % analyze only positive deviations
thr = (0:inc:max(y));
tot = sum(y >= 0);
case 'n' % analyze only negative deviations
case 'neg' % analyze only negative deviations
thr = (0:inc:max(-y));
tot = sum(y <= 0);
otherwise
error('Error thresholding with ''%s''. Must select either ''abs'', ''p'', or ''n''.',thresholdHow)
error('Error thresholding with ''%s''. Must select either ''abs'', ''pos'', or ''neg''.',thresholdHow)
end

if isempty(thr)
error('I suspect that this is a highly peculiar time series?!!!')
error('Error setting increments through the time-series values...')
end

%-------------------------------------------------------------------------------
% Calculate statistics of over-threshold events, looping over thresholds
%-------------------------------------------------------------------------------
msDt = zeros(length(thr),6); % mean, std, proportion_of_time_series_included,
% median of index relative to middle, mean,
% error
Expand All @@ -126,9 +134,9 @@
switch thresholdHow
case 'abs' % look at absolute value deviations
r = find(abs(y) >= th);
case 'p' % look at only positive deviations
case 'pos' % look at only positive deviations
r = find(y >= th);
case 'n' % look at only negative deviations
case 'neg' % look at only negative deviations
r = find(y <= -th);
end

Expand Down Expand Up @@ -156,6 +164,8 @@
% ------------------------------------------------------------------------------
%% Trim
% ------------------------------------------------------------------------------
% NB: would be more efficient to catch this within the loop above

% Trim off where the number of events is only one; hence the differenced
% series returns NaN
fbi = find(isnan(msDt(:,1)),1,'first'); % first bad index
Expand Down Expand Up @@ -192,7 +202,7 @@
%-------------------------------------------------------------------------------

% ------------------------------------------------------------------------------
%% Fit an exponential to the mean as a function of the threshold
%% Fit an exponential to the mean inter-event interval as a function of the threshold
% ------------------------------------------------------------------------------
s = fitoptions('Method','NonlinearLeastSquares','StartPoint',[0.1 2.5 1]);
f = fittype('a*exp(b*x)+c','options',s);
Expand Down Expand Up @@ -247,9 +257,14 @@
out.nflrmse = gof.rmse;

% ------------------------------------------------------------------------------
%% Stationarity assumption
%% Stationarity metrics
% ------------------------------------------------------------------------------
% mean, median and std of the median and mean of inter-intervals
% Mean, median and std of the mean inter-event interval:
out.mdtm = mean(msDt(:,1));
out.mdtmd = median(msDt(:,1));
out.mdtstd = std(msDt(:,1));

% Mean, median and std of the median index (relative to the middle) at which over-threshold events occur:
out.mdrm = mean(msDt(:,4));
out.mdrmd = median(msDt(:,4));
out.mdrstd = std(msDt(:,4));
Expand Down
Loading

0 comments on commit 4911b2a

Please sign in to comment.