Skip to content

Commit

Permalink
[ML] AIOps Log Rate Analysis: Improve query generators (#171008)
Browse files Browse the repository at this point in the history
Improves the query generation for some parts of log rate analysis and
adds jest tests to verify the improvements.
Previously, some query generators were not precise enough in their
checks. As a result, the range query for the overall time range could
be added twice, or it could be added even though range filters for the
inner baseline and deviation time ranges were already present, which
made it redundant. For example:

```json
    "query": {
      "bool": {
        "filter": [
...
          {
            "range": {
              "@timestamp": {
                "gte": 1698796800506,
                "lte": 1699568984807,
                "format": "epoch_millis"
              }
            }
          },
          {
            "bool": {
              "should": [
                {
                  "range": {
                    "@timestamp": {
                      "gte": 1699272000000,
                      "lte": 1699344000000,
                      "format": "epoch_millis"
                    }
                  }
                },
                {
                  "range": {
                    "@timestamp": {
                      "gte": 1698969600000,
                      "lte": 1699185600000,
                      "format": "epoch_millis"
                    }
                  }
                }
              ]
            }
          }
        ],
...
```

The PR also unifies some mocks for the jest unit tests.
  • Loading branch information
walterra authored Nov 16, 2023
1 parent 7c80161 commit 4d27bfb
Show file tree
Hide file tree
Showing 19 changed files with 552 additions and 251 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
* 2.0.
*/

import React, { useEffect, useState, type FC } from 'react';
import { isEqual } from 'lodash';
import React, { useEffect, useMemo, useState, type FC } from 'react';
import { EuiEmptyPrompt, EuiHorizontalRule, EuiPanel } from '@elastic/eui';
import type { Moment } from 'moment';

Expand All @@ -32,7 +33,18 @@ import {
import type { GroupTableItem } from '../../log_rate_analysis_results_table/types';
import { useLogRateAnalysisResultsTableRowContext } from '../../log_rate_analysis_results_table/log_rate_analysis_results_table_row_provider';

const DEFAULT_SEARCH_QUERY = { match_all: {} };
const DEFAULT_SEARCH_QUERY: estypes.QueryDslQueryContainer = { match_all: {} };
// Query shape treated as the default "empty" query passed on from the search bar:
// a `bool` query with a single `match_all` in `must` and empty `filter`/`must_not`.
// `esSearchQuery` is deep-compared against this to decide whether to fall back
// to the simpler `DEFAULT_SEARCH_QUERY`.
const DEFAULT_SEARCH_BAR_QUERY: estypes.QueryDslQueryContainer = {
bool: {
filter: [],
must: [
{
match_all: {},
},
],
must_not: [],
},
};

export function getDocumentCountStatsSplitLabel(
significantItem?: SignificantItem,
Expand Down Expand Up @@ -93,6 +105,13 @@ export const LogRateAnalysisContent: FC<LogRateAnalysisContentProps> = ({
setIsBrushCleared(windowParameters === undefined);
}, [windowParameters]);

// Checks if `esSearchQuery` is the default empty query passed on from the search bar
// and if that's the case fall back to a simpler match all query.
const searchQuery = useMemo(
() => (isEqual(esSearchQuery, DEFAULT_SEARCH_BAR_QUERY) ? DEFAULT_SEARCH_QUERY : esSearchQuery),
[esSearchQuery]
);

const {
currentSelectedSignificantItem,
currentSelectedGroup,
Expand All @@ -105,7 +124,7 @@ export const LogRateAnalysisContent: FC<LogRateAnalysisContentProps> = ({
const { documentStats, earliest, latest } = useData(
dataView,
'log_rate_analysis',
esSearchQuery,
searchQuery,
setGlobalState,
currentSelectedSignificantItem,
currentSelectedGroup,
Expand Down Expand Up @@ -170,7 +189,7 @@ export const LogRateAnalysisContent: FC<LogRateAnalysisContentProps> = ({
stickyHistogram={stickyHistogram}
onReset={clearSelection}
sampleProbability={sampleProbability}
searchQuery={esSearchQuery}
searchQuery={searchQuery}
windowParameters={windowParameters}
barColorOverride={barColorOverride}
barHighlightColorOverride={barHighlightColorOverride}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

// Mock analysis parameters using a match-all search query.
// The overall time range (`start`/`end`: 0-50) encloses both the baseline
// range (10-20) and the deviation range (30-40).
export const paramsMock = {
index: 'the-index',
timeFieldName: 'the-time-field-name',
start: 0,
end: 50,
baselineMin: 10,
baselineMax: 20,
deviationMin: 30,
deviationMax: 40,
includeFrozen: false,
// The search query is provided as a JSON string, not as an object.
searchQuery: '{ "match_all": {} }',
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { AiopsLogRateAnalysisSchema } from '../../../../../common/api/log_rate_analysis/schema';

import { paramsMock } from './params_match_all';
import { searchQueryMock } from './search_query';

// Same as `paramsMock`, but with `searchQuery` overridden by `searchQueryMock`
// instead of the match-all query.
export const paramsSearchQueryMock: AiopsLogRateAnalysisSchema = {
...paramsMock,
searchQuery: searchQueryMock,
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

// This is the format that gets passed on from the Kibana search bar.
// The query is serialized to a JSON string and contains a single `term`
// clause in `should` with `minimum_should_match: 1`.
export const searchQueryMock = JSON.stringify({
bool: {
filter: [],
minimum_should_match: 1,
must_not: [],
should: [{ term: { 'the-term': { value: 'the-value' } } }],
},
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { createRandomSamplerWrapper } from '@kbn/ml-random-sampler-utils';

import { paramsMock } from './__mocks__/params_match_all';
import { getBaselineOrDeviationFilter, getCategoryRequest } from './fetch_categories';

// Verifies that the filter consists of two `should` range clauses on the time field:
// one for the baseline range (10-20) and one for the deviation range (30-40) of `paramsMock`.
describe('getBaselineOrDeviationFilter', () => {
it('returns a filter that matches both baseline and deviation time range', () => {
const baselineOrDeviationFilter = getBaselineOrDeviationFilter(paramsMock);

expect(baselineOrDeviationFilter).toEqual({
bool: {
should: [
{
range: {
'the-time-field-name': { gte: 10, lte: 20, format: 'epoch_millis' },
},
},
{
range: {
'the-time-field-name': { gte: 30, lte: 40, format: 'epoch_millis' },
},
},
],
},
});
});
});

// Verifies the full search request built for a single field: the query part must only
// contain the baseline/deviation filter and the aggregation must be wrapped in the
// random sampler configured below.
describe('getCategoryRequest', () => {
it('returns the category request', () => {
// Fixed probability and seed so the expected `random_sampler` aggregation is deterministic.
const randomSamplerWrapper = createRandomSamplerWrapper({
probability: 0.1,
seed: 1234,
});

const query = getCategoryRequest(paramsMock, 'the-field-name', randomSamplerWrapper);

// Because the time range filter is covered by the should clauses that cover both
// baseline (10,20) and deviation (30,40), we expect that there is no other
// time range filter whatsoever, for example for start/end (0,50).
expect(query).toEqual({
index: 'the-index',
size: 0,
body: {
query: {
bool: {
filter: [
{
bool: {
should: [
{
range: {
'the-time-field-name': {
gte: 10,
lte: 20,
format: 'epoch_millis',
},
},
},
{
range: {
'the-time-field-name': {
gte: 30,
lte: 40,
format: 'epoch_millis',
},
},
},
],
},
},
],
},
},
aggs: {
sample: {
random_sampler: { probability: 0.1, seed: 1234 },
aggs: {
categories: {
categorize_text: { field: 'the-field-name', size: 1000 },
aggs: {
hit: {
top_hits: { size: 1, sort: ['the-time-field-name'], _source: 'the-field-name' },
},
},
},
},
},
},
},
});
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -28,30 +28,67 @@ import { isRequestAbortedError } from '../../../lib/is_request_aborted_error';

import { getQueryWithParams } from './get_query_with_params';

/**
 * Builds a filter that includes docs from both the baseline and the
 * deviation time range.
 *
 * @param params - Analysis parameters. Reads `timeFieldName` and the
 *   `baselineMin`/`baselineMax` and `deviationMin`/`deviationMax` bounds.
 * @returns A `bool` query with one `should` range clause per time window.
 *   Both bounds are inclusive (`gte`/`lte`) and formatted as `epoch_millis`.
 */
export const getBaselineOrDeviationFilter = (
  params: AiopsLogRateAnalysisSchema
): estypes.QueryDslQueryContainer => {
  const { timeFieldName } = params;

  // Baseline window first, deviation window second.
  const timeWindows = [
    [params.baselineMin, params.baselineMax],
    [params.deviationMin, params.deviationMax],
  ] as const;

  return {
    bool: {
      should: timeWindows.map(([gte, lte]) => ({
        range: {
          [timeFieldName]: { gte, lte, format: 'epoch_millis' },
        },
      })),
    },
  };
};

export const getCategoryRequest = (
params: AiopsLogRateAnalysisSchema,
fieldName: string,
from: number | undefined,
to: number | undefined,
filter: estypes.QueryDslQueryContainer,
{ wrap }: RandomSamplerWrapper
): estypes.SearchRequest => {
const { index, timeFieldName } = params;

const query = getQueryWithParams({
params,
termFilters: undefined,
filter,
// We're skipping the overall range query here since this
// is covered by the filter which will match docs in both baseline
// and deviation time range via `getBaselineOrDeviationFilter`.
skipRangeQuery: true,
filter: getBaselineOrDeviationFilter(params),
});

const { params: request } = createCategoryRequest(
index,
fieldName,
timeFieldName,
from,
to,
undefined,
undefined,
query,
wrap
);

// In this case we're only interested in the aggregation which
// `createCategoryRequest` returns, so we're re-applying the original
// query we create via `getQueryWithParams` here.
request.body.query = query;

return request;
};

Expand All @@ -64,9 +101,6 @@ export const fetchCategories = async (
esClient: ElasticsearchClient,
params: AiopsLogRateAnalysisSchema,
fieldNames: string[],
from: number | undefined,
to: number | undefined,
filter: estypes.QueryDslQueryContainer,
logger: Logger,
// The default value of 1 means no sampling will be used
sampleProbability: number = 1,
Expand All @@ -82,7 +116,7 @@ export const fetchCategories = async (

const settledPromises = await Promise.allSettled(
fieldNames.map((fieldName) => {
const request = getCategoryRequest(params, fieldName, from, to, filter, randomSamplerWrapper);
const request = getCategoryRequest(params, fieldName, randomSamplerWrapper);
return esClient.search(request, {
signal: abortSignal,
maxRetries: 0,
Expand Down
Loading

0 comments on commit 4d27bfb

Please sign in to comment.