Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] AIOps Log Rate Analysis: Improve query generators #171008

Merged
merged 11 commits into from
Nov 16, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
* 2.0.
*/

import React, { useEffect, useState, type FC } from 'react';
import { isEqual } from 'lodash';
import React, { useEffect, useMemo, useState, type FC } from 'react';
import { EuiEmptyPrompt, EuiHorizontalRule, EuiPanel } from '@elastic/eui';
import type { Moment } from 'moment';

Expand All @@ -32,7 +33,18 @@ import {
import type { GroupTableItem } from '../../log_rate_analysis_results_table/types';
import { useLogRateAnalysisResultsTableRowContext } from '../../log_rate_analysis_results_table/log_rate_analysis_results_table_row_provider';

const DEFAULT_SEARCH_QUERY = { match_all: {} };
const DEFAULT_SEARCH_QUERY: estypes.QueryDslQueryContainer = { match_all: {} };
// The default empty query as it is passed on from the search bar.
// `esSearchQuery` is compared against this value to detect that case.
const DEFAULT_SEARCH_BAR_QUERY: estypes.QueryDslQueryContainer = {
bool: {
filter: [],
must: [
{
match_all: {},
},
],
must_not: [],
},
};

export function getDocumentCountStatsSplitLabel(
significantItem?: SignificantItem,
Expand Down Expand Up @@ -93,6 +105,13 @@ export const LogRateAnalysisContent: FC<LogRateAnalysisContentProps> = ({
setIsBrushCleared(windowParameters === undefined);
}, [windowParameters]);

// Checks whether `esSearchQuery` is the default empty query passed on from the search bar.
// If so, falls back to the simpler `match_all` query.
const searchQuery = useMemo(
() => (isEqual(esSearchQuery, DEFAULT_SEARCH_BAR_QUERY) ? DEFAULT_SEARCH_QUERY : esSearchQuery),
[esSearchQuery]
);

const {
currentSelectedSignificantItem,
currentSelectedGroup,
Expand All @@ -105,7 +124,7 @@ export const LogRateAnalysisContent: FC<LogRateAnalysisContentProps> = ({
const { documentStats, earliest, latest } = useData(
dataView,
'log_rate_analysis',
esSearchQuery,
searchQuery,
setGlobalState,
currentSelectedSignificantItem,
currentSelectedGroup,
Expand Down Expand Up @@ -170,7 +189,7 @@ export const LogRateAnalysisContent: FC<LogRateAnalysisContentProps> = ({
stickyHistogram={stickyHistogram}
onReset={clearSelection}
sampleProbability={sampleProbability}
searchQuery={esSearchQuery}
searchQuery={searchQuery}
windowParameters={windowParameters}
barColorOverride={barColorOverride}
barHighlightColorOverride={barHighlightColorOverride}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

/**
 * Mock request parameters that use a plain `match_all` search query.
 *
 * The baseline window (10-20) and the deviation window (30-40) both lie
 * within the overall `start`/`end` range (0-50). Note that `searchQuery`
 * is a JSON string, not an object.
 */
export const paramsMock = {
index: 'the-index',
timeFieldName: 'the-time-field-name',
start: 0,
end: 50,
baselineMin: 10,
baselineMax: 20,
deviationMin: 30,
deviationMax: 40,
includeFrozen: false,
searchQuery: '{ "match_all": {} }',
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { AiopsLogRateAnalysisSchema } from '../../../../../common/api/log_rate_analysis/schema';

import { paramsMock } from './params_match_all';
import { searchQueryMock } from './search_query';

/**
 * Mock request parameters identical to `paramsMock`, except that
 * `searchQuery` is replaced with `searchQueryMock`.
 */
export const paramsSearchQueryMock: AiopsLogRateAnalysisSchema = {
...paramsMock,
searchQuery: searchQueryMock,
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

// Query object in the shape the Kibana search bar passes on.
// Property order is kept stable because the mock is the serialized string.
const searchBarQuery = {
  bool: {
    filter: [],
    minimum_should_match: 1,
    must_not: [],
    should: [{ term: { 'the-term': { value: 'the-value' } } }],
  },
};

// Serialized search bar query, used as the `searchQuery` request parameter in tests.
export const searchQueryMock = JSON.stringify(searchBarQuery);
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { createRandomSamplerWrapper } from '@kbn/ml-random-sampler-utils';

import { paramsMock } from './__mocks__/params_match_all';
import { getBaselineOrDeviationFilter, getCategoryRequest } from './fetch_categories';

describe('getBaselineOrDeviationFilter', () => {
  it('returns a filter that matches both baseline and deviation time range', () => {
    // Expected range clauses for the baseline (10,20) and deviation (30,40)
    // windows defined in `paramsMock`.
    const expectedBaselineRange = {
      range: {
        'the-time-field-name': { gte: 10, lte: 20, format: 'epoch_millis' },
      },
    };
    const expectedDeviationRange = {
      range: {
        'the-time-field-name': { gte: 30, lte: 40, format: 'epoch_millis' },
      },
    };

    expect(getBaselineOrDeviationFilter(paramsMock)).toEqual({
      bool: {
        should: [expectedBaselineRange, expectedDeviationRange],
      },
    });
  });
});

describe('getCategoryRequest', () => {
  it('returns the category request', () => {
    const randomSamplerWrapper = createRandomSamplerWrapper({ probability: 0.1, seed: 1234 });

    // The only expected filter: should clauses covering the baseline (10,20)
    // and deviation (30,40) time ranges.
    const expectedTimeRangeFilter = {
      bool: {
        should: [
          {
            range: {
              'the-time-field-name': { gte: 10, lte: 20, format: 'epoch_millis' },
            },
          },
          {
            range: {
              'the-time-field-name': { gte: 30, lte: 40, format: 'epoch_millis' },
            },
          },
        ],
      },
    };

    // The categorization aggregation, nested inside the random sampler aggregation.
    const expectedAggs = {
      sample: {
        random_sampler: { probability: 0.1, seed: 1234 },
        aggs: {
          categories: {
            categorize_text: { field: 'the-field-name', size: 1000 },
            aggs: {
              hit: {
                top_hits: { size: 1, sort: ['the-time-field-name'], _source: 'the-field-name' },
              },
            },
          },
        },
      },
    };

    const query = getCategoryRequest(paramsMock, 'the-field-name', randomSamplerWrapper);

    // Because the time range filter is covered by the should clauses that cover both
    // baseline (10,20) and deviation (30,40), we expect that there is no other
    // time range filter whatsoever, for example for start/end (0,50).
    expect(query).toEqual({
      index: 'the-index',
      size: 0,
      body: {
        query: {
          bool: {
            filter: [expectedTimeRangeFilter],
          },
        },
        aggs: expectedAggs,
      },
    });
  });
});
Original file line number Diff line number Diff line change
Expand Up @@ -28,30 +28,67 @@ import { isRequestAbortedError } from '../../../lib/is_request_aborted_error';

import { getQueryWithParams } from './get_query_with_params';

/**
 * Builds a filter that includes docs from both the baseline and the
 * deviation time range.
 *
 * @param params - Request parameters; only `timeFieldName` and the
 *   baseline/deviation min/max values are read.
 * @returns A `bool` query with one `should` range clause per time range,
 *   each using the `epoch_millis` format.
 */
export const getBaselineOrDeviationFilter = (
  params: AiopsLogRateAnalysisSchema
): estypes.QueryDslQueryContainer => {
  const { timeFieldName, baselineMin, baselineMax, deviationMin, deviationMax } = params;

  // Both clauses share the same shape and differ only in their bounds.
  const toRangeClause = (gte: number, lte: number): estypes.QueryDslQueryContainer => ({
    range: {
      [timeFieldName]: { gte, lte, format: 'epoch_millis' },
    },
  });

  return {
    bool: {
      should: [
        toRangeClause(baselineMin, baselineMax),
        toRangeClause(deviationMin, deviationMax),
      ],
    },
  };
};

/**
 * Builds the category search request for `fieldName`.
 *
 * The query is created via `getQueryWithParams`. `createCategoryRequest`
 * supplies the rest of the request (it also receives the random sampler's
 * `wrap` function), after which the request's query is overwritten with
 * the query created here.
 */
export const getCategoryRequest = (
params: AiopsLogRateAnalysisSchema,
fieldName: string,
from: number | undefined,
to: number | undefined,
filter: estypes.QueryDslQueryContainer,
{ wrap }: RandomSamplerWrapper
): estypes.SearchRequest => {
const { index, timeFieldName } = params;

const query = getQueryWithParams({
params,
termFilters: undefined,
filter,
// We're skipping the overall range query here since this
// is covered by the filter which will match docs in both baseline
// and deviation time range via `getBaselineOrDeviationFilter`.
skipRangeQuery: true,
filter: getBaselineOrDeviationFilter(params),
});

const { params: request } = createCategoryRequest(
index,
fieldName,
timeFieldName,
from,
to,
undefined,
undefined,
query,
wrap
);

// In this case we're only interested in the aggregation which
// `createCategoryRequest` returns, so we're re-applying the original
// query we create via `getQueryWithParams` here.
request.body.query = query;

return request;
};

Expand All @@ -64,9 +101,6 @@ export const fetchCategories = async (
esClient: ElasticsearchClient,
params: AiopsLogRateAnalysisSchema,
fieldNames: string[],
from: number | undefined,
to: number | undefined,
filter: estypes.QueryDslQueryContainer,
logger: Logger,
// The default value of 1 means no sampling will be used
sampleProbability: number = 1,
Expand All @@ -82,7 +116,7 @@ export const fetchCategories = async (

const settledPromises = await Promise.allSettled(
fieldNames.map((fieldName) => {
const request = getCategoryRequest(params, fieldName, from, to, filter, randomSamplerWrapper);
const request = getCategoryRequest(params, fieldName, randomSamplerWrapper);
return esClient.search(request, {
signal: abortSignal,
maxRetries: 0,
Expand Down
Loading
Loading