Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Debug around ML rule execution #189307

Closed
wants to merge 15 commits into from
Closed
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* 2.0.
*/

import type { Logger } from '@kbn/core/server';

import type {
IRuleMonitoringService,
RuleExecutionContext,
Expand All @@ -16,18 +18,19 @@ export interface IPreviewRuleExecutionLogger {
}

export const createPreviewRuleExecutionLogger = (
loggedStatusChanges: Array<RuleExecutionContext & StatusChangeArgs>
loggedStatusChanges: Array<RuleExecutionContext & StatusChangeArgs>,
logger?: Logger
): IPreviewRuleExecutionLogger => {
return {
factory: ({ context }) => {
const spyLogger = {
context,

trace: () => {},
debug: () => {},
info: () => {},
warn: () => {},
error: () => {},
trace: logger?.error ?? console.error,
debug: logger?.error ?? console.error,
info: logger?.error ?? console.error,
warn: logger?.error ?? console.error,
error: logger?.error ?? console.error,

logStatusChange: (args: StatusChangeArgs): Promise<void> => {
loggedStatusChanges.push({ ...context, ...args });
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,10 @@ export const previewRulesRoute = (
const previewId = uuidv4();
const username = security?.authc.getCurrentUser(request)?.username;
const loggedStatusChanges: Array<RuleExecutionContext & StatusChangeArgs> = [];
const previewRuleExecutionLogger = createPreviewRuleExecutionLogger(loggedStatusChanges);
const previewRuleExecutionLogger = createPreviewRuleExecutionLogger(
loggedStatusChanges,
logger
);
const runState: Record<string, unknown> = {};
const logs: RulePreviewLogs[] = [];
let isAborted = false;
Expand Down Expand Up @@ -299,6 +302,9 @@ export const previewRulesRoute = (
},
})) as { state: TState });

logger.error(
`LOGGED STATUS CHANGES: ${JSON.stringify(loggedStatusChanges, null, 2)}`
);
const errors = loggedStatusChanges
.filter((item) => item.newStatus === RuleExecutionStatusEnum.failed)
.map((item) => item.message ?? 'Unknown Error');
Expand Down Expand Up @@ -455,6 +461,7 @@ export const previewRulesRoute = (
);
break;
case 'machine_learning':
logger.error('IN RULE PREVIEW FOR MACHINE_LEARNING RULE');
const mlAlertType = previewRuleTypeWrapper(createMlAlertType(ruleOptions));
await runExecutors(
mlAlertType.executor,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ export const createMlAlertType = (
isAlertSuppressionActive,
experimentalFeatures,
});

const { logger } = createOptions;
logger.error(`ML EXECUTOR STATE: ${JSON.stringify(state, null, 2)}`);
logger.error(`ML EXECUTOR RESULT: ${JSON.stringify(result, null, 2)}`);
return { ...result, state };
},
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,6 @@ export const findMlSignals = async ({
maxRecords: maxSignals,
exceptionFilter,
};
console.error('ML Alert search params:', JSON.stringify(params, null, 2));
return getAnomalies(params, mlAnomalySearch);
};
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ export const mlExecutor = async ({
const ruleParams = completeRule.ruleParams;

return withSecuritySpan('mlExecutor', async () => {
ruleExecutionLogger.error('ML EXECUTOR');
if (ml == null) {
throw new Error('ML plugin unavailable during rule execution');
}
Expand All @@ -87,9 +88,11 @@ export const mlExecutor = async ({
const summaryJobs = await ml
.jobServiceProvider(fakeRequest, services.savedObjectsClient)
.jobsSummary(ruleParams.machineLearningJobId);
ruleExecutionLogger.error('ALL ML JOBS SUMMARY', JSON.stringify(summaryJobs, null, 2));
const jobSummaries = summaryJobs.filter((job) =>
ruleParams.machineLearningJobId.includes(job.id)
);
ruleExecutionLogger.error('RELEVANT ML JOBS SUMMARY', JSON.stringify(jobSummaries, null, 2));

if (
jobSummaries.length < 1 ||
Expand All @@ -114,6 +117,7 @@ export const mlExecutor = async ({

let anomalyResults: AnomalyResults;
try {
ruleExecutionLogger.error('finding ML signals...');
anomalyResults = await findMlSignals({
ml,
// Using fake KibanaRequest as it is needed to satisfy the ML Services API, but can be empty as it is
Expand All @@ -127,6 +131,10 @@ export const mlExecutor = async ({
maxSignals: tuple.maxSignals,
exceptionFilter,
});
ruleExecutionLogger.error(
'ML Alert search results:',
JSON.stringify(anomalyResults, null, 2)
);
} catch (error) {
if (typeof error.message === 'string' && (error.message as string).endsWith('missing')) {
result.userError = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ export const waitForRuleStatus = async (
const rule = response.body;
const ruleStatus = rule?.execution_summary?.last_execution.status;
const ruleStatusDate = rule?.execution_summary?.last_execution.date;
log.debug('waitForRuleStatus response', JSON.stringify(rule, null, 2));

return (
rule != null &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import {
SPACE_IDS,
VERSION,
} from '@kbn/rule-data-utils';
import { DATAFEED_STATE, JOB_STATE } from '@kbn/ml-plugin/common';
import { MachineLearningRuleCreateProps } from '@kbn/security-solution-plugin/common/api/detection_engine';
import {
ALERT_ANCESTORS,
Expand All @@ -46,6 +47,7 @@ import {
previewRule,
previewRuleWithExceptionEntries,
setupMlModulesWithRetry,
waitForAnomalies,
} from '../../../../utils';
import {
createRule,
Expand All @@ -71,6 +73,7 @@ export default ({ getService }: FtrProviderContext) => {
const dataPathBuilder = new EsArchivePathBuilder(isServerless);
const auditPath = dataPathBuilder.getPath('auditbeat/hosts');
const retry = getService('retry');
const ml = getService('ml');

const siemModule = 'security_linux_v3';
const mlJobId = 'v3_linux_anomalous_network_activity';
Expand All @@ -81,6 +84,7 @@ export default ({ getService }: FtrProviderContext) => {
severity: 'critical',
type: 'machine_learning',
anomaly_threshold: 30,
interval: '1m',
machine_learning_job_id: mlJobId,
from: '1900-01-01T00:00:00.000Z',
rule_id: 'ml-rule-id',
Expand All @@ -94,6 +98,14 @@ export default ({ getService }: FtrProviderContext) => {
await setupMlModulesWithRetry({ module: siemModule, supertest, retry });
await forceStartDatafeeds({ jobId: mlJobId, rspCode: 200, supertest });
await esArchiver.load('x-pack/test/functional/es_archives/security_solution/anomalies');
console.log('waiting for job state');
await ml.api.waitForJobState(mlJobId, JOB_STATE.OPENED);
console.log('waiting for datafeed state');
await ml.api.waitForDatafeedState(`datafeed-${mlJobId}`, DATAFEED_STATE.STARTED);
console.log('done waiting for ML state');
console.log('waiting for anomalies');
await waitForAnomalies({ es, jobId: mlJobId, retry });
console.log('done waiting for anomalies');
});

after(async () => {
Expand Down Expand Up @@ -205,6 +217,7 @@ export default ({ getService }: FtrProviderContext) => {
.expect(200);

const ruleResponse = response.body;
console.log('missing job ruleResponse', JSON.stringify(ruleResponse, null, 2));
expect(ruleResponse.execution_summary.last_execution.message.includes('missing')).toEqual(
true
);
Expand Down Expand Up @@ -342,6 +355,21 @@ export default ({ getService }: FtrProviderContext) => {
it('should be enriched alert with criticality_level', async () => {
const { previewId } = await previewRule({ supertest, rule });
const previewAlerts = await getPreviewAlerts({ es, previewId });
const allPreviewAlerts = await es.search({
index: '.preview.alerts-security.alerts-*',
size: 1000,
query: {
bool: {
filter: {
term: {
'kibana.alert.rule.type': 'machine_learning',
},
},
},
},
});

console.log('preview alerts', JSON.stringify(allPreviewAlerts, null, 2));

expect(previewAlerts).toHaveLength(1);
expect(previewAlerts[0]._source).toEqual(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ export const getMetricsWithRetry = (
reset: boolean = false,
callback?: (metrics: NodeMetrics) => boolean
): Promise<NodeMetrics> => {
return retry.try(async () => {
return retry.tryForTime(6 * 60 * 1000, async () => {
const metrics = await getMetricsRequest(request, reset);

if (metrics.metrics) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
*/

import type SuperTest from 'supertest';
import type { Client } from '@elastic/elasticsearch';
import { RetryService } from '@kbn/ftr-common-functional-services';
import { ML_GROUP_ID } from '@kbn/security-solution-plugin/common/constants';
import { getCommonRequestHeader } from '../../../../../functional/services/ml/common_api';
Expand Down Expand Up @@ -92,3 +93,42 @@ export const forceStartDatafeeds = async ({

return body;
};

export const waitForAnomalies = async ({
es,
jobId,
retry,
}: {
es: Client;
jobId: string;
retry: RetryService;
}) =>
retry.try(async () => {
const anomaliesResponse = await es.search({
index: '.ml-anomalies-*',
body: {
query: {
bool: {
filter: [
{ term: { job_id: jobId } },
{
query_string: {
query: 'result_type:record',
analyze_wildcard: false,
},
},
{ term: { is_interim: false } },
],
},
},
},
});

const anomalies = anomaliesResponse.hits?.hits ?? [];

if (!anomalies.length) {
throw new Error('No anomalies found');
}

return anomalies;
});