-
Notifications
You must be signed in to change notification settings - Fork 1.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adds instrumentation for search path #8408
Changes from 6 commits
7cd788f
a4d3ba8
ee33785
1444655
40b6043
610af40
3dc40eb
12835ee
479a937
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -87,6 +87,9 @@ | |
import org.opensearch.tasks.CancellableTask; | ||
import org.opensearch.tasks.Task; | ||
import org.opensearch.tasks.TaskId; | ||
import org.opensearch.telemetry.tracing.SpanScope; | ||
import org.opensearch.telemetry.tracing.TracerFactory; | ||
import org.opensearch.telemetry.tracing.listener.TracingActionListener; | ||
import org.opensearch.threadpool.ThreadPool; | ||
import org.opensearch.transport.RemoteClusterAware; | ||
import org.opensearch.transport.RemoteClusterService; | ||
|
@@ -156,6 +159,7 @@ public class TransportSearchAction extends HandledTransportAction<SearchRequest, | |
private final NamedWriteableRegistry namedWriteableRegistry; | ||
private final CircuitBreaker circuitBreaker; | ||
private final SearchPipelineService searchPipelineService; | ||
private final TracerFactory tracerFactory; | ||
|
||
@Inject | ||
public TransportSearchAction( | ||
|
@@ -170,7 +174,8 @@ public TransportSearchAction( | |
ActionFilters actionFilters, | ||
IndexNameExpressionResolver indexNameExpressionResolver, | ||
NamedWriteableRegistry namedWriteableRegistry, | ||
SearchPipelineService searchPipelineService | ||
SearchPipelineService searchPipelineService, | ||
TracerFactory tracerFactory | ||
) { | ||
super(SearchAction.NAME, transportService, actionFilters, (Writeable.Reader<SearchRequest>) SearchRequest::new); | ||
this.client = client; | ||
|
@@ -185,6 +190,7 @@ public TransportSearchAction( | |
this.indexNameExpressionResolver = indexNameExpressionResolver; | ||
this.namedWriteableRegistry = namedWriteableRegistry; | ||
this.searchPipelineService = searchPipelineService; | ||
this.tracerFactory = tracerFactory; | ||
} | ||
|
||
private Map<String, AliasFilter> buildPerIndexAliasFilter( | ||
|
@@ -286,7 +292,9 @@ protected void doExecute(Task task, SearchRequest searchRequest, ActionListener< | |
listener | ||
); | ||
} | ||
executeRequest(task, searchRequest, this::searchAsyncAction, listener); | ||
SpanScope scope = tracerFactory.getTracer().startSpan("SearchTask_" + task.getId()); | ||
TracingActionListener tracingActionListener = new TracingActionListener(tracerFactory, listener, scope); | ||
executeRequest(task, searchRequest, this::searchAsyncAction, tracingActionListener); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you please share a sample tracing span: what will it look like, and what attributes will be collected? Also, how will other transport actions be instrumented with any context passed from the client? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @shwetathareja, here are the sample spans.
Yes, we will add instrumentation in the foundational classes such as RestAction, TransportAction, TaskManager, etc. |
||
} | ||
|
||
/** | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -135,6 +135,8 @@ | |
import org.opensearch.search.sort.SortOrder; | ||
import org.opensearch.search.suggest.Suggest; | ||
import org.opensearch.search.suggest.completion.CompletionSuggestion; | ||
import org.opensearch.telemetry.tracing.SpanScope; | ||
import org.opensearch.telemetry.tracing.TracerFactory; | ||
import org.opensearch.threadpool.Scheduler.Cancellable; | ||
import org.opensearch.threadpool.ThreadPool; | ||
import org.opensearch.threadpool.ThreadPool.Names; | ||
|
@@ -303,6 +305,7 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv | |
private final AtomicInteger openPitContexts = new AtomicInteger(); | ||
private final String sessionId = UUIDs.randomBase64UUID(); | ||
private final Executor indexSearcherExecutor; | ||
private final TracerFactory tracerFactory; | ||
|
||
public SearchService( | ||
ClusterService clusterService, | ||
|
@@ -314,7 +317,8 @@ public SearchService( | |
FetchPhase fetchPhase, | ||
ResponseCollectorService responseCollectorService, | ||
CircuitBreakerService circuitBreakerService, | ||
Executor indexSearcherExecutor | ||
Executor indexSearcherExecutor, | ||
TracerFactory tracerFactory | ||
) { | ||
Settings settings = clusterService.getSettings(); | ||
this.threadPool = threadPool; | ||
|
@@ -362,6 +366,7 @@ public SearchService( | |
|
||
lowLevelCancellation = LOW_LEVEL_CANCELLATION_SETTING.get(settings); | ||
clusterService.getClusterSettings().addSettingsUpdateConsumer(LOW_LEVEL_CANCELLATION_SETTING, this::setLowLevelCancellation); | ||
this.tracerFactory = tracerFactory; | ||
} | ||
|
||
private void validateKeepAlives(TimeValue defaultKeepAlive, TimeValue maxKeepAlive) { | ||
|
@@ -590,13 +595,18 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, SearchSh | |
Releasable ignored = readerContext.markAsUsed(getKeepAlive(request)); | ||
SearchContext context = createContext(readerContext, request, task, true) | ||
) { | ||
final long afterQueryTime; | ||
try (SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(context)) { | ||
long afterQueryTime; | ||
final SpanScope spanScope = tracerFactory.getTracer().startSpan("QueryPhase_" + context.shardTarget().getShardId()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Gaganjuneja we need to change the
and never ask for tracer again, we need to wrap around instead. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Opened a bug #8561 to address this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @reta With this change, shall we use |
||
try (SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(context); spanScope) { | ||
addtracingAttributes(spanScope, context); | ||
loadOrExecuteQueryPhase(request, context); | ||
if (context.queryResult().hasSearchContext() == false && readerContext.singleSession()) { | ||
freeReaderContext(readerContext.id()); | ||
} | ||
afterQueryTime = executor.success(); | ||
} catch (Exception e) { | ||
spanScope.setError(e); | ||
throw e; | ||
} | ||
if (request.numberOfShards() == 1) { | ||
return executeFetchPhase(readerContext, context, afterQueryTime); | ||
|
@@ -621,14 +631,24 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, SearchSh | |
} | ||
} | ||
|
||
// Tags the given span scope with the "shard_id" and "node_id" attributes read from the context's shard target. | ||
private void addtracingAttributes(SpanScope scope, SearchContext context) { | ||
scope.addSpanAttribute("shard_id", context.shardTarget().getShardId().getId()); | ||
scope.addSpanAttribute("node_id", context.shardTarget().getNodeId()); | ||
} | ||
|
||
private QueryFetchSearchResult executeFetchPhase(ReaderContext reader, SearchContext context, long afterQueryTime) { | ||
try (SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(context, true, afterQueryTime)) { | ||
final SpanScope spanScope = tracerFactory.getTracer().startSpan("FetchPhase_" + context.shardTarget().getShardId()); | ||
try (SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(context, true, afterQueryTime); spanScope) { | ||
addtracingAttributes(spanScope, context); | ||
shortcutDocIdsToLoad(context); | ||
fetchPhase.execute(context); | ||
if (reader.singleSession()) { | ||
freeReaderContext(reader.id()); | ||
} | ||
executor.success(); | ||
} catch (Exception e) { | ||
spanScope.setError(e); | ||
Gaganjuneja marked this conversation as resolved.
Show resolved
Hide resolved
|
||
throw e; | ||
} | ||
return new QueryFetchSearchResult(context.queryResult(), context.fetchResult()); | ||
} | ||
|
@@ -649,10 +669,13 @@ public void executeQueryPhase( | |
} | ||
runAsync(getExecutor(readerContext.indexShard()), () -> { | ||
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(null); | ||
final SpanScope spanScope = tracerFactory.getTracer().startSpan("QueryPhase_" + shardSearchRequest.shardId().getId()); | ||
try ( | ||
SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, false); | ||
SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(searchContext) | ||
SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(searchContext); | ||
spanScope | ||
) { | ||
addtracingAttributes(spanScope, searchContext); | ||
searchContext.searcher().setAggregatedDfs(readerContext.getAggregatedDfs(null)); | ||
processScroll(request, readerContext, searchContext); | ||
queryPhase.execute(searchContext); | ||
|
@@ -661,6 +684,7 @@ public void executeQueryPhase( | |
return new ScrollQuerySearchResult(searchContext.queryResult(), searchContext.shardTarget()); | ||
} catch (Exception e) { | ||
logger.trace("Query phase failed", e); | ||
spanScope.setError(e); | ||
// we handle the failure in the failure listener below | ||
throw e; | ||
} | ||
|
@@ -673,10 +697,13 @@ public void executeQueryPhase(QuerySearchRequest request, SearchShardTask task, | |
final Releasable markAsUsed = readerContext.markAsUsed(getKeepAlive(shardSearchRequest)); | ||
runAsync(getExecutor(readerContext.indexShard()), () -> { | ||
readerContext.setAggregatedDfs(request.dfs()); | ||
final SpanScope spanScope = tracerFactory.getTracer().startSpan("QueryPhase_" + shardSearchRequest.shardId().getId()); | ||
try ( | ||
SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, true); | ||
SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(searchContext) | ||
SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(searchContext); | ||
spanScope | ||
) { | ||
addtracingAttributes(spanScope, searchContext); | ||
searchContext.searcher().setAggregatedDfs(request.dfs()); | ||
queryPhase.execute(searchContext); | ||
if (searchContext.queryResult().hasSearchContext() == false && readerContext.singleSession()) { | ||
|
@@ -692,6 +719,7 @@ public void executeQueryPhase(QuerySearchRequest request, SearchShardTask task, | |
return searchContext.queryResult(); | ||
} catch (Exception e) { | ||
assert TransportActions.isShardNotAvailableException(e) == false : new AssertionError(e); | ||
spanScope.setError(e); | ||
logger.trace("Query phase failed", e); | ||
// we handle the failure in the failure listener below | ||
throw e; | ||
|
@@ -728,10 +756,13 @@ public void executeFetchPhase( | |
} | ||
runAsync(getExecutor(readerContext.indexShard()), () -> { | ||
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(null); | ||
final SpanScope spanScope = tracerFactory.getTracer().startSpan("FetchPhase_" + shardSearchRequest.shardId().getId()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Append "Scroll" to the span names of the fetch and query phases here. |
||
try ( | ||
SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, false); | ||
SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(searchContext) | ||
SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(searchContext); | ||
spanScope | ||
) { | ||
addtracingAttributes(spanScope, searchContext); | ||
searchContext.assignRescoreDocIds(readerContext.getRescoreDocIds(null)); | ||
searchContext.searcher().setAggregatedDfs(readerContext.getAggregatedDfs(null)); | ||
processScroll(request, readerContext, searchContext); | ||
|
@@ -741,6 +772,7 @@ public void executeFetchPhase( | |
return new ScrollQueryFetchSearchResult(fetchSearchResult, searchContext.shardTarget()); | ||
} catch (Exception e) { | ||
assert TransportActions.isShardNotAvailableException(e) == false : new AssertionError(e); | ||
spanScope.setError(e); | ||
logger.trace("Fetch phase failed", e); | ||
// we handle the failure in the failure listener below | ||
throw e; | ||
|
@@ -760,14 +792,20 @@ public void executeFetchPhase(ShardFetchRequest request, SearchShardTask task, A | |
searchContext.assignRescoreDocIds(readerContext.getRescoreDocIds(request.getRescoreDocIds())); | ||
searchContext.searcher().setAggregatedDfs(readerContext.getAggregatedDfs(request.getAggregatedDfs())); | ||
searchContext.docIdsToLoad(request.docIds(), 0, request.docIdsSize()); | ||
final SpanScope spanScope = tracerFactory.getTracer().startSpan("FetchPhase_" + searchContext.shardTarget().getShardId()); | ||
try ( | ||
SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(searchContext, true, System.nanoTime()) | ||
SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(searchContext, true, System.nanoTime()); | ||
spanScope | ||
) { | ||
addtracingAttributes(spanScope, searchContext); | ||
fetchPhase.execute(searchContext); | ||
if (readerContext.singleSession()) { | ||
freeReaderContext(request.contextId()); | ||
} | ||
executor.success(); | ||
} catch (Exception e) { | ||
spanScope.setError(e); | ||
Gaganjuneja marked this conversation as resolved.
Show resolved
Hide resolved
|
||
throw e; | ||
} | ||
return searchContext.fetchResult(); | ||
} catch (Exception e) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.telemetry.tracing.listener; | ||
|
||
import org.opensearch.action.ActionListener; | ||
import org.opensearch.telemetry.tracing.SpanScope; | ||
import org.opensearch.telemetry.tracing.TracerFactory; | ||
|
||
/** | ||
* Handles the tracing scope and delegates the request to the action listener. | ||
* The span scope is closed after the delegate's callback has run, for both responses and failures. | ||
* @param <Response> response. | ||
*/ | ||
public class TracingActionListener<Response> implements ActionListener<Response> { | ||
|
||
private final ActionListener<Response> delegate; | ||
private final SpanScope spanScope; | ||
// NOTE(review): stored by the constructor but not read anywhere in this class as shown; confirm it is still needed. | ||
private final TracerFactory tracerFactory; | ||
|
||
/** | ||
* Creates instance. | ||
* @param tracerFactory tracer factory | ||
* @param delegate action listener to be delegated | ||
* @param spanScope tracer scope. | ||
*/ | ||
public TracingActionListener(TracerFactory tracerFactory, ActionListener<Response> delegate, SpanScope spanScope) { | ||
this.tracerFactory = tracerFactory; | ||
this.delegate = delegate; | ||
this.spanScope = spanScope; | ||
} | ||
|
||
@Override | ||
public void onResponse(Response response) { | ||
// try-with-resources closes spanScope once the delegate has handled the response, even if the delegate throws. | ||
try (spanScope) { | ||
delegate.onResponse(response); | ||
} | ||
} | ||
|
||
@Override | ||
public void onFailure(Exception e) { | ||
// The error is recorded on the span before delegating; spanScope is closed when the block exits. | ||
try (spanScope) { | ||
spanScope.setError(e); | ||
delegate.onFailure(e); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
/** | ||
* This package contains classes needed for telemetry. | ||
*/ | ||
package org.opensearch.telemetry.tracing.listener; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TracingAwareListenerWrapper.
I am not an expert on the search code path, but do you think instrumenting
SearchOperationListener
would give easy / clean access to the different phases? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@shwetathareja I tried implementing SearchOperationListener to instrument the query and fetch phases, but since startSpan now returns a SpanScope, we need to maintain state: the SpanScope returned in preQueryPhase has to be cached so that it can be ended in onQueryPhase. We could keep it in a ThreadLocal since, as far as I understand, this runs in a single-threaded fashion. What are your thoughts on that? @reta