Skip to content

Commit

Permalink
Add synonyms sets information to cluster stats (elastic#97900)
Browse files Browse the repository at this point in the history
  • Loading branch information
carlosdelest authored Jul 27, 2023
1 parent c848f31 commit c0a99ba
Show file tree
Hide file tree
Showing 8 changed files with 458 additions and 17 deletions.
3 changes: 2 additions & 1 deletion docs/reference/cluster/stats.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -1595,7 +1595,8 @@ The API returns the following response:
"built_in_char_filters": [],
"built_in_tokenizers": [],
"built_in_filters": [],
"built_in_analyzers": []
"built_in_analyzers": [],
"synonyms": {}
},
"versions": [
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
---
# Verifies that cluster stats report synonym usage under indices.analysis.synonyms,
# broken down by where the synonym rules come from: inline rules ("synonyms"),
# a file ("synonyms_path") or a synonyms set ("synonyms_set").
"get cluster stats returns synonym sets stats":

- skip:
version: " - 8.9.99"
reason: "synonym sets stats are added for v8.10.0"

# Baseline: before any index is created the synonyms object is empty.
- do:
cluster.stats: {}

- length: { indices.analysis.synonyms: 0 }

# test-index1: two inline synonym filters, three path-based filters (three distinct paths)
# and one filter that references the synonyms set "reused-synonym-set".
# The "length" filter is not a synonym filter.
- do:
indices.create:
index: test-index1
body:
settings:
analysis:
filter:
bigram_max_size:
type: length
max: 16
min: 0
synonyms_inline_filter:
type: synonym
synonyms: ["foo bar", "bar => baz"]

other_inline_filter:
type: synonym
synonyms: ["foo bar baz"]

synonyms_path_filter:
type: synonym
synonyms_path: "/a/reused/path"

other_synonyms_path_filter:
type: synonym_graph
synonyms_path: "/a/different/path"

another_synonyms_path_filter:
type: synonym_graph
synonyms_path: "/another/different/path"

synonyms_set_filter:
type: synonym_graph
synonyms_set: reused-synonym-set



# test-index2: one new synonyms set, the synonyms set already used by test-index1,
# and one inline filter whose rules repeat those of test-index1.
# The "stemmer" filter is not a synonym filter.
- do:
indices.create:
index: test-index2
body:
settings:
analysis:
filter:
en-stem-filter:
name: light_english
type: stemmer
language: light_english

other_synonyms_filter:
type: synonym
synonyms_set: another-synonym-set

a_repeated_synonyms_set_filter:
type: synonym
synonyms_set: reused-synonym-set

repeated_inline_filter:
type: synonym
synonyms: ["foo bar", "bar => baz"]



# test-index3: a third distinct synonyms set, the reused synonyms set again,
# and another inline filter with the same rules as before.
- do:
indices.create:
index: test-index3
body:
settings:
analysis:
filter:
other_synonyms_filter:
type: synonym
synonyms_set: a-different-synonym-set

a_repeated_synonyms_set_filter:
type: synonym
synonyms_set: reused-synonym-set

more_inline_filter:
type: synonym
synonyms: ["foo bar", "bar => baz"]



- do:
cluster.stats: {}

# Three entries are expected: synonyms, synonyms_path and synonyms_set.
- length: { indices.analysis.synonyms: 3 }
# Inline: 2 filters in test-index1 + 1 in test-index2 + 1 in test-index3 = 4;
# identical inline rule lists are still counted once per filter.
- match: { indices.analysis.synonyms.synonyms.count: 4 }
- match: { indices.analysis.synonyms.synonyms.index_count: 3 }
# Paths: three distinct paths, all configured in test-index1 only.
- match: { indices.analysis.synonyms.synonyms_path.count: 3 }
- match: { indices.analysis.synonyms.synonyms_path.index_count: 1 }
# Sets: reused-synonym-set, another-synonym-set and a-different-synonym-set;
# the reused set is counted once although all three indices reference it.
- match: { indices.analysis.synonyms.synonyms_set.count: 3 }
- match: { indices.analysis.synonyms.synonyms_set.index_count: 3 }
3 changes: 2 additions & 1 deletion server/src/main/java/org/elasticsearch/TransportVersion.java
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,10 @@ private static TransportVersion registerTransportVersion(int id, String uniqueId
public static final TransportVersion V_8_500_042 = registerTransportVersion(8_500_042, "763b4801-a4fc-47c4-aff5-7f5a757b8a07");
public static final TransportVersion V_8_500_043 = registerTransportVersion(8_500_043, "50baabd14-7f5c-4f8c-9351-94e0d397aabc");
public static final TransportVersion V_8_500_044 = registerTransportVersion(8_500_044, "96b83320-2317-4e9d-b735-356f18c1d76a");
public static final TransportVersion V_8_500_045 = registerTransportVersion(8_500_045, "24a596dd-c843-4c0a-90b3-759697d74026");

private static class CurrentHolder {
private static final TransportVersion CURRENT = findCurrent(V_8_500_044);
private static final TransportVersion CURRENT = findCurrent(V_8_500_045);

// finds the pluggable current version, or uses the given fallback
private static TransportVersion findCurrent(TransportVersion fallback) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.elasticsearch.action.admin.cluster.stats;

import org.elasticsearch.TransportVersion;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.MappingMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
Expand All @@ -33,12 +34,19 @@
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;

import static org.elasticsearch.TransportVersion.V_8_500_045;

/**
* Statistics about analysis usage.
*/
public final class AnalysisStats implements ToXContentFragment, Writeable {

private static final TransportVersion SYNONYM_SETS_VERSION = V_8_500_045;

private static final Set<String> SYNONYM_FILTER_TYPES = Set.of("synonym", "synonym_graph");

/**
* Create {@link AnalysisStats} from the given cluster state.
*/
Expand All @@ -51,6 +59,9 @@ public static AnalysisStats of(Metadata metadata, Runnable ensureNotCancelled) {
final Map<String, IndexFeatureStats> usedBuiltInTokenizers = new HashMap<>();
final Map<String, IndexFeatureStats> usedBuiltInTokenFilters = new HashMap<>();
final Map<String, IndexFeatureStats> usedBuiltInAnalyzers = new HashMap<>();
final Map<String, SynonymsStats> usedSynonyms = new HashMap<>();
final Set<String> synonymsIdsUsedInIndices = new HashSet<>();
final Set<String> synonymsIdsUsed = new HashSet<>();

final Map<MappingMetadata, Integer> mappingCounts = new IdentityHashMap<>(metadata.getMappingsByHash().size());
for (IndexMetadata indexMetadata : metadata) {
Expand Down Expand Up @@ -118,6 +129,13 @@ public static AnalysisStats of(Metadata metadata, Runnable ensureNotCancelled) {
Map<String, Settings> tokenFilterSettings = indexSettings.getGroups("index.analysis.filter");
usedBuiltInTokenFilters.keySet().removeAll(tokenFilterSettings.keySet());
aggregateAnalysisTypes(tokenFilterSettings.values(), usedTokenFilterTypes, indexTokenFilterTypes);
aggregateSynonymsStats(
tokenFilterSettings.values(),
usedSynonyms,
indexMetadata.getIndex().getName(),
synonymsIdsUsed,
synonymsIdsUsedInIndices
);
countMapping(mappingCounts, indexMetadata);
}
for (Map.Entry<MappingMetadata, Integer> mappingAndCount : mappingCounts.entrySet()) {
Expand Down Expand Up @@ -147,7 +165,8 @@ public static AnalysisStats of(Metadata metadata, Runnable ensureNotCancelled) {
usedBuiltInCharFilters.values(),
usedBuiltInTokenizers.values(),
usedBuiltInTokenFilters.values(),
usedBuiltInAnalyzers.values()
usedBuiltInAnalyzers.values(),
usedSynonyms
);
}

Expand Down Expand Up @@ -176,6 +195,39 @@ private static void aggregateAnalysisTypes(
}
}

/**
 * Accumulates synonym usage statistics for the token filters of one index.
 * <p>
 * Only filters whose {@code type} is one of {@code SYNONYM_FILTER_TYPES} are considered. Each such filter is
 * classified by the source of its rules: {@code synonyms_set}, then {@code synonyms_path}, and otherwise inline
 * {@code synonyms} (a synonym filter that declares neither a set nor a path is treated as inline).
 * <ul>
 *   <li>{@code indexCount} is incremented at most once per (rule source, index) pair.</li>
 *   <li>{@code count} is incremented once per distinct set name or path across all indices, and once per
 *       filter for inline rules, which are never de-duplicated.</li>
 * </ul>
 *
 * @param filterSettings          settings of every token filter configured on the index
 * @param synonymsStats           accumulator keyed by rule source; entries are created on demand and updated in place
 * @param indexName               name of the index the filters belong to
 * @param synonymsIdsUsed         keys of the sets / paths already counted; updated in place
 * @param synonymIdsUsedInIndices keys of the (rule source, index) pairs already counted; updated in place
 */
private static void aggregateSynonymsStats(
    Collection<Settings> filterSettings,
    Map<String, SynonymsStats> synonymsStats,
    String indexName,
    Set<String> synonymsIdsUsed,
    Set<String> synonymIdsUsedInIndices
) {
    for (Settings filterComponentSettings : filterSettings) {
        final String type = filterComponentSettings.get("type");
        // SYNONYM_FILTER_TYPES is created with Set.of(...), whose contains(null) throws a NullPointerException,
        // so a filter without a "type" setting must be skipped explicitly.
        if (type == null || SYNONYM_FILTER_TYPES.contains(type) == false) {
            continue;
        }

        boolean isInline = false;
        String synonymRuleType = "synonyms_set";
        // Avoid reading the inline "synonyms" setting: doing so would turn a potentially large list of
        // synonym rules into a string. Inline usage is inferred from the absence of a set and a path.
        String synonymId = filterComponentSettings.get(synonymRuleType);
        if (synonymId == null) {
            synonymRuleType = "synonyms_path";
            synonymId = filterComponentSettings.get(synonymRuleType);
        }
        if (synonymId == null) {
            synonymRuleType = "synonyms";
            isInline = true;
        }

        SynonymsStats stat = synonymsStats.computeIfAbsent(synonymRuleType, id -> new SynonymsStats());
        // The ':' separator keeps keys unambiguous: without it "synonyms" + "_setX" and "synonyms_set" + "X"
        // would produce the same key.
        if (synonymIdsUsedInIndices.add(synonymRuleType + ":" + indexName)) {
            stat.indexCount++;
        }
        if (isInline || synonymsIdsUsed.add(synonymRuleType + ":" + synonymId)) {
            stat.count++;
        }
    }
}

private static Set<IndexFeatureStats> sort(Collection<IndexFeatureStats> set) {
List<IndexFeatureStats> list = new ArrayList<>(set);
list.sort(Comparator.comparing(IndexFeatureStats::getName));
Expand All @@ -185,6 +237,8 @@ private static Set<IndexFeatureStats> sort(Collection<IndexFeatureStats> set) {
private final Set<IndexFeatureStats> usedCharFilters, usedTokenizers, usedTokenFilters, usedAnalyzers;
private final Set<IndexFeatureStats> usedBuiltInCharFilters, usedBuiltInTokenizers, usedBuiltInTokenFilters, usedBuiltInAnalyzers;

private final Map<String, SynonymsStats> usedSynonyms;

AnalysisStats(
Collection<IndexFeatureStats> usedCharFilters,
Collection<IndexFeatureStats> usedTokenizers,
Expand All @@ -193,7 +247,8 @@ private static Set<IndexFeatureStats> sort(Collection<IndexFeatureStats> set) {
Collection<IndexFeatureStats> usedBuiltInCharFilters,
Collection<IndexFeatureStats> usedBuiltInTokenizers,
Collection<IndexFeatureStats> usedBuiltInTokenFilters,
Collection<IndexFeatureStats> usedBuiltInAnalyzers
Collection<IndexFeatureStats> usedBuiltInAnalyzers,
Map<String, SynonymsStats> usedSynonyms
) {
this.usedCharFilters = sort(usedCharFilters);
this.usedTokenizers = sort(usedTokenizers);
Expand All @@ -203,6 +258,7 @@ private static Set<IndexFeatureStats> sort(Collection<IndexFeatureStats> set) {
this.usedBuiltInTokenizers = sort(usedBuiltInTokenizers);
this.usedBuiltInTokenFilters = sort(usedBuiltInTokenFilters);
this.usedBuiltInAnalyzers = sort(usedBuiltInAnalyzers);
this.usedSynonyms = new TreeMap<>(usedSynonyms);
}

public AnalysisStats(StreamInput input) throws IOException {
Expand All @@ -214,6 +270,11 @@ public AnalysisStats(StreamInput input) throws IOException {
usedBuiltInTokenizers = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
usedBuiltInTokenFilters = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
usedBuiltInAnalyzers = Collections.unmodifiableSet(new LinkedHashSet<>(input.readList(IndexFeatureStats::new)));
if (input.getTransportVersion().onOrAfter(SYNONYM_SETS_VERSION)) {
usedSynonyms = input.readImmutableMap(SynonymsStats::new);
} else {
usedSynonyms = Collections.emptyMap();
}
}

@Override
Expand All @@ -226,6 +287,9 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeCollection(usedBuiltInTokenizers);
out.writeCollection(usedBuiltInTokenFilters);
out.writeCollection(usedBuiltInAnalyzers);
if (out.getTransportVersion().onOrAfter(SYNONYM_SETS_VERSION)) {
out.writeMap(usedSynonyms, StreamOutput::writeString, (o, v) -> v.writeTo(o));
}
}

/**
Expand Down Expand Up @@ -284,6 +348,10 @@ public Set<IndexFeatureStats> getUsedBuiltInAnalyzers() {
return usedBuiltInAnalyzers;
}

/**
 * Return synonym usage statistics, keyed by the source of the synonym rules.
 * <p>
 * The returned map is a read-only view: the backing map may be a mutable {@link TreeMap} when this
 * instance was built from cluster metadata, and callers must not be able to alter the stats through it.
 */
public Map<String, SynonymsStats> getUsedSynonyms() {
    return Collections.unmodifiableMap(usedSynonyms);
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
Expand All @@ -296,7 +364,8 @@ public boolean equals(Object o) {
&& Objects.equals(usedBuiltInCharFilters, that.usedBuiltInCharFilters)
&& Objects.equals(usedBuiltInTokenizers, that.usedBuiltInTokenizers)
&& Objects.equals(usedBuiltInTokenFilters, that.usedBuiltInTokenFilters)
&& Objects.equals(usedBuiltInAnalyzers, that.usedBuiltInAnalyzers);
&& Objects.equals(usedBuiltInAnalyzers, that.usedBuiltInAnalyzers)
&& Objects.equals(usedSynonyms, that.usedSynonyms);
}

@Override
Expand All @@ -309,7 +378,8 @@ public int hashCode() {
usedBuiltInCharFilters,
usedBuiltInTokenizers,
usedBuiltInTokenFilters,
usedBuiltInAnalyzers
usedBuiltInAnalyzers,
usedSynonyms
);
}

Expand All @@ -333,7 +403,11 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
toXContentCollection(builder, params, "built_in_tokenizers", usedBuiltInTokenizers);
toXContentCollection(builder, params, "built_in_filters", usedBuiltInTokenFilters);
toXContentCollection(builder, params, "built_in_analyzers", usedBuiltInAnalyzers);
builder.field("synonyms");
builder.map(usedSynonyms);

builder.endObject();

return builder;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,9 @@ public final XContentBuilder toXContent(XContentBuilder builder, Params params)
/**
 * Writes additional content for this stats object; this implementation writes nothing.
 * <p>
 * NOTE(review): presumably an extension hook that subclasses override and that {@code toXContent}
 * invokes while rendering -- confirm against the full class.
 *
 * @param builder the builder to write to
 * @param params  rendering parameters
 * @throws IOException declared for implementations that write to the builder
 */
protected void doXContent(XContentBuilder builder, Params params) throws IOException {

}

/**
 * Renders the feature name together with its usage counters, mainly for debugging and test output.
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder("IndexFeatureStats{");
    text.append("name='").append(name).append('\'');
    text.append(", count=").append(count);
    text.append(", indexCount=").append(indexCount);
    return text.append('}').toString();
}
}
Loading

0 comments on commit c0a99ba

Please sign in to comment.