diff --git a/server/src/internalClusterTest/java/org/elasticsearch/monitor/metrics/IndicesMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/monitor/metrics/IndicesMetricsIT.java new file mode 100644 index 0000000000000..c9cff473353d5 --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/monitor/metrics/IndicesMetricsIT.java @@ -0,0 +1,245 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.monitor.metrics; + +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.PluginsService; +import org.elasticsearch.telemetry.Measurement; +import org.elasticsearch.telemetry.TestTelemetryPlugin; +import org.elasticsearch.test.ESIntegTestCase; +import org.hamcrest.Matcher; + +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.index.mapper.DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasSize; + +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, numClientNodes = 0) +public class IndicesMetricsIT extends ESIntegTestCase { + + public static class TestAPMInternalSettings extends Plugin { + @Override + public List> getSettings() { + return List.of( + Setting.timeSetting("telemetry.agent.metrics_interval", TimeValue.timeValueSeconds(0), Setting.Property.NodeScope) + ); + } + } + + @Override + protected Collection> nodePlugins() { + return List.of(TestTelemetryPlugin.class, TestAPMInternalSettings.class); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal, otherSettings)) + .put("telemetry.agent.metrics_interval", TimeValue.timeValueSeconds(0)) // disable metrics cache refresh delay + .build(); + } + + static final String STANDARD_INDEX_COUNT = "es.indices.standard.total"; + static final String STANDARD_DOCS_COUNT = "es.indices.standard.docs.total"; + static final String STANDARD_BYTES_SIZE = "es.indices.standard.bytes.total"; + + static final String TIME_SERIES_INDEX_COUNT = "es.indices.time_series.total"; + static final String TIME_SERIES_DOCS_COUNT = "es.indices.time_series.docs.total"; + static final String TIME_SERIES_BYTES_SIZE = "es.indices.time_series.bytes.total"; + + static final String LOGSDB_INDEX_COUNT = "es.indices.logsdb.total"; + static final String LOGSDB_DOCS_COUNT = "es.indices.logsdb.docs.total"; + static final String LOGSDB_BYTES_SIZE = "es.indices.logsdb.bytes.total"; + + public void testIndicesMetrics() { + String node = internalCluster().startNode(); + ensureStableCluster(1); + final TestTelemetryPlugin telemetry = internalCluster().getInstance(PluginsService.class, node) + .filterPlugins(TestTelemetryPlugin.class) + .findFirst() + .orElseThrow(); + telemetry.resetMeter(); + long numStandardIndices = randomIntBetween(1, 5); + long numStandardDocs = populateStandardIndices(numStandardIndices); + collectThenAssertMetrics( + telemetry, + 1, + Map.of( + STANDARD_INDEX_COUNT, + equalTo(numStandardIndices), + STANDARD_DOCS_COUNT, + equalTo(numStandardDocs), + STANDARD_BYTES_SIZE, + greaterThan(0L), + + TIME_SERIES_INDEX_COUNT, + equalTo(0L), + TIME_SERIES_DOCS_COUNT, + equalTo(0L), + TIME_SERIES_BYTES_SIZE, + equalTo(0L), + + LOGSDB_INDEX_COUNT, + equalTo(0L), + LOGSDB_DOCS_COUNT, + equalTo(0L), + LOGSDB_BYTES_SIZE, + equalTo(0L) + ) + ); + + long numTimeSeriesIndices = randomIntBetween(1, 5); + long numTimeSeriesDocs = populateTimeSeriesIndices(numTimeSeriesIndices); + collectThenAssertMetrics( + telemetry, + 2, + Map.of( + STANDARD_INDEX_COUNT, + equalTo(numStandardIndices), + STANDARD_DOCS_COUNT, + equalTo(numStandardDocs), + STANDARD_BYTES_SIZE, + greaterThan(0L), + + TIME_SERIES_INDEX_COUNT, + equalTo(numTimeSeriesIndices), + TIME_SERIES_DOCS_COUNT, + equalTo(numTimeSeriesDocs), + TIME_SERIES_BYTES_SIZE, + greaterThan(20L), + + LOGSDB_INDEX_COUNT, + equalTo(0L), + LOGSDB_DOCS_COUNT, + equalTo(0L), + LOGSDB_BYTES_SIZE, + equalTo(0L) + ) + ); + + long numLogsdbIndices = randomIntBetween(1, 5); + long numLogsdbDocs = populateLogsdbIndices(numLogsdbIndices); + collectThenAssertMetrics( + telemetry, + 3, + Map.of( + STANDARD_INDEX_COUNT, + equalTo(numStandardIndices), + STANDARD_DOCS_COUNT, + equalTo(numStandardDocs), + STANDARD_BYTES_SIZE, + greaterThan(0L), + + TIME_SERIES_INDEX_COUNT, + equalTo(numTimeSeriesIndices), + TIME_SERIES_DOCS_COUNT, + equalTo(numTimeSeriesDocs), + TIME_SERIES_BYTES_SIZE, + greaterThan(20L), + + LOGSDB_INDEX_COUNT, + equalTo(numLogsdbIndices), + LOGSDB_DOCS_COUNT, + equalTo(numLogsdbDocs), + LOGSDB_BYTES_SIZE, + greaterThan(0L) + ) + ); + } + + void collectThenAssertMetrics(TestTelemetryPlugin telemetry, int times, Map> matchers) { + telemetry.collect(); + for (Map.Entry> e : matchers.entrySet()) { + String name = e.getKey(); + List measurements = telemetry.getLongGaugeMeasurement(name); + assertThat(name, measurements, hasSize(times)); + assertThat(name, measurements.get(measurements.size() - 1).getLong(), e.getValue()); + } + } + + int populateStandardIndices(long numIndices) { + int totalDocs = 0; + for (int i = 0; i < numIndices; i++) { + String indexName = "standard-" + i; + createIndex(indexName); + int numDocs = between(1, 5); + for (int d = 0; d < numDocs; d++) { + indexDoc(indexName, Integer.toString(d), "f", Integer.toString(d)); + } + totalDocs += numDocs; + flush(indexName); + } + return totalDocs; + } + + int populateTimeSeriesIndices(long numIndices) { + int totalDocs = 0; + for (int i = 0; i < numIndices; i++) { + String indexName = "time_series-" + i; + Settings settings = Settings.builder().put("mode", "time_series").putList("routing_path", List.of("host")).build(); + client().admin() + .indices() + .prepareCreate(indexName) + .setSettings(settings) + .setMapping( + "@timestamp", + "type=date", + "host", + "type=keyword,time_series_dimension=true", + "cpu", + "type=long,time_series_metric=gauge" + ) + .get(); + long timestamp = DEFAULT_DATE_TIME_FORMATTER.parseMillis("2024-04-15T00:00:00Z"); + int numDocs = between(1, 5); + for (int d = 0; d < numDocs; d++) { + timestamp += between(1, 5) * 1000L; + client().prepareIndex(indexName) + .setSource("@timestamp", timestamp, "host", randomFrom("prod", "qa"), "cpu", randomIntBetween(1, 100)) + .get(); + } + totalDocs += numDocs; + flush(indexName); + } + return totalDocs; + } + + int populateLogsdbIndices(long numIndices) { + int totalDocs = 0; + for (int i = 0; i < numIndices; i++) { + String indexName = "logsdb-" + i; + Settings settings = Settings.builder().put("mode", "logsdb").build(); + client().admin() + .indices() + .prepareCreate(indexName) + .setSettings(settings) + .setMapping("@timestamp", "type=date", "host.name", "type=keyword", "cpu", "type=long") + .get(); + long timestamp = DEFAULT_DATE_TIME_FORMATTER.parseMillis("2024-04-15T00:00:00Z"); + int numDocs = between(1, 5); + for (int d = 0; d < numDocs; d++) { + timestamp += between(1, 5) * 1000L; + client().prepareIndex(indexName) + .setSource("@timestamp", timestamp, "host.name", randomFrom("prod", "qa"), "cpu", randomIntBetween(1, 100)) + .get(); + } + totalDocs += numDocs; + flush(indexName); + } + return totalDocs; + } +} diff --git a/server/src/main/java/org/elasticsearch/monitor/metrics/IndicesMetrics.java b/server/src/main/java/org/elasticsearch/monitor/metrics/IndicesMetrics.java new file mode 100644 index 0000000000000..17e290283d5e0 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/monitor/metrics/IndicesMetrics.java @@ -0,0 +1,177 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.monitor.metrics; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.AlreadyClosedException; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.common.component.AbstractLifecycleComponent; +import org.elasticsearch.common.util.SingleObjectCache; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.shard.IllegalIndexShardStateException; +import org.elasticsearch.index.shard.IndexShard; +import org.elasticsearch.indices.IndicesService; +import org.elasticsearch.telemetry.metric.LongWithAttributes; +import org.elasticsearch.telemetry.metric.MeterRegistry; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.EnumMap; +import java.util.List; +import java.util.Map; + +/** + * {@link IndicesMetrics} monitors index statistics on an Elasticsearch node and exposes them as metrics + * through the provided {@link MeterRegistry}. It tracks the current total number of indices, document count, and + * store size (in bytes) for each index mode. + */ +public class IndicesMetrics extends AbstractLifecycleComponent { + private final Logger logger = LogManager.getLogger(IndicesMetrics.class); + private final MeterRegistry registry; + private final List metrics = new ArrayList<>(); + private final IndicesStatsCache stateCache; + + public IndicesMetrics(MeterRegistry meterRegistry, IndicesService indicesService, TimeValue metricsInterval) { + this.registry = meterRegistry; + // Use half of the update interval to ensure that results aren't cached across updates, + // while preventing the cache from expiring when reading different gauges within the same update. + var cacheExpiry = new TimeValue(metricsInterval.getMillis() / 2); + this.stateCache = new IndicesStatsCache(indicesService, cacheExpiry); + } + + private static List registerAsyncMetrics(MeterRegistry registry, IndicesStatsCache cache) { + List metrics = new ArrayList<>(IndexMode.values().length * 3); + assert IndexMode.values().length == 3 : "index modes have changed"; + for (IndexMode indexMode : IndexMode.values()) { + String name = indexMode.getName(); + metrics.add( + registry.registerLongGauge( + "es.indices." + name + ".total", + "total number of " + name + " indices", + "unit", + () -> new LongWithAttributes(cache.getOrRefresh().get(indexMode).numIndices) + ) + ); + metrics.add( + registry.registerLongGauge( + "es.indices." + name + ".docs.total", + "total documents of " + name + " indices", + "unit", + () -> new LongWithAttributes(cache.getOrRefresh().get(indexMode).numDocs) + ) + ); + metrics.add( + registry.registerLongGauge( + "es.indices." + name + ".bytes.total", + "total size in bytes of " + name + " indices", + "unit", + () -> new LongWithAttributes(cache.getOrRefresh().get(indexMode).numBytes) + ) + ); + } + return metrics; + } + + @Override + protected void doStart() { + metrics.addAll(registerAsyncMetrics(registry, stateCache)); + } + + @Override + protected void doStop() { + stateCache.stopRefreshing(); + } + + @Override + protected void doClose() throws IOException { + metrics.forEach(metric -> { + try { + metric.close(); + } catch (Exception e) { + logger.warn("metrics close() method should not throw Exception", e); + } + }); + } + + static class IndexStats { + int numIndices = 0; + long numDocs = 0; + long numBytes = 0; + } + + private static class IndicesStatsCache extends SingleObjectCache> { + private static final Map MISSING_STATS; + static { + MISSING_STATS = new EnumMap<>(IndexMode.class); + for (IndexMode value : IndexMode.values()) { + MISSING_STATS.put(value, new IndexStats()); + } + } + + private boolean refresh; + private final IndicesService indicesService; + + IndicesStatsCache(IndicesService indicesService, TimeValue interval) { + super(interval, MISSING_STATS); + this.indicesService = indicesService; + this.refresh = true; + } + + private Map internalGetIndicesStats() { + Map stats = new EnumMap<>(IndexMode.class); + for (IndexMode mode : IndexMode.values()) { + stats.put(mode, new IndexStats()); + } + for (IndexService indexService : indicesService) { + for (IndexShard indexShard : indexService) { + if (indexShard.isSystem()) { + continue; // skip system indices + } + final ShardRouting shardRouting = indexShard.routingEntry(); + if (shardRouting.primary() == false) { + continue; // count primaries only + } + if (shardRouting.recoverySource() != null) { + continue; // exclude relocating shards + } + final IndexMode indexMode = indexShard.indexSettings().getMode(); + final IndexStats indexStats = stats.get(indexMode); + if (shardRouting.shardId().id() == 0) { + indexStats.numIndices++; + } + try { + indexStats.numDocs += indexShard.commitStats().getNumDocs(); + indexStats.numBytes += indexShard.storeStats().sizeInBytes(); + } catch (IllegalIndexShardStateException | AlreadyClosedException ignored) { + // ignored + } + } + } + return stats; + } + + @Override + protected Map refresh() { + return refresh ? internalGetIndicesStats() : getNoRefresh(); + } + + @Override + protected boolean needsRefresh() { + return getNoRefresh() == MISSING_STATS || super.needsRefresh(); + } + + void stopRefreshing() { + this.refresh = false; + } + } +} diff --git a/server/src/main/java/org/elasticsearch/node/Node.java b/server/src/main/java/org/elasticsearch/node/Node.java index 1447ac1c5b59b..5024cc5468866 100644 --- a/server/src/main/java/org/elasticsearch/node/Node.java +++ b/server/src/main/java/org/elasticsearch/node/Node.java @@ -66,6 +66,7 @@ import org.elasticsearch.injection.guice.Injector; import org.elasticsearch.monitor.fs.FsHealthService; import org.elasticsearch.monitor.jvm.JvmInfo; +import org.elasticsearch.monitor.metrics.IndicesMetrics; import org.elasticsearch.monitor.metrics.NodeMetrics; import org.elasticsearch.node.internal.TerminationHandler; import org.elasticsearch.plugins.ClusterCoordinationPlugin; @@ -441,6 +442,7 @@ public void onTimeout(TimeValue timeout) { } injector.getInstance(NodeMetrics.class).start(); + injector.getInstance(IndicesMetrics.class).start(); injector.getInstance(HealthPeriodicLogger.class).start(); logger.info("started {}", transportService.getLocalNode()); @@ -489,6 +491,7 @@ private void stop() { stopIfStarted(SearchService.class); stopIfStarted(TransportService.class); stopIfStarted(NodeMetrics.class); + stopIfStarted(IndicesMetrics.class); pluginLifecycleComponents.forEach(Node::stopIfStarted); // we should stop this last since it waits for resources to get released @@ -558,6 +561,7 @@ public synchronized void close() throws IOException { toClose.add(() -> stopWatch.stop().start("transport")); toClose.add(injector.getInstance(TransportService.class)); toClose.add(injector.getInstance(NodeMetrics.class)); + toClose.add(injector.getInstance(IndicesService.class)); if (ReadinessService.enabled(environment)) { toClose.add(injector.getInstance(ReadinessService.class)); } diff --git a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java index c4816b440f568..b3c95186b6037 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java +++ b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java @@ -141,6 +141,7 @@ import org.elasticsearch.monitor.MonitorService; import org.elasticsearch.monitor.fs.FsHealthService; import org.elasticsearch.monitor.jvm.JvmInfo; +import org.elasticsearch.monitor.metrics.IndicesMetrics; import org.elasticsearch.monitor.metrics.NodeMetrics; import org.elasticsearch.node.internal.TerminationHandler; import org.elasticsearch.node.internal.TerminationHandlerProvider; @@ -1063,6 +1064,7 @@ private void construct( final TimeValue metricsInterval = settings.getAsTime("telemetry.agent.metrics_interval", TimeValue.timeValueSeconds(10)); final NodeMetrics nodeMetrics = new NodeMetrics(telemetryProvider.getMeterRegistry(), nodeService, metricsInterval); + final IndicesMetrics indicesMetrics = new IndicesMetrics(telemetryProvider.getMeterRegistry(), indicesService, metricsInterval); final SearchService searchService = serviceProvider.newSearchService( pluginsService, @@ -1162,6 +1164,7 @@ private void construct( b.bind(Transport.class).toInstance(transport); b.bind(TransportService.class).toInstance(transportService); b.bind(NodeMetrics.class).toInstance(nodeMetrics); + b.bind(IndicesMetrics.class).toInstance(indicesMetrics); b.bind(NetworkService.class).toInstance(networkService); b.bind(IndexMetadataVerifier.class).toInstance(indexMetadataVerifier); b.bind(ClusterInfoService.class).toInstance(clusterInfoService);