diff --git a/.gitignore b/.gitignore index e470da31..4321fd02 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ elasticsearch_exporter *-stamp .tarballs /vendor +.idea \ No newline at end of file diff --git a/collector/nodes.go b/collector/nodes.go index 364c541d..a6c2a63f 100644 --- a/collector/nodes.go +++ b/collector/nodes.go @@ -38,6 +38,7 @@ func getRoles(node NodeStatsNodeResponse) map[string]bool { "data_content": false, "ml": false, "remote_cluster_client": false, + "search": false, "transform": false, "ingest": false, "client": true, @@ -92,7 +93,7 @@ func createRoleMetric(role string) *nodeMetric { } var ( - defaultNodeLabels = []string{"cluster", "host", "name", "es_master_node", "es_data_node", "es_ingest_node", "es_client_node"} + defaultNodeLabels = []string{"cluster", "host", "name", "es_master_node", "es_data_node", "es_ingest_node", "es_client_node", "es_search_node"} defaultRoleLabels = []string{"cluster", "host", "name"} defaultThreadPoolLabels = append(defaultNodeLabels, "type") defaultBreakerLabels = append(defaultNodeLabels, "breaker") @@ -110,6 +111,7 @@ var ( fmt.Sprintf("%t", roles["data"]), fmt.Sprintf("%t", roles["ingest"]), fmt.Sprintf("%t", roles["client"]), + fmt.Sprintf("%t", roles["search"]), } } defaultThreadPoolLabelValues = func(cluster string, node NodeStatsNodeResponse, pool string) []string { @@ -171,6 +173,13 @@ type filesystemIODeviceMetric struct { Labels func(cluster string, node NodeStatsNodeResponse, device string) []string } +type fileCacheMetric struct { + Type prometheus.ValueType + Desc *prometheus.Desc + Value func(fileCacheStats NodeStatsFileCacheResponse) float64 + Labels func(cluster string, node NodeStatsNodeResponse) []string +} + // Nodes information struct type Nodes struct { logger log.Logger @@ -188,6 +197,7 @@ type Nodes struct { threadPoolMetrics []*threadPoolMetric filesystemDataMetrics []*filesystemDataMetric filesystemIODeviceMetrics []*filesystemIODeviceMetric + fileCacheMetrics []*fileCacheMetric } // NewNodes defines Nodes Prometheus metrics @@ -1781,6 +1791,104 @@ func NewNodes(logger log.Logger, client *http.Client, url *url.URL, all bool, no Labels: defaultFilesystemIODeviceLabelValues, }, }, + fileCacheMetrics: []*fileCacheMetric{ + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "filecache", "active_in_bytes"), + "file_cache active memory in bytes", + defaultNodeLabels, nil, + ), + Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 { + return float64(fileCacheStats.ActiveInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "filecache", "total_in_bytes"), + "file_cache total memory in bytes", + defaultNodeLabels, nil, + ), + Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 { + return float64(fileCacheStats.TotalInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "filecache", "used_in_bytes"), + "file_cache used memory in bytes", + defaultNodeLabels, nil, + ), + Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 { + return float64(fileCacheStats.UsedInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "filecache", "evictions_in_bytes"), + "file_cache evicted memory in bytes", + defaultNodeLabels, nil, + ), + Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 { + return float64(fileCacheStats.EvictionsInBytes) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "filecache", "active_percent"), + "file_cache active memory as percent", + defaultNodeLabels, nil, + ), + Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 { + return float64(fileCacheStats.ActivePercent) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "filecache", "used_percent"), + "file_cache used memory as percent", + defaultNodeLabels, nil, + ), + Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 { + return float64(fileCacheStats.UsedPercent) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "filecache", "hit_count"), + "file_cache hit count", + defaultNodeLabels, nil, + ), + Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 { + return float64(fileCacheStats.HitCount) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "filecache", "miss_count"), + "file_cache miss count", + defaultNodeLabels, nil, + ), + Value: func(fileCacheStats NodeStatsFileCacheResponse) float64 { + return float64(fileCacheStats.MissCount) + }, + Labels: defaultNodeLabelValues, + }, + }, } } @@ -1801,6 +1909,9 @@ func (c *Nodes) Describe(ch chan<- *prometheus.Desc) { for _, metric := range c.filesystemIODeviceMetrics { ch <- metric.Desc } + for _, metric := range c.fileCacheMetrics { + ch <- metric.Desc + } ch <- c.up.Desc() ch <- c.totalScrapes.Desc() ch <- c.jsonParseFailures.Desc() @@ -1955,5 +2066,16 @@ func (c *Nodes) Collect(ch chan<- prometheus.Metric) { } } + // File cache Stats + for _, metric := range c.fileCacheMetrics { + ch <- prometheus.MustNewConstMetric( + metric.Desc, + metric.Type, + metric.Value(node.FileCache), + metric.Labels(nodeStatsResp.ClusterName, node)..., + ) + } + } + } diff --git a/collector/nodes_response.go b/collector/nodes_response.go index 4985add0..fb14977a 100644 --- a/collector/nodes_response.go +++ b/collector/nodes_response.go @@ -40,6 +40,7 @@ type NodeStatsNodeResponse struct { HTTP map[string]interface{} `json:"http"` Transport NodeStatsTransportResponse `json:"transport"` Process NodeStatsProcessResponse `json:"process"` + FileCache NodeStatsFileCacheResponse `json:"file_cache"` } // NodeStatsBreakersResponse is a representation of a statistics about the field data circuit breaker @@ -384,3 +385,15 @@ type ClusterHealthResponse struct { TimedOut bool `json:"timed_out"` UnassignedShards int64 `json:"unassigned_shards"` } + +// NodeStatsFileCacheResponse is a representation of OpenSearch Searchable Snapshots File_cache +type NodeStatsFileCacheResponse struct { + ActiveInBytes int64 `json:"active_in_bytes"` + TotalInBytes int64 `json:"total_in_bytes"` + UsedInBytes int64 `json:"used_in_bytes"` + EvictionsInBytes int64 `json:"evictions_in_bytes"` + ActivePercent int64 `json:"active_percent"` + UsedPercent int64 `json:"used_percent"` + HitCount int64 `json:"hit_count"` + MissCount int64 `json:"miss_count"` +}