Collapse All | Expand All
vm-health (every 1m0s) 5

/etc/alerts/alerts.yml

Rule | Error | Samples | Updated
alert: TooManyRestarts (for: 0s)
changes(process_start_time_seconds{job=~"victoriametrics|vmagent|vmalert"}[15m]) > 2

Labels: severity=critical
0 46.892s ago
alert: ServiceDown (for: 2m0s)
up{job=~"victoriametrics|vmagent|vmalert"} == 0

Labels: severity=critical
0 46.891s ago
alert: ProcessNearFDLimits (for: 5m0s)
(process_max_fds - process_open_fds) < 100

Labels: severity=critical
0 46.889s ago
alert: TooHighMemoryUsage (for: 5m0s)
(process_resident_memory_anon_bytes / vm_available_memory_bytes) > 0.9

Labels: severity=critical
0 46.888s ago
alert: TooHighCPUUsage (for: 5m0s)
rate(process_cpu_seconds_total[5m]) / process_cpu_cores_available > 0.9

Labels: severity=critical
0 46.886s ago
vmagent (every 30s) 10

/etc/alerts/alerts.yml

Rule | Error | Samples | Updated
alert: PersistentQueueIsDroppingData (for: 10m0s)
sum(increase(vm_persistentqueue_bytes_dropped_total[5m])) by (job, instance) > 0

Labels: severity=critical
0 0.170s ago
alert: RejectedRemoteWriteDataBlocksAreDropped (for: 15m0s)
sum(increase(vmagent_remotewrite_packets_dropped_total[5m])) by (job, instance) > 0

Labels: severity=warning
0 0.171s ago
alert: TooManyScrapeErrors (for: 15m0s)
sum(increase(vm_promscrape_scrapes_failed_total[5m])) by (job, instance) > 0

Labels: severity=warning
0 0.170s ago
alert: TooManyWriteErrors (for: 15m0s)
(sum(increase(vm_ingestserver_request_errors_total[5m])) by (job, instance)
+
sum(increase(vmagent_http_request_errors_total[5m])) by (job, instance)) > 0

Labels: severity=warning
0 0.169s ago
alert: TooManyRemoteWriteErrors (for: 15m0s)
sum(rate(vmagent_remotewrite_retries_count_total[5m])) by(job, instance, url) > 0

Labels: severity=warning
0 0.169s ago
alert: RemoteWriteConnectionIsSaturated (for: 15m0s)
rate(vmagent_remotewrite_send_duration_seconds_total[5m]) > 0.9

Labels: severity=warning
0 0.169s ago
alert: PersistentQueueForWritesIsSaturated (for: 15m0s)
rate(vm_persistentqueue_write_duration_seconds_total[5m]) > 0.9

Labels: severity=warning
0 0.168s ago
alert: PersistentQueueForReadsIsSaturated (for: 15m0s)
rate(vm_persistentqueue_read_duration_seconds_total[5m]) > 0.9

Labels: severity=warning
0 0.168s ago
alert: SeriesLimitHourReached (for: 0s)
(vmagent_hourly_series_limit_current_series / vmagent_hourly_series_limit_max_series) > 0.9

Labels: severity=critical
0 0.167s ago
alert: SeriesLimitDayReached (for: 0s)
(vmagent_daily_series_limit_current_series / vmagent_daily_series_limit_max_series) > 0.9

Labels: severity=critical
0 0.167s ago
vmsingle (every 30s) 10

/etc/alerts/alerts.yml

Rule | Error | Samples | Updated
alert: DiskRunsOutOfSpaceIn3Days (for: 30m0s)
vm_free_disk_space_bytes / ignoring(path)
(
   (
    rate(vm_rows_added_to_storage_total[1d]) -
    ignoring(type) rate(vm_deduplicated_samples_total{type="merge"}[1d])
   )
  * scalar(
    sum(vm_data_size_bytes{type!="indexdb"}) /
    sum(vm_rows{type!="indexdb"})
   )
) < 3 * 24 * 3600

Labels: severity=critical
0 17.184s ago
alert: DiskRunsOutOfSpace (for: 30m0s)
sum(vm_data_size_bytes) by(instance) /
(
 sum(vm_free_disk_space_bytes) by(instance) +
 sum(vm_data_size_bytes) by(instance)
) > 0.8

Labels: severity=critical
0 17.185s ago
alert: RequestErrorsToAPI (for: 15m0s)
increase(vm_http_request_errors_total[5m]) > 0

Labels: severity=warning
0 17.182s ago
alert: ConcurrentFlushesHitTheLimit (for: 15m0s)
avg_over_time(vm_concurrent_addrows_current[1m]) >= vm_concurrent_addrows_capacity

Labels: severity=warning
0 17.183s ago
alert: TooManyLogs (for: 15m0s)
sum(increase(vm_log_messages_total{level!="info"}[5m])) by (job, instance) > 0

Labels: severity=warning
0 17.181s ago
alert: RowsRejectedOnIngestion (for: 15m0s)
sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0

Labels: severity=warning
0 17.181s ago
alert: TooHighChurnRate (for: 15m0s)
(
   sum(rate(vm_new_timeseries_created_total[5m])) by(instance)
   /
   sum(rate(vm_rows_inserted_total[5m])) by (instance)
 ) > 0.1

Labels: severity=warning
0 17.180s ago
alert: TooHighChurnRate24h (for: 15m0s)
sum(increase(vm_new_timeseries_created_total[24h])) by(instance)
>
(sum(vm_cache_entries{type="storage/hour_metric_ids"}) by(instance) * 3)

Labels: severity=warning
0 17.179s ago
alert: TooHighSlowInsertsRate (for: 15m0s)
(
   sum(rate(vm_slow_row_inserts_total[5m])) by(instance)
   /
   sum(rate(vm_rows_inserted_total[5m])) by (instance)
 ) > 0.05

Labels: severity=warning
0 17.179s ago
alert: LabelsLimitExceededOnIngestion (for: 15m0s)
sum(increase(vm_metrics_with_dropped_labels_total[5m])) by (instance) > 0

Labels: severity=warning
0 17.179s ago