switch to official Prometheus client

This commit is contained in:
Jeffrey C. Ollie 2023-09-01 13:10:25 -05:00
parent 514fa50a8c
commit b0628543f8
No known key found for this signature in database
GPG key ID: F936E4DCB7E25F15
3 changed files with 282 additions and 217 deletions

17
poetry.lock generated
View file

@ -565,6 +565,21 @@ files = [
docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"]
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"]
[[package]]
name = "prometheus-client"
version = "0.17.1"
description = "Python client for the Prometheus monitoring system."
category = "main"
optional = false
python-versions = ">=3.6"
files = [
{file = "prometheus_client-0.17.1-py3-none-any.whl", hash = "sha256:e537f37160f6807b8202a6fc4764cdd19bac5480ddd3e0d463c3002b34462101"},
{file = "prometheus_client-0.17.1.tar.gz", hash = "sha256:21e674f39831ae3f8acde238afd9a27a37d0d2fb5a28ea094f0ce25d2cbf2091"},
]
[package.extras]
twisted = ["twisted"]
[[package]]
name = "protobuf"
version = "4.24.2"
@ -770,4 +785,4 @@ test = ["pytest", "pytest-grpc"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "d0558513b225264653cde8153abe3425004ee3298c1407cbb2dc672fe356b2f3"
content-hash = "9756b224be33434f1cf2d6702163ed6e6ffb37f40d608ea6be5e0a832d5e8bfe"

View file

@ -17,6 +17,7 @@ influxdb = "^5.3.1"
influxdb-client = "^1.37.0"
pypng = "^0.20220715.0"
typing-extensions = "^4.7.1"
prometheus-client = "^0.17.1"
[tool.poetry.group.dev.dependencies]
black = "^23.7.0"
@ -32,7 +33,7 @@ build-backend = "poetry.core.masonry.api"
[tool.isort]
profile = "black"
line_length = 88
line_length = 120
force_single_line = true
force_sort_within_sections = true
from_first = false

View file

@ -1,17 +1,23 @@
#!/usr/bin/python3
"""Prometheus exporter for Starlink user terminal data info.
This script pulls the current status info and/or metrics computed from the
history data and makes it available via HTTP in the format Prometheus expects.
"""
from http import HTTPStatus
from http.server import BaseHTTPRequestHandler
from http.server import ThreadingHTTPServer
import logging
import signal
import sys
import threading
import time
from typing import Self
from prometheus_client import Counter
from prometheus_client import Enum
from prometheus_client import Gauge
from prometheus_client import Info
from prometheus_client import MetricsHandler
import starlink_grpc_tools.dish_common as dish_common
@ -25,119 +31,204 @@ def handle_sigterm(signum, frame):
raise Terminated
class MetricInfo:
unit = ""
kind = "gauge"
help = ""
common_labels = ["id"]
def __init__(self, unit=None, kind=None, help=None) -> None:
if unit:
self.unit = f"_{unit}"
if kind:
self.kind = kind
if help:
self.help = help
pass
METRICS_INFO = {
"status_uptime": MetricInfo(unit="seconds", kind="counter"),
"status_seconds_to_first_nonempty_slot": MetricInfo(),
"status_pop_ping_drop_rate": MetricInfo(),
"status_downlink_throughput_bps": MetricInfo(),
"status_uplink_throughput_bps": MetricInfo(),
"status_pop_ping_latency_ms": MetricInfo(),
"status_alerts": MetricInfo(),
"status_fraction_obstructed": MetricInfo(),
"status_currently_obstructed": MetricInfo(),
"status_seconds_obstructed": MetricInfo(),
"status_obstruction_duration": MetricInfo(),
"status_obstruction_interval": MetricInfo(),
"status_direction_azimuth": MetricInfo(),
"status_direction_elevation": MetricInfo(),
"status_is_snr_above_noise_floor": MetricInfo(),
"status_alert_motors_stuck": MetricInfo(),
"status_alert_thermal_throttle": MetricInfo(),
"status_alert_thermal_shutdown": MetricInfo(),
"status_alert_mast_not_near_vertical": MetricInfo(),
"status_alert_unexpected_location": MetricInfo(),
"status_alert_slow_ethernet_speeds": MetricInfo(),
"status_alert_roaming": MetricInfo(),
"status_alert_install_pending": MetricInfo(),
"status_alert_is_heating": MetricInfo(),
"status_alert_power_supply_thermal_throttle": MetricInfo(),
"status_alert_is_power_save_idle": MetricInfo(),
"status_alert_moving_fast_while_not_aviation": MetricInfo(),
"status_alert_moving_while_not_mobile": MetricInfo(),
"ping_stats_samples": MetricInfo(kind="counter"),
"ping_stats_end_counter": MetricInfo(kind="counter"),
"usage_download_usage": MetricInfo(unit="bytes", kind="counter"),
"usage_upload_usage": MetricInfo(unit="bytes", kind="counter"),
METRICS: dict[str, Counter | Enum | Gauge] = {
"status_uptime": Gauge(
"starlink_status_uptime_seconds",
"",
common_labels,
),
"status_seconds_to_first_nonempty_slot": Gauge(
"starlink_status_seconds_to_first_nonempty_slot",
"",
common_labels,
),
"status_pop_ping_drop_rate": Gauge(
"starlink_status_pop_ping_drop_rate",
"",
common_labels,
),
"status_downlink_throughput_bps": Gauge(
"starlink_status_downlink_throughput_bps",
"",
common_labels,
),
"status_uplink_throughput_bps": Gauge(
"starlink_status_uplink_throughput_bps",
"",
common_labels,
),
"status_pop_ping_latency_ms": Gauge(
"starlink_status_pop_ping_latency_ms",
"",
common_labels,
),
"status_alerts": Gauge(
"starlink_status_alerts",
"",
common_labels,
),
"status_fraction_obstructed": Gauge(
"starlink_status_fraction_obstructed",
"",
common_labels,
),
"status_currently_obstructed": Gauge(
"starlink_status_currently_obstructed",
"",
common_labels,
),
"status_seconds_obstructed": Gauge(
"starlink_status_seconds_obstructed",
"",
common_labels,
),
"status_obstruction_duration": Gauge(
"starlink_status_obstruction_duration",
"",
common_labels,
),
"status_obstruction_interval": Gauge(
"starlink_status_obstruction_interval",
"",
common_labels,
),
"status_direction_azimuth": Gauge(
"starlink_status_direction_azimuth",
"",
common_labels,
),
"status_direction_elevation": Gauge(
"starlink_status_direction_elevation",
"",
common_labels,
),
"status_is_snr_above_noise_floor": Gauge(
"starlink_status_is_snr_above_noise_floor",
"",
common_labels,
),
"status_alert_motors_stuck": Gauge(
"starlink_status_alert_motors_stuck",
"",
common_labels,
),
"status_alert_thermal_throttle": Gauge(
"starlink_status_alert_thermal_throttle",
"",
common_labels,
),
"status_alert_thermal_shutdown": Gauge(
"starlink_status_alert_thermal_shutdown",
"",
common_labels,
),
"status_alert_mast_not_near_vertical": Gauge(
"starlink_status_alert_mast_not_near_vertical",
"",
common_labels,
),
"status_alert_unexpected_location": Gauge(
"starlink_status_alert_unexpected_location",
"",
common_labels,
),
"status_alert_slow_ethernet_speeds": Gauge(
"starlink_status_alert_slow_ethernet_speeds",
"",
common_labels,
),
"status_alert_roaming": Gauge(
"starlink_status_alert_roaming",
"",
common_labels,
),
"status_alert_install_pending": Gauge(
"starlink_status_alert_install_pending",
"",
common_labels,
),
"status_alert_is_heating": Gauge(
"starlink_status_alert_is_heating",
"",
common_labels,
),
"status_alert_power_supply_thermal_throttle": Gauge(
"starlink_status_alert_power_supply_thermal_throttle",
"",
common_labels,
),
"status_alert_is_power_save_idle": Gauge(
"starlink_status_alert_is_power_save_idle",
"",
common_labels,
),
"status_alert_moving_fast_while_not_aviation": Gauge(
"starlink_status_alert_moving_fast_while_not_aviation",
"",
common_labels,
),
"status_alert_moving_while_not_mobile": Gauge(
"starlink_status_alert_moving_while_not_mobile",
"",
common_labels,
),
"ping_stats_samples": Gauge(
"starlink_ping_stats_samples",
"",
common_labels,
),
"ping_stats_end_counter": Gauge(
"starlink_ping_stats_end_counter",
"",
common_labels,
),
"usage_download_usage": Gauge(
"starlink_usage_download_usage_bytes",
"",
common_labels,
),
"usage_upload_usage": Gauge(
"starlink_usage_upload_usage_bytes",
"",
common_labels,
),
"status_state": Enum(
"starlink_status_state",
"",
common_labels,
states=[
"UNKNOWN",
"CONNECTED",
"BOOTING",
"SEARCHING",
"STOWED",
"THERMAL_SHUTDOWN",
"NO_SATS",
"OBSTRUCTED",
"NO_DOWNLINK",
"NO_PINGS",
"DISH_UNREACHABLE",
],
),
}
STATE_VALUES = [
"UNKNOWN",
"CONNECTED",
"BOOTING",
"SEARCHING",
"STOWED",
"THERMAL_SHUTDOWN",
"NO_SATS",
"OBSTRUCTED",
"NO_DOWNLINK",
"NO_PINGS",
"DISH_UNREACHABLE",
]
class Metric:
name = ""
timestamp = ""
kind = None
help = None
values = None
def __init__(self, name, timestamp, kind="gauge", help="", values=None):
self.name = name
self.timestamp = timestamp
self.kind = kind
self.help = help
if values:
self.values = values
else:
self.values = []
pass
def __str__(self):
if not self.values:
return ""
lines = []
lines.append(f"# HELP {self.name} {self.help}")
lines.append(f"# TYPE {self.name} {self.kind}")
for value in self.values:
lines.append(f"{self.name}{value} {self.timestamp*1000}")
lines.append("")
return str.join("\n", lines)
class MetricValue:
value = 0
labels = None
def __init__(self, value, labels=None) -> None:
self.value = value
self.labels = labels
def __str__(self):
label_str = ""
if self.labels:
label_str = (
"{"
+ str.join(",", [f'{v[0]}="{v[1]}"' for v in self.labels.items()])
+ "}"
)
return f"{label_str} {self.value}"
info = Info(
"starlink_info",
"",
# common_labels,
)
unprocessed_metrics = Gauge(
"starlink_unprocessed_metrics",
"",
common_labels + ["metric"],
)
missing_metrics = Gauge(
"starlink_missing_metrics",
"",
common_labels + ["metric"],
)
def parse_args():
@ -152,119 +243,76 @@ def parse_args():
return dish_common.run_arg_parser(parser, modes=["status", "alert_detail", "usage"])
def prometheus_export(opts, gstate):
raw_data = {}
class GatherMetrics(threading.Thread):
def __init__(self: Self, opts, gstate, *args, **kw):
self.opts = opts
self.gstate = gstate
super().__init__(*args, **kw)
def data_add_item(name, value, category):
raw_data[category + "_" + name] = value
pass
def run(self: Self):
while True:
self.gather()
time.sleep(5.0)
def data_add_sequencem(name, value, category, start):
raise NotImplementedError("Did not expect sequence data")
def gather(self: Self) -> None:
raw_data = {}
with gstate.lock:
rc, status_ts, hist_ts = dish_common.get_data(
opts, gstate, data_add_item, data_add_sequencem
)
def data_add_item(name, value, category):
raw_data[category + "_" + name] = value
metrics = []
# snr is not supported by starlink any more but still returned by the grpc
# service for backwards compatibility
if "status_snr" in raw_data:
del raw_data["status_snr"]
metrics.append(
Metric(
name="starlink_status_state",
timestamp=status_ts,
values=[
MetricValue(
value=int(raw_data["status_state"] == state_value),
labels={"state": state_value},
)
for state_value in STATE_VALUES
],
)
)
del raw_data["status_state"]
info_metrics = ["status_id", "status_hardware_version", "status_software_version"]
metrics_not_found = []
metrics_not_found.extend([x for x in info_metrics if x not in raw_data])
if len(metrics_not_found) < len(info_metrics):
metrics.append(
Metric(
name="starlink_info",
timestamp=status_ts,
values=[
MetricValue(
value=1,
labels={
x.replace("status_", ""): raw_data.pop(x)
for x in info_metrics
if x in raw_data
},
)
],
def data_add_sequencem(name, value, category, start):
raise NotImplementedError(
f"Did not expect sequence data {name!r} {value!r} {category!r} {start!r}"
)
)
for name, metric_info in METRICS_INFO.items():
if name in raw_data:
metrics.append(
Metric(
name=f"starlink_{name}{metric_info.unit}",
timestamp=status_ts,
kind=metric_info.kind,
values=[MetricValue(value=float(raw_data.pop(name) or 0))],
)
with self.gstate.lock:
rc, status_ts, hist_ts = dish_common.get_data(
self.opts, self.gstate, data_add_item, data_add_sequencem
)
else:
metrics_not_found.append(name)
metrics.append(
Metric(
name="starlink_exporter_unprocessed_metrics",
timestamp=status_ts,
values=[MetricValue(value=1, labels={"metric": name}) for name in raw_data],
# snr is not supported by starlink any more but still returned by the grpc
# service for backwards compatibility
if "status_snr" in raw_data:
del raw_data["status_snr"]
status_id = raw_data.get("status_id")
info_metrics = [
"status_id",
"status_hardware_version",
"status_software_version",
]
metrics_not_found = []
metrics_not_found.extend([x for x in info_metrics if x not in raw_data])
info.info(
{
x.replace("status_", ""): raw_data.pop(x)
for x in info_metrics
if x in raw_data
}
)
)
metrics.append(
Metric(
name="starlink_exporter_missing_metrics",
timestamp=status_ts,
values=[
MetricValue(
value=1,
labels={"metric": name},
)
for name in metrics_not_found
],
)
)
for name, metric_info in METRICS.items():
if name in raw_data:
match metric_info:
case Gauge():
metric_info.labels(id=status_id).set(raw_data.pop(name) or 0)
return str.join("\n", [str(metric) for metric in metrics])
case Enum():
metric_info.labels(id=status_id).state(raw_data.pop(name) or 0)
case _:
pass
class MetricsRequestHandler(BaseHTTPRequestHandler):
def do_GET(self):
path = self.path.partition("?")[0]
if path.lower() == "/favicon.ico":
self.send_error(HTTPStatus.NOT_FOUND)
return
else:
metrics_not_found.append(name)
opts = self.server.opts
gstate = self.server.gstate
for name in raw_data:
unprocessed_metrics.labels(id=status_id, metric=name).set(1)
content = prometheus_export(opts, gstate)
self.send_response(HTTPStatus.OK)
self.send_header("Content-type", "text/plain")
self.send_header("Content-Length", len(content))
self.end_headers()
self.wfile.write(content.encode())
for name in metrics_not_found:
missing_metrics.labels(id=status_id, metric=name).set(1)
def main():
@ -275,10 +323,11 @@ def main():
gstate = dish_common.GlobalState(target=opts.target)
gstate.lock = threading.Lock()
httpd = ThreadingHTTPServer((opts.address, opts.port), MetricsRequestHandler)
gather = GatherMetrics(opts, gstate)
gather.start()
httpd = ThreadingHTTPServer((opts.address, opts.port), MetricsHandler)
httpd.daemon_threads = False
httpd.opts = opts
httpd.gstate = gstate
signal.signal(signal.SIGTERM, handle_sigterm)
@ -289,7 +338,7 @@ def main():
pass
finally:
httpd.server_close()
httpd.gstate.shutdown()
gstate.shutdown()
sys.exit()