switch to official Prometheus client

This commit is contained in:
Jeffrey C. Ollie 2023-09-01 13:10:25 -05:00
parent 514fa50a8c
commit b0628543f8
No known key found for this signature in database
GPG key ID: F936E4DCB7E25F15
3 changed files with 282 additions and 217 deletions

17
poetry.lock generated
View file

@ -565,6 +565,21 @@ files = [
docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"]
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"]
[[package]]
name = "prometheus-client"
version = "0.17.1"
description = "Python client for the Prometheus monitoring system."
category = "main"
optional = false
python-versions = ">=3.6"
files = [
{file = "prometheus_client-0.17.1-py3-none-any.whl", hash = "sha256:e537f37160f6807b8202a6fc4764cdd19bac5480ddd3e0d463c3002b34462101"},
{file = "prometheus_client-0.17.1.tar.gz", hash = "sha256:21e674f39831ae3f8acde238afd9a27a37d0d2fb5a28ea094f0ce25d2cbf2091"},
]
[package.extras]
twisted = ["twisted"]
[[package]] [[package]]
name = "protobuf" name = "protobuf"
version = "4.24.2" version = "4.24.2"
@ -770,4 +785,4 @@ test = ["pytest", "pytest-grpc"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.11" python-versions = "^3.11"
content-hash = "d0558513b225264653cde8153abe3425004ee3298c1407cbb2dc672fe356b2f3" content-hash = "9756b224be33434f1cf2d6702163ed6e6ffb37f40d608ea6be5e0a832d5e8bfe"

View file

@ -17,6 +17,7 @@ influxdb = "^5.3.1"
influxdb-client = "^1.37.0" influxdb-client = "^1.37.0"
pypng = "^0.20220715.0" pypng = "^0.20220715.0"
typing-extensions = "^4.7.1" typing-extensions = "^4.7.1"
prometheus-client = "^0.17.1"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
black = "^23.7.0" black = "^23.7.0"
@ -32,7 +33,7 @@ build-backend = "poetry.core.masonry.api"
[tool.isort] [tool.isort]
profile = "black" profile = "black"
line_length = 88 line_length = 120
force_single_line = true force_single_line = true
force_sort_within_sections = true force_sort_within_sections = true
from_first = false from_first = false

View file

@ -1,17 +1,23 @@
#!/usr/bin/python3
"""Prometheus exporter for Starlink user terminal data info. """Prometheus exporter for Starlink user terminal data info.
This script pulls the current status info and/or metrics computed from the This script pulls the current status info and/or metrics computed from the
history data and makes it available via HTTP in the format Prometheus expects. history data and makes it available via HTTP in the format Prometheus expects.
""" """
from http import HTTPStatus
from http.server import BaseHTTPRequestHandler
from http.server import ThreadingHTTPServer from http.server import ThreadingHTTPServer
import logging import logging
import signal import signal
import sys import sys
import threading import threading
import time
from typing import Self
from prometheus_client import Counter
from prometheus_client import Enum
from prometheus_client import Gauge
from prometheus_client import Info
from prometheus_client import MetricsHandler
import starlink_grpc_tools.dish_common as dish_common import starlink_grpc_tools.dish_common as dish_common
@ -25,57 +31,174 @@ def handle_sigterm(signum, frame):
raise Terminated raise Terminated
class MetricInfo: common_labels = ["id"]
unit = ""
kind = "gauge"
help = ""
def __init__(self, unit=None, kind=None, help=None) -> None: METRICS: dict[str, Counter | Enum | Gauge] = {
if unit: "status_uptime": Gauge(
self.unit = f"_{unit}" "starlink_status_uptime_seconds",
if kind: "",
self.kind = kind common_labels,
if help: ),
self.help = help "status_seconds_to_first_nonempty_slot": Gauge(
pass "starlink_status_seconds_to_first_nonempty_slot",
"",
common_labels,
METRICS_INFO = { ),
"status_uptime": MetricInfo(unit="seconds", kind="counter"), "status_pop_ping_drop_rate": Gauge(
"status_seconds_to_first_nonempty_slot": MetricInfo(), "starlink_status_pop_ping_drop_rate",
"status_pop_ping_drop_rate": MetricInfo(), "",
"status_downlink_throughput_bps": MetricInfo(), common_labels,
"status_uplink_throughput_bps": MetricInfo(), ),
"status_pop_ping_latency_ms": MetricInfo(), "status_downlink_throughput_bps": Gauge(
"status_alerts": MetricInfo(), "starlink_status_downlink_throughput_bps",
"status_fraction_obstructed": MetricInfo(), "",
"status_currently_obstructed": MetricInfo(), common_labels,
"status_seconds_obstructed": MetricInfo(), ),
"status_obstruction_duration": MetricInfo(), "status_uplink_throughput_bps": Gauge(
"status_obstruction_interval": MetricInfo(), "starlink_status_uplink_throughput_bps",
"status_direction_azimuth": MetricInfo(), "",
"status_direction_elevation": MetricInfo(), common_labels,
"status_is_snr_above_noise_floor": MetricInfo(), ),
"status_alert_motors_stuck": MetricInfo(), "status_pop_ping_latency_ms": Gauge(
"status_alert_thermal_throttle": MetricInfo(), "starlink_status_pop_ping_latency_ms",
"status_alert_thermal_shutdown": MetricInfo(), "",
"status_alert_mast_not_near_vertical": MetricInfo(), common_labels,
"status_alert_unexpected_location": MetricInfo(), ),
"status_alert_slow_ethernet_speeds": MetricInfo(), "status_alerts": Gauge(
"status_alert_roaming": MetricInfo(), "starlink_status_alerts",
"status_alert_install_pending": MetricInfo(), "",
"status_alert_is_heating": MetricInfo(), common_labels,
"status_alert_power_supply_thermal_throttle": MetricInfo(), ),
"status_alert_is_power_save_idle": MetricInfo(), "status_fraction_obstructed": Gauge(
"status_alert_moving_fast_while_not_aviation": MetricInfo(), "starlink_status_fraction_obstructed",
"status_alert_moving_while_not_mobile": MetricInfo(), "",
"ping_stats_samples": MetricInfo(kind="counter"), common_labels,
"ping_stats_end_counter": MetricInfo(kind="counter"), ),
"usage_download_usage": MetricInfo(unit="bytes", kind="counter"), "status_currently_obstructed": Gauge(
"usage_upload_usage": MetricInfo(unit="bytes", kind="counter"), "starlink_status_currently_obstructed",
} "",
common_labels,
STATE_VALUES = [ ),
"status_seconds_obstructed": Gauge(
"starlink_status_seconds_obstructed",
"",
common_labels,
),
"status_obstruction_duration": Gauge(
"starlink_status_obstruction_duration",
"",
common_labels,
),
"status_obstruction_interval": Gauge(
"starlink_status_obstruction_interval",
"",
common_labels,
),
"status_direction_azimuth": Gauge(
"starlink_status_direction_azimuth",
"",
common_labels,
),
"status_direction_elevation": Gauge(
"starlink_status_direction_elevation",
"",
common_labels,
),
"status_is_snr_above_noise_floor": Gauge(
"starlink_status_is_snr_above_noise_floor",
"",
common_labels,
),
"status_alert_motors_stuck": Gauge(
"starlink_status_alert_motors_stuck",
"",
common_labels,
),
"status_alert_thermal_throttle": Gauge(
"starlink_status_alert_thermal_throttle",
"",
common_labels,
),
"status_alert_thermal_shutdown": Gauge(
"starlink_status_alert_thermal_shutdown",
"",
common_labels,
),
"status_alert_mast_not_near_vertical": Gauge(
"starlink_status_alert_mast_not_near_vertical",
"",
common_labels,
),
"status_alert_unexpected_location": Gauge(
"starlink_status_alert_unexpected_location",
"",
common_labels,
),
"status_alert_slow_ethernet_speeds": Gauge(
"starlink_status_alert_slow_ethernet_speeds",
"",
common_labels,
),
"status_alert_roaming": Gauge(
"starlink_status_alert_roaming",
"",
common_labels,
),
"status_alert_install_pending": Gauge(
"starlink_status_alert_install_pending",
"",
common_labels,
),
"status_alert_is_heating": Gauge(
"starlink_status_alert_is_heating",
"",
common_labels,
),
"status_alert_power_supply_thermal_throttle": Gauge(
"starlink_status_alert_power_supply_thermal_throttle",
"",
common_labels,
),
"status_alert_is_power_save_idle": Gauge(
"starlink_status_alert_is_power_save_idle",
"",
common_labels,
),
"status_alert_moving_fast_while_not_aviation": Gauge(
"starlink_status_alert_moving_fast_while_not_aviation",
"",
common_labels,
),
"status_alert_moving_while_not_mobile": Gauge(
"starlink_status_alert_moving_while_not_mobile",
"",
common_labels,
),
"ping_stats_samples": Gauge(
"starlink_ping_stats_samples",
"",
common_labels,
),
"ping_stats_end_counter": Gauge(
"starlink_ping_stats_end_counter",
"",
common_labels,
),
"usage_download_usage": Gauge(
"starlink_usage_download_usage_bytes",
"",
common_labels,
),
"usage_upload_usage": Gauge(
"starlink_usage_upload_usage_bytes",
"",
common_labels,
),
"status_state": Enum(
"starlink_status_state",
"",
common_labels,
states=[
"UNKNOWN", "UNKNOWN",
"CONNECTED", "CONNECTED",
"BOOTING", "BOOTING",
@ -87,57 +210,25 @@ STATE_VALUES = [
"NO_DOWNLINK", "NO_DOWNLINK",
"NO_PINGS", "NO_PINGS",
"DISH_UNREACHABLE", "DISH_UNREACHABLE",
] ],
),
}
info = Info(
class Metric: "starlink_info",
name = "" "",
timestamp = "" # common_labels,
kind = None )
help = None unprocessed_metrics = Gauge(
values = None "starlink_unprocessed_metrics",
"",
def __init__(self, name, timestamp, kind="gauge", help="", values=None): common_labels + ["metric"],
self.name = name )
self.timestamp = timestamp missing_metrics = Gauge(
self.kind = kind "starlink_missing_metrics",
self.help = help "",
if values: common_labels + ["metric"],
self.values = values )
else:
self.values = []
pass
def __str__(self):
if not self.values:
return ""
lines = []
lines.append(f"# HELP {self.name} {self.help}")
lines.append(f"# TYPE {self.name} {self.kind}")
for value in self.values:
lines.append(f"{self.name}{value} {self.timestamp*1000}")
lines.append("")
return str.join("\n", lines)
class MetricValue:
value = 0
labels = None
def __init__(self, value, labels=None) -> None:
self.value = value
self.labels = labels
def __str__(self):
label_str = ""
if self.labels:
label_str = (
"{"
+ str.join(",", [f'{v[0]}="{v[1]}"' for v in self.labels.items()])
+ "}"
)
return f"{label_str} {self.value}"
def parse_args(): def parse_args():
@ -152,119 +243,76 @@ def parse_args():
return dish_common.run_arg_parser(parser, modes=["status", "alert_detail", "usage"]) return dish_common.run_arg_parser(parser, modes=["status", "alert_detail", "usage"])
def prometheus_export(opts, gstate): class GatherMetrics(threading.Thread):
def __init__(self: Self, opts, gstate, *args, **kw):
self.opts = opts
self.gstate = gstate
super().__init__(*args, **kw)
def run(self: Self):
while True:
self.gather()
time.sleep(5.0)
def gather(self: Self) -> None:
raw_data = {} raw_data = {}
def data_add_item(name, value, category): def data_add_item(name, value, category):
raw_data[category + "_" + name] = value raw_data[category + "_" + name] = value
pass
def data_add_sequencem(name, value, category, start): def data_add_sequencem(name, value, category, start):
raise NotImplementedError("Did not expect sequence data") raise NotImplementedError(
f"Did not expect sequence data {name!r} {value!r} {category!r} {start!r}"
with gstate.lock:
rc, status_ts, hist_ts = dish_common.get_data(
opts, gstate, data_add_item, data_add_sequencem
) )
metrics = [] with self.gstate.lock:
rc, status_ts, hist_ts = dish_common.get_data(
self.opts, self.gstate, data_add_item, data_add_sequencem
)
# snr is not supported by starlink any more but still returned by the grpc # snr is not supported by starlink any more but still returned by the grpc
# service for backwards compatibility # service for backwards compatibility
if "status_snr" in raw_data: if "status_snr" in raw_data:
del raw_data["status_snr"] del raw_data["status_snr"]
metrics.append( status_id = raw_data.get("status_id")
Metric(
name="starlink_status_state",
timestamp=status_ts,
values=[
MetricValue(
value=int(raw_data["status_state"] == state_value),
labels={"state": state_value},
)
for state_value in STATE_VALUES
],
)
)
del raw_data["status_state"]
info_metrics = ["status_id", "status_hardware_version", "status_software_version"] info_metrics = [
"status_id",
"status_hardware_version",
"status_software_version",
]
metrics_not_found = [] metrics_not_found = []
metrics_not_found.extend([x for x in info_metrics if x not in raw_data]) metrics_not_found.extend([x for x in info_metrics if x not in raw_data])
if len(metrics_not_found) < len(info_metrics): info.info(
metrics.append( {
Metric(
name="starlink_info",
timestamp=status_ts,
values=[
MetricValue(
value=1,
labels={
x.replace("status_", ""): raw_data.pop(x) x.replace("status_", ""): raw_data.pop(x)
for x in info_metrics for x in info_metrics
if x in raw_data if x in raw_data
}, }
)
],
)
) )
for name, metric_info in METRICS_INFO.items(): for name, metric_info in METRICS.items():
if name in raw_data: if name in raw_data:
metrics.append( match metric_info:
Metric( case Gauge():
name=f"starlink_{name}{metric_info.unit}", metric_info.labels(id=status_id).set(raw_data.pop(name) or 0)
timestamp=status_ts,
kind=metric_info.kind, case Enum():
values=[MetricValue(value=float(raw_data.pop(name) or 0))], metric_info.labels(id=status_id).state(raw_data.pop(name) or 0)
)
) case _:
pass
else: else:
metrics_not_found.append(name) metrics_not_found.append(name)
metrics.append( for name in raw_data:
Metric( unprocessed_metrics.labels(id=status_id, metric=name).set(1)
name="starlink_exporter_unprocessed_metrics",
timestamp=status_ts,
values=[MetricValue(value=1, labels={"metric": name}) for name in raw_data],
)
)
metrics.append( for name in metrics_not_found:
Metric( missing_metrics.labels(id=status_id, metric=name).set(1)
name="starlink_exporter_missing_metrics",
timestamp=status_ts,
values=[
MetricValue(
value=1,
labels={"metric": name},
)
for name in metrics_not_found
],
)
)
return str.join("\n", [str(metric) for metric in metrics])
class MetricsRequestHandler(BaseHTTPRequestHandler):
def do_GET(self):
path = self.path.partition("?")[0]
if path.lower() == "/favicon.ico":
self.send_error(HTTPStatus.NOT_FOUND)
return
opts = self.server.opts
gstate = self.server.gstate
content = prometheus_export(opts, gstate)
self.send_response(HTTPStatus.OK)
self.send_header("Content-type", "text/plain")
self.send_header("Content-Length", len(content))
self.end_headers()
self.wfile.write(content.encode())
def main(): def main():
@ -275,10 +323,11 @@ def main():
gstate = dish_common.GlobalState(target=opts.target) gstate = dish_common.GlobalState(target=opts.target)
gstate.lock = threading.Lock() gstate.lock = threading.Lock()
httpd = ThreadingHTTPServer((opts.address, opts.port), MetricsRequestHandler) gather = GatherMetrics(opts, gstate)
gather.start()
httpd = ThreadingHTTPServer((opts.address, opts.port), MetricsHandler)
httpd.daemon_threads = False httpd.daemon_threads = False
httpd.opts = opts
httpd.gstate = gstate
signal.signal(signal.SIGTERM, handle_sigterm) signal.signal(signal.SIGTERM, handle_sigterm)
@ -289,7 +338,7 @@ def main():
pass pass
finally: finally:
httpd.server_close() httpd.server_close()
httpd.gstate.shutdown() gstate.shutdown()
sys.exit() sys.exit()