switch to official Prometheus client

This commit is contained in:
Jeffrey C. Ollie 2023-09-01 13:10:25 -05:00
parent 514fa50a8c
commit b0628543f8
No known key found for this signature in database
GPG key ID: F936E4DCB7E25F15
3 changed files with 282 additions and 217 deletions

17
poetry.lock generated
View file

@ -565,6 +565,21 @@ files = [
docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"]
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"]
[[package]]
name = "prometheus-client"
version = "0.17.1"
description = "Python client for the Prometheus monitoring system."
category = "main"
optional = false
python-versions = ">=3.6"
files = [
{file = "prometheus_client-0.17.1-py3-none-any.whl", hash = "sha256:e537f37160f6807b8202a6fc4764cdd19bac5480ddd3e0d463c3002b34462101"},
{file = "prometheus_client-0.17.1.tar.gz", hash = "sha256:21e674f39831ae3f8acde238afd9a27a37d0d2fb5a28ea094f0ce25d2cbf2091"},
]
[package.extras]
twisted = ["twisted"]
[[package]] [[package]]
name = "protobuf" name = "protobuf"
version = "4.24.2" version = "4.24.2"
@ -770,4 +785,4 @@ test = ["pytest", "pytest-grpc"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.11" python-versions = "^3.11"
content-hash = "d0558513b225264653cde8153abe3425004ee3298c1407cbb2dc672fe356b2f3" content-hash = "9756b224be33434f1cf2d6702163ed6e6ffb37f40d608ea6be5e0a832d5e8bfe"

View file

@ -17,6 +17,7 @@ influxdb = "^5.3.1"
influxdb-client = "^1.37.0" influxdb-client = "^1.37.0"
pypng = "^0.20220715.0" pypng = "^0.20220715.0"
typing-extensions = "^4.7.1" typing-extensions = "^4.7.1"
prometheus-client = "^0.17.1"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
black = "^23.7.0" black = "^23.7.0"
@ -32,7 +33,7 @@ build-backend = "poetry.core.masonry.api"
[tool.isort] [tool.isort]
profile = "black" profile = "black"
line_length = 88 line_length = 120
force_single_line = true force_single_line = true
force_sort_within_sections = true force_sort_within_sections = true
from_first = false from_first = false

View file

@ -1,17 +1,23 @@
#!/usr/bin/python3
"""Prometheus exporter for Starlink user terminal data info. """Prometheus exporter for Starlink user terminal data info.
This script pulls the current status info and/or metrics computed from the This script pulls the current status info and/or metrics computed from the
history data and makes it available via HTTP in the format Prometheus expects. history data and makes it available via HTTP in the format Prometheus expects.
""" """
from http import HTTPStatus
from http.server import BaseHTTPRequestHandler
from http.server import ThreadingHTTPServer from http.server import ThreadingHTTPServer
import logging import logging
import signal import signal
import sys import sys
import threading import threading
import time
from typing import Self
from prometheus_client import Counter
from prometheus_client import Enum
from prometheus_client import Gauge
from prometheus_client import Info
from prometheus_client import MetricsHandler
import starlink_grpc_tools.dish_common as dish_common import starlink_grpc_tools.dish_common as dish_common
@ -25,119 +31,204 @@ def handle_sigterm(signum, frame):
raise Terminated raise Terminated
class MetricInfo: common_labels = ["id"]
unit = ""
kind = "gauge"
help = ""
def __init__(self, unit=None, kind=None, help=None) -> None: METRICS: dict[str, Counter | Enum | Gauge] = {
if unit: "status_uptime": Gauge(
self.unit = f"_{unit}" "starlink_status_uptime_seconds",
if kind: "",
self.kind = kind common_labels,
if help: ),
self.help = help "status_seconds_to_first_nonempty_slot": Gauge(
pass "starlink_status_seconds_to_first_nonempty_slot",
"",
common_labels,
METRICS_INFO = { ),
"status_uptime": MetricInfo(unit="seconds", kind="counter"), "status_pop_ping_drop_rate": Gauge(
"status_seconds_to_first_nonempty_slot": MetricInfo(), "starlink_status_pop_ping_drop_rate",
"status_pop_ping_drop_rate": MetricInfo(), "",
"status_downlink_throughput_bps": MetricInfo(), common_labels,
"status_uplink_throughput_bps": MetricInfo(), ),
"status_pop_ping_latency_ms": MetricInfo(), "status_downlink_throughput_bps": Gauge(
"status_alerts": MetricInfo(), "starlink_status_downlink_throughput_bps",
"status_fraction_obstructed": MetricInfo(), "",
"status_currently_obstructed": MetricInfo(), common_labels,
"status_seconds_obstructed": MetricInfo(), ),
"status_obstruction_duration": MetricInfo(), "status_uplink_throughput_bps": Gauge(
"status_obstruction_interval": MetricInfo(), "starlink_status_uplink_throughput_bps",
"status_direction_azimuth": MetricInfo(), "",
"status_direction_elevation": MetricInfo(), common_labels,
"status_is_snr_above_noise_floor": MetricInfo(), ),
"status_alert_motors_stuck": MetricInfo(), "status_pop_ping_latency_ms": Gauge(
"status_alert_thermal_throttle": MetricInfo(), "starlink_status_pop_ping_latency_ms",
"status_alert_thermal_shutdown": MetricInfo(), "",
"status_alert_mast_not_near_vertical": MetricInfo(), common_labels,
"status_alert_unexpected_location": MetricInfo(), ),
"status_alert_slow_ethernet_speeds": MetricInfo(), "status_alerts": Gauge(
"status_alert_roaming": MetricInfo(), "starlink_status_alerts",
"status_alert_install_pending": MetricInfo(), "",
"status_alert_is_heating": MetricInfo(), common_labels,
"status_alert_power_supply_thermal_throttle": MetricInfo(), ),
"status_alert_is_power_save_idle": MetricInfo(), "status_fraction_obstructed": Gauge(
"status_alert_moving_fast_while_not_aviation": MetricInfo(), "starlink_status_fraction_obstructed",
"status_alert_moving_while_not_mobile": MetricInfo(), "",
"ping_stats_samples": MetricInfo(kind="counter"), common_labels,
"ping_stats_end_counter": MetricInfo(kind="counter"), ),
"usage_download_usage": MetricInfo(unit="bytes", kind="counter"), "status_currently_obstructed": Gauge(
"usage_upload_usage": MetricInfo(unit="bytes", kind="counter"), "starlink_status_currently_obstructed",
"",
common_labels,
),
"status_seconds_obstructed": Gauge(
"starlink_status_seconds_obstructed",
"",
common_labels,
),
"status_obstruction_duration": Gauge(
"starlink_status_obstruction_duration",
"",
common_labels,
),
"status_obstruction_interval": Gauge(
"starlink_status_obstruction_interval",
"",
common_labels,
),
"status_direction_azimuth": Gauge(
"starlink_status_direction_azimuth",
"",
common_labels,
),
"status_direction_elevation": Gauge(
"starlink_status_direction_elevation",
"",
common_labels,
),
"status_is_snr_above_noise_floor": Gauge(
"starlink_status_is_snr_above_noise_floor",
"",
common_labels,
),
"status_alert_motors_stuck": Gauge(
"starlink_status_alert_motors_stuck",
"",
common_labels,
),
"status_alert_thermal_throttle": Gauge(
"starlink_status_alert_thermal_throttle",
"",
common_labels,
),
"status_alert_thermal_shutdown": Gauge(
"starlink_status_alert_thermal_shutdown",
"",
common_labels,
),
"status_alert_mast_not_near_vertical": Gauge(
"starlink_status_alert_mast_not_near_vertical",
"",
common_labels,
),
"status_alert_unexpected_location": Gauge(
"starlink_status_alert_unexpected_location",
"",
common_labels,
),
"status_alert_slow_ethernet_speeds": Gauge(
"starlink_status_alert_slow_ethernet_speeds",
"",
common_labels,
),
"status_alert_roaming": Gauge(
"starlink_status_alert_roaming",
"",
common_labels,
),
"status_alert_install_pending": Gauge(
"starlink_status_alert_install_pending",
"",
common_labels,
),
"status_alert_is_heating": Gauge(
"starlink_status_alert_is_heating",
"",
common_labels,
),
"status_alert_power_supply_thermal_throttle": Gauge(
"starlink_status_alert_power_supply_thermal_throttle",
"",
common_labels,
),
"status_alert_is_power_save_idle": Gauge(
"starlink_status_alert_is_power_save_idle",
"",
common_labels,
),
"status_alert_moving_fast_while_not_aviation": Gauge(
"starlink_status_alert_moving_fast_while_not_aviation",
"",
common_labels,
),
"status_alert_moving_while_not_mobile": Gauge(
"starlink_status_alert_moving_while_not_mobile",
"",
common_labels,
),
"ping_stats_samples": Gauge(
"starlink_ping_stats_samples",
"",
common_labels,
),
"ping_stats_end_counter": Gauge(
"starlink_ping_stats_end_counter",
"",
common_labels,
),
"usage_download_usage": Gauge(
"starlink_usage_download_usage_bytes",
"",
common_labels,
),
"usage_upload_usage": Gauge(
"starlink_usage_upload_usage_bytes",
"",
common_labels,
),
"status_state": Enum(
"starlink_status_state",
"",
common_labels,
states=[
"UNKNOWN",
"CONNECTED",
"BOOTING",
"SEARCHING",
"STOWED",
"THERMAL_SHUTDOWN",
"NO_SATS",
"OBSTRUCTED",
"NO_DOWNLINK",
"NO_PINGS",
"DISH_UNREACHABLE",
],
),
} }
STATE_VALUES = [ info = Info(
"UNKNOWN", "starlink_info",
"CONNECTED", "",
"BOOTING", # common_labels,
"SEARCHING", )
"STOWED", unprocessed_metrics = Gauge(
"THERMAL_SHUTDOWN", "starlink_unprocessed_metrics",
"NO_SATS", "",
"OBSTRUCTED", common_labels + ["metric"],
"NO_DOWNLINK", )
"NO_PINGS", missing_metrics = Gauge(
"DISH_UNREACHABLE", "starlink_missing_metrics",
] "",
common_labels + ["metric"],
)
class Metric:
    """One metric family rendered in the Prometheus text exposition format.

    ``str(metric)`` yields a ``# HELP`` line, a ``# TYPE`` line and one sample
    line per entry in ``values``, followed by a trailing newline. A metric
    without values renders as the empty string so it can be skipped entirely.

    Attributes:
        name: Full metric name, written verbatim at the start of each sample.
        timestamp: Sample time; it is multiplied by 1000 on output, so it is
            presumably in seconds (the exposition format expects integer
            milliseconds) -- confirm against the caller.
        kind: Metric type written on the ``# TYPE`` line (default "gauge").
        help: Text written on the ``# HELP`` line.
        values: Objects whose ``str()`` is appended directly after the metric
            name (label set plus value).
    """

    name = ""
    timestamp = ""
    kind = None
    help = None
    values = None

    def __init__(self, name, timestamp, kind="gauge", help="", values=None):
        self.name = name
        self.timestamp = timestamp
        self.kind = kind
        self.help = help
        # Always own a list so __str__ can iterate without a None check.
        self.values = values if values else []

    def _timestamp_suffix(self):
        """Return the ``" <milliseconds>"`` sample suffix, or "" if no timestamp."""
        if self.timestamp is None or self.timestamp == "":
            # The timestamp is optional in the exposition format; omitting it
            # is better than failing on None * 1000.
            return ""
        # The format requires an integer; a float timestamp would otherwise be
        # rendered as e.g. "1500.0", which scrapers reject.
        return f" {round(self.timestamp * 1000)}"

    def __str__(self):
        if not self.values:
            return ""

        suffix = self._timestamp_suffix()
        lines = [
            f"# HELP {self.name} {self.help}",
            f"# TYPE {self.name} {self.kind}",
        ]
        lines.extend(f"{self.name}{value}{suffix}" for value in self.values)
        # Trailing empty element makes the rendered block end with a newline.
        lines.append("")
        return "\n".join(lines)
class MetricValue:
    """A single sample: an optional label set plus a value.

    ``str(sample)`` renders ``{k="v",...} value`` when labels are present and
    `` value`` (leading space, no braces) otherwise, ready to be appended
    directly after a metric name.

    Attributes:
        value: The sample value, formatted with ``str()``.
        labels: Mapping of label name to label value, or None for no labels.
    """

    value = 0
    labels = None

    def __init__(self, value, labels=None) -> None:
        self.value = value
        self.labels = labels

    @staticmethod
    def _escape_label_value(raw) -> str:
        """Escape a label value for use between double quotes.

        Backslash, double quote and newline must be escaped in label values;
        backslash is handled first so the escapes added for the other two are
        not escaped again.
        """
        return (
            str(raw)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )

    def __str__(self):
        label_str = ""
        if self.labels:
            pairs = ",".join(
                f'{key}="{self._escape_label_value(val)}"'
                for key, val in self.labels.items()
            )
            label_str = "{" + pairs + "}"
        return f"{label_str} {self.value}"
def parse_args(): def parse_args():
@ -152,119 +243,76 @@ def parse_args():
return dish_common.run_arg_parser(parser, modes=["status", "alert_detail", "usage"]) return dish_common.run_arg_parser(parser, modes=["status", "alert_detail", "usage"])
def prometheus_export(opts, gstate): class GatherMetrics(threading.Thread):
raw_data = {} def __init__(self: Self, opts, gstate, *args, **kw):
self.opts = opts
self.gstate = gstate
super().__init__(*args, **kw)
def data_add_item(name, value, category): def run(self: Self):
raw_data[category + "_" + name] = value while True:
pass self.gather()
time.sleep(5.0)
def data_add_sequencem(name, value, category, start): def gather(self: Self) -> None:
raise NotImplementedError("Did not expect sequence data") raw_data = {}
with gstate.lock: def data_add_item(name, value, category):
rc, status_ts, hist_ts = dish_common.get_data( raw_data[category + "_" + name] = value
opts, gstate, data_add_item, data_add_sequencem
)
metrics = [] def data_add_sequencem(name, value, category, start):
raise NotImplementedError(
# snr is not supported by starlink any more but still returned by the grpc f"Did not expect sequence data {name!r} {value!r} {category!r} {start!r}"
# service for backwards compatibility
if "status_snr" in raw_data:
del raw_data["status_snr"]
metrics.append(
Metric(
name="starlink_status_state",
timestamp=status_ts,
values=[
MetricValue(
value=int(raw_data["status_state"] == state_value),
labels={"state": state_value},
)
for state_value in STATE_VALUES
],
)
)
del raw_data["status_state"]
info_metrics = ["status_id", "status_hardware_version", "status_software_version"]
metrics_not_found = []
metrics_not_found.extend([x for x in info_metrics if x not in raw_data])
if len(metrics_not_found) < len(info_metrics):
metrics.append(
Metric(
name="starlink_info",
timestamp=status_ts,
values=[
MetricValue(
value=1,
labels={
x.replace("status_", ""): raw_data.pop(x)
for x in info_metrics
if x in raw_data
},
)
],
) )
)
for name, metric_info in METRICS_INFO.items(): with self.gstate.lock:
if name in raw_data: rc, status_ts, hist_ts = dish_common.get_data(
metrics.append( self.opts, self.gstate, data_add_item, data_add_sequencem
Metric(
name=f"starlink_{name}{metric_info.unit}",
timestamp=status_ts,
kind=metric_info.kind,
values=[MetricValue(value=float(raw_data.pop(name) or 0))],
)
) )
else:
metrics_not_found.append(name)
metrics.append( # snr is not supported by starlink any more but still returned by the grpc
Metric( # service for backwards compatibility
name="starlink_exporter_unprocessed_metrics", if "status_snr" in raw_data:
timestamp=status_ts, del raw_data["status_snr"]
values=[MetricValue(value=1, labels={"metric": name}) for name in raw_data],
status_id = raw_data.get("status_id")
info_metrics = [
"status_id",
"status_hardware_version",
"status_software_version",
]
metrics_not_found = []
metrics_not_found.extend([x for x in info_metrics if x not in raw_data])
info.info(
{
x.replace("status_", ""): raw_data.pop(x)
for x in info_metrics
if x in raw_data
}
) )
)
metrics.append( for name, metric_info in METRICS.items():
Metric( if name in raw_data:
name="starlink_exporter_missing_metrics", match metric_info:
timestamp=status_ts, case Gauge():
values=[ metric_info.labels(id=status_id).set(raw_data.pop(name) or 0)
MetricValue(
value=1,
labels={"metric": name},
)
for name in metrics_not_found
],
)
)
return str.join("\n", [str(metric) for metric in metrics]) case Enum():
metric_info.labels(id=status_id).state(raw_data.pop(name) or 0)
case _:
pass
class MetricsRequestHandler(BaseHTTPRequestHandler): else:
def do_GET(self): metrics_not_found.append(name)
path = self.path.partition("?")[0]
if path.lower() == "/favicon.ico":
self.send_error(HTTPStatus.NOT_FOUND)
return
opts = self.server.opts for name in raw_data:
gstate = self.server.gstate unprocessed_metrics.labels(id=status_id, metric=name).set(1)
content = prometheus_export(opts, gstate) for name in metrics_not_found:
self.send_response(HTTPStatus.OK) missing_metrics.labels(id=status_id, metric=name).set(1)
self.send_header("Content-type", "text/plain")
self.send_header("Content-Length", len(content))
self.end_headers()
self.wfile.write(content.encode())
def main(): def main():
@ -275,10 +323,11 @@ def main():
gstate = dish_common.GlobalState(target=opts.target) gstate = dish_common.GlobalState(target=opts.target)
gstate.lock = threading.Lock() gstate.lock = threading.Lock()
httpd = ThreadingHTTPServer((opts.address, opts.port), MetricsRequestHandler) gather = GatherMetrics(opts, gstate)
gather.start()
httpd = ThreadingHTTPServer((opts.address, opts.port), MetricsHandler)
httpd.daemon_threads = False httpd.daemon_threads = False
httpd.opts = opts
httpd.gstate = gstate
signal.signal(signal.SIGTERM, handle_sigterm) signal.signal(signal.SIGTERM, handle_sigterm)
@ -289,7 +338,7 @@ def main():
pass pass
finally: finally:
httpd.server_close() httpd.server_close()
httpd.gstate.shutdown() gstate.shutdown()
sys.exit() sys.exit()