switch to official Prometheus client
This commit is contained in:
parent
514fa50a8c
commit
b0628543f8
3 changed files with 282 additions and 217 deletions
17
poetry.lock
generated
17
poetry.lock
generated
|
@ -565,6 +565,21 @@ files = [
|
|||
docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"]
|
||||
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "prometheus-client"
|
||||
version = "0.17.1"
|
||||
description = "Python client for the Prometheus monitoring system."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "prometheus_client-0.17.1-py3-none-any.whl", hash = "sha256:e537f37160f6807b8202a6fc4764cdd19bac5480ddd3e0d463c3002b34462101"},
|
||||
{file = "prometheus_client-0.17.1.tar.gz", hash = "sha256:21e674f39831ae3f8acde238afd9a27a37d0d2fb5a28ea094f0ce25d2cbf2091"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
twisted = ["twisted"]
|
||||
|
||||
[[package]]
|
||||
name = "protobuf"
|
||||
version = "4.24.2"
|
||||
|
@ -770,4 +785,4 @@ test = ["pytest", "pytest-grpc"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "d0558513b225264653cde8153abe3425004ee3298c1407cbb2dc672fe356b2f3"
|
||||
content-hash = "9756b224be33434f1cf2d6702163ed6e6ffb37f40d608ea6be5e0a832d5e8bfe"
|
||||
|
|
|
@ -17,6 +17,7 @@ influxdb = "^5.3.1"
|
|||
influxdb-client = "^1.37.0"
|
||||
pypng = "^0.20220715.0"
|
||||
typing-extensions = "^4.7.1"
|
||||
prometheus-client = "^0.17.1"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
black = "^23.7.0"
|
||||
|
@ -32,7 +33,7 @@ build-backend = "poetry.core.masonry.api"
|
|||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
line_length = 88
|
||||
line_length = 120
|
||||
force_single_line = true
|
||||
force_sort_within_sections = true
|
||||
from_first = false
|
||||
|
|
|
@ -1,17 +1,23 @@
|
|||
#!/usr/bin/python3
|
||||
"""Prometheus exporter for Starlink user terminal data info.
|
||||
|
||||
This script pulls the current status info and/or metrics computed from the
|
||||
history data and makes it available via HTTP in the format Prometheus expects.
|
||||
"""
|
||||
|
||||
from http import HTTPStatus
|
||||
from http.server import BaseHTTPRequestHandler
|
||||
|
||||
from http.server import ThreadingHTTPServer
|
||||
import logging
|
||||
import signal
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from typing import Self
|
||||
|
||||
from prometheus_client import Counter
|
||||
from prometheus_client import Enum
|
||||
from prometheus_client import Gauge
|
||||
from prometheus_client import Info
|
||||
from prometheus_client import MetricsHandler
|
||||
|
||||
import starlink_grpc_tools.dish_common as dish_common
|
||||
|
||||
|
@ -25,57 +31,174 @@ def handle_sigterm(signum, frame):
|
|||
raise Terminated
|
||||
|
||||
|
||||
# Label names attached to every labelled metric. Kept at module level because
# the metric definitions reference it as a bare global, and kept as a list
# because it is concatenated with extra label names (``common_labels + [...]``).
common_labels = ["id"]


class MetricInfo:
    """Static description of one exported metric.

    The class-level values are the defaults; ``__init__`` only overrides the
    ones that were passed with a truthy value.

    Attributes:
        unit: Suffix appended to the metric name, stored with a leading
            underscore (``"_seconds"``), or ``""`` when there is no unit.
        kind: Prometheus metric type name, ``"gauge"`` by default.
        help: Help text for the metric, ``""`` by default.
    """

    unit = ""
    kind = "gauge"
    help = ""

    def __init__(self, unit=None, kind=None, help=None) -> None:
        """Override the class defaults with any truthy argument.

        Args:
            unit: Unit name without the leading underscore, e.g. ``"bytes"``.
            kind: Metric type name, e.g. ``"counter"``.
            help: Help text.
        """
        if unit:
            # Stored pre-formatted so it can be appended straight to a name.
            self.unit = f"_{unit}"
        if kind:
            self.kind = kind
        if help:
            self.help = help
|
||||
|
||||
|
||||
# Metadata for every numeric field that is exported, keyed by the
# "<category>_<field>" name. Most entries use the MetricInfo() defaults; the
# few that carry a unit suffix or are counters are spelled out explicitly.
# Key order matches the original table.
METRICS_INFO = {
    "status_uptime": MetricInfo(unit="seconds", kind="counter"),
    **{
        key: MetricInfo()
        for key in (
            "status_seconds_to_first_nonempty_slot",
            "status_pop_ping_drop_rate",
            "status_downlink_throughput_bps",
            "status_uplink_throughput_bps",
            "status_pop_ping_latency_ms",
            "status_alerts",
            "status_fraction_obstructed",
            "status_currently_obstructed",
            "status_seconds_obstructed",
            "status_obstruction_duration",
            "status_obstruction_interval",
            "status_direction_azimuth",
            "status_direction_elevation",
            "status_is_snr_above_noise_floor",
            "status_alert_motors_stuck",
            "status_alert_thermal_throttle",
            "status_alert_thermal_shutdown",
            "status_alert_mast_not_near_vertical",
            "status_alert_unexpected_location",
            "status_alert_slow_ethernet_speeds",
            "status_alert_roaming",
            "status_alert_install_pending",
            "status_alert_is_heating",
            "status_alert_power_supply_thermal_throttle",
            "status_alert_is_power_save_idle",
            "status_alert_moving_fast_while_not_aviation",
            "status_alert_moving_while_not_mobile",
        )
    },
    "ping_stats_samples": MetricInfo(kind="counter"),
    "ping_stats_end_counter": MetricInfo(kind="counter"),
    "usage_download_usage": MetricInfo(unit="bytes", kind="counter"),
    "usage_upload_usage": MetricInfo(unit="bytes", kind="counter"),
}
|
||||
|
||||
STATE_VALUES = [
|
||||
METRICS: dict[str, Counter | Enum | Gauge] = {
|
||||
"status_uptime": Gauge(
|
||||
"starlink_status_uptime_seconds",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_seconds_to_first_nonempty_slot": Gauge(
|
||||
"starlink_status_seconds_to_first_nonempty_slot",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_pop_ping_drop_rate": Gauge(
|
||||
"starlink_status_pop_ping_drop_rate",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_downlink_throughput_bps": Gauge(
|
||||
"starlink_status_downlink_throughput_bps",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_uplink_throughput_bps": Gauge(
|
||||
"starlink_status_uplink_throughput_bps",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_pop_ping_latency_ms": Gauge(
|
||||
"starlink_status_pop_ping_latency_ms",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alerts": Gauge(
|
||||
"starlink_status_alerts",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_fraction_obstructed": Gauge(
|
||||
"starlink_status_fraction_obstructed",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_currently_obstructed": Gauge(
|
||||
"starlink_status_currently_obstructed",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_seconds_obstructed": Gauge(
|
||||
"starlink_status_seconds_obstructed",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_obstruction_duration": Gauge(
|
||||
"starlink_status_obstruction_duration",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_obstruction_interval": Gauge(
|
||||
"starlink_status_obstruction_interval",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_direction_azimuth": Gauge(
|
||||
"starlink_status_direction_azimuth",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_direction_elevation": Gauge(
|
||||
"starlink_status_direction_elevation",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_is_snr_above_noise_floor": Gauge(
|
||||
"starlink_status_is_snr_above_noise_floor",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alert_motors_stuck": Gauge(
|
||||
"starlink_status_alert_motors_stuck",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alert_thermal_throttle": Gauge(
|
||||
"starlink_status_alert_thermal_throttle",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alert_thermal_shutdown": Gauge(
|
||||
"starlink_status_alert_thermal_shutdown",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alert_mast_not_near_vertical": Gauge(
|
||||
"starlink_status_alert_mast_not_near_vertical",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alert_unexpected_location": Gauge(
|
||||
"starlink_status_alert_unexpected_location",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alert_slow_ethernet_speeds": Gauge(
|
||||
"starlink_status_alert_slow_ethernet_speeds",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alert_roaming": Gauge(
|
||||
"starlink_status_alert_roaming",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alert_install_pending": Gauge(
|
||||
"starlink_status_alert_install_pending",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alert_is_heating": Gauge(
|
||||
"starlink_status_alert_is_heating",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alert_power_supply_thermal_throttle": Gauge(
|
||||
"starlink_status_alert_power_supply_thermal_throttle",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alert_is_power_save_idle": Gauge(
|
||||
"starlink_status_alert_is_power_save_idle",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alert_moving_fast_while_not_aviation": Gauge(
|
||||
"starlink_status_alert_moving_fast_while_not_aviation",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_alert_moving_while_not_mobile": Gauge(
|
||||
"starlink_status_alert_moving_while_not_mobile",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"ping_stats_samples": Gauge(
|
||||
"starlink_ping_stats_samples",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"ping_stats_end_counter": Gauge(
|
||||
"starlink_ping_stats_end_counter",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"usage_download_usage": Gauge(
|
||||
"starlink_usage_download_usage_bytes",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"usage_upload_usage": Gauge(
|
||||
"starlink_usage_upload_usage_bytes",
|
||||
"",
|
||||
common_labels,
|
||||
),
|
||||
"status_state": Enum(
|
||||
"starlink_status_state",
|
||||
"",
|
||||
common_labels,
|
||||
states=[
|
||||
"UNKNOWN",
|
||||
"CONNECTED",
|
||||
"BOOTING",
|
||||
|
@ -87,57 +210,25 @@ STATE_VALUES = [
|
|||
"NO_DOWNLINK",
|
||||
"NO_PINGS",
|
||||
"DISH_UNREACHABLE",
|
||||
]
|
||||
],
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
class Metric:
    """One metric family rendered in the Prometheus text exposition format.

    Attributes:
        name: Full metric name.
        timestamp: Sample time in seconds, or ``None`` to omit the timestamp.
        kind: Metric type written on the ``# TYPE`` line.
        help: Text written on the ``# HELP`` line.
        values: Samples; each is formatted with ``str()`` and appended
            directly to the metric name.
    """

    # Class-level fallbacks; __init__ always replaces them per instance.
    name = ""
    timestamp = ""
    kind = None
    help = None
    values = None

    def __init__(self, name, timestamp, kind="gauge", help="", values=None):
        """Store the metric description and its samples.

        Args:
            name: Full metric name.
            timestamp: Sample time in seconds (int or float), or ``None``.
            kind: Metric type name.
            help: Help text.
            values: Samples to emit; a falsy value becomes a fresh empty list.
        """
        self.name = name
        self.timestamp = timestamp
        self.kind = kind
        self.help = help
        self.values = values if values else []

    def __str__(self):
        """Return the exposition text, or ``""`` when there are no samples.

        The result is the HELP line, the TYPE line and one line per sample,
        each terminated by a newline.
        """
        if not self.values:
            return ""

        if self.timestamp is None or self.timestamp == "":
            # The timestamp is optional in the text format; leave it out
            # rather than failing on the multiplication below.
            suffix = ""
        else:
            # Sample timestamps are integer milliseconds; a float number of
            # seconds must not leak through as e.g. "1500.0".
            suffix = f" {int(self.timestamp * 1000)}"

        lines = [
            f"# HELP {self.name} {self.help}",
            f"# TYPE {self.name} {self.kind}",
        ]
        lines.extend(f"{self.name}{value}{suffix}" for value in self.values)
        lines.append("")  # produces the trailing newline
        return "\n".join(lines)
|
||||
|
||||
|
||||
class MetricValue:
|
||||
value = 0
|
||||
labels = None
|
||||
|
||||
def __init__(self, value, labels=None) -> None:
|
||||
self.value = value
|
||||
self.labels = labels
|
||||
|
||||
def __str__(self):
|
||||
label_str = ""
|
||||
if self.labels:
|
||||
label_str = (
|
||||
"{"
|
||||
+ str.join(",", [f'{v[0]}="{v[1]}"' for v in self.labels.items()])
|
||||
+ "}"
|
||||
info = Info(
|
||||
"starlink_info",
|
||||
"",
|
||||
# common_labels,
|
||||
)
|
||||
unprocessed_metrics = Gauge(
|
||||
"starlink_unprocessed_metrics",
|
||||
"",
|
||||
common_labels + ["metric"],
|
||||
)
|
||||
missing_metrics = Gauge(
|
||||
"starlink_missing_metrics",
|
||||
"",
|
||||
common_labels + ["metric"],
|
||||
)
|
||||
return f"{label_str} {self.value}"
|
||||
|
||||
|
||||
def parse_args():
|
||||
|
@ -152,119 +243,76 @@ def parse_args():
|
|||
return dish_common.run_arg_parser(parser, modes=["status", "alert_detail", "usage"])
|
||||
|
||||
|
||||
def prometheus_export(opts, gstate):
|
||||
class GatherMetrics(threading.Thread):
|
||||
def __init__(self: Self, opts, gstate, *args, **kw):
|
||||
self.opts = opts
|
||||
self.gstate = gstate
|
||||
super().__init__(*args, **kw)
|
||||
|
||||
def run(self: Self):
|
||||
while True:
|
||||
self.gather()
|
||||
time.sleep(5.0)
|
||||
|
||||
def gather(self: Self) -> None:
|
||||
raw_data = {}
|
||||
|
||||
def data_add_item(name, value, category):
|
||||
raw_data[category + "_" + name] = value
|
||||
pass
|
||||
|
||||
def data_add_sequencem(name, value, category, start):
|
||||
raise NotImplementedError("Did not expect sequence data")
|
||||
|
||||
with gstate.lock:
|
||||
rc, status_ts, hist_ts = dish_common.get_data(
|
||||
opts, gstate, data_add_item, data_add_sequencem
|
||||
raise NotImplementedError(
|
||||
f"Did not expect sequence data {name!r} {value!r} {category!r} {start!r}"
|
||||
)
|
||||
|
||||
metrics = []
|
||||
with self.gstate.lock:
|
||||
rc, status_ts, hist_ts = dish_common.get_data(
|
||||
self.opts, self.gstate, data_add_item, data_add_sequencem
|
||||
)
|
||||
|
||||
# snr is not supported by starlink any more but still returned by the grpc
|
||||
# service for backwards compatibility
|
||||
if "status_snr" in raw_data:
|
||||
del raw_data["status_snr"]
|
||||
|
||||
metrics.append(
|
||||
Metric(
|
||||
name="starlink_status_state",
|
||||
timestamp=status_ts,
|
||||
values=[
|
||||
MetricValue(
|
||||
value=int(raw_data["status_state"] == state_value),
|
||||
labels={"state": state_value},
|
||||
)
|
||||
for state_value in STATE_VALUES
|
||||
],
|
||||
)
|
||||
)
|
||||
del raw_data["status_state"]
|
||||
status_id = raw_data.get("status_id")
|
||||
|
||||
info_metrics = ["status_id", "status_hardware_version", "status_software_version"]
|
||||
info_metrics = [
|
||||
"status_id",
|
||||
"status_hardware_version",
|
||||
"status_software_version",
|
||||
]
|
||||
metrics_not_found = []
|
||||
metrics_not_found.extend([x for x in info_metrics if x not in raw_data])
|
||||
|
||||
if len(metrics_not_found) < len(info_metrics):
|
||||
metrics.append(
|
||||
Metric(
|
||||
name="starlink_info",
|
||||
timestamp=status_ts,
|
||||
values=[
|
||||
MetricValue(
|
||||
value=1,
|
||||
labels={
|
||||
info.info(
|
||||
{
|
||||
x.replace("status_", ""): raw_data.pop(x)
|
||||
for x in info_metrics
|
||||
if x in raw_data
|
||||
},
|
||||
)
|
||||
],
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
for name, metric_info in METRICS_INFO.items():
|
||||
for name, metric_info in METRICS.items():
|
||||
if name in raw_data:
|
||||
metrics.append(
|
||||
Metric(
|
||||
name=f"starlink_{name}{metric_info.unit}",
|
||||
timestamp=status_ts,
|
||||
kind=metric_info.kind,
|
||||
values=[MetricValue(value=float(raw_data.pop(name) or 0))],
|
||||
)
|
||||
)
|
||||
match metric_info:
|
||||
case Gauge():
|
||||
metric_info.labels(id=status_id).set(raw_data.pop(name) or 0)
|
||||
|
||||
case Enum():
|
||||
metric_info.labels(id=status_id).state(raw_data.pop(name) or 0)
|
||||
|
||||
case _:
|
||||
pass
|
||||
|
||||
else:
|
||||
metrics_not_found.append(name)
|
||||
|
||||
metrics.append(
|
||||
Metric(
|
||||
name="starlink_exporter_unprocessed_metrics",
|
||||
timestamp=status_ts,
|
||||
values=[MetricValue(value=1, labels={"metric": name}) for name in raw_data],
|
||||
)
|
||||
)
|
||||
for name in raw_data:
|
||||
unprocessed_metrics.labels(id=status_id, metric=name).set(1)
|
||||
|
||||
metrics.append(
|
||||
Metric(
|
||||
name="starlink_exporter_missing_metrics",
|
||||
timestamp=status_ts,
|
||||
values=[
|
||||
MetricValue(
|
||||
value=1,
|
||||
labels={"metric": name},
|
||||
)
|
||||
for name in metrics_not_found
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
return str.join("\n", [str(metric) for metric in metrics])
|
||||
|
||||
|
||||
class MetricsRequestHandler(BaseHTTPRequestHandler):
|
||||
def do_GET(self):
|
||||
path = self.path.partition("?")[0]
|
||||
if path.lower() == "/favicon.ico":
|
||||
self.send_error(HTTPStatus.NOT_FOUND)
|
||||
return
|
||||
|
||||
opts = self.server.opts
|
||||
gstate = self.server.gstate
|
||||
|
||||
content = prometheus_export(opts, gstate)
|
||||
self.send_response(HTTPStatus.OK)
|
||||
self.send_header("Content-type", "text/plain")
|
||||
self.send_header("Content-Length", len(content))
|
||||
self.end_headers()
|
||||
self.wfile.write(content.encode())
|
||||
for name in metrics_not_found:
|
||||
missing_metrics.labels(id=status_id, metric=name).set(1)
|
||||
|
||||
|
||||
def main():
|
||||
|
@ -275,10 +323,11 @@ def main():
|
|||
gstate = dish_common.GlobalState(target=opts.target)
|
||||
gstate.lock = threading.Lock()
|
||||
|
||||
httpd = ThreadingHTTPServer((opts.address, opts.port), MetricsRequestHandler)
|
||||
gather = GatherMetrics(opts, gstate)
|
||||
gather.start()
|
||||
|
||||
httpd = ThreadingHTTPServer((opts.address, opts.port), MetricsHandler)
|
||||
httpd.daemon_threads = False
|
||||
httpd.opts = opts
|
||||
httpd.gstate = gstate
|
||||
|
||||
signal.signal(signal.SIGTERM, handle_sigterm)
|
||||
|
||||
|
@ -289,7 +338,7 @@ def main():
|
|||
pass
|
||||
finally:
|
||||
httpd.server_close()
|
||||
httpd.gstate.shutdown()
|
||||
gstate.shutdown()
|
||||
|
||||
sys.exit()
|
||||
|
||||
|
|
Loading…
Reference in a new issue