starlink-grpc-tools/starlink_grpc_tools/dish_grpc_prometheus.py

348 lines
9 KiB
Python
Raw Normal View History

2022-11-15 10:45:24 -06:00
"""Prometheus exporter for Starlink user terminal data info.

This script pulls the current status info and/or metrics computed from the
history data and makes it available via HTTP in the format Prometheus expects.
"""
2023-09-01 13:10:25 -05:00
2023-08-31 22:15:04 -05:00
from http.server import ThreadingHTTPServer
import logging
import signal
import sys
import threading
2023-09-01 13:10:25 -05:00
import time
from typing import Self
from prometheus_client import Counter
from prometheus_client import Enum
from prometheus_client import Gauge
from prometheus_client import Info
from prometheus_client import MetricsHandler
2022-11-15 10:45:24 -06:00
2023-08-31 22:15:04 -05:00
import starlink_grpc_tools.dish_common as dish_common
2022-11-15 10:45:24 -06:00
class Terminated(Exception):
    """Raised by the SIGTERM handler so the main loop can unwind and clean up."""
def handle_sigterm(signum, frame):
    """Signal handler: convert SIGTERM into a ``Terminated`` exception.

    Both arguments are supplied by the ``signal`` machinery and are unused;
    raising here lets the main loop run its normal cleanup path.
    """
    raise Terminated()
2023-09-01 13:10:25 -05:00
# Label names attached to every per-dish metric.
common_labels = ["id"]

# Raw data keys exported as gauges, in registration order. Each entry is
# (raw data key, suffix appended to the exported metric name); the exported
# name is always "starlink_" + key + suffix.
_GAUGE_KEYS = (
    ("status_uptime", "_seconds"),
    ("status_seconds_to_first_nonempty_slot", ""),
    ("status_pop_ping_drop_rate", ""),
    ("status_downlink_throughput_bps", ""),
    ("status_uplink_throughput_bps", ""),
    ("status_pop_ping_latency_ms", ""),
    ("status_alerts", ""),
    ("status_fraction_obstructed", ""),
    ("status_currently_obstructed", ""),
    ("status_seconds_obstructed", ""),
    ("status_obstruction_duration", ""),
    ("status_obstruction_interval", ""),
    ("status_direction_azimuth", ""),
    ("status_direction_elevation", ""),
    ("status_is_snr_above_noise_floor", ""),
    ("status_alert_motors_stuck", ""),
    ("status_alert_thermal_throttle", ""),
    ("status_alert_thermal_shutdown", ""),
    ("status_alert_mast_not_near_vertical", ""),
    ("status_alert_unexpected_location", ""),
    ("status_alert_slow_ethernet_speeds", ""),
    ("status_alert_roaming", ""),
    ("status_alert_install_pending", ""),
    ("status_alert_is_heating", ""),
    ("status_alert_power_supply_thermal_throttle", ""),
    ("status_alert_is_power_save_idle", ""),
    ("status_alert_moving_fast_while_not_aviation", ""),
    ("status_alert_moving_while_not_mobile", ""),
    ("ping_stats_samples", ""),
    ("ping_stats_end_counter", ""),
    ("usage_download_usage", "_bytes"),
    ("usage_upload_usage", "_bytes"),
)

# Maps each raw data key to the Prometheus metric object that exports it.
METRICS: dict[str, Counter | Enum | Gauge] = {
    key: Gauge(f"starlink_{key}{suffix}", "", common_labels)
    for key, suffix in _GAUGE_KEYS
}

# The dish state is the only non-gauge metric; it is registered last.
METRICS["status_state"] = Enum(
    "starlink_status_state",
    "",
    common_labels,
    states=[
        "UNKNOWN",
        "CONNECTED",
        "BOOTING",
        "SEARCHING",
        "STOWED",
        "THERMAL_SHUTDOWN",
        "NO_SATS",
        "OBSTRUCTED",
        "NO_DOWNLINK",
        "NO_PINGS",
        "DISH_UNREACHABLE",
    ],
)
2023-09-01 13:10:25 -05:00
# Unlabelled info metric; populated with key/value pairs via ``info.info(...)``.
info = Info(name="starlink_info", documentation="")

# Set to 1 per raw data key that had no matching entry in METRICS.
unprocessed_metrics = Gauge(
    name="starlink_unprocessed_metrics",
    documentation="",
    labelnames=[*common_labels, "metric"],
)

# Set to 1 per expected metric that was absent from the raw data.
missing_metrics = Gauge(
    name="starlink_missing_metrics",
    documentation="",
    labelnames=[*common_labels, "metric"],
)
2022-11-15 10:45:24 -06:00
def parse_args():
    """Build the exporter's command line parser and return the parsed options.

    Starts from the parser created by ``dish_common.create_arg_parser`` and
    adds an "HTTP server options" group with ``--address`` and ``--port``.
    The result of ``dish_common.run_arg_parser`` is returned unchanged.
    """
    arg_parser = dish_common.create_arg_parser(
        output_description="Prometheus exporter",
        bulk_history=False,
    )

    http_options = arg_parser.add_argument_group(title="HTTP server options")
    http_options.add_argument(
        "--address", default="0.0.0.0", help="IP address to listen on"
    )
    http_options.add_argument(
        "--port", default=8080, type=int, help="Port to listen on"
    )

    supported_modes = ["status", "alert_detail", "usage"]
    return dish_common.run_arg_parser(arg_parser, modes=supported_modes)
2022-11-15 10:45:24 -06:00
2023-09-01 13:10:25 -05:00
class GatherMetrics(threading.Thread):
    """Background thread that periodically refreshes the Prometheus metrics.

    Each cycle calls ``gather()`` and then waits ``interval`` seconds. The
    loop ends once ``stop()`` has been called.

    Args:
        opts: Parsed command line options, passed through to
            ``dish_common.get_data``.
        gstate: Global state object passed through to
            ``dish_common.get_data``; its ``lock`` attribute is held for the
            duration of each poll.
        interval: Seconds to wait between polls (keyword-only, default 5.0).
        *args, **kw: Forwarded to ``threading.Thread``. ``daemon`` defaults
            to ``True`` unless given explicitly.
    """

    def __init__(self, opts, gstate, *args, interval=5.0, **kw):
        # The polling loop never finishes on its own, so a non-daemon thread
        # would keep the interpreter alive after the main thread exits.
        kw.setdefault("daemon", True)
        super().__init__(*args, **kw)
        self.opts = opts
        self.gstate = gstate
        self.interval = interval
        self._stop_event = threading.Event()

    def stop(self):
        """Ask the polling loop to exit; also cuts short the current wait."""
        self._stop_event.set()

    def run(self):
        """Poll until ``stop()`` is called, logging (not propagating) errors."""
        while not self._stop_event.is_set():
            try:
                self.gather()
            except Exception:
                # An unhandled exception would end the thread and leave the
                # exported metrics frozen, so log it and try again next cycle.
                logging.exception("Failed to gather dish metrics")
            self._stop_event.wait(self.interval)

    def gather(self) -> None:
        """Fetch one round of dish data and push it into the metric objects.

        Raises:
            NotImplementedError: If ``dish_common.get_data`` reports sequence
                data, which this exporter does not handle.
        """
        raw_data = {}

        def data_add_item(name, value, category):
            raw_data[category + "_" + name] = value

        def data_add_sequence(name, value, category, start):
            raise NotImplementedError(
                f"Did not expect sequence data {name!r} {value!r} {category!r} {start!r}"
            )

        # NOTE(review): the values returned by get_data are not used here;
        # presumably the first one signals a failed poll -- confirm against
        # dish_common.get_data before acting on it.
        with self.gstate.lock:
            dish_common.get_data(
                self.opts, self.gstate, data_add_item, data_add_sequence
            )

        # snr is not supported by starlink any more but still returned by the
        # grpc service for backwards compatibility
        raw_data.pop("status_snr", None)

        status_id = raw_data.get("status_id")

        info_keys = (
            "status_id",
            "status_hardware_version",
            "status_software_version",
        )
        metrics_not_found = [key for key in info_keys if key not in raw_data]
        info.info(
            {
                key.removeprefix("status_"): raw_data.pop(key)
                for key in info_keys
                if key in raw_data
            }
        )

        for name, metric in METRICS.items():
            if name not in raw_data:
                metrics_not_found.append(name)
                continue
            if isinstance(metric, Gauge):
                metric.labels(id=status_id).set(raw_data.pop(name) or 0)
            elif isinstance(metric, Enum):
                # Enum.state() only accepts one of the declared state names,
                # so an empty value must fall back to a state name, not 0.
                metric.labels(id=status_id).state(
                    raw_data.pop(name) or "UNKNOWN"
                )
            # Any other metric type is left in raw_data and therefore gets
            # reported as unprocessed below.

        # Whatever is still in raw_data had no matching METRICS entry.
        for name in raw_data:
            unprocessed_metrics.labels(id=status_id, metric=name).set(1)
        for name in metrics_not_found:
            missing_metrics.labels(id=status_id, metric=name).set(1)
2022-11-15 10:45:24 -06:00
def main():
    """Run the exporter: start the polling thread and serve metrics over HTTP.

    Serves until interrupted by Ctrl-C or SIGTERM, then closes the HTTP
    server, shuts down the dish connection state and exits the process.
    """
    opts = parse_args()

    logging.basicConfig(format="%(levelname)s: %(message)s", stream=sys.stderr)

    gstate = dish_common.GlobalState(target=opts.target)
    # Guards gstate while the polling thread is fetching data.
    gstate.lock = threading.Lock()

    # The polling loop never returns, so the thread must be a daemon;
    # otherwise sys.exit() below would block forever waiting for it.
    gather = GatherMetrics(opts, gstate, daemon=True)
    gather.start()

    httpd = ThreadingHTTPServer((opts.address, opts.port), MetricsHandler)
    httpd.daemon_threads = False

    signal.signal(signal.SIGTERM, handle_sigterm)

    print("HTTP listening on port", opts.port)
    try:
        httpd.serve_forever()
    except (KeyboardInterrupt, Terminated):
        pass
    finally:
        httpd.server_close()
        gstate.shutdown()

    sys.exit()


if __name__ == "__main__":
    main()