2022-11-15 10:45:24 -06:00
|
|
|
"""Prometheus exporter for Starlink user terminal data info.
|
|
|
|
|
|
|
|
This script pulls the current status info and/or metrics computed from the
history data and makes them available via HTTP in the format Prometheus expects.
|
2022-11-15 10:45:24 -06:00
|
|
|
"""
|
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
|
2023-08-31 22:15:04 -05:00
|
|
|
from http.server import ThreadingHTTPServer
|
A few tweaks to the Prometheus exporter script
Move the global state onto the http server object so it doesn't have to be accessed as module globals.
Limit the mode groups that can be selected via command line args to the ones that are actually parsed. There are a few other options added in dish_common that don't really apply to this script, but they are mostly harmless, whereas some of the other mode groups will cause this script to throw an exception.
Reject access to "/favicon.ico" path, so testing from a web browser does not result in running the dish queries twice, and thus confusing the global state a little.
Add a lock to serialize calls to dish_common.get_data. That function is not thread-safe, even with CPython's Global Interpreter Lock, because the starlink_grpc functions it calls block. This script is really not meant for concurrent HTTP access, given that the usage stats are reported as usage since last access (by default), but since it's technically supported, might as well have it work properly.
Add the same handling of keyboard interrupt (Ctrl-C) and SIGTERM signal as the other grpc scripts, along with proper shutdown.
2022-12-21 12:25:57 -06:00
|
|
|
import logging
|
|
|
|
import signal
|
|
|
|
import sys
|
|
|
|
import threading
|
2023-09-01 13:10:25 -05:00
|
|
|
import time
|
|
|
|
from typing import Self
|
|
|
|
|
|
|
|
from prometheus_client import Counter
|
|
|
|
from prometheus_client import Enum
|
|
|
|
from prometheus_client import Gauge
|
|
|
|
from prometheus_client import Info
|
|
|
|
from prometheus_client import MetricsHandler
|
2022-11-15 10:45:24 -06:00
|
|
|
|
2023-08-31 22:15:04 -05:00
|
|
|
import starlink_grpc_tools.dish_common as dish_common
|
2022-11-15 10:45:24 -06:00
|
|
|
|
|
|
|
|
A few tweaks to the Prometheus exporter script
Move the global state onto the http server object so it doesn't have to be accessed as module globals.
Limit the mode groups that can be selected via command line args to the ones that are actually parsed. There are a few other options added in dish_common that don't really apply to this script, but they are mostly harmless, whereas some of the other mode groups will cause this script to throw an exception.
Reject access to "/favicon.ico" path, so testing from a web browser does not result in running the dish queries twice, and thus confusing the global state a little.
Add a lock to serialize calls to dish_common.get_data. That function is not thread-safe, even with CPython's Global Interpreter Lock, because the starlink_grpc functions it calls block. This script is really not meant for concurrent HTTP access, given that the usage stats are reported as usage since last access (by default), but since it's technically supported, might as well have it work properly.
Add the same handling of keyboard interrupt (Ctrl-C) and SIGTERM signal as the other grpc scripts, along with proper shutdown.
2022-12-21 12:25:57 -06:00
|
|
|
class Terminated(Exception):
    """Raised from the SIGTERM handler so the main loop can clean up and exit."""
|
|
|
|
|
|
|
|
|
|
|
|
def handle_sigterm(signum, frame):
    """Signal handler that converts SIGTERM into a ``Terminated`` exception.

    Raising from the handler lets the main loop's ``try``/``finally`` run its
    cleanup code.

    Args:
        signum: Signal number supplied by the ``signal`` module (unused).
        frame: Interrupted stack frame supplied by the ``signal`` module
            (unused).

    Raises:
        Terminated: Always.
    """
    raise Terminated()
|
|
|
|
|
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
# Label names shared by every per-dish metric; "id" is filled with the dish ID.
common_labels = ["id"]

# Keys of the values exported as gauges, listed in registration order. Each
# key is the "<category>_<name>" string under which the value is collected.
_GAUGE_KEYS = (
    "status_uptime",
    "status_seconds_to_first_nonempty_slot",
    "status_pop_ping_drop_rate",
    "status_downlink_throughput_bps",
    "status_uplink_throughput_bps",
    "status_pop_ping_latency_ms",
    "status_alerts",
    "status_fraction_obstructed",
    "status_currently_obstructed",
    "status_seconds_obstructed",
    "status_obstruction_duration",
    "status_obstruction_interval",
    "status_direction_azimuth",
    "status_direction_elevation",
    "status_is_snr_above_noise_floor",
    "status_alert_motors_stuck",
    "status_alert_thermal_throttle",
    "status_alert_thermal_shutdown",
    "status_alert_mast_not_near_vertical",
    "status_alert_unexpected_location",
    "status_alert_slow_ethernet_speeds",
    "status_alert_roaming",
    "status_alert_install_pending",
    "status_alert_is_heating",
    "status_alert_power_supply_thermal_throttle",
    "status_alert_is_power_save_idle",
    "status_alert_moving_fast_while_not_aviation",
    "status_alert_moving_while_not_mobile",
    "ping_stats_samples",
    "ping_stats_end_counter",
    "usage_download_usage",
    "usage_upload_usage",
)

# Exported metric names are "starlink_<key>", except for these keys, whose
# exported name additionally carries a unit suffix.
_GAUGE_NAME_SUFFIXES = {
    "status_uptime": "_seconds",
    "usage_download_usage": "_bytes",
    "usage_upload_usage": "_bytes",
}

# Maps each collected data key to the Prometheus metric that reports it.
METRICS: dict[str, Counter | Enum | Gauge] = {
    key: Gauge(
        f"starlink_{key}{_GAUGE_NAME_SUFFIXES.get(key, '')}",
        "",
        common_labels,
    )
    for key in _GAUGE_KEYS
}
# The dish state is the only non-numeric value, so it is exported as an Enum.
METRICS["status_state"] = Enum(
    "starlink_status_state",
    "",
    common_labels,
    states=[
        "UNKNOWN",
        "CONNECTED",
        "BOOTING",
        "SEARCHING",
        "STOWED",
        "THERMAL_SHUTDOWN",
        "NO_SATS",
        "OBSTRUCTED",
        "NO_DOWNLINK",
        "NO_PINGS",
        "DISH_UNREACHABLE",
    ],
)

# Static dish details (ID, hardware and software version). Unlike the metrics
# above, this one is registered without any labels.
info = Info("starlink_info", "")

# Set to 1 for every collected key that has no entry in METRICS.
unprocessed_metrics = Gauge(
    "starlink_unprocessed_metrics",
    "",
    common_labels + ["metric"],
)

# Set to 1 for every expected key that was absent from the collected data.
missing_metrics = Gauge(
    "starlink_missing_metrics",
    "",
    common_labels + ["metric"],
)
|
2022-11-15 10:45:24 -06:00
|
|
|
|
|
|
|
|
|
|
|
def parse_args():
    """Build the command line parser for this script and parse the arguments.

    Starts from the common parser provided by ``dish_common`` and adds the
    HTTP server options (``--address`` and ``--port``).

    Returns:
        The result of ``dish_common.run_arg_parser`` for the configured
        parser.
    """
    arg_parser = dish_common.create_arg_parser(
        output_description="Prometheus exporter",
        bulk_history=False,
    )

    http_options = arg_parser.add_argument_group(title="HTTP server options")
    http_options.add_argument(
        "--address",
        default="0.0.0.0",
        help="IP address to listen on",
    )
    http_options.add_argument(
        "--port",
        default=8080,
        type=int,
        help="Port to listen on",
    )

    # Only the mode groups this script actually knows how to export.
    supported_modes = ["status", "alert_detail", "usage"]
    return dish_common.run_arg_parser(arg_parser, modes=supported_modes)
|
2022-11-15 10:45:24 -06:00
|
|
|
|
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
class GatherMetrics(threading.Thread):
|
|
|
|
def __init__(self: Self, opts, gstate, *args, **kw):
|
|
|
|
self.opts = opts
|
|
|
|
self.gstate = gstate
|
|
|
|
super().__init__(*args, **kw)
|
2022-11-15 10:45:24 -06:00
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
def run(self: Self):
|
|
|
|
while True:
|
|
|
|
self.gather()
|
|
|
|
time.sleep(5.0)
|
2022-11-15 10:45:24 -06:00
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
def gather(self: Self) -> None:
|
|
|
|
raw_data = {}
|
2022-11-15 10:45:24 -06:00
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
def data_add_item(name, value, category):
|
|
|
|
raw_data[category + "_" + name] = value
|
2022-11-15 10:45:24 -06:00
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
def data_add_sequencem(name, value, category, start):
|
|
|
|
raise NotImplementedError(
|
|
|
|
f"Did not expect sequence data {name!r} {value!r} {category!r} {start!r}"
|
2023-08-31 22:15:04 -05:00
|
|
|
)
|
2022-11-15 10:45:24 -06:00
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
with self.gstate.lock:
|
|
|
|
rc, status_ts, hist_ts = dish_common.get_data(
|
|
|
|
self.opts, self.gstate, data_add_item, data_add_sequencem
|
2023-08-31 22:15:04 -05:00
|
|
|
)
|
2022-11-15 10:45:24 -06:00
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
# snr is not supported by starlink any more but still returned by the grpc
|
|
|
|
# service for backwards compatibility
|
|
|
|
if "status_snr" in raw_data:
|
|
|
|
del raw_data["status_snr"]
|
|
|
|
|
|
|
|
status_id = raw_data.get("status_id")
|
|
|
|
|
|
|
|
info_metrics = [
|
|
|
|
"status_id",
|
|
|
|
"status_hardware_version",
|
|
|
|
"status_software_version",
|
|
|
|
]
|
|
|
|
metrics_not_found = []
|
|
|
|
metrics_not_found.extend([x for x in info_metrics if x not in raw_data])
|
|
|
|
|
|
|
|
info.info(
|
|
|
|
{
|
|
|
|
x.replace("status_", ""): raw_data.pop(x)
|
|
|
|
for x in info_metrics
|
|
|
|
if x in raw_data
|
|
|
|
}
|
2023-08-31 22:15:04 -05:00
|
|
|
)
|
2022-11-15 10:45:24 -06:00
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
for name, metric_info in METRICS.items():
|
|
|
|
if name in raw_data:
|
|
|
|
match metric_info:
|
|
|
|
case Gauge():
|
|
|
|
metric_info.labels(id=status_id).set(raw_data.pop(name) or 0)
|
2022-11-15 10:45:24 -06:00
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
case Enum():
|
|
|
|
metric_info.labels(id=status_id).state(raw_data.pop(name) or 0)
|
2022-11-15 10:45:24 -06:00
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
case _:
|
|
|
|
pass
|
A few tweaks to the Prometheus exporter script
Move the global state onto the http server object so it doesn't have to be accessed as module globals.
Limit the mode groups that can be selected via command line args to the ones that are actually parsed. There are a few other options added in dish_common that don't really apply to this script, but they are mostly harmless, whereas some of the other mode groups will cause this script to throw an exception.
Reject access to "/favicon.ico" path, so testing from a web browser does not result in running the dish queries twice, and thus confusing the global state a little.
Add a lock to serialize calls to dish_common.get_data. That function is not thread-safe, even with CPython's Global Interpreter Lock, because the starlink_grpc functions it calls block. This script is really not meant for concurrent HTTP access, given that the usage stats are reported as usage since last access (by default), but since it's technically supported, might as well have it work properly.
Add the same handling of keyboard interrupt (Ctrl-C) and SIGTERM signal as the other grpc scripts, along with proper shutdown.
2022-12-21 12:25:57 -06:00
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
else:
|
|
|
|
metrics_not_found.append(name)
|
A few tweaks to the Prometheus exporter script
Move the global state onto the http server object so it doesn't have to be accessed as module globals.
Limit the mode groups that can be selected via command line args to the ones that are actually parsed. There are a few other options added in dish_common that don't really apply to this script, but they are mostly harmless, whereas some of the other mode groups will cause this script to throw an exception.
Reject access to "/favicon.ico" path, so testing from a web browser does not result in running the dish queries twice, and thus confusing the global state a little.
Add a lock to serialize calls to dish_common.get_data. That function is not thread-safe, even with CPython's Global Interpreter Lock, because the starlink_grpc functions it calls block. This script is really not meant for concurrent HTTP access, given that the usage stats are reported as usage since last access (by default), but since it's technically supported, might as well have it work properly.
Add the same handling of keyboard interrupt (Ctrl-C) and SIGTERM signal as the other grpc scripts, along with proper shutdown.
2022-12-21 12:25:57 -06:00
|
|
|
|
2023-09-01 13:10:25 -05:00
|
|
|
for name in raw_data:
|
|
|
|
unprocessed_metrics.labels(id=status_id, metric=name).set(1)
|
|
|
|
|
|
|
|
for name in metrics_not_found:
|
|
|
|
missing_metrics.labels(id=status_id, metric=name).set(1)
|
2022-11-15 10:45:24 -06:00
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Run the exporter: start polling the dish and serve metrics over HTTP.

    Parses the command line, starts the background ``GatherMetrics`` thread
    and serves HTTP requests until interrupted by Ctrl-C or SIGTERM. The
    server socket and the global state are then released and the process
    exits via ``sys.exit()``.
    """
    opts = parse_args()

    logging.basicConfig(format="%(levelname)s: %(message)s", stream=sys.stderr)

    gstate = dish_common.GlobalState(target=opts.target)
    gstate.lock = threading.Lock()

    # The polling thread loops forever, so it must be a daemon thread.
    # Otherwise the interpreter waits for it at exit and the process hangs
    # after Ctrl-C or SIGTERM even though the cleanup below has run.
    gather = GatherMetrics(opts, gstate, daemon=True)
    gather.start()

    httpd = ThreadingHTTPServer((opts.address, opts.port), MetricsHandler)
    httpd.daemon_threads = False

    signal.signal(signal.SIGTERM, handle_sigterm)

    print("HTTP listening on port", opts.port)
    try:
        httpd.serve_forever()
    except (KeyboardInterrupt, Terminated):
        # Normal ways to stop the exporter; fall through to the cleanup.
        pass
    finally:
        httpd.server_close()
        gstate.shutdown()

    sys.exit()
|
|
|
|
|
|
|
|
|
|
|
|
# Run the exporter only when this file is executed as a script.
if __name__ == "__main__":
    main()
|