Merge pull request #71 from luxifr/prom-exporter
add prometheus exporter
This commit is contained in:
commit
ffd950c30c
2 changed files with 274 additions and 1 deletions
|
@ -38,10 +38,13 @@ Of the 3 groups below, the grpc scripts are really the only ones being actively
|
|||
|
||||
### The grpc scripts
|
||||
|
||||
This set of scripts includes `dish_grpc_text.py`, `dish_grpc_influx.py`, `dish_grpc_influx2.py`, `dish_grpc_sqlite.py`, and `dish_grpc_mqtt.py`. They mostly support the same functionality, but write their output in different ways. `dish_grpc_text.py` writes data to standard output, `dish_grpc_influx.py` and `dish_grpc_influx2.py` send it to an InfluxDB 1.x and 2.x server, respectively, `dish_grpc_sqlite.py` writes it to a sqlite database, and `dish_grpc_mqtt.py` sends it to a MQTT broker.
|
||||
This set of scripts includes `dish_grpc_text.py`, `dish_grpc_influx.py`, `dish_grpc_influx2.py`, `dish_grpc_sqlite.py`, `dish_grpc_mqtt.py`, and `dish_grpc_prometheus.py`. They mostly support the same functionality, but write their output in different ways. `dish_grpc_text.py` writes data to standard output, `dish_grpc_influx.py` and `dish_grpc_influx2.py` send it to an InfluxDB 1.x and 2.x server, respectively, `dish_grpc_sqlite.py` writes it to a sqlite database, and `dish_grpc_mqtt.py` sends it to a MQTT broker. `dish_grpc_prometheus.py` does not write anywhere but will listen for HTTP requests and
|
||||
return data in a format Prometheus can scrape.
|
||||
|
||||
All these scripts support processing status data and/or history data in various modes. The status data is mostly what appears related to the dish in the Debug Data section of the Starlink app, whereas most of the data displayed in the Statistics page of the Starlink app comes from the history data. Specific status or history data groups can be selected by including their mode names on the command line. Run the scripts with `-h` command line option to get a list of available modes. See the documentation at the top of `starlink_grpc.py` for detail on what each of the fields means within each mode group.
|
||||
|
||||
`dish_grpc_prometheus.py` has only been tested with the modes `status`, `usage`, and `alert_detail`.
|
||||
|
||||
For example, data from all the currently available status groups can be output by doing:
|
||||
```shell script
|
||||
python3 dish_grpc_text.py status obstruction_detail alert_detail
|
||||
|
|
270
dish_grpc_prometheus.py
Normal file
270
dish_grpc_prometheus.py
Normal file
|
@ -0,0 +1,270 @@
|
|||
#!/usr/bin/python3
|
||||
"""Prometheus exporter for Starlink user terminal data info.
|
||||
|
||||
This script pulls the current status info and/or metrics computed from the
|
||||
history data and makes it available via HTTP in the format Prometheus expects.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from http import HTTPStatus
|
||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||
|
||||
import dish_common
|
||||
|
||||
|
||||
class MetricInfo:
    """Static description of how one raw data item is exported.

    Attributes:
        unit: Suffix appended to the metric name; either "" or "_<unit>".
        kind: Metric type written on the "# TYPE" line; "gauge" by default.
        help: Help text for the metric; "" by default.
    """

    unit = ""
    kind = "gauge"
    help = ""

    def __init__(self, unit=None, kind=None, help=None) -> None:
        # Only truthy arguments override the class-level defaults above.
        overrides = {
            "unit": f"_{unit}" if unit else None,
            "kind": kind,
            "help": help,
        }
        for attr_name, attr_value in overrides.items():
            if attr_value:
                setattr(self, attr_name, attr_value)
|
||||
|
||||
|
||||
# Raw data items the exporter knows how to publish, keyed by
# "<category>_<field>".  Insertion order is the order in which the metrics are
# appended to the scrape output.
METRICS_INFO = {"status_uptime": MetricInfo(unit="seconds", kind="counter")}

# Plain gauges without a unit suffix; each key gets its own MetricInfo.
METRICS_INFO.update(
    (gauge_name, MetricInfo())
    for gauge_name in (
        "status_seconds_to_first_nonempty_slot",
        "status_pop_ping_drop_rate",
        "status_downlink_throughput_bps",
        "status_uplink_throughput_bps",
        "status_pop_ping_latency_ms",
        "status_alerts",
        "status_fraction_obstructed",
        "status_currently_obstructed",
        "status_seconds_obstructed",
        "status_obstruction_duration",
        "status_obstruction_interval",
        "status_direction_azimuth",
        "status_direction_elevation",
        "status_is_snr_above_noise_floor",
        "status_alert_motors_stuck",
        "status_alert_thermal_throttle",
        "status_alert_thermal_shutdown",
        "status_alert_mast_not_near_vertical",
        "status_alert_unexpected_location",
        "status_alert_slow_ethernet_speeds",
        "status_alert_roaming",
        "status_alert_install_pending",
        "status_alert_is_heating",
        "status_alert_power_supply_thermal_throttle",
    )
)

# Counters from the non-status groups.
METRICS_INFO.update(
    {
        "ping_stats_samples": MetricInfo(kind="counter"),
        "ping_stats_end_counter": MetricInfo(kind="counter"),
        "usage_download_usage": MetricInfo(unit="bytes", kind="counter"),
        "usage_upload_usage": MetricInfo(unit="bytes", kind="counter"),
    }
)
|
||||
|
||||
# Values of the "state" status item that are exported.  Each one becomes a
# separate sample of starlink_status_state, distinguished by its `state` label.
STATE_VALUES = [
    "UNKNOWN", "CONNECTED", "BOOTING", "SEARCHING", "STOWED",
    "THERMAL_SHUTDOWN", "NO_SATS", "OBSTRUCTED", "NO_DOWNLINK", "NO_PINGS",
    "DISH_UNREACHABLE",
]
|
||||
|
||||
|
||||
class Metric:
    """One metric family rendered in the Prometheus text exposition format.

    Attributes:
        name: Full metric name written on every line of the family.
        timestamp: Sample time in seconds, or None / "" to omit the timestamp.
        kind: Metric type written on the "# TYPE" line.
        help: Text written on the "# HELP" line.
        values: Objects whose ``str()`` yields "<labels> <value>" for one
            sample, e.g. ``MetricValue`` instances.
    """

    name = ""
    timestamp = ""
    kind = None
    help = None
    values = None

    def __init__(self, name, timestamp, kind="gauge", help="", values=None):
        self.name = name
        self.timestamp = timestamp
        self.kind = kind
        self.help = help
        self.values = values if values else []

    def __str__(self):
        """Return the HELP/TYPE header and one line per value.

        Returns "" when the metric has no values, so that empty families
        are not announced at all.
        """
        if not self.values:
            return ""

        # The timestamp is optional in the exposition format.  Leave it out
        # when none is known instead of failing on `None * 1000`, and emit
        # whole milliseconds because the format expects an integer.
        if self.timestamp is None or self.timestamp == "":
            ts_suffix = ""
        else:
            ts_suffix = f" {int(self.timestamp * 1000)}"

        lines = [
            f"# HELP {self.name} {self.help}",
            f"# TYPE {self.name} {self.kind}",
        ]
        lines.extend(f"{self.name}{value}{ts_suffix}" for value in self.values)
        # The trailing empty element makes the block end with a newline.
        lines.append("")
        return "\n".join(lines)
|
||||
|
||||
|
||||
class MetricValue:
    """A single sample: a value plus optional labels.

    Attributes:
        value: The sample value, written as ``str(value)``.
        labels: Optional mapping of label name to label value.
    """

    value = 0
    labels = None

    def __init__(self, value, labels=None) -> None:
        self.value = value
        self.labels = labels

    @staticmethod
    def _escape_label(text):
        """Escape a label value for the Prometheus text format.

        Backslash, double quote and line feed must be written as ``\\\\``,
        ``\\"`` and ``\\n`` inside a quoted label value.
        """
        return (
            str(text)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )

    def __str__(self):
        """Return '{name="value",...} <value>', or ' <value>' without labels."""
        label_str = ""
        if self.labels:
            pairs = ",".join(
                f'{key}="{self._escape_label(val)}"'
                for key, val in self.labels.items()
            )
            label_str = "{" + pairs + "}"
        return f"{label_str} {self.value}"
|
||||
|
||||
|
||||
# Module-level state shared by the functions below:
#   opts   - parsed command line options, assigned by parse_args()
#   gstate - dish_common.GlobalState instance, assigned by main()
opts, gstate = None, None
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command line and store the result in the global ``opts``.

    The parser comes from ``dish_common.create_arg_parser`` and is extended
    with the HTTP server options ``--address`` and ``--port`` before being
    run through ``dish_common.run_arg_parser``.
    """
    global opts

    arg_parser = dish_common.create_arg_parser(
        output_description="Prometheus exporter",
        bulk_history=False,
    )

    http_options = arg_parser.add_argument_group(title="HTTP server options")
    http_options.add_argument(
        "--address",
        default="0.0.0.0",
        help="IP address to listen on",
    )
    http_options.add_argument(
        "--port",
        default=8080,
        type=int,
        help="Port to listen on",
    )

    opts = dish_common.run_arg_parser(arg_parser)
|
||||
|
||||
|
||||
def prometheus_export():
    """Collect dish data and render it in the Prometheus text format.

    Reads the module-level ``opts`` and ``gstate``, hands them to
    ``dish_common.get_data`` and stores every reported item under the key
    ``"<category>_<name>"``.  The output contains, in this order:

    * ``starlink_status_state``: one sample per entry of ``STATE_VALUES``,
      1 for the current state and 0 otherwise (only if a state was reported)
    * ``starlink_info``: constant 1 carrying id and version labels
    * one family per entry of ``METRICS_INFO`` that was reported
    * ``starlink_exporter_unprocessed_metrics``: reported items the exporter
      does not know how to publish
    * ``starlink_exporter_missing_metrics``: known items that were not reported

    NOTE(review): the HTTP server handles requests in threads, so this may
    run concurrently on the shared ``gstate`` - confirm that
    ``dish_common.get_data`` tolerates that.

    Returns:
        str: The rendered metric families joined with newlines.

    Raises:
        NotImplementedError: If ``dish_common.get_data`` reports sequence
            data, which this exporter does not support.
    """
    import time  # local import: only needed for the timestamp fallback

    raw_data = {}

    def data_add_item(name, value, category):
        raw_data[category + "_" + name] = value

    def data_add_sequence(name, value, category, start):
        raise NotImplementedError("Did not expect sequence data")

    # NOTE(review): assumes the 2nd/3rd return values are the timestamps (in
    # seconds) of the status data and of the other data groups, and are None
    # when the respective group produced no data - confirm in dish_common.
    _rc, status_ts, hist_ts = dish_common.get_data(
        opts, gstate, data_add_item, data_add_sequence
    )

    # Fall back to the scrape time so that rendering never has to multiply
    # None when one of the groups reported nothing.
    scrape_ts = int(time.time())
    if status_ts is None:
        status_ts = scrape_ts
    if hist_ts is None:
        hist_ts = scrape_ts

    metrics = []

    # snr is not supported by starlink any more but still returned by the grpc
    # service for backwards compatibility
    raw_data.pop("status_snr", None)

    # The state is only present when status data was collected; skip the
    # family instead of failing with KeyError when it is absent.
    current_state = raw_data.pop("status_state", None)
    if current_state is not None:
        metrics.append(
            Metric(
                name="starlink_status_state",
                timestamp=status_ts,
                values=[
                    MetricValue(
                        value=int(current_state == state_value),
                        labels={"state": state_value},
                    )
                    for state_value in STATE_VALUES
                ],
            )
        )

    info_metrics = ["status_id", "status_hardware_version", "status_software_version"]
    metrics_not_found = [x for x in info_metrics if x not in raw_data]
    info_labels = {
        x.replace("status_", ""): raw_data.pop(x)
        for x in info_metrics
        if x in raw_data
    }
    if info_labels:
        metrics.append(
            Metric(
                name="starlink_info",
                timestamp=status_ts,
                values=[MetricValue(value=1, labels=info_labels)],
            )
        )

    for name, metric_info in METRICS_INFO.items():
        if name not in raw_data:
            metrics_not_found.append(name)
            continue
        metrics.append(
            Metric(
                name=f"starlink_{name}{metric_info.unit}",
                # Status items carry the status timestamp, everything else
                # the timestamp of the non-status data.
                timestamp=status_ts if name.startswith("status_") else hist_ts,
                kind=metric_info.kind,
                help=metric_info.help,
                # Items reported as None (or any falsy value) are exported as 0.
                values=[MetricValue(value=float(raw_data.pop(name) or 0))],
            )
        )

    # Whatever is left in raw_data was reported but is not in METRICS_INFO.
    metrics.append(
        Metric(
            name="starlink_exporter_unprocessed_metrics",
            timestamp=status_ts,
            values=[MetricValue(value=1, labels={"metric": name}) for name in raw_data],
        )
    )

    metrics.append(
        Metric(
            name="starlink_exporter_missing_metrics",
            timestamp=status_ts,
            values=[
                MetricValue(value=1, labels={"metric": name})
                for name in metrics_not_found
            ],
        )
    )

    return "\n".join(str(metric) for metric in metrics)
|
||||
|
||||
|
||||
class MetricsRequestHandler(BaseHTTPRequestHandler):
    """HTTP handler that answers every GET request with the current metrics."""

    def do_GET(self):
        """Send the output of ``prometheus_export()`` as the response body.

        The request path is not inspected.  If collecting the metrics fails,
        the error is logged and a 500 response is sent instead of dropping
        the connection without an answer.
        """
        try:
            # Encode first: Content-Length must count bytes, not characters.
            body = prometheus_export().encode("utf-8")
        except Exception:
            logging.exception("Failed to collect metrics")
            self.send_error(HTTPStatus.INTERNAL_SERVER_ERROR)
            return

        self.send_response(HTTPStatus.OK)
        # Content type of the Prometheus text exposition format.
        self.send_header("Content-type", "text/plain; version=0.0.4; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)
|
||||
|
||||
|
||||
def main():
    """Parse options, set up logging and shared state, and serve metrics.

    Assigns the module globals ``opts`` (via ``parse_args``) and ``gstate``,
    then serves HTTP requests on ``opts.address``/``opts.port`` until the
    process is interrupted.
    """
    global opts, gstate

    parse_args()

    logging.basicConfig(format="%(levelname)s: %(message)s", stream=sys.stderr)

    gstate = dish_common.GlobalState(target=opts.target)

    # The context manager closes the listening socket on the way out.
    with ThreadingHTTPServer((opts.address, opts.port), MetricsRequestHandler) as httpd:
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop the exporter; exit without a
            # traceback.
            pass

    sys.exit()


if __name__ == "__main__":
    main()
|
Loading…
Reference in a new issue