add missing routeros tools from dinotools

This commit is contained in:
sepehr 2024-08-09 12:48:59 +03:30
parent e4b6fed5f2
commit 60c4f1870f
35 changed files with 5085 additions and 1 deletions

View file

@ -0,0 +1,2 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later

View file

@ -0,0 +1,450 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Any, Dict, List, Optional, Union
import click
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext, ScalarPercentContext
from ..helper import escape_filename, logger
from ..resource import RouterOSCheckResource
class InterfaceResource(RouterOSCheckResource):
    """Collect state and counter metrics of RouterOS interfaces.

    The data is read from ``/interface`` and enriched with the ``speed``
    value reported by ``/interface/ethernet``. For every selected interface
    the values described in ``_routeros_metric_values`` are extracted and the
    matching contexts are registered on the check passed to the constructor.
    """

    name = "Interface"

    def __init__(
        self,
        cmd_options: Dict[str, Any],
        check: nagiosplugin.Check,
        names: List[str],
        regex: bool,
        single_interface: bool,
        ignore_disabled: bool,
        cookie_filename: str,
        warning_values: List[str],
        critical_values: List[str],
        override_values: List[str],
    ):
        """Store the selection options and describe the values to extract.

        :param cmd_options: Options forwarded to ``RouterOSCheckResource``.
        :param check: Check the per-interface contexts are added to.
        :param names: Interface names, or patterns if ``regex`` is set.
            An empty sequence selects every interface.
        :param regex: Compile ``names`` and match them with ``re.match``.
        :param single_interface: Only produce metrics if exactly one
            interface was selected.
        :param ignore_disabled: Skip interfaces reported as disabled.
        :param cookie_filename: Template of the cookie file; ``{name}`` is
            replaced with the escaped interface name.
        :param warning_values: Raw warning thresholds, parsed with
            ``prepare_thresholds``.
        :param critical_values: Raw critical thresholds, parsed with
            ``prepare_thresholds``.
        :param override_values: Raw overrides, parsed with
            ``prepare_override_values`` and merged into every interface.
        """
        super().__init__(cmd_options=cmd_options)

        self._check = check
        # None means "not fetched yet"; an empty dict is a valid cached result.
        self._interface_data: Optional[Dict[str, Any]] = None
        self.names: List[Union[Any]] = names
        self.regex = regex
        if self.regex:
            self.names = [re.compile(name) for name in names]
        self.single_interface = single_interface
        self.ignore_disabled = ignore_disabled
        self.cookie_filename = cookie_filename

        self._parsed_warning_values: Dict[str, str] = self.prepare_thresholds(warning_values)
        self._parsed_critical_values: Dict[str, str] = self.prepare_thresholds(critical_values)
        self._parsed_override_values: Dict[str, str] = self.prepare_override_values(override_values)

        self._routeros_metric_values = [
            # Later values depend on the speed
            {
                "name": "speed",
                "missing_ok": True,
                "dst_value_name": "speed-byte",
                "type": self.parse_routeros_speed,
                "factor": 1 / 8,
                "no_metric": True,
            },
            {"name": "speed", "missing_ok": True, "type": self.parse_routeros_speed, "min": 0},
            # disabled/running get dedicated contexts in _add_contexts()
            {"name": "disabled", "type": bool, "context_class": None},
            {"name": "running", "type": bool, "context_class": None},
            {"name": "actual-mtu", "type": int, "min": 0},
            {
                "name": "fp-rx-byte",
                "type": int,
                "min": 0,
                "uom": "B",
                "rate": True,
                "rate_percent_total_name": "speed-byte",
            },
            {"name": "fp-rx-packet", "type": int, "min": 0, "uom": "c", "rate": True},
            {
                "name": "fp-tx-byte",
                "type": int,
                "min": 0,
                "uom": "B",
                "rate": True,
                "rate_percent_total_name": "speed-byte",
            },
            {"name": "fp-tx-packet", "type": int, "min": 0, "uom": "c", "rate": True},
            # CHR devices don't report l2mtu
            {"name": "l2mtu", "type": int, "min": 0, "missing_ok": True},
            {"name": "link-downs", "type": int, "min": 0, "uom": "c"},
            # {"name": "mtu", "type": int, "min": 0},
            {
                "name": "rx-byte",
                "type": int,
                "min": 0,
                "uom": "B",
                "rate": True,
                "rate_percent_total_name": "speed-byte",
            },
            {"name": "rx-drop", "type": int, "min": 0, "uom": "c", "rate": True},
            {"name": "rx-error", "type": int, "min": 0, "uom": "c", "rate": True},
            # Packet counters are not related to the byte speed, so no percent rate here.
            {"name": "rx-packet", "type": int, "min": 0, "uom": "c", "rate": True},
            {
                "name": "tx-byte",
                "type": int,
                "min": 0,
                "uom": "B",
                "rate": True,
                # Byte rates are put in relation to the interface speed,
                # like for rx-byte and the fp-* byte counters.
                "rate_percent_total_name": "speed-byte",
            },
            {"name": "tx-drop", "type": int, "min": 0, "uom": "c", "rate": True},
            {"name": "tx-error", "type": int, "min": 0, "uom": "c", "rate": True},
            {"name": "tx-packet", "type": int, "min": 0, "uom": "c", "rate": True},
            {"name": "tx-queue-drop", "type": int, "min": 0, "uom": "c", "rate": True},
        ]

    def _add_contexts(self, name, values, metric_prefix=""):
        """Register the contexts for all metrics of one interface.

        :param name: Name of the interface.
        :param values: Values of the interface; used to look up the total
            value for percent based rate contexts.
        :param metric_prefix: Format string (may contain ``{name}``) that is
            prepended to every context name.
        """
        prefix = metric_prefix.format(name=name)
        self._check.add(
            InterfaceDisabledContext(f"{prefix}disabled", interface_name=name),
            InterfaceRunningContext(f"{prefix}running", interface_name=name),
        )

        custom_metric_names = ("disabled", "running")

        for metric_value in self._routeros_metric_values:
            metric_value_name = metric_value.get("dst", metric_value["name"])
            if metric_value_name in custom_metric_names or metric_value.get("no_metric"):
                continue

            source_name = metric_value["name"]
            context_class = metric_value.get("context_class", nagiosplugin.ScalarContext)
            self._check.add(
                context_class(
                    f"{prefix}{metric_value_name}",
                    warning=self._parsed_warning_values.get(source_name),
                    critical=self._parsed_critical_values.get(source_name),
                )
            )

            if not metric_value.get("rate"):
                continue

            rate_percent_total_name = metric_value.get("rate_percent_total_name")
            rate_total_value = None
            if rate_percent_total_name:
                rate_total_value = values.get(rate_percent_total_name)

            if rate_total_value is not None:
                rate_context_class_percent = metric_value.get("context_class", ScalarPercentContext)
                self._check.add(
                    rate_context_class_percent(
                        name=f"{prefix}{metric_value_name}_rate",
                        total_value=rate_total_value,
                        warning=self._parsed_warning_values.get(f"{source_name}_rate"),
                        critical=self._parsed_critical_values.get(f"{source_name}_rate"),
                    )
                )
            else:
                # NOTE(review): unlike the percent variant this looks up the
                # thresholds of the plain value, not of "<name>_rate" - confirm
                # that this is intended.
                rate_context_class = metric_value.get("context_class", nagiosplugin.ScalarContext)
                self._check.add(
                    rate_context_class(
                        name=f"{prefix}{metric_value_name}_rate",
                        warning=self._parsed_warning_values.get(source_name),
                        critical=self._parsed_critical_values.get(source_name),
                    )
                )

    def fetch_data(self) -> Dict[str, Dict]:
        """Return the selected interfaces keyed by their name.

        The device is queried only once; the result is cached, even if no
        interface matched the selection.
        """
        if self._interface_data is not None:
            return self._interface_data

        api = self._connect_api()

        logger.info("Fetching data ...")
        interface_ethernet_data = {
            result["name"]: {"speed": result["speed"]}
            for result in tuple(api.path("/interface/ethernet"))
        }

        call_results = tuple(api.path("/interface"))

        # Collect into a local dict so a failure does not leave a partial cache.
        interface_data: Dict[str, Any] = {}
        for result in call_results:
            if self.ignore_disabled and result["disabled"]:
                continue

            interface_name = result["name"]
            if interface_name in interface_ethernet_data:
                result.update(interface_ethernet_data[interface_name])
            result.update(self._parsed_override_values)

            if len(self.names) == 0:
                selected = True
            elif self.regex:
                selected = any(pattern.match(interface_name) for pattern in self.names)
            else:
                selected = interface_name in self.names

            if selected:
                interface_data[interface_name] = result

        self._interface_data = interface_data
        return self._interface_data

    @property
    def interface_names(self):
        """Names of all selected interfaces as a tuple."""
        return tuple(self.fetch_data().keys())

    def probe(self):
        """Return the metrics of the selected interfaces and add their contexts.

        In single interface mode metrics are only produced if exactly one
        interface was selected and the metric names carry no prefix. Otherwise
        every metric name is prefixed with the interface name.
        """
        routeros_metrics = []
        data = self.fetch_data()
        interface_names = self.interface_names

        if self.single_interface:
            if len(interface_names) == 1:
                interface_name = interface_names[0]
                cookie_filename = self.cookie_filename.format(
                    name=escape_filename(interface_name)
                )
                with nagiosplugin.Cookie(cookie_filename) as cookie:
                    routeros_metrics += self.get_routeros_metric_item(data[interface_name], cookie=cookie)
                self._add_contexts(name=interface_name, values=data[interface_name])
        else:
            for interface_name in interface_names:
                cookie_filename = self.cookie_filename.format(
                    name=escape_filename(interface_name)
                )
                with nagiosplugin.Cookie(cookie_filename) as cookie:
                    routeros_metrics += self.get_routeros_metric_item(
                        data[interface_name], name_prefix=f"{interface_name} ", cookie=cookie
                    )
                self._add_contexts(name=interface_name, values=data[interface_name], metric_prefix="{name} ")

        return routeros_metrics
class InterfaceDisabledContext(BooleanContext):
    """Context that warns if an interface is disabled."""

    def __init__(self, name, interface_name):
        """
        :param name: Name of the context.
        :param interface_name: Interface name used in the result hint.
        """
        super().__init__(name=name)
        self._interface_name = interface_name

    def evaluate(self, metric, resource: InterfaceResource):
        """Return a warning result if the metric value is ``True``, else ok."""
        if metric.value is True:
            return self.result_cls(
                nagiosplugin.state.Warn,
                # f-string is required, otherwise the placeholder is printed literally
                hint=f"Interface '{self._interface_name}' is disabled",
                metric=metric
            )
        return self.result_cls(nagiosplugin.state.Ok)
class InterfaceRunningContext(BooleanContext):
    """Context that warns if an interface is not running."""

    def __init__(self, name, interface_name):
        """Remember the interface name for the result hint."""
        super().__init__(name=name)
        self._interface_name = interface_name

    def evaluate(self, metric, resource: InterfaceResource):
        """Return ok unless the metric value is exactly ``False``."""
        if metric.value is not False:
            return self.result_cls(nagiosplugin.state.Ok)

        not_running_hint = f"Interface '{self._interface_name}' not running"
        return self.result_cls(
            state=nagiosplugin.state.Warn,
            hint=not_running_hint,
            metric=metric,
        )
@cli.command("interface")
@click.option(
    "--name",
    "names",
    default=[],
    multiple=True,
    help="The name of the interface to monitor. This can be specified multiple times",
)
@click.option(
    "--regex",
    "regex",
    default=False,
    is_flag=True,
    help="Treat the specified names as regular expressions and try to find all matching interfaces. (Default: not set)",
)
@click.option(
    "--single",
    "single",
    default=False,
    is_flag=True,
    help="If set the check expects the interface to exist",
)
@click.option(
    "--ignore-disabled/--no-ignore-disabled",
    default=True,
    is_flag=True,
    help="Ignore disabled interfaces",
)
@click.option(
    "--cookie-filename",
    "cookie_filename",
    default="/tmp/check_routeros_interface_{name}.data",
    help=(
        "The filename to use to store the information to calculate the rate. '{name}' will be replaced with an "
        "internal unique id. It will create one file per interface. "
        "(Default: /tmp/check_routeros_interface_{name}.data)"
    ),
)
@click.option(
    "override_values",
    "--value-override",
    multiple=True,
    help=(
        "Override a value read from the RouterOS device. "
        "Format of the value must be compatible with RouterOS values. "
        "Example: Override/Set the speed value for bridges or tunnels: "
        "--value-override speed:10Gbps"
    )
)
@click.option(
    "warning_values",
    "--value-warning",
    multiple=True,
    help=(
        "Set a warning threshold for a value. "
        "Example: To warn if the link-downs counter is above 10 you can set "
        "--value-warning link-downs:10 "
        "Can be specified multiple times"
    )
)
@click.option(
    "critical_values",
    "--value-critical",
    multiple=True,
    help=(
        "Set a critical threshold for a value. "
        "Example: To report critical if the link-downs counter is above 10 you can set "
        "--value-critical link-downs:10 "
        "Can be specified multiple times"
    )
)
@click.pass_context
def interface(
    ctx, names, regex, single, ignore_disabled, cookie_filename, warning_values, critical_values, override_values
):
    """Check the state and the stats of interfaces"""
    check = nagiosplugin.Check()

    # The resource adds the per-interface contexts to the check while probing.
    resource = InterfaceResource(
        cmd_options=ctx.obj,
        check=check,
        names=names,
        regex=regex,
        single_interface=single,
        ignore_disabled=ignore_disabled,
        cookie_filename=cookie_filename,
        warning_values=warning_values,
        critical_values=critical_values,
        override_values=override_values,
    )
    check.add(resource)

    # Baseline result, shown if nothing worse is reported.
    check.results.add(
        nagiosplugin.Result(
            nagiosplugin.state.Ok,
            "All interfaces UP"
        )
    )

    if single and len(resource.interface_names) != 1:
        check.results.add(
            nagiosplugin.Result(
                nagiosplugin.state.Unknown,
                f"Only one matching interface is allowed. Found {len(resource.interface_names)}"
            )
        )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,186 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Any, Dict, List, Optional, Union
import click
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext
from ..helper import logger
from ..resource import RouterOSCheckResource
class InterfaceGREResource(RouterOSCheckResource):
    """Collect the state of GRE interfaces from ``/interface/gre``."""

    name = "GRE"

    def __init__(
        self,
        cmd_options: Dict[str, Any],
        names: List[str],
        regex: bool,
        single_interface: bool,
        ignore_disabled: bool,
    ):
        """Store the selection options and describe the values to extract.

        :param cmd_options: Options forwarded to ``RouterOSCheckResource``.
        :param names: Interface names, or patterns if ``regex`` is set.
            An empty sequence selects every GRE interface.
        :param regex: Compile ``names`` and match them with ``re.match``.
        :param single_interface: Only produce metrics if exactly one
            interface was selected.
        :param ignore_disabled: Skip interfaces reported as disabled.
        """
        super().__init__(cmd_options=cmd_options)

        # None means "not fetched yet"; an empty dict is a valid cached result.
        self._interface_data: Optional[Dict[str, Any]] = None
        self.names: List[Union[Any]] = names
        self.regex = regex
        if self.regex:
            self.names = [re.compile(name) for name in names]
        self.single_interface = single_interface
        self.ignore_disabled = ignore_disabled

        self._routeros_metric_values = [
            {"name": "disabled", "type": bool},
            {"name": "running", "type": bool},
            {"name": "actual-mtu", "type": int, "min": 0},
        ]

    def fetch_data(self) -> Dict[str, Dict]:
        """Return the selected GRE interfaces keyed by their name.

        The device is queried only once; the result is cached, even if no
        interface matched the selection.
        """
        if self._interface_data is not None:
            return self._interface_data

        api = self._connect_api()

        logger.info("Fetching data ...")
        call_results = tuple(api.path("/interface/gre"))

        # Collect into a local dict so a failure does not leave a partial cache.
        interface_data: Dict[str, Any] = {}
        for result in call_results:
            if self.ignore_disabled and result["disabled"]:
                continue

            interface_name = result["name"]
            if len(self.names) == 0:
                selected = True
            elif self.regex:
                selected = any(pattern.match(interface_name) for pattern in self.names)
            else:
                selected = interface_name in self.names

            if selected:
                interface_data[interface_name] = result

        self._interface_data = interface_data
        return self._interface_data

    @property
    def interface_names(self):
        """Names of all selected interfaces as a tuple."""
        return tuple(self.fetch_data().keys())

    def probe(self):
        """Return the metrics of the selected interfaces.

        In single interface mode metrics are only produced if exactly one
        interface was selected and the metric names carry no prefix. Otherwise
        every metric name is prefixed with the interface name.
        """
        routeros_metrics = []
        data = self.fetch_data()
        interface_names = self.interface_names

        if self.single_interface:
            if len(interface_names) == 1:
                return self.get_routeros_metric_item(data[interface_names[0]])
        else:
            for interface_name in interface_names:
                routeros_metrics += self.get_routeros_metric_item(
                    data[interface_name], name_prefix=f"{interface_name} "
                )

        return routeros_metrics
class InterfaceGREDisabledContext(BooleanContext):
    """Context that warns if a GRE interface is disabled."""

    def __init__(self, name, interface_name):
        """
        :param name: Name of the context.
        :param interface_name: Interface name used in the result hint.
        """
        super().__init__(name=name)
        self._interface_name = interface_name

    def evaluate(self, metric, resource: InterfaceGREResource):
        """Return a warning result if the metric value is ``True``, else ok."""
        if metric.value is True:
            return self.result_cls(
                nagiosplugin.state.Warn,
                # f-string is required, otherwise the placeholder is printed literally
                hint=f"GRE interface '{self._interface_name}' is disabled",
                metric=metric
            )
        return self.result_cls(nagiosplugin.state.Ok)
class InterfaceGRERunningContext(BooleanContext):
    """Context that warns if a GRE interface is not running."""

    def __init__(self, name, interface_name):
        """Remember the interface name for the result hint."""
        super().__init__(name=name)
        self._interface_name = interface_name

    def evaluate(self, metric, resource: InterfaceGREResource):
        """Return ok unless the metric value is exactly ``False``."""
        if metric.value is not False:
            return self.result_cls(nagiosplugin.state.Ok)

        not_running_hint = f"GRE interface '{self._interface_name}' not running"
        return self.result_cls(
            state=nagiosplugin.state.Warn,
            hint=not_running_hint,
            metric=metric,
        )
@cli.command("interface.gre")
@click.option(
    "--name",
    "names",
    multiple=True,
    default=[],
    help="The name of the GRE interface to monitor. This can be specified multiple times",
)
@click.option(
    "--regex",
    "regex",
    is_flag=True,
    default=False,
    help="Treat the specified names as regular expressions and try to find all matching interfaces. (Default: not set)",
)
@click.option(
    "--single",
    "single",
    is_flag=True,
    default=False,
    help="If set the check expects the interface to exist",
)
@click.option(
    "--ignore-disabled/--no-ignore-disabled",
    is_flag=True,
    default=True,
    help="Ignore disabled interfaces",
)
@click.pass_context
def interface_gre(ctx, names, regex, single, ignore_disabled):
    """Check the state of a GRE interface."""
    gre_resource = InterfaceGREResource(
        cmd_options=ctx.obj,
        names=names,
        regex=regex,
        single_interface=single,
        ignore_disabled=ignore_disabled,
    )
    check = nagiosplugin.Check(gre_resource)

    if not single:
        # One set of prefixed contexts per matching interface.
        for interface_name in gre_resource.interface_names:
            check.add(
                InterfaceGREDisabledContext(f"{interface_name} disabled", interface_name=interface_name),
                InterfaceGRERunningContext(f"{interface_name} running", interface_name=interface_name),
                nagiosplugin.ScalarContext(f"{interface_name} actual-mtu"),
            )
    elif len(gre_resource.interface_names) == 1:
        # Single mode uses unprefixed context names.
        interface_name = gre_resource.interface_names[0]
        check.add(
            InterfaceGREDisabledContext("disabled", interface_name=interface_name),
            InterfaceGRERunningContext("running", interface_name=interface_name),
            nagiosplugin.ScalarContext("actual-mtu"),
        )
    else:
        check.results.add(
            nagiosplugin.Result(
                nagiosplugin.state.Unknown,
                f"Only one matching interface is allowed. Found {len(gre_resource.interface_names)}",
            )
        )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,124 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import click
import librouteros
import librouteros.query
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext
from ..helper import logger
from ..resource import RouterOSCheckResource
class InterfaceVrrpCheck(RouterOSCheckResource):
    """Read the state flags of one VRRP interface from ``/interface/vrrp``."""

    name = "VRRP"

    def __init__(self, cmd_options, name, master_must):
        """
        :param cmd_options: Options forwarded to ``RouterOSCheckResource``.
        :param name: Name of the VRRP interface to query.
        :param master_must: Read by ``InterfaceVrrpMaster`` to decide if a
            non-master interface is reported.
        """
        super().__init__(cmd_options=cmd_options)

        self._name = name

        # Filled by probe() with the values reported by the device.
        self.backup = None
        self.disabled = None
        self.enabled = None
        self.invalid = None
        self.master = None
        self.master_must = master_must
        self.running = None

    def probe(self):
        """Yield the ``disabled`` metric and, if enabled, the other state flags.

        :raises nagiosplugin.CheckError: If the device reports no VRRP
            interface with the requested name.
        """
        key_name = librouteros.query.Key("name")
        api = self._connect_api()

        logger.info("Fetching data ...")
        call = api.path(
            "/interface/vrrp"
        ).select(
            key_name,
            librouteros.query.Key("backup"),
            librouteros.query.Key("disabled"),
            librouteros.query.Key("invalid"),
            librouteros.query.Key("master"),
            librouteros.query.Key("running"),
        ).where(
            key_name == self._name
        )
        results = tuple(call)
        if not results:
            # Report a readable error instead of failing with an IndexError.
            raise nagiosplugin.CheckError(f"VRRP interface '{self._name}' not found")
        result = results[0]

        self.disabled = result["disabled"]
        self.enabled = not self.disabled

        yield nagiosplugin.Metric(
            name="disabled",
            value=self.disabled,
        )

        if self.enabled:
            for n in ("backup", "invalid", "master", "running"):
                # Flags missing from the reply are skipped.
                if n not in result:
                    continue

                setattr(self, n, result[n])
                yield nagiosplugin.Metric(
                    name=n,
                    value=result[n],
                )
class InterfaceVrrpDisabled(BooleanContext):
    """Context that warns if the VRRP interface is disabled."""

    def evaluate(self, metric, resource: InterfaceVrrpCheck):
        """Return ok unless the metric value is exactly ``True``."""
        if metric.value is not True:
            return self.result_cls(nagiosplugin.state.Ok)
        return self.result_cls(
            state=nagiosplugin.state.Warn,
            hint="VRRP is disabled",
            metric=metric,
        )
class InterfaceVrrpInvalid(BooleanContext):
    """Context that warns if the VRRP configuration is flagged as invalid."""

    def evaluate(self, metric, resource: InterfaceVrrpCheck):
        """Return ok unless the metric value is exactly ``True``."""
        if metric.value is not True:
            return self.result_cls(nagiosplugin.state.Ok)
        return self.result_cls(state=nagiosplugin.state.Warn, hint="VRRP config is invalid")
class InterfaceVrrpMaster(BooleanContext):
    """Context that warns if the interface must be master but is not."""

    def evaluate(self, metric, resource: InterfaceVrrpCheck):
        """Warn only if the value is falsy and ``resource.master_must`` is set."""
        if metric.value or not resource.master_must:
            return self.result_cls(nagiosplugin.state.Ok)
        return self.result_cls(state=nagiosplugin.state.Warn, hint="VRRP interface is not master")
@cli.command("interface.vrrp")
@click.option(
    "--name",
    help="The name of the VRRP interface to check",
    required=True,
)
# NOTE(review): this option is not declared with is_flag although the help
# text reads like a flag - confirm the intended command line usage.
@click.option(
    "--master",
    help="If set the interface must be master",
    default=False,
)
@click.pass_context
def interface_vrrp(ctx, name, master):
    """Check the state of VRRP interfaces"""
    vrrp_resource = InterfaceVrrpCheck(
        cmd_options=ctx.obj,
        name=name,
        master_must=master,
    )
    # One context per metric the resource may yield.
    contexts = (
        BooleanContext("backup"),
        InterfaceVrrpDisabled("disabled"),
        InterfaceVrrpInvalid("invalid"),
        InterfaceVrrpMaster("master"),
        BooleanContext("running"),
    )
    check = nagiosplugin.Check(vrrp_resource, *contexts)

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,195 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Any, Dict, List, Optional, Union
import click
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext
from ..helper import logger
from ..resource import RouterOSCheckResource
class RoutingBGPPeerResource(RouterOSCheckResource):
    """Collect the state of BGP peers from ``/routing/bgp/peer``."""

    name = "BGP Peer"

    def __init__(
        self,
        cmd_options: Dict[str, Any],
        names: List[str],
        regex: bool,
        single_peer: bool,
    ):
        """Store the selection options and describe the values to extract.

        :param cmd_options: Options forwarded to ``RouterOSCheckResource``.
        :param names: Peer names, or patterns if ``regex`` is set. Only
            peers matching one of them are selected.
        :param regex: Compile ``names`` and match them with ``re.match``.
        :param single_peer: Only produce metrics if exactly one peer was
            selected.
        """
        super().__init__(cmd_options=cmd_options)

        # None means "not fetched yet"; an empty dict is a valid cached result.
        self._peer_data: Optional[Dict[str, Any]] = None
        self.names: List[Union[Any]] = names
        self.regex = regex
        if self.regex:
            self.names = [re.compile(name) for name in names]
        self.single_peer = single_peer
        self.state: Optional[str] = None

        self._routeros_metric_values = [
            {"name": "disabled", "type": bool},
            {"name": "prefix-count", "dst": "prefix_count", "type": int},
            {"name": "state", "type": str},
            {"name": "updates-received", "dst": "updates_received", "type": int},
            {"name": "updates-sent", "dst": "updates_sent", "type": int},
            {"name": "uptime", "type": self.parse_routeros_time_duration, "min": 0, "uom": "s"},
        ]

    def fetch_data(self) -> Dict[str, Dict]:
        """Return the selected peers keyed by their name.

        The device is queried only once; the result is cached, even if no
        peer matched the selection.
        """
        if self._peer_data is not None:
            return self._peer_data

        api = self._connect_api()

        logger.info("Fetching data ...")
        call_results = tuple(api.path("/routing/bgp/peer"))

        # Collect into a local dict so a failure does not leave a partial cache.
        peer_data: Dict[str, Any] = {}
        for result in call_results:
            peer_name = result["name"]
            if self.regex:
                selected = any(pattern.match(peer_name) for pattern in self.names)
            else:
                selected = peer_name in self.names

            if selected:
                peer_data[peer_name] = result

        self._peer_data = peer_data
        return self._peer_data

    @property
    def peer_names(self):
        """Names of all selected peers as a tuple."""
        return tuple(self.fetch_data().keys())

    def probe(self):
        """Return the metrics of the selected peers.

        In single peer mode metrics are only produced if exactly one peer
        was selected and the metric names carry no prefix. Otherwise every
        metric name is prefixed with the peer name.
        """
        routeros_metrics = []
        data = self.fetch_data()
        peer_names = self.peer_names

        if self.single_peer:
            if len(peer_names) == 1:
                return self.get_routeros_metric_item(data[peer_names[0]])
        else:
            for peer_name in peer_names:
                routeros_metrics += self.get_routeros_metric_item(data[peer_name], name_prefix=f"{peer_name} ")

        return routeros_metrics
class RoutingBGPPeerState(BooleanContext):
    """Context mapping the state string of a BGP peer to a check result."""

    def __init__(self, *args, **kwargs):
        """Initialise the base context and set the metric format string."""
        super().__init__(*args, **kwargs)
        self.fmt_metric = "{name} is {valueunit}"

    def evaluate(self, metric, resource: RoutingBGPPeerResource):
        """Return ok for an established peer, critical for the known
        non-established states or a missing value, unknown for anything else.
        """
        peer_state = metric.value

        if peer_state is None:
            # No state value at all is reported as critical without a hint.
            return nagiosplugin.Result(
                state=nagiosplugin.state.Critical,
            )

        if peer_state in ("established",):
            return self.result_cls(
                state=nagiosplugin.state.Ok,
                hint="Connection with peer established",
            )

        if peer_state in ("idle", "connect", "active", "opensent", "openconfirm"):
            return self.result_cls(
                state=nagiosplugin.state.Critical,
                hint=f"Connection to peer not established (State: {peer_state})",
            )

        return self.result_cls(
            state=nagiosplugin.state.Unknown,
            hint=f"Unable to find peer state (State: {peer_state})",
        )
class RoutingBGPPeerSummary(nagiosplugin.Summary):
    """Summary listing the connection state of every selected peer."""

    def ok(self, results: List[nagiosplugin.Result]):
        """Build the text from the first result that belongs to a
        ``RoutingBGPPeerResource``; return an empty string if there is none.
        """
        for result in results:
            if not isinstance(result.resource, RoutingBGPPeerResource):
                continue

            peer_data = result.resource.fetch_data()
            return ", ".join(
                f"Connection to {peer_name} is {peer_data[peer_name]['state']}"
                for peer_name in result.resource.peer_names
            )
        return ""
@cli.command("routing.bgp.peers")
@click.option(
    "--name",
    "names",
    default=[],
    multiple=True,
    help="The name of the BGP peer to check. This can be specified multiple times",
)
@click.option(
    "--regex",
    "regex",
    default=False,
    is_flag=True,
    help="Treat the specified names as regular expressions and try to find all matching peers. (Default: not set)",
)
@click.option(
    "--single",
    "single",
    default=False,
    is_flag=True,
    help="If set the check expects the peer to exist",
)
@click.pass_context
def routing_bgp_peer(ctx, names, regex, single):
    """Check the state of BGP peers"""
    resource = RoutingBGPPeerResource(
        cmd_options=ctx.obj,
        names=names,
        regex=regex,
        single_peer=single,
    )
    check = nagiosplugin.Check(
        resource,
        RoutingBGPPeerSummary(),
    )

    if single:
        if len(resource.peer_names) == 1:
            # Single mode uses unprefixed context names.
            check.add(
                BooleanContext("disabled"),
                RoutingBGPPeerState("state"),
                nagiosplugin.ScalarContext("prefix_count"),
                nagiosplugin.ScalarContext("uptime"),
                nagiosplugin.ScalarContext("updates_received"),
                nagiosplugin.ScalarContext("updates_sent"),
            )
        else:
            check.results.add(
                nagiosplugin.Result(
                    nagiosplugin.state.Unknown,
                    f"Only one matching peer is allowed. Found {len(resource.peer_names)}"
                )
            )
    else:
        # One set of prefixed contexts per matching peer.
        for name in resource.peer_names:
            check.add(
                BooleanContext(f"{name} disabled"),
                RoutingBGPPeerState(f"{name} state"),
                nagiosplugin.ScalarContext(f"{name} prefix_count"),
                nagiosplugin.ScalarContext(f"{name} uptime"),
                nagiosplugin.ScalarContext(f"{name} updates_received"),
                nagiosplugin.ScalarContext(f"{name} updates_sent"),
            )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,160 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from typing import Any, Dict, Optional
import click
import librouteros
import librouteros.query
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
class RoutingOSPFNeighborResource(RouterOSCheckResource):
    """Read the values of one OSPF neighbor from ``/routing/ospf/neighbor``."""

    name = "OSPF NEIGHBOR"

    def __init__(
        self,
        cmd_options: Dict[str, Any],
        instance: str,
        router_id: str,
        area: Optional[str] = None
    ):
        """Store the neighbor selectors and describe the values to extract.

        The list of values is extended with four more entries if the RouterOS
        version is lower than 7.
        """
        super().__init__(cmd_options=cmd_options)

        self.area = area
        self.instance = instance
        self.router_id = router_id
        self.state: Optional[str] = None

        metric_values = [
            {"name": "adjacency", "type": self.parse_routeros_time_duration, "min": 0, "uom": "s"},
            {"name": "state", "type": None},
            {"name": "state-changes", "dst": "state_changes", "type": int},
        ]
        self._routeros_metric_values = metric_values

        if self.routeros_version < RouterOSVersion("7"):
            # In-place extension, the attribute keeps referring to the same list.
            self._routeros_metric_values += [
                {"name": "priority", "type": int},
                {"name": "ls-retransmits", "dst": "ls_retransmits", "type": int},
                {"name": "ls-requests", "dst": "ls_requests", "type": int},
                {"name": "db-summaries", "dst": "db_summaries", "type": int},
            ]

    def probe(self):
        """Query the neighbor and return its metrics.

        If no neighbor matches, a single ``state`` metric with the value
        ``None`` is returned.
        """
        # ToDo: Only available in v7.x
        key_area = librouteros.query.Key("area")
        key_instance = librouteros.query.Key("instance")
        key_router_id = librouteros.query.Key("router-id")

        logger.info("Fetching data ...")

        select_keys = [key_instance, key_router_id]
        select_keys = select_keys + self.get_routeros_select_keys()
        if self.routeros_version >= RouterOSVersion("7"):
            select_keys.append(key_area)

        conditions = [
            key_instance == self.instance,
            key_router_id == self.router_id,
        ]
        if self.area is not None:
            if self.routeros_version >= RouterOSVersion("7"):
                conditions.append(key_area == self.area)
            else:
                logger.warning("The area selector is only available on RouterOS 7.x")

        query = self.api.path("/routing/ospf/neighbor").select(*select_keys).where(*conditions)
        results = tuple(query)

        if not results:
            return nagiosplugin.Metric(
                name="state",
                value=None
            )

        # Only the first matching neighbor is evaluated.
        return self.get_routeros_metric_item(results[0])
class RoutingOSPFNeighborState(BooleanContext):
    """Context mapping the state of an OSPF neighbor to a check result."""

    def evaluate(self, metric, resource: RoutingOSPFNeighborResource):
        """Return critical for a missing or down neighbor, ok for a full
        adjacency and a warning for every other state.
        """
        neighbor_state = metric.value

        if neighbor_state is None:
            if resource.area is not None:
                hint = (
                    f"Neighbor for area '{resource.area}', instance '{resource.instance}' and "
                    f"router-id '{resource.router_id}' not found"
                )
            else:
                hint = f"Neighbor for instance '{resource.instance}' and router-id '{resource.router_id}' not found"

            return nagiosplugin.Result(
                state=nagiosplugin.state.Critical,
                hint=hint
            )

        if neighbor_state in ("Down",):
            return self.result_cls(
                state=nagiosplugin.state.Critical,
                hint="Link to neighbor down"
            )

        if neighbor_state in ("Full",):
            return self.result_cls(
                state=nagiosplugin.state.Ok,
                hint="Communicating with neighbor"
            )

        return self.result_cls(
            state=nagiosplugin.state.Warn,
            hint=f"Link to neighbor not fully up, state: {metric.value}"
        )
@cli.command("routing.ospf.neighbors")
@click.option(
    "--area",
    help="The area the neighbor router belongs to (only supported on RouterOS v7.x)",
)
@click.option(
    "--instance",
    required=True,
    help="The name of the OSPF instance",
)
@click.option(
    "--router-id",
    required=True,
    help="The ID of the neighbor router",
)
@click.pass_context
def routing_ospf_neighbors(ctx, area, instance, router_id):
    """Check the state of an OSPF neighbor"""
    resource = RoutingOSPFNeighborResource(
        cmd_options=ctx.obj,
        area=area,
        instance=instance,
        router_id=router_id,
    )
    # Contexts for all values the resource may extract; some of them are
    # only extracted on RouterOS versions lower than 7.
    check = nagiosplugin.Check(
        resource,
        nagiosplugin.ScalarContext("priority"),
        nagiosplugin.ScalarContext("adjacency"),
        nagiosplugin.ScalarContext("state_changes"),
        nagiosplugin.ScalarContext("ls_retransmits"),
        nagiosplugin.ScalarContext("ls_requests"),
        nagiosplugin.ScalarContext("db_summaries"),
        RoutingOSPFNeighborState("state")
    )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,94 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2023)
# SPDX-License-Identifier: GPL-3.0-or-later
from datetime import datetime
from pprint import pformat
from typing import List
import click
import nagiosplugin
from ..cli import cli
from ..context import SimplePositiveFloatContext
from ..helper import logger
from ..resource import RouterOSCheckResource
class SystemClockResource(RouterOSCheckResource):
    """Compare the clock of the device with the local clock."""

    name = "CLOCK"

    def __init__(
        self,
        cmd_options,
        check: nagiosplugin.Check,
    ):
        """Keep a reference to the check next to the base initialisation."""
        super().__init__(cmd_options=cmd_options)

        self._check = check

    def probe(self):
        """Yield ``time-diff``: local ``datetime.now()`` minus the device
        date/time from ``/system/clock``, in seconds.
        """
        api = self._connect_api()

        logger.info("Fetching clock data ...")
        clock_entries = tuple(api.path("/system/clock"))
        clock = clock_entries[0]
        logger.debug(f"Extracted values {pformat(clock)}")

        device_datetime = self.parse_routeros_date_time(clock["date"], clock["time"])
        # NOTE(review): datetime.now() is naive local time - presumably the
        # device reports local time as well; confirm.
        difference_seconds = (datetime.now() - device_datetime).total_seconds()

        yield nagiosplugin.Metric(
            name="time-diff",
            value=difference_seconds,
            uom="s",
        )
class SystemClockSummary(nagiosplugin.Summary):
    """Summary showing the time difference with two decimal places."""

    def ok(self, results: List[nagiosplugin.Result]):
        """Return the text for the first ``time-diff`` metric, or an empty
        string if there is none.
        """
        for result in results:
            metric = result.metric
            if not metric or metric.name != "time-diff":
                continue
            return f"Time diff is {metric.value:.2f}s"
        return ""
@cli.command("system.clock")
@click.option(
    "--warning",
    type=float,
    help="Warning threshold for time diff in seconds",
)
@click.option(
    "--critical",
    type=float,
    help="Critical threshold for time diff in seconds",
)
@click.pass_context
@nagiosplugin.guarded
def system_clock(ctx, warning, critical):
    """This command reads the information from /system/clock to extract the required information."""
    check = nagiosplugin.Check()

    clock_resource = SystemClockResource(
        cmd_options=ctx.obj,
        check=check,
    )
    # Context evaluating the time-diff metric against the given thresholds.
    time_diff_context = SimplePositiveFloatContext(
        name="time-diff",
        warning=warning,
        critical=critical,
        fmt_metric="Time diff is {valueunit}",
    )
    check.add(clock_resource, time_diff_context, SystemClockSummary())

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,184 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from pprint import pformat
import re
from typing import Dict, List
import click
import librouteros
import librouteros.query
import nagiosplugin
from ..cli import cli
from ..helper import logger
from ..resource import RouterOSCheckResource
class SystemCpuResource(RouterOSCheckResource):
    """Read the global CPU load and the per-CPU load, irq and disk values."""

    name = "CPU"

    def __init__(
        self,
        cmd_options,
        check: nagiosplugin.Check,
        warning_values: List[str],
        critical_values: List[str],
        use_regex: bool
    ):
        """Parse the thresholds either as plain names or as regex patterns.

        :param cmd_options: Options forwarded to ``RouterOSCheckResource``.
        :param check: Check the per-value contexts are added to.
        :param warning_values: Raw warning thresholds.
        :param critical_values: Raw critical thresholds.
        :param use_regex: Parse the thresholds with
            ``prepare_regex_thresholds`` instead of ``prepare_thresholds``.
        """
        super().__init__(cmd_options=cmd_options)

        self._check = check

        self.values: Dict[str, float] = {}
        self.use_regex: bool = use_regex

        self.warning_values: Dict[str, str] = {}
        self.critical_values: Dict[str, str] = {}
        self.warning_regex_values: Dict[re.Pattern, str] = {}
        self.critical_regex_values: Dict[re.Pattern, str] = {}

        if not self.use_regex:
            self.warning_values = self.prepare_thresholds(warning_values)
            self.critical_values = self.prepare_thresholds(critical_values)
        else:
            self.warning_regex_values = self.prepare_regex_thresholds(warning_values)
            self.critical_regex_values = self.prepare_regex_thresholds(critical_values)

    @staticmethod
    def _assign_first_match(regex_thresholds, thresholds, value_name):
        """Store the threshold of the first pattern matching ``value_name``."""
        for pattern, threshold in regex_thresholds.items():
            if pattern.match(value_name):
                thresholds[value_name] = threshold
                return

    def probe(self):
        """Yield the global ``cpu-load`` metric followed by one metric per
        CPU value; the context of each per-CPU value is added to the check
        right before its metric is yielded.
        """
        key_cpu_load = librouteros.query.Key("cpu-load")
        api = self._connect_api()

        logger.info("Fetching global data ...")
        global_results = tuple(api.path("/system/resource").select(key_cpu_load))
        global_result = global_results[0]
        logger.debug(f"Extracted values {pformat(global_result)}")

        yield nagiosplugin.Metric(
            name="cpu-load",
            value=global_result["cpu-load"],
            uom="%",
            min=0,
            max=100,
        )

        logger.info("Fetching cpu data ...")
        cpu_results = tuple(api.path("/system/resource/cpu"))
        logger.debug(f"Extracted values {pformat(cpu_results)}")

        for cpu in cpu_results:
            cpu_name = cpu["cpu"]
            for suffix in ("load", "irq", "disk"):
                value_name = f"{cpu_name}-{suffix}"
                if self.use_regex:
                    # Resolve the regex thresholds to the concrete value name.
                    self._assign_first_match(self.warning_regex_values, self.warning_values, value_name)
                    self._assign_first_match(self.critical_regex_values, self.critical_values, value_name)

                self.values[value_name] = float(cpu[suffix])

        for value_name, value in self.values.items():
            context = nagiosplugin.ScalarContext(
                name=value_name,
                warning=self.warning_values.get(value_name),
                critical=self.critical_values.get(value_name),
            )
            self._check.add(context)
            yield nagiosplugin.Metric(
                name=value_name,
                value=value,
                uom="%",
                min=0,
                max=100,
            )
class SystemCpuSummary(nagiosplugin.Summary):
    """Summary showing the global CPU load."""

    def ok(self, results: List[nagiosplugin.Result]):
        """Return the text for the first ``cpu-load`` metric, or an empty
        string if there is none.
        """
        for result in results:
            metric = result.metric
            if not metric or metric.name != "cpu-load":
                continue
            return f"System load is {metric.value}%"
        return ""
@cli.command("system.cpu")
@click.option(
    "--load-warning",
    help="Warning threshold for global cpu load",
)
@click.option(
    "--load-critical",
    help="Critical threshold for global cpu load",
)
@click.option(
    "warning_values",
    "--value-warning",
    multiple=True,
    help=(
        "Set a warning threshold for a value. "
        "Example: If cpu1-load should be in the range of 10% to 20% you can set "
        "--value-warning cpu1-load:10:20 "
        "Can be specified multiple times"
    )
)
@click.option(
    "critical_values",
    "--value-critical",
    multiple=True,
    help=(
        "Set a critical threshold for a value. "
        "Example: If cpu1-load should be in the range of 10% to 20% you can set "
        "--value-critical cpu1-load:10:20 "
        "Can be specified multiple times"
    )
)
@click.option(
    "--regex",
    "use_regex",
    default=False,
    is_flag=True,
    help=(
        "Treat values from --value-warning and --value-critical as regex to find all matching values. "
        "Example: Warn if cpu load of at least one CPU is above 80%: --value-warning 'cpu\\d+-load:80'"
    )
)
@click.pass_context
@nagiosplugin.guarded
def system_cpu(ctx, load_warning, load_critical, warning_values, critical_values, use_regex):
    """This command reads the information from /system/resource and /system/resource/cpu to extract the required
    information.
    """
    check = nagiosplugin.Check()

    # The resource adds the contexts of the per-CPU values while probing.
    resource = SystemCpuResource(
        cmd_options=ctx.obj,
        check=check,
        warning_values=warning_values,
        critical_values=critical_values,
        use_regex=use_regex,
    )
    check.add(
        resource,
        # Context of the global cpu-load metric.
        nagiosplugin.ScalarContext(
            name="cpu-load",
            warning=load_warning,
            critical=load_critical,
        ),
        SystemCpuSummary(),
    )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,184 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Dict, List, Set
import click
import nagiosplugin
from ..cli import cli
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
class SystemFanResource(RouterOSCheckResource):
    """Collect fan speed values (``fanN-speed``) from ``/system/health``.

    The data is fetched while the object is constructed, so ``fan_names`` and
    ``fan_values`` are already filled when the command inspects them.
    """

    name = "FAN"

    def __init__(
        self,
        cmd_options,
        check: nagiosplugin.Check,
        warning_values: List[str],
        critical_values: List[str],
        use_regex: bool
    ):
        """Prepare the thresholds and fetch the fan data.

        :param cmd_options: Options handed through to the base resource
        :param check: Check object that receives one context per fan value
        :param warning_values: Warning thresholds as given on the command line
        :param critical_values: Critical thresholds as given on the command line
        :param use_regex: Treat the threshold names as regular expressions
        """
        super().__init__(cmd_options=cmd_options)

        self._check = check

        self.fan_names: Set[str] = set()
        self.fan_values: Dict[str, int] = {}
        self.use_regex: bool = use_regex

        self.warning_values: Dict[str, str] = {}
        self.critical_values: Dict[str, str] = {}
        self.warning_regex_values: Dict[re.Pattern, str] = {}
        self.critical_regex_values: Dict[re.Pattern, str] = {}

        if self.use_regex:
            # In regex mode the per-value thresholds are resolved later, once
            # the value names are known (see _fetch_data).
            self.warning_regex_values = self.prepare_regex_thresholds(warning_values)
            self.critical_regex_values = self.prepare_regex_thresholds(critical_values)
        else:
            self.warning_values = self.prepare_thresholds(warning_values)
            self.critical_values = self.prepare_thresholds(critical_values)

        self._fetch_data()

    def _fetch_data(self):
        """Read ``/system/health`` and fill ``fan_names`` / ``fan_values``."""
        logger.info("Fetching data ...")
        call = self.api.path(
            "/system/health"
        )
        api_results = tuple(call)

        if self.routeros_version < RouterOSVersion("7"):
            # Before version 7 the first record is treated as a name -> value
            # mapping and converted into the name/value item list used below.
            # An empty reply must yield no items instead of an IndexError, so
            # that the "no fan found" handling of the command can take over.
            first_record = api_results[0] if api_results else {}
            api_result_items = [
                {"name": name, "value": value}
                for name, value in first_record.items()
            ]
        else:
            api_result_items = api_results

        regex_name = re.compile(r"(?P<name>fan\d+)-(?P<type>(speed))")
        for item in api_result_items:
            m = regex_name.match(item["name"])
            if not m:
                continue

            if self.use_regex:
                # The first matching pattern decides the threshold of a value
                for regex, threshold in self.warning_regex_values.items():
                    if regex.match(item["name"]):
                        self.warning_values[item["name"]] = threshold
                        break

                for regex, threshold in self.critical_regex_values.items():
                    if regex.match(item["name"]):
                        self.critical_values[item["name"]] = threshold
                        break

            if m.group("type") in ("speed",):
                self.fan_values[item["name"]] = int(item["value"])

            self.fan_names.add(m.group("name"))

    def probe(self):
        """Register a context for every fan value and yield it as metric."""
        for name, value in self.fan_values.items():
            self._check.add(nagiosplugin.ScalarContext(
                name=name,
                warning=self.warning_values.get(name),
                critical=self.critical_values.get(name),
            ))
            yield nagiosplugin.Metric(
                name=name,
                value=value,
            )
# The function docstring doubles as the help text click prints for this command.
@cli.command("system.fan")
@click.option(
    "warning_values",
    "--value-warning",
    multiple=True,
    help=(
        "Set a warning threshold for a value. "
        "Example: If fan1-speed should be in the range of 4000 to 5000 you can set "
        "--value-warning fan1-speed:4000:5000 "
        "Can be specified multiple times"
    )
)
@click.option(
    "critical_values",
    "--value-critical",
    multiple=True,
    help=(
        "Set a critical threshold for a value. "
        "Example: If fan1-speed should be in the range of 4000 to 5000 you can set "
        "--value-critical fan1-speed:4000:5000 "
        "Can be specified multiple times"
    )
)
@click.option(
    "--regex",
    "use_regex",
    default=False,
    is_flag=True,
    help="Treat values from --value-warning and --value-critical as regex to find all matching values"
)
@click.option(
    "--no-fan-ok",
    is_flag=True,
    default=False,
    help="The check will be unknown if no fan is available. Provide this option to ignore this."
)
@click.option(
    "expected_names",
    "--expect-fan",
    multiple=True,
    default=[],
    help="Name of the fan to expect. Can be specified multiple times."
)
@click.pass_context
@nagiosplugin.guarded
def system_fan(ctx, warning_values, critical_values, use_regex, no_fan_ok, expected_names):
    """This command reads the information from /system/health and extracts the speed of all fans.
    The values are named like 'fan1-speed', 'fan2-speed', ...
    """
    check = nagiosplugin.Check()

    # The resource fetches its data in the constructor, so the fan names are
    # available right away for the checks below.
    fan_resource = SystemFanResource(
        cmd_options=ctx.obj,
        check=check,
        warning_values=warning_values,
        critical_values=critical_values,
        use_regex=use_regex,
    )
    check.add(fan_resource)

    check.results.add(
        nagiosplugin.Result(
            nagiosplugin.state.Ok,
            hint=f"Looks like all fans work as expected: {', '.join(sorted(fan_resource.fan_names))}"
        )
    )

    if len(fan_resource.fan_names) == 0 and not no_fan_ok:
        check.results.add(
            nagiosplugin.Result(
                nagiosplugin.state.Unknown,
                hint="No FANs found"
            )
        )

    if len(expected_names) > 0:
        missing_names = [name for name in expected_names if name not in fan_resource.fan_names]
        if len(missing_names) > 0:
            check.results.add(
                nagiosplugin.Result(
                    nagiosplugin.state.Warn,
                    hint=f"Expected FAN(s) not found: {', '.join(missing_names)}"
                )
            )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,149 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from datetime import datetime
from typing import List, Optional
import click
import nagiosplugin
from ..cli import cli
from ..helper import logger
from ..resource import RouterOSCheckResource
class SystemLicenseResource(RouterOSCheckResource):
    """Read the license information from ``/system/license``.

    While constructing the object ``/system/resource`` is queried once to find
    out if the board name is ``chr``. Only then the deadline and renewal dates
    are evaluated.
    """

    name = "License"

    def __init__(self, cmd_options):
        """Query the board name and select the metric values to extract."""
        super().__init__(cmd_options=cmd_options)

        def days_left(value):
            # Whole seconds until the given date, expressed in days
            remaining = self.parse_routeros_datetime(value) - datetime.now()
            remaining_seconds = int(remaining.total_seconds())
            return remaining_seconds / 60 / 60 / 24

        logger.info("Fetching information ...")
        system_resource = tuple(self.api.path("/system/resource"))[0]

        self.has_renewal = system_resource["board-name"].lower() == "chr"
        self.deadline_datetime: Optional[datetime] = None
        self.next_renewal_datetime: Optional[datetime] = None

        if self.has_renewal:
            metric_values = [
                {"name": "level", "type": None},
                {"name": "deadline-at", "dst": "deadline-in", "type": days_left, "missing_ok": True},
                {"name": "next-renewal-at", "dst": "next-renewal-in", "type": days_left, "missing_ok": True},
            ]
        else:
            metric_values = [
                {"name": "nlevel", "dst": "level", "type": None},
            ]
        self._routeros_metric_values = metric_values

    def probe(self):
        """Fetch ``/system/license`` and return the extracted metrics.

        When renewal applies, the deadline and next renewal dates found in the
        reply are remembered on the object.
        """
        logger.info("Fetching data ...")
        license_data = tuple(self.api.path("/system/license"))[0]

        if self.has_renewal:
            date_fields = (
                ("deadline-at", "deadline_datetime"),
                ("next-renewal-at", "next_renewal_datetime"),
            )
            for key, attribute_name in date_fields:
                if key in license_data:
                    setattr(self, attribute_name, self.parse_routeros_datetime(license_data[key]))

        return self.get_routeros_metric_item(license_data)
class SystemLicenseRenewSummary(nagiosplugin.Summary):
    """Summary that joins all result hints and appends the renewal dates."""

    def ok(self, results: List[nagiosplugin.Result]):
        """Return the hints of all results plus renewal/deadline information.

        The resource is taken from the results; the last result that carries
        one wins.
        """
        hints = []
        resource: Optional[SystemLicenseResource] = None
        for entry in results:
            resource = entry.resource or resource
            if entry.hint:
                hints.append(entry.hint)

        if not resource or not resource.has_renewal:
            return ", ".join(hints)

        if resource.next_renewal_datetime:
            time_delta = resource.next_renewal_datetime - datetime.now()
            hints.append(f"Next renewal in {time_delta.days} day(s) ({resource.next_renewal_datetime})")

        if resource.deadline_datetime:
            time_delta = resource.deadline_datetime - datetime.now()
            hints.append(f"Deadline in {time_delta.days} day(s) ({resource.deadline_datetime})")

        return ", ".join(hints)
class SystemLicenseLevelContext(nagiosplugin.Context):
    """Context that compares the license level with a list of allowed levels."""

    def __init__(self, *args, levels=None, **kwargs):
        """Store the allowed levels; all other arguments go to the base class."""
        self._levels = levels
        super().__init__(*args, **kwargs)

    def evaluate(self, metric, resource):
        """Return Warn if levels are given and the metric value is not one of them."""
        has_restriction = self._levels is not None and len(self._levels) != 0
        if has_restriction and metric.value not in self._levels:
            return nagiosplugin.Result(
                nagiosplugin.Warn,
                hint=f"License level '{metric.value}' not in list with allowed levels: {', '.join(self._levels)}"
            )

        return nagiosplugin.Result(
            nagiosplugin.Ok,
            hint=f"License level is '{metric.value}'"
        )
# The function docstring doubles as the help text click prints for this command.
@cli.command("system.license")
@click.option("--deadline-warning", default="28:", help="Number of days until deadline is reached (Default: '28:')")
@click.option("--deadline-critical", default="14:", help="Number of days until deadline is reached (Default: '14:')")
@click.option(
    "--next-renewal-warning",
    default=None,
    help="Number of days until renewal is done (Default: None, Example: '-14:')"
)
@click.option("--next-renewal-critical", default=None, help="Number of days until renewal is done (Default: None)")
@click.option(
    "--level",
    "levels",
    default=None,
    multiple=True,
    help="Allowed license levels. Repeat to use multiple values."
)
@click.pass_context
@nagiosplugin.guarded
def system_license(ctx, deadline_warning, deadline_critical, next_renewal_warning, next_renewal_critical, levels):
    """This command reads the information from /system/license and checks the license level.
    If the board name reported by /system/resource is 'chr', the days left until the deadline
    and the next renewal are checked too.
    """
    resource = SystemLicenseResource(
        cmd_options=ctx.obj,
    )
    check = nagiosplugin.Check(resource)

    if resource.has_renewal:
        # The deadline and renewal metrics are only extracted in this case, so
        # their contexts and the summary are only needed here.
        check.add(
            nagiosplugin.ScalarContext(
                name="deadline-in",
                warning=deadline_warning,
                critical=deadline_critical,
            ),
            nagiosplugin.ScalarContext(
                name="next-renewal-in",
                warning=next_renewal_warning,
                critical=next_renewal_critical,
            ),
            SystemLicenseRenewSummary(),
        )

    check.add(
        SystemLicenseLevelContext(
            name="level",
            levels=levels,
        )
    )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,121 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from typing import List
import click
import librouteros
import librouteros.query
import nagiosplugin
from ..cli import cli
from ..context import ScalarPercentContext
from ..helper import logger
from ..resource import RouterOSCheckResource
class SystemMemoryResource(RouterOSCheckResource):
    """Provide the free and used memory read from ``/system/resource``."""

    name = "MEMORY"

    def __init__(self, cmd_options):
        """Set up the resource; ``memory_total`` is filled by :meth:`probe`."""
        super().__init__(cmd_options=cmd_options)

        self.memory_total = None

    def probe(self):
        """Yield the ``free`` and the ``used`` metric, both in bytes."""
        api = self._connect_api()

        logger.info("Fetching data ...")
        wanted_keys = (
            librouteros.query.Key("free-memory"),
            librouteros.query.Key("total-memory"),
        )
        query = api.path("/system/resource").select(*wanted_keys)
        record = tuple(query)[0]

        memory_free = record["free-memory"]
        self.memory_total = record["total-memory"]

        def memory_metric(metric_name, amount):
            # Both metrics share unit and bounds
            return nagiosplugin.Metric(
                name=metric_name,
                value=amount,
                uom="B",
                min=0,
                max=self.memory_total,
            )

        yield memory_metric("free", memory_free)
        yield memory_metric("used", self.memory_total - memory_free)
class SystemMemorySummary(nagiosplugin.summary.Summary):
    """Summary that prints only the results selected by name."""

    def __init__(self, result_names: List[str]):
        """Remember the names of the results to print."""
        super().__init__()
        self._result_names = result_names

    def ok(self, results):
        """Return the string form of the selected results, separated by spaces."""
        return " ".join(str(results[selected_name]) for selected_name in self._result_names)
# The function docstring doubles as the help text click prints for this command.
@cli.command("system.memory")
@click.option(
    "--used/--free",
    is_flag=True,
    default=True,
    help="Set if used or free memory should be checked. (Default: used)",
)
@click.option(
    "--warning",
    required=True,
    help="Warning threshold in % or MB. Example (20% or 20 = 20MB)",
)
@click.option(
    "--critical",
    required=True,
    help="Critical threshold in % or MB. Example (20% or 20 = 20MB)",
)
@click.pass_context
@nagiosplugin.guarded
def system_memory(ctx, used, warning, critical):
    """This command reads the free and the total memory from /system/resource and checks the
    used or the free memory against the thresholds.
    """
    check = nagiosplugin.Check(
        SystemMemoryResource(
            cmd_options=ctx.obj,
        )
    )

    # Only the selected value gets thresholds; the other one still needs a
    # context because the resource always yields both metrics.
    if used:
        check.add(nagiosplugin.ScalarContext(
            name="free",
        ))
        check.add(ScalarPercentContext(
            name="used",
            total_name="memory_total",
            warning=warning,
            critical=critical
        ))
    else:
        # For free memory the threshold is a lower bound, hence the trailing ':'
        check.add(ScalarPercentContext(
            name="free",
            total_name="memory_total",
            warning=f"{warning}:",
            critical=f"{critical}:"
        ))
        check.add(nagiosplugin.ScalarContext(
            name="used",
        ))

    check.add(SystemMemorySummary(
        result_names=["used"] if used else ["free"]
    ))

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,261 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2023)
# SPDX-License-Identifier: GPL-3.0-or-later
from pprint import pformat
from typing import List, Optional
import click
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext, PerfdataScalarContext
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
class SystemNtpClientResource(RouterOSCheckResource):
"""Check the NTP client using the data from /system/ntp/client.

Which values are evaluated depends on the RouterOS version, see probe().
"""
name = "NTP"
def __init__(
self,
cmd_options,
check: nagiosplugin.Check,
expected_servers: Optional[List[str]] = None,
last_update_before_warning: Optional[float] = None,
last_update_before_critical: Optional[float] = None,
offset_warning: Optional[float] = None,
offset_critical: Optional[float] = None,
stratum_warning: Optional[int] = None,
stratum_critical: Optional[int] = None,
):
"""Store the check object, the expected servers and all thresholds.

The thresholds are only used in probe() to build the contexts.
"""
super().__init__(cmd_options=cmd_options)
self._check = check
self._expected_servers = expected_servers
self._offset_warning = offset_warning
self._offset_critical = offset_critical
self._last_update_before_warning = last_update_before_warning
self._last_update_before_critical = last_update_before_critical
self._stratum_warning = stratum_warning
self._stratum_critical = stratum_critical
def probe(self):
"""Fetch the NTP client data, register the contexts and return the metrics.

Problems that are not expressed as metric (client disabled, values missing,
unexpected server) are added to the check as additional results.
"""
# NOTE(review): the indentation of this block was lost in this copy of the file.
# Confirm against the original source whether the RouterOS 6 context registration
# and the expected server lookup below belong to the inner else branch.
logger.info("Fetching ntp client data ...")
call = self.api.path(
"/system/ntp/client"
)
results = tuple(call)
# Only the first record of the reply is used
result = results[0]
logger.debug(f"Extracted values {pformat(result)}")
self._routeros_metric_values += [
{"name": "enabled", "type": bool},
]
# A disabled client is reported as critical and nothing else is evaluated
if not result["enabled"]:
self._check.results.add(
nagiosplugin.Result(
nagiosplugin.Critical,
"NTP Client not enabled"
)
)
return self.get_routeros_metric_item(result)
#: Current address of the server the devices gets its time from
current_server_address: Optional[str] = None
# Before version 7: offset is taken from last-adjustment, the server from last-update-from
if self.routeros_version < RouterOSVersion("7"):
metric_values = [
{"name": "last-adjustment", "dst": "offset", "type": self.parse_routeros_time_duration, "uom": "s"},
{"name": "last-update-before", "type": self.parse_routeros_time_duration, "uom": "s"},
]
# Collect the names of all values missing in the reply
metric_value_names_not_found = []
for metric_value in metric_values:
if metric_value["name"] not in result:
metric_value_names_not_found.append(metric_value["name"])
if len(metric_value_names_not_found) > 0:
self._check.results.add(
nagiosplugin.Result(
nagiosplugin.state.Critical,
(
f"Looks like NTP client not running. "
f"Unable to find values for {', '.join(metric_value_names_not_found)}"
)
)
)
else:
self._routeros_metric_values += metric_values
self._check.add(
nagiosplugin.ScalarContext(
name="last-update-before",
warning=self._last_update_before_warning,
critical=self._last_update_before_critical,
),
# The offset threshold X is turned into the range -X:X.
# A threshold of 0 is falsy and therefore results in no threshold.
nagiosplugin.ScalarContext(
name="offset",
warning=f"-{self._offset_warning}:{self._offset_warning}" if self._offset_warning else None,
critical=f"-{self._offset_critical}:{self._offset_critical}" if self._offset_critical else None,
),
)
if self._expected_servers:
current_server_address = result.get("last-update-from")
if current_server_address is None:
self._check.results.add(
nagiosplugin.Result(
nagiosplugin.state.Unknown,
"Unable to get address of server (last-update-from)"
)
)
# All other versions: offset is taken from system-offset, the server from synced-server
else:
self._routeros_metric_values += [
{"name": "freq-drift", "type": float},
{"name": "synced-stratum", "dst": "stratum", "type": int},
# Divided by 1000 and reported with unit "s"; presumably the device reports milliseconds - TODO confirm
{"name": "system-offset", "dst": "offset", "type": lambda v: float(v) / 1000, "uom": "s"},
]
self._check.add(
PerfdataScalarContext(
name="freq-drift",
),
nagiosplugin.ScalarContext(
name="offset",
warning=f"-{self._offset_warning}:{self._offset_warning}" if self._offset_warning else None,
critical=f"-{self._offset_critical}:{self._offset_critical}" if self._offset_critical else None,
),
nagiosplugin.ScalarContext(
name="stratum",
warning=self._stratum_warning,
critical=self._stratum_critical,
),
)
if self._expected_servers:
current_server_address = result.get("synced-server")
if current_server_address is None:
self._check.results.add(
nagiosplugin.Result(
nagiosplugin.state.Unknown,
"Unable to get address of server (synced-server)"
)
)
# The address is only set above if expected servers were given
if current_server_address and current_server_address not in self._expected_servers:
self._check.results.add(
nagiosplugin.Result(
nagiosplugin.state.Warn,
(
f"Server '{current_server_address}' not in list of expected servers: "
f"{', '.join(self._expected_servers)}"
)
)
)
return self.get_routeros_metric_item(result)
class SystemNtpClientSummary(nagiosplugin.Summary):
    """Summary that reports the stratum and the offset of the NTP client."""

    def ok(self, results: List[nagiosplugin.Result]):
        """Return the messages for the ``stratum`` and ``offset`` metrics, joined by commas."""
        messages: List[str] = []
        for result in results:
            if not result.metric:
                continue
            metric_name = result.metric.name
            if metric_name == "stratum":
                messages.append(f"Stratum is {result.metric.value}")
            elif metric_name == "offset":
                messages.append(f"Offset is {result.metric.value:.2f}s")
        return ", ".join(messages)
# Registered as "system.ntp.client"; the function docstring is the help text shown by click.
@cli.command("system.ntp.client")
@click.option(
    "--last-update-before-warning",
    help=(
        "The time from the NTP server should at least be synchronised in the last N seconds. "
        "Default: 30 minutes = 1800 seconds "
        "Note: This is only available on RouterOS 6.x"
    ),
    type=float,
    default=60 * 30,
)
@click.option(
    "--last-update-before-critical",
    help=(
        "The time from the NTP server should at least be synchronised in the last N seconds. "
        "Default: 60 minutes = 3600 seconds "
        "Note: This is only available on RouterOS 6.x"
    ),
    type=float,
    default=60 * 60,
)
@click.option(
    "--offset-warning",
    help="Warning threshold for offset from the NTP server in seconds",
    type=float,
    default=10.0,
)
@click.option(
    "--offset-critical",
    help="Critical threshold for offset from the NTP server in seconds",
    type=float,
    default=30.0,
)
@click.option(
    "--stratum-warning",
    help=(
        "Check the stratum and report warning state if it does not match. "
        "Note: The stratum is only available on RouterOS 7.x"
    ),
    type=int,
)
@click.option(
    "--stratum-critical",
    help=(
        "Check the stratum and report critical state if it does not match. "
        "Note: The stratum is only available on RouterOS 7.x"
    ),
    type=int,
)
@click.option(
    "expected_servers",
    "--expected-server",
    multiple=True,
    help=(
        "Address of the ntp server we expect to get our time from. "
        "This must be the IPv4/IPv6 address and not the FQDN. "
        "It can be provided multiple times. "
        "Example: --expected-server 10.0.0.1 --expected-server 192.168.1.1"
    ),
)
@click.pass_context
@nagiosplugin.guarded
def system_clock(ctx, last_update_before_warning, last_update_before_critical, offset_warning, offset_critical,
                 stratum_warning, stratum_critical, expected_servers):
    """
    This command reads the information from /system/ntp/client to extract the required information.
    It checks if is the NTP client enabled, if the NTP server is reachable and if is the offset in the threshold.
    """
    check = nagiosplugin.Check()

    # All thresholds are handed to the resource, which builds the matching
    # contexts itself while probing.
    threshold_options = {
        "last_update_before_warning": last_update_before_warning,
        "last_update_before_critical": last_update_before_critical,
        "offset_warning": offset_warning,
        "offset_critical": offset_critical,
        "stratum_warning": stratum_warning,
        "stratum_critical": stratum_critical,
    }
    ntp_resource = SystemNtpClientResource(
        cmd_options=ctx.obj,
        check=check,
        expected_servers=expected_servers,
        **threshold_options,
    )

    check.add(ntp_resource, SystemNtpClientSummary(), BooleanContext(name="enabled"))

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,81 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import click
import nagiosplugin
from ..cli import cli
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
class SystemPowerResource(RouterOSCheckResource):
    """Extract the ``power-consumption`` value from ``/system/health``."""

    name = "Power"

    def __init__(
        self,
        cmd_options,
        check: nagiosplugin.Check,
    ):
        """Store the check object and declare the value to extract."""
        super().__init__(cmd_options=cmd_options)

        self._check = check
        self._routeros_metric_values = [
            {"name": "power-consumption", "type": float},
        ]

    def probe(self):
        """Return the extracted metrics; add an Unknown result if there are none."""
        logger.info("Fetching data ...")
        call = self.api.path("/system/health")

        is_before_v7 = self.routeros_version < RouterOSVersion("7")
        if is_before_v7:
            # Before version 7 only the needed keys are selected
            call = call.select(*self.get_routeros_select_keys())

        api_result_items = tuple(call)
        if is_before_v7:
            api_result_items = self._convert_v6_list_to_v7(api_result_items)

        result_metrics = self.get_routeros_metrics(api_result_items)

        if len(result_metrics) == 0:
            unknown_result = nagiosplugin.Result(
                nagiosplugin.state.Unknown,
                hint="Power consumption not found."
            )
            self._check.results.add(unknown_result)

        return result_metrics
# The function docstring doubles as the help text click prints for this command.
@cli.command("system.power")
@click.option(
    "--warning",
    help="Warning threshold for total power consumption",
)
@click.option(
    "--critical",
    help="Critical threshold for total power consumption",
)
@click.pass_context
@nagiosplugin.guarded
def system_power(ctx, warning, critical):
    """Check the total power consumption of a device. This might not be available on all devices"""
    check = nagiosplugin.Check()

    # The resource gets the check object to be able to add a result on its own
    power_resource = SystemPowerResource(
        cmd_options=ctx.obj,
        check=check,
    )
    power_context = nagiosplugin.ScalarContext(
        "power-consumption",
        warning=warning,
        critical=critical,
        fmt_metric="Power consumption {value}W",
    )
    check.add(power_resource, power_context)

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,182 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Dict, List, Set
import click
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
class SystemPsuResource(RouterOSCheckResource):
    """Collect state, current and voltage of the PSUs from ``/system/health``.

    The data is fetched while the object is constructed, so ``psu_names`` is
    already filled when the command inspects it.
    """

    name = "PSU"

    def __init__(
        self, cmd_options, check: nagiosplugin.Check, warning_values: List[str], critical_values: List[str],
        no_psu_ok: bool,
    ):
        """Prepare the thresholds and fetch the PSU data.

        :param cmd_options: Options handed through to the base resource
        :param check: Check object that receives contexts and extra results
        :param warning_values: Warning thresholds in the form ``name:range``
        :param critical_values: Critical thresholds in the form ``name:range``
        :param no_psu_ok: Do not report Unknown if no PSU value or state is found
        """
        super().__init__(cmd_options=cmd_options)

        self._check = check

        self.psu_names: Set[str] = set()
        self.psu_states: Dict[str, str] = {}
        self.psu_values: Dict[str, float] = {}

        self.warning_values = self._prepare_thresholds(warning_values)
        self.critical_values = self._prepare_thresholds(critical_values)
        self.no_psu_ok = no_psu_ok

        self._fetch_data()

    def _fetch_data(self):
        """Read ``/system/health`` and fill the PSU names, values and states."""
        logger.info("Fetching data ...")
        call = self.api.path(
            "/system/health"
        )
        api_results = tuple(call)

        if self.routeros_version < RouterOSVersion("7"):
            # Before version 7 the first record is treated as a name -> value
            # mapping and converted into the name/value item list used below.
            # An empty reply must yield no items instead of an IndexError, so
            # that the "no PSU found" handling below can take over.
            first_record = api_results[0] if api_results else {}
            api_result_items = [
                {"name": name, "value": value}
                for name, value in first_record.items()
            ]
        else:
            api_result_items = api_results

        regex_name = re.compile(r"(?P<name>psu\d+)-(?P<type>(state|current|voltage))")
        for api_result_item in api_result_items:
            m = regex_name.match(api_result_item["name"])
            if not m:
                continue

            self.psu_names.add(m.group("name"))

            if m.group("type") in ("current", "voltage"):
                # Values are stored under the full name, e.g. psu1-voltage
                self.psu_values[api_result_item["name"]] = float(api_result_item["value"])

            if m.group("type") == "state":
                # States are stored under the PSU name only, e.g. psu1
                self.psu_states[m.group("name")] = api_result_item["value"]

        if not self.no_psu_ok and len(self.psu_values) == 0 and len(self.psu_states) == 0:
            self._check.results.add(
                nagiosplugin.Result(
                    nagiosplugin.state.Unknown,
                    hint="No PSU values and stats found"
                )
            )

    @staticmethod
    def _prepare_thresholds(thresholds: List[str]):
        """Split ``name:range`` strings at the first colon and return a dict name -> range.

        A threshold without a range is logged as warning and stored with an
        empty range.
        """
        results = {}
        for threshold in thresholds:
            name, _, value = threshold.partition(":")
            if value is None or value == "":
                logger.warning(f"Unable to parse threshold for {name}")
            results[name] = value

        return results

    def probe(self):
        """Yield one metric per PSU value and one boolean metric per PSU state."""
        for name, value in self.psu_values.items():
            self._check.add(nagiosplugin.ScalarContext(
                name=name,
                warning=self.warning_values.get(name),
                critical=self.critical_values.get(name),
            ))
            yield nagiosplugin.Metric(
                name=name,
                value=value,
            )

        for name, value in self.psu_states.items():
            value_name = f"{name}-state-ok"
            self._check.add(
                BooleanContext(value_name)
            )
            if value != "ok":
                # Every state other than "ok" results in a warning
                self._check.results.add(
                    nagiosplugin.Result(
                        nagiosplugin.state.Warn,
                        hint=f"PSU: {name} state {value}"
                    )
                )
            yield nagiosplugin.Metric(
                name=value_name,
                value=(value == "ok")
            )
# The function docstring doubles as the help text click prints for this command.
@cli.command("system.psu")
@click.option(
    "warning_values",
    "--value-warning",
    multiple=True,
    help=(
        "Set a warning threshold for a value. "
        "Example: If psu1-voltage should be in the range of 12-12.1V you can set --value-warning psu1-voltage:12:12.1 "
        "Can be specified multiple times"
    )
)
@click.option(
    "critical_values",
    "--value-critical",
    multiple=True,
    help=(
        "Set a critical threshold for a value. "
        "Example: If psu1-voltage should be in the range of 12-12.1V you can set --value-critical psu1-voltage:12:12.1 "
        "Can be specified multiple times"
    )
)
@click.option(
    "--no-psu-ok",
    is_flag=True,
    default=False,
    help="The check will be unknown if not at least one psu stat or value is available. Set this to ignore this."
)
@click.option(
    "expected_psu_names",
    "--expect-psu",
    multiple=True,
    default=[],
    help="Name of the PSU to expect at least one value or state. Can be specified multiple times."
)
@click.pass_context
@nagiosplugin.guarded
def system_psu(ctx, warning_values, critical_values, no_psu_ok, expected_psu_names):
    """Check the power supply units (PSU)"""
    check = nagiosplugin.Check()

    # The resource fetches its data in the constructor, so the PSU names are
    # available right away for the checks below.
    psu_resource = SystemPsuResource(
        cmd_options=ctx.obj,
        check=check,
        warning_values=warning_values,
        critical_values=critical_values,
        no_psu_ok=no_psu_ok,
    )
    check.add(psu_resource)

    check.results.add(
        nagiosplugin.Result(
            nagiosplugin.state.Ok,
            # The names are kept in a set; sort them to get a stable output
            hint=f"Looks like all PSU work like expected: {', '.join(sorted(psu_resource.psu_names))}"
        )
    )

    if len(expected_psu_names) > 0:
        missing_psu_names = [
            psu_name for psu_name in expected_psu_names
            if psu_name not in psu_resource.psu_names
        ]
        if len(missing_psu_names) > 0:
            check.results.add(
                nagiosplugin.Result(
                    nagiosplugin.state.Warn,
                    hint=f"Expected PSU(s) not found: {', '.join(missing_psu_names)}"
                )
            )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,184 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Dict, List, Set
import click
import nagiosplugin
from ..cli import cli
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
class SystemTemperatureResource(RouterOSCheckResource):
    """Collect all ``/system/health`` values with ``temperature`` in their name.

    The data is fetched while the object is constructed, so ``names`` and
    ``values`` are already filled when the command inspects them.
    """

    name = "Temperature"

    def __init__(
        self,
        cmd_options,
        check: nagiosplugin.Check,
        warning_values: List[str],
        critical_values: List[str],
        use_regex: bool
    ):
        """Prepare the thresholds (plain or regex based) and fetch the data."""
        super().__init__(cmd_options=cmd_options)

        self._check = check

        self.names: Set[str] = set()
        self.values: Dict[str, float] = {}
        self.use_regex: bool = use_regex

        self.warning_values: Dict[str, str] = {}
        self.critical_values: Dict[str, str] = {}
        self.warning_regex_values: Dict[re.Pattern, str] = {}
        self.critical_regex_values: Dict[re.Pattern, str] = {}

        if not self.use_regex:
            self.warning_values = self.prepare_thresholds(warning_values)
            self.critical_values = self.prepare_thresholds(critical_values)
        else:
            # The thresholds per value are resolved in _fetch_data
            self.warning_regex_values = self.prepare_regex_thresholds(warning_values)
            self.critical_regex_values = self.prepare_regex_thresholds(critical_values)

        self._fetch_data()

    def _fetch_data(self):
        """Read ``/system/health`` and fill ``names`` and ``values``."""
        logger.info("Fetching data ...")
        health_items = tuple(self.api.path("/system/health"))
        if self.routeros_version < RouterOSVersion("7"):
            health_items = self._convert_v6_list_to_v7(health_items)

        temperature_pattern = re.compile(r".*temperature.*")

        # Pairs of (regex thresholds, resolved thresholds), warning first
        threshold_tables = (
            (self.warning_regex_values, self.warning_values),
            (self.critical_regex_values, self.critical_values),
        )

        for entry in health_items:
            sensor_name = entry["name"]
            if not temperature_pattern.match(sensor_name):
                continue

            if self.use_regex:
                for patterns, resolved in threshold_tables:
                    # The first matching pattern decides the threshold
                    for pattern, threshold in patterns.items():
                        if pattern.match(sensor_name):
                            resolved[sensor_name] = threshold
                            break

            self.names.add(sensor_name)
            self.values[sensor_name] = float(entry["value"])

    def probe(self):
        """Register a context for every temperature and yield it as metric."""
        for sensor_name, temperature in self.values.items():
            context = nagiosplugin.ScalarContext(
                name=sensor_name,
                warning=self.warning_values.get(sensor_name),
                critical=self.critical_values.get(sensor_name),
            )
            self._check.add(context)
            yield nagiosplugin.Metric(name=sensor_name, value=temperature)
# The function docstring doubles as the help text click prints for this command.
@cli.command("system.temperature")
@click.option(
    "warning_values",
    "--value-warning",
    multiple=True,
    help=(
        "Set a warning threshold for a value. "
        "Example: If cpu-temperature should be in the range of 40 and 60°C you can set "
        "--value-warning cpu-temperature:40:60 "
        "If cpu-temperature should not be higher than 50.5°C you can set "
        "--value-warning cpu-temperature:50.5 "
        "Can be specified multiple times"
    )
)
@click.option(
    "critical_values",
    "--value-critical",
    multiple=True,
    help=(
        "Set a critical threshold for a value. "
        "Example: If cpu-temperature should be in the range of 40 and 60°C you can set "
        "--value-critical cpu-temperature:40:60 "
        "If cpu-temperature should not be higher than 50.5°C you can set "
        "--value-critical cpu-temperature:50.5 "
        "Can be specified multiple times"
    )
)
@click.option(
    "--regex",
    "use_regex",
    default=False,
    is_flag=True,
    help="Treat values from --value-warning and --value-critical as regex to find all matching values"
)
@click.option(
    "--no-temperature-ok",
    is_flag=True,
    default=False,
    help="The check will be unknown if no temperature is available. Provide this option to ignore this."
)
@click.option(
    "expected_names",
    "--expect-temperature",
    multiple=True,
    default=[],
    help="Name of the temperature to expect. Can be specified multiple times. Example: board-temperature1"
)
@click.pass_context
@nagiosplugin.guarded
def system_temperature(ctx, warning_values, critical_values, use_regex, no_temperature_ok, expected_names):
    """This command reads the information from /system/health and extracts all values containing the
    word temperature in its name. Like 'board-temperature', 'board-temperature1', 'cpu-temperature', ...
    Be aware that not all devices return the same values.
    """
    check = nagiosplugin.Check()

    # The resource fetches its data in the constructor, so the names are
    # available right away for the checks below.
    temperature_resource = SystemTemperatureResource(
        cmd_options=ctx.obj,
        check=check,
        warning_values=warning_values,
        critical_values=critical_values,
        use_regex=use_regex,
    )
    check.add(temperature_resource)

    found_names = temperature_resource.names
    extra_results = check.results

    extra_results.add(
        nagiosplugin.Result(
            nagiosplugin.state.Ok,
            hint=f"Looks like all temperatures are OK: {', '.join(sorted(temperature_resource.names))}"
        )
    )

    if not no_temperature_ok and not found_names:
        extra_results.add(
            nagiosplugin.Result(
                nagiosplugin.state.Unknown,
                hint="No temperatures found"
            )
        )

    if expected_names:
        missing_names = [expected for expected in expected_names if expected not in found_names]
        if missing_names:
            extra_results.add(
                nagiosplugin.Result(
                    nagiosplugin.state.Warn,
                    hint=f"Expected temperature(s) not found: {', '.join(missing_names)}"
                )
            )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,175 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from typing import List, Optional
import click
import nagiosplugin
from ..cli import cli
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
class SystemUpdateResource(RouterOSCheckResource):
    """Compare the installed version with the latest one from ``/system/package/update``."""

    name = "Update"

    def __init__(
        self,
        cmd_options,
        check: nagiosplugin.Check,
        check_for_update: bool = False,
        latest_version: Optional[str] = None,
    ):
        """Store the options.

        :param cmd_options: Options handed through to the base resource
        :param check: Check object that receives the additional results
        :param check_for_update: Run the check-for-updates command before reading the data
        :param latest_version: If given, it is used instead of the version reported by the device
        """
        super().__init__(cmd_options=cmd_options)

        self._check = check
        self._check_for_update = check_for_update
        self._installed_version = None
        self._latest_version = RouterOSVersion(latest_version) if latest_version else None

    def probe(self):
        """Fetch the update data, add the version results and return the metrics."""

        def report(state, hint):
            self._check.results.add(nagiosplugin.Result(state, hint=hint))

        logger.info("Fetching data ...")

        if self._check_for_update:
            logger.debug("Run command to check for updates ...")
            update_command = self.api(
                "/system/package/update/check-for-updates"
            )
            logger.debug("Waiting that update command finished")
            # Consuming the reply waits until the command has finished
            tuple(update_command)

        result = tuple(self.api.path("/system/package/update"))[0]

        self._routeros_metric_values = [
            {"name": "channel", "type": None},
        ]

        installed_version = result.get("installed-version")
        if not installed_version:
            report(nagiosplugin.Warn, "Unable to get installed version")
        else:
            self._installed_version = RouterOSVersion(installed_version)
            report(nagiosplugin.Ok, f"Installed version: {self._installed_version}")

        # A version given by the user takes precedence over the one from the device
        latest_version = result.get("latest-version")
        if self._latest_version is None and latest_version:
            self._latest_version = RouterOSVersion(latest_version)

        if self._installed_version and self._latest_version and self._installed_version < self._latest_version:
            report(
                nagiosplugin.Critical,
                (
                    f"Update version '{self._latest_version}' available. "
                    f"Version installed '{self._installed_version}'"
                ),
            )

        status = result.get("status")
        if isinstance(status, str) and "error" in status.lower():
            report(nagiosplugin.Critical, f"Looks like there was an error: '{status}'")

        return self.get_routeros_metric_item(result)
class SystemUpdateChannelContext(nagiosplugin.Context):
    """Context that compares the update channel with a list of allowed channels."""

    def __init__(self, *args, channels: Optional[List[str]] = None, **kwargs):
        """Create the context.

        :param channels: Allowed update channels. ``None`` or an empty
            collection accepts every channel.
        """
        super().__init__(*args, **kwargs)
        self._channels = channels

    def evaluate(self, metric, resource):
        """Return Ok for an accepted channel and Warn for any other one."""
        allowed = self._channels
        restricted = allowed is not None and len(allowed) > 0

        if restricted and metric.value not in allowed:
            return nagiosplugin.Result(
                nagiosplugin.Warn,
                hint=f"Update channel '{metric.value}' not in list with allowed channels: {', '.join(self._channels)}"
            )

        return nagiosplugin.Result(
            nagiosplugin.Ok,
            hint=f"Update channel is '{metric.value}'"
        )
class SystemUpdateSummary(nagiosplugin.Summary):
    """Summary that lists the hints of all results when the check is ok."""

    def ok(self, results: List[nagiosplugin.Result]):
        """Join all result hints with ", "; fall back to a fixed text without results."""
        hints = [entry.hint for entry in results]
        if not hints:
            return "Looks good"
        return ", ".join(hints)
@cli.command("system.update")
@click.option(
    "--channel",
    "channels",
    default=None,
    multiple=True,
    help="Allowed update channel. Repeat to use multiple values."
)
@click.option(
    "--latest-version",
    "latest_version",
    default=None,
    help=(
        "Set a version that should at least be installed. "
        "Use this if the update server is not available or if you want to check against your own update policy."
    )
)
@click.option(
    "--check-for-update",
    "check_for_update",
    is_flag=True,
    default=False,
    help=(
        "Actively check for updates. "
        "This will run the command /system/package/update/check-for-updates . "
        "If you don't want to use this feature you have to schedule a task to look for updates."
    )
)
@click.pass_context
@nagiosplugin.guarded
def system_update(ctx, channels, latest_version, check_for_update):
    """Check if an update is available and if the update channel is allowed"""
    # The check is created first because the resource adds results to it
    # directly while probing.
    check = nagiosplugin.Check()

    check.add(
        SystemUpdateResource(
            cmd_options=ctx.obj,
            check=check,
            check_for_update=check_for_update,
            latest_version=latest_version,
        ),
        # Evaluates the "channel" metric against the --channel values
        SystemUpdateChannelContext(
            name="channel",
            channels=channels,
        ),
        SystemUpdateSummary(),
    )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,54 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import click
import librouteros
import librouteros.query
import nagiosplugin
from ..cli import cli
from ..helper import logger
from ..resource import RouterOSCheckResource
class SystemUptimeResource(RouterOSCheckResource):
    """Resource that provides the uptime of a device as a single metric."""

    name = "UPTIME"

    def __init__(self, cmd_options):
        """Pass the command options on to the base resource."""
        super().__init__(cmd_options=cmd_options)

    def probe(self):
        """Query ``/system/resource`` and yield the ``uptime`` metric.

        The raw value is converted with ``parse_routeros_time_duration`` and
        reported with the unit ``s``.
        """
        connection = self._connect_api()

        logger.info("Fetching data ...")
        # Only the uptime key is selected from the resource data
        rows = tuple(
            connection.path("/system/resource").select(
                librouteros.query.Key("uptime"),
            )
        )
        first_row = rows[0]

        uptime = self.parse_routeros_time_duration(first_row["uptime"])
        yield nagiosplugin.Metric(name="uptime", value=uptime, uom="s", min=0)
@cli.command("system.uptime")
@click.pass_context
@nagiosplugin.guarded
def system_uptime(ctx):
    """Get Uptime of a device"""
    # The uptime metric gets a plain scalar context without thresholds
    resource = SystemUptimeResource(cmd_options=ctx.obj)
    uptime_context = nagiosplugin.ScalarContext(name="uptime")

    check = nagiosplugin.Check(resource, uptime_context)
    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,150 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Optional, Tuple
import click
import nagiosplugin
from ..cli import cli
from ..helper import logger
from ..resource import RouterOSCheckResource
class ToolPingCheck(RouterOSCheckResource):
    """Resource that runs ``/ping`` on the device and reports the outcome."""

    name = "PING"

    def __init__(self, cmd_options, address):
        """Set up the resource.

        :param cmd_options: Options passed on unchanged to the base resource.
        :param address: Address handed to the ping command.
        """
        super().__init__(cmd_options=cmd_options)

        self._address = address
        # Used as the ping count and as upper bound of the sent/received metrics
        self._max_packages = 1

    def probe(self):
        """Run the ping and yield metrics built from the last response item.

        Packet loss, sent and received are always yielded. The round trip
        times, size and ttl are only yielded if at least one reply was
        received.
        """

        def strip_time(value) -> Tuple[Optional[int], Optional[str]]:
            # Split a value into its leading integer and the remaining text
            matched = re.match(r"^(?P<time>[0-9]+)(?P<uom>.*)$", value)
            if matched is None:
                return None, None
            return int(matched.group("time")), matched.group("uom")

        connection = self._connect_api()

        logger.info("Call /ping command ...")
        replies = tuple(
            connection("/ping", address=self._address, count=self._max_packages)
        )
        summary = replies[-1]

        yield nagiosplugin.Metric(
            name="packet_loss",
            value=summary["packet-loss"],
            uom="%",
            min=0,
            max=100,
        )
        for counter in ("sent", "received"):
            yield nagiosplugin.Metric(
                name=counter,
                value=summary[counter],
                min=0,
                max=self._max_packages,
            )

        if not summary["received"] > 0:
            return

        # NOTE(review): only the leading integer of each round trip time is
        # used, the text after it is dropped - confirm the device always
        # reports these values in the same unit.
        rtt_fields = (
            ("rtt_min", "min-rtt"),
            ("rtt_max", "max-rtt"),
            ("rtt_avg", "avg-rtt"),
        )
        for metric_name, field in rtt_fields:
            yield nagiosplugin.Metric(
                name=metric_name,
                value=strip_time(summary[field])[0],
                min=0,
            )

        # NOTE(review): size and ttl are treated as reply-only values and
        # therefore stay behind the received guard - confirm.
        yield nagiosplugin.Metric(name="size", value=summary["size"])
        yield nagiosplugin.Metric(name="ttl", value=summary["ttl"], min=0, max=255)
@cli.command("tool.ping")
@click.option(
    "--address",
    required=True,
    help="Address of device to ping",
)
@click.option(
    "--packet-loss-warning",
    help="Warning threshold for packet loss",
)
@click.option(
    "--packet-loss-critical",
    help="Critical threshold for packet loss",
)
@click.option(
    "--ttl-warning",
    help="Warning threshold for the Time-To-Live (TTL) value",
)
@click.option(
    "--ttl-critical",
    help="Critical threshold for the Time-To-Live (TTL) value",
)
@click.pass_context
@nagiosplugin.guarded
def tool_ping(ctx, address, packet_loss_warning, packet_loss_critical, ttl_warning, ttl_critical):
    """Execute a ping command on the device to check other devices"""
    # nagiosplugin.guarded (same as on the other commands) turns uncaught
    # exceptions into a regular plugin result instead of a bare traceback.
    check = nagiosplugin.Check(
        ToolPingCheck(
            cmd_options=ctx.obj,
            address=address
        )
    )

    check.add(nagiosplugin.ScalarContext(
        name="packet_loss",
        warning=packet_loss_warning,
        critical=packet_loss_critical
    ))

    # These metrics are only reported, there are no thresholds for them
    for metric_name in ("sent", "received", "rtt_avg", "rtt_min", "rtt_max", "size"):
        check.add(nagiosplugin.ScalarContext(
            name=metric_name
        ))

    check.add(nagiosplugin.ScalarContext(
        name="ttl",
        warning=ttl_warning,
        critical=ttl_critical
    ))

    check.main(verbose=ctx.obj["verbose"])