diff --git a/interface-definitions/protocols_failover.xml.in b/interface-definitions/protocols_failover.xml.in index c0caec68e..f70975949 100644 --- a/interface-definitions/protocols_failover.xml.in +++ b/interface-definitions/protocols_failover.xml.in @@ -1,135 +1,141 @@ <?xml version="1.0"?> <interfaceDefinition> <node name="protocols"> <children> <node name="failover" owner="${vyos_conf_scripts_dir}/protocols_failover.py"> <properties> <help>Failover Routing</help> <priority>490</priority> </properties> <children> <tagNode name="route"> <properties> <help>Failover IPv4 route</help> <valueHelp> <format>ipv4net</format> <description>IPv4 failover route</description> </valueHelp> <constraint> <validator name="ipv4-prefix"/> </constraint> </properties> <children> <tagNode name="next-hop"> <properties> <help>Next-hop IPv4 router address</help> <valueHelp> <format>ipv4</format> <description>Next-hop router address</description> </valueHelp> <constraint> <validator name="ipv4-address"/> </constraint> </properties> <children> <node name="check"> <properties> <help>Check target options</help> </properties> <children> <leafNode name="policy"> <properties> <help>Policy for check targets</help> <completionHelp> <list>any-available all-available</list> </completionHelp> <valueHelp> <format>all-available</format> <description>All targets must be alive</description> </valueHelp> <valueHelp> <format>any-available</format> <description>Any target must be alive</description> </valueHelp> <constraint> <regex>(all-available|any-available)</regex> </constraint> </properties> <defaultValue>any-available</defaultValue> </leafNode> #include <include/port-number.xml.i> <leafNode name="target"> <properties> <help>Check target address</help> <valueHelp> <format>ipv4</format> <description>Address to check</description> </valueHelp> <constraint> <validator name="ipv4-address"/> </constraint> <multi/> </properties> </leafNode> <leafNode name="timeout"> <properties> <help>Timeout between checks</help> <valueHelp> <format>u32:1-300</format> <description>Timeout in seconds between checks</description> </valueHelp> <constraint> <validator name="numeric" argument="--range 1-255"/> </constraint> </properties> <defaultValue>10</defaultValue> </leafNode> <leafNode name="type"> <properties> <help>Check type</help> <completionHelp> <list>arp icmp tcp</list> </completionHelp> <valueHelp> <format>arp</format> <description>Check target by ARP</description> </valueHelp> <valueHelp> <format>icmp</format> <description>Check target by ICMP</description> </valueHelp> <valueHelp> <format>tcp</format> <description>Check target by TCP</description> </valueHelp> <constraint> <regex>(arp|icmp|tcp)</regex> </constraint> </properties> <defaultValue>icmp</defaultValue> </leafNode> </children> </node> #include <include/static/static-route-interface.xml.i> <leafNode name="metric"> <properties> <help>Route metric for this gateway</help> <valueHelp> <format>u32:1-255</format> <description>Route metric</description> </valueHelp> <constraint> <validator name="numeric" argument="--range 1-255"/> </constraint> </properties> <defaultValue>1</defaultValue> </leafNode> + <leafNode name="onlink"> + <properties> + <help>The next hop is directly connected to the interface, even if it does not match interface prefix</help> + <valueless/> + </properties> + </leafNode> </children> </tagNode> </children> </tagNode> </children> </node> </children> </node> </interfaceDefinition> diff --git a/src/helpers/vyos-failover.py b/src/helpers/vyos-failover.py index f34c18916..348974364 100755 --- a/src/helpers/vyos-failover.py +++ b/src/helpers/vyos-failover.py @@ -1,234 +1,235 @@ #!/usr/bin/env python3 # # Copyright (C) 2022-2024 VyOS maintainers and contributors # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 or later as # published by the Free Software Foundation. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import argparse import json import socket import time from vyos.utils.process import rc_cmd from pathlib import Path from systemd import journal my_name = Path(__file__).stem def is_route_exists(route, gateway, interface, metric): """Check if route with expected gateway, dev and metric exists""" rc, data = rc_cmd(f'ip --json route show protocol failover {route} ' f'via {gateway} dev {interface} metric {metric}') if rc == 0: data = json.loads(data) if len(data) > 0: return True return False def get_best_route_options(route, debug=False): """ Return current best route ('gateway, interface, metric) % get_best_route_options('203.0.113.1') ('192.168.0.1', 'eth1', 1) % get_best_route_options('203.0.113.254') (None, None, None) """ rc, data = rc_cmd(f'ip --detail --json route show protocol failover {route}') if rc == 0: data = json.loads(data) if len(data) == 0: print(f'\nRoute {route} for protocol failover was not found') return None, None, None # Fake metric 999 by default # Search route with the lowest metric best_metric = 999 for entry in data: if debug: print('\n', entry) metric = entry.get('metric') gateway = entry.get('gateway') iface = entry.get('dev') if metric < best_metric: best_metric = metric best_gateway = gateway best_interface = iface if debug: print(f'### Best_route exists: {route}, best_gateway: {best_gateway}, ' f'best_metric: {best_metric}, best_iface: {best_interface}') return best_gateway, best_interface, best_metric def is_port_open(ip, port): """ Check connection to remote host and port Return True if host alive % is_port_open('example.com', 8080) True """ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP) s.settimeout(2) try: s.connect((ip, int(port))) s.shutdown(socket.SHUT_RDWR) return True except: return False finally: s.close() def is_target_alive(target_list=None, iface='', proto='icmp', port=None, debug=False, policy='any-available') -> bool: """Check the availability of each target in the target_list using the specified protocol ICMP, ARP, TCP Args: target_list (list): A list of IP addresses or hostnames to check. iface (str): The name of the network interface to use for the check. proto (str): The protocol to use for the check. Options are 'icmp', 'arp', or 'tcp'. port (int): The port number to use for the TCP check. Only applicable if proto is 'tcp'. debug (bool): If True, print debug information during the check. policy (str): The policy to use for the check. Options are 'any-available' or 'all-available'. Returns: bool: True if all targets are reachable according to the policy, False otherwise. Example: % is_target_alive(['192.0.2.1', '192.0.2.5'], 'eth1', proto='arp', policy='all-available') True """ if iface != '': iface = f'-I {iface}' num_reachable_targets = 0 for target in target_list: match proto: case 'icmp': command = f'/usr/bin/ping -q {target} {iface} -n -c 2 -W 1' rc, response = rc_cmd(command) if debug: print(f' [ CHECK-TARGET ]: [{command}] -- return-code [RC: {rc}]') if rc == 0: num_reachable_targets += 1 if policy == 'any-available': return True case 'arp': command = f'/usr/bin/arping -b -c 2 -f -w 1 -i 1 {iface} {target}' rc, response = rc_cmd(command) if debug: print(f' [ CHECK-TARGET ]: [{command}] -- return-code [RC: {rc}]') if rc == 0: num_reachable_targets += 1 if policy == 'any-available': return True case _ if proto == 'tcp' and port is not None: if is_port_open(target, port): num_reachable_targets += 1 if policy == 'any-available': return True case _: return False if policy == 'all-available' and num_reachable_targets == len(target_list): return True return False if __name__ == '__main__': # Parse command arguments and get config parser = argparse.ArgumentParser() parser.add_argument('-c', '--config', action='store', help='Path to protocols failover configuration', required=True, type=Path) args = parser.parse_args() try: config_path = Path(args.config) config = json.loads(config_path.read_text()) except Exception as err: print( f'Configuration file "{config_path}" does not exist or malformed: {err}' ) exit(1) # Useful debug info to console, use debug = True # sudo systemctl stop vyos-failover.service # sudo /usr/libexec/vyos/vyos-failover.py --config /run/vyos-failover.conf debug = False while(True): for route, route_config in config.get('route').items(): exists_gateway, exists_iface, exists_metric = get_best_route_options(route, debug=debug) for next_hop, nexthop_config in route_config.get('next_hop').items(): conf_iface = nexthop_config.get('interface') conf_metric = int(nexthop_config.get('metric')) port = nexthop_config.get('check').get('port') port_opt = f'port {port}' if port else '' policy = nexthop_config.get('check').get('policy') proto = nexthop_config.get('check').get('type') target = nexthop_config.get('check').get('target') timeout = nexthop_config.get('check').get('timeout') + onlink = 'onlink' if 'onlink' in nexthop_config else '' # Route not found in the current routing table if not is_route_exists(route, next_hop, conf_iface, conf_metric): if debug: print(f" [NEW_ROUTE_DETECTED] route: [{route}]") # Add route if check-target alive if is_target_alive(target, conf_iface, proto, port, debug=debug, policy=policy): if debug: print(f' [ ADD ] -- ip route add {route} via {next_hop} dev {conf_iface} ' f'metric {conf_metric} proto failover\n###') rc, command = rc_cmd(f'ip route add {route} via {next_hop} dev {conf_iface} ' - f'metric {conf_metric} proto failover') + f'{onlink} metric {conf_metric} proto failover') # If something is wrong and gateway not added # Example: Error: Next-hop has invalid gateway. if rc !=0: if debug: print(f'{command} -- return-code [RC: {rc}] {next_hop} dev {conf_iface}') else: journal.send(f'ip route add {route} via {next_hop} dev {conf_iface} ' - f'metric {conf_metric} proto failover', SYSLOG_IDENTIFIER=my_name) + f'{onlink} metric {conf_metric} proto failover', SYSLOG_IDENTIFIER=my_name) else: if debug: print(f' [ TARGET_FAIL ] target checks fails for [{target}], do nothing') journal.send(f'Check fail for route {route} target {target} proto {proto} ' f'{port_opt}', SYSLOG_IDENTIFIER=my_name) # Route was added, check if the target is alive # We should delete route if check fails only if route exists in the routing table if not is_target_alive(target, conf_iface, proto, port, debug=debug, policy=policy) and \ is_route_exists(route, next_hop, conf_iface, conf_metric): if debug: print(f'Nexh_hop {next_hop} fail, target not response') print(f' [ DEL ] -- ip route del {route} via {next_hop} dev {conf_iface} ' f'metric {conf_metric} proto failover [DELETE]') rc_cmd(f'ip route del {route} via {next_hop} dev {conf_iface} metric {conf_metric} proto failover') journal.send(f'ip route del {route} via {next_hop} dev {conf_iface} ' f'metric {conf_metric} proto failover', SYSLOG_IDENTIFIER=my_name) time.sleep(int(timeout))