#!/bin/bash
#This script does the following:
#  Configures traffic control parameters to transmit and receive
#  Sets Egress and Ingress policies
#  Tunes real-time behaviors
#  Runs Linux PTP and PHC2SYS
#  Uses the kernel cpuset mechanism to allocate CPU cores to the processes
#Usage: To configure as PTP master,
#           ./application_dependencies.sh -i interface_name -m
#       To configure as PTP slave
#           ./application_dependencies.sh -i interface_name -s
#       ./application_dependencies.sh [-h] [-i] [-n] [-v]

#Program name and version
program=$(basename "$0")
version=2.0

#Set global variables for PTP master, PTP slave and NTP usage
#These variables can be modified according to the input arguments provided by the user
ptp_master=0
ptp_slave=0
ntp=0

#Log file that collects the before/after state of the configured features
log_file=/var/log/application_dependencies.log

################################################################################
# Print the program usage                                                      #
################################################################################
printUsage() {
    cat <<EOF
$program configures transmit and receive options, egress and ingress policies,
tunes realtime behaviors and runs PTP, PHC2SYS.
By default, it turns off NTP and uses BMC algorithm to find the PTP master.
Usage: $0 -i interface_name -m/s/n
Options are:
-h/--help Print help
-i/--interface i210 interface name
-m/--master Configure as PTP master
-s/--slave Configure as PTP slave
-n/--ntp Turn ON NTP - This option will work only when the user configures PTP master
-v/--version Print version
EOF
}

################################################################################
# Print program version                                                        #
################################################################################
printVersion() {
    echo "$program version: $version"
}

################################################################################
# Helpers used by main()                                                       #
################################################################################

#Check the user supplied options before anything on the system is modified.
#Globals: IFACE, ptp_master, ptp_slave, ntp (ptp_master is set to 1 when neither role is given)
#Returns: 0 when the combination is usable, 1 (after printing an error) otherwise
validateArguments() {
    #Prints an error if no interface parameter is provided by the user
    if [[ -z "$IFACE" ]]; then
        echo "Error: Interface name not specified"
        return 1
    fi

    #A node cannot act as both PTP master and PTP slave
    if (( ptp_slave == 1 && ptp_master == 1 )); then
        echo "Error: PTP Master and Slave can not be configured at the same instance"
        return 1
    fi

    #NTP - Network Time Protocol, takes time from the network (Internet)
    #PTP slave should be synchronized with PTP master's clock and not with NTP clock.
    #So, NTP should not be enabled at the PTP slave side
    if (( ptp_slave == 1 && ntp == 1 )); then
        echo "Error: NTP can not be ON when configured as PTP Slave"
        return 1
    fi

    #If a node is not configured either as PTP master or as PTP slave by the user,
    #then by default, it will be configured as PTP master.
    #The PTP master will be chosen using BMC (Best Master Clock) algorithm,
    #and the other system will act as a PTP slave
    if (( ptp_slave == 0 && ptp_master == 0 )); then
        ptp_master=1
    fi

    #Verify the given interface is available or not.
    #An exact lookup is used so that "eth" does not pass because "eth0" exists.
    if [[ "$IFACE" == */* || ! -e "/sys/class/net/$IFACE" ]]; then
        echo "Error: Interface not available"
        return 1
    fi

    return 0
}

#Print the IRQ numbers of the /proc/interrupts lines matching the regex in $1
listIrqs() {
    awk -v pattern="$1" '$0 ~ pattern { sub(/:$/, "", $1); print $1 }' /proc/interrupts
}

#Print the PIDs of the interrupt threads named irq/<$1>-<IFACE>-...
irqThreadPids() {
    ps ax | grep "irq/$1-$IFACE-" | grep -v grep | awk '{ print $1 }'
}

#Move the given PIDs into the cpuset named $1.
#The tasks file accepts a single PID per write, so each PID is written separately.
#Returns: 1 when the cpuset does not exist (for example cpuset is not mounted)
moveToCpuset() {
    local tasks_file="/cpuset/$1/tasks"
    local pid
    shift

    [[ -e "$tasks_file" ]] || return 1
    for pid in "$@"; do
        /bin/echo "$pid" > "$tasks_file"
    done
}

#Create cpusets to redirect processes into these cpusets:
#  sys  - first core  (0), receives every task of the root cpuset
#  ptp  - second core (1)
#  com  - third core  (2)
#  user - fourth core (3)
#Returns: 1 when the cpuset filesystem cannot be mounted (nothing else is written then)
setupCpusets() {
    local root=/cpuset
    local name task
    local core=0
    local -a tasks=()

    mkdir -p "$root"

    #Mount cpuset if not mounted
    if ! mount | awk -v mp="$root" '$3 == mp { found = 1 } END { exit !found }'; then
        if ! mount -t cpuset none "$root/"; then
            echo "Error: Unable to mount cpuset filesystem on $root"
            return 1
        fi
    fi

    #Absolute paths are used so that nothing is written relative to the caller's directory
    for name in sys ptp com user; do
        mkdir -p "$root/$name"
        /bin/echo "$core" > "$root/$name/cpuset.cpus"
        /bin/echo 1 > "$root/$name/cpuset.cpu_exclusive"
        /bin/echo 0 > "$root/$name/cpuset.mems"
        if [[ "$name" != sys ]]; then
            /bin/echo 1 > "$root/$name/cpuset.mem_hardwall"
        fi
        core=$((core + 1))
    done

    #Move all processes from the root cpuset to the sys-cpuset.
    #The list is read up front; tasks that cannot be moved are silently skipped.
    mapfile -t tasks < "$root/tasks"
    for task in "${tasks[@]}"; do
        /bin/echo "$task" > "$root/sys/tasks" 2> /dev/null
    done
}

#Wait (up to roughly 5 seconds) for the configured interface to leave the "down" state.
#Outputs: the last operstate that was read
waitForInterface() {
    local -r max_attempts=50
    local attempts=0
    local state

    state=$(cat "/sys/class/net/$IFACE/operstate")
    while [[ "$state" == "down" ]] && (( attempts < max_attempts )); do
        sleep 0.1
        attempts=$((attempts + 1))
        state=$(cat "/sys/class/net/$IFACE/operstate")
    done
    echo "$state"
}

################################################################################
# Configure transmit                                                           #
################################################################################
#In our case, we are transmitting PubSub packets as isochronous traffic.
#Driver level modifications are done to reduce latency at both transmission and reception.
configureTransmit() {
    local mqprio_handle

    #Delete any existing MQPRIO configuration of this interface
    if sudo tc qdisc show dev "$IFACE" | grep -q mqprio; then
        sudo tc qdisc del dev "$IFACE" root
    fi

    #On the transmission side, we can use MQPRIO or TAPRIO to achieve deterministic publishing of packets
    #in the specified hardware queues. We always suggest using TAPRIO. But as our test setup is in the
    #4.19 kernel and TAPRIO support is available only from 5.4 kernel, we stick to MQPRIO.
    #mqprio - MQPRIO qdisc is a simple queueing discipline that allows mapping traffic flows to hardware
    #         queue ranges using priorities and a configurable priority to traffic class mapping
    #ETF - Earliest TxTime First can be used along with MQPRIO and TAPRIO, which allows applications to
    #      control the instant when a packet should be dequeued from the traffic control layer into the
    #      net device (transmits the packets into the wire with deterministic nanosecond precision)
    #Currently, i210 supports ETF feature only at its hardware queues 0 and 1.
    #
    #The below command maps the Linux socket priorities to traffic classes, which in turn map to hardware queues.
    #From IEEE802.1 standard, there are 8 traffic classes. Considering the design of i210 hardware
    #(only 4 hardware queues), up to 4 traffic classes can be created. Here 3 are created (num_tc 3):
    #  socket priority 3 -> traffic class 0 -> queue 0 (1@0)
    #  socket priority 2 -> traffic class 1 -> queue 1 (1@1)
    #  all other socket priorities (0 to 15) -> traffic class 2 -> queues 2 and 3 (2@2)
    #"hw 0" indicates no hardware offload is enabled
    sudo tc qdisc add dev "$IFACE" parent root mqprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 0

    #Check mqprio queuing discipline is configured in given interface
    if ! tc qdisc show dev "$IFACE" | grep -q "mqprio"; then
        echo "Error mqprio queuing discipline not assigned in $IFACE interface"
    fi

    #Apply ETF to queue 0 and queue 1.
    #Normally, ETF requires the system clock to be in sync with the PTP hardware clock for
    #deterministic delivery. This (PHC2SYS) is started later by this script.
    #To apply ETF, get the MQPRIO handle that has been installed by the above command and
    #install the ETF in queue 0 (num:1) and in queue 1 (num:2).
    #Offload parameter is enabled to support launch time feature
    #Clockid parameter specifies which clock is used to set the transmission timestamps (CLOCK_TAI)
    #The delta parameter specifies the next wakeup time after enqueuing or dequeuing (safety margin)
    mqprio_handle=$(sudo tc qdisc show dev "$IFACE" \
        | awk '$2 == "mqprio" { sub(/:.*/, "", $3); print $3; exit }')
    if [[ -n "$mqprio_handle" ]]; then
        sudo tc qdisc add dev "$IFACE" parent "$mqprio_handle:1" etf offload clockid CLOCK_TAI delta 150000
        sudo tc qdisc add dev "$IFACE" parent "$mqprio_handle:2" etf offload clockid CLOCK_TAI delta 150000
    fi
}

#Print the state ("on"/"off") of the ntuple feature of IFACE
ntupleState() {
    /sbin/ethtool --show-features "$IFACE" | awk '/ntuple/ { print $2; exit }'
}

################################################################################
# Configure receive                                                            #
################################################################################
configureReceive() {
    local location

    #Log the current ethtool features and ntuple features
    /sbin/ethtool --show-features "$IFACE" | grep ntup >> "$log_file"
    /sbin/ethtool --show-ntuple "$IFACE" >> "$log_file"

    #On the receive side, to improve determinism, filter the incoming packets.
    #ntuple filtering - This allows to specify parameters to steer the incoming packets to a particular RX queue
    #1. We should disable promiscuous multicast as we only intend to receive the required packets
    #2. We should have an exact match of the multicast address of the incoming frame
    #   with Multicast Filter Address (MFA) see section 7.1.1.1.2 of i210 datasheet
    #3. As there is a bug in multicast MAC address filtering, filter the packets using VLAN(0x8100).
    #TODO: multicast MAC address filtering

    #Check promiscuous mode is on and set off the promiscuous mode in given interface
    if ip link show dev "$IFACE" | grep -q PROMISC; then
        ip link set "$IFACE" promisc off
        if ip link show dev "$IFACE" | grep -q PROMISC; then
            echo "Error: Promisc ON in $IFACE interface"
        fi
    fi

    #Enable ntuple feature to filter the incoming packets
    if [[ "$(ntupleState)" == "off" ]]; then
        /sbin/ethtool -K "$IFACE" ntuple on
        if [[ "$(ntupleState)" == "off" ]]; then
            echo "Error: ntuple feature of $IFACE interface is off"
        fi
    fi

    #Delete if there are any existing ntuple configurations
    /sbin/ethtool --config-ntuple "$IFACE" delete 15
    /sbin/ethtool --config-ntuple "$IFACE" delete 14

    #Check the existing ntuple configurations 14 and 15 are deleted.
    #Only the "Filter: <location>" line is matched, not any line that merely contains the digits.
    for location in 14 15; do
        if /sbin/ethtool --show-ntuple "$IFACE" \
            | grep -Eq "^Filter:[[:space:]]*${location}[[:space:]]*\$"; then
            echo "Error: Existing ntuple configuration $location not deleted"
        fi
    done

    #Filter the incoming traffic using ethertype and steer them into desired hardware queue
    #Ethertype 0xb62c for OPC UA PubSub is steered into the hardware queue 1 (action 1)
    #Ethertype 0x88f7 for PTP traffic is steered into the hardware queue 3 (action 3)
    /sbin/ethtool --config-ntuple "$IFACE" flow-type ether proto 0xb62c loc 15 action 1
    /sbin/ethtool --config-ntuple "$IFACE" flow-type ether proto 0x88f7 loc 14 action 3

    #Configures the incoming packets other than the ones configured using ntuple to flow evenly
    #between the first N receive queues. Here N is 1, so they flow into the hardware queue 0 (equal 1)
    /sbin/ethtool -X "$IFACE" equal 1

    #Log the updated ethtool features and ntuple features
    /sbin/ethtool --show-features "$IFACE" | grep ntup >> "$log_file"
    /sbin/ethtool --show-ntuple "$IFACE" >> "$log_file"
}

#Set the egress and ingress priority mapping 3:3 on the VLAN interface IFACE.8
#and verify it through /proc/net/vlan.
configureVlanPolicies() {
    local vlan_if="$IFACE.8"
    local vlan_proc="/proc/net/vlan/$IFACE.8"

    #Set egress policy for VLANs - This maps socket priority to VLAN PCP (only 3 to 3 is mapped)
    sudo ip link set "$vlan_if" type vlan egress 3:3

    #Set ingress policy for VLANs - This maps VLAN PCP to socket priority (only 3 to 3 is mapped)
    sudo ip link set "$vlan_if" type vlan ingress 3:3

    #Check the VLAN ingress policy is set
    if ! grep INGRESS "$vlan_proc" | grep -q "3:3"; then
        echo "Error: VLAN Ingress policy 3:3 not set"
    fi

    #Check the VLAN egress policy is set
    if ! grep EGRESS "$vlan_proc" | grep -q "3:3"; then
        echo "Error: VLAN Egress policy 3:3 not set"
    fi

    #Log the updated tc and VLAN configurations
    sudo tc qdisc show >> "$log_file"
    ip a >> "$log_file"
    cat "$vlan_proc" >> "$log_file"
}

################################################################################
# Tune Linux for best possible real-time behavior                              #
################################################################################
tuneRealtime() {
    #The irq priority is set to 81 as it is the maximum priority given to the
    #Subscriber thread of PubSub application.
    local -r priority=81
    local cores cpu governor irq pid
    local governor_ok=1

    #Get the number of cores
    cores=$(nproc --all)

    #Log the default mode of CPU
    for (( cpu = 0; cpu < cores; cpu++ )); do
        cat "/sys/devices/system/cpu/cpu$cpu/cpufreq/scaling_governor" >> "$log_file"
    done

    #By default, nodes will be in power save mode. This mode attempts to save power by reducing the
    #CPU's speed whenever possible. But as we need high performance and don't need any interruptions
    #from the CPU, we will modify all the CPU cores to performance mode.
    for (( cpu = 0; cpu < cores; cpu++ )); do
        governor="/sys/devices/system/cpu/cpu$cpu/cpufreq/scaling_governor"
        echo performance > "$governor"
        cat "$governor" >> "$log_file"
        #Check performance mode is set on this core
        if [[ "$(cat "$governor" 2> /dev/null)" != "performance" ]]; then
            governor_ok=0
        fi
    done
    if (( governor_ok == 0 )); then
        echo "Error: Performance mode is not set"
    fi

    #The interrupts caused by PubSub packets should be processed as soon as possible.
    #So, setting the irq priority of the PubSub TxRx interface to 81.
    #NOTE(review): the chrt result is logged to rc.local.log, unlike every other log line - kept as is.
    for irq in $(listIrqs "$IFACE-TxRx-[0-3]"); do
        for pid in $(irqThreadPids "$irq"); do
            sudo chrt -ap "$pid"
            sudo chrt -fp "$priority" "$pid"
            sudo chrt -ap "$pid" >> /var/log/rc.local.log
        done
    done

    #EEE(Energy Efficient Ethernet) reduces power consumption during periods of low data activity,
    #which may introduce latency in TxRx path of PubSub packets. Disable it when it is enabled.
    if /sbin/ethtool --show-eee "$IFACE" | grep -q "enabled"; then
        /sbin/ethtool --set-eee "$IFACE" eee off
        if /sbin/ethtool --show-eee "$IFACE" | grep -q "enabled"; then
            echo "Error: EEE mode enabled"
        fi
    fi
    /sbin/ethtool --show-eee "$IFACE" >> "$log_file"
}

################################################################################
# Run PTP                                                                      #
################################################################################
runPtp() {
    local -r ptp4l_bin=/usr/local/sbin/ptp4l
    local -r phc2sys_bin=/usr/local/sbin/phc2sys
    local -a launcher=(sudo daemonize -E BUILD_ID=dontKillMe)
    local -a ptp4l_args=(-i "$IFACE" -2 -mq)
    local -a phc2sys_args=()
    local -a pids=()
    local irq

    #Kill active PTP and PHC2SYS applications
    pkill -9 ptp4l
    pkill -9 phc2sys

    if (( ptp_master == 1 )); then
        #PTP MASTER: PHC2SYS takes the user clock as a base and synchronizes the hardware clock.
        phc2sys_args=(-c "$IFACE" -s CLOCK_REALTIME)
    else
        #PTP SLAVE: ptp4l runs with -s and PHC2SYS takes the hardware clock as a base
        #and synchronizes the user clock.
        ptp4l_args+=(-s)
        phc2sys_args=(-s "$IFACE" -c CLOCK_REALTIME)
    fi
    ptp4l_args+=(-f /etc/linuxptp/gPTP.cfg --step_threshold=1 --fault_reset_interval=0
        --socket_priority=1 --announceReceiptTimeout=10 --transportSpecific=1)
    phc2sys_args+=(--step_threshold=1 --transportSpecific=1 -w -m)

    #Run PTP in the background with socket priority set as 1.
    "${launcher[@]}" -o /var/log/ptp4l.log -e /var/log/ptp4l.err.log \
        /usr/bin/taskset -c 0 chrt 90 "$ptp4l_bin" "${ptp4l_args[@]}"

    #PHC2SYS synchronizes user clock and hardware clock.
    #This runs in the background with priority one lesser than PTP.
    "${launcher[@]}" -o /var/log/phc2sys.log -e /var/log/phc2sys.err.log \
        /usr/bin/taskset -c 0 chrt 89 "$phc2sys_bin" "${phc2sys_args[@]}"

    #Enabling smp_affinity to assign IRQs interrupts to processors.
    #The space after the mask matters: "echo 4>file" would redirect file descriptor 4
    #and write nothing.
    for irq in $(listIrqs "$IFACE"); do
        echo "smp_affinity of core $irq - $(cat "/proc/irq/$irq/smp_affinity")"
        echo 4 > "/proc/irq/$irq/smp_affinity"
    done

    #Move PTP process to the ptp-cpuset (CPU core 1).
    #The full command line is matched so the daemon is found even while it is still
    #being exec'ed through taskset/chrt.
    mapfile -t pids < <(pgrep -f "$ptp4l_bin")
    if (( ${#pids[@]} == 0 )); then
        echo "Error: PTP is not active"
    else
        moveToCpuset ptp "${pids[@]}"
    fi

    #Move PHC2SYS process to the ptp-cpuset (CPU core 1)
    mapfile -t pids < <(pgrep -f "$phc2sys_bin")
    if (( ${#pids[@]} == 0 )); then
        echo "Error: PHC2SYS is not active"
    else
        moveToCpuset ptp "${pids[@]}"
    fi
}

#Interrupts are available for all TxRx queues starting from TxRx0 to TxRx3.
#Move the TxRx0 and TxRx1 interrupt threads to com-cpuset (CPU core 2),
#TxRx2 to user-cpuset (CPU core 3) and TxRx3 to ptp-cpuset (CPU core 1).
pinInterruptThreads() {
    #Total number of queues available is 4
    local -r total_number_of_queues=4
    local queue irq target
    local -a pids=()

    for (( queue = 0; queue < total_number_of_queues; queue++ )); do
        if (( queue < 2 )); then
            target=com
        elif (( queue == 2 )); then
            target=user
        else
            target=ptp
        fi

        for irq in $(listIrqs "$IFACE-TxRx-$queue"); do
            mapfile -t pids < <(irqThreadPids "$irq")
            #Nothing is written when no interrupt thread was found
            if (( ${#pids[@]} > 0 )); then
                moveToCpuset "$target" "${pids[@]}"
            fi
        done
    done
}

################################################################################
# Main program                                                                 #
################################################################################
#Globals: IFACE, ptp_master, ptp_slave, ntp (read), log_file (read)
#Returns: 0 on success, 1 on invalid arguments or when the interface state is
#         neither "up" nor "down"
main() {
    local retval=0
    local interface_check

    #Validate first, so that a wrong invocation does not modify the system
    validateArguments || return 1

    #A failure is already reported by setupCpusets; the remaining configuration is still applied
    setupCpusets

    #To run NTP and PTP in parallel, NTP should be true in the PTP master node and false in the
    #PTP slave node. Depending on the role, the PHC2SYS commands also differ (see runPtp).
    #PHC2SYS - Physical Clock to System Clock, this utility is used to synchronize the
    #hardware clock and the user clock of a particular system
    if (( ntp == 0 )); then
        timedatectl set-ntp false
    else
        timedatectl set-ntp true
    fi
    timedatectl >> "$log_file"

    #Verify whether the configured interface is up. If not, wait until the interface is up
    interface_check=$(waitForInterface)

    #An interface that is still "down" after the wait is configured anyway;
    #any other state is treated as an error.
    if [[ "$interface_check" == "up" || "$interface_check" == "down" ]]; then
        configureTransmit
        configureReceive
        configureVlanPolicies
        tuneRealtime
        runPtp
        retval=0
    else
        echo "Error: Interface is not up"
        retval=1
    fi

    pinInterruptThreads

    #Create a directory /mnt/ramdisk if already not present
    #This directory is created to store the latency csv files generated by the PubSub application
    if [[ ! -d /mnt/ramdisk ]]; then
        sudo mkdir -p /mnt/ramdisk
    fi

    return "$retval"
}

################################################################################
# Parse the command line argument                                              #
################################################################################
#Arguments: the command line of the script
#Returns: 0 for help/version/success, non-zero for an invalid option or when main fails
parseCommandLine() {
    #Indicate specification for single character options
    local optstring="hmsnvi:"
    #Indicate specification for long options
    local optstringLong="help,master,slave,ntp,version,interface:"
    local parsed status

    #Special case that nothing was provided on the command line so print usage
    if (( $# == 0 )); then
        printUsage
        return 0
    fi

    #Parse the options using getopt command
    parsed=$(getopt --options "$optstring" --longoptions "$optstringLong" -- "$@")
    status=$?
    if (( status != 0 )); then
        printUsage
        return 1
    fi

    #The following constructs the command by concatenating arguments
    eval set -- "$parsed"

    #Loop over the options
    #The error handling will catch cases where an argument is missing
    #Shift over the known number of options/arguments
    while true; do
        case "$1" in
            -h|--help)
                printUsage
                return 0
                ;;
            -i|--interface)
                IFACE=$2
                shift 2
                ;;
            -m|--master)
                ptp_master=1
                shift
                ;;
            -s|--slave)
                ptp_slave=1
                shift
                ;;
            -n|--ntp)
                ntp=1
                shift
                ;;
            -v|--version)
                printVersion
                return 0
                ;;
            --)
                shift
                break
                ;;
            *)
                echo "Error: Invalid option"
                printUsage
                return 1
                ;;
        esac
    done

    #Main() function call
    main
    status=$?
    if (( status != 0 )); then
        printUsage
        echo "Application dependencies execution failed"
    else
        echo "Application dependencies executed successfully"
    fi
    return "$status"
}

#Parse the command line options
#Pass all arguments to the function
parseCommandLine "$@"