Skip to content

Commit

Permalink
initial fabric_port_down_check function (#114)
Browse files Browse the repository at this point in the history
* initial fabric_port_down_check function

* fabric_port_down_check check + tests + doc

* Update aci-preupgrade-validation-script.py

Co-authored-by: takishida <[email protected]>

* Update aci-preupgrade-validation-script.py

Co-authored-by: takishida <[email protected]>

---------

Co-authored-by: takishida <[email protected]>
  • Loading branch information
monrog2 and takishida authored May 6, 2024
1 parent 043fa78 commit 0a09334
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 1 deletion.
37 changes: 37 additions & 0 deletions aci-preupgrade-validation-script.py
Original file line number Diff line number Diff line change
Expand Up @@ -3020,6 +3020,42 @@ def vmm_active_uplinks_check(index, total_checks, **kwargs):
return result


def fabric_port_down_check(index, total_checks, **kwargs):
    """Report fabric ports that raised fault F1394 (ethpm-if-port-down-fabric).

    Queries the faultInst class for F1394 faults with the rule
    `ethpm-if-port-down-fabric` and lists each affected port together with
    the down reason taken from the fault description.

    Faults whose dn or description cannot be parsed are still reported, in
    the unformatted table (raw dn + raw description), so nothing is dropped.

    Args:
        index: position of this check, passed through to print_title.
        total_checks: total number of checks, passed through to print_title.
        **kwargs: accepted for a uniform check signature; unused here.

    Returns:
        PASS when the query returns no faults, otherwise FAIL_O.
    """
    title = 'Fabric Port is Down (F1394 ethpm-if-port-down-fabric)'
    result = FAIL_O
    msg = ''
    headers = ["Pod", "Node", "Int", "Reason"]
    unformatted_headers = ['dn', 'Fault Description']
    unformatted_data = []
    data = []
    recommended_action = 'Identify if these ports are needed for redundancy and reason for being down'
    doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations#fabric-port-is-down'
    print_title(title, index, total_checks)

    fault_api = 'faultInst.json'
    fault_api += '?&query-target-filter=and(eq(faultInst.code,"F1394")'
    fault_api += ',eq(faultInst.rule,"ethpm-if-port-down-fabric"))'

    faultInsts = icurl('class', fault_api)
    # Accept multi-digit module numbers (eth10/1) and breakout sub-ports
    # (eth1/1/1) as well as the common ethX/Y form.
    dn_re = node_regex + r'/.+/phys-\[(?P<int>eth\d+/\d+(?:/\d+)?)\]'

    for faultInst in faultInsts:
        attrs = faultInst['faultInst']['attributes']
        m = re.search(dn_re, attrs['dn'])
        # partition() never raises: `sep` is empty when the description has
        # no "reason:" marker, in which case the fault is shown unformatted
        # instead of crashing the whole check.
        _, sep, reason = attrs['descr'].partition("reason:")
        if m and sep:
            data.append([m.group('pod'), m.group('node'), m.group('int'), reason])
        else:
            unformatted_data.append([attrs['dn'], attrs['descr']])

    if not data and not unformatted_data:
        result = PASS
    print_result(title, result, msg, headers, data, unformatted_headers, unformatted_data, recommended_action=recommended_action, doc_url=doc_url)
    return result


if __name__ == "__main__":
prints(' ==== %s%s, Script Version %s ====\n' % (ts, tz, SCRIPT_VERSION))
prints('!!!! Check https://github.com/datacenter/ACI-Pre-Upgrade-Validation-Script for Latest Release !!!!\n')
Expand Down Expand Up @@ -3078,6 +3114,7 @@ def vmm_active_uplinks_check(index, total_checks, **kwargs):
lldp_with_infra_vlan_mismatch_check,
hw_program_fail_check,
scalability_faults_check,
fabric_port_down_check,

# Configurations
vpc_paired_switches_check,
Expand Down
50 changes: 49 additions & 1 deletion docs/docs/validations.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ Items | Faults | This Script
[Different infra VLAN via LLDP][f16] | F0454: infra-vlan-mismatch | :white_check_mark: | :white_check_mark: 4.2(4) | :white_check_mark:
[HW Programming Failure][f17] | F3544: L3Out Prefixes<br>F3545: Contracts | :white_check_mark: | :white_check_mark: 5.1(1) | :white_check_mark:
[Scalability (faults related to Capacity Dashboard)][f18] | TCA faults for eqptcapacityEntity | :white_check_mark: | :no_entry_sign: | :white_check_mark:
[Fabric Port is Down][f19] | F1394: ethpm-if-port-down-fabric | :white_check_mark: | :no_entry_sign: | :no_entry_sign:



[f1]: #apic-disk-space-usage
Expand All @@ -90,7 +92,7 @@ Items | Faults | This Script
[f16]: #different-infra-vlan-via-lldp
[f17]: #hw-programming-failure
[f18]: #scalability-faults-related-to-capacity-dashboard

[f19]: #fabric-port-is-down

### Configuration Checks

Expand Down Expand Up @@ -1213,6 +1215,52 @@ Examples of what's monitored via `Operations > Capacity Dashboard > Leaf Capacit
```


### Fabric Port is Down

The script checks for fault code `F1394` with rule `ethpm-if-port-down-fabric`, which indicates that ACI has flagged configured Fabric ports for being in a down state.

It is important to understand whether or not these downed fabric ports are preventing your leaf nodes from having redundant paths. If unexpected, address these issues before performing the ACI Upgrade.

Failure to do so may lead to outages during switch upgrades due to leaf nodes not having redundant spine paths.

!!! example "Fault Example (F1394: ethpm-if-port-down-fabric)"
```
admin@f1-apic1:~> moquery -c faultInst -f 'fault.Inst.code=="F1394"'
Total Objects shown: 4

# fault.Inst
code : F1394
ack : no
alert : no
annotation :
cause : interface-physical-down
changeSet : lastLinkStChg (New: 2023-10-24T03:24:57.051+00:00), operBitset (New: 4-5,11,35)
childAction :
created : 2023-09-09T08:53:35.125+00:00
delegated : no
descr : Port is down, reason:err-disabled-link-flaps(err-disabled), used by:Fabric
dn : topology/pod-1/node-101/sys/phys-[eth1/53]/phys/fault-F1394
domain : access
extMngdBy : undefined
highestSeverity : minor
lastTransition : 2023-10-24T03:24:57.101+00:00
lc : raised
modTs : never
occur : 1
origSeverity : minor
prevSeverity : minor
rn : fault-F1394
rule : ethpm-if-port-down-fabric
severity : minor
status :
subject : port-down
title :
type : communications
uid :
userdom : all
--- omit ---
```

## Configuration Check Details

### VPC-paired Leaf switches
Expand Down
29 changes: 29 additions & 0 deletions tests/fabric_port_down_check/faultInst_pos.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[
{
"faultInst": {
"attributes": {
"descr": "Port is down, reason:err-disabled-link-flaps(err-disabled), used by:Fabric",
"dn": "topology/pod-1/node-105/sys/phys-[eth1/53]/phys/fault-F1394",
"rule": "ethpm-if-port-down-fabric"
}
}
},
{
"faultInst": {
"attributes": {
"descr": "Port is down, reason:linkNotConnected(connected), used by:Fabric",
"dn": "topology/pod-1/node-101/sys/phys-[eth1/53]/phys/fault-F1394",
"rule": "ethpm-if-port-down-fabric"
}
}
},
{
"faultInst": {
"attributes": {
"descr": "Port is down, reason:linkNotConnected(connected), used by:Fabric",
"dn": "topology/pod-1/node-102/sys/phys-eth1/53/phys/fault-F1394",
"rule": "ethpm-if-port-down-fabric"
}
}
}
]
38 changes: 38 additions & 0 deletions tests/fabric_port_down_check/test_fabric_port_down_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os
import pytest
import logging
import importlib
from helpers.utils import read_data

# The script's file name contains hyphens, so it cannot be loaded with a
# plain `import` statement; go through importlib instead.
script = importlib.import_module("aci-preupgrade-validation-script")

log = logging.getLogger(__name__)
# Directory holding this test module and its JSON fixtures.
_this_file = os.path.abspath(__file__)
dir = os.path.dirname(_this_file)


# icurl queries
# Must match, character for character, the query string built by the check.
faultInsts = (
    'faultInst.json'
    '?&query-target-filter=and(eq(faultInst.code,"F1394")'
    ',eq(faultInst.rule,"ethpm-if-port-down-fabric"))'
)


# Each case pairs the canned icurl responses (keyed by query string) with the
# result the check is expected to return for them.
_FABRIC_PORT_DOWN_CASES = [
    # F1394 faults returned -> the check must fail
    ({faultInsts: read_data(dir, "faultInst_pos.json")}, script.FAIL_O),
    # no faults returned -> the check passes
    ({faultInsts: []}, script.PASS),
]


@pytest.mark.parametrize("icurl_outputs, expected_result", _FABRIC_PORT_DOWN_CASES)
def test_logic(mock_icurl, expected_result):
    """Run fabric_port_down_check against mocked icurl data and compare results.

    NOTE(review): `mock_icurl` is presumably a shared fixture that serves
    `icurl_outputs` in place of real API calls -- confirm in conftest.
    """
    outcome = script.fabric_port_down_check(1, 1)
    assert outcome == expected_result

0 comments on commit 0a09334

Please sign in to comment.