On a Mellanox ConnectX-4 (mlx5_core), attempting to demo the available VPP feature yields unstable results. I attempted to apply the following configuration to a table with no flowtables defined:
+ vpp {
+ interfaces {
+ bridge br10 {
+ member {
+ interface eth2 {
+ }
+ interface lo10 {
+ bvi
+ }
+ interface vxlan10 {
+ }
+ }
+ }
+ loopback lo10 {
+ kernel-interface "vpptun10"
+ }
+ vxlan vxlan10 {
+ remote "10.10.10.2"
+ source-address "10.10.10.1"
+ vni "10"
+ }
+ }
+ kernel-interfaces vpptun10 {
+ address "10.10.0.10/24"
+ }
+ settings {
+ interface eth1 {
+ driver "dpdk"
+ }
+ interface eth2 {
+ driver "dpdk"
+ }
+ }
+ }[14522.347560] page_pool_release_retry() stalled pool shutdown 1 inflight 14499 sec (hundreds of times) [18947.547157] IPv4: martian source 85.195.XXX.XXX (my IP) from 0.0.0.0, on dev eth1 [18947.547187] ll header: 00000000: 50 6b 4b 29 7f 7c 00 00 f7 f7 02 56 08 00 [26662.736708] mlx5_core 0000:01:00.1 eth2: Link down [64267.182723] mlx5_core 0000:01:00.1 eth2: Link up [76021.529987] mlx5_core 0000:01:00.1 eth2: Link down [78457.354303] mlx5_core 0000:01:00.1 eth2: Link up[88758.804499] Initializing XFRM netlink socket [88760.837601] mlx5_core 0000:01:00.0 eth1: Error cqe on cqn 0x22, ci 0x0, qn 0x10be, opcode 0xd, syndrome 0x4, vendor syndrome 0x51 [88760.838153] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88760.838156] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88760.838159] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88760.838161] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 be 00 00 d8 d2 [88760.838164] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64 [88760.838168] 00000000: 00 00 00 0a 00 10 be 04 00 00 00 08 00 00 00 00 [88760.838170] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 00 00 [88760.838173] 00000020: f7 f7 02 56 50 6b 4b 29 7f 7c 08 00 45 00 00 54 [88760.838176] 00000030: 00 00 00 50 00 00 22 00 00 00 00 00 6f 88 40 52 [88760.838190] mlx5_core 0000:01:00.0 eth1: ERR CQE on SQ: 0x10be [88761.065281] ------------[ cut here ]------------ [88761.065283] WARNING: CPU: 2 PID: 65910 at drivers/iommu/dma-iommu.c:1094 iommu_dma_unmap_page+0x74/0x90 [88761.065289] Modules linked in: xfrm_user xfrm_algo uio_pci_generic uio nf_conntrack_bridge nft_flow_offload nf_flow_table_inet nf_flow_table nft_nat nft_masq af_packet nft_ct nft_chain_nat nf_nat nf_tables nfnetlink_cthelper nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink binfmt_misc intel_rapl_common crct10dif_pclmul crc32_pclmul ghash_clmulni_intel sha512_ssse3 sha256_ssse3 sha1_ssse3 aesni_intel virtio_console virtio_balloon crypto_simd 
cryptd iTCO_wdt pcspkr iTCO_vendor_support button evdev tcp_bbr sch_fq_codel mpls_iptunnel mpls_router ip_tunnel br_netfilter bridge stp llc vfio_pci vfio_pci_core irqbypass vfio_iommu_type1 vfio fuse efi_pstore configfs ip_tables x_tables autofs4 usb_storage ohci_hcd sd_mod squashfs lz4_decompress loop overlay ext4 crc16 mbcache jbd2 nls_cp437 vfat fat efivarfs nls_ascii mlx5_ib ib_uverbs ib_core hid_generic usbhid hid virtio_net net_failover failover ahci virtio_blk libahci libata virtio_pci virtio_pci_legacy_dev scsi_mod virtio_pci_modern_dev crc32c_intel scsi_common [88761.065341] mlx5_core virtio ehci_pci virtio_ring i2c_i801 mlxfw pci_hyperv_intf uhci_hcd i2c_smbus lpc_ich ehci_hcd [88761.065348] CPU: 2 PID: 65910 Comm: vpp_main Not tainted 6.6.79-vyos #1 [88761.065350] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [88761.065351] RIP: 0010:iommu_dma_unmap_page+0x74/0x90 [88761.065353] Code: 2b 48 3b 28 72 26 48 3b 68 08 73 20 4d 89 f8 44 89 f1 4c 89 ea 48 89 ee 48 89 df 5b 5d 41 5c 41 5d 41 5e 41 5f e9 2c e3 ae ff <0f> 0b 5b 5d 41 5c 41 5d 41 5e 41 5f e9 26 4a 6c 00 66 66 2e 0f 1f [88761.065354] RSP: 0000:ffff9800c6d0bd18 EFLAGS: 00010246 [88761.065356] RAX: 0000000000000000 RBX: ffff899880f120c0 RCX: 0000000000000000 [88761.065357] RDX: 0000000000000000 RSI: ffff8999828cd000 RDI: 0000000000000000 [88761.065358] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000001 [88761.065358] R10: ffff9800c2a9b100 R11: ffff9800c2a9b100 R12: 000000006f884052 [88761.065359] R13: 0000000000000050 R14: 0000000000000001 R15: 0000000000000000 [88761.065362] FS: 00007f7fce0e5f40(0000) GS:ffff899befd00000(0000) knlGS:0000000000000000 [88761.065363] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [88761.065364] CR2: 00007f7f84a7dff8 CR3: 00000001e15e2000 CR4: 0000000000750ee0 [88761.065366] PKRU: 55555554 [88761.065366] Call Trace: [88761.065371] <TASK> [88761.065372] ? 
iommu_dma_unmap_page+0x74/0x90 [88761.065373] ? __warn+0x78/0x110 [88761.065378] ? iommu_dma_unmap_page+0x74/0x90 [88761.065379] ? report_bug+0x159/0x180 [88761.065383] ? handle_bug+0x58/0x90 [88761.065385] ? exc_invalid_op+0x13/0x60 [88761.065387] ? asm_exc_invalid_op+0x16/0x20 [88761.065391] ? iommu_dma_unmap_page+0x74/0x90 [88761.065392] ? iommu_dma_unmap_page+0x29/0x90 [88761.065394] mlx5e_poll_tx_cq+0x151/0x510 [mlx5_core] [88761.065433] mlx5e_napi_poll+0x7d/0x710 [mlx5_core] [88761.065458] ? srso_alias_return_thunk+0x5/0xfbef5 [88761.065460] __napi_poll+0x23/0x1a0 [88761.065463] net_rx_action+0x141/0x2c0 [88761.065465] handle_softirqs+0xd2/0x280 [88761.065467] __irq_exit_rcu+0x68/0x90 [88761.065468] common_interrupt+0x3c/0xa0 [88761.065471] asm_common_interrupt+0x22/0x40 [88761.065472] RIP: 0033:0x7f7fcf89b473 [88761.065473] Code: 4c 8b 5c 24 30 48 83 c4 48 eb a1 66 2e 0f 1f 84 00 00 00 00 00 66 90 64 48 8b 14 25 08 00 00 00 48 8b 05 48 fc 01 00 48 39 02 <75> 16 48 8b 07 48 c1 e0 04 48 8b 04 02 48 83 f8 ff 74 05 48 03 47 [88761.065475] RSP: 002b:00007f7f86cc2dd8 EFLAGS: 00000246 [88761.065476] RAX: 0000000000000003 RBX: 00007f7f8e0e5700 RCX: 0000000000000000 [88761.065477] RDX: 00007f7fce0e68e0 RSI: 0000000000000000 RDI: 00007f7fcf87af30 [88761.065477] RBP: 00012224361f9ed0 R08: 0000000000000024 R09: 00000000000008f3 [88761.065478] R10: 0000000000000000 R11: 0000000000000000 R12: 00007f7f8ebbe4c0 [88761.065479] R13: 00007f7f8e0e5700 R14: 00012224361f9ed0 R15: 000055c43980ec40 [88761.065481] </TASK> [88761.065481] ---[ end trace 0000000000000000 ]--- [88761.065483] mlx5_core 0000:01:00.0 eth1: Error cqe on cqn 0x22, ci 0x4, qn 0x10be, opcode 0xd, syndrome 0x4, vendor syndrome 0x51 [88761.065784] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88761.065785] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88761.065786] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88761.065786] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 be 00 
00 dc d2 [88761.065787] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64 [88761.065788] 00000000: 00 00 00 0a 00 10 be 04 00 00 00 08 00 00 00 00 [88761.065789] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 00 00 [88761.065790] 00000020: f7 f7 02 56 50 6b 4b 29 7f 7c 08 00 45 00 00 28 [88761.065791] 00000030: 00 00 00 24 00 00 22 00 00 00 00 00 6f 53 f0 52 [88761.065807] mlx5_core 0000:01:00.0 eth1: ERR CQE on SQ: 0x10be [88761.106890] mlx5_core 0000:01:00.0 eth1: Error cqe on cqn 0x1d, ci 0x0, qn 0x10b9, opcode 0xd, syndrome 0x4, vendor syndrome 0x51 [88761.107189] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88761.107192] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88761.107195] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88761.107197] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 b9 00 00 df d2 [88761.107200] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64 [88761.107203] 00000000: 00 00 00 0a 00 10 b9 04 00 00 00 08 00 00 00 00 [88761.107205] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 00 00 [88761.107207] 00000020: f7 f7 02 56 50 6b 4b 29 7f 7c 08 00 45 00 00 28 [88761.107210] 00000030: 00 00 00 2a 00 00 22 00 00 00 00 00 6f 53 e0 52 [88761.107224] mlx5_core 0000:01:00.0 eth1: ERR CQE on SQ: 0x10b9 [88762.013802] mlx5_core 0000:01:00.0 defunct_eth1: renamed from eth1 [88763.071279] mlx5_core 0000:01:00.0 defunct_eth1: Link up [88763.076888] mlx5_core 0000:01:00.0 defunct_eth1: Error cqe on cqn 0x18, ci 0x0, qn 0x10c8, opcode 0xd, syndrome 0x4, vendor syndrome 0x51 [88763.077207] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88763.077208] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88763.077209] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88763.077210] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 c8 00 00 ae d2 [88763.077211] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64 [88763.077212] 00000000: 00 00 00 0a 00 10 c8 04 00 00 00 
08 00 00 00 00 [88763.077213] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 33 33 [88763.077214] 00000020: 00 00 00 16 50 6b 4b 29 7f 7c 86 dd 60 00 00 00 [88763.077215] 00000030: 00 00 00 98 00 00 22 00 00 00 00 00 6f 53 a0 14 [88763.082685] mlx5_core 0000:01:00.0 defunct_eth1: ERR CQE on SQ: 0x10c8 [88763.083367] tun: Universal TUN/TAP device driver, 1.6 [88763.103292] infiniband mlx5_0: dump_cqe:273:(pid 65950): WC error: 6, Message: memory bind operation error [88763.103295] cqe_dump: 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88763.103297] cqe_dump: 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88763.103297] cqe_dump: 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88763.103298] cqe_dump: 00000030: 00 00 00 00 12 00 78 06 25 00 00 93 00 00 f7 d2 [88763.103326] ------------[ cut here ]------------ [88763.103327] WARNING: CPU: 2 PID: 65910 at drivers/infiniband/hw/mlx5/umr.c:333 mlx5r_umr_post_send_wait+0x32e/0x4f0 [mlx5_ib] [88763.103339] Modules linked in: vhost_net vhost vhost_iotlb tap tun xfrm_user xfrm_algo uio_pci_generic uio nf_conntrack_bridge nft_flow_offload nf_flow_table_inet nf_flow_table nft_nat nft_masq af_packet nft_ct nft_chain_nat nf_nat nf_tables nfnetlink_cthelper nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink binfmt_misc intel_rapl_common crct10dif_pclmul crc32_pclmul ghash_clmulni_intel sha512_ssse3 sha256_ssse3 sha1_ssse3 aesni_intel virtio_console virtio_balloon crypto_simd cryptd iTCO_wdt pcspkr iTCO_vendor_support button evdev tcp_bbr sch_fq_codel mpls_iptunnel mpls_router ip_tunnel br_netfilter bridge stp llc vfio_pci vfio_pci_core irqbypass vfio_iommu_type1 vfio fuse efi_pstore configfs ip_tables x_tables autofs4 usb_storage ohci_hcd sd_mod squashfs lz4_decompress loop overlay ext4 crc16 mbcache jbd2 nls_cp437 vfat fat efivarfs nls_ascii mlx5_ib ib_uverbs ib_core hid_generic usbhid hid virtio_net net_failover failover ahci virtio_blk libahci libata virtio_pci virtio_pci_legacy_dev 
scsi_mod [88763.103381] virtio_pci_modern_dev crc32c_intel scsi_common mlx5_core virtio ehci_pci virtio_ring i2c_i801 mlxfw pci_hyperv_intf uhci_hcd i2c_smbus lpc_ich ehci_hcd [88763.103389] CPU: 2 PID: 65910 Comm: vpp_main Tainted: G W 6.6.79-vyos #1 [88763.103391] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [88763.103391] RIP: 0010:mlx5r_umr_post_send_wait+0x32e/0x4f0 [mlx5_ib] [88763.103398] Code: 48 89 ef e8 34 2c 98 d2 48 8d 7c 24 60 e8 6a 1b 98 d2 44 8b 4c 24 58 45 85 c9 74 6c 41 83 f9 05 0f 84 d8 fd ff ff 48 8b 1c 24 <0f> 0b 65 4c 8b 2c 25 80 df 02 00 4c 8d a3 08 05 00 00 45 8b 85 28 [88763.103400] RSP: 0018:ffff9800c6d0b978 EFLAGS: 00010202 [88763.103401] RAX: 0000000000000001 RBX: ffff899889642000 RCX: 0000000000000000 [88763.103402] RDX: 0000000000000001 RSI: 0000000055555554 RDI: ffff9800c6d0b9e0 [88763.103403] RBP: ffff899889642b28 R08: ffff899befd2f238 R09: 0000000000000006 [88763.103404] R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000080 [88763.103405] R13: ffff89988d3e6800 R14: ffff899889642b10 R15: 0000000000000000 [88763.103407] FS: 00007f7fce0e5f40(0000) GS:ffff899befd00000(0000) knlGS:0000000000000000 [88763.103408] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [88763.103409] CR2: 0000557e474645c0 CR3: 00000001e15e2000 CR4: 0000000000750ee0 [88763.103411] PKRU: 55555554 [88763.103411] Call Trace: [88763.103413] <TASK> [88763.103414] ? mlx5r_umr_post_send_wait+0x32e/0x4f0 [mlx5_ib] [88763.103421] ? __warn+0x78/0x110 [88763.103425] ? mlx5r_umr_post_send_wait+0x32e/0x4f0 [mlx5_ib] [88763.103432] ? report_bug+0x159/0x180 [88763.103436] ? handle_bug+0x58/0x90 [88763.103438] ? exc_invalid_op+0x13/0x60 [88763.103439] ? asm_exc_invalid_op+0x16/0x20 [88763.103443] ? mlx5r_umr_post_send_wait+0x32e/0x4f0 [mlx5_ib] [88763.103449] ? mlx5r_umr_post_send_wait+0x316/0x4f0 [mlx5_ib] [88763.103456] ? 
__pfx_mlx5r_umr_done+0x10/0x10 [mlx5_ib] [88763.103462] mlx5r_umr_update_mr_pas+0x255/0x3c0 [mlx5_ib] [88763.103470] ? __pfx_ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0x10/0x10 [ib_uverbs] [88763.103477] create_real_mr+0x17c/0x1a0 [mlx5_ib] [88763.103485] ? rdma_lookup_get_uobject+0x37/0x180 [ib_uverbs] [88763.103490] ? __pfx_ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0x10/0x10 [ib_uverbs] [88763.103495] ib_uverbs_reg_mr+0x16e/0x2a0 [ib_uverbs] [88763.103501] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xbf/0x130 [ib_uverbs] [88763.103507] ib_uverbs_cmd_verbs+0xbfa/0xca0 [ib_uverbs] [88763.103512] ? srso_alias_return_thunk+0x5/0xfbef5 [88763.103514] ? __pfx_ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0x10/0x10 [ib_uverbs] [88763.103518] ? srso_alias_return_thunk+0x5/0xfbef5 [88763.103522] ? srso_alias_return_thunk+0x5/0xfbef5 [88763.103523] ? blk_finish_plug+0x20/0x40 [88763.103525] ? srso_alias_return_thunk+0x5/0xfbef5 [88763.103527] ? do_madvise.part.0+0x561/0xc60 [88763.103530] ib_uverbs_ioctl+0x9f/0x110 [ib_uverbs] [88763.103534] __x64_sys_ioctl+0x8b/0xc0 [88763.103537] do_syscall_64+0x34/0x80 [88763.103539] entry_SYSCALL_64_after_hwframe+0x78/0xe2 [88763.103541] RIP: 0033:0x7f7fce334d1b [88763.103542] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1c 48 8b 44 24 18 64 48 2b 04 25 28 00 00 [88763.103544] RSP: 002b:00007f7f84a7e2d0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [88763.103545] RAX: ffffffffffffffda RBX: 00007f7f84a7e350 RCX: 00007f7fce334d1b [88763.103546] RDX: 00007f7f84a7e370 RSI: 00000000c0181b01 RDI: 0000000000000013 [88763.103547] RBP: 0000000000000028 R08: 000055c45d130ae0 R09: 00007f7f84a7e388 [88763.103547] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f7f84a7e4b4 [88763.103548] R13: 000000000000000c R14: 00007f7f84a7e4c8 R15: 000055c45d130c20 [88763.103550] </TASK> [88763.103550] ---[ end trace 0000000000000000 ]--- 
[88763.103551] infiniband mlx5_0: mlx5r_umr_post_send_wait:334:(pid 65910): reg umr failed (6). Trying to recover and resubmit the flushed WQEs [88763.270693] mlx5_core 0000:01:00.1 defunct_eth2: renamed from eth2 [88763.407033] mlx5_core 0000:01:00.1 defunct_eth2: Link up [88763.917606] mlx5_core 0000:01:00.0 defunct_eth1: Error cqe on cqn 0x1d, ci 0x0, qn 0x10cd, opcode 0xd, syndrome 0x4, vendor syndrome 0x51 [88763.917933] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88763.917936] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88763.917939] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88763.917941] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 cd 00 00 ab d2 [88763.917944] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64 [88763.917947] 00000000: 00 00 00 0a 00 10 cd 04 00 00 00 08 00 00 00 00 [88763.917949] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 33 33 [88763.917956] 00000020: 00 00 00 16 50 6b 4b 29 7f 7c 86 dd 60 00 00 00 [88763.917958] 00000030: 00 00 00 84 00 00 22 00 00 00 00 00 6f 4f f0 14 [88763.918276] mlx5_core 0000:01:00.0 defunct_eth1: ERR CQE on SQ: 0x10cd [88764.001391] mlx5_core 0000:01:00.0 defunct_eth1: Error cqe on cqn 0x18, ci 0x2, qn 0x10c8, opcode 0xd, syndrome 0x4, vendor syndrome 0x51 [88764.001701] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88764.001703] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88764.001704] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88764.001704] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 c8 00 00 ac d2 [88764.001705] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64 [88764.001706] 00000000: 00 00 00 0a 00 10 c8 04 00 00 00 08 00 00 00 00 [88764.001707] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 33 33 [88764.001708] 00000020: ff 00 00 12 50 6b 4b 29 7f 7c 86 dd 60 00 00 00 [88764.001708] 00000030: 00 00 00 44 00 00 22 00 00 00 00 00 6f 53 ac 14 [88764.001743] mlx5_core 0000:01:00.0 
defunct_eth1: ERR CQE on SQ: 0x10c8 [88764.008936] mlx5_core 0000:01:00.0 defunct_eth1: Error cqe on cqn 0x1d, ci 0x1, qn 0x10cd, opcode 0xd, syndrome 0x4, vendor syndrome 0x51 [88764.009245] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88764.009246] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88764.009247] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88764.009248] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 cd 00 00 aa d2 [88764.009249] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64 [88764.009250] 00000000: 00 00 00 0a 00 10 cd 04 00 00 00 08 00 00 00 00 [88764.009251] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 33 33 [88764.009251] 00000020: ff 29 7f 7c 50 6b 4b 29 7f 7c 86 dd 60 00 00 00 [88764.009252] 00000030: 00 00 00 44 00 00 22 00 00 00 00 00 6f 4f f5 94 [88764.009278] mlx5_core 0000:01:00.0 defunct_eth1: ERR CQE on SQ: 0x10cd [88768.094171] mlx5_core 0000:01:00.0 defunct_eth1: Error cqe on cqn 0x27, ci 0x0, qn 0x10d7, opcode 0xd, syndrome 0x4, vendor syndrome 0x51 [88768.094492] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88768.094493] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88768.094494] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88768.094495] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 d7 00 00 b1 d2 [88768.094496] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64 [88768.094497] 00000000: 00 00 00 0a 00 10 d7 04 00 00 00 08 00 00 00 00 [88768.094498] 00000010: 00 00 00 00 c0 00 00 00 00 00 00 00 00 12 33 33 [88768.094499] 00000020: 00 01 00 02 50 6b 4b 29 7f 7c 86 dd 60 0b 40 53 [88768.094499] 00000030: 00 00 00 8a 00 00 22 00 00 00 00 00 6f 4f e9 1c [88768.094509] mlx5_core 0000:01:00.0 defunct_eth1: ERR CQE on SQ: 0x10d7 [88768.158701] page_pool_release_retry() stalled pool shutdown 1 inflight 88750 sec [88782.302244] mlx5_core 0000:01:00.0 defunct_eth1: Error cqe on cqn 0x22, ci 0x0, qn 0x10d2, opcode 0xd, syndrome 0x4, vendor 
syndrome 0x51 [88782.302561] 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88782.302563] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88782.302564] 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [88782.302564] 00000030: 00 00 00 00 04 00 51 04 0a 00 10 d2 00 00 b4 d2 [88782.302565] WQE DUMP: WQ size 8192 WQ cur size 0, WQE index 0x0, len: 64 [88782.302567] 00000000: 00 00 00 0a 00 10 d2 04 00 00 00 08 00 00 00 00 [88782.302567] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 12 00 00 [88782.302568] 00000020: f7 f7 02 56 50 6b 4b 29 7f 7c 86 dd 60 00 00 00 [88782.302569] 00000030: 00 00 00 44 00 00 22 00 00 00 00 00 6b ff f0 14 [88782.302593] mlx5_core 0000:01:00.0 defunct_eth1: ERR CQE on SQ: 0x10d2 [88819.483169] page_pool_release_retry() stalled pool shutdown 206 inflight 60 sec [88820.616111] mlx5_core 0000:01:00.0 defunct_eth1: Failed to get min RX wqes on Channel[1] RQN[0xc0004f] wq cur_sz(0) min_rx_wqes(128) [88820.616117] mlx5_core 0000:01:00.0 defunct_eth1: RX timeout on channel: 1, ICOSQ: 0x10e7, RQ: 0xc0004f, CQ: 0x33 [88820.618402] mlx5_core 0000:01:00.0 defunct_eth1: EQ 0x8: Cons = 0x7a31d, irqn = 0x3a [88820.640398] mlx5_core 0000:01:00.0 defunct_eth1: Failed to get min RX wqes on Channel[2] RQN[0xc00050] wq cur_sz(0) min_rx_wqes(128) [88820.640406] mlx5_core 0000:01:00.0 defunct_eth1: RX timeout on channel: 2, ICOSQ: 0x10ec, RQ: 0xc00050, CQ: 0x38 [88820.662083] mlx5_core 0000:01:00.0 defunct_eth1: Failed to get min RX wqes on Channel[3] RQN[0xc00051] wq cur_sz(0) min_rx_wqes(128) [88820.662090] mlx5_core 0000:01:00.0 defunct_eth1: RX timeout on channel: 3, ICOSQ: 0x10f1, RQ: 0xc00051, CQ: 0x3d [88820.668110] mlx5_core 0000:01:00.0: free_4k:279:(pid 65878): page not found [89121.542290] page_pool_release_retry() stalled pool shutdown 206 inflight 362 sec [89128.615872] mlx5_core 0000:01:00.0: E-Switch: Unload vfs: mode(LEGACY), nvfs(0), necvfs(0), active vports(0) [89128.618823] mlx5_core 0000:01:00.0: E-Switch: 
Disable: mode(LEGACY), nvfs(0), necvfs(0), active vports(0) [89130.629664] page_pool_release_retry() stalled pool shutdown 1 inflight 89113 sec [89180.889193] mlx5_core 0000:01:00.0: E-Switch: Disable: mode(LEGACY), nvfs(0), necvfs(0), active vports(0) [89181.104693] mlx5_core 0000:01:00.0: E-Switch: cleanup [89181.954119] page_pool_release_retry() stalled pool shutdown 206 inflight 422 sec [89182.231397] mlx5_core 0000:01:00.0: mlx5_cmd_out_err:806:(pid 739): MANAGE_PAGES(0x108) op_mod(0x2) failed, status bad system state(0x4), syndrome (0xe8912), err(-5) [89182.231787] mlx5_core 0000:01:00.0: reclaim_pages:558:(pid 739): failed reclaiming pages: err -5 [89182.232033] mlx5_core 0000:01:00.0: mlx5_reclaim_root_pages:698:(pid 739): reclaim_pages err (-5) func_id=0x0 ec_func=0x0 [89182.232036] ------------[ cut here ]------------ [89182.232036] FW pages counter is 28674 after reclaiming all pages [89182.232066] WARNING: CPU: 0 PID: 739 at drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c:730 mlx5_reclaim_startup_pages+0x22a/0x290 [mlx5_core] [89182.232108] Modules linked in: vhost_net vhost vhost_iotlb tap tun xfrm_user xfrm_algo uio_pci_generic uio nf_conntrack_bridge nft_flow_offload nf_flow_table_inet nf_flow_table nft_nat nft_masq af_packet nft_ct nft_chain_nat nf_nat nf_tables nfnetlink_cthelper nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink binfmt_misc intel_rapl_common crct10dif_pclmul crc32_pclmul ghash_clmulni_intel sha512_ssse3 sha256_ssse3 sha1_ssse3 aesni_intel virtio_console virtio_balloon crypto_simd cryptd iTCO_wdt pcspkr iTCO_vendor_support button evdev tcp_bbr sch_fq_codel mpls_iptunnel mpls_router ip_tunnel br_netfilter bridge stp llc vfio_pci vfio_pci_core irqbypass vfio_iommu_type1 vfio fuse efi_pstore configfs ip_tables x_tables autofs4 usb_storage ohci_hcd sd_mod squashfs lz4_decompress loop overlay ext4 crc16 mbcache jbd2 nls_cp437 vfat fat efivarfs nls_ascii mlx5_ib ib_uverbs ib_core hid_generic usbhid hid virtio_net net_failover 
failover ahci virtio_blk libahci libata virtio_pci virtio_pci_legacy_dev scsi_mod [89182.232158] virtio_pci_modern_dev crc32c_intel scsi_common mlx5_core virtio ehci_pci virtio_ring i2c_i801 mlxfw pci_hyperv_intf uhci_hcd i2c_smbus lpc_ich ehci_hcd [89182.232167] CPU: 0 PID: 739 Comm: python3 Tainted: G W 6.6.79-vyos #1 [89182.232169] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [89182.232170] RIP: 0010:mlx5_reclaim_startup_pages+0x22a/0x290 [mlx5_core] [89182.232193] Code: 05 00 00 e8 c8 8e a2 d2 e9 21 ff ff ff 0f 0b 41 8b b5 b0 07 00 00 85 f6 0f 84 6c ff ff ff 48 c7 c7 c0 b0 77 c0 e8 46 a2 46 d2 <0f> 0b 41 8b b5 b8 07 00 00 85 f6 0f 84 5e ff ff ff 48 c7 c7 f8 b0 [89182.232195] RSP: 0018:ffff9800c184fca0 EFLAGS: 00010286 [89182.232196] RAX: 0000000000000000 RBX: ffff8998811f4928 RCX: 0000000000000027 [89182.232197] RDX: ffff899befc1d4c8 RSI: 0000000000000001 RDI: ffff899befc1d4c0 [89182.232198] RBP: 0000000000000000 R08: 0000000000000000 R09: ffff9800c184fb38 [89182.232199] R10: 0000000000000003 R11: ffffffff93ebab08 R12: 0000000000000000 [89182.232199] R13: ffff8998811f41a0 R14: 0000000000000000 R15: 0000000000001388 [89182.232202] FS: 00007f3b10779040(0000) GS:ffff899befc00000(0000) knlGS:0000000000000000 [89182.232203] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [89182.232204] CR2: 0000558fdcb0ad70 CR3: 000000012b078000 CR4: 0000000000750ef0 [89182.232206] PKRU: 55555554 [89182.232206] Call Trace: [89182.232209] <TASK> [89182.232210] ? mlx5_reclaim_startup_pages+0x22a/0x290 [mlx5_core] [89182.232232] ? __warn+0x78/0x110 [89182.232238] ? mlx5_reclaim_startup_pages+0x22a/0x290 [mlx5_core] [89182.232262] ? report_bug+0x159/0x180 [89182.232265] ? srso_alias_return_thunk+0x5/0xfbef5 [89182.232267] ? prb_read_valid+0x12/0x20 [89182.232271] ? handle_bug+0x58/0x90 [89182.232273] ? exc_invalid_op+0x13/0x60 [89182.232274] ? asm_exc_invalid_op+0x16/0x20 [89182.232277] ? 
mlx5_reclaim_startup_pages+0x22a/0x290 [mlx5_core] [89182.232300] mlx5_function_disable+0x20/0xc0 [mlx5_core] [89182.232321] ? srso_alias_return_thunk+0x5/0xfbef5 [89182.232322] mlx5_uninit_one+0x7e/0xf0 [mlx5_core] [89182.232344] remove_one+0x49/0xc0 [mlx5_core] [89182.232365] pci_device_remove+0x36/0xa0 [89182.232369] device_release_driver_internal+0x196/0x200 [89182.232373] pci_stop_bus_device+0x67/0x90 [89182.232377] pci_stop_and_remove_bus_device_locked+0x11/0x20 [89182.232378] remove_store+0x74/0x90 [89182.232381] kernfs_fop_write_iter+0x103/0x1e0 [89182.232384] vfs_write+0x1da/0x3a0 [89182.232388] ksys_write+0x5e/0xe0 [89182.232390] do_syscall_64+0x34/0x80 [89182.232392] entry_SYSCALL_64_after_hwframe+0x78/0xe2 [89182.232394] RIP: 0033:0x7f3b1087233f [89182.232396] Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 f9 d4 f8 ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 44 24 08 e8 4c d5 f8 ff 48 [89182.232397] RSP: 002b:00007fff1eaae410 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 [89182.232398] RAX: ffffffffffffffda RBX: 0000000000a860d8 RCX: 00007f3b1087233f [89182.232399] RDX: 0000000000000001 RSI: 0000000026c1b410 RDI: 0000000000000014 [89182.232400] RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000 [89182.232401] R10: 0000000000000001 R11: 0000000000000293 R12: 00007f3b10778fc0 [89182.232401] R13: 0000000000000014 R14: 0000000000a440c0 R15: 0000000000000000 [89182.232403] </TASK> [89182.232404] ---[ end trace 0000000000000000 ]---