Skip to content

Commit dcdd0b1

Browse files
authored
Move underlay NICs back into H/W Classification (#504)
Today, we get our TX and RX pathways on underlay devices for XDE by creating a secondary MAC client on each device. As part of this process we must attach a unicast MAC address (or specify `MAC_OPEN_FLAGS_NO_UNICAST_ADDR`) during creation to spin up a valid datapath, otherwise we can receive packets on our promiscuous mode handler but any sent packets are immediately dropped by MAC. However, datapath setup then fails to supply a dedicated ring/group for the new client, and the device is reduced to pure software classification. This hard-disables any ring polling threads, and so all packet processing occurs in the interrupt context. This limits throughput and increases OPTE's blast radius on control plane/crucible traffic between sleds. This PR places a hold onto the underlay NICs via `dls`, and makes use of `dls_open`/`dls_close` to acquire a valid transmit pathway onto the original (primary) MAC client, to which we can also attach a promiscuous callback. As desired, we are back in hardware classification. This work is orthogonal to #62 (and related efforts) which will get us out of promiscuous mode -- both are necessary parts of making optimal use of the illumos networking stack. Closes #489 .
1 parent b08c264 commit dcdd0b1

File tree

10 files changed

+532
-173
lines changed

10 files changed

+532
-173
lines changed

Cargo.lock

Lines changed: 11 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ darling = "0.20"
5151
dyn-clone = "1.0"
5252
heapless = "0.8"
5353
ipnetwork = { version = "0.20", default-features = false }
54-
itertools = { version = "0.12", default-features = false }
54+
itertools = { version = "0.13", default-features = false }
5555
libc = "0.2"
5656
libnet = { git = "https://github.com/oxidecomputer/netadm-sys" }
5757
nix = { version = "0.29", features = ["signal", "user"] }

xde-tests/tests/loopback.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ fn test_xde_loopback() -> Result<()> {
1111
let topol = xde_tests::two_node_topology()?;
1212

1313
// Now we should be able to ping b from a on the overlay.
14-
&topol.nodes[0]
14+
_ = &topol.nodes[0]
1515
.zone
1616
.zone
1717
.zexec(&format!("ping {}", &topol.nodes[1].port.ip()))?;

xde/src/dls.rs

Lines changed: 0 additions & 27 deletions
This file was deleted.

xde/src/dls/mod.rs

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
5+
// Copyright 2024 Oxide Computer Company
6+
7+
//! Safe abstractions around DLS public and private functions.
8+
9+
pub mod sys;
10+
11+
use crate::mac::mac_client_handle;
12+
use crate::mac::MacClient;
13+
use crate::mac::MacPerimeterHandle;
14+
use crate::mac::MacTxFlags;
15+
use crate::mac::MAC_DROP_ON_NO_DESC;
16+
use core::ffi::CStr;
17+
use core::fmt::Display;
18+
use core::ptr;
19+
use core::ptr::NonNull;
20+
use illumos_sys_hdrs::c_int;
21+
use illumos_sys_hdrs::datalink_id_t;
22+
use illumos_sys_hdrs::uintptr_t;
23+
use illumos_sys_hdrs::ENOENT;
24+
use opte::engine::packet::Packet;
25+
use opte::engine::packet::PacketState;
26+
pub use sys::*;
27+
28+
/// An integer ID used by DLS to refer to a given link.
29+
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
30+
pub struct LinkId(datalink_id_t);
31+
32+
impl LinkId {
33+
/// Request the link ID for a device using its name.
34+
pub fn from_name(name: impl AsRef<CStr>) -> Result<Self, LinkError> {
35+
let mut link_id = 0;
36+
37+
unsafe {
38+
match dls_mgmt_get_linkid(name.as_ref().as_ptr(), &mut link_id) {
39+
0 => Ok(LinkId(link_id)),
40+
ENOENT => Err(LinkError::NotFound),
41+
err => Err(LinkError::Other(err)),
42+
}
43+
}
44+
}
45+
}
46+
47+
impl From<LinkId> for datalink_id_t {
48+
fn from(val: LinkId) -> Self {
49+
val.0
50+
}
51+
}
52+
53+
/// Errors encountered while querying DLS for a `LinkId`.
54+
pub enum LinkError {
55+
NotFound,
56+
Other(i32),
57+
}
58+
59+
impl Display for LinkError {
60+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
61+
match self {
62+
LinkError::NotFound => write!(f, "link not found"),
63+
LinkError::Other(e) => write!(f, "unknown error ({e})"),
64+
}
65+
}
66+
}
67+
68+
/// A hold on an existing link managed by DLS.
69+
#[derive(Debug)]
70+
struct DlsLink {
71+
inner: Option<DlsLinkInner>,
72+
link: LinkId,
73+
}
74+
75+
#[derive(Debug)]
76+
struct DlsLinkInner {
77+
dlp: *mut dls_link,
78+
dlh: dls_dl_handle,
79+
}
80+
81+
impl DlsLink {
82+
/// Place a hold on an existing link.
83+
fn hold(mph: &MacPerimeterHandle) -> Result<Self, c_int> {
84+
let mut dlp = ptr::null_mut();
85+
let mut dlh = ptr::null_mut();
86+
let link = mph.link_id();
87+
88+
let res = unsafe { dls_devnet_hold(link.into(), &mut dlh) };
89+
if res != 0 {
90+
return Err(res);
91+
}
92+
93+
let res = unsafe { dls_link_hold(dls_devnet_mac(dlh), &mut dlp) };
94+
if res == 0 {
95+
Ok(Self { inner: Some(DlsLinkInner { dlp, dlh }), link })
96+
} else {
97+
unsafe { dls_devnet_rele(dlh) };
98+
Err(res)
99+
}
100+
}
101+
102+
/// Release a hold on a given link.
103+
///
104+
/// This operation requires that you acquire the MAC perimeter
105+
/// for the target device.
106+
fn release(mut self, mph: &MacPerimeterHandle) {
107+
if let Some(inner) = self.inner.take() {
108+
if mph.link_id() != self.link {
109+
panic!("Tried to free link hold with the wrong MAC perimeter: saw {:?}, wanted {:?}",
110+
mph.link_id(), self.link);
111+
}
112+
unsafe {
113+
dls_link_rele(inner.dlp);
114+
dls_devnet_rele(inner.dlh);
115+
}
116+
}
117+
}
118+
119+
/// Convert a hold into a `DlsStream` for packet Rx/Tx.
120+
fn open_stream(
121+
mut self,
122+
mph: &MacPerimeterHandle,
123+
) -> Result<DlsStream, c_int> {
124+
let Some(inner) = self.inner.as_ref() else {
125+
panic!("attempted to open a DlsStream on freed link")
126+
};
127+
128+
if mph.link_id() != self.link {
129+
panic!("Tried to open stream with the wrong MAC perimeter: saw {:?}, wanted {:?}",
130+
mph.link_id(), self.link);
131+
}
132+
133+
// NOTE: this is a stlouis-only way to create a dld_str_t. It
134+
// is virtually identical to the clean-slate state from the kmemcache,
135+
// with no rq/wq set.
136+
let dld_str = NonNull::new(unsafe { dld_str_create_detached() });
137+
let Some(dld_str) = dld_str else {
138+
self.release(mph);
139+
return Err(-1);
140+
};
141+
142+
let res = unsafe { dls_open(inner.dlp, inner.dlh, dld_str.as_ptr()) };
143+
if res == 0 {
144+
// DLP is held/consumed by dls_open.
145+
_ = self.inner.take();
146+
Ok(DlsStream {
147+
inner: Some(DlsStreamInner { dld_str }),
148+
link: mph.link_id(),
149+
}
150+
.into())
151+
} else {
152+
self.release(mph);
153+
Err(res)
154+
}
155+
}
156+
}
157+
158+
impl Drop for DlsLink {
159+
fn drop(&mut self) {
160+
if self.inner.take().is_some() {
161+
opte::engine::err!(
162+
"dropped hold on link {:?} without releasing!!",
163+
self.link
164+
);
165+
}
166+
}
167+
}
168+
169+
/// A DLS message stream on a target link, allowing packet
170+
/// Rx and Tx.
171+
#[derive(Debug)]
172+
pub struct DlsStream {
173+
inner: Option<DlsStreamInner>,
174+
link: LinkId,
175+
}
176+
177+
#[derive(Debug)]
178+
struct DlsStreamInner {
179+
dld_str: NonNull<dld_str_s>,
180+
}
181+
182+
impl DlsStream {
183+
pub fn open(link_id: LinkId) -> Result<Self, c_int> {
184+
let perim = MacPerimeterHandle::from_linkid(link_id)?;
185+
let link_handle = DlsLink::hold(&perim)?;
186+
link_handle.open_stream(&perim)
187+
}
188+
189+
/// Returns the ID of the link this stream belongs to.
190+
pub fn link_id(&self) -> LinkId {
191+
self.link
192+
}
193+
194+
/// Send the [`Packet`] on this client, dropping if there is no
195+
/// descriptor available
196+
///
197+
/// This function always consumes the [`Packet`].
198+
///
199+
/// XXX The underlying mac_tx() function accepts a packet chain,
200+
/// but for now we pass only a single packet at a time.
201+
pub fn tx_drop_on_no_desc(
202+
&self,
203+
pkt: Packet<impl PacketState>,
204+
hint: uintptr_t,
205+
flags: MacTxFlags,
206+
) {
207+
let Some(inner) = self.inner.as_ref() else {
208+
// XXX: probably handle or signal an error here.
209+
return;
210+
};
211+
// We must unwrap the raw `mblk_t` out of the `pkt` here,
212+
// otherwise the mblk_t would be dropped at the end of this
213+
// function along with `pkt`.
214+
let mut raw_flags = flags.bits();
215+
raw_flags |= MAC_DROP_ON_NO_DESC;
216+
unsafe {
217+
str_mdata_fastpath_put(
218+
inner.dld_str.as_ptr(),
219+
pkt.unwrap_mblk(),
220+
hint,
221+
raw_flags,
222+
)
223+
};
224+
}
225+
}
226+
227+
impl MacClient for DlsStream {
228+
fn mac_client_handle(&self) -> Result<*mut mac_client_handle, c_int> {
229+
let Some(inner) = self.inner.as_ref() else {
230+
return Err(-1);
231+
};
232+
233+
Ok(unsafe { dld_str_mac_client_handle(inner.dld_str.as_ptr()) })
234+
}
235+
}
236+
237+
impl Drop for DlsStream {
238+
fn drop(&mut self) {
239+
if let Some(inner) = self.inner.take() {
240+
match MacPerimeterHandle::from_linkid(self.link) {
241+
Ok(_perim) => unsafe {
242+
// NOTE: this is a stlouis-only way to free this
243+
// dld_str_t. It will handle the remainder of the
244+
// cleanup for which dld_str_detach would have been
245+
// responsible.
246+
dls_close(inner.dld_str.as_ptr());
247+
dld_str_destroy_detached(inner.dld_str.as_ptr());
248+
},
249+
Err(e) => opte::engine::err!(
250+
"couldn't acquire MAC perimeter (err {}): \
251+
dropped stream on link {:?} without releasing",
252+
e,
253+
self.link,
254+
),
255+
}
256+
}
257+
}
258+
}

0 commit comments

Comments
 (0)