From 2672dc8aa5517e310e1773d5f5245a57f01728d8 Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Wed, 19 Nov 2025 12:50:24 -0800 Subject: [PATCH] Fix a bug around IFLA_NET_NS_FD IFLA_NET_NS_FD is supplied in an RTM_SETLINK message from userland to move a link to a new netns. The same message might mutate other attributes of the link. Thus far we were applying those changes to a different link in the source netns. PiperOrigin-RevId: 834406670 --- pkg/sentry/socket/netstack/BUILD | 1 + pkg/sentry/socket/netstack/stack.go | 37 +++-- test/syscalls/linux/socket_netlink_route.cc | 135 ++++++++++++++---- .../linux/socket_netlink_route_util.cc | 3 + .../linux/socket_netlink_route_util.h | 1 + 5 files changed, 137 insertions(+), 40 deletions(-) diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index 0bb273a64d..b588844bb7 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -9,6 +9,7 @@ package( declare_mutex( name = "netstack_link_mutex", out = "netstack_link_mutex.go", + nested_lock_names = ["destStack"], package = "netstack", prefix = "netstackLink", ) diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index 7e8d4cdd09..2f689f7fe1 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -229,7 +229,9 @@ func (s *Stack) SetInterface(ctx context.Context, msg *nlmsg.Message) *syserr.Er // precondition: s.linkLock is held. func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map[uint16]nlmsg.BytesView) *syserr.Error { - oldNicInfo, ok := s.Stack.SingleNICInfo(id) + home := s + changed := false + oldNicInfo, ok := home.Stack.SingleNICInfo(id) if !ok { return syserr.ErrUnknownNICID } @@ -250,23 +252,34 @@ func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map } defer ns.DecRef(ctx) peer := ns.Stack().(*Stack) - if peer.Stack != s.Stack { + if peer.Stack != home.Stack { var err tcpip.Error oldID := id - id, err = s.Stack.SetNICStack(id, peer.Stack) + peer.linkMu.NestedLock(netstackLinkLockDeststack) + defer peer.linkMu.NestedUnlock(netstackLinkLockDeststack) + + id, err = home.Stack.SetNICStack(id, peer.Stack) if err != nil { return syserr.TranslateNetstackError(err) } - s.sendDeleteEvent(ctx, oldID, oldNicInfo) // inform about exit from old ns - peer.sendChangeEvent(ctx, id) // inform about entry into new ns + home.sendDeleteEvent(ctx, oldID, oldNicInfo) // inform about exit from old ns + changed = true // inform about entry into new ns + + oldNicInfo, ok = peer.Stack.SingleNICInfo(id) + if !ok { + // Because we hold peer.linkMu, this should never happen. + ctx.Warningf("Newly rehomed NIC %d not found in new stack %p", id, home.Stack) + return syserr.ErrUnknownNICID + } + home = peer + // TODO: Once we support IFLA_LINK_NETNSID, we need to call sendChangeEvent on // the peer interface if this interface is part of a veth pair. } } - changed := false for t, v := range linkAttrs { switch t { case linux.IFLA_MASTER: @@ -274,11 +287,11 @@ func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map if !ok { return syserr.ErrInvalidArgument } - if mid, ok := s.Stack.GetNICCoordinatorID(id); ok && mid == tcpip.NICID(master) { + if mid, ok := home.Stack.GetNICCoordinatorID(id); ok && mid == tcpip.NICID(master) { continue } if master != 0 { - if err := s.Stack.SetNICCoordinator(id, tcpip.NICID(master)); err != nil { + if err := home.Stack.SetNICCoordinator(id, tcpip.NICID(master)); err != nil { return syserr.TranslateNetstackError(err) } changed = true @@ -291,7 +304,7 @@ func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map if oldNicInfo.LinkAddress == addr { continue } - if err := s.Stack.SetNICAddress(id, addr); err != nil { + if err := home.Stack.SetNICAddress(id, addr); err != nil { return syserr.TranslateNetstackError(err) } changed = true @@ -299,7 +312,7 @@ func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map if oldNicInfo.Name == v.String() { continue } - if err := s.Stack.SetNICName(id, v.String()); err != nil { + if err := home.Stack.SetNICName(id, v.String()); err != nil { return syserr.TranslateNetstackError(err) } changed = true @@ -311,7 +324,7 @@ func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map if oldNicInfo.MTU == mtu { continue } - if err := s.Stack.SetNICMTU(id, mtu); err != nil { + if err := home.Stack.SetNICMTU(id, mtu); err != nil { return syserr.TranslateNetstackError(err) } changed = true @@ -321,7 +334,7 @@ func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map } if changed { - s.sendChangeEvent(ctx, id) + home.sendChangeEvent(ctx, id) } return nil } diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index 977baa2687..7d61729c7a 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -1796,6 +1796,16 @@ TEST(NetlinkRouteTest, VethAdd) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); SKIP_IF(IsRunningWithHostinet()); + // Running the test in an ephemeral netns allows it to not interfere with + // other tests that also want to create veth pairs with the same names. + const FileDescriptor curr_nsfd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/thread-self/ns/net", O_RDONLY)); + Cleanup restore_netns = Cleanup([&] { + ASSERT_THAT(setns(curr_nsfd.get(), CLONE_NEWNET), + SyscallSucceedsWithValue(0)); + }); + ASSERT_THAT(unshare(CLONE_NEWNET), SyscallSucceedsWithValue(0)); + FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); VethRequest req = GetVethRequest(kSeq, "veth1", "veth2"); @@ -1823,6 +1833,86 @@ TEST(NetlinkRouteTest, LookupAllAddrOrder) { } } +struct NetNSRequest { + struct nlmsghdr hdr; + struct ifinfomsg ifm; + char buf[1024]; +}; + +struct NetNSRequest GetNetNSRequest(uint32_t seq, int if_index, int ns_fd) { + struct NetNSRequest req = {}; + + req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.hdr.nlmsg_type = RTM_NEWLINK; + req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.hdr.nlmsg_seq = seq; + req.ifm.ifi_family = AF_UNSPEC; + req.ifm.ifi_index = if_index; + addattr(&req.hdr, sizeof(req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd)); + + return req; +} + +// Guard against b/456552490, a bug where gvisor'd try to modify the given +// attributes in the wrong stack when the request also directs it to change the +// netns. +TEST(NetlinkRouteTest, ChangeNetnsAndOtherAttrsTogether) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); + SKIP_IF(IsRunningWithHostinet()); + // TODO(gvisor.dev/issue/4595): enable cooperative save tests. + const DisableSave ds; + + // Running the test in an ephemeral netns allows it to not interfere with + // other tests that also want to create veth pairs with the same names. + const FileDescriptor curr_nsfd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/thread-self/ns/net", O_RDONLY)); + Cleanup restore_netns = Cleanup([&] { + ASSERT_THAT(setns(curr_nsfd.get(), CLONE_NEWNET), + SyscallSucceedsWithValue(0)); + }); + ASSERT_THAT(unshare(CLONE_NEWNET), SyscallSucceedsWithValue(0)); + + FileDescriptor nlsk = + ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); + VethRequest req = GetVethRequest(kSeq, "veth1", "veth2"); + EXPECT_NO_ERRNO( + NetlinkRequestAckOrError(nlsk, kSeq, &req, req.hdr.nlmsg_len)); + int inner_veth_idx = if_nametoindex("veth2"); + ASSERT_NE(inner_veth_idx, 0); + + // Enter a new network namespace and move veth2 into it. + ASSERT_THAT(unshare(CLONE_NEWNET), SyscallSucceedsWithValue(0)); + const FileDescriptor inner_nsfd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/thread-self/ns/net", O_RDONLY)); + NetNSRequest set_netns_req = + GetNetNSRequest(kSeq, inner_veth_idx, inner_nsfd.get()); + // Request a name change along with the netns change. + std::string new_name = "veth2_new"; + addattr(&set_netns_req.hdr, sizeof(set_netns_req), IFLA_IFNAME, + new_name.c_str(), new_name.size()); + EXPECT_NO_ERRNO(NetlinkRequestAckOrError(nlsk, kSeq, &set_netns_req, + set_netns_req.hdr.nlmsg_len)); + + // Verify that an interface with the new name exists in the inner netns. + bool found_name_inner = false; + std::vector links = ASSERT_NO_ERRNO_AND_VALUE(DumpLinks()); + for (const Link& link : links) { + if (link.name == new_name) { + found_name_inner = true; + break; + } + } + EXPECT_TRUE(found_name_inner) + << "Did not find interface with name " << new_name; + + // And verify also that the outer netns does not have the new name. + links = ASSERT_NO_ERRNO_AND_VALUE(DumpLinks(nlsk)); + for (const Link& link : links) { + ASSERT_NE(link.name, new_name) + << "Found interface with name " << new_name << " in outer netns"; + } +} + struct NameRequest { struct nlmsghdr hdr; struct ifinfomsg ifm; @@ -1965,20 +2055,6 @@ TEST(NetlinkRouteTest, LinkMulticastGroupBasic) { } } -struct VethRequest GetSetNetNSRequest(uint32_t seq, int if_index, int ns_fd) { - struct VethRequest req = {}; - - req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); - req.hdr.nlmsg_type = RTM_NEWLINK; - req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; - req.hdr.nlmsg_seq = seq; - req.ifm.ifi_family = AF_UNSPEC; - req.ifm.ifi_index = if_index; - addattr(&req.hdr, sizeof(req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd)); - - return req; -} - // To verify the namespaced nature of the netlink multicast groups. TEST(NetlinkRouteTest, LinkMulticastGroupNamespaced) { SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN))); @@ -1986,6 +2062,16 @@ TEST(NetlinkRouteTest, LinkMulticastGroupNamespaced) { // TODO(gvisor.dev/issue/4595): enable cooperative save tests. const DisableSave ds; + // Running the test in an ephemeral netns allows it to not interfere with + // other tests that also want to create veth pairs with the same names. + const FileDescriptor curr_nsfd = + ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/thread-self/ns/net", O_RDONLY)); + Cleanup restore_netns = Cleanup([&] { + ASSERT_THAT(setns(curr_nsfd.get(), CLONE_NEWNET), + SyscallSucceedsWithValue(0)); + }); + ASSERT_THAT(unshare(CLONE_NEWNET), SyscallSucceedsWithValue(0)); + FileDescriptor control_nlsk = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); VethRequest req = GetVethRequest(kSeq, "veth1", "veth2"); @@ -1998,16 +2084,9 @@ TEST(NetlinkRouteTest, LinkMulticastGroupNamespaced) { struct sockaddr_nl mcast_addr = {}; mcast_addr.nl_family = AF_NETLINK; mcast_addr.nl_groups = RTMGRP_LINK; - FileDescriptor root_nlsk = + FileDescriptor outer_nlsk = ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE, &mcast_addr)); - const FileDescriptor root_nsfd = - ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/thread-self/ns/net", O_RDONLY)); - Cleanup restore_netns = Cleanup([&] { - ASSERT_THAT(setns(root_nsfd.get(), CLONE_NEWNET), - SyscallSucceedsWithValue(0)); - }); - // Enter a new network namespace. ASSERT_THAT(unshare(CLONE_NEWNET), SyscallSucceedsWithValue(0)); FileDescriptor inner_nlsk = @@ -2016,24 +2095,24 @@ TEST(NetlinkRouteTest, LinkMulticastGroupNamespaced) { // And move veth2 into it. const FileDescriptor inner_nsfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/thread-self/ns/net", O_RDONLY)); - VethRequest set_netns_req = - GetSetNetNSRequest(kSeq, inner_veth_idx, inner_nsfd.get()); + NetNSRequest set_netns_req = + GetNetNSRequest(kSeq, inner_veth_idx, inner_nsfd.get()); EXPECT_NO_ERRNO(NetlinkRequestAckOrError(control_nlsk, kSeq, &set_netns_req, set_netns_req.hdr.nlmsg_len)); constexpr int kPollTimeoutMs = 1000; bool got_msg = false; - // We expect an RTM_DELINK message for veth2 in the root netns socket. + // We expect an RTM_DELINK message for veth2 in the outer netns socket. // But an RTM_NEWLINK is also expected for veth1 because its peer was moved. // Hence the two attempts. N.B. gVisor does not send the RTM_NEWLINK because // IFLA_LINK_NETNSID is not yet supported. for (int i = 0; i < 2; ++i) { - struct pollfd pfd = {.fd = root_nlsk.get(), .events = POLLIN}; + struct pollfd pfd = {.fd = outer_nlsk.get(), .events = POLLIN}; ASSERT_EQ(RetryEINTR(poll)(&pfd, 1, kPollTimeoutMs), 1) << "root_nlsk: Did not get veth2 DELLINK"; ASSERT_NO_ERRNO(NetlinkResponse( - root_nlsk, + outer_nlsk, [&](const struct nlmsghdr* hdr) { const struct ifinfomsg* msg = reinterpret_cast(NLMSG_DATA(hdr)); @@ -2044,7 +2123,7 @@ TEST(NetlinkRouteTest, LinkMulticastGroupNamespaced) { /*expect_nlmsgerr=*/false)); if (got_msg) break; } - EXPECT_TRUE(got_msg) << "root_nlsk: Did not get veth2 DELLINK"; + EXPECT_TRUE(got_msg) << "outer_nlsk: Did not get veth2 DELLINK"; // We expect an RTM_NEWLINK message for veth2 in the inner netns socket. { diff --git a/test/syscalls/linux/socket_netlink_route_util.cc b/test/syscalls/linux/socket_netlink_route_util.cc index f77ece9ed1..367bd821e4 100644 --- a/test/syscalls/linux/socket_netlink_route_util.cc +++ b/test/syscalls/linux/socket_netlink_route_util.cc @@ -261,7 +261,10 @@ PosixError DumpLinks( PosixErrorOr> DumpLinks() { ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, NetlinkBoundSocket(NETLINK_ROUTE)); + return DumpLinks(fd); +} +PosixErrorOr> DumpLinks(const FileDescriptor& fd) { std::vector links; RETURN_IF_ERRNO(DumpLinks(fd, kSeq, [&](const struct nlmsghdr* hdr) { if (hdr->nlmsg_type != RTM_NEWLINK || diff --git a/test/syscalls/linux/socket_netlink_route_util.h b/test/syscalls/linux/socket_netlink_route_util.h index 2b37eb9e9e..e4167995f6 100644 --- a/test/syscalls/linux/socket_netlink_route_util.h +++ b/test/syscalls/linux/socket_netlink_route_util.h @@ -38,6 +38,7 @@ PosixError DumpLinks(const FileDescriptor& fd, uint32_t seq, const std::function& fn); PosixErrorOr> DumpLinks(); +PosixErrorOr> DumpLinks(const FileDescriptor& fd); // Returns the loopback link on the system. ENOENT if not found. PosixErrorOr LoopbackLink();