From 29c4a882f362d07bf8358b95bf3e8d1d5fb1399c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Cie=C5=9Blak?= Date: Tue, 3 Dec 2024 15:48:58 +0100 Subject: [PATCH] Pass port from VNet to local proxy (#49453) * Prepare app specs in tests for specifying TCP ports * Refactor logging in lib/vnet/app_resolver.go Use libutils.NewPackageLogger, call it log instead of slog which makes it harder to use the imported default slog logger instead of the one from a struct. Move creation of logger within TCPAppResolver.resolveTCPHandlerForCluster * Pass port from VNet to local proxy * Don't create another package logger * Don't pass logger to newTCPAppHandler * Fix typo in comment * Explicitly pass port to dialHost * Convert multi-line definitions of simple appSpecs to single-line * Add TODO comment about validating local port * Empty commit to trigger CI --- lib/vnet/app_resolver.go | 116 +++++++++++++++++++++---------- lib/vnet/vnet.go | 4 +- lib/vnet/vnet_test.go | 145 +++++++++++++++++++++++++++++++-------- 3 files changed, 198 insertions(+), 67 deletions(-) diff --git a/lib/vnet/app_resolver.go b/lib/vnet/app_resolver.go index 01ca4defaa76b..d2fd0b3bc9d9a 100644 --- a/lib/vnet/app_resolver.go +++ b/lib/vnet/app_resolver.go @@ -26,6 +26,7 @@ import ( "log/slog" "net" "strings" + "sync" "github.com/gravitational/trace" "github.com/jonboulle/clockwork" @@ -94,7 +95,7 @@ type DialOptions struct { type TCPAppResolver struct { appProvider AppProvider clusterConfigCache *ClusterConfigCache - slog *slog.Logger + log *slog.Logger clock clockwork.Clock } @@ -109,7 +110,7 @@ type TCPAppResolver struct { func NewTCPAppResolver(appProvider AppProvider, opts ...tcpAppResolverOption) (*TCPAppResolver, error) { r := &TCPAppResolver{ appProvider: appProvider, - slog: slog.With(teleport.ComponentKey, "VNet.AppResolver"), + log: log.With(teleport.ComponentKey, "VNet.AppResolver"), } for _, opt := range opts { opt(r) @@ -159,7 +160,7 @@ func (r *TCPAppResolver) ResolveTCPHandler(ctx context.Context, fqdn string) (*T // the error but don't return it so that DNS resolution will be forwarded upstream instead of // failing, to avoid breaking e.g. web app access (we don't know if this is a web or TCP app yet // because we can't log in). - slog.ErrorContext(ctx, "Failed to get teleport client.", "error", err) + r.log.ErrorContext(ctx, "Failed to get teleport client.", "error", err) continue } @@ -168,8 +169,7 @@ func (r *TCPAppResolver) ResolveTCPHandler(ctx context.Context, fqdn string) (*T leafClusterName = clusterClient.ClusterName() } - slog := r.slog.With("profile", profileName, "fqdn", fqdn, "leaf_cluster", leafClusterName) - return r.resolveTCPHandlerForCluster(ctx, slog, clusterClient, profileName, leafClusterName, fqdn) + return r.resolveTCPHandlerForCluster(ctx, clusterClient, profileName, leafClusterName, fqdn) } // fqdn did not match any profile, forward the request upstream. return nil, ErrNoTCPHandler @@ -180,7 +180,7 @@ var errNoMatch = errors.New("cluster does not match queried FQDN") func (r *TCPAppResolver) clusterClientForAppFQDN(ctx context.Context, profileName, fqdn string) (ClusterClient, error) { rootClient, err := r.appProvider.GetCachedClient(ctx, profileName, "") if err != nil { - r.slog.ErrorContext(ctx, "Failed to get root cluster client, apps in this cluster will not be resolved.", "profile", profileName, "error", err) + r.log.ErrorContext(ctx, "Failed to get root cluster client, apps in this cluster will not be resolved.", "profile", profileName, "error", err) return nil, errNoMatch } @@ -192,7 +192,7 @@ func (r *TCPAppResolver) clusterClientForAppFQDN(ctx context.Context, profileNam leafClusters, err := getLeafClusters(ctx, rootClient) if err != nil { // Good chance we're here because the user is not logged in to the profile. - r.slog.ErrorContext(ctx, "Failed to list leaf clusters, apps in this cluster will not be resolved.", "profile", profileName, "error", err) + r.log.ErrorContext(ctx, "Failed to list leaf clusters, apps in this cluster will not be resolved.", "profile", profileName, "error", err) return nil, errNoMatch } @@ -201,13 +201,13 @@ func (r *TCPAppResolver) clusterClientForAppFQDN(ctx context.Context, profileNam for _, leafClusterName := range allClusters { clusterClient, err := r.appProvider.GetCachedClient(ctx, profileName, leafClusterName) if err != nil { - r.slog.ErrorContext(ctx, "Failed to get cluster client, apps in this cluster will not be resolved.", "profile", profileName, "leaf_cluster", leafClusterName, "error", err) + r.log.ErrorContext(ctx, "Failed to get cluster client, apps in this cluster will not be resolved.", "profile", profileName, "leaf_cluster", leafClusterName, "error", err) continue } clusterConfig, err := r.clusterConfigCache.GetClusterConfig(ctx, clusterClient) if err != nil { - r.slog.ErrorContext(ctx, "Failed to get VnetConfig, apps in the cluster will not be resolved.", "profile", profileName, "leaf_cluster", leafClusterName, "error", err) + r.log.ErrorContext(ctx, "Failed to get VnetConfig, apps in the cluster will not be resolved.", "profile", profileName, "leaf_cluster", leafClusterName, "error", err) continue } for _, zone := range clusterConfig.DNSZones { @@ -242,10 +242,10 @@ func getLeafClusters(ctx context.Context, rootClient ClusterClient) ([]string, e // query. func (r *TCPAppResolver) resolveTCPHandlerForCluster( ctx context.Context, - slog *slog.Logger, clusterClient ClusterClient, profileName, leafClusterName, fqdn string, ) (*TCPHandlerSpec, error) { + log := r.log.With("profile", profileName, "leaf_cluster", leafClusterName, "fqdn", fqdn) // An app public_addr could technically be full-qualified or not, match either way. expr := fmt.Sprintf(`(resource.spec.public_addr == "%s" || resource.spec.public_addr == "%s") && hasPrefix(resource.spec.uri, "tcp://")`, strings.TrimSuffix(fqdn, "."), fqdn) @@ -257,7 +257,7 @@ func (r *TCPAppResolver) resolveTCPHandlerForCluster( if err != nil { // Don't return an unexpected error so we can try to find the app in different clusters or forward the // request upstream. - slog.InfoContext(ctx, "Failed to list application servers.", "error", err) + log.InfoContext(ctx, "Failed to list application servers.", "error", err) return nil, ErrNoTCPHandler } if len(resp.Resources) == 0 { @@ -282,9 +282,15 @@ func (r *TCPAppResolver) resolveTCPHandlerForCluster( } type tcpAppHandler struct { - profileName string - leafClusterName string - lp *alpnproxy.LocalProxy + log *slog.Logger + appProvider AppProvider + clock clockwork.Clock + profileName string + leafClusterName string + app types.Application + portToLocalProxy map[uint16]*alpnproxy.LocalProxy + // mu guards access to portToLocalProxy. + mu sync.Mutex } func (r *TCPAppResolver) newTCPAppHandler( @@ -293,38 +299,73 @@ func (r *TCPAppResolver) newTCPAppHandler( leafClusterName string, app types.Application, ) (*tcpAppHandler, error) { - dialOpts, err := r.appProvider.GetDialOptions(ctx, profileName) + return &tcpAppHandler{ + appProvider: r.appProvider, + clock: r.clock, + profileName: profileName, + leafClusterName: leafClusterName, + app: app, + portToLocalProxy: make(map[uint16]*alpnproxy.LocalProxy), + log: r.log.With(teleport.ComponentKey, "VNet.AppHandler", + "profile", profileName, "leaf_cluster", leafClusterName, "fqdn", app.GetPublicAddr()), + }, nil +} + +// getOrInitializeLocalProxy returns a separate local proxy for each port for multi-port apps. For +// single-port apps, it returns the same local proxy no matter the port. +func (h *tcpAppHandler) getOrInitializeLocalProxy(ctx context.Context, localPort uint16) (*alpnproxy.LocalProxy, error) { + h.mu.Lock() + defer h.mu.Unlock() + + // Connections to single-port apps need to go through a local proxy that has a cert with TargetPort + // set to 0. This ensures that the old behavior is kept for such apps, where the client can dial + // the public address of an app on any port and be routed to the port from the URI. + // + // https://github.com/gravitational/teleport/blob/master/rfd/0182-multi-port-tcp-app-access.md#vnet-with-single-port-apps + if len(h.app.GetTCPPorts()) == 0 { + localPort = 0 + } + // TODO(ravicious): For multi-port apps, check if localPort is valid and surface the error in UI. + // https://github.com/gravitational/teleport/blob/master/rfd/0182-multi-port-tcp-app-access.md#incorrect-port + + lp, ok := h.portToLocalProxy[localPort] + if ok { + return lp, nil + } + + dialOpts, err := h.appProvider.GetDialOptions(ctx, h.profileName) if err != nil { - return nil, trace.Wrap(err, "getting dial options for profile %q", profileName) + return nil, trace.Wrap(err, "getting dial options for profile %q", h.profileName) } - clusterClient, err := r.appProvider.GetCachedClient(ctx, profileName, leafClusterName) + clusterClient, err := h.appProvider.GetCachedClient(ctx, h.profileName, h.leafClusterName) if err != nil { return nil, trace.Wrap(err) } routeToApp := proto.RouteToApp{ - Name: app.GetName(), - PublicAddr: app.GetPublicAddr(), + Name: h.app.GetName(), + PublicAddr: h.app.GetPublicAddr(), // ClusterName must not be set to "" when targeting an app from a root cluster. Otherwise the // connection routed through a local proxy will just get lost somewhere in the cluster (with no // clear error being reported) and hang forever. ClusterName: clusterClient.ClusterName(), - URI: app.GetURI(), + URI: h.app.GetURI(), + TargetPort: uint32(localPort), } appCertIssuer := &appCertIssuer{ - appProvider: r.appProvider, - profileName: profileName, - leafClusterName: leafClusterName, + appProvider: h.appProvider, + profileName: h.profileName, + leafClusterName: h.leafClusterName, routeToApp: routeToApp, } - certChecker := client.NewCertChecker(appCertIssuer, r.clock) + certChecker := client.NewCertChecker(appCertIssuer, h.clock) middleware := &localProxyMiddleware{ certChecker: certChecker, - appProvider: r.appProvider, + appProvider: h.appProvider, routeToApp: routeToApp, - profileName: profileName, - leafClusterName: leafClusterName, + profileName: h.profileName, + leafClusterName: h.leafClusterName, } localProxyConfig := alpnproxy.LocalProxyConfig{ @@ -336,25 +377,28 @@ func (r *TCPAppResolver) newTCPAppHandler( ALPNConnUpgradeRequired: dialOpts.ALPNConnUpgradeRequired, Middleware: middleware, InsecureSkipVerify: dialOpts.InsecureSkipVerify, - Clock: r.clock, + Clock: h.clock, } - lp, err := alpnproxy.NewLocalProxy(localProxyConfig) + h.log.DebugContext(ctx, "Creating local proxy", "target_port", localPort) + newLP, err := alpnproxy.NewLocalProxy(localProxyConfig) if err != nil { return nil, trace.Wrap(err, "creating local proxy") } - return &tcpAppHandler{ - profileName: profileName, - leafClusterName: leafClusterName, - lp: lp, - }, nil + h.portToLocalProxy[localPort] = newLP + + return newLP, nil } // HandleTCPConnector handles an incoming TCP connection from VNet by passing it to the local alpn proxy, // which is set up with middleware to automatically handler certificate renewal and re-logins. -func (h *tcpAppHandler) HandleTCPConnector(ctx context.Context, connector func() (net.Conn, error)) error { - return trace.Wrap(h.lp.HandleTCPConnector(ctx, connector), "handling TCP connector") +func (h *tcpAppHandler) HandleTCPConnector(ctx context.Context, localPort uint16, connector func() (net.Conn, error)) error { + lp, err := h.getOrInitializeLocalProxy(ctx, localPort) + if err != nil { + return trace.Wrap(err) + } + return trace.Wrap(lp.HandleTCPConnector(ctx, connector), "handling TCP connector") } // appCertIssuer implements [client.CertIssuer]. diff --git a/lib/vnet/vnet.go b/lib/vnet/vnet.go index 66f07a92ff6cf..fb5b6710ac220 100644 --- a/lib/vnet/vnet.go +++ b/lib/vnet/vnet.go @@ -117,7 +117,7 @@ type TCPHandlerSpec struct { // [connector] to complete the TCP handshake and get the TCP conn. This is so that clients will see that the // TCP connection was refused, instead of seeing a successful TCP dial that is immediately closed. type TCPHandler interface { - HandleTCPConnector(ctx context.Context, connector func() (net.Conn, error)) error + HandleTCPConnector(ctx context.Context, localPort uint16, connector func() (net.Conn, error)) error } // UDPHandler defines the behavior for handling UDP connections from VNet. @@ -423,7 +423,7 @@ func (ns *NetworkStack) handleTCP(req *tcp.ForwarderRequest) { return conn, nil } - if err := handler.HandleTCPConnector(ctx, connector); err != nil { + if err := handler.HandleTCPConnector(ctx, id.LocalPort, connector); err != nil { if errors.Is(err, context.Canceled) { slog.DebugContext(ctx, "TCP connection handler returned early due to canceled context.") } else { diff --git a/lib/vnet/vnet_test.go b/lib/vnet/vnet_test.go index 34416e1e64cc3..96259bbb51e26 100644 --- a/lib/vnet/vnet_test.go +++ b/lib/vnet/vnet_test.go @@ -18,6 +18,7 @@ package vnet import ( "bytes" + "cmp" "context" "crypto/ed25519" "crypto/rand" @@ -52,6 +53,7 @@ import ( headerv1 "github.com/gravitational/teleport/api/gen/proto/go/teleport/header/v1" "github.com/gravitational/teleport/api/gen/proto/go/teleport/vnet/v1" "github.com/gravitational/teleport/api/types" + apiutils "github.com/gravitational/teleport/api/utils" "github.com/gravitational/teleport/lib/auth/authclient" "github.com/gravitational/teleport/lib/utils" ) @@ -210,7 +212,7 @@ func (p *testPack) lookupHost(ctx context.Context, host string) ([]string, error return resolver.LookupHost(ctx, host) } -func (p *testPack) dialHost(ctx context.Context, host string) (net.Conn, error) { +func (p *testPack) dialHost(ctx context.Context, host string, port int) (net.Conn, error) { addrs, err := p.lookupHost(ctx, host) if err != nil { return nil, trace.Wrap(err) @@ -219,7 +221,7 @@ func (p *testPack) dialHost(ctx context.Context, host string) (net.Conn, error) for _, addr := range addrs { netIP := net.ParseIP(addr) ip := tcpip.AddrFromSlice(netIP) - conn, err := p.dialIPPort(ctx, ip, 123) + conn, err := p.dialIPPort(ctx, ip, uint16(port)) if err != nil { allErrs = append(allErrs, trace.Wrap(err, "dialing %s", addr)) continue @@ -235,8 +237,14 @@ func (n noUpstreamNameservers) UpstreamNameservers(ctx context.Context) ([]strin return nil, nil } +type appSpec struct { + // publicAddr is used both as the name of the app and its public address in the final spec. + publicAddr string + tcpPorts []*types.PortRange +} + type testClusterSpec struct { - apps []string + apps []appSpec cidrRange string customDNSZones []string leafClusters map[string]testClusterSpec @@ -247,14 +255,18 @@ type echoAppProvider struct { dialOpts DialOptions reissueAppCert func() tls.Certificate onNewConnectionCallCount atomic.Uint32 + // requestedRouteToApps indexed by public address. + requestedRouteToApps map[string][]proto.RouteToApp + requestedRouteToAppsMu sync.RWMutex } // newEchoAppProvider returns an app provider with the list of named apps in each profile and leaf cluster. func newEchoAppProvider(clusterSpecs map[string]testClusterSpec, dialOpts DialOptions, reissueAppCert func() tls.Certificate) *echoAppProvider { return &echoAppProvider{ - clusters: clusterSpecs, - dialOpts: dialOpts, - reissueAppCert: reissueAppCert, + clusters: clusterSpecs, + dialOpts: dialOpts, + reissueAppCert: reissueAppCert, + requestedRouteToApps: make(map[string][]proto.RouteToApp), } } @@ -292,9 +304,25 @@ func (p *echoAppProvider) GetCachedClient(ctx context.Context, profileName, leaf } func (p *echoAppProvider) ReissueAppCert(ctx context.Context, profileName, leafClusterName string, routeToApp proto.RouteToApp) (tls.Certificate, error) { + p.requestedRouteToAppsMu.Lock() + defer p.requestedRouteToAppsMu.Unlock() + + p.requestedRouteToApps[routeToApp.PublicAddr] = append(p.requestedRouteToApps[routeToApp.PublicAddr], routeToApp) + return p.reissueAppCert(), nil } +func (p *echoAppProvider) AreAllRequestedRouteToAppsForPort(publicAddr string, port int) bool { + p.requestedRouteToAppsMu.RLock() + defer p.requestedRouteToAppsMu.RUnlock() + + routes := p.requestedRouteToApps[publicAddr] + + return apiutils.All(routes, func(route proto.RouteToApp) bool { + return route.TargetPort == uint32(port) + }) +} + func (p *echoAppProvider) GetDialOptions(ctx context.Context, profileName string) (*DialOptions, error) { return &p.dialOpts, nil } @@ -379,25 +407,31 @@ func (c *fakeAuthClient) GetResources(ctx context.Context, req *proto.ListResour resp := &proto.ListResourcesResponse{} for _, app := range c.clusterSpec.apps { // Poor-man's predicate expression filter. - if !strings.Contains(req.PredicateExpression, app) { + if !strings.Contains(req.PredicateExpression, app.publicAddr) { continue } + spec := &types.AppV3{ + Metadata: types.Metadata{ + Name: app.publicAddr, + }, + Spec: types.AppSpecV3{ + PublicAddr: app.publicAddr, + }, + } + + if len(app.tcpPorts) != 0 { + spec.SetTCPPorts(app.tcpPorts) + } + resp.Resources = append(resp.Resources, &proto.PaginatedResource{ Resource: &proto.PaginatedResource_AppServer{ AppServer: &types.AppServerV3{ Kind: types.KindAppServer, Metadata: types.Metadata{ - Name: app, + Name: app.publicAddr, }, Spec: types.AppServerSpecV3{ - App: &types.AppV3{ - Metadata: types.Metadata{ - Name: app, - }, - Spec: types.AppSpecV3{ - PublicAddr: app, - }, - }, + App: spec, }, }, }, @@ -456,12 +490,23 @@ func TestDialFakeApp(t *testing.T) { appProvider := newEchoAppProvider(map[string]testClusterSpec{ "root1.example.com": { - apps: []string{ - "echo1.root1.example.com", - "echo2.root1.example.com", - "echo.myzone.example.com", - "echo.nested.myzone.example.com", - "not.in.a.custom.zone", + apps: []appSpec{ + appSpec{publicAddr: "echo1.root1.example.com"}, + appSpec{publicAddr: "echo2.root1.example.com"}, + appSpec{publicAddr: "echo.myzone.example.com"}, + appSpec{publicAddr: "echo.nested.myzone.example.com"}, + appSpec{publicAddr: "not.in.a.custom.zone"}, + appSpec{ + publicAddr: "multi-port.root1.example.com", + tcpPorts: []*types.PortRange{ + &types.PortRange{ + Port: 1337, + }, + &types.PortRange{ + Port: 4242, + }, + }, + }, }, customDNSZones: []string{ "myzone.example.com", @@ -469,18 +514,38 @@ func TestDialFakeApp(t *testing.T) { cidrRange: "192.168.2.0/24", leafClusters: map[string]testClusterSpec{ "leaf1.example.com": { - apps: []string{"echo1.leaf1.example.com"}, + apps: []appSpec{ + appSpec{publicAddr: "echo1.leaf1.example.com"}, + appSpec{ + publicAddr: "multi-port.leaf1.example.com", + tcpPorts: []*types.PortRange{ + &types.PortRange{ + Port: 1337, + }, + &types.PortRange{ + Port: 4242, + }, + }, + }, + }, }, "leaf2.example.com": { - apps: []string{"echo1.leaf2.example.com"}, + apps: []appSpec{ + appSpec{publicAddr: "echo1.leaf2.example.com"}, + }, }, }, }, "root2.example.com": { - apps: []string{"echo1.root2.example.com", "echo2.root2.example.com"}, + apps: []appSpec{ + appSpec{publicAddr: "echo1.root2.example.com"}, + appSpec{publicAddr: "echo2.root2.example.com"}, + }, leafClusters: map[string]testClusterSpec{ "leaf3.example.com": { - apps: []string{"echo1.leaf3.example.com"}, + apps: []appSpec{ + appSpec{publicAddr: "echo1.leaf3.example.com"}, + }, }, }, }, @@ -493,6 +558,7 @@ func TestDialFakeApp(t *testing.T) { validTestCases := []struct { app string + port int expectCIDR string }{ { @@ -531,6 +597,16 @@ func TestDialFakeApp(t *testing.T) { app: "echo1.leaf3.example.com", expectCIDR: defaultIPv4CIDRRange, }, + { + app: "multi-port.root1.example.com", + port: 1337, + expectCIDR: "192.168.2.0/24", + }, + { + app: "multi-port.leaf1.example.com", + port: 1337, + expectCIDR: defaultIPv4CIDRRange, + }, } t.Run("valid", func(t *testing.T) { @@ -550,7 +626,8 @@ func TestDialFakeApp(t *testing.T) { _, expectNet, err := net.ParseCIDR(tc.expectCIDR) require.NoError(t, err) - conn, err := p.dialHost(ctx, tc.app) + const defaultPort = 80 + conn, err := p.dialHost(ctx, tc.app, cmp.Or(tc.port, defaultPort)) require.NoError(t, err) t.Cleanup(func() { assert.NoError(t, conn.Close()) }) @@ -565,6 +642,14 @@ func TestDialFakeApp(t *testing.T) { require.True(t, expectNet.Contains(remoteIPSuffix), "expected CIDR range %s does not include remote IP %s", expectNet, remoteIPSuffix) testEchoConnection(t, conn) + + // For multi-port apps, certs should have RouteToApp.TargetPort set to the specified + // cert. + // + // Single-port apps are going to be dialed on defaultPort in tests, but certs for them + // need to have RouteToApp.TargetPort set to 0. + require.True(t, appProvider.AreAllRequestedRouteToAppsForPort(tc.app, tc.port), + "not all requested certs had RouteToApp.TargetPort set to %d", tc.port) }) } }) @@ -627,7 +712,9 @@ func TestOnNewConnection(t *testing.T) { appProvider := newEchoAppProvider(map[string]testClusterSpec{ "root1.example.com": { - apps: []string{"echo1"}, + apps: []appSpec{ + appSpec{publicAddr: "echo1"}, + }, cidrRange: "192.168.2.0/24", leafClusters: map[string]testClusterSpec{}, }, @@ -650,7 +737,7 @@ func TestOnNewConnection(t *testing.T) { require.Equal(t, uint32(0), appProvider.onNewConnectionCallCount.Load()) // Establish a connection to a valid app and verify that OnNewConnection was called. - conn, err := p.dialHost(ctx, validAppName) + conn, err := p.dialHost(ctx, validAppName, 80 /* bogus port */) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, conn.Close()) }) require.Equal(t, uint32(1), appProvider.onNewConnectionCallCount.Load())