diff --git a/v1/image.go b/v1/image.go index c74e345..f0f1b31 100644 --- a/v1/image.go +++ b/v1/image.go @@ -102,10 +102,11 @@ func validateArchitecture(ctx context.Context, sshClient *ssh.Client) (string, e return "", fmt.Errorf("failed to check architecture: %w, stdout: %s, stderr: %s", err, stdout, stderr) } arch := strings.TrimSpace(stdout) - if !strings.Contains(arch, "x86_64") { - return "", fmt.Errorf("expected x86_64 architecture, got: %s", arch) + normalizedArch := GetArchitecture(arch) + if normalizedArch == ArchitectureUnknown { + return "", fmt.Errorf("unsupported architecture: %s", arch) } - return "x86_64", nil + return string(normalizedArch), nil } func validateOSVersion(ctx context.Context, sshClient *ssh.Client) (string, error) { diff --git a/v1/instancetype.go b/v1/instancetype.go index 77d9083..a58bc0b 100644 --- a/v1/instancetype.go +++ b/v1/instancetype.go @@ -42,10 +42,10 @@ const ( ) func GetArchitecture(architecture string) Architecture { - switch strings.ToLower(architecture) { - case "x86_64": + switch strings.ToLower(strings.TrimSpace(architecture)) { + case "x86_64", "amd64": return ArchitectureX86_64 - case "arm64": + case "arm64", "aarch64": return ArchitectureARM64 default: return ArchitectureUnknown diff --git a/v1/instancetype_test.go b/v1/instancetype_test.go new file mode 100644 index 0000000..d76b323 --- /dev/null +++ b/v1/instancetype_test.go @@ -0,0 +1,30 @@ +package v1 + +import "testing" + +func TestGetArchitectureAliases(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + raw string + want Architecture + }{ + {name: "x86_64", raw: "x86_64", want: ArchitectureX86_64}, + {name: "amd64", raw: "amd64", want: ArchitectureX86_64}, + {name: "arm64", raw: "arm64", want: ArchitectureARM64}, + {name: "aarch64", raw: "aarch64", want: ArchitectureARM64}, + {name: "trim and case", raw: " AARCH64\n", want: ArchitectureARM64}, + {name: "unknown", raw: "riscv64", want: ArchitectureUnknown}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + if got := GetArchitecture(tt.raw); got != tt.want { + t.Fatalf("GetArchitecture(%q) = %q, want %q", tt.raw, got, tt.want) + } + }) + } +} diff --git a/v1/providers/testkube/README.md b/v1/providers/testkube/README.md new file mode 100644 index 0000000..15b0c46 --- /dev/null +++ b/v1/providers/testkube/README.md @@ -0,0 +1,55 @@ +# Test Kubernetes Provider + +`test-kubernetes` is a test/non-prod-only provider that backs cloud instance lifecycle calls with Kubernetes resources. + +## Credentials + +The credential supports either a base64-encoded kubeconfig or in-cluster Kubernetes authentication: + +```go +type TestKubeCredential struct { + RefID string + AuthMode TestKubeAuthMode // "kubeconfig" or "in-cluster" + KubeconfigBase64 string + Namespace string +} +``` + +When `AuthMode` is empty, it defaults to `"kubeconfig"` for compatibility. When `AuthMode` is `"in-cluster"`, the provider uses `rest.InClusterConfig()` and requires `KubeconfigBase64` to be empty. This is intended for dev-plane running inside the same Kubernetes cluster it will use as the testkube target. The pod's Kubernetes service account must have RBAC permissions to manage the target namespace's testkube resources. + +### Mode: In-Cluster + +The `testkube` provider can be used as any other, with the caveat being that "VM" resources are actually represented by pods within the context k8s cluster. This allows "environments" to be spun up and down quickly and cheaply, even though they don't necessarily perfectly emulate cloud-provided VMs. + +### Mode: Kubeconfig + +Alternatively to the in-cluster mode, resources can also be hosted by an arbitrary kubernetes cluster. This cluster can be hosted (e.g.: another EKS cluster) but can also be running locally. For example, local validation can use minikube with `minikube tunnel`. The tunnel updates normal Kubernetes `LoadBalancer` Service status and makes the reported external IP reachable from the host, so the provider does not need local-cluster-specific endpoint translation. + +```bash +brew install minikube kubectl + +minikube start --driver=docker --profile testkube +kubectl config use-context testkube + +docker build -t ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest ./v1/providers/testkube/images/ubuntu-vm +minikube --profile testkube image load ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest +kubectl create namespace testkube + +# In another terminal, keep this running while validation runs. +sudo minikube --profile testkube tunnel + +# Polulate .env with the contents of: +kubectl config view --raw --minify | base64 | tr -d '\n' + +# .env +TESTKUBE_KUBECONFIG_BASE64= +TESTKUBE_NAMESPACE=testkube +``` + +For in-cluster validation, set `TESTKUBE_AUTH_MODE=in-cluster` and omit `TESTKUBE_KUBECONFIG_BASE64`. + +Clean up: + +```bash +minikube --profile testkube delete +``` \ No newline at end of file diff --git a/v1/providers/testkube/capabilities.go b/v1/providers/testkube/capabilities.go new file mode 100644 index 0000000..c2c2cc4 --- /dev/null +++ b/v1/providers/testkube/capabilities.go @@ -0,0 +1,23 @@ +package v1 + +import ( + "context" + + cloudv1 "github.com/brevdev/cloud/v1" +) + +func getTestKubeCapabilities() cloudv1.Capabilities { + return cloudv1.Capabilities{ + cloudv1.CapabilityCreateInstance, + cloudv1.CapabilityTerminateInstance, + cloudv1.CapabilityTags, + } +} + +func (c *TestKubeCredential) GetCapabilities(_ context.Context) (cloudv1.Capabilities, error) { + return getTestKubeCapabilities(), nil +} + +func (c *TestKubeClient) GetCapabilities(_ context.Context) (cloudv1.Capabilities, error) { + return getTestKubeCapabilities(), nil +} diff --git a/v1/providers/testkube/client.go b/v1/providers/testkube/client.go new file mode 100644 index 0000000..d89b413 --- /dev/null +++ b/v1/providers/testkube/client.go @@ -0,0 +1,303 @@ +package v1 + +import ( + "context" + "crypto/sha256" + "encoding/base64" + "fmt" + "strings" + "time" + + cloudv1 "github.com/brevdev/cloud/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" +) + +const ( + CloudProviderID = "testkube" + + DefaultNamespace = "default" + DefaultLocation = "test-local" + + servicePortName = "ssh" + servicePort = int32(22) + containerSSHPort = int32(22) +) + +type TestKubeAuthMode string + +const ( + TestKubeAuthModeKubeconfig TestKubeAuthMode = "kubeconfig" + TestKubeAuthModeInCluster TestKubeAuthMode = "in-cluster" +) + +// TestKubeCredential authenticates a developer test provider backed by Kubernetes. +type TestKubeCredential struct { + RefID string + AuthMode TestKubeAuthMode + KubeconfigBase64 string + Namespace string +} + +var _ cloudv1.CloudCredential = &TestKubeCredential{} + +var restInClusterConfig = rest.InClusterConfig + +func NewTestKubeCredential(refID, kubeconfigBase64, namespace string) *TestKubeCredential { + return &TestKubeCredential{ + RefID: refID, + AuthMode: TestKubeAuthModeKubeconfig, + KubeconfigBase64: kubeconfigBase64, + Namespace: namespace, + } +} + +func NewInClusterTestKubeCredential(refID, namespace string) *TestKubeCredential { + return &TestKubeCredential{ + RefID: refID, + AuthMode: TestKubeAuthModeInCluster, + Namespace: namespace, + } +} + +func (c *TestKubeCredential) GetReferenceID() string { + return c.RefID +} + +func (c *TestKubeCredential) GetAPIType() cloudv1.APIType { + return cloudv1.APITypeGlobal +} + +func (c *TestKubeCredential) GetCloudProviderID() cloudv1.CloudProviderID { + return CloudProviderID +} + +func (c *TestKubeCredential) GetTenantID() (string, error) { + authMode, err := c.validateAuthMode() + if err != nil { + return "", err + } + var fingerprint string + switch authMode { + case TestKubeAuthModeKubeconfig: + fingerprint = "kubeconfig:" + c.KubeconfigBase64 + ":" + c.Namespace + case TestKubeAuthModeInCluster: + fingerprint = "in-cluster:" + c.Namespace + default: + return "", fmt.Errorf("unknown testkube auth mode: %s", authMode) + } + return fmt.Sprintf("%s-%x", CloudProviderID, sha256.Sum256([]byte(fingerprint))), nil +} + +func (c *TestKubeCredential) MakeClient(ctx context.Context, location string) (cloudv1.CloudClient, error) { + return c.MakeClientWithOptions(ctx, location) +} + +func (c *TestKubeCredential) MakeClientWithOptions(_ context.Context, location string, opts ...TestKubeClientOption) (cloudv1.CloudClient, error) { + restConfig, err := c.restConfig() + if err != nil { + return nil, err + } + + clientOpts := []TestKubeClientOption{ + WithNamespace(c.Namespace), + WithLocation(firstNonEmpty(location, DefaultLocation)), + } + clientOpts = append(clientOpts, opts...) + return NewTestKubeClient(c.RefID, restConfig, clientOpts...) +} + +func (c *TestKubeCredential) restConfig() (*rest.Config, error) { + authMode, err := c.validateAuthMode() + if err != nil { + return nil, err + } + switch authMode { + case TestKubeAuthModeKubeconfig: + if c.KubeconfigBase64 == "" { + return nil, fmt.Errorf("kubeconfigBase64 is required") + } + kubeconfig, err := base64.StdEncoding.DecodeString(c.KubeconfigBase64) + if err != nil { + return nil, fmt.Errorf("decode kubeconfig: %w", err) + } + return clientcmd.RESTConfigFromKubeConfig(kubeconfig) + case TestKubeAuthModeInCluster: + return restInClusterConfig() + default: + return nil, fmt.Errorf("unknown testkube auth mode: %s", authMode) + } +} + +func (c *TestKubeCredential) authMode() TestKubeAuthMode { + authMode := TestKubeAuthMode(strings.TrimSpace(string(c.AuthMode))) + if authMode == "" { + return TestKubeAuthModeKubeconfig + } + return authMode +} + +func (c *TestKubeCredential) validateAuthMode() (TestKubeAuthMode, error) { + authMode := c.authMode() + switch authMode { + case TestKubeAuthModeKubeconfig: + return authMode, nil + case TestKubeAuthModeInCluster: + if c.KubeconfigBase64 != "" { + return "", fmt.Errorf("kubeconfigBase64 must be empty when authMode is %q", authMode) + } + return authMode, nil + default: + return "", fmt.Errorf("unknown testkube auth mode: %s", authMode) + } +} + +// TestKubeClient implements the CloudClient interface with Kubernetes primitives. +type TestKubeClient struct { + cloudv1.NotImplCloudClient + + refID string + namespace string + location string + k8sClient kubernetes.Interface + logger cloudv1.Logger +} + +var _ cloudv1.CloudClient = &TestKubeClient{} + +type testKubeClientOptions struct { + namespace string + location string + k8sClient kubernetes.Interface + logger cloudv1.Logger +} + +type TestKubeClientOption func(*testKubeClientOptions) error + +func WithNamespace(namespace string) TestKubeClientOption { + return func(options *testKubeClientOptions) error { + options.namespace = namespace + return nil + } +} + +func WithLocation(location string) TestKubeClientOption { + return func(options *testKubeClientOptions) error { + options.location = location + return nil + } +} + +func WithKubernetesClient(k8sClient kubernetes.Interface) TestKubeClientOption { + return func(options *testKubeClientOptions) error { + options.k8sClient = k8sClient + return nil + } +} + +func WithLogger(logger cloudv1.Logger) TestKubeClientOption { + return func(options *testKubeClientOptions) error { + options.logger = logger + return nil + } +} + +func NewTestKubeClient(refID string, config *rest.Config, opts ...TestKubeClientOption) (*TestKubeClient, error) { + options := testKubeClientOptions{ + namespace: DefaultNamespace, + location: DefaultLocation, + logger: &cloudv1.NoopLogger{}, + } + for _, opt := range opts { + if err := opt(&options); err != nil { + return nil, err + } + } + if strings.TrimSpace(refID) == "" { + return nil, fmt.Errorf("refID is required") + } + if strings.TrimSpace(options.namespace) == "" { + options.namespace = DefaultNamespace + } + if strings.TrimSpace(options.location) == "" { + options.location = DefaultLocation + } + if options.k8sClient == nil { + if config == nil { + return nil, fmt.Errorf("kubernetes rest config is required") + } + k8sClient, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("create kubernetes client: %w", err) + } + options.k8sClient = k8sClient + } + + return &TestKubeClient{ + refID: refID, + namespace: options.namespace, + location: options.location, + k8sClient: options.k8sClient, + logger: options.logger, + }, nil +} + +func (c *TestKubeClient) GetAPIType() cloudv1.APIType { + return cloudv1.APITypeGlobal +} + +func (c *TestKubeClient) GetCloudProviderID() cloudv1.CloudProviderID { + return CloudProviderID +} + +func (c *TestKubeClient) GetReferenceID() string { + return c.refID +} + +func (c *TestKubeClient) GetTenantID() (string, error) { + return fmt.Sprintf("%s-%x", CloudProviderID, sha256.Sum256([]byte(c.refID+c.namespace))), nil +} + +func (c *TestKubeClient) MakeClient(ctx context.Context, location string) (cloudv1.CloudClient, error) { + return c.MakeClientWithOptions(ctx, location) +} + +func (c *TestKubeClient) MakeClientWithOptions(_ context.Context, location string, opts ...TestKubeClientOption) (cloudv1.CloudClient, error) { + options := testKubeClientOptions{ + namespace: c.namespace, + location: c.location, + k8sClient: c.k8sClient, + logger: c.logger, + } + if location != "" { + options.location = location + } + for _, opt := range opts { + if err := opt(&options); err != nil { + return nil, err + } + } + c.namespace = options.namespace + c.location = options.location + c.k8sClient = options.k8sClient + c.logger = options.logger + return c, nil +} + +func (c *TestKubeClient) GetInstancePollTime() time.Duration { + return time.Second +} + +func (c *TestKubeClient) GetInstanceTypePollTime() time.Duration { + return time.Minute +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + if value != "" { + return value + } + } + return "" +} diff --git a/v1/providers/testkube/client_test.go b/v1/providers/testkube/client_test.go new file mode 100644 index 0000000..588a409 --- /dev/null +++ b/v1/providers/testkube/client_test.go @@ -0,0 +1,109 @@ +package v1 + +import ( + "context" + "testing" + + cloudv1 "github.com/brevdev/cloud/v1" + "github.com/stretchr/testify/require" + "k8s.io/client-go/rest" +) + +func TestCredentialRestConfigRequiresKubeconfigByDefault(t *testing.T) { + t.Parallel() + + credential := NewTestKubeCredential("test", "", "testkube") + + _, err := credential.restConfig() + require.EqualError(t, err, "kubeconfigBase64 is required") +} + +func TestCredentialRestConfigUsesInClusterAuthMode(t *testing.T) { + original := restInClusterConfig + t.Cleanup(func() { + restInClusterConfig = original + }) + + called := false + restInClusterConfig = func() (*rest.Config, error) { + called = true + return &rest.Config{Host: "https://kubernetes.default.svc"}, nil + } + + credential := NewInClusterTestKubeCredential("test", "testkube") + + config, err := credential.restConfig() + require.NoError(t, err) + require.True(t, called) + require.Equal(t, "https://kubernetes.default.svc", config.Host) +} + +func TestCredentialRestConfigRejectsMixedAuthConfig(t *testing.T) { + t.Parallel() + + credential := &TestKubeCredential{ + RefID: "test", + AuthMode: TestKubeAuthModeInCluster, + KubeconfigBase64: "not-allowed", + Namespace: "testkube", + } + + _, err := credential.restConfig() + require.EqualError(t, err, `kubeconfigBase64 must be empty when authMode is "in-cluster"`) +} + +func TestCredentialRestConfigRejectsUnknownAuthMode(t *testing.T) { + t.Parallel() + + credential := &TestKubeCredential{ + RefID: "test", + AuthMode: "token", + Namespace: "testkube", + } + + _, err := credential.restConfig() + require.EqualError(t, err, "unknown testkube auth mode: token") +} + +func TestCredentialMakeClientUsesInClusterConfig(t *testing.T) { + original := restInClusterConfig + t.Cleanup(func() { + restInClusterConfig = original + }) + + restInClusterConfig = func() (*rest.Config, error) { + return &rest.Config{Host: "https://kubernetes.default.svc"}, nil + } + + credential := NewInClusterTestKubeCredential("test", "testkube") + + client, err := credential.MakeClient(context.Background(), "staging") + require.NoError(t, err) + + testKubeClient, ok := client.(*TestKubeClient) + require.True(t, ok) + require.Equal(t, "testkube", testKubeClient.namespace) + require.Equal(t, "staging", testKubeClient.location) +} + +func TestCredentialMakeClientWithOptionsUsesLogger(t *testing.T) { + original := restInClusterConfig + t.Cleanup(func() { + restInClusterConfig = original + }) + + restInClusterConfig = func() (*rest.Config, error) { + return &rest.Config{Host: "https://kubernetes.default.svc"}, nil + } + + logger := &cloudv1.NoopLogger{} + credential := NewInClusterTestKubeCredential("test", "testkube") + + client, err := credential.MakeClientWithOptions(context.Background(), "staging", WithLogger(logger)) + require.NoError(t, err) + + testKubeClient, ok := client.(*TestKubeClient) + require.True(t, ok) + require.Same(t, logger, testKubeClient.logger) + require.Equal(t, "staging", testKubeClient.location) +} diff --git a/v1/providers/testkube/images/ubuntu-vm/Dockerfile b/v1/providers/testkube/images/ubuntu-vm/Dockerfile new file mode 100644 index 0000000..88b10ea --- /dev/null +++ b/v1/providers/testkube/images/ubuntu-vm/Dockerfile @@ -0,0 +1,42 @@ +FROM ubuntu:24.04 + +ENV container=docker +ENV USER_NAME=ubuntu +ENV SUDO_ACCESS=true + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + dbus \ + openssh-server \ + sudo \ + systemd \ + systemd-sysv \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + && if ! id ubuntu >/dev/null 2>&1; then useradd --create-home --shell /bin/bash ubuntu; fi \ + && passwd -d ubuntu >/dev/null 2>&1 || true \ + && mkdir -p /run/sshd /etc/docker /etc/testkube \ + && printf '{\n "storage-driver": "vfs"\n}\n' > /etc/docker/daemon.json \ + && ssh-keygen -A \ + && systemctl enable ssh.service \ + && systemctl mask \ + console-getty.service \ + dev-hugepages.mount \ + getty.target \ + sys-fs-fuse-connections.mount \ + systemd-logind.service + +COPY testkube-bootstrap /usr/local/sbin/testkube-bootstrap +COPY testkube-bootstrap.service /etc/systemd/system/testkube-bootstrap.service + +RUN chmod 0755 /usr/local/sbin/testkube-bootstrap \ + && systemctl enable testkube-bootstrap.service + +EXPOSE 22 + +STOPSIGNAL SIGRTMIN+3 +CMD ["/sbin/init"] diff --git a/v1/providers/testkube/images/ubuntu-vm/README.md b/v1/providers/testkube/images/ubuntu-vm/README.md new file mode 100644 index 0000000..5a84978 --- /dev/null +++ b/v1/providers/testkube/images/ubuntu-vm/README.md @@ -0,0 +1,55 @@ +# Testkube Ubuntu VM Image + +This image backs the `test.ok.cpu` testkube instance type: + +```text +ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest +``` + +## Publish to GHCR + +Authenticate Docker to GHCR with the GitHub CLI: + +```bash +gh auth status +gh auth refresh -h github.com -s write:packages +gh auth token | docker login ghcr.io -u "$(gh api user --jq .login)" --password-stdin +``` + +Build and push the image from the repository root. For EKS, publish an amd64 image because `test.ok.cpu` advertises `x86_64`: + +```bash +docker buildx build \ + --platform linux/amd64 \ + -t ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest \ + --push \ + ./v1/providers/testkube/images/ubuntu-vm +``` + +If you need both local Apple Silicon clusters and amd64 EKS nodes to pull the same tag, publish a multi-arch manifest: + +```bash +docker buildx build \ + --platform linux/amd64,linux/arm64 \ + -t ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest \ + --push \ + ./v1/providers/testkube/images/ubuntu-vm +``` + +You can also use an explicit token instead of `gh auth token`: + +```bash +echo "$GITHUB_TOKEN" | docker login ghcr.io -u "$GITHUB_USER" --password-stdin +``` + +The token needs `write:packages` to publish and `read:packages` for clusters pulling a private GHCR package. + +## Local Build + +For local minikube or kind validation where the image is loaded directly into the cluster, a normal local build is enough: + +```bash +docker build \ + -t ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest \ + ./v1/providers/testkube/images/ubuntu-vm +``` diff --git a/v1/providers/testkube/images/ubuntu-vm/testkube-bootstrap b/v1/providers/testkube/images/ubuntu-vm/testkube-bootstrap new file mode 100644 index 0000000..02263aa --- /dev/null +++ b/v1/providers/testkube/images/ubuntu-vm/testkube-bootstrap @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ "${TESTKUBE_FAIL_BUILD:-false}" == "true" ]]; then + echo "TESTKUBE_FAIL_BUILD=true; exiting testkube image" + systemctl --no-block exit 42 || true + exit 42 +fi + +user_name="${USER_NAME:-ubuntu}" + +if ! id "$user_name" >/dev/null 2>&1; then + useradd --create-home --shell /bin/bash "$user_name" +fi +passwd -d "$user_name" >/dev/null 2>&1 || true + +home_dir="$(getent passwd "$user_name" | cut -d: -f6)" +install -d -m 0700 -o "$user_name" -g "$user_name" "$home_dir/.ssh" + +if [[ -n "${PUBLIC_KEY:-}" ]]; then + printf '%s\n' "$PUBLIC_KEY" > "$home_dir/.ssh/authorized_keys" +else + : > "$home_dir/.ssh/authorized_keys" +fi +chown "$user_name:$user_name" "$home_dir/.ssh/authorized_keys" +chmod 0600 "$home_dir/.ssh/authorized_keys" + +if [[ "${SUDO_ACCESS:-true}" == "true" ]]; then + printf '%s ALL=(ALL) NOPASSWD:ALL\n' "$user_name" > /etc/sudoers.d/testkube-user + chmod 0440 /etc/sudoers.d/testkube-user +else + rm -f /etc/sudoers.d/testkube-user +fi + +mkdir -p /run/sshd /etc/testkube /etc/ssh/sshd_config.d +ssh-keygen -A + +password_access="no" +if [[ "${PASSWORD_ACCESS:-false}" == "true" ]]; then + password_access="yes" +fi + +cat > /etc/ssh/sshd_config.d/90-testkube.conf < /etc/testkube/scenario.env < 0 { + instance.SSHPort = int(service.Spec.Ports[0].Port) + } + + switch service.Spec.Type { + case corev1.ServiceTypeLoadBalancer: + // No ingress means no public IP. + if len(service.Status.LoadBalancer.Ingress) == 0 { + return + } + // Set the public IP to the first ingress IP. + ingress := service.Status.LoadBalancer.Ingress[0] + if ingress.IP != "" { + instance.PublicIP = ingress.IP + instance.PublicDNS = ingress.IP + } + if ingress.Hostname != "" { + instance.PublicDNS = ingress.Hostname + if instance.PublicIP == "" { + instance.PublicIP = ingress.Hostname + } + } + case corev1.ServiceTypeNodePort: + // No ports means no node port. + if len(service.Spec.Ports) == 0 { + return + } + // Set the SSH port to the first node port. + instance.SSHPort = int(service.Spec.Ports[0].NodePort) + case corev1.ServiceTypeClusterIP: + // Keep the cluster IP as the private IP. + } +} + +func statusFromResources(pod *corev1.Pod, service *corev1.Service) cloudv1.Status { + if pod.DeletionTimestamp != nil { + return cloudv1.Status{LifecycleStatus: cloudv1.LifecycleStatusTerminating} + } + if podFailed(*pod) { + return cloudv1.Status{ + LifecycleStatus: cloudv1.LifecycleStatusFailed, + Messages: podMessages(*pod), + } + } + if podReady(*pod) { + if service == nil { + return cloudv1.Status{ + LifecycleStatus: cloudv1.LifecycleStatusPending, + Messages: append(podMessages(*pod), "waiting for service"), + } + } + if service.Spec.Type == corev1.ServiceTypeLoadBalancer && !loadBalancerReady(service) { + return cloudv1.Status{ + LifecycleStatus: cloudv1.LifecycleStatusPending, + Messages: append(podMessages(*pod), fmt.Sprintf("service %s waiting for load balancer ingress", service.Name)), + } + } + return cloudv1.Status{ + LifecycleStatus: cloudv1.LifecycleStatusRunning, + Messages: podMessages(*pod), + } + } + return cloudv1.Status{ + LifecycleStatus: cloudv1.LifecycleStatusPending, + Messages: podMessages(*pod), + } +} + +func loadBalancerReady(service *corev1.Service) bool { + for _, ingress := range service.Status.LoadBalancer.Ingress { + if ingress.IP != "" || ingress.Hostname != "" { + return true + } + } + return false +} + +func podReady(pod corev1.Pod) bool { + if pod.Status.Phase != corev1.PodRunning { + return false + } + for _, condition := range pod.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + return true + } + } + return false +} + +func podFailed(pod corev1.Pod) bool { + if pod.Status.Phase == corev1.PodFailed { + return true + } + for _, status := range pod.Status.ContainerStatuses { + if status.State.Terminated != nil && status.State.Terminated.ExitCode != 0 { + return true + } + if status.State.Waiting != nil && isFailureWaitingReason(status.State.Waiting.Reason) { + return true + } + } + return false +} + +func isFailureWaitingReason(reason string) bool { + switch reason { + case "CrashLoopBackOff", "CreateContainerConfigError", "ErrImagePull", "ImagePullBackOff", "InvalidImageName": + return true + default: + return false + } +} + +func podMessages(pod corev1.Pod) []string { + messages := []string{} + if pod.Status.Phase != "" { + messages = append(messages, fmt.Sprintf("%s: phase=%s", pod.Name, pod.Status.Phase)) + } + for _, condition := range pod.Status.Conditions { + if condition.Message != "" { + messages = append(messages, fmt.Sprintf("%s: %s", pod.Name, condition.Message)) + } + } + for _, status := range pod.Status.ContainerStatuses { + if status.State.Waiting != nil { + message := status.State.Waiting.Reason + if status.State.Waiting.Message != "" { + message += ": " + status.State.Waiting.Message + } + messages = append(messages, fmt.Sprintf("%s/%s waiting: %s", pod.Name, status.Name, message)) + } + if status.State.Terminated != nil { + message := status.State.Terminated.Reason + if status.State.Terminated.Message != "" { + message += ": " + status.State.Terminated.Message + } + messages = append(messages, fmt.Sprintf("%s/%s terminated: %s", pod.Name, status.Name, message)) + } + } + return messages +} + +func createdAt(pod *corev1.Pod) time.Time { + if pod.Annotations != nil { + if createdAtRaw := pod.Annotations[annotationCreatedAt]; createdAtRaw != "" { + if parsed, err := time.Parse(time.RFC3339Nano, createdAtRaw); err == nil { + return parsed + } + } + } + return pod.CreationTimestamp.Time +} + +func matchesListArgs(instance cloudv1.Instance, args cloudv1.ListInstancesArgs) bool { + if len(args.InstanceIDs) > 0 && !containsInstanceID(args.InstanceIDs, instance.CloudID) { + return false + } + if len(args.Locations) > 0 && !args.Locations.IsAll() && !args.Locations.IsAllowed(instance.Location) { + return false + } + for tagKey, allowedValues := range args.TagFilters { + tagValue, ok := instance.Tags[tagKey] + if !ok { + return false + } + if len(allowedValues) > 0 && !slices.Contains(allowedValues, tagValue) { + return false + } + } + return true +} + +func containsInstanceID(values []cloudv1.CloudProviderInstanceID, value cloudv1.CloudProviderInstanceID) bool { + for _, v := range values { + if v == value { + return true + } + } + return false +} + +func selectorLabels(cloudID string) map[string]string { + return map[string]string{ + labelCloudID: cloudID, + } +} + +func objectLabels(cloudID string, location string) map[string]string { + labels := selectorLabels(cloudID) + labels[labelName] = labelNameValue + labels[labelManagedBy] = labelManagedByValue + labels[labelLocation] = sanitizeLabelValue(location) + return labels +} + +func makeCloudID(credentialRefID string, refID string) cloudv1.CloudProviderInstanceID { + sum := sha256.Sum256([]byte(credentialRefID + ":" + refID)) + return cloudv1.CloudProviderInstanceID("tk-" + hex.EncodeToString(sum[:])[:20]) +} + +func scenarioForInstanceType(instanceType string) string { + return strings.TrimPrefix(instanceType, "test.") +} + +func marshalTags(tags cloudv1.Tags) (string, error) { + if tags == nil { + tags = cloudv1.Tags{} + } + tagsBytes, err := json.Marshal(tags) + if err != nil { + return "", fmt.Errorf("marshal testkube tags: %w", err) + } + return string(tagsBytes), nil +} + +func tagsFromAnnotations(annotations map[string]string) cloudv1.Tags { + tags := cloudv1.Tags{} + if annotations == nil || annotations[annotationTagsJSON] == "" { + return tags + } + if err := json.Unmarshal([]byte(annotations[annotationTagsJSON]), &tags); err != nil { + return cloudv1.Tags{} + } + return tags +} + +var invalidLabelValueCharPattern = regexp.MustCompile(`[^a-z0-9_.-]`) + +const maxLabelValueLength = 63 + +func sanitizeLabelValue(value string) string { + sanitized := invalidLabelValueCharPattern.ReplaceAllString(strings.ToLower(value), "-") + sanitized = strings.Trim(sanitized, "-_.") + if len(sanitized) > maxLabelValueLength { + sanitized = sanitized[:maxLabelValueLength] + sanitized = strings.TrimRight(sanitized, "-_.") + } + if sanitized == "" { + return "unknown" + } + return sanitized +} + +func sshFirewallRules() cloudv1.FirewallRules { + rule := cloudv1.FirewallRule{ + FromPort: servicePort, + ToPort: servicePort, + IPRanges: []string{"0.0.0.0/0"}, + } + return cloudv1.FirewallRules{ + IngressRules: []cloudv1.FirewallRule{rule}, + EgressRules: []cloudv1.FirewallRule{rule}, + } +} + +func int64Ptr(value int64) *int64 { + return &value +} + +func boolPtr(value bool) *bool { + return &value +} diff --git a/v1/providers/testkube/instance_test.go b/v1/providers/testkube/instance_test.go new file mode 100644 index 0000000..8d97ab8 --- /dev/null +++ b/v1/providers/testkube/instance_test.go @@ -0,0 +1,237 @@ +package v1 + +import ( + "context" + "errors" + "testing" + + cloudv1 "github.com/brevdev/cloud/v1" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestCreateInstanceProvisionFailures(t *testing.T) { + ctx := context.Background() + + for _, tc := range []struct { + name string + instanceType string + expectedErr error + }{ + { + name: "capacity", + instanceType: InstanceTypeFailCapacity, + expectedErr: cloudv1.ErrInsufficientResources, + }, + { + name: "quota", + instanceType: InstanceTypeFailQuota, + expectedErr: cloudv1.ErrOutOfQuota, + }, + } { + t.Run(tc.name, func(t *testing.T) { + client := newTestClient(t) + + instance, err := client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: tc.name, + Name: tc.name, + InstanceType: tc.instanceType, + }) + require.Nil(t, instance) + require.ErrorIs(t, err, tc.expectedErr) + + pods, err := client.k8sClient.CoreV1().Pods(client.namespace).List(ctx, metav1.ListOptions{}) + require.NoError(t, err) + require.Empty(t, pods.Items) + + services, err := client.k8sClient.CoreV1().Services(client.namespace).List(ctx, metav1.ListOptions{}) + require.NoError(t, err) + require.Empty(t, services.Items) + }) + } +} + +func TestInstanceLifecycle(t *testing.T) { //nolint:funlen // test ok + ctx := context.Background() + client := newTestClient(t) + + instance, err := client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: "env-1", + Name: "dev env", + InstanceType: InstanceTypeOKCPU, + PublicKey: "ssh-rsa test", + Tags: cloudv1.Tags{ + "purpose": "test", + }, + }) + require.NoError(t, err) + require.Equal(t, cloudv1.LifecycleStatusPending, instance.Status.LifecycleStatus) + require.Equal(t, "env-1", instance.RefID) + require.Equal(t, "test-credential", instance.CloudCredRefID) + require.Equal(t, InstanceTypeOKCPU, instance.InstanceType) + spec, ok := getInstanceTypeSpec(InstanceTypeOKCPU) + require.True(t, ok) + require.Equal(t, spec.imageID, instance.ImageID) + + listed, err := client.ListInstances(ctx, cloudv1.ListInstancesArgs{ + TagFilters: map[string][]string{ + "purpose": {"test"}, + }, + }) + require.NoError(t, err) + require.Len(t, listed, 1) + + require.ErrorIs(t, client.StopInstance(ctx, instance.CloudID), cloudv1.ErrNotImplemented) + require.ErrorIs(t, client.StartInstance(ctx, instance.CloudID), cloudv1.ErrNotImplemented) + require.ErrorIs(t, client.RebootInstance(ctx, instance.CloudID), cloudv1.ErrNotImplemented) + setPodReady(t, client, instance.CloudID) + + pendingLB, err := client.GetInstance(ctx, instance.CloudID) + require.NoError(t, err) + require.Equal(t, cloudv1.LifecycleStatusPending, pendingLB.Status.LifecycleStatus) + require.Contains(t, pendingLB.Status.Messages, "service "+string(instance.CloudID)+" waiting for load balancer ingress") + + setServiceLoadBalancerIngress(t, client, instance.CloudID, "203.0.113.10", "") + + running, err := client.GetInstance(ctx, instance.CloudID) + require.NoError(t, err) + require.Equal(t, cloudv1.LifecycleStatusRunning, running.Status.LifecycleStatus) + require.Equal(t, string(instance.CloudID), running.Hostname) + require.Equal(t, "203.0.113.10", running.PublicIP) + require.Equal(t, "203.0.113.10", running.PublicDNS) + require.Equal(t, 22, running.SSHPort) + + require.NoError(t, client.UpdateInstanceTags(ctx, cloudv1.UpdateInstanceTagsArgs{ + InstanceID: instance.CloudID, + Tags: cloudv1.Tags{ + "purpose": "updated", + }, + })) + updated, err := client.GetInstance(ctx, instance.CloudID) + require.NoError(t, err) + require.Equal(t, "updated", updated.Tags["purpose"]) + + require.NoError(t, client.TerminateInstance(ctx, instance.CloudID)) + _, err = client.GetInstance(ctx, instance.CloudID) + require.True(t, errors.Is(err, cloudv1.ErrInstanceNotFound)) +} + +func TestScenarioEnvironment(t *testing.T) { + ctx := context.Background() + client := newTestClient(t) + + instance, err := client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: "build", + Name: "build", + InstanceType: InstanceTypeFailBuild, + }) + require.NoError(t, err) + + pod, err := client.k8sClient.CoreV1().Pods(client.namespace).Get(ctx, string(instance.CloudID), metav1.GetOptions{}) + require.NoError(t, err) + service, err := client.k8sClient.CoreV1().Services(client.namespace).Get(ctx, string(instance.CloudID), metav1.GetOptions{}) + require.NoError(t, err) + spec, ok := getInstanceTypeSpec(InstanceTypeFailBuild) + require.True(t, ok) + require.Equal(t, spec.serviceType, service.Spec.Type) + require.Zero(t, service.Spec.Ports[0].NodePort) + container := pod.Spec.Containers[0] + require.Equal(t, spec.image, container.Image) + require.Zero(t, container.Ports[0].HostPort) + envByName := envMap(container.Env) + require.Equal(t, "fail.build", envByName[envScenario]) + require.Equal(t, "true", envByName[envFailBuild]) +} + +func TestInstanceUsesBakedImageSpec(t *testing.T) { + ctx := context.Background() + client := newTestClient(t) + + instance, err := client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: "image-spec", + Name: "image spec", + InstanceType: InstanceTypeOKCPU, + }) + require.NoError(t, err) + spec, ok := getInstanceTypeSpec(InstanceTypeOKCPU) + require.True(t, ok) + require.Equal(t, spec.imageID, instance.ImageID) + + pod, err := client.k8sClient.CoreV1().Pods(client.namespace).Get(ctx, string(instance.CloudID), metav1.GetOptions{}) + require.NoError(t, err) + container := pod.Spec.Containers[0] + require.Equal(t, spec.image, container.Image) + require.NotNil(t, container.ReadinessProbe) + require.NotNil(t, container.ReadinessProbe.TCPSocket) + require.Equal(t, containerSSHPort, container.ReadinessProbe.TCPSocket.Port.IntVal) + for _, mount := range container.VolumeMounts { + require.NotEqual(t, "/sys/fs/cgroup", mount.MountPath) + } +} + +func TestPopulateNetworkLoadBalancer(t *testing.T) { + instance := &cloudv1.Instance{} + populateNetwork(&corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + ClusterIP: "10.96.119.41", + Ports: []corev1.ServicePort{ + { + Port: 22, + }, + }, + }, + Status: corev1.ServiceStatus{ + LoadBalancer: corev1.LoadBalancerStatus{ + Ingress: []corev1.LoadBalancerIngress{ + {Hostname: "testkube.example.com"}, + }, + }, + }, + }, instance) + + require.Equal(t, "10.96.119.41", instance.PrivateIP) + require.Equal(t, "testkube.example.com", instance.PublicIP) + require.Equal(t, "testkube.example.com", instance.PublicDNS) + require.Equal(t, 22, instance.SSHPort) +} + +func setPodReady(t *testing.T, client *TestKubeClient, instanceID cloudv1.CloudProviderInstanceID) { + t.Helper() + + pod, err := client.k8sClient.CoreV1().Pods(client.namespace).Get(context.Background(), string(instanceID), metav1.GetOptions{}) + require.NoError(t, err) + pod.Status.Phase = corev1.PodRunning + pod.Status.Conditions = []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + } + _, err = client.k8sClient.CoreV1().Pods(client.namespace).UpdateStatus(context.Background(), pod, metav1.UpdateOptions{}) + require.NoError(t, err) +} + +func setServiceLoadBalancerIngress(t *testing.T, client *TestKubeClient, instanceID cloudv1.CloudProviderInstanceID, ip, hostname string) { + t.Helper() + + service, err := client.k8sClient.CoreV1().Services(client.namespace).Get(context.Background(), string(instanceID), metav1.GetOptions{}) + require.NoError(t, err) + service.Status.LoadBalancer.Ingress = []corev1.LoadBalancerIngress{ + { + IP: ip, + Hostname: hostname, + }, + } + _, err = client.k8sClient.CoreV1().Services(client.namespace).UpdateStatus(context.Background(), service, metav1.UpdateOptions{}) + require.NoError(t, err) +} + +func envMap(envVars []corev1.EnvVar) map[string]string { + envByName := map[string]string{} + for _, envVar := range envVars { + envByName[envVar.Name] = envVar.Value + } + return envByName +} diff --git a/v1/providers/testkube/instancetype.go b/v1/providers/testkube/instancetype.go new file mode 100644 index 0000000..152f35e --- /dev/null +++ b/v1/providers/testkube/instancetype.go @@ -0,0 +1,163 @@ +package v1 + +import ( + "context" + "slices" + "time" + + "github.com/alecthomas/units" + "github.com/bojanz/currency" + cloudv1 "github.com/brevdev/cloud/v1" + corev1 "k8s.io/api/core/v1" +) + +const ( + DefaultImageID = "testkube-ubuntu-vm" + DefaultImage = "ghcr.io/brevdev/cloud/testkube-ubuntu-vm:latest" + + DefaultPriceCentsPerHour = 1 + + InstanceTypeOKCPU = "test.ok.cpu" + InstanceTypeFailCapacity = "test.fail.capacity" + InstanceTypeFailQuota = "test.fail.quota" + InstanceTypeFailBuild = "test.fail.build" // TODO: trigger build failure, maybe with a process that monitors build? +) + +// instanceTypeSpec is used mainly as a tuple of instance type (from devplane) and service type (from k8s). When a request +// for instance provisioning is made, we need to determine the appropriate service type to use based on the incoming instance type. +type instanceTypeSpec struct { + instanceType cloudv1.InstanceType + imageID string + image string + serviceType corev1.ServiceType +} + +var allInstanceTypeSpecs = []instanceTypeSpec{ + makeInstanceTypeSpec(InstanceTypeOKCPU), + makeInstanceTypeSpec(InstanceTypeFailCapacity), + makeInstanceTypeSpec(InstanceTypeFailQuota), + makeInstanceTypeSpec(InstanceTypeFailBuild), +} + +func makeInstanceTypeSpec(instanceType string) instanceTypeSpec { + estimatedDeployTime := 20 * time.Second + return instanceTypeSpec{ + instanceType: makeCPUInstanceType(instanceType, true, &estimatedDeployTime), + imageID: DefaultImageID, + image: DefaultImage, + serviceType: corev1.ServiceTypeLoadBalancer, + } +} + +func (c *TestKubeClient) GetInstanceTypes(_ context.Context, args cloudv1.GetInstanceTypeArgs) ([]cloudv1.InstanceType, error) { + // Instance types are statically defined, but in the future we should consider dynamic types, with capacity numbers based on + // test input or devplane configuration. + instanceTypes := c.allInstanceTypes() + + // Filter the instance types as any normal provider would do. + instanceTypes = filterInstanceTypes(instanceTypes, args) + return instanceTypes, nil +} + +func (c *TestKubeClient) allInstanceTypes() []cloudv1.InstanceType { + instanceTypes := make([]cloudv1.InstanceType, 0, len(allInstanceTypeSpecs)) + for _, spec := range allInstanceTypeSpecs { + instanceTypes = append(instanceTypes, c.instanceTypeSpecToBrevInstanceType(spec)) + } + return instanceTypes +} + +func filterInstanceTypes(instanceTypes []cloudv1.InstanceType, args cloudv1.GetInstanceTypeArgs) []cloudv1.InstanceType { + filtered := make([]cloudv1.InstanceType, 0, len(instanceTypes)) + for _, instanceType := range instanceTypes { + if len(args.Locations) > 0 && !args.Locations.IsAll() && !args.Locations.IsAllowed(instanceType.Location) { + continue + } + if len(args.InstanceTypes) > 0 && !slices.Contains(args.InstanceTypes, instanceType.Type) { + continue + } + if args.CloudFilter != nil && !args.CloudFilter.IsAllowed(instanceType.Cloud) { + continue + } + if args.ArchitectureFilter != nil && !isArchitectureAllowed(args.ArchitectureFilter, instanceType.SupportedArchitectures) { + continue + } + if args.GPUManufactererFilter != nil && !isGPUManufacturerAllowed(args.GPUManufactererFilter, instanceType.SupportedGPUs) { + continue + } + filtered = append(filtered, instanceType) + } + return filtered +} + +func isArchitectureAllowed(filter *cloudv1.ArchitectureFilter, architectures []cloudv1.Architecture) bool { + for _, architecture := range architectures { + if filter.IsAllowed(architecture) { + return true + } + } + return false +} + +func isGPUManufacturerAllowed(filter *cloudv1.GPUManufacturerFilter, gpus []cloudv1.GPU) bool { + if len(gpus) == 0 { + return true // NB: always return CPU types + } + for _, gpu := range gpus { + if filter.IsAllowed(gpu.Manufacturer) { + return true + } + } + return false +} + +func (c *TestKubeClient) instanceTypeSpecToBrevInstanceType(spec instanceTypeSpec) cloudv1.InstanceType { + instanceType := spec.instanceType + instanceType.Location = c.location + instanceType.ID = cloudv1.MakeGenericInstanceTypeID(instanceType) + return instanceType +} + +func makeCPUInstanceType(instanceType string, available bool, estimatedDeployTime *time.Duration) cloudv1.InstanceType { + basePrice, _ := currency.NewAmountFromInt64(DefaultPriceCentsPerHour, "USD") + it := cloudv1.InstanceType{ + Type: instanceType, + SupportedStorage: []cloudv1.Storage{ + { + Type: "ephemeral", + Count: 1, + Size: units.GiB * 20, + SizeBytes: cloudv1.NewBytes(20, cloudv1.Gibibyte), + IsEphemeral: true, + }, + }, + ElasticRootVolume: false, + SupportedUsageClasses: []string{"on-demand"}, + Memory: units.GiB * 4, + MemoryBytes: cloudv1.NewBytes(4, cloudv1.Gibibyte), + SupportedNumCores: []int32{2}, + DefaultCores: 2, + VCPU: 2, + SupportedArchitectures: []cloudv1.Architecture{ + cloudv1.ArchitectureX86_64, + }, + Stoppable: false, + Rebootable: false, + IsAvailable: available, + BasePrice: &basePrice, + IsContainer: false, + EstimatedDeployTime: estimatedDeployTime, + Provider: CloudProviderID, + Cloud: CloudProviderID, + } + return it +} + +func getInstanceTypeSpec(instanceType string) (instanceTypeSpec, bool) { + for _, spec := range allInstanceTypeSpecs { + if spec.instanceType.Type == instanceType { + return spec, true + } + } + return instanceTypeSpec{}, false +} diff --git a/v1/providers/testkube/instancetype_test.go b/v1/providers/testkube/instancetype_test.go new file mode 100644 index 0000000..0762a81 --- /dev/null +++ b/v1/providers/testkube/instancetype_test.go @@ -0,0 +1,74 @@ +package v1 + +import ( + "context" + "testing" + + cloudv1 "github.com/brevdev/cloud/v1" + "github.com/stretchr/testify/require" + "k8s.io/client-go/kubernetes/fake" +) + +func TestGetInstanceTypes(t *testing.T) { + client := newTestClient(t) + + instanceTypes, err := client.GetInstanceTypes(context.Background(), cloudv1.GetInstanceTypeArgs{}) + require.NoError(t, err) + require.Len(t, instanceTypes, 4) + + instanceTypeByName := map[string]cloudv1.InstanceType{} + for _, instanceType := range instanceTypes { + instanceTypeByName[instanceType.Type] = instanceType + } + + for _, expected := range []string{ + InstanceTypeOKCPU, + InstanceTypeFailCapacity, + InstanceTypeFailQuota, + InstanceTypeFailBuild, + } { + instanceType, ok := instanceTypeByName[expected] + require.True(t, ok, "missing instance type %s", expected) + require.True(t, instanceType.IsAvailable) + require.Equal(t, CloudProviderID, instanceType.Provider) + require.Equal(t, CloudProviderID, instanceType.Cloud) + require.NotNil(t, instanceType.BasePrice) + + basePrice, err := instanceType.BasePrice.Int64() + require.NoError(t, err) + require.Greater(t, basePrice, int64(0)) + } +} + +func TestGetInstanceTypesWithGPUManufacturerFilterIncludesCPU(t *testing.T) { + client := newTestClient(t) + + instanceTypes, err := client.GetInstanceTypes(context.Background(), cloudv1.GetInstanceTypeArgs{ + GPUManufactererFilter: &cloudv1.GPUManufacturerFilter{ + IncludeGPUManufacturers: []cloudv1.Manufacturer{cloudv1.ManufacturerNVIDIA}, + }, + }) + require.NoError(t, err) + require.Len(t, instanceTypes, 4) +} + +func TestCapabilitiesDoNotAdvertiseImages(t *testing.T) { + client := newTestClient(t) + + capabilities, err := client.GetCapabilities(context.Background()) + require.NoError(t, err) + require.True(t, capabilities.IsCapable(cloudv1.CapabilityCreateInstance)) + require.False(t, capabilities.IsCapable(cloudv1.CapabilityMachineImage)) +} + +func newTestClient(t *testing.T) *TestKubeClient { + t.Helper() + + client, err := NewTestKubeClient("test-credential", nil, + WithKubernetesClient(fake.NewSimpleClientset()), + WithNamespace("testkube"), + WithLocation("local"), + ) + require.NoError(t, err) + return client +} diff --git a/v1/providers/testkube/location.go b/v1/providers/testkube/location.go new file mode 100644 index 0000000..11c0605 --- /dev/null +++ b/v1/providers/testkube/location.go @@ -0,0 +1,18 @@ +package v1 + +import ( + "context" + + cloudv1 "github.com/brevdev/cloud/v1" +) + +func (c *TestKubeClient) GetLocations(_ context.Context, _ cloudv1.GetLocationsArgs) ([]cloudv1.Location, error) { + return []cloudv1.Location{ + { + Name: c.location, + Description: "Developer test Kubernetes cluster", + Available: true, + Country: "USA", + }, + }, nil +} diff --git a/v1/providers/testkube/validation_test.go b/v1/providers/testkube/validation_test.go new file mode 100644 index 0000000..68b913e --- /dev/null +++ b/v1/providers/testkube/validation_test.go @@ -0,0 +1,283 @@ +package v1 + +import ( + "context" + "encoding/base64" + "net" + "os" + "strconv" + "strings" + "sync" + "testing" + "time" + + internalssh "github.com/brevdev/cloud/internal/ssh" + "github.com/brevdev/cloud/internal/validation" + cloudv1 "github.com/brevdev/cloud/v1" + "github.com/stretchr/testify/require" + gossh "golang.org/x/crypto/ssh" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + testKubeKubeconfigBase64EnvVar = "TESTKUBE_KUBECONFIG_BASE64" + testKubeAuthModeEnvVar = "TESTKUBE_AUTH_MODE" + testKubeNamespaceEnvVar = "TESTKUBE_NAMESPACE" + testKubeLocationEnvVar = "TESTKUBE_LOCATION" +) + +var ( + validationSSHKeysOnce sync.Once + validationSSHKeysErr error +) + +func TestValidationFunctions(t *testing.T) { + t.Parallel() + checkValidationSkip(t) + + location := testKubeLocation() + config := validation.ProviderConfig{ + Credential: testKubeCredential(), + Location: location, + StableIDs: []cloudv1.InstanceTypeID{ + cloudv1.InstanceTypeID(location + "-noSub-" + InstanceTypeOKCPU), + }, + } + + validation.RunValidationSuite(t, config) +} + +func TestInstanceLifecycleValidation(t *testing.T) { + t.Parallel() + checkValidationSkip(t) + + location := testKubeLocation() + config := validation.ProviderConfig{ + Credential: testKubeCredential(), + Location: location, + StableIDs: []cloudv1.InstanceTypeID{ + cloudv1.InstanceTypeID(location + "-noSub-" + InstanceTypeOKCPU), + }, + } + + validation.RunInstanceLifecycleValidation(t, config) +} + +func TestFailureInstanceTypesValidation(t *testing.T) { + t.Parallel() + checkValidationSkip(t) + + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + t.Cleanup(cancel) + + client, err := testKubeCredential().MakeClient(ctx, testKubeLocation()) + require.NoError(t, err) + + _, err = client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: "validation-capacity", + Name: "validation-capacity", + Location: testKubeLocation(), + InstanceType: InstanceTypeFailCapacity, + }) + require.ErrorIs(t, err, cloudv1.ErrInsufficientResources) + + _, err = client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: "validation-quota", + Name: "validation-quota", + Location: testKubeLocation(), + InstanceType: InstanceTypeFailQuota, + }) + require.ErrorIs(t, err, cloudv1.ErrOutOfQuota) +} + +func TestImageBackedInstanceValidation(t *testing.T) { + checkValidationSkip(t) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + t.Cleanup(cancel) + + client := testKubeImageClient(t) + refID := "validation-image-ok-cpu-" + strconv.FormatInt(time.Now().UnixNano(), 36) + + instance, err := client.CreateInstance(ctx, cloudv1.CreateInstanceAttrs{ + RefID: refID, + Name: refID, + Location: testKubeLocation(), + InstanceType: InstanceTypeOKCPU, + PublicKey: testAuthorizedKey(t), + Tags: cloudv1.Tags{ + "test": "image-validation", + }, + }) + require.NoError(t, err) + require.NotNil(t, instance) + t.Cleanup(func() { + _ = client.TerminateInstance(context.Background(), instance.CloudID) + }) + + pod, err := client.k8sClient.CoreV1().Pods(client.namespace).Get(ctx, string(instance.CloudID), metav1.GetOptions{}) + require.NoError(t, err) + require.Equal(t, DefaultImage, pod.Spec.Containers[0].Image) + + instance = waitForValidationInstanceStatus(ctx, t, client, instance.CloudID, cloudv1.LifecycleStatusRunning, 4*time.Minute) + require.NotEmpty(t, instance.Hostname) + require.NotEmpty(t, instance.PublicIP) + require.NotZero(t, instance.SSHPort) + + sshCtx, cancelSSH := context.WithTimeout(ctx, 2*time.Minute) + t.Cleanup(cancelSSH) + require.NoError(t, internalssh.WaitForSSH(sshCtx, internalssh.ConnectionConfig{ + User: instance.SSHUser, + HostPort: net.JoinHostPort(instance.PublicIP, strconv.Itoa(instance.SSHPort)), + PrivKey: internalssh.DoNotUseDummyPrivateKey, + }, internalssh.WaitForSSHOptions{ + Timeout: 90 * time.Second, + ConnectionTimeout: 10 * time.Second, + CheckCmd: "sudo -n true && command -v apt-get && command -v systemctl && systemctl list-units --type=service --no-pager >/dev/null", + WaitTime: 2 * time.Second, + })) +} + +func checkValidationSkip(t *testing.T) { + t.Helper() + + kubeconfigBase64 := os.Getenv(testKubeKubeconfigBase64EnvVar) + authMode := testKubeAuthMode() + isValidationTest := os.Getenv("VALIDATION_TEST") + if authMode == TestKubeAuthModeKubeconfig && kubeconfigBase64 == "" && isValidationTest != "" { + t.Fatalf("%s not set, but VALIDATION_TEST is set", testKubeKubeconfigBase64EnvVar) + } + if authMode == TestKubeAuthModeKubeconfig && kubeconfigBase64 == "" { + t.Skipf("%s not set, skipping testkube validation tests", testKubeKubeconfigBase64EnvVar) + } + ensureValidationSSHKeys(t) +} + +func testKubeCredential() *TestKubeCredential { + if testKubeAuthMode() == TestKubeAuthModeInCluster { + return NewInClusterTestKubeCredential("validation-test", testKubeNamespace()) + } + return NewTestKubeCredential("validation-test", os.Getenv(testKubeKubeconfigBase64EnvVar), testKubeNamespace()) +} + +func testKubeImageClient(t *testing.T) *TestKubeClient { + t.Helper() + + credential := testKubeCredential() + restConfig, err := credential.restConfig() + require.NoError(t, err) + + client, err := NewTestKubeClient(credential.RefID, restConfig, + WithNamespace(testKubeNamespace()), + WithLocation(testKubeLocation()), + ) + require.NoError(t, err) + return client +} + +func testKubeNamespace() string { + if namespace := os.Getenv(testKubeNamespaceEnvVar); namespace != "" { + return namespace + } + return DefaultNamespace +} + +func testKubeAuthMode() TestKubeAuthMode { + if authMode := os.Getenv(testKubeAuthModeEnvVar); authMode != "" { + return TestKubeAuthMode(authMode) + } + return TestKubeAuthModeKubeconfig +} + +func testKubeLocation() string { + if location := os.Getenv(testKubeLocationEnvVar); location != "" { + return location + } + return DefaultLocation +} + +func testAuthorizedKey(t *testing.T) string { + t.Helper() + + authorizedKey, err := defaultAuthorizedKey() + require.NoError(t, err) + return authorizedKey +} + +func defaultAuthorizedKey() (string, error) { + signer, err := gossh.ParsePrivateKey([]byte(internalssh.DoNotUseDummyPrivateKey)) + if err != nil { + return "", err + } + return strings.TrimSpace(string(gossh.MarshalAuthorizedKey(signer.PublicKey()))), nil +} + +func ensureValidationSSHKeys(t *testing.T) { + t.Helper() + + validationSSHKeysOnce.Do(func() { + if os.Getenv("TEST_PRIVATE_KEY_BASE64") == "" { + validationSSHKeysErr = os.Setenv( + "TEST_PRIVATE_KEY_BASE64", + base64.StdEncoding.EncodeToString([]byte(internalssh.DoNotUseDummyPrivateKey)), + ) + if validationSSHKeysErr != nil { + return + } + } + + if os.Getenv("TEST_PUBLIC_KEY_BASE64") == "" { + authorizedKey, err := defaultAuthorizedKey() + if err != nil { + validationSSHKeysErr = err + return + } + validationSSHKeysErr = os.Setenv( + "TEST_PUBLIC_KEY_BASE64", + base64.StdEncoding.EncodeToString([]byte(authorizedKey)), + ) + } + }) + require.NoError(t, validationSSHKeysErr) +} + +func waitForValidationInstanceStatus(ctx context.Context, t *testing.T, client *TestKubeClient, instanceID cloudv1.CloudProviderInstanceID, status cloudv1.LifecycleStatus, timeout time.Duration) *cloudv1.Instance { + t.Helper() + + deadline := time.NewTimer(timeout) + defer deadline.Stop() + + tick := time.NewTicker(2 * time.Second) + defer tick.Stop() + + var lastInstance *cloudv1.Instance + var lastErr error + for { + instance, err := client.GetInstance(ctx, instanceID) + if err != nil { + lastErr = err + } else { + lastErr = nil + lastInstance = instance + if instance.Status.LifecycleStatus == status { + return instance + } + if instance.Status.LifecycleStatus == cloudv1.LifecycleStatusFailed && status != cloudv1.LifecycleStatusFailed { + t.Fatalf("instance %s failed while waiting for %s: %v", instanceID, status, instance.Status.Messages) + } + } + + select { + case <-ctx.Done(): + require.NoError(t, lastErr) + t.Fatalf("context ended waiting for instance %s to become %s: %v", instanceID, status, ctx.Err()) + case <-deadline.C: + require.NoError(t, lastErr) + if lastInstance != nil { + t.Fatalf("instance %s status is %s, waiting for %s: %v", instanceID, lastInstance.Status.LifecycleStatus, status, lastInstance.Status.Messages) + } + t.Fatalf("timed out waiting for instance %s to become %s", instanceID, status) + case <-tick.C: + } + } +}