Introduce drenv providers #1534

Merged
merged 14 commits on Sep 18, 2024
4 changes: 2 additions & 2 deletions .github/workflows/e2e.yaml
@@ -29,7 +29,7 @@ jobs:

      - name: Setup drenv
        working-directory: test
        run: drenv setup -v
        run: drenv setup -v envs/regional-dr.yaml

      - name: Install ramenctl
        run: pip install -e ramenctl
@@ -100,4 +100,4 @@ jobs:
      - name: Cleanup drenv
        if: always()
        working-directory: test
        run: drenv cleanup -v
        run: drenv cleanup -v envs/regional-dr.yaml
3 changes: 0 additions & 3 deletions hack/make-venv
@@ -27,9 +27,6 @@ cp coverage.pth $venv/lib/python*/site-packages
echo "Adding venv symlink..."
ln -sf $venv/bin/activate venv

echo "Setting up minikube for drenv"
$venv/bin/drenv setup -v

echo
echo "To activate the environment run:"
echo
2 changes: 1 addition & 1 deletion test/Makefile
@@ -50,7 +50,7 @@ coverage-html:
	xdg-open htmlcov/index.html

cluster:
	drenv start --name-prefix $(prefix) $(env)
	drenv start --name-prefix $(prefix) $(env) -v

clean:
	drenv delete --name-prefix $(prefix) $(env)
8 changes: 5 additions & 3 deletions test/README.md
@@ -539,9 +539,11 @@ $ drenv delete envs/example.yaml

- `templates`: templates for creating new profiles.
- `name`: profile name.
- `external`: true if this is existing external cluster. In this
  case the tool will not start a minikube cluster and all other
  options are ignored.
- `provider`: cluster provider. The default provider is "minikube",
  creating clusters using VMs or containers. Use "external" to use
  existing clusters not managed by `drenv`. Use the special value
  "$provider" to select the best provider for the host. (default
  "$provider")
- `driver`: The minikube driver. On Linux, the default drivers are kvm2 and
  docker for VMs and containers. On MacOS, the defaults are hyperkit and
  podman. Use "$vm" and "$container" values to use the recommended VM and
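The special "$provider" value defers provider selection to drenv. As a rough illustration only, resolution could look like the sketch below; the real selection logic lives in the new providers package and is not shown in this diff, and the function and platform mapping here are assumptions:

```python
import platform


def resolve_provider(name):
    """
    Illustrative only: map the special "$provider" value to a concrete
    provider name. Not the actual drenv implementation.
    """
    if name != "$provider":
        return name  # explicit provider, e.g. "minikube" or "external"
    # Assumption: minikube is the recommended provider on Linux and macOS.
    if platform.system() in ("Linux", "Darwin"):
        return "minikube"
    raise ValueError(f"no recommended provider for {platform.system()}")
```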
134 changes: 34 additions & 100 deletions test/drenv/__main__.py
@@ -19,10 +19,9 @@
from . import cache
from . import cluster
from . import commands
from . import containerd
from . import envfile
from . import kubectl
from . import minikube
from . import providers
from . import ramen
from . import shutdown

@@ -114,8 +113,8 @@ def parse_args():
    add_command(sp, "dump", do_dump, help="dump an environment yaml")

    add_command(sp, "clear", do_clear, help="clear cached resources", envfile=False)
    add_command(sp, "setup", do_setup, help="setup minikube for drenv", envfile=False)
    add_command(sp, "cleanup", do_cleanup, help="cleanup minikube", envfile=False)
    add_command(sp, "setup", do_setup, help="setup host for drenv")
    add_command(sp, "cleanup", do_cleanup, help="cleanup host")

    return parser.parse_args()

@@ -183,13 +182,19 @@ def handle_termination_signal(signo, frame):


def do_setup(args):
    logging.info("[main] Setting up minikube for drenv")
    minikube.setup_files()
    env = load_env(args)
    for name in set(p["provider"] for p in env["profiles"]):
        logging.info("[main] Setting up '%s' for drenv", name)
        provider = providers.get(name)
        provider.setup()


def do_cleanup(args):
    logging.info("[main] Cleaning up minikube")
    minikube.cleanup_files()
    env = load_env(args)
    for name in set(p["provider"] for p in env["profiles"]):
        logging.info("[main] Cleaning up '%s' for drenv", name)
        provider = providers.get(name)
        provider.cleanup()


def do_clear(args):
@@ -299,14 +304,16 @@ def do_suspend(args):
    env = load_env(args)
    logging.info("[%s] Suspending environment", env["name"])
    for profile in env["profiles"]:
        run("virsh", "-c", "qemu:///system", "suspend", profile["name"])
        provider = providers.get(profile["provider"])
        provider.suspend(profile)


def do_resume(args):
    env = load_env(args)
    logging.info("[%s] Resuming environment", env["name"])
    for profile in env["profiles"]:
        run("virsh", "-c", "qemu:///system", "resume", profile["name"])
        provider = providers.get(profile["provider"])
        provider.resume(profile)


def do_dump(args):
@@ -351,18 +358,14 @@ def collect_addons(env):


def start_cluster(profile, hooks=(), args=None, **options):
    if profile["external"]:
        logging.debug("[%s] Skipping external cluster", profile["name"])
    else:
        is_restart = minikube_profile_exists(profile["name"])
        start_minikube_cluster(profile, verbose=args.verbose)
        if profile["containerd"]:
            logging.info("[%s] Configuring containerd", profile["name"])
            containerd.configure(profile)
        if is_restart:
            restart_failed_deployments(profile)
        else:
            minikube.load_files(profile["name"])
    provider = providers.get(profile["provider"])
    existing = provider.exists(profile)

    provider.start(profile, verbose=args.verbose)
    provider.configure(profile, existing=existing)

    if existing:
        restart_failed_deployments(profile)

    if hooks:
        execute(
@@ -387,96 +390,27 @@ def stop_cluster(profile, hooks=(), **options):
            allow_failure=True,
        )

    if profile["external"]:
        logging.debug("[%s] Skipping external cluster", profile["name"])
    elif cluster_status != cluster.UNKNOWN:
        stop_minikube_cluster(profile)
    if cluster_status != cluster.UNKNOWN:
        provider = providers.get(profile["provider"])
        provider.stop(profile)


def delete_cluster(profile, **options):
    if profile["external"]:
        logging.debug("[%s] Skipping external cluster", profile["name"])
    else:
        delete_minikube_cluster(profile)
    provider = providers.get(profile["provider"])
    provider.delete(profile)

    profile_config = drenv.config_dir(profile["name"])
    if os.path.exists(profile_config):
        logging.info("[%s] Removing config %s", profile["name"], profile_config)
        shutil.rmtree(profile_config)


def minikube_profile_exists(name):
    out = minikube.profile("list", output="json")
    profiles = json.loads(out)
    for profile in profiles["valid"]:
        if profile["Name"] == name:
            return True
    return False


def start_minikube_cluster(profile, verbose=False):
    start = time.monotonic()
    logging.info("[%s] Starting minikube cluster", profile["name"])

    minikube.start(
        profile["name"],
        driver=profile["driver"],
        container_runtime=profile["container_runtime"],
        extra_disks=profile["extra_disks"],
        disk_size=profile["disk_size"],
        network=profile["network"],
        nodes=profile["nodes"],
        cni=profile["cni"],
        cpus=profile["cpus"],
        memory=profile["memory"],
        addons=profile["addons"],
        service_cluster_ip_range=profile["service_cluster_ip_range"],
        extra_config=profile["extra_config"],
        feature_gates=profile["feature_gates"],
        alsologtostderr=verbose,
    )

    logging.info(
        "[%s] Cluster started in %.2f seconds",
        profile["name"],
        time.monotonic() - start,
    )


def stop_minikube_cluster(profile):
    start = time.monotonic()
    logging.info("[%s] Stopping cluster", profile["name"])
    minikube.stop(profile["name"])
    logging.info(
        "[%s] Cluster stopped in %.2f seconds",
        profile["name"],
        time.monotonic() - start,
    )


def delete_minikube_cluster(profile):
    start = time.monotonic()
    logging.info("[%s] Deleting cluster", profile["name"])
    minikube.delete(profile["name"])
    logging.info(
        "[%s] Cluster deleted in %.2f seconds",
        profile["name"],
        time.monotonic() - start,
    )


def restart_failed_deployments(profile, initial_wait=30):
def restart_failed_deployments(profile):
    """
    When restarting, kubectl can report stale status for a while, before it
    starts to report real status. Then it takes a while until all deployments
    become available.

    We first wait for initial_wait seconds to give Kubernetes chance to fail
    liveness and readiness checks. Then we restart for failed deployments.
    When restarting after a failure, some deployments may enter a failing
    state. This is not handled by the addons. Restarting the failed
    deployments solves the issue. This may also be solved at the addon level.
    """
    logging.info("[%s] Waiting for fresh status", profile["name"])
    time.sleep(initial_wait)

    logging.info("[%s] Looking up failed deployments", profile["name"])
    debug = partial(logging.debug, f"[{profile['name']}] %s")

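For reference, the call sites above use `providers.get(name)` and then call `setup`, `cleanup`, `exists`, `start`, `configure`, `stop`, `delete`, `suspend`, and `resume` on the result. That implies roughly the per-provider interface sketched below; only the names are taken from the diff, the docstrings and signatures beyond that are assumptions and this is not the actual `providers` package:

```python
# Sketch of a provider module as implied by the call sites in __main__.py.


def setup():
    """Prepare the host for this provider (tools, config files)."""


def cleanup():
    """Undo whatever setup() created on the host."""


def exists(profile):
    """Return True if a cluster for this profile already exists."""


def start(profile, verbose=False):
    """Create the cluster, or restart it if it already exists."""


def configure(profile, existing=False):
    """Apply post-start configuration to the cluster."""


def stop(profile):
    """Stop the cluster without deleting it."""


def delete(profile):
    """Delete the cluster and its resources."""


def suspend(profile):
    """Suspend the cluster (for example, pause its VMs)."""


def resume(profile):
    """Resume a suspended cluster."""
```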
27 changes: 20 additions & 7 deletions test/drenv/cluster.py
@@ -5,36 +5,38 @@
import time

from . import kubectl
from . import commands

# Cluster does not have kubeconfig.
UNKNOWN = "unknwon"

# Cluster has kubeconfig.
CONFIGURED = "configured"

# APIServer is responding.
# APIServer is ready.
READY = "ready"


def status(name):
    if not kubeconfig(name):
        return UNKNOWN

    out = kubectl.version(context=name, output="json")
    version_info = json.loads(out)
    if "serverVersion" not in version_info:
    try:
        readyz(name)
    except commands.Error:
        return CONFIGURED

    return READY


def wait_until_ready(name, timeout=600):
def wait_until_ready(name, timeout=600, log=print):
    """
    Wait until a cluster is ready.

    This is useful when starting profiles concurrently, when one profile needs
    to wait for another profile.
    to wait for another profile, or when restarting a stopped cluster.
    """
    log(f"Waiting until cluster '{name}' is ready")
    deadline = time.monotonic() + timeout
    delay = min(1.0, timeout / 60)
    last_status = None
@@ -43,7 +45,7 @@ def wait_until_ready(name, timeout=600):
        current_status = status(name)

        if current_status != last_status:
            print(f"Cluster '{name}' is {current_status}")
            log(f"Cluster '{name}' is {current_status}")
            last_status = current_status

        if current_status == READY:
@@ -77,3 +79,14 @@ def kubeconfig(context_name):
            return cluster

    return {}


def readyz(name, verbose=False):
    """
    Check if API server is ready.
    https://kubernetes.io/docs/reference/using-api/health-checks/
    """
    path = "/readyz"
    if verbose:
        path += "?verbose"
    return kubectl.get("--raw", path, context=name)
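A minimal usage sketch for the new `log` parameter and the `readyz()` helper; it assumes the package is importable as `drenv` and that a kubeconfig context named "dr1" exists:

```python
import logging

from drenv import cluster

logging.basicConfig(level=logging.INFO)

# Route progress messages to the logger instead of the default print().
cluster.wait_until_ready("dr1", timeout=300, log=logging.info)

# readyz() returns the API server response and raises commands.Error
# while the server is not ready yet.
print(cluster.readyz("dr1", verbose=True))
```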
17 changes: 15 additions & 2 deletions test/drenv/commands.py
@@ -108,10 +108,23 @@ def run(*args, input=None, decode=True, env=None):
    return output.decode() if decode else output


def watch(*args, input=None, keepends=False, decode=True, timeout=None, env=None):
def watch(
    *args,
    input=None,
    keepends=False,
    decode=True,
    timeout=None,
    env=None,
    stderr=subprocess.PIPE,
):
    """
    Run command args, iterating over lines read from the child process stdout.

    Some commands have no output and log everything to stderr (like drenv). To
    watch the output, call with stderr=subprocess.STDOUT. When such a command
    fails, the error is always empty, since the content was already yielded to
    the caller.

    Assumes that the child process outputs UTF-8. Will raise if the command
    outputs binary data. This is not a problem in this project since all our
    commands are text based.
@@ -144,7 +157,7 @@ def watch(*args, input=None, keepends=False, decode=True, timeout=None, env=None
            # Avoid blocking forever if there is no input.
            stdin=subprocess.PIPE if input else subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            stderr=stderr,
            env=env,
        )
    except OSError as e:
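A short usage sketch for the new `stderr` parameter, following the docstring above; the `drenv` command line and env file path are examples only:

```python
import subprocess

from drenv import commands

# drenv writes its progress to stderr and nothing to stdout, so merge
# stderr into the watched stream to see the progress line by line.
for line in commands.watch(
    "drenv", "start", "envs/example.yaml", stderr=subprocess.STDOUT
):
    print(line)
```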
29 changes: 29 additions & 0 deletions test/drenv/commands_test.py
@@ -170,6 +170,35 @@ def test_watch_lines():
    assert output == ["line %d" % i for i in range(10)]


def test_watch_stderr_success():
    # Watching a command like drenv, which logs only to stderr and writes
    # nothing to stdout.
    script = r"""
import sys
for i in range(10):
    sys.stderr.write(f"line {i}\n")
"""
    cmd = ["python3", "-c", script]
    output = list(commands.watch(*cmd, stderr=subprocess.STDOUT))
    assert output == [f"line {i}" for i in range(10)]


def test_watch_stderr_error():
    # When stderr is redirected to stdout, the error is empty.
    script = r"""
import sys
sys.stderr.write("before error\n")
sys.exit("error")
"""
    cmd = ["python3", "-c", script]
    output = []
    with pytest.raises(commands.Error) as e:
        for line in commands.watch(*cmd, stderr=subprocess.STDOUT):
            output.append(line)

    assert output == ["before error", "error"]
    assert e.value.error == ""


def test_watch_partial_lines():
    script = """
import time