Skip to content

Commit

Permalink
Adding GPU feature and some dependent tools
Browse files Browse the repository at this point in the history
  • Loading branch information
sharsonia committed May 14, 2021
1 parent d8794f7 commit ecb43c3
Show file tree
Hide file tree
Showing 7 changed files with 337 additions and 18 deletions.
3 changes: 2 additions & 1 deletion lisa/features/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .gpu import Gpu
from .serial_console import SerialConsole
from .startstop import StartStop

__all__ = ["SerialConsole", "StartStop"]
__all__ = ["Gpu", "SerialConsole", "StartStop"]
120 changes: 120 additions & 0 deletions lisa/features/gpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from enum import Enum
from typing import Any

from lisa.feature import Feature
from lisa.operating_system import Linux, Redhat, Ubuntu
from lisa.tools import Uname, Wget
from lisa.util import LisaException

FEATURE_NAME_GPU = "Gpu"


class compute_sdk(Enum):
    # Selects which driver flavor Gpu.install_compute_sdk installs.
    GRID = 0  # dispatched to Gpu._install_grid_driver
    CUDA = 1  # dispatched to Gpu._install_cuda_driver


class Gpu(Feature):
    """Node feature that installs GPU driver prerequisites and SDK drivers."""

    def __init__(self, node: Any, platform: Any) -> None:
        super().__init__(node, platform)
        # Reuse the node's logger for installation progress messages.
        self._log = self._node.log

    @classmethod
    def name(cls) -> str:
        """Return the feature name, FEATURE_NAME_GPU."""
        return FEATURE_NAME_GPU

    def _install_or_raise(self, package_name: str, signed: bool = True) -> None:
        """Install ``package_name`` through the node's OS object.

        Args:
            package_name: one package, or several separated by spaces.
            signed: forwarded to ``install_packages``; False skips the
                package manager's signature check.

        Raises:
            LisaException: if the installation exits with a non-zero code.
        """
        install_result = self._node.os.install_packages(package_name, signed=signed)
        if install_result.exit_code != 0:
            raise LisaException(
                f"Failed to install {package_name}. stdout: {install_result.stdout}"
            )

    def _install_grid_driver(self, version: str) -> None:
        """Log the start of a GRID installation; nothing is installed yet."""
        self._log.info("Starting GRID driver installation")

    def _install_cuda_driver(self, version: str = "CUDA_DRIVER") -> None:
        """Download the CUDA repository package and install it.

        Args:
            version: version string embedded in the repository package name.

        Raises:
            LisaException: if the distro is neither Redhat nor Ubuntu based,
                if the Ubuntu release cannot be determined, or if the package
                installation fails.
        """
        self._log.info("Starting CUDA driver installation")
        node_os = self._node.os

        # CUDA driver installation for redhat distros
        if isinstance(node_os, Redhat):
            cuda_repo_pkg = f"cuda-repo-rhel7-{version}.x86_64.rpm"
            cuda_repo = (
                "http://developer.download.nvidia.com/"
                f"compute/cuda/repos/rhel7/x86_64/{cuda_repo_pkg}"
            )

        # CUDA driver installation for Ubuntu distros
        elif isinstance(node_os, Ubuntu):
            # The repository path uses the release number without dots. The
            # OsVersion object itself formats as the vendor name, so the
            # release field has to be read explicitly.
            release = node_os.get_os_version().release.strip().replace(".", "")
            if not release:
                # NOTE(review): Ubuntu._get_os_version appears to fill in only
                # the vendor; confirm it populates release before relying on
                # this path.
                raise LisaException(
                    "Cannot determine the Ubuntu release to install CUDA driver."
                )
            cuda_repo_pkg = f"cuda-repo-ubuntu{release}_{version}_amd64.deb"
            cuda_repo = (
                "http://developer.download.nvidia.com/compute/"
                f"cuda/repos/ubuntu{release}/x86_64/{cuda_repo_pkg}"
            )

        else:
            raise LisaException("Distro not supported to install CUDA driver.")

        wget_tool = self._node.tools[Wget]
        # download the cuda driver at /tmp/
        wget_tool.get(cuda_repo, "/tmp/", cuda_repo_pkg)
        # Install the downloaded package through the node's own OS object,
        # the same object install_gpu_dep uses, with the signature check off.
        install_result = node_os.install_packages(
            f"/tmp/{cuda_repo_pkg}", signed=False
        )
        if install_result.exit_code != 0:
            raise LisaException(
                f"Failed to install {cuda_repo_pkg}. stdout: {install_result.stdout}"
            )
        self._log.info("Sucessfully installed cuda-drivers")

    def install_gpu_dep(self) -> None:
        """Install the packages the GPU drivers depend on.

        Only Redhat and Ubuntu based nodes are handled; on any other OS this
        method does nothing.

        Raises:
            LisaException: if any package installation fails.
        """
        uname_tool = self._node.tools[Uname]
        uname_ver = uname_tool.get_linux_information().uname_version

        # install dependency libraries for redhat and CentOS
        if isinstance(self._node.os, Redhat):
            # install the kernel-devel and kernel-header packages
            self._install_or_raise(
                f"kernel-devel-{uname_ver} kernel-headers-{uname_ver}"
            )
            # mesa-libEGL install/update is required to avoid a conflict
            # between libraries - bugzilla.redhat 1584740
            self._install_or_raise("mesa-libGL mesa-libEGL libglvnd-devel")
            # install dkms
            self._install_or_raise("dkms", signed=False)

        # install dependency libraries for Ubuntu
        elif isinstance(self._node.os, Ubuntu):
            self._install_or_raise(
                f"build-essential libelf-dev linux-tools-{uname_ver}"
                f" linux-cloud-tools-{uname_ver} python libglvnd-dev ubuntu-desktop"
            )

    def install_compute_sdk(self, driver: compute_sdk, version: str) -> None:
        """Install the driver selected by ``driver`` at ``version``.

        Values other than compute_sdk.GRID and compute_sdk.CUDA are ignored.
        """
        if driver == compute_sdk.GRID:
            self._install_grid_driver(version)

        elif driver == compute_sdk.CUDA:
            self._install_cuda_driver(version)
129 changes: 114 additions & 15 deletions lisa/operating_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
# Licensed under the MIT license.

import re
from dataclasses import dataclass
from functools import partial
from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Pattern, Type, Union

from lisa.executable import Tool
from lisa.util import BaseClassMixin, LisaException, get_matched_str
from lisa.util.logger import get_logger
from lisa.util.process import ExecutableResult
from lisa.util.subclasses import Factory

if TYPE_CHECKING:
Expand All @@ -17,6 +19,18 @@
_get_init_logger = partial(get_logger, name="os")


@dataclass
class OsVersion:
    """Version details of an operating system; only ``vendor`` is required."""

    vendor: str
    release: str = ""
    codename: str = ""
    package: str = ""
    update: str = ""

    def __str__(self) -> str:
        # The string form is the vendor alone; no other field is included.
        vendor_name = self.vendor
        return vendor_name


class OperatingSystem:
__lsb_release_pattern = re.compile(r"^Description:[ \t]+([\w]+)[ ]+$", re.M)
__os_release_pattern_name = re.compile(
Expand Down Expand Up @@ -99,6 +113,7 @@ def _get_detect_string(cls, node: Any) -> Iterable[str]:
cmd_result = typed_node.execute(cmd="lsb_release -d", no_error_log=True)
yield get_matched_str(cmd_result.stdout, cls.__lsb_release_pattern)

# It covers distros like ClearLinux too
cmd_result = typed_node.execute(cmd="cat /etc/os-release", no_error_log=True)
yield get_matched_str(cmd_result.stdout, cls.__os_release_pattern_name)
yield get_matched_str(cmd_result.stdout, cls.__os_release_pattern_id)
Expand Down Expand Up @@ -150,16 +165,23 @@ def type_name(cls) -> str:
def name_pattern(cls) -> Pattern[str]:
return re.compile(f"^{cls.type_name()}$")

def _install_packages(
    self, packages: Union[List[str]], signed: bool = True
) -> ExecutableResult:
    """Install ``packages``; this base version always raises.

    Raises:
        NotImplementedError: always; OS subclasses supply the real command.
    """
    raise NotImplementedError

def _initialize_package_installation(self) -> None:
# sub os can override it, but it's optional
pass

def _get_os_version(self) -> OsVersion:
    """Return the OS version; this base version always raises.

    Raises:
        NotImplementedError: always; OS subclasses supply the detection.
    """
    raise NotImplementedError

def install_packages(
self, packages: Union[str, Tool, Type[Tool], List[Union[str, Tool, Type[Tool]]]]
) -> None:
self,
packages: Union[str, Tool, Type[Tool], List[Union[str, Tool, Type[Tool]]]],
signed: bool = True,
) -> ExecutableResult:
package_names: List[str] = []
if not isinstance(packages, list):
packages = [packages]
Expand All @@ -179,7 +201,23 @@ def install_packages(
if self._first_time_installation:
self._first_time_installation = False
self._initialize_package_installation()
self._install_packages(package_names)

return self._install_packages(package_names, signed)

# TODO: Implement update_packages
def update_packages(
    self, packages: Union[str, Tool, Type[Tool], List[Union[str, Tool, Type[Tool]]]]
) -> None:
    """Placeholder: accepts ``packages`` and currently does nothing."""
    return None

# TODO: Implement query_packages
def query_packages(
    self, packages: Union[str, Tool, Type[Tool], List[Union[str, Tool, Type[Tool]]]]
) -> List[str]:
    """Placeholder: accepts ``packages`` and currently returns None.

    NOTE(review): the annotation promises List[str]; callers get None until
    this is implemented.
    """
    return None

def get_os_version(self) -> OsVersion:
    """Return the version reported by the OS-specific ``_get_os_version``."""
    version_info = self._get_os_version()
    return version_info


class BSD(Posix):
Expand All @@ -198,19 +236,46 @@ def name_pattern(cls) -> Pattern[str]:
def _initialize_package_installation(self) -> None:
self._node.execute("apt-get update", sudo=True)

def _install_packages(
    self, packages: Union[List[str]], signed: bool = True
) -> ExecutableResult:
    """Install ``packages`` with apt-get and return the command result.

    Args:
        packages: package names, joined with spaces on the command line.
        signed: when False, ``--allow-unauthenticated`` is appended.
    """
    package_list = " ".join(packages)
    command = f"DEBIAN_FRONTEND=noninteractive apt-get -y install {package_list}"
    if not signed:
        command += " --allow-unauthenticated"

    return self._node.execute(command, sudo=True)

def _get_os_version(self) -> OsVersion:
    """Collect vendor, release and codename from ``lsb_release``.

    Returns:
        An OsVersion whose fields stay empty when ``which lsb_release`` fails
        or prints nothing. ``package`` is set to "deb" for Debian, Ubuntu and
        LinuxMint vendors.
    """
    os_version = OsVersion("")
    cmd_result = self._node.execute(cmd="which lsb_release", no_error_log=True)
    # Query lsb_release only when `which` succeeded and printed its path.
    if cmd_result.exit_code == 0 and cmd_result.stdout != "":
        # Each OsVersion field maps to one lsb_release short-output flag.
        for field_name, flag in (
            ("vendor", "-i"),
            ("release", "-r"),
            ("codename", "-c"),
        ):
            output = self._node.execute(
                cmd=f"lsb_release {flag} -s", no_error_log=True
            ).stdout
            setattr(os_version, field_name, output.strip())
        if os_version.vendor in ["Debian", "Ubuntu", "LinuxMint"]:
            os_version.package = "deb"

    return os_version


class Ubuntu(Debian):
    """Ubuntu variant of Debian with its own name pattern and vendor."""

    @classmethod
    def name_pattern(cls) -> Pattern[str]:
        # The alternation splits the anchors: this matches names starting with
        # "Ubuntu" or ending with "ubuntu".
        pattern = re.compile("^Ubuntu|ubuntu$")
        return pattern

    def _get_os_version(self) -> OsVersion:
        # Only the vendor is filled in; every other field keeps its default.
        return OsVersion("Ubuntu")


class FreeBSD(BSD):
...
Expand All @@ -225,11 +290,32 @@ class Fedora(Linux):
def name_pattern(cls) -> Pattern[str]:
return re.compile("^Fedora|fedora$")

def _install_packages(
    self, packages: Union[List[str]], signed: bool = True
) -> ExecutableResult:
    """Install ``packages`` with dnf and return the command result.

    Args:
        packages: package names, joined with spaces on the command line.
        signed: when False, ``--nogpgcheck`` is appended.
    """
    command = f"dnf install -y {' '.join(packages)}"
    if not signed:
        # Strings are immutable, so the flag has to be assigned back.
        command += " --nogpgcheck"
    return self._node.execute(command, sudo=True)

def _get_os_version(self) -> OsVersion:
    """Parse vendor, release and codename from /etc/redhat-release.

    Returns:
        An OsVersion whose fields stay empty when the file cannot be read or
        is empty.
    """
    os_version = OsVersion("")
    cmd_result = self._node.execute(
        cmd="cat /etc/redhat-release", no_error_log=True
    )
    # Parse only when `cat` succeeded and produced content.
    if cmd_result.exit_code == 0 and cmd_result.stdout != "":
        result = cmd_result.stdout
        # No break: if several vendors appear in the text, the last one wins.
        for vendor in ["Red Hat", "CentOS", "Fedora", "XenServer"]:
            if vendor in result:
                os_version.vendor = vendor
        # The release is the first token after the word "release"; a search
        # leaves the field empty instead of raising when no token follows.
        release_match = re.search(r"\brelease\b\s*(\S+)", result, re.IGNORECASE)
        if release_match is not None:
            os_version.release = release_match.group(1)
        # The codename is the text after the first "(" up to the next ")".
        check_code = re.search(r"\(([^)]+)", result)
        if check_code is not None:
            os_version.codename = check_code.group(1)
    return os_version


class Redhat(Fedora):
Expand Down Expand Up @@ -261,8 +347,13 @@ def _initialize_package_installation(self) -> None:
timeout=3600,
)

def _install_packages(
    self, packages: Union[List[str]], signed: bool = True
) -> ExecutableResult:
    """Install ``packages`` with yum and return the command result.

    Args:
        packages: package names, joined with spaces on the command line.
        signed: when False, ``--nogpgcheck`` is appended.
    """
    command = f"yum install -y {' '.join(packages)}"
    if not signed:
        # Strings are immutable, so the flag has to be assigned back.
        command += " --nogpgcheck"
    return self._node.execute(command, sudo=True)


class CentOs(Redhat):
Expand All @@ -289,9 +380,17 @@ def name_pattern(cls) -> Pattern[str]:
def _initialize_package_installation(self) -> None:
self._node.execute("zypper --non-interactive --gpg-auto-import-keys update")

def _install_packages(
    self, packages: Union[List[str]], signed: bool = True
) -> ExecutableResult:
    """Install ``packages`` with zypper and return the command result.

    Args:
        packages: package names, joined with spaces on the command line.
        signed: when False, ``--no-gpg-checks`` is added.
    """
    command = "zypper --non-interactive"
    if not signed:
        # --no-gpg-checks is a zypper global option, so it goes before the
        # "in" subcommand rather than after the package names.
        command += " --no-gpg-checks"
    command += f" in {' '.join(packages)}"
    return self._node.execute(command, sudo=True)

def _get_os_version(self) -> OsVersion:
    """Return an OsVersion with vendor "SUSE" and all other fields default."""
    return OsVersion("SUSE")


class NixOS(Linux):
Expand Down
13 changes: 13 additions & 0 deletions lisa/sut_orchestrator/azure/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from lisa import features
from lisa.node import Node
from lisa.operating_system import CentOs, Redhat, Ubuntu
from lisa.util import SkippedException

from .common import get_compute_client, get_node_context, wait_operation

Expand Down Expand Up @@ -67,3 +69,14 @@ def _get_console_log(self, saved_path: Optional[Path]) -> bytes:
log_response = requests.get(diagnostic_data.serial_console_log_blob_uri)

return log_response.content


class Gpu(AzureFeatureMixin, features.Gpu):
    """Azure implementation of the Gpu feature."""

    def _initialize(self, *args: Any, **kwargs: Any) -> None:
        super()._initialize(*args, **kwargs)
        self._initialize_information(self._node)

    def _is_supported(self) -> None:
        """Raise SkippedException unless the node OS is CentOs, Redhat or Ubuntu."""
        if isinstance(self._node.os, (CentOs, Redhat, Ubuntu)):
            return
        raise SkippedException(f"GPU is not supported with distro {self._node.os}")
4 changes: 4 additions & 0 deletions lisa/sut_orchestrator/azure/platform_.py
Original file line number Diff line number Diff line change
Expand Up @@ -1181,6 +1181,10 @@ def _resource_sku_to_capability(
[features.StartStop.name(), features.SerialConsole.name()]
)

# update features list if gpu feature is supported
if node_space.gpu_count:
    # Wrap the name in a list: update() iterates its argument, so a bare
    # string would be added one character at a time.
    node_space.features.update([features.Gpu.name()])

return node_space

def _get_eligible_vm_sizes(
Expand Down
2 changes: 0 additions & 2 deletions lisa/tools/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@


class Make(Tool):
repo = "https://github.com/microsoft/ntttcp-for-linux"

@property
def command(self) -> str:
return "make"
Expand Down
Loading

0 comments on commit ecb43c3

Please sign in to comment.