-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[shortfin] Add heuristics for adjusting file descriptor limits on Linux.
Without this, on very large systems (e.g. 64 GPUs / 192 cores), it was not possible to open all devices without manually tweaking file descriptor limits. The result was various forms of RESOURCE_EXHAUSTED errors. This may require more tweaking in the future, and for fully robust setups, production installations should explicitly configure high limits. However, these heuristics remove a significant barrier to entry and provide some feedback in the form of logs. Progress on #463
- Loading branch information
1 parent
2cbf768
commit cdeccaf
Showing
5 changed files
with
125 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
// Copyright 2024 Advanced Micro Devices, Inc. | ||
// | ||
// Licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#include "shortfin/support/sysconfig.h" | ||
|
||
#include "shortfin/support/logging.h" | ||
|
||
#ifdef __linux__ | ||
#include <sys/resource.h> | ||
#endif | ||
|
||
namespace shortfin::sysconfig { | ||
|
||
// ----------------------------------------------------------------------------- | ||
// File handle limits | ||
// ----------------------------------------------------------------------------- | ||
|
||
#ifdef __linux__ | ||
|
||
bool EnsureFileLimit(unsigned needed_limit) { | ||
struct rlimit limit; | ||
if (getrlimit(RLIMIT_NOFILE, &limit) != 0) { | ||
return {}; | ||
} | ||
|
||
if (limit.rlim_cur >= needed_limit) return true; | ||
unsigned requested_limit = needed_limit; | ||
if (limit.rlim_max >= needed_limit) { | ||
logging::debug( | ||
"Estimated number of open file handles ({}) < current limit ({}) but " | ||
"within max limit ({}): Increasing limit", | ||
needed_limit, limit.rlim_cur, limit.rlim_max); | ||
} else if (limit.rlim_max > limit.rlim_cur) { | ||
logging::warn( | ||
"Esimated number of open file handles ({}) < current ({}) and max ({}) " | ||
"limit: Increasing to max", | ||
needed_limit, limit.rlim_cur, limit.rlim_max); | ||
requested_limit = limit.rlim_max; | ||
} else { | ||
logging::warn("Esimated number of open file handles ({}) < max ({})", | ||
needed_limit, limit.rlim_max); | ||
return false; | ||
} | ||
|
||
limit.rlim_cur = requested_limit; | ||
if (setrlimit(RLIMIT_NOFILE, &limit) != 0) { | ||
logging::error("Could not set open file handle limit to {}", | ||
requested_limit); | ||
return false; | ||
} | ||
|
||
return limit.rlim_cur >= needed_limit; | ||
} | ||
|
||
#else | ||
// Fallback for platforms where no file descriptor limit is adjusted: the
// request is always reported as satisfied.
bool EnsureFileLimit(unsigned needed_limit) {
  static_cast<void>(needed_limit);  // Not consulted on this platform.
  return true;
}
#endif | ||
|
||
} // namespace shortfin::sysconfig |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
// Copyright 2024 Advanced Micro Devices, Inc. | ||
// | ||
// Licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#ifndef SHORTFIN_SUPPORT_SYSCONFIG_H | ||
#define SHORTFIN_SUPPORT_SYSCONFIG_H | ||
|
||
#include <cstdint> | ||
#include <utility> | ||
|
||
namespace shortfin::sysconfig { | ||
|
||
// Best-effort attempt to ensure that the current process may have at least
// `needed_limit` file descriptors open at once.
//
// On Linux this inspects RLIMIT_NOFILE and, if the current (soft) limit is too
// low, tries to raise it towards `needed_limit`, capped at the max (hard)
// limit. On other platforms nothing is done and true is returned.
//
// Returns true if the soft limit already is, or was raised to, at least
// `needed_limit`. Returns false if the limit could not be queried or could not
// be raised far enough; failures to raise the limit are logged. Note that the
// soft limit may still have been raised (to the hard limit) when false is
// returned.
bool EnsureFileLimit(unsigned needed_limit); | ||
|
||
} // namespace shortfin::sysconfig | ||
|
||
#endif // SHORTFIN_SUPPORT_SYSCONFIG_H |