Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: [log retention improvements pt. 1] introduce file layout interface #2534

Merged
merged 39 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
ee510cd
add time measurements in cli, engine, log file parsing
tedim52 Jul 31, 2024
f881126
add more granular measurements
tedim52 Jul 31, 2024
487e5d1
use buffered log channel
tedim52 Aug 1, 2024
f316abe
batch send log lines
tedim52 Aug 1, 2024
0614976
refactor to use log line sender
tedim52 Aug 9, 2024
75fd409
encapsulate buff channel inside log line sender
tedim52 Aug 9, 2024
42f7a30
refactor again and get tests to pass
tedim52 Aug 9, 2024
5fe30ff
flush logs and close channel when empty
tedim52 Aug 9, 2024
929f4b2
clean up
tedim52 Aug 9, 2024
fdd8bf3
undo build script change
tedim52 Aug 9, 2024
0883f39
Merge branch 'main' into tedi/logspeedup
tedim52 Aug 9, 2024
dc9d1d1
name mutex
tedim52 Aug 9, 2024
d2b9f86
lint
tedim52 Aug 9, 2024
813c98b
increase seconds to wait for logs
tedim52 Aug 9, 2024
6349c61
rename send log line
tedim52 Aug 10, 2024
9510e22
move log line before function
tedim52 Aug 10, 2024
3b73af1
flush before follow
tedim52 Aug 10, 2024
470c61f
clear buffers after flushing
tedim52 Aug 10, 2024
2c489e2
revert times
tedim52 Aug 10, 2024
2c1f0bf
lint
tedim52 Aug 10, 2024
815eded
turn off cypress tests
tedim52 Aug 10, 2024
ccd49c3
remove k cloud ref
tedim52 Aug 10, 2024
684cbd9
use latest docs checker
tedim52 Aug 10, 2024
fff5ff0
use latest docs checker again
tedim52 Aug 10, 2024
d00bb2b
create file layout interface
tedim52 Aug 13, 2024
8734578
reimplement per week using file layout
tedim52 Aug 13, 2024
3febce2
progress on file layout
tedim52 Aug 13, 2024
7d3dd38
add get log filepath, migrate tests to use get log filepath, add some…
tedim52 Aug 14, 2024
11449c1
Merge branch 'main' into tedi/granularetention
tedim52 Aug 14, 2024
aacffb6
remove screenshots
tedim52 Aug 14, 2024
2b73907
remove per hour for now
tedim52 Aug 15, 2024
8daf34d
get test to pass
tedim52 Aug 15, 2024
0b75005
impl per week get log files beyond retention period
tedim52 Aug 15, 2024
b191925
refactor log file manager to use file layout
tedim52 Aug 15, 2024
c6bd4f7
use file layout for removing logs beyond retention period
tedim52 Aug 15, 2024
bf45ed2
remove getFilepathStr function
tedim52 Aug 15, 2024
f589e52
move log file manager inside logs db client
tedim52 Aug 15, 2024
7890081
lint
tedim52 Aug 15, 2024
62f08c3
lint
tedim52 Aug 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,20 @@ func (client *kurtosisBackendLogsDatabaseClient) FilterExistingServiceUuids(
return filteredServiceUuidsSet, nil
}

// StartLogFileManagement is a no-op for this logs database client: the body is empty
// and [ctx] is not used.
func (client *kurtosisBackendLogsDatabaseClient) StartLogFileManagement(ctx context.Context) {
// no log file management needs to be done for this logs db client
}

// RemoveEnclaveLogs is a no-op for this logs database client: [enclaveUuid] is ignored
// and the returned error is always nil.
func (client *kurtosisBackendLogsDatabaseClient) RemoveEnclaveLogs(enclaveUuid string) error {
// no log file management needs to be done for this logs db client
return nil
}

// RemoveAllLogs is a no-op for this logs database client: the returned error is always nil.
func (client *kurtosisBackendLogsDatabaseClient) RemoveAllLogs() error {
// no log file management needs to be done for this logs db client
return nil
}

// ====================================================================================================
//
// Private helper functions
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package file_layout

import (
"github.com/kurtosis-tech/kurtosis/engine/server/engine/centralized_logs/client_implementations/persistent_volume/volume_filesystem"
"time"
)

// LogFileLayout describes how log files are laid out and how to compute the paths of the
// log files belonging to a service in an enclave.
type LogFileLayout interface {
// GetLogFileLayoutFormat returns a string representation of the "format" that files are laid out in.
// Formats are composed of:
// - "/" - representing a nested directory
// - "<enclaveUuid>" - representing where an enclave uuid is inserted
// - "<serviceUuid>" - representing where a service uuid is inserted
// - time formats specified by strftime https://cplusplus.com/reference/ctime/strftime/
// - any other ascii text
GetLogFileLayoutFormat() string
tedim52 marked this conversation as resolved.
Show resolved Hide resolved

// GetLogFilePath returns the log file path for [serviceUuid] in [enclaveUuid] at [time].
GetLogFilePath(time time.Time, enclaveUuid, serviceUuid string) string

// GetLogFilePaths retrieves a list of file paths in [filesystem] for [serviceUuid] in [enclaveUuid].
// If [retentionPeriodIntervals] is set to -1, retrieves all file paths from the current time back to [retentionPeriod].
// If [retentionPeriodIntervals] is positive, retrieves all file paths within the range
// [currentTime - retentionPeriod] and [currentTime - (retentionPeriodIntervals) * retentionPeriod].
// Returned file paths are documented as sorted from most recent to least recent.
// NOTE(review): the PerWeekFileLayout implementation reverses its result for the -1 case so that it is
// ordered from oldest to most recent — confirm which ordering callers rely on.
GetLogFilePaths(filesystem volume_filesystem.VolumeFilesystem, retentionPeriod time.Duration, retentionPeriodIntervals int, enclaveUuid, serviceUuid string) ([]string, error)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
package file_layout

import (
"fmt"
"github.com/kurtosis-tech/kurtosis/engine/server/engine/centralized_logs/client_implementations/persistent_volume/logs_clock"
"github.com/kurtosis-tech/kurtosis/engine/server/engine/centralized_logs/client_implementations/persistent_volume/volume_consts"
"github.com/kurtosis-tech/kurtosis/engine/server/engine/centralized_logs/client_implementations/persistent_volume/volume_filesystem"
"golang.org/x/exp/slices"
"math"
"os"
"strconv"
"time"
)

const (
// oneWeekInHours is the number of hours in one week.
oneWeekInHours = 7 * 24
// oneWeekDuration is one week expressed as a time.Duration.
oneWeekDuration = oneWeekInHours * time.Hour

// PerWeekDirPathStr is the fmt format string of the directory holding one week of logs.
// Its verbs are, in order: base path, year, week.
// basepath /year/week
PerWeekDirPathStr = "%s%s/%s/"

// PerWeekFilePathFmtStr is the fmt format string of a single log file. It extends
// PerWeekDirPathStr with three more verbs, in order: enclave uuid, service uuid, file type.
// ... enclave uuid/service uuid <filetype>
PerWeekFilePathFmtStr = PerWeekDirPathStr + "%s/%s%s"
)

// PerWeekFileLayout computes log file paths that are grouped by year and ISO week number.
type PerWeekFileLayout struct {
// time is the clock whose Now() is used as the reference point when scanning for log files
time logs_clock.LogsClock
}

// NewPerWeekFileLayout returns a PerWeekFileLayout that uses [time] as its clock.
func NewPerWeekFileLayout(time logs_clock.LogsClock) *PerWeekFileLayout {
	layout := new(PerWeekFileLayout)
	layout.time = time
	return layout
}

// GetLogFileLayoutFormat returns the format describing where per-week log files are written.
//
// Right now this format is specifically made for the Vector logs aggregator's format;
// it will be used by the Vector logs aggregator to determine the path to output to.
//
// The year is expressed with the strftime ISO 8601 week-based year (%G) rather than the
// calendar year (%Y): it is paired with the ISO week number (%V), and GetLogFilePath builds
// paths from time.ISOWeek(), which also yields the ISO year. Using %Y would produce a
// different directory than GetLogFilePath for dates near a year boundary whose ISO week
// belongs to the adjacent year.
func (pwf *PerWeekFileLayout) GetLogFileLayoutFormat() string {
	return "/var/log/kurtosis/%%G/%%V/{{ enclave_uuid }}/{{ service_uuid }}.json"
}

// GetLogFilePath returns the path of the log file for [serviceUuid] in [enclaveUuid] for the
// ISO year and ISO week that [time] falls in.
func (pwf *PerWeekFileLayout) GetLogFilePath(time time.Time, enclaveUuid, serviceUuid string) string {
	// note: [time] shadows the time package here; ISOWeek is the time.Time method
	isoYear, isoWeek := time.ISOWeek()
	logFilePath := getLogFilePath(isoYear, isoWeek, enclaveUuid, serviceUuid)
	return logFilePath
}

// GetLogFilePaths returns the paths of the log files that exist in [filesystem] for
// [serviceUuid] in [enclaveUuid].
//
// [retentionPeriod] is converted to a whole number of weeks with DurationToWeeks.
// When [retentionPeriodIntervals] is negative, the files from now back to the retention
// period are returned; otherwise the files in the [retentionPeriodIntervals] weeks just
// beyond the retention period are returned, always with a nil error.
func (pwf *PerWeekFileLayout) GetLogFilePaths(
	filesystem volume_filesystem.VolumeFilesystem,
	retentionPeriod time.Duration,
	retentionPeriodIntervals int,
	enclaveUuid, serviceUuid string,
) ([]string, error) {
	weeksToRetain := DurationToWeeks(retentionPeriod)

	if retentionPeriodIntervals < 0 {
		return pwf.getLogFilePathsFromNowTillRetentionPeriod(filesystem, weeksToRetain, enclaveUuid, serviceUuid)
	}

	expiredPaths := pwf.getLogFilePathsBeyondRetentionPeriod(filesystem, weeksToRetain, retentionPeriodIntervals, enclaveUuid, serviceUuid)
	return expiredPaths, nil
}

// getLogFilePathsFromNowTillRetentionPeriod returns the log file paths for [serviceUuid] in
// [enclaveUuid] that exist in [fs] within the last [retentionPeriodInWeeks] weeks.
//
// Starting at the current week and walking backwards one week at a time, weeks without a
// log file are skipped until the most recent week with logs is found; from there, files are
// collected for as long as consecutive weeks have one, stopping at the first missing week.
// The result is ordered from oldest to most recent.
//
// A Stat error that is not a "file does not exist" error is returned to the caller, with nil
// paths, no matter which week it occurs on, so that an I/O or permission failure is not
// mistaken for the end of the available logs.
func (pwf *PerWeekFileLayout) getLogFilePathsFromNowTillRetentionPeriod(fs volume_filesystem.VolumeFilesystem, retentionPeriodInWeeks int, enclaveUuid, serviceUuid string) ([]string, error) {
	var paths []string
	currentTime := pwf.time.Now()

	for i := 0; i < retentionPeriodInWeeks; i++ {
		year, week := currentTime.Add(time.Duration(-i) * oneWeekDuration).ISOWeek()
		filePathStr := getLogFilePath(year, week, enclaveUuid, serviceUuid)

		if _, err := fs.Stat(filePathStr); err != nil {
			// return if error is not due to nonexistent file path
			if !os.IsNotExist(err) {
				return nil, err
			}
			// a missing week after at least one file was found ends the contiguous run
			if len(paths) > 0 {
				break
			}
			// still scanning for the most recent week that has logs
			continue
		}

		paths = append(paths, filePathStr)
	}

	// reverse for oldest to most recent
	slices.Reverse(paths)

	return paths, nil
}

// getLogFilePathsBeyondRetentionPeriod returns the log file paths for [serviceUuid] in
// [enclaveUuid] that exist in [fs] in the [retentionPeriodIntervals] weeks starting
// [retentionPeriodInWeeks] weeks before the current time and going further back.
// Weeks whose file cannot be Stat-ed, for any reason, are skipped.
func (pwf *PerWeekFileLayout) getLogFilePathsBeyondRetentionPeriod(fs volume_filesystem.VolumeFilesystem, retentionPeriodInWeeks int, retentionPeriodIntervals int, enclaveUuid, serviceUuid string) []string {
	var expiredPaths []string
	now := pwf.time.Now()

	for interval := 0; interval < retentionPeriodIntervals; interval++ {
		weeksBack := retentionPeriodInWeeks + interval
		pastTime := now.Add(time.Duration(-weeksBack) * oneWeekDuration)
		year, week := pastTime.ISOWeek()
		candidatePath := getLogFilePath(year, week, enclaveUuid, serviceUuid)

		// only keep paths that can be Stat-ed successfully
		if _, err := fs.Stat(candidatePath); err == nil {
			expiredPaths = append(expiredPaths, candidatePath)
		}
	}

	return expiredPaths
}

// DurationToWeeks converts [d] to a number of weeks, rounded to the nearest whole week
// (halves round away from zero, per math.Round).
func DurationToWeeks(d time.Duration) int {
	weeks := d.Hours() / float64(oneWeekInHours)
	return int(math.Round(weeks))
}

// getLogFilePath builds the path of the log file for [serviceUuid] in [enclaveUuid] for the
// given [year] and [week], using PerWeekFilePathFmtStr. The week number is zero-padded to
// two digits.
func getLogFilePath(year, week int, enclaveUuid, serviceUuid string) string {
	yearStr := strconv.Itoa(year)
	weekStr := fmt.Sprintf("%02d", week)
	return fmt.Sprintf(
		PerWeekFilePathFmtStr,
		volume_consts.LogsStorageDirpath,
		yearStr,
		weekStr,
		enclaveUuid,
		serviceUuid,
		volume_consts.Filetype,
	)
}
Loading
Loading