Skip to content

Commit

Permalink
Also consider offloaded timelines for obtaining retain_lsn (neondatab…
Browse files Browse the repository at this point in the history
…ase#9308)

Also consider offloaded timelines for obtaining `retain_lsn`. This is
required for correctness for all timelines that have not been flattened
yet: otherwise we GC data that might still be required for reading.

This somewhat counteracts the original purpose of timeline offloading of
not having to iterate over offloaded timelines, but sadly it's required.
In the future, we can improve the way the offloaded timelines are
stored.

We also make the `retain_lsn` optional so that in the future, when we
implement flattening, we can make it None. This also applies to full
timeline objects by the way, where it would probably make most sense to
add a bool flag whether the timeline is successfully flattened, and if
it is, one can exclude it from `retain_lsn` as well.

Also, track whether a timeline was offloaded or not in `retain_lsn` so
that the `retain_lsn` can be excluded from visibility and size
calculation.

Part of neondatabase#8088
  • Loading branch information
arpad-m authored Oct 14, 2024
1 parent f4f7ea2 commit f54e3e9
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 23 deletions.
56 changes: 45 additions & 11 deletions pageserver/src/tenant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,8 @@ pub struct OffloadedTimeline {
pub tenant_shard_id: TenantShardId,
pub timeline_id: TimelineId,
pub ancestor_timeline_id: Option<TimelineId>,
/// Whether to retain the branch lsn at the ancestor or not
pub ancestor_retain_lsn: Option<Lsn>,

// TODO: once we persist offloaded state, make this lazily constructed
pub remote_client: Arc<RemoteTimelineClient>,
Expand All @@ -504,17 +506,27 @@ pub struct OffloadedTimeline {

impl OffloadedTimeline {
fn from_timeline(timeline: &Timeline) -> Self {
let ancestor_retain_lsn = timeline
.get_ancestor_timeline_id()
.map(|_timeline_id| timeline.get_ancestor_lsn());
Self {
tenant_shard_id: timeline.tenant_shard_id,
timeline_id: timeline.timeline_id,
ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
ancestor_retain_lsn,

remote_client: timeline.remote_client.clone(),
delete_progress: timeline.delete_progress.clone(),
}
}
}

#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum MaybeOffloaded {
Yes,
No,
}

#[derive(Clone)]
pub enum TimelineOrOffloaded {
Timeline(Arc<Timeline>),
Expand Down Expand Up @@ -2253,12 +2265,13 @@ impl Tenant {

if activating {
let timelines_accessor = self.timelines.lock().unwrap();
let timelines_offloaded_accessor = self.timelines_offloaded.lock().unwrap();
let timelines_to_activate = timelines_accessor
.values()
.filter(|timeline| !(timeline.is_broken() || timeline.is_stopping()));

// Before activation, populate each Timeline's GcInfo with information about its children
self.initialize_gc_info(&timelines_accessor);
self.initialize_gc_info(&timelines_accessor, &timelines_offloaded_accessor);

// Spawn gc and compaction loops. The loops will shut themselves
// down when they notice that the tenant is inactive.
Expand Down Expand Up @@ -3298,27 +3311,45 @@ impl Tenant {
fn initialize_gc_info(
&self,
timelines: &std::sync::MutexGuard<HashMap<TimelineId, Arc<Timeline>>>,
timelines_offloaded: &std::sync::MutexGuard<HashMap<TimelineId, Arc<OffloadedTimeline>>>,
) {
// This function must be called before activation: after activation timeline create/delete operations
// might happen, and this function is not safe to run concurrently with those.
assert!(!self.is_active());

// Scan all timelines. For each timeline, remember the timeline ID and
// the branch point where it was created.
let mut all_branchpoints: BTreeMap<TimelineId, Vec<(Lsn, TimelineId)>> = BTreeMap::new();
let mut all_branchpoints: BTreeMap<TimelineId, Vec<(Lsn, TimelineId, MaybeOffloaded)>> =
BTreeMap::new();
timelines.iter().for_each(|(timeline_id, timeline_entry)| {
if let Some(ancestor_timeline_id) = &timeline_entry.get_ancestor_timeline_id() {
let ancestor_children = all_branchpoints.entry(*ancestor_timeline_id).or_default();
ancestor_children.push((timeline_entry.get_ancestor_lsn(), *timeline_id));
ancestor_children.push((
timeline_entry.get_ancestor_lsn(),
*timeline_id,
MaybeOffloaded::No,
));
}
});
timelines_offloaded
.iter()
.for_each(|(timeline_id, timeline_entry)| {
let Some(ancestor_timeline_id) = &timeline_entry.ancestor_timeline_id else {
return;
};
let Some(retain_lsn) = timeline_entry.ancestor_retain_lsn else {
return;
};
let ancestor_children = all_branchpoints.entry(*ancestor_timeline_id).or_default();
ancestor_children.push((retain_lsn, *timeline_id, MaybeOffloaded::Yes));
});

// The number of bytes we always keep, irrespective of PITR: this is a constant across timelines
let horizon = self.get_gc_horizon();

// Populate each timeline's GcInfo with information about its child branches
for timeline in timelines.values() {
let mut branchpoints: Vec<(Lsn, TimelineId)> = all_branchpoints
let mut branchpoints: Vec<(Lsn, TimelineId, MaybeOffloaded)> = all_branchpoints
.remove(&timeline.timeline_id)
.unwrap_or_default();

Expand Down Expand Up @@ -4878,7 +4909,10 @@ mod tests {
{
let branchpoints = &tline.gc_info.read().unwrap().retain_lsns;
assert_eq!(branchpoints.len(), 1);
assert_eq!(branchpoints[0], (Lsn(0x40), NEW_TIMELINE_ID));
assert_eq!(
branchpoints[0],
(Lsn(0x40), NEW_TIMELINE_ID, MaybeOffloaded::No)
);
}

// You can read the key from the child branch even though the parent is
Expand Down Expand Up @@ -8261,8 +8295,8 @@ mod tests {
let mut guard = tline.gc_info.write().unwrap();
*guard = GcInfo {
retain_lsns: vec![
(Lsn(0x10), tline.timeline_id),
(Lsn(0x20), tline.timeline_id),
(Lsn(0x10), tline.timeline_id, MaybeOffloaded::No),
(Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),
],
cutoffs: GcCutoffs {
time: Lsn(0x30),
Expand Down Expand Up @@ -8489,8 +8523,8 @@ mod tests {
let mut guard = tline.gc_info.write().unwrap();
*guard = GcInfo {
retain_lsns: vec![
(Lsn(0x10), tline.timeline_id),
(Lsn(0x20), tline.timeline_id),
(Lsn(0x10), tline.timeline_id, MaybeOffloaded::No),
(Lsn(0x20), tline.timeline_id, MaybeOffloaded::No),
],
cutoffs: GcCutoffs {
time: Lsn(0x30),
Expand Down Expand Up @@ -8723,7 +8757,7 @@ mod tests {
// Update GC info
let mut guard = parent_tline.gc_info.write().unwrap();
*guard = GcInfo {
retain_lsns: vec![(Lsn(0x18), branch_tline.timeline_id)],
retain_lsns: vec![(Lsn(0x18), branch_tline.timeline_id, MaybeOffloaded::No)],
cutoffs: GcCutoffs {
time: Lsn(0x10),
space: Lsn(0x10),
Expand All @@ -8737,7 +8771,7 @@ mod tests {
// Update GC info
let mut guard = branch_tline.gc_info.write().unwrap();
*guard = GcInfo {
retain_lsns: vec![(Lsn(0x40), branch_tline.timeline_id)],
retain_lsns: vec![(Lsn(0x40), branch_tline.timeline_id, MaybeOffloaded::No)],
cutoffs: GcCutoffs {
time: Lsn(0x50),
space: Lsn(0x50),
Expand Down
8 changes: 5 additions & 3 deletions pageserver/src/tenant/size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::context::RequestContext;
use crate::pgdatadir_mapping::CalculateLogicalSizeError;

use super::{GcError, LogicalSizeCalculationCause, Tenant};
use crate::tenant::Timeline;
use crate::tenant::{MaybeOffloaded, Timeline};
use utils::id::TimelineId;
use utils::lsn::Lsn;

Expand Down Expand Up @@ -264,10 +264,12 @@ pub(super) async fn gather_inputs(
let mut lsns: Vec<(Lsn, LsnKind)> = gc_info
.retain_lsns
.iter()
.filter(|(lsn, _child_id)| lsn > &ancestor_lsn)
.filter(|(lsn, _child_id, is_offloaded)| {
lsn > &ancestor_lsn && *is_offloaded == MaybeOffloaded::No
})
.copied()
// this assumes there are no other retain_lsns than the branchpoints
.map(|(lsn, _child_id)| (lsn, LsnKind::BranchPoint))
.map(|(lsn, _child_id, _is_offloaded)| (lsn, LsnKind::BranchPoint))
.collect::<Vec<_>>();

lsns.extend(lease_points.iter().map(|&lsn| (lsn, LsnKind::LeasePoint)));
Expand Down
21 changes: 15 additions & 6 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,10 @@ use self::logical_size::LogicalSize;
use self::walreceiver::{WalReceiver, WalReceiverConf};

use super::{
config::TenantConf, storage_layer::inmemory_layer, storage_layer::LayerVisibilityHint,
config::TenantConf,
storage_layer::{inmemory_layer, LayerVisibilityHint},
upload_queue::NotInitialized,
MaybeOffloaded,
};
use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf};
use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe};
Expand Down Expand Up @@ -450,7 +452,7 @@ pub(crate) struct GcInfo {
/// Currently, this includes all points where child branches have
/// been forked off from. In the future, could also include
/// explicit user-defined snapshot points.
pub(crate) retain_lsns: Vec<(Lsn, TimelineId)>,
pub(crate) retain_lsns: Vec<(Lsn, TimelineId, MaybeOffloaded)>,

/// The cutoff coordinates, which are combined by selecting the minimum.
pub(crate) cutoffs: GcCutoffs,
Expand All @@ -467,8 +469,13 @@ impl GcInfo {
self.cutoffs.select_min()
}

pub(super) fn insert_child(&mut self, child_id: TimelineId, child_lsn: Lsn) {
self.retain_lsns.push((child_lsn, child_id));
pub(super) fn insert_child(
&mut self,
child_id: TimelineId,
child_lsn: Lsn,
is_offloaded: MaybeOffloaded,
) {
self.retain_lsns.push((child_lsn, child_id, is_offloaded));
self.retain_lsns.sort_by_key(|i| i.0);
}

Expand Down Expand Up @@ -2164,7 +2171,9 @@ impl Timeline {

if let Some(ancestor) = &ancestor {
let mut ancestor_gc_info = ancestor.gc_info.write().unwrap();
ancestor_gc_info.insert_child(timeline_id, metadata.ancestor_lsn());
// If we construct an explicit timeline object, it's obviously not offloaded
let is_offloaded = MaybeOffloaded::No;
ancestor_gc_info.insert_child(timeline_id, metadata.ancestor_lsn(), is_offloaded);
}

Arc::new_cyclic(|myself| {
Expand Down Expand Up @@ -4875,7 +4884,7 @@ impl Timeline {
let retain_lsns = gc_info
.retain_lsns
.iter()
.map(|(lsn, _child_id)| *lsn)
.map(|(lsn, _child_id, _is_offloaded)| *lsn)
.collect();

// Gets the maximum LSN that holds the valid lease.
Expand Down
9 changes: 6 additions & 3 deletions pageserver/src/tenant/timeline/compaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ use crate::tenant::storage_layer::{
use crate::tenant::timeline::ImageLayerCreationOutcome;
use crate::tenant::timeline::{drop_rlock, DeltaLayerWriter, ImageLayerWriter};
use crate::tenant::timeline::{Layer, ResidentLayer};
use crate::tenant::DeltaLayer;
use crate::tenant::{DeltaLayer, MaybeOffloaded};
use crate::virtual_file::{MaybeFatalIo, VirtualFile};
use pageserver_api::config::tenant_conf_defaults::{
DEFAULT_CHECKPOINT_DISTANCE, DEFAULT_COMPACTION_THRESHOLD,
Expand Down Expand Up @@ -639,7 +639,10 @@ impl Timeline {
let children = self.gc_info.read().unwrap().retain_lsns.clone();

let mut readable_points = Vec::with_capacity(children.len() + 1);
for (child_lsn, _child_timeline_id) in &children {
for (child_lsn, _child_timeline_id, is_offloaded) in &children {
if *is_offloaded == MaybeOffloaded::Yes {
continue;
}
readable_points.push(*child_lsn);
}
readable_points.push(head_lsn);
Expand Down Expand Up @@ -1741,7 +1744,7 @@ impl Timeline {
let gc_info = self.gc_info.read().unwrap();
let mut retain_lsns_below_horizon = Vec::new();
let gc_cutoff = gc_info.cutoffs.select_min();
for (lsn, _timeline_id) in &gc_info.retain_lsns {
for (lsn, _timeline_id, _is_offloaded) in &gc_info.retain_lsns {
if lsn < &gc_cutoff {
retain_lsns_below_horizon.push(*lsn);
}
Expand Down

0 comments on commit f54e3e9

Please sign in to comment.