From 07403e79c4d199cd9b9913f96698fc7f050d90de Mon Sep 17 00:00:00 2001 From: akartasov Date: Thu, 11 Dec 2025 17:31:53 +0700 Subject: [PATCH] fix: cleanup logical snapshots --- engine/internal/provision/mode_local_test.go | 2 +- engine/internal/provision/pool/manager.go | 2 +- .../provision/thinclones/lvm/lvmanager.go | 2 +- .../provision/thinclones/zfs/branching.go | 2 +- engine/internal/provision/thinclones/zfs/zfs.go | 16 ++++++++++++---- .../engine/postgres/snapshot/logical.go | 3 ++- .../engine/postgres/snapshot/physical.go | 3 ++- 7 files changed, 20 insertions(+), 10 deletions(-) diff --git a/engine/internal/provision/mode_local_test.go b/engine/internal/provision/mode_local_test.go index 9183ec2a..c610d18e 100644 --- a/engine/internal/provision/mode_local_test.go +++ b/engine/internal/provision/mode_local_test.go @@ -87,7 +87,7 @@ func (m mockFSManager) DestroySnapshot(_ string, _ thinclones.DestroyOptions) (e return nil } -func (m mockFSManager) CleanupSnapshots(_ int) ([]string, error) { +func (m mockFSManager) CleanupSnapshots(_ int, _ models.RetrievalMode) ([]string, error) { return nil, nil } diff --git a/engine/internal/provision/pool/manager.go b/engine/internal/provision/pool/manager.go index d07fa242..d7e018f3 100644 --- a/engine/internal/provision/pool/manager.go +++ b/engine/internal/provision/pool/manager.go @@ -47,7 +47,7 @@ type StateReporter interface { type Snapshotter interface { CreateSnapshot(poolSuffix, dataStateAt string) (snapshotName string, err error) DestroySnapshot(snapshotName string, options thinclones.DestroyOptions) (err error) - CleanupSnapshots(retentionLimit int) ([]string, error) + CleanupSnapshots(retentionLimit int, mode models.RetrievalMode) ([]string, error) SnapshotList() []resources.Snapshot RefreshSnapshotList() } diff --git a/engine/internal/provision/thinclones/lvm/lvmanager.go b/engine/internal/provision/thinclones/lvm/lvmanager.go index c7fc0d59..911cb61e 100644 --- a/engine/internal/provision/thinclones/lvm/lvmanager.go +++ b/engine/internal/provision/thinclones/lvm/lvmanager.go @@ -106,7 +106,7 @@ func (m *LVManager) DestroySnapshot(_ string, _ thinclones.DestroyOptions) error } // CleanupSnapshots is not supported in LVM mode. -func (m *LVManager) CleanupSnapshots(_ int) ([]string, error) { +func (m *LVManager) CleanupSnapshots(_ int, _ models.RetrievalMode) ([]string, error) { log.Msg("Cleanup snapshots is not supported in LVM mode. Skip the operation.") return nil, nil diff --git a/engine/internal/provision/thinclones/zfs/branching.go b/engine/internal/provision/thinclones/zfs/branching.go index ed810519..a452897f 100644 --- a/engine/internal/provision/thinclones/zfs/branching.go +++ b/engine/internal/provision/thinclones/zfs/branching.go @@ -77,7 +77,7 @@ func (m *Manager) InitBranching() error { continue } - if err := m.SetRelation(leader.ID, follower.ID); err != nil { + if err := m.SetRelation(follower.ID, leader.ID); err != nil { return fmt.Errorf("failed to set snapshot relations: %w", err) } diff --git a/engine/internal/provision/thinclones/zfs/zfs.go b/engine/internal/provision/thinclones/zfs/zfs.go index bc55fb68..4358d0a7 100644 --- a/engine/internal/provision/thinclones/zfs/zfs.go +++ b/engine/internal/provision/thinclones/zfs/zfs.go @@ -578,7 +578,7 @@ func (m *Manager) checkDependentClones(snapshotName string) (string, error) { } // CleanupSnapshots destroys old snapshots considering retention limit and related clones. -func (m *Manager) CleanupSnapshots(retentionLimit int) ([]string, error) { +func (m *Manager) CleanupSnapshots(retentionLimit int, mode models.RetrievalMode) ([]string, error) { clonesCmd := fmt.Sprintf("zfs list -S clones -o name,origin -H -r %s", m.config.Pool.Name) clonesOutput, err := m.runner.Run(clonesCmd) @@ -588,10 +588,16 @@ func (m *Manager) CleanupSnapshots(retentionLimit int) ([]string, error) { busySnapshots := m.getBusySnapshotList(clonesOutput) + modeFilter := "" + + if mode == models.Physical { + modeFilter = "| grep _pre$" + } + cleanupCmd := fmt.Sprintf( - "zfs list -t snapshot -H -o name -s %s -s creation -r %s | grep -v clone | grep _pre$ | head -n -%d %s"+ + "zfs list -t snapshot -H -o name -s %s -s creation -r %s | grep -v clone %s | head -n -%d %s"+ "| xargs -n1 --no-run-if-empty zfs destroy -R ", - dataStateAtLabel, m.config.Pool.Name, retentionLimit, excludeBusySnapshots(busySnapshots)) + dataStateAtLabel, m.config.Pool.Name, modeFilter, retentionLimit, excludeBusySnapshots(busySnapshots)) out, err := m.runner.Run(cleanupCmd) if err != nil { @@ -892,7 +898,9 @@ func (m *Manager) SnapshotList() []resources.Snapshot { // RefreshSnapshotList updates the list of snapshots. func (m *Manager) RefreshSnapshotList() { snapshots, err := m.getSnapshots() - if err != nil { + + var emptyPoolError *EmptyPoolError + if err != nil && !errors.As(err, &emptyPoolError) { log.Err("failed to refresh snapshot list: ", err) return } diff --git a/engine/internal/retrieval/engine/postgres/snapshot/logical.go b/engine/internal/retrieval/engine/postgres/snapshot/logical.go index 744be021..042d28f9 100644 --- a/engine/internal/retrieval/engine/postgres/snapshot/logical.go +++ b/engine/internal/retrieval/engine/postgres/snapshot/logical.go @@ -34,6 +34,7 @@ import ( "gitlab.com/postgres-ai/database-lab/v3/internal/telemetry" "gitlab.com/postgres-ai/database-lab/v3/pkg/config/global" "gitlab.com/postgres-ai/database-lab/v3/pkg/log" + "gitlab.com/postgres-ai/database-lab/v3/pkg/models" "gitlab.com/postgres-ai/database-lab/v3/pkg/util" ) @@ -151,7 +152,7 @@ func (s *LogicalInitial) Run(ctx context.Context) error { log.Dbg("Cleaning up old snapshots from a dataset") - if _, err := s.cloneManager.CleanupSnapshots(0); err != nil { + if _, err := s.cloneManager.CleanupSnapshots(0, models.Logical); err != nil { return errors.Wrap(err, "failed to destroy old snapshots") } diff --git a/engine/internal/retrieval/engine/postgres/snapshot/physical.go b/engine/internal/retrieval/engine/postgres/snapshot/physical.go index 050bff9a..2d95e699 100644 --- a/engine/internal/retrieval/engine/postgres/snapshot/physical.go +++ b/engine/internal/retrieval/engine/postgres/snapshot/physical.go @@ -46,6 +46,7 @@ import ( "gitlab.com/postgres-ai/database-lab/v3/internal/telemetry" "gitlab.com/postgres-ai/database-lab/v3/pkg/config/global" "gitlab.com/postgres-ai/database-lab/v3/pkg/log" + "gitlab.com/postgres-ai/database-lab/v3/pkg/models" "gitlab.com/postgres-ai/database-lab/v3/pkg/util" "gitlab.com/postgres-ai/database-lab/v3/pkg/util/branching" ) @@ -1126,7 +1127,7 @@ func (p *PhysicalInitial) cleanupSnapshots(retentionLimit int) error { default: } - _, err := p.cloneManager.CleanupSnapshots(retentionLimit) + _, err := p.cloneManager.CleanupSnapshots(retentionLimit, models.Physical) if err != nil { return errors.Wrap(err, "failed to clean up snapshots") }