Compare commits

..

4 Commits

Author SHA1 Message Date
Frederik Ring
c3daeacecb Improve Swarm support (#333)
* Query for labeled services as well

* Try scaling down services

* Scale services back up

* Use progress tool from Docker CLI

* In test, label both services

* Clean up error and log messages

* Document scale-up/down approach in docs

* Downgrade Docker CLI to match client

* Document services stats

* Do not rely on PreviousSpec for storing desired replica count

* Log warnings from Docker when updating services

* Check whether container and service labels collide

* Document script behavior on label collision

* Add additional check if all containers have been removed

* Scale services concurrently

* Move docker interaction code into own file

* Factor out code for service updating

* Time out after five minutes of not reaching desired container count

* Inline handling of in-swarm container level restart

* Timer is more suitable for timeout race

* Timeout when scaling down services should be configurable

* Choose better filename

* Reflect changes in naming

* Rename and deprecate BACKUP_STOP_CONTAINER_LABEL

* Improve logging

* Further simplify logging
2024-01-31 12:17:41 +01:00
dependabot[bot]
2065fb2815 Bump github.com/klauspost/compress from 1.17.4 to 1.17.5 (#336) 2024-01-30 05:45:11 +00:00
Frederik Ring
97e5aa42cc Checkout action v3 uses deprecated Node version (#335) 2024-01-26 20:56:05 +01:00
Frederik Ring
ed5abd5ba8 Panic handling does not log reason for script being halted (#334) 2024-01-26 20:02:09 +01:00
16 changed files with 422 additions and 327 deletions

View File

@@ -19,7 +19,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Setup Ruby
uses: ruby/setup-ruby@v1
with:

View File

@@ -15,8 +15,8 @@ jobs:
name: lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-go@v4
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: '1.21'
cache: false

View File

@@ -13,7 +13,7 @@ jobs:
contents: read
steps:
- name: Check out the repo
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v2

View File

@@ -10,7 +10,7 @@ jobs:
test:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2

View File

@@ -37,7 +37,9 @@ type Config struct {
BackupRetentionDays int32 `split_words:"true" default:"-1"`
BackupPruningLeeway time.Duration `split_words:"true" default:"1m"`
BackupPruningPrefix string `split_words:"true"`
BackupStopContainerLabel string `split_words:"true" default:"true"`
BackupStopContainerLabel string `split_words:"true"`
BackupStopDuringBackupLabel string `split_words:"true" default:"true"`
BackupStopServiceTimeout time.Duration `split_words:"true" default:"5m"`
BackupFromSnapshot bool `split_words:"true"`
BackupExcludeRegexp RegexpDecoder `split_words:"true"`
BackupSkipBackendsFromPrune []string `split_words:"true"`

View File

@@ -21,6 +21,9 @@ func main() {
defer func() {
if pArg := recover(); pArg != nil {
if err, ok := pArg.(error); ok {
s.logger.Error(
fmt.Sprintf("Executing the script encountered a panic: %v", err),
)
if hookErr := s.runHooks(err); hookErr != nil {
s.logger.Error(
fmt.Sprintf("An error occurred calling the registered hooks: %s", hookErr),
@@ -44,12 +47,12 @@ func main() {
}()
s.must(s.withLabeledCommands(lifecyclePhaseArchive, func() error {
restartContainers, err := s.stopContainersAndServices()
restartContainersAndServices, err := s.stopContainersAndServices()
// The mechanism for restarting containers is not using hooks as it
// should happen as soon as possible (i.e. before uploading backups or
// similar).
defer func() {
s.must(restartContainers())
s.must(restartContainersAndServices())
}()
if err != nil {
return err

View File

@@ -5,8 +5,6 @@ package main
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"io/fs"
@@ -30,11 +28,6 @@ import (
openpgp "github.com/ProtonMail/go-crypto/openpgp/v2"
"github.com/containrrr/shoutrrr"
"github.com/containrrr/shoutrrr/pkg/router"
"github.com/docker/cli/cli/command/service/progress"
"github.com/docker/docker/api/types"
ctr "github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/swarm"
"github.com/docker/docker/client"
"github.com/leekchan/timeutil"
"github.com/offen/envconfig"
@@ -319,302 +312,6 @@ func newScript() (*script, error) {
return s, nil
}
type noopWriteCloser struct {
io.Writer
}
func (noopWriteCloser) Close() error {
return nil
}
type handledSwarmService struct {
serviceID string
initialReplicaCount uint64
}
// stopContainersAndServices stops all Docker containers that are marked as to being
// stopped during the backup and returns a function that can be called to
// restart everything that has been stopped.
func (s *script) stopContainersAndServices() (func() error, error) {
if s.cli == nil {
return noop, nil
}
dockerInfo, err := s.cli.Info(context.Background())
if err != nil {
return noop, fmt.Errorf("(*script).stopContainersAndServices: error getting docker info: %w", err)
}
isDockerSwarm := dockerInfo.Swarm.LocalNodeState != "inactive"
discardWriter := &noopWriteCloser{io.Discard}
filterMatchLabel := fmt.Sprintf(
"docker-volume-backup.stop-during-backup=%s",
s.c.BackupStopContainerLabel,
)
allContainers, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{})
if err != nil {
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for containers: %w", err)
}
containersToStop, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{
Filters: filters.NewArgs(filters.KeyValuePair{
Key: "label",
Value: filterMatchLabel,
}),
})
if err != nil {
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for containers to stop: %w", err)
}
var allServices []swarm.Service
var servicesToScaleDown []handledSwarmService
if isDockerSwarm {
allServices, err = s.cli.ServiceList(context.Background(), types.ServiceListOptions{})
if err != nil {
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for services: %w", err)
}
matchingServices, err := s.cli.ServiceList(context.Background(), types.ServiceListOptions{
Filters: filters.NewArgs(filters.KeyValuePair{
Key: "label",
Value: filterMatchLabel,
}),
Status: true,
})
for _, s := range matchingServices {
servicesToScaleDown = append(servicesToScaleDown, handledSwarmService{
serviceID: s.ID,
initialReplicaCount: *s.Spec.Mode.Replicated.Replicas,
})
}
if err != nil {
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for services to scale down: %w", err)
}
}
if len(containersToStop) == 0 && len(servicesToScaleDown) == 0 {
return noop, nil
}
if isDockerSwarm {
for _, container := range containersToStop {
if swarmServiceID, ok := container.Labels["com.docker.swarm.service.id"]; ok {
parentService, _, err := s.cli.ServiceInspectWithRaw(context.Background(), swarmServiceID, types.ServiceInspectOptions{})
if err != nil {
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for parent service with ID %s: %w", swarmServiceID, err)
}
for label := range parentService.Spec.Labels {
if label == "docker-volume-backup.stop-during-backup" {
return noop, fmt.Errorf(
"(*script).stopContainersAndServices: container %s is labeled to stop but has parent service %s which is also labeled, cannot continue",
container.Names[0],
parentService.Spec.Name,
)
}
}
}
}
}
s.logger.Info(
fmt.Sprintf(
"Stopping %d out of %d running container(s) and scaling down %d out of %d active service(s) as they were labeled %s.",
len(containersToStop),
len(allContainers),
len(servicesToScaleDown),
len(allServices),
filterMatchLabel,
),
)
var stoppedContainers []types.Container
var stopErrors []error
for _, container := range containersToStop {
if err := s.cli.ContainerStop(context.Background(), container.ID, ctr.StopOptions{}); err != nil {
stopErrors = append(stopErrors, err)
} else {
stoppedContainers = append(stoppedContainers, container)
}
}
var scaledDownServices []swarm.Service
var scaleDownErrors []error
if isDockerSwarm {
for _, svc := range servicesToScaleDown {
service, _, err := s.cli.ServiceInspectWithRaw(context.Background(), svc.serviceID, types.ServiceInspectOptions{})
if err != nil {
scaleDownErrors = append(
scaleDownErrors,
fmt.Errorf("(*script).stopContainersAndServices: error inspecting service %s: %w", svc.serviceID, err),
)
continue
}
var zero uint64 = 0
serviceMode := &service.Spec.Mode
switch {
case serviceMode.Replicated != nil:
serviceMode.Replicated.Replicas = &zero
default:
scaleDownErrors = append(
scaleDownErrors,
fmt.Errorf("(*script).stopContainersAndServices: labeled service %s has to be in replicated mode", service.Spec.Name),
)
continue
}
response, err := s.cli.ServiceUpdate(context.Background(), service.ID, service.Version, service.Spec, types.ServiceUpdateOptions{})
if err != nil {
scaleDownErrors = append(scaleDownErrors, err)
continue
}
for _, warning := range response.Warnings {
s.logger.Warn(
fmt.Sprintf("The Docker API returned a warning when scaling down service %s: %s", service.Spec.Name, warning),
)
}
if err := progress.ServiceProgress(context.Background(), s.cli, service.ID, discardWriter); err != nil {
scaleDownErrors = append(scaleDownErrors, err)
} else {
scaledDownServices = append(scaledDownServices, service)
}
// progress.ServiceProgress returns too early, so we need to manually check
// whether all containers belonging to the service have actually been removed
for {
containers, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{
Filters: filters.NewArgs(filters.KeyValuePair{
Key: "label",
Value: fmt.Sprintf("com.docker.swarm.service.id=%s", service.ID),
}),
})
if err != nil {
scaleDownErrors = append(scaleDownErrors, err)
break
}
if len(containers) == 0 {
break
}
time.Sleep(time.Second)
}
}
}
s.stats.Containers = ContainersStats{
All: uint(len(allContainers)),
ToStop: uint(len(containersToStop)),
Stopped: uint(len(stoppedContainers)),
StopErrors: uint(len(stopErrors)),
}
s.stats.Services = ServicesStats{
All: uint(len(allServices)),
ToScaleDown: uint(len(servicesToScaleDown)),
ScaledDown: uint(len(scaledDownServices)),
ScaleDownErrors: uint(len(scaleDownErrors)),
}
var initialErr error
allErrors := append(stopErrors, scaleDownErrors...)
if len(allErrors) != 0 {
initialErr = fmt.Errorf(
"(*script).stopContainersAndServices: %d error(s) stopping containers: %w",
len(allErrors),
errors.Join(allErrors...),
)
}
return func() error {
servicesRequiringForceUpdate := map[string]struct{}{}
var restartErrors []error
for _, container := range stoppedContainers {
if swarmServiceName, ok := container.Labels["com.docker.swarm.service.name"]; ok {
servicesRequiringForceUpdate[swarmServiceName] = struct{}{}
continue
}
if err := s.cli.ContainerStart(context.Background(), container.ID, types.ContainerStartOptions{}); err != nil {
restartErrors = append(restartErrors, err)
}
}
if len(servicesRequiringForceUpdate) != 0 {
services, _ := s.cli.ServiceList(context.Background(), types.ServiceListOptions{})
for serviceName := range servicesRequiringForceUpdate {
var serviceMatch swarm.Service
for _, service := range services {
if service.Spec.Name == serviceName {
serviceMatch = service
break
}
}
if serviceMatch.ID == "" {
restartErrors = append(
restartErrors,
fmt.Errorf("(*script).stopContainersAndServices: couldn't find service with name %s", serviceName),
)
continue
}
serviceMatch.Spec.TaskTemplate.ForceUpdate += 1
if _, err := s.cli.ServiceUpdate(
context.Background(), serviceMatch.ID,
serviceMatch.Version, serviceMatch.Spec, types.ServiceUpdateOptions{},
); err != nil {
restartErrors = append(restartErrors, err)
}
}
}
var scaleUpErrors []error
if isDockerSwarm {
for _, svc := range servicesToScaleDown {
service, _, err := s.cli.ServiceInspectWithRaw(context.Background(), svc.serviceID, types.ServiceInspectOptions{})
if err != nil {
scaleUpErrors = append(scaleUpErrors, err)
continue
}
service.Spec.Mode.Replicated.Replicas = &svc.initialReplicaCount
response, err := s.cli.ServiceUpdate(
context.Background(),
service.ID,
service.Version, service.Spec,
types.ServiceUpdateOptions{},
)
if err != nil {
scaleUpErrors = append(scaleUpErrors, err)
continue
}
for _, warning := range response.Warnings {
s.logger.Warn(
fmt.Sprintf("The Docker API returned a warning when scaling up service %s: %s", service.Spec.Name, warning),
)
}
if err := progress.ServiceProgress(context.Background(), s.cli, service.ID, discardWriter); err != nil {
scaleUpErrors = append(scaleUpErrors, err)
}
}
}
allErrors := append(restartErrors, scaleUpErrors...)
if len(allErrors) != 0 {
return fmt.Errorf(
"stopContainers: %d error(s) restarting containers and services: %w",
len(allErrors),
errors.Join(allErrors...),
)
}
s.logger.Info(
fmt.Sprintf(
"Restarted %d container(s) and %d service(s).",
len(stoppedContainers),
len(scaledDownServices),
),
)
return nil
}, initialErr
}
// createArchive creates a tar archive of the configured backup location and
// saves it to disk.
func (s *script) createArchive() error {
@@ -625,7 +322,7 @@ func (s *script) createArchive() error {
"Using BACKUP_FROM_SNAPSHOT has been deprecated and will be removed in the next major version.",
)
s.logger.Warn(
"Please use `archive-pre` and `archive-post` commands to prepare your backup sources. Refer to the README for an upgrade guide.",
"Please use `archive-pre` and `archive-post` commands to prepare your backup sources. Refer to the documentation for an upgrade guide.",
)
backupSources = filepath.Join("/tmp", s.c.BackupSources)
// copy before compressing guard against a situation where backup folder's content are still growing.

338
cmd/backup/stop_restart.go Normal file
View File

@@ -0,0 +1,338 @@
package main
import (
"context"
"errors"
"fmt"
"io"
"os"
"sync"
"time"
"github.com/docker/cli/cli/command/service/progress"
"github.com/docker/docker/api/types"
ctr "github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/swarm"
"github.com/docker/docker/client"
)
// scaleService sets the replica count of the Swarm service with the given ID
// to replicas and then waits for progress.ServiceProgress to return.
//
// Only services in replicated mode can be scaled; any other mode yields an
// error. On success, the warnings contained in the Docker API's update
// response are returned so the caller can decide how to surface them.
func scaleService(cli *client.Client, serviceID string, replicas uint64) ([]string, error) {
	service, _, err := cli.ServiceInspectWithRaw(context.Background(), serviceID, types.ServiceInspectOptions{})
	if err != nil {
		return nil, fmt.Errorf("scaleService: error inspecting service %s: %w", serviceID, err)
	}

	if service.Spec.Mode.Replicated == nil {
		return nil, fmt.Errorf("scaleService: service to be scaled %s has to be in replicated mode", service.Spec.Name)
	}
	service.Spec.Mode.Replicated.Replicas = &replicas

	response, err := cli.ServiceUpdate(context.Background(), service.ID, service.Version, service.Spec, types.ServiceUpdateOptions{})
	if err != nil {
		return nil, fmt.Errorf("scaleService: error updating service %s: %w", service.Spec.Name, err)
	}

	// ServiceProgress wants a WriteCloser for its progress output, which is of
	// no interest here, so everything it writes is discarded.
	discardWriter := &noopWriteCloser{io.Discard}
	if err := progress.ServiceProgress(context.Background(), cli, service.ID, discardWriter); err != nil {
		return nil, fmt.Errorf("scaleService: error awaiting update of service %s: %w", service.Spec.Name, err)
	}
	return response.Warnings, nil
}
// awaitContainerCountForService polls the Docker daemon once per second until
// the number of containers it lists for the given Swarm service (matched by
// the `com.docker.swarm.service.id` label) equals count.
//
// It returns nil as soon as the count matches, and an error if listing
// containers fails or if timeoutAfter elapses first.
func awaitContainerCountForService(cli *client.Client, serviceID string, count int, timeoutAfter time.Duration) error {
	poll := time.NewTicker(time.Second)
	defer poll.Stop()
	timeout := time.NewTimer(timeoutAfter)
	defer timeout.Stop()

	// The filter does not change between polls, so it is built only once.
	serviceFilter := filters.NewArgs(filters.KeyValuePair{
		Key:   "label",
		Value: fmt.Sprintf("com.docker.swarm.service.id=%s", serviceID),
	})

	for {
		select {
		case <-timeout.C:
			return fmt.Errorf(
				"awaitContainerCountForService: timed out after waiting %s for service %s to reach desired container count of %d",
				timeoutAfter,
				serviceID,
				count,
			)
		case <-poll.C:
			containers, err := cli.ContainerList(context.Background(), types.ContainerListOptions{
				Filters: serviceFilter,
			})
			if err != nil {
				return fmt.Errorf("awaitContainerCountForService: error listing containers: %w", err)
			}
			if len(containers) == count {
				return nil
			}
		}
	}
}
// stopContainersAndServices stops all Docker containers and scales down all
// Docker Swarm services that are labeled
// `docker-volume-backup.stop-during-backup=<value>`, where <value> is taken
// from BACKUP_STOP_DURING_BACKUP_LABEL (or the deprecated
// BACKUP_STOP_CONTAINER_LABEL).
//
// It returns a function that restarts everything that has been stopped, and
// an error joining all failures that occurred while stopping. The returned
// function is never nil, so callers can always invoke it, even when an error
// is returned alongside it. When no Docker client is configured or nothing is
// labeled, the returned function is a no-op.
func (s *script) stopContainersAndServices() (func() error, error) {
	if s.cli == nil {
		return noop, nil
	}

	dockerInfo, err := s.cli.Info(context.Background())
	if err != nil {
		return noop, fmt.Errorf("(*script).stopContainersAndServices: error getting docker info: %w", err)
	}
	isDockerSwarm := dockerInfo.Swarm.LocalNodeState != "inactive"

	labelValue := s.c.BackupStopDuringBackupLabel
	if s.c.BackupStopContainerLabel != "" {
		s.logger.Warn(
			"Using BACKUP_STOP_CONTAINER_LABEL has been deprecated and will be removed in the next major version.",
		)
		s.logger.Warn(
			"Please use BACKUP_STOP_DURING_BACKUP_LABEL instead. Refer to the docs for an upgrade guide.",
		)
		// The new setting has a default value, so the environment is consulted
		// to find out whether it has been set explicitly alongside the old one.
		if _, ok := os.LookupEnv("BACKUP_STOP_DURING_BACKUP_LABEL"); ok {
			return noop, errors.New("(*script).stopContainersAndServices: both BACKUP_STOP_DURING_BACKUP_LABEL and BACKUP_STOP_CONTAINER_LABEL have been set, cannot continue")
		}
		labelValue = s.c.BackupStopContainerLabel
	}

	filterMatchLabel := fmt.Sprintf(
		"docker-volume-backup.stop-during-backup=%s",
		labelValue,
	)

	allContainers, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{})
	if err != nil {
		return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for containers: %w", err)
	}
	containersToStop, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{
		Filters: filters.NewArgs(filters.KeyValuePair{
			Key:   "label",
			Value: filterMatchLabel,
		}),
	})
	if err != nil {
		return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for containers to stop: %w", err)
	}

	var allServices []swarm.Service
	var servicesToScaleDown []handledSwarmService
	if isDockerSwarm {
		allServices, err = s.cli.ServiceList(context.Background(), types.ServiceListOptions{})
		if err != nil {
			return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for services: %w", err)
		}
		matchingServices, err := s.cli.ServiceList(context.Background(), types.ServiceListOptions{
			Filters: filters.NewArgs(filters.KeyValuePair{
				Key:   "label",
				Value: filterMatchLabel,
			}),
			Status: true,
		})
		// The error has to be checked before the result is used.
		if err != nil {
			return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for services to scale down: %w", err)
		}
		for _, svc := range matchingServices {
			// Only replicated services carry a replica count. Dereferencing it
			// blindly would panic for services in any other mode.
			replicated := svc.Spec.Mode.Replicated
			if replicated == nil || replicated.Replicas == nil {
				return noop, fmt.Errorf(
					"(*script).stopContainersAndServices: labeled service %s has to be in replicated mode",
					svc.Spec.Name,
				)
			}
			servicesToScaleDown = append(servicesToScaleDown, handledSwarmService{
				serviceID:           svc.ID,
				initialReplicaCount: *replicated.Replicas,
			})
		}
	}

	if len(containersToStop) == 0 && len(servicesToScaleDown) == 0 {
		return noop, nil
	}

	if isDockerSwarm {
		// A container that is labeled itself and also belongs to a labeled
		// service would be handled twice, so this setup is rejected up front.
		for _, container := range containersToStop {
			swarmServiceID, ok := container.Labels["com.docker.swarm.service.id"]
			if !ok {
				continue
			}
			parentService, _, err := s.cli.ServiceInspectWithRaw(context.Background(), swarmServiceID, types.ServiceInspectOptions{})
			if err != nil {
				return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for parent service with ID %s: %w", swarmServiceID, err)
			}
			if _, labeled := parentService.Spec.Labels["docker-volume-backup.stop-during-backup"]; labeled {
				return noop, fmt.Errorf(
					"(*script).stopContainersAndServices: container %s is labeled to stop but has parent service %s which is also labeled, cannot continue",
					container.Names[0],
					parentService.Spec.Name,
				)
			}
		}
	}

	s.logger.Info(
		fmt.Sprintf(
			"Stopping %d out of %d running container(s) as they were labeled %s.",
			len(containersToStop),
			len(allContainers),
			filterMatchLabel,
		),
	)
	if isDockerSwarm {
		s.logger.Info(
			fmt.Sprintf(
				"Scaling down %d out of %d active service(s) as they were labeled %s.",
				len(servicesToScaleDown),
				len(allServices),
				filterMatchLabel,
			),
		)
	}

	var stoppedContainers []types.Container
	var stopErrors []error
	for _, container := range containersToStop {
		if err := s.cli.ContainerStop(context.Background(), container.ID, ctr.StopOptions{}); err != nil {
			stopErrors = append(stopErrors, err)
		} else {
			stoppedContainers = append(stoppedContainers, container)
		}
	}

	// Both collections are written to from multiple goroutines, so they need
	// to be safe for concurrent use.
	var scaledDownServices concurrentSlice[handledSwarmService]
	var scaleDownErrors concurrentSlice[error]
	if isDockerSwarm {
		var wg sync.WaitGroup
		for _, svc := range servicesToScaleDown {
			wg.Add(1)
			go func(svc handledSwarmService) {
				defer wg.Done()
				warnings, err := scaleService(s.cli, svc.serviceID, 0)
				if err != nil {
					// The service has not been scaled down, so waiting for its
					// containers to disappear would only run into the timeout.
					scaleDownErrors.append(err)
					return
				}
				scaledDownServices.append(svc)
				for _, warning := range warnings {
					s.logger.Warn(
						fmt.Sprintf("The Docker API returned a warning when scaling down service %s: %s", svc.serviceID, warning),
					)
				}
				// progress.ServiceProgress returns too early, so we need to manually check
				// whether all containers belonging to the service have actually been removed
				if err := awaitContainerCountForService(s.cli, svc.serviceID, 0, s.c.BackupStopServiceTimeout); err != nil {
					scaleDownErrors.append(err)
				}
			}(svc)
		}
		wg.Wait()
	}

	scaledDownCount := len(scaledDownServices.value())
	collectedScaleDownErrors := scaleDownErrors.value()

	s.stats.Containers = ContainersStats{
		All:        uint(len(allContainers)),
		ToStop:     uint(len(containersToStop)),
		Stopped:    uint(len(stoppedContainers)),
		StopErrors: uint(len(stopErrors)),
	}
	s.stats.Services = ServicesStats{
		All:             uint(len(allServices)),
		ToScaleDown:     uint(len(servicesToScaleDown)),
		ScaledDown:      uint(scaledDownCount),
		ScaleDownErrors: uint(len(collectedScaleDownErrors)),
	}

	var initialErr error
	allErrors := append(stopErrors, collectedScaleDownErrors...)
	if len(allErrors) != 0 {
		initialErr = fmt.Errorf(
			"(*script).stopContainersAndServices: %d error(s) stopping containers: %w",
			len(allErrors),
			errors.Join(allErrors...),
		)
	}

	return func() error {
		var restartErrors []error
		matchedServices := map[string]bool{}
		for _, container := range stoppedContainers {
			if swarmServiceID, ok := container.Labels["com.docker.swarm.service.id"]; ok && isDockerSwarm {
				// Several stopped containers may belong to the same service,
				// which only needs to be force updated once.
				if matchedServices[swarmServiceID] {
					continue
				}
				matchedServices[swarmServiceID] = true
				// in case a container was part of a swarm service, the service requires to
				// be force updated instead of restarting the container as it would otherwise
				// remain in a "completed" state
				service, _, err := s.cli.ServiceInspectWithRaw(context.Background(), swarmServiceID, types.ServiceInspectOptions{})
				if err != nil {
					restartErrors = append(
						restartErrors,
						fmt.Errorf("(*script).stopContainersAndServices: error looking up parent service: %w", err),
					)
					continue
				}
				service.Spec.TaskTemplate.ForceUpdate += 1
				if _, err := s.cli.ServiceUpdate(
					context.Background(), service.ID,
					service.Version, service.Spec, types.ServiceUpdateOptions{},
				); err != nil {
					restartErrors = append(restartErrors, err)
				}
				continue
			}

			if err := s.cli.ContainerStart(context.Background(), container.ID, types.ContainerStartOptions{}); err != nil {
				restartErrors = append(restartErrors, err)
			}
		}

		var scaleUpErrors concurrentSlice[error]
		if isDockerSwarm {
			var wg sync.WaitGroup
			for _, svc := range servicesToScaleDown {
				wg.Add(1)
				go func(svc handledSwarmService) {
					defer wg.Done()
					warnings, err := scaleService(s.cli, svc.serviceID, svc.initialReplicaCount)
					if err != nil {
						// Has to be collected in scaleUpErrors, otherwise the
						// failure would never be reported to the caller.
						scaleUpErrors.append(err)
						return
					}
					for _, warning := range warnings {
						s.logger.Warn(
							fmt.Sprintf("The Docker API returned a warning when scaling up service %s: %s", svc.serviceID, warning),
						)
					}
				}(svc)
			}
			wg.Wait()
		}

		allErrors := append(restartErrors, scaleUpErrors.value()...)
		if len(allErrors) != 0 {
			return fmt.Errorf(
				"(*script).stopContainersAndServices: %d error(s) restarting containers and services: %w",
				len(allErrors),
				errors.Join(allErrors...),
			)
		}

		s.logger.Info(
			fmt.Sprintf(
				"Restarted %d container(s).",
				len(stoppedContainers),
			),
		)
		if isDockerSwarm {
			s.logger.Info(
				fmt.Sprintf(
					"Scaled %d service(s) back up.",
					scaledDownCount,
				),
			)
		}
		return nil
	}, initialErr
}

View File

@@ -8,6 +8,7 @@ import (
"fmt"
"io"
"os"
"sync"
)
var noop = func() error { return nil }
@@ -50,3 +51,31 @@ func (b *bufferingWriter) Write(p []byte) (n int, err error) {
}
return b.writer.Write(p)
}
// noopWriteCloser adds a Close method that does nothing to the embedded
// io.Writer, so the writer can be passed where a Close method is also needed.
type noopWriteCloser struct {
	io.Writer
}

// Close always succeeds and leaves the embedded writer untouched.
func (w noopWriteCloser) Close() error { return nil }
// handledSwarmService records a Docker Swarm service that is scaled down
// while a backup is taken, together with what is needed to scale it back up.
type handledSwarmService struct {
// serviceID is the service's ID as returned when listing services.
serviceID string
// initialReplicaCount is the replica count read from the service spec before
// scaling down; it is the count the service is scaled back up to.
initialReplicaCount uint64
}
// concurrentSlice is a slice that multiple goroutines can append to safely.
// The zero value is an empty slice that is ready for use. As it embeds a
// sync.Mutex, a concurrentSlice must not be copied after first use.
type concurrentSlice[T any] struct {
	val []T
	sync.Mutex
}

// append adds v to the end of the slice while holding the lock.
func (c *concurrentSlice[T]) append(v T) {
	c.Lock()
	defer c.Unlock()
	c.val = append(c.val, v)
}

// value returns a snapshot of the items appended so far. The lock is held
// while reading, and the result is a copy, so callers can keep using or
// modifying it while other goroutines continue to append.
func (c *concurrentSlice[T]) value() []T {
	c.Lock()
	defer c.Unlock()
	return append([]T(nil), c.val...)
}

View File

@@ -0,0 +1,19 @@
---
title: Replace deprecated BACKUP_STOP_CONTAINER_LABEL setting
layout: default
parent: How Tos
nav_order: 19
---
# Replace deprecated `BACKUP_STOP_CONTAINER_LABEL` setting
Version `v2.36.0` deprecated the `BACKUP_STOP_CONTAINER_LABEL` setting and renamed it to `BACKUP_STOP_DURING_BACKUP_LABEL`, which signals that it stops both containers _and_ services.
Migrating is done by renaming the key for your custom value:
```diff
env:
- BACKUP_STOP_CONTAINER_LABEL: database
+ BACKUP_STOP_DURING_BACKUP_LABEL: database
```
The old key will stay supported until the next major version, but logs a warning each time a backup is taken.

View File

@@ -76,7 +76,7 @@ Configuration, data about the backup run and helper functions will be passed to
Here is a list of all data passed to the template:
* `Config`: this object holds the configuration that has been passed to the script. The field names are the name of the recognized environment variables converted in PascalCase. (e.g. `BACKUP_STOP_CONTAINER_LABEL` becomes `BackupStopContainerLabel`)
* `Config`: this object holds the configuration that has been passed to the script. The field names are the name of the recognized environment variables converted in PascalCase. (e.g. `BACKUP_STOP_DURING_BACKUP_LABEL` becomes `BackupStopDuringBackupLabel`)
* `Error`: the error that made the backup fail. Only available in the `title_failure` and `body_failure` templates
* `Stats`: object that holds stats regarding script execution. In case of an unsuccessful run, some information may not be available.
* `StartTime`: time when the script started execution

View File

@@ -14,7 +14,7 @@ In many cases, it will be desirable to stop the services that are consuming the
This image can automatically stop and restart containers and services.
By default, any container that is labeled `docker-volume-backup.stop-during-backup=true` will be stopped before the backup is being taken and restarted once it has finished.
In case you need more fine grained control about which containers should be stopped (e.g. when backing up multiple volumes on different schedules), you can set the `BACKUP_STOP_CONTAINER_LABEL` environment variable and then use the same value for labeling:
In case you need more fine-grained control over which containers should be stopped (e.g. when backing up multiple volumes on different schedules), you can set the `BACKUP_STOP_DURING_BACKUP_LABEL` environment variable and then use the same value for labeling:
```yml
version: '3'
@@ -28,7 +28,7 @@ services:
backup:
image: offen/docker-volume-backup:v2
environment:
BACKUP_STOP_CONTAINER_LABEL: service1
BACKUP_STOP_DURING_BACKUP_LABEL: service1
volumes:
- data:/backup/my-app-backup:ro
- /var/run/docker.sock:/var/run/docker.sock:ro

View File

@@ -352,7 +352,7 @@ services:
AWS_ACCESS_KEY_ID: AKIAIOSFODNN7EXAMPLE
AWS_SECRET_ACCESS_KEY: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
# Label the container using the `data_1` volume as `docker-volume-backup.stop-during-backup=service1`
BACKUP_STOP_CONTAINER_LABEL: service1
BACKUP_STOP_DURING_BACKUP_LABEL: service1
volumes:
- data_1:/backup/data-1-backup:ro
- /var/run/docker.sock:/var/run/docker.sock:ro
@@ -362,7 +362,7 @@ services:
<<: *backup_environment
# Label the container using the `data_2` volume as `docker-volume-backup.stop-during-backup=service2`
BACKUP_CRON_EXPRESSION: "0 3 * * *"
BACKUP_STOP_CONTAINER_LABEL: service2
BACKUP_STOP_DURING_BACKUP_LABEL: service2
volumes:
- data_2:/backup/data-2-backup:ro
- /var/run/docker.sock:/var/run/docker.sock:ro

View File

@@ -316,15 +316,22 @@ You can populate below template according to your requirements and use it as you
# GPG_PASSPHRASE="<xxx>"
########### STOPPING CONTAINERS DURING BACKUP
########### STOPPING CONTAINERS AND SERVICES DURING BACKUP
# Containers can be stopped by applying a
# `docker-volume-backup.stop-during-backup` label. By default, all containers
# that are labeled with `true` will be stopped. If you need more fine grained
# control (e.g. when running multiple containers based on this image), you can
# override this default by specifying a different value here.
# Containers or services can be stopped by applying a
# `docker-volume-backup.stop-during-backup` label. By default, all containers and
# services that are labeled with `true` will be stopped. If you need more fine
# grained control (e.g. when running multiple containers based on this image),
# you can override this default by specifying a different value here.
# BACKUP_STOP_DURING_BACKUP_LABEL="service1"
# BACKUP_STOP_CONTAINER_LABEL="service1"
# When trying to scale down Docker Swarm services, give up after
# the specified amount of time in case the service has not converged yet.
# In case you need to adjust this timeout, supply a duration
# value as per https://pkg.go.dev/time#ParseDuration to `BACKUP_STOP_SERVICE_TIMEOUT`.
# Defaults to 5 minutes.
# BACKUP_STOP_SERVICE_TIMEOUT="5m"
########### EXECUTING COMMANDS IN CONTAINERS PRE/POST BACKUP

2
go.mod
View File

@@ -10,7 +10,7 @@ require (
github.com/docker/cli v24.0.1+incompatible
github.com/docker/docker v24.0.7+incompatible
github.com/gofrs/flock v0.8.1
github.com/klauspost/compress v1.17.4
github.com/klauspost/compress v1.17.5
github.com/leekchan/timeutil v0.0.0-20150802142658-28917288c48d
github.com/minio/minio-go/v7 v7.0.66
github.com/offen/envconfig v1.5.0

4
go.sum
View File

@@ -456,8 +456,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
github.com/klauspost/compress v1.17.5 h1:d4vBd+7CHydUqpFBgUEKkSdtSugf9YFmSkvUYPquI5E=
github.com/klauspost/compress v1.17.5/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc=
github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=