mirror of
https://github.com/offen/docker-volume-backup.git
synced 2025-12-05 17:18:02 +01:00
Compare commits
4 Commits
v2.36.0-al
...
v2.36.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c3daeacecb | ||
|
|
2065fb2815 | ||
|
|
97e5aa42cc | ||
|
|
ed5abd5ba8 |
2
.github/workflows/deploy-docs.yml
vendored
2
.github/workflows/deploy-docs.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
- name: Setup Ruby
|
||||
uses: ruby/setup-ruby@v1
|
||||
with:
|
||||
|
||||
4
.github/workflows/golangci-lint.yml
vendored
4
.github/workflows/golangci-lint.yml
vendored
@@ -15,8 +15,8 @@ jobs:
|
||||
name: lint
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-go@v4
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.21'
|
||||
cache: false
|
||||
|
||||
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@@ -13,7 +13,7 @@ jobs:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Check out the repo
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
|
||||
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
test:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
|
||||
@@ -37,7 +37,9 @@ type Config struct {
|
||||
BackupRetentionDays int32 `split_words:"true" default:"-1"`
|
||||
BackupPruningLeeway time.Duration `split_words:"true" default:"1m"`
|
||||
BackupPruningPrefix string `split_words:"true"`
|
||||
BackupStopContainerLabel string `split_words:"true" default:"true"`
|
||||
BackupStopContainerLabel string `split_words:"true"`
|
||||
BackupStopDuringBackupLabel string `split_words:"true" default:"true"`
|
||||
BackupStopServiceTimeout time.Duration `split_words:"true" default:"5m"`
|
||||
BackupFromSnapshot bool `split_words:"true"`
|
||||
BackupExcludeRegexp RegexpDecoder `split_words:"true"`
|
||||
BackupSkipBackendsFromPrune []string `split_words:"true"`
|
||||
|
||||
@@ -21,6 +21,9 @@ func main() {
|
||||
defer func() {
|
||||
if pArg := recover(); pArg != nil {
|
||||
if err, ok := pArg.(error); ok {
|
||||
s.logger.Error(
|
||||
fmt.Sprintf("Executing the script encountered a panic: %v", err),
|
||||
)
|
||||
if hookErr := s.runHooks(err); hookErr != nil {
|
||||
s.logger.Error(
|
||||
fmt.Sprintf("An error occurred calling the registered hooks: %s", hookErr),
|
||||
@@ -44,12 +47,12 @@ func main() {
|
||||
}()
|
||||
|
||||
s.must(s.withLabeledCommands(lifecyclePhaseArchive, func() error {
|
||||
restartContainers, err := s.stopContainersAndServices()
|
||||
restartContainersAndServices, err := s.stopContainersAndServices()
|
||||
// The mechanism for restarting containers is not using hooks as it
|
||||
// should happen as soon as possible (i.e. before uploading backups or
|
||||
// similar).
|
||||
defer func() {
|
||||
s.must(restartContainers())
|
||||
s.must(restartContainersAndServices())
|
||||
}()
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
@@ -5,8 +5,6 @@ package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
@@ -30,11 +28,6 @@ import (
|
||||
openpgp "github.com/ProtonMail/go-crypto/openpgp/v2"
|
||||
"github.com/containrrr/shoutrrr"
|
||||
"github.com/containrrr/shoutrrr/pkg/router"
|
||||
"github.com/docker/cli/cli/command/service/progress"
|
||||
"github.com/docker/docker/api/types"
|
||||
ctr "github.com/docker/docker/api/types/container"
|
||||
"github.com/docker/docker/api/types/filters"
|
||||
"github.com/docker/docker/api/types/swarm"
|
||||
"github.com/docker/docker/client"
|
||||
"github.com/leekchan/timeutil"
|
||||
"github.com/offen/envconfig"
|
||||
@@ -319,302 +312,6 @@ func newScript() (*script, error) {
|
||||
return s, nil
|
||||
}
|
||||
|
||||
type noopWriteCloser struct {
|
||||
io.Writer
|
||||
}
|
||||
|
||||
func (noopWriteCloser) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type handledSwarmService struct {
|
||||
serviceID string
|
||||
initialReplicaCount uint64
|
||||
}
|
||||
|
||||
// stopContainersAndServices stops all Docker containers that are marked as to being
|
||||
// stopped during the backup and returns a function that can be called to
|
||||
// restart everything that has been stopped.
|
||||
func (s *script) stopContainersAndServices() (func() error, error) {
|
||||
if s.cli == nil {
|
||||
return noop, nil
|
||||
}
|
||||
|
||||
dockerInfo, err := s.cli.Info(context.Background())
|
||||
if err != nil {
|
||||
return noop, fmt.Errorf("(*script).stopContainersAndServices: error getting docker info: %w", err)
|
||||
}
|
||||
isDockerSwarm := dockerInfo.Swarm.LocalNodeState != "inactive"
|
||||
discardWriter := &noopWriteCloser{io.Discard}
|
||||
|
||||
filterMatchLabel := fmt.Sprintf(
|
||||
"docker-volume-backup.stop-during-backup=%s",
|
||||
s.c.BackupStopContainerLabel,
|
||||
)
|
||||
|
||||
allContainers, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{})
|
||||
if err != nil {
|
||||
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for containers: %w", err)
|
||||
}
|
||||
containersToStop, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{
|
||||
Filters: filters.NewArgs(filters.KeyValuePair{
|
||||
Key: "label",
|
||||
Value: filterMatchLabel,
|
||||
}),
|
||||
})
|
||||
if err != nil {
|
||||
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for containers to stop: %w", err)
|
||||
}
|
||||
|
||||
var allServices []swarm.Service
|
||||
var servicesToScaleDown []handledSwarmService
|
||||
if isDockerSwarm {
|
||||
allServices, err = s.cli.ServiceList(context.Background(), types.ServiceListOptions{})
|
||||
if err != nil {
|
||||
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for services: %w", err)
|
||||
}
|
||||
matchingServices, err := s.cli.ServiceList(context.Background(), types.ServiceListOptions{
|
||||
Filters: filters.NewArgs(filters.KeyValuePair{
|
||||
Key: "label",
|
||||
Value: filterMatchLabel,
|
||||
}),
|
||||
Status: true,
|
||||
})
|
||||
for _, s := range matchingServices {
|
||||
servicesToScaleDown = append(servicesToScaleDown, handledSwarmService{
|
||||
serviceID: s.ID,
|
||||
initialReplicaCount: *s.Spec.Mode.Replicated.Replicas,
|
||||
})
|
||||
}
|
||||
if err != nil {
|
||||
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for services to scale down: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(containersToStop) == 0 && len(servicesToScaleDown) == 0 {
|
||||
return noop, nil
|
||||
}
|
||||
|
||||
if isDockerSwarm {
|
||||
for _, container := range containersToStop {
|
||||
if swarmServiceID, ok := container.Labels["com.docker.swarm.service.id"]; ok {
|
||||
parentService, _, err := s.cli.ServiceInspectWithRaw(context.Background(), swarmServiceID, types.ServiceInspectOptions{})
|
||||
if err != nil {
|
||||
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for parent service with ID %s: %w", swarmServiceID, err)
|
||||
}
|
||||
for label := range parentService.Spec.Labels {
|
||||
if label == "docker-volume-backup.stop-during-backup" {
|
||||
return noop, fmt.Errorf(
|
||||
"(*script).stopContainersAndServices: container %s is labeled to stop but has parent service %s which is also labeled, cannot continue",
|
||||
container.Names[0],
|
||||
parentService.Spec.Name,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.logger.Info(
|
||||
fmt.Sprintf(
|
||||
"Stopping %d out of %d running container(s) and scaling down %d out of %d active service(s) as they were labeled %s.",
|
||||
len(containersToStop),
|
||||
len(allContainers),
|
||||
len(servicesToScaleDown),
|
||||
len(allServices),
|
||||
filterMatchLabel,
|
||||
),
|
||||
)
|
||||
|
||||
var stoppedContainers []types.Container
|
||||
var stopErrors []error
|
||||
for _, container := range containersToStop {
|
||||
if err := s.cli.ContainerStop(context.Background(), container.ID, ctr.StopOptions{}); err != nil {
|
||||
stopErrors = append(stopErrors, err)
|
||||
} else {
|
||||
stoppedContainers = append(stoppedContainers, container)
|
||||
}
|
||||
}
|
||||
|
||||
var scaledDownServices []swarm.Service
|
||||
var scaleDownErrors []error
|
||||
if isDockerSwarm {
|
||||
for _, svc := range servicesToScaleDown {
|
||||
service, _, err := s.cli.ServiceInspectWithRaw(context.Background(), svc.serviceID, types.ServiceInspectOptions{})
|
||||
if err != nil {
|
||||
scaleDownErrors = append(
|
||||
scaleDownErrors,
|
||||
fmt.Errorf("(*script).stopContainersAndServices: error inspecting service %s: %w", svc.serviceID, err),
|
||||
)
|
||||
continue
|
||||
}
|
||||
var zero uint64 = 0
|
||||
serviceMode := &service.Spec.Mode
|
||||
switch {
|
||||
case serviceMode.Replicated != nil:
|
||||
serviceMode.Replicated.Replicas = &zero
|
||||
default:
|
||||
scaleDownErrors = append(
|
||||
scaleDownErrors,
|
||||
fmt.Errorf("(*script).stopContainersAndServices: labeled service %s has to be in replicated mode", service.Spec.Name),
|
||||
)
|
||||
continue
|
||||
}
|
||||
|
||||
response, err := s.cli.ServiceUpdate(context.Background(), service.ID, service.Version, service.Spec, types.ServiceUpdateOptions{})
|
||||
if err != nil {
|
||||
scaleDownErrors = append(scaleDownErrors, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, warning := range response.Warnings {
|
||||
s.logger.Warn(
|
||||
fmt.Sprintf("The Docker API returned a warning when scaling down service %s: %s", service.Spec.Name, warning),
|
||||
)
|
||||
}
|
||||
|
||||
if err := progress.ServiceProgress(context.Background(), s.cli, service.ID, discardWriter); err != nil {
|
||||
scaleDownErrors = append(scaleDownErrors, err)
|
||||
} else {
|
||||
scaledDownServices = append(scaledDownServices, service)
|
||||
}
|
||||
|
||||
// progress.ServiceProgress returns too early, so we need to manually check
|
||||
// whether all containers belonging to the service have actually been removed
|
||||
for {
|
||||
containers, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{
|
||||
Filters: filters.NewArgs(filters.KeyValuePair{
|
||||
Key: "label",
|
||||
Value: fmt.Sprintf("com.docker.swarm.service.id=%s", service.ID),
|
||||
}),
|
||||
})
|
||||
if err != nil {
|
||||
scaleDownErrors = append(scaleDownErrors, err)
|
||||
break
|
||||
}
|
||||
if len(containers) == 0 {
|
||||
break
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.stats.Containers = ContainersStats{
|
||||
All: uint(len(allContainers)),
|
||||
ToStop: uint(len(containersToStop)),
|
||||
Stopped: uint(len(stoppedContainers)),
|
||||
StopErrors: uint(len(stopErrors)),
|
||||
}
|
||||
|
||||
s.stats.Services = ServicesStats{
|
||||
All: uint(len(allServices)),
|
||||
ToScaleDown: uint(len(servicesToScaleDown)),
|
||||
ScaledDown: uint(len(scaledDownServices)),
|
||||
ScaleDownErrors: uint(len(scaleDownErrors)),
|
||||
}
|
||||
|
||||
var initialErr error
|
||||
allErrors := append(stopErrors, scaleDownErrors...)
|
||||
if len(allErrors) != 0 {
|
||||
initialErr = fmt.Errorf(
|
||||
"(*script).stopContainersAndServices: %d error(s) stopping containers: %w",
|
||||
len(allErrors),
|
||||
errors.Join(allErrors...),
|
||||
)
|
||||
}
|
||||
|
||||
return func() error {
|
||||
servicesRequiringForceUpdate := map[string]struct{}{}
|
||||
|
||||
var restartErrors []error
|
||||
for _, container := range stoppedContainers {
|
||||
if swarmServiceName, ok := container.Labels["com.docker.swarm.service.name"]; ok {
|
||||
servicesRequiringForceUpdate[swarmServiceName] = struct{}{}
|
||||
continue
|
||||
}
|
||||
if err := s.cli.ContainerStart(context.Background(), container.ID, types.ContainerStartOptions{}); err != nil {
|
||||
restartErrors = append(restartErrors, err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(servicesRequiringForceUpdate) != 0 {
|
||||
services, _ := s.cli.ServiceList(context.Background(), types.ServiceListOptions{})
|
||||
for serviceName := range servicesRequiringForceUpdate {
|
||||
var serviceMatch swarm.Service
|
||||
for _, service := range services {
|
||||
if service.Spec.Name == serviceName {
|
||||
serviceMatch = service
|
||||
break
|
||||
}
|
||||
}
|
||||
if serviceMatch.ID == "" {
|
||||
restartErrors = append(
|
||||
restartErrors,
|
||||
fmt.Errorf("(*script).stopContainersAndServices: couldn't find service with name %s", serviceName),
|
||||
)
|
||||
continue
|
||||
}
|
||||
serviceMatch.Spec.TaskTemplate.ForceUpdate += 1
|
||||
if _, err := s.cli.ServiceUpdate(
|
||||
context.Background(), serviceMatch.ID,
|
||||
serviceMatch.Version, serviceMatch.Spec, types.ServiceUpdateOptions{},
|
||||
); err != nil {
|
||||
restartErrors = append(restartErrors, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var scaleUpErrors []error
|
||||
if isDockerSwarm {
|
||||
for _, svc := range servicesToScaleDown {
|
||||
service, _, err := s.cli.ServiceInspectWithRaw(context.Background(), svc.serviceID, types.ServiceInspectOptions{})
|
||||
if err != nil {
|
||||
scaleUpErrors = append(scaleUpErrors, err)
|
||||
continue
|
||||
}
|
||||
|
||||
service.Spec.Mode.Replicated.Replicas = &svc.initialReplicaCount
|
||||
response, err := s.cli.ServiceUpdate(
|
||||
context.Background(),
|
||||
service.ID,
|
||||
service.Version, service.Spec,
|
||||
types.ServiceUpdateOptions{},
|
||||
)
|
||||
if err != nil {
|
||||
scaleUpErrors = append(scaleUpErrors, err)
|
||||
continue
|
||||
}
|
||||
for _, warning := range response.Warnings {
|
||||
s.logger.Warn(
|
||||
fmt.Sprintf("The Docker API returned a warning when scaling up service %s: %s", service.Spec.Name, warning),
|
||||
)
|
||||
}
|
||||
if err := progress.ServiceProgress(context.Background(), s.cli, service.ID, discardWriter); err != nil {
|
||||
scaleUpErrors = append(scaleUpErrors, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
allErrors := append(restartErrors, scaleUpErrors...)
|
||||
if len(allErrors) != 0 {
|
||||
return fmt.Errorf(
|
||||
"stopContainers: %d error(s) restarting containers and services: %w",
|
||||
len(allErrors),
|
||||
errors.Join(allErrors...),
|
||||
)
|
||||
}
|
||||
s.logger.Info(
|
||||
fmt.Sprintf(
|
||||
"Restarted %d container(s) and %d service(s).",
|
||||
len(stoppedContainers),
|
||||
len(scaledDownServices),
|
||||
),
|
||||
)
|
||||
return nil
|
||||
}, initialErr
|
||||
}
|
||||
|
||||
// createArchive creates a tar archive of the configured backup location and
|
||||
// saves it to disk.
|
||||
func (s *script) createArchive() error {
|
||||
@@ -625,7 +322,7 @@ func (s *script) createArchive() error {
|
||||
"Using BACKUP_FROM_SNAPSHOT has been deprecated and will be removed in the next major version.",
|
||||
)
|
||||
s.logger.Warn(
|
||||
"Please use `archive-pre` and `archive-post` commands to prepare your backup sources. Refer to the README for an upgrade guide.",
|
||||
"Please use `archive-pre` and `archive-post` commands to prepare your backup sources. Refer to the documentation for an upgrade guide.",
|
||||
)
|
||||
backupSources = filepath.Join("/tmp", s.c.BackupSources)
|
||||
// copy before compressing guard against a situation where backup folder's content are still growing.
|
||||
|
||||
338
cmd/backup/stop_restart.go
Normal file
338
cmd/backup/stop_restart.go
Normal file
@@ -0,0 +1,338 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/docker/cli/cli/command/service/progress"
|
||||
"github.com/docker/docker/api/types"
|
||||
ctr "github.com/docker/docker/api/types/container"
|
||||
"github.com/docker/docker/api/types/filters"
|
||||
"github.com/docker/docker/api/types/swarm"
|
||||
"github.com/docker/docker/client"
|
||||
)
|
||||
|
||||
func scaleService(cli *client.Client, serviceID string, replicas uint64) ([]string, error) {
|
||||
service, _, err := cli.ServiceInspectWithRaw(context.Background(), serviceID, types.ServiceInspectOptions{})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scaleService: error inspecting service %s: %w", serviceID, err)
|
||||
}
|
||||
serviceMode := &service.Spec.Mode
|
||||
switch {
|
||||
case serviceMode.Replicated != nil:
|
||||
serviceMode.Replicated.Replicas = &replicas
|
||||
default:
|
||||
return nil, fmt.Errorf("scaleService: service to be scaled %s has to be in replicated mode", service.Spec.Name)
|
||||
}
|
||||
|
||||
response, err := cli.ServiceUpdate(context.Background(), service.ID, service.Version, service.Spec, types.ServiceUpdateOptions{})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scaleService: error updating service: %w", err)
|
||||
}
|
||||
|
||||
discardWriter := &noopWriteCloser{io.Discard}
|
||||
if err := progress.ServiceProgress(context.Background(), cli, service.ID, discardWriter); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return response.Warnings, nil
|
||||
}
|
||||
|
||||
func awaitContainerCountForService(cli *client.Client, serviceID string, count int, timeoutAfter time.Duration) error {
|
||||
poll := time.NewTicker(time.Second)
|
||||
timeout := time.NewTimer(timeoutAfter)
|
||||
defer timeout.Stop()
|
||||
defer poll.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-timeout.C:
|
||||
return fmt.Errorf(
|
||||
"awaitContainerCount: timed out after waiting %s for service %s to reach desired container count of %d",
|
||||
timeoutAfter,
|
||||
serviceID,
|
||||
count,
|
||||
)
|
||||
case <-poll.C:
|
||||
containers, err := cli.ContainerList(context.Background(), types.ContainerListOptions{
|
||||
Filters: filters.NewArgs(filters.KeyValuePair{
|
||||
Key: "label",
|
||||
Value: fmt.Sprintf("com.docker.swarm.service.id=%s", serviceID),
|
||||
}),
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("awaitContainerCount: error listing containers: %w", err)
|
||||
}
|
||||
if len(containers) == count {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// stopContainersAndServices stops all Docker containers that are marked as to being
|
||||
// stopped during the backup and returns a function that can be called to
|
||||
// restart everything that has been stopped.
|
||||
func (s *script) stopContainersAndServices() (func() error, error) {
|
||||
if s.cli == nil {
|
||||
return noop, nil
|
||||
}
|
||||
|
||||
dockerInfo, err := s.cli.Info(context.Background())
|
||||
if err != nil {
|
||||
return noop, fmt.Errorf("(*script).stopContainersAndServices: error getting docker info: %w", err)
|
||||
}
|
||||
isDockerSwarm := dockerInfo.Swarm.LocalNodeState != "inactive"
|
||||
|
||||
labelValue := s.c.BackupStopDuringBackupLabel
|
||||
if s.c.BackupStopContainerLabel != "" {
|
||||
s.logger.Warn(
|
||||
"Using BACKUP_STOP_CONTAINER_LABEL has been deprecated and will be removed in the next major version.",
|
||||
)
|
||||
s.logger.Warn(
|
||||
"Please use BACKUP_STOP_DURING_BACKUP_LABEL instead. Refer to the docs for an upgrade guide.",
|
||||
)
|
||||
if _, ok := os.LookupEnv("BACKUP_STOP_DURING_BACKUP_LABEL"); ok {
|
||||
return noop, errors.New("(*script).stopContainersAndServices: both BACKUP_STOP_DURING_BACKUP_LABEL and BACKUP_STOP_CONTAINER_LABEL have been set, cannot continue")
|
||||
}
|
||||
labelValue = s.c.BackupStopContainerLabel
|
||||
}
|
||||
|
||||
filterMatchLabel := fmt.Sprintf(
|
||||
"docker-volume-backup.stop-during-backup=%s",
|
||||
labelValue,
|
||||
)
|
||||
|
||||
allContainers, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{})
|
||||
if err != nil {
|
||||
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for containers: %w", err)
|
||||
}
|
||||
containersToStop, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{
|
||||
Filters: filters.NewArgs(filters.KeyValuePair{
|
||||
Key: "label",
|
||||
Value: filterMatchLabel,
|
||||
}),
|
||||
})
|
||||
if err != nil {
|
||||
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for containers to stop: %w", err)
|
||||
}
|
||||
|
||||
var allServices []swarm.Service
|
||||
var servicesToScaleDown []handledSwarmService
|
||||
if isDockerSwarm {
|
||||
allServices, err = s.cli.ServiceList(context.Background(), types.ServiceListOptions{})
|
||||
if err != nil {
|
||||
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for services: %w", err)
|
||||
}
|
||||
matchingServices, err := s.cli.ServiceList(context.Background(), types.ServiceListOptions{
|
||||
Filters: filters.NewArgs(filters.KeyValuePair{
|
||||
Key: "label",
|
||||
Value: filterMatchLabel,
|
||||
}),
|
||||
Status: true,
|
||||
})
|
||||
for _, s := range matchingServices {
|
||||
servicesToScaleDown = append(servicesToScaleDown, handledSwarmService{
|
||||
serviceID: s.ID,
|
||||
initialReplicaCount: *s.Spec.Mode.Replicated.Replicas,
|
||||
})
|
||||
}
|
||||
if err != nil {
|
||||
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for services to scale down: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(containersToStop) == 0 && len(servicesToScaleDown) == 0 {
|
||||
return noop, nil
|
||||
}
|
||||
|
||||
if isDockerSwarm {
|
||||
for _, container := range containersToStop {
|
||||
if swarmServiceID, ok := container.Labels["com.docker.swarm.service.id"]; ok {
|
||||
parentService, _, err := s.cli.ServiceInspectWithRaw(context.Background(), swarmServiceID, types.ServiceInspectOptions{})
|
||||
if err != nil {
|
||||
return noop, fmt.Errorf("(*script).stopContainersAndServices: error querying for parent service with ID %s: %w", swarmServiceID, err)
|
||||
}
|
||||
for label := range parentService.Spec.Labels {
|
||||
if label == "docker-volume-backup.stop-during-backup" {
|
||||
return noop, fmt.Errorf(
|
||||
"(*script).stopContainersAndServices: container %s is labeled to stop but has parent service %s which is also labeled, cannot continue",
|
||||
container.Names[0],
|
||||
parentService.Spec.Name,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.logger.Info(
|
||||
fmt.Sprintf(
|
||||
"Stopping %d out of %d running container(s) as they were labeled %s.",
|
||||
len(containersToStop),
|
||||
len(allContainers),
|
||||
filterMatchLabel,
|
||||
),
|
||||
)
|
||||
if isDockerSwarm {
|
||||
s.logger.Info(
|
||||
fmt.Sprintf(
|
||||
"Scaling down %d out of %d active service(s) as they were labeled %s.",
|
||||
len(servicesToScaleDown),
|
||||
len(allServices),
|
||||
filterMatchLabel,
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
var stoppedContainers []types.Container
|
||||
var stopErrors []error
|
||||
for _, container := range containersToStop {
|
||||
if err := s.cli.ContainerStop(context.Background(), container.ID, ctr.StopOptions{}); err != nil {
|
||||
stopErrors = append(stopErrors, err)
|
||||
} else {
|
||||
stoppedContainers = append(stoppedContainers, container)
|
||||
}
|
||||
}
|
||||
|
||||
var scaledDownServices []handledSwarmService
|
||||
var scaleDownErrors concurrentSlice[error]
|
||||
if isDockerSwarm {
|
||||
wg := sync.WaitGroup{}
|
||||
for _, svc := range servicesToScaleDown {
|
||||
wg.Add(1)
|
||||
go func(svc handledSwarmService) {
|
||||
defer wg.Done()
|
||||
warnings, err := scaleService(s.cli, svc.serviceID, 0)
|
||||
if err != nil {
|
||||
scaleDownErrors.append(err)
|
||||
} else {
|
||||
scaledDownServices = append(scaledDownServices, svc)
|
||||
}
|
||||
for _, warning := range warnings {
|
||||
s.logger.Warn(
|
||||
fmt.Sprintf("The Docker API returned a warning when scaling down service %s: %s", svc.serviceID, warning),
|
||||
)
|
||||
}
|
||||
// progress.ServiceProgress returns too early, so we need to manually check
|
||||
// whether all containers belonging to the service have actually been removed
|
||||
if err := awaitContainerCountForService(s.cli, svc.serviceID, 0, s.c.BackupStopServiceTimeout); err != nil {
|
||||
scaleDownErrors.append(err)
|
||||
}
|
||||
}(svc)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
s.stats.Containers = ContainersStats{
|
||||
All: uint(len(allContainers)),
|
||||
ToStop: uint(len(containersToStop)),
|
||||
Stopped: uint(len(stoppedContainers)),
|
||||
StopErrors: uint(len(stopErrors)),
|
||||
}
|
||||
|
||||
s.stats.Services = ServicesStats{
|
||||
All: uint(len(allServices)),
|
||||
ToScaleDown: uint(len(servicesToScaleDown)),
|
||||
ScaledDown: uint(len(scaledDownServices)),
|
||||
ScaleDownErrors: uint(len(scaleDownErrors.value())),
|
||||
}
|
||||
|
||||
var initialErr error
|
||||
allErrors := append(stopErrors, scaleDownErrors.value()...)
|
||||
if len(allErrors) != 0 {
|
||||
initialErr = fmt.Errorf(
|
||||
"(*script).stopContainersAndServices: %d error(s) stopping containers: %w",
|
||||
len(allErrors),
|
||||
errors.Join(allErrors...),
|
||||
)
|
||||
}
|
||||
|
||||
return func() error {
|
||||
var restartErrors []error
|
||||
matchedServices := map[string]bool{}
|
||||
for _, container := range stoppedContainers {
|
||||
if swarmServiceID, ok := container.Labels["com.docker.swarm.service.id"]; ok && isDockerSwarm {
|
||||
if _, ok := matchedServices[swarmServiceID]; ok {
|
||||
continue
|
||||
}
|
||||
matchedServices[swarmServiceID] = true
|
||||
// in case a container was part of a swarm service, the service requires to
|
||||
// be force updated instead of restarting the container as it would otherwise
|
||||
// remain in a "completed" state
|
||||
service, _, err := s.cli.ServiceInspectWithRaw(context.Background(), swarmServiceID, types.ServiceInspectOptions{})
|
||||
if err != nil {
|
||||
restartErrors = append(
|
||||
restartErrors,
|
||||
fmt.Errorf("(*script).stopContainersAndServices: error looking up parent service: %w", err),
|
||||
)
|
||||
continue
|
||||
}
|
||||
service.Spec.TaskTemplate.ForceUpdate += 1
|
||||
if _, err := s.cli.ServiceUpdate(
|
||||
context.Background(), service.ID,
|
||||
service.Version, service.Spec, types.ServiceUpdateOptions{},
|
||||
); err != nil {
|
||||
restartErrors = append(restartErrors, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if err := s.cli.ContainerStart(context.Background(), container.ID, types.ContainerStartOptions{}); err != nil {
|
||||
restartErrors = append(restartErrors, err)
|
||||
}
|
||||
}
|
||||
|
||||
var scaleUpErrors concurrentSlice[error]
|
||||
if isDockerSwarm {
|
||||
wg := &sync.WaitGroup{}
|
||||
for _, svc := range servicesToScaleDown {
|
||||
wg.Add(1)
|
||||
go func(svc handledSwarmService) {
|
||||
defer wg.Done()
|
||||
warnings, err := scaleService(s.cli, svc.serviceID, svc.initialReplicaCount)
|
||||
if err != nil {
|
||||
scaleDownErrors.append(err)
|
||||
return
|
||||
}
|
||||
for _, warning := range warnings {
|
||||
s.logger.Warn(
|
||||
fmt.Sprintf("The Docker API returned a warning when scaling up service %s: %s", svc.serviceID, warning),
|
||||
)
|
||||
}
|
||||
}(svc)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
allErrors := append(restartErrors, scaleUpErrors.value()...)
|
||||
if len(allErrors) != 0 {
|
||||
return fmt.Errorf(
|
||||
"(*script).stopContainersAndServices: %d error(s) restarting containers and services: %w",
|
||||
len(allErrors),
|
||||
errors.Join(allErrors...),
|
||||
)
|
||||
}
|
||||
|
||||
s.logger.Info(
|
||||
fmt.Sprintf(
|
||||
"Restarted %d container(s).",
|
||||
len(stoppedContainers),
|
||||
),
|
||||
)
|
||||
if isDockerSwarm {
|
||||
s.logger.Info(
|
||||
fmt.Sprintf(
|
||||
"Scaled %d service(s) back up.",
|
||||
len(scaledDownServices),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
return nil
|
||||
}, initialErr
|
||||
}
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var noop = func() error { return nil }
|
||||
@@ -50,3 +51,31 @@ func (b *bufferingWriter) Write(p []byte) (n int, err error) {
|
||||
}
|
||||
return b.writer.Write(p)
|
||||
}
|
||||
|
||||
type noopWriteCloser struct {
|
||||
io.Writer
|
||||
}
|
||||
|
||||
func (noopWriteCloser) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type handledSwarmService struct {
|
||||
serviceID string
|
||||
initialReplicaCount uint64
|
||||
}
|
||||
|
||||
type concurrentSlice[T any] struct {
|
||||
val []T
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
func (c *concurrentSlice[T]) append(v T) {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
c.val = append(c.val, v)
|
||||
}
|
||||
|
||||
func (c *concurrentSlice[T]) value() []T {
|
||||
return c.val
|
||||
}
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
---
|
||||
title: Replace deprecated BACKUP_STOP_CONTAINER_LABEL setting
|
||||
layout: default
|
||||
parent: How Tos
|
||||
nav_order: 19
|
||||
---
|
||||
|
||||
# Replace deprecated `BACKUP_STOP_CONTAINER_LABEL` setting
|
||||
|
||||
Version `v2.36.0` deprecated the `BACKUP_STOP_CONTAINER_LABEL` setting and renamed it `BACKUP_STOP_DURING_BACKUP_LABEL` which is supposed to signal that this will stop both containers _and_ services.
|
||||
Migrating is done by renaming the key for your custom value:
|
||||
|
||||
```diff
|
||||
env:
|
||||
- BACKUP_STOP_CONTAINER_LABEL: database
|
||||
+ BACKUP_STOP_DURING_BACKUP_LABEL: database
|
||||
```
|
||||
|
||||
The old key will stay supported until the next major version, but logs a warning each time a backup is taken.
|
||||
@@ -76,7 +76,7 @@ Configuration, data about the backup run and helper functions will be passed to
|
||||
|
||||
Here is a list of all data passed to the template:
|
||||
|
||||
* `Config`: this object holds the configuration that has been passed to the script. The field names are the name of the recognized environment variables converted in PascalCase. (e.g. `BACKUP_STOP_CONTAINER_LABEL` becomes `BackupStopContainerLabel`)
|
||||
* `Config`: this object holds the configuration that has been passed to the script. The field names are the name of the recognized environment variables converted in PascalCase. (e.g. `BACKUP_STOP_DURING_BACKUP_LABEL` becomes `BackupStopDuringBackupLabel`)
|
||||
* `Error`: the error that made the backup fail. Only available in the `title_failure` and `body_failure` templates
|
||||
* `Stats`: objects that holds stats regarding script execution. In case of an unsuccessful run, some information may not be available.
|
||||
* `StartTime`: time when the script started execution
|
||||
|
||||
@@ -14,7 +14,7 @@ In many cases, it will be desirable to stop the services that are consuming the
|
||||
This image can automatically stop and restart containers and services.
|
||||
By default, any container that is labeled `docker-volume-backup.stop-during-backup=true` will be stopped before the backup is being taken and restarted once it has finished.
|
||||
|
||||
In case you need more fine grained control about which containers should be stopped (e.g. when backing up multiple volumes on different schedules), you can set the `BACKUP_STOP_CONTAINER_LABEL` environment variable and then use the same value for labeling:
|
||||
In case you need more fine grained control about which containers should be stopped (e.g. when backing up multiple volumes on different schedules), you can set the `BACKUP_STOP_DURING_BACKUP_LABEL` environment variable and then use the same value for labeling:
|
||||
|
||||
```yml
|
||||
version: '3'
|
||||
@@ -28,7 +28,7 @@ services:
|
||||
backup:
|
||||
image: offen/docker-volume-backup:v2
|
||||
environment:
|
||||
BACKUP_STOP_CONTAINER_LABEL: service1
|
||||
BACKUP_STOP_DURING_BACKUP_LABEL: service1
|
||||
volumes:
|
||||
- data:/backup/my-app-backup:ro
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
|
||||
@@ -352,7 +352,7 @@ services:
|
||||
AWS_ACCESS_KEY_ID: AKIAIOSFODNN7EXAMPLE
|
||||
AWS_SECRET_ACCESS_KEY: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
|
||||
# Label the container using the `data_1` volume as `docker-volume-backup.stop-during-backup=service1`
|
||||
BACKUP_STOP_CONTAINER_LABEL: service1
|
||||
BACKUP_STOP_DURING_BACKUP_LABEL: service1
|
||||
volumes:
|
||||
- data_1:/backup/data-1-backup:ro
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
@@ -362,7 +362,7 @@ services:
|
||||
<<: *backup_environment
|
||||
# Label the container using the `data_2` volume as `docker-volume-backup.stop-during-backup=service2`
|
||||
BACKUP_CRON_EXPRESSION: "0 3 * * *"
|
||||
BACKUP_STOP_CONTAINER_LABEL: service2
|
||||
BACKUP_STOP_DURING_BACKUP_LABEL: service2
|
||||
volumes:
|
||||
- data_2:/backup/data-2-backup:ro
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
|
||||
@@ -316,15 +316,22 @@ You can populate below template according to your requirements and use it as you
|
||||
|
||||
# GPG_PASSPHRASE="<xxx>"
|
||||
|
||||
########### STOPPING CONTAINERS DURING BACKUP
|
||||
########### STOPPING CONTAINERS AND SERVICES DURING BACKUP
|
||||
|
||||
# Containers can be stopped by applying a
|
||||
# `docker-volume-backup.stop-during-backup` label. By default, all containers
|
||||
# that are labeled with `true` will be stopped. If you need more fine grained
|
||||
# control (e.g. when running multiple containers based on this image), you can
|
||||
# override this default by specifying a different value here.
|
||||
# Containers or services can be stopped by applying a
|
||||
# `docker-volume-backup.stop-during-backup` label. By default, all containers and
|
||||
# services that are labeled with `true` will be stopped. If you need more fine
|
||||
# grained control (e.g. when running multiple containers based on this image),
|
||||
# you can override this default by specifying a different value here.
|
||||
# BACKUP_STOP_DURING_BACKUP_LABEL="service1"
|
||||
|
||||
# BACKUP_STOP_CONTAINER_LABEL="service1"
|
||||
# When trying to scale down Docker Swarm services, give up after
|
||||
# the specified amount of time in case the service has not converged yet.
|
||||
# In case you need to adjust this timeout, supply a duration
|
||||
# value as per https://pkg.go.dev/time#ParseDuration to `BACKUP_STOP_SERVICE_TIMEOUT`.
|
||||
# Defaults to 5 minutes.
|
||||
|
||||
# BACKUP_STOP_SERVICE_TIMEOUT="5m"
|
||||
|
||||
########### EXECUTING COMMANDS IN CONTAINERS PRE/POST BACKUP
|
||||
|
||||
|
||||
2
go.mod
2
go.mod
@@ -10,7 +10,7 @@ require (
|
||||
github.com/docker/cli v24.0.1+incompatible
|
||||
github.com/docker/docker v24.0.7+incompatible
|
||||
github.com/gofrs/flock v0.8.1
|
||||
github.com/klauspost/compress v1.17.4
|
||||
github.com/klauspost/compress v1.17.5
|
||||
github.com/leekchan/timeutil v0.0.0-20150802142658-28917288c48d
|
||||
github.com/minio/minio-go/v7 v7.0.66
|
||||
github.com/offen/envconfig v1.5.0
|
||||
|
||||
4
go.sum
4
go.sum
@@ -456,8 +456,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
|
||||
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
|
||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
|
||||
github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
|
||||
github.com/klauspost/compress v1.17.5 h1:d4vBd+7CHydUqpFBgUEKkSdtSugf9YFmSkvUYPquI5E=
|
||||
github.com/klauspost/compress v1.17.5/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
|
||||
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||
github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc=
|
||||
github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
|
||||
|
||||
Reference in New Issue
Block a user