From ec1937dc4c2928522505d30bba8881cb0a6d9f50 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Mon, 27 Mar 2023 20:02:47 +0200
Subject: [PATCH] extract: --skip-errors ignores corrupted chunks (w/ log message), see #840

Forward port of a change implemented by @enkore back in 2016:
https://github.com/enkore/borg/commit/09b21b117c6d34032a9483dd82086a4fae532cb6
---
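Reviewer note (text here, between the `---` separator and the diff, is ignored
by `git am`): the crux of the archive.py change is that a generator which has
raised an exception is finished and cannot be resumed, so after a corrupted
chunk the pipeline iterator must be rebuilt from the remaining chunk ids while
the output fd is seeked forward over the gap, using the size recorded in the
chunk list. A minimal self-contained sketch of that technique follows; Chunk,
fetch_chunks() and extract_file() are illustrative stand-ins for borg's
item.chunks and pipeline.fetch_many(), not actual borg APIs:

    import io
    from collections import namedtuple

    class IntegrityError(Exception):
        pass

    # stand-in for one entry of item.chunks: id and recorded size
    Chunk = namedtuple("Chunk", "id size data")

    def fetch_chunks(chunks):
        # stand-in for pipeline.fetch_many(): yields chunk data and raises
        # IntegrityError for a corrupted chunk (the generator is dead after that)
        for c in chunks:
            if c.data is None:
                raise IntegrityError("checksum mismatch")
            yield c.data

    def extract_file(fd, chunks, skip_integrity_errors=False):
        chunk_index = -1
        chunk_iterator = fetch_chunks(chunks)
        skipped_errors = False
        while True:
            try:
                chunk_index += 1
                data = next(chunk_iterator)
            except StopIteration:
                break
            except IntegrityError:
                if not skip_integrity_errors:
                    raise
                # seek over the bad chunk using its recorded size, so all
                # later data keeps its correct offset
                fd.seek(chunks[chunk_index].size, 1)
                skipped_errors = True
                # restart the chunk data generator after the bad chunk
                chunk_iterator = fetch_chunks(chunks[chunk_index + 1 :])
            else:
                fd.write(data)
        # mirrors fd.truncate(pos) in the patch (on real files this
        # zero-extends a trailing gap to the expected size)
        fd.truncate(fd.tell())
        return not skipped_errors

    fd = io.BytesIO()
    chunks = [Chunk(b"\x01", 4, b"aaaa"), Chunk(b"\x02", 4, None), Chunk(b"\x03", 4, b"bbbb")]
    assert extract_file(fd, chunks, skip_integrity_errors=True) is False
    assert fd.getvalue() == b"aaaa\x00\x00\x00\x00bbbb"

The asserts encode the expected outcome: the bad chunk becomes a zero-filled
gap and the failure is reported via the return value, which do_extract() below
maps to EXIT_WARNING.
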
 src/borg/archive.py                        | 57 ++++++++++++++++------
 src/borg/archiver/extract_cmd.py           | 21 ++++++--
 src/borg/testsuite/archiver/extract_cmd.py | 22 +++++++++
 3 files changed, 82 insertions(+), 18 deletions(-)

diff --git a/src/borg/archive.py b/src/borg/archive.py
index b048d5c160..4b1b9148d5 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -788,6 +788,7 @@ def extract_item(
         hlm=None,
         pi=None,
         continue_extraction=False,
+        skip_integrity_errors=False,
     ):
         """
         Extract archive item.
@@ -800,6 +801,8 @@
         :param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
         :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
         :param continue_extraction: continue a previously interrupted extraction of same archive
+        :param skip_integrity_errors: skip over corrupted chunks instead of raising IntegrityError
+            (ignored for dry_run and stdout)
         """

         def same_item(item, st):
@@ -849,7 +852,7 @@ def same_item(item, st):
                         )
             if has_damaged_chunks:
                 raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
-            return
+            return True

         dest = self.cwd
         path = os.path.join(dest, item.path)
@@ -857,7 +860,7 @@ def same_item(item, st):
         try:
             st = os.stat(path, follow_symlinks=False)
             if continue_extraction and same_item(item, st):
-                return  # done! we already have fully extracted this file in a previous run.
+                return True  # done! we already have fully extracted this file in a previous run.
             elif stat.S_ISDIR(st.st_mode):
                 os.rmdir(path)
             else:
@@ -878,20 +881,43 @@ def make_parent(path):
             make_parent(path)
             with self.extract_helper(item, path, hlm) as hardlink_set:
                 if hardlink_set:
-                    return
+                    return True
                 with backup_io("open"):
                     fd = open(path, "wb")
                 with fd:
                     ids = [c.id for c in item.chunks]
-                    for data in self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM):
+                    chunk_index = -1
+                    chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM)
+                    skipped_errors = False
+                    while True:
+                        try:
+                            chunk_index += 1
+                            data = next(chunk_iterator)
+                        except StopIteration:
+                            break
+                        except IntegrityError as err:
+                            if not skip_integrity_errors:
+                                raise
+                            c = item.chunks[chunk_index]
+                            size = c.size
+                            logger.warning("%s: chunk %s: %s", remove_surrogates(item.path), bin_to_hex(c.id), err)
+                            with backup_io("seek"):
+                                fd.seek(size, 1)
+                            skipped_errors = True
+                            # restart chunk data generator
+                            ids = [c.id for c in item.chunks[chunk_index + 1 :]]
+                            chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM)
+                        else:
+                            with backup_io("write"):
+                                size = len(data)
+                                if sparse and zeros.startswith(data):
+                                    # all-zero chunk: create a hole in a sparse file
+                                    fd.seek(size, 1)
+                                else:
+                                    fd.write(data)
                         if pi:
-                            pi.show(increase=len(data), info=[remove_surrogates(item.path)])
-                        with backup_io("write"):
-                            if sparse and zeros.startswith(data):
-                                # all-zero chunk: create a hole in a sparse file
-                                fd.seek(len(data), 1)
-                            else:
-                                fd.write(data)
+                            pi.show(increase=size, info=[remove_surrogates(item.path)])
+
                     with backup_io("truncate_and_attrs"):
                         pos = item_chunks_size = fd.tell()
                         fd.truncate(pos)
@@ -905,7 +931,7 @@ def make_parent(path):
                     )
                 if has_damaged_chunks:
                     raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
-            return
+            return not skipped_errors
         with backup_io:
             # No repository access beyond this point.
             if stat.S_ISDIR(mode):
@@ -919,7 +945,7 @@ def make_parent(path):
             with self.extract_helper(item, path, hlm) as hardlink_set:
                 if hardlink_set:
                     # unusual, but possible: this is a hardlinked symlink.
-                    return
+                    return True
                 target = item.target
                 try:
                     os.symlink(target, path)
@@ -930,18 +956,19 @@ def make_parent(path):
             make_parent(path)
             with self.extract_helper(item, path, hlm) as hardlink_set:
                 if hardlink_set:
-                    return
+                    return True
                 os.mkfifo(path)
                 self.restore_attrs(path, item)
         elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
             make_parent(path)
             with self.extract_helper(item, path, hlm) as hardlink_set:
                 if hardlink_set:
-                    return
+                    return True
                 os.mknod(path, item.mode, item.rdev)
                 self.restore_attrs(path, item)
         else:
             raise Exception("Unknown archive item type %r" % item.mode)
+        return True

     def restore_attrs(self, path, item, symlink=False, fd=None):
         """
diff --git a/src/borg/archiver/extract_cmd.py b/src/borg/archiver/extract_cmd.py
index 452b9a9a56..f1d3e4e6dc 100644
--- a/src/borg/archiver/extract_cmd.py
+++ b/src/borg/archiver/extract_cmd.py
@@ -39,6 +39,7 @@ def do_extract(self, args, repository, manifest, archive):
         progress = args.progress
         output_list = args.output_list
         dry_run = args.dry_run
+        skip_errors = args.skip_errors
         stdout = args.stdout
         sparse = args.sparse
         strip_components = args.strip_components
@@ -76,9 +77,16 @@ def do_extract(self, args, repository, manifest, archive):
                     dirs.append(item)
                     archive.extract_item(item, stdout=stdout, restore_attrs=False)
                 else:
-                    archive.extract_item(
-                        item, stdout=stdout, sparse=sparse, hlm=hlm, pi=pi, continue_extraction=continue_extraction
-                    )
+                    if not archive.extract_item(
+                        item,
+                        stdout=stdout,
+                        sparse=sparse,
+                        hlm=hlm,
+                        pi=pi,
+                        continue_extraction=continue_extraction,
+                        skip_integrity_errors=skip_errors,
+                    ):
+                        self.exit_code = EXIT_WARNING
             except (BackupOSError, BackupError) as e:
                 self.print_warning("%s: %s", remove_surrogates(orig_path), e)

@@ -175,6 +183,13 @@
             action="store_true",
             help="continue a previously interrupted extraction of same archive",
         )
+        subparser.add_argument(
+            "--skip-errors",
+            dest="skip_errors",
+            action="store_true",
+            help="skip corrupted chunks with a log message (exit 1) instead of aborting "
+            "(no effect for --dry-run and --stdout)",
+        )
         subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
         subparser.add_argument(
             "paths", metavar="PATH", nargs="*", type=str, help="paths to extract; patterns are supported"
diff --git a/src/borg/testsuite/archiver/extract_cmd.py b/src/borg/testsuite/archiver/extract_cmd.py
index 46d4d26dba..fef7ff5ed0 100644
--- a/src/borg/testsuite/archiver/extract_cmd.py
+++ b/src/borg/testsuite/archiver/extract_cmd.py
@@ -625,6 +625,28 @@ def test_overwrite(archivers, request):
     cmd(archiver, "extract", "test", exit_code=1)


+def test_extract_skip_errors(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    create_regular_file(archiver.input_path, "file1", contents=b"a" * 280 + b"b" * 280)
+    cmd(archiver, "rcreate", "-e", "none")
+    cmd(archiver, "create", "--chunker-params", "7,9,8,128", "test", "input")
+    segment_files = sorted(os.listdir(os.path.join(archiver.repository_path, "data", "0")), reverse=True)
+    print(
+        ", ".join(
+            f"{fn}: {os.stat(os.path.join(archiver.repository_path, 'data', '0', fn)).st_size}b" for fn in segment_files
+        )
+    )
+    name = segment_files[3]  # must be the segment file that has the file's chunks
+    with open(os.path.join(archiver.repository_path, "data", "0", name), "r+b") as fd:
+        fd.seek(100)
+        fd.write(b"XXXX")
+    with changedir("output"):
+        output = cmd(archiver, "extract", "--skip-errors", "test", exit_code=1)
assert "input/file1: chunk" in output + assert os.stat("input/file1").st_size == 560 + cmd(archiver, "check", exit_code=1) + + # derived from test_extract_xattrs_errors() @pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason="xattr not supported on this system, or this version of fakeroot") def test_do_not_fail_when_percent_is_in_xattr_name(archivers, request):