Skip to content

Commit

Permalink
Refactor and annotate PreservationIngestService.
Browse files Browse the repository at this point in the history
refs #4594
  • Loading branch information
justinlittman committed Sep 22, 2023
1 parent 101237e commit 2d3013e
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 41 deletions.
93 changes: 65 additions & 28 deletions app/services/preservation_ingest_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,50 +11,62 @@ class PreservationIngestService
# @return [void] Create the Moab/bag manifests for new version, export data to BagIt bag, kick off the SDR preservation workflow
# @raise [Preservation::Client::Error] if bad response from preservation catalog.
def self.transfer(cocina_object)
druid = cocina_object.externalIdentifier
workspace = DruidTools::Druid.new(druid, Settings.sdr.local_workspace_root)
signature_catalog = signature_catalog_from_preservation(druid)
new_version_id = signature_catalog.version_id + 1
new(cocina_object).transfer
end

# @param [Cocina::Models::DRO, Cocina::Models::Collection] cocina_object The representation of the digital object
def initialize(cocina_object)
  # Kept for the private cocina_object reader used by the instance methods.
  @cocina_object = cocina_object
end

# Transfers the cocina object (Cocina::Models::DRO or Cocina::Models::Collection) that this service was initialized with.
# @return [void] Create the Moab/bag manifests for new version, export data to BagIt bag, kick off the SDR preservation workflow
# @raise [Preservation::Client::Error] if bad response from preservation catalog.
def transfer
# Writes versionMetadata.xml, contentMetadata.xml, and cocina.json
metadata_dir = PreservationMetadataExtractor.extract(workspace:, cocina_object:)

# Makes sure that the versionMetadata.xml version matches the expected version from preservation.
verify_version_metadata(metadata_dir, new_version_id)
version_inventory = Preserve::FileInventoryBuilder.build(metadata_dir:,
druid:,
version_id: new_version_id)
# Creates a Moab::FileInventory from the contentMetadata.xml
version_inventory = Preserve::FileInventoryBuilder.build(metadata_dir:, druid:, version_id: new_version_id)

# Creates a Moab::FileInventory containing only the files that are changed in this version.
version_additions = signature_catalog.version_additions(version_inventory)

# Find the changed files in the workspace
content_additions = version_additions.group('content')
if content_additions.nil? || content_additions.files.empty?
content_dir = nil
else
content_dir = nil
if content_additions.present? && content_additions.files.any?
new_file_list = content_additions.path_list
content_dir = workspace.find_filelist_parent('content', new_file_list)
end

content_group = version_inventory.group('content')

# Regenerate the fixities for content (md5, sha1, sha256) if they are missing.
signature_catalog.normalize_group_signatures(content_group, content_dir) unless content_group.nil? || content_group.files.empty?
# export the bag (in tar format)
bag_dir = Pathname(Settings.sdr.local_export_home).join(druid.sub('druid:', ''))
bagger = Moab::Bagger.new(version_inventory, signature_catalog, bag_dir)
bagger.reset_bag
bagger.create_bag_inventory(:depositor)
bagger.deposit_group('content', content_dir)
bagger.deposit_group('metadata', metadata_dir)
bagger.create_tagfiles
Preserve::BagVerifier.verify(directory: bag_dir)

export(version_inventory, content_dir, metadata_dir)
end
# NOTE: the following methods should probably all be private

# @param [String] druid The object identifier
private

attr_reader :cocina_object

# @return [Moab::SignatureCatalog] the manifest of all files previously ingested,
# or if there is none, a SignatureCatalog object for version 0.
# @raise [Preservation::Client::Error] if bad response from preservation catalog.
def self.signature_catalog_from_preservation(druid)
Preservation::Client.objects.signature_catalog(druid)
rescue Preservation::Client::NotFoundError
Moab::SignatureCatalog.new(digital_object_id: druid, version_id: 0)
def signature_catalog
  # Memoized: the preservation catalog is only queried until a value has been stored.
  return @signature_catalog if @signature_catalog

  @signature_catalog =
    begin
      Preservation::Client.objects.signature_catalog(druid)
    rescue Preservation::Client::NotFoundError
      # No catalog in preservation for this druid: fall back to an empty version 0 catalog.
      Moab::SignatureCatalog.new(digital_object_id: druid, version_id: 0)
    end
end

# @param [Pathname] metadata_dir the location of the metadata directory in the workspace
# @param [Integer] expected the version identifier expected to be used in the versionMetadata
def self.verify_version_metadata(metadata_dir, expected)
def verify_version_metadata(metadata_dir, expected)
vmfile = metadata_dir.join('versionMetadata.xml')
verify_version_id(vmfile, expected, vmfile_version_id(vmfile))
true
Expand All @@ -63,20 +75,45 @@ def self.verify_version_metadata(metadata_dir, expected)
# @param [Pathname] pathname The location of the file containing a version number
# @param [Integer] expected The version number that should be in the file
# @param [Integer] found The version number that is actually in the file
def self.verify_version_id(pathname, expected, found)
def verify_version_id(pathname, expected, found)
  # Matching ids are the success path; anything else is reported with the file location.
  return true if expected == found

  raise "Version mismatch in #{pathname}, expected #{expected}, found #{found}"
end

# @param [Pathname] pathname the location of the versionMetadata file
# @return [Integer] the versionId found in the last version element, or nil if missing
def self.vmfile_version_id(pathname)
def vmfile_version_id(pathname)
  raise "#{pathname.basename} not found at #{pathname}" unless pathname.exist?

  doc = Nokogiri::XML(File.read(pathname.to_s))
  # Only the last <version> element is consulted.
  last_version = doc.xpath('/versionMetadata/version').last
  # A file with no <version> elements gives a nil last node; return nil (as documented)
  # instead of raising NoMethodError on nil.
  version_id = last_version && last_version['versionId']
  version_id&.to_i
end

# Export the bag (using symlinks for content files)
def export(version_inventory, content_dir, metadata_dir)
  # The bag directory lives under the export home and is named for the druid without its 'druid:' prefix.
  export_home = Pathname(Settings.sdr.local_export_home)
  bag_dir = export_home.join(druid.sub('druid:', ''))

  Moab::Bagger.new(version_inventory, signature_catalog, bag_dir).tap do |bagger|
    bagger.reset_bag
    bagger.create_bag_inventory(:depositor)
    bagger.deposit_group('content', content_dir)
    bagger.deposit_group('metadata', metadata_dir)
    bagger.create_tagfiles
  end

  # Checks for required files. (Does not check fixities.)
  Preserve::BagVerifier.verify(directory: bag_dir)
end

# @return [String] the object identifier, read from the cocina object's externalIdentifier
def druid
  cocina_object.externalIdentifier
end

# Memoized DruidTools::Druid for this druid, rooted at Settings.sdr.local_workspace_root.
def workspace
  return @workspace if @workspace

  @workspace = DruidTools::Druid.new(druid, Settings.sdr.local_workspace_root)
end

# Memoized version id for the version being ingested: one more than the signature catalog's version_id.
def new_version_id
  return @new_version_id if @new_version_id

  preserved_version_id = signature_catalog.version_id
  @new_version_id = preserved_version_id + 1
end
end
32 changes: 19 additions & 13 deletions spec/services/preservation_ingest_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
File.read(fixtures.join('sdr_repo/dd116zh0343/v0001/manifests/signatureCatalog.xml'))
)
end
let(:cocina_object) { instance_double(Cocina::Models::DRO, externalIdentifier: druid) }
let(:druid) { 'druid:dd116zh0343' }

before do
allow(Settings.sdr).to receive_messages(local_workspace_root: fixtures.join('workspace').to_s,
Expand All @@ -36,8 +38,6 @@
end

describe '.transfer' do
let(:druid) { 'druid:dd116zh0343' }
let(:cocina_object) { instance_double(Cocina::Models::DRO, externalIdentifier: druid) }
let(:metadata_dir) { fixtures.join('workspace/dd/116/zh/0343/dd116zh0343/metadata') }

before do
Expand Down Expand Up @@ -109,16 +109,16 @@
end
end

describe '.signature_catalog_from_preservation' do
let(:druid) { 'druid:dd116zh0343' }
describe '.signature_catalog' do
let(:service) { described_class.new(cocina_object) }

context 'when signature_catalog exists in preservation' do
before do
allow(Preservation::Client.objects).to receive(:signature_catalog).and_return(fixture_sig_cat_obj)
end

it 'retrieves it as a Moab::SignatureCatalog object' do
sig_cat = described_class.signature_catalog_from_preservation(druid)
sig_cat = service.send(:signature_catalog)
expect(sig_cat).to be_an_instance_of(Moab::SignatureCatalog)
expect(sig_cat.digital_object_id).to eq druid
expect(sig_cat.version_id).to eq 1
Expand All @@ -132,7 +132,7 @@
end

it 'returns a Moab::SignatureCatalog object for version 0' do
sig_cat = described_class.signature_catalog_from_preservation(druid)
sig_cat = service.send(:signature_catalog)
expect(sig_cat).to be_an_instance_of(Moab::SignatureCatalog)
expect(sig_cat.digital_object_id).to eq druid
expect(sig_cat.version_id).to eq 0
Expand All @@ -141,14 +141,20 @@
end
end

specify '.verify_version_id' do
expect(described_class.verify_version_id('/mypath/myfile', 2, 2)).to be_truthy
expect { described_class.verify_version_id('/mypath/myfile', 1, 2) }.to raise_exception('Version mismatch in /mypath/myfile, expected 1, found 2')
# verify_version_id is a (private) instance method, hence the '#' prefix and the use of #send.
describe '#verify_version_id' do
  let(:service) { described_class.new(cocina_object) }

  it 'is truthy when the expected and found versions match' do
    expect(service.send(:verify_version_id, '/mypath/myfile', 2, 2)).to be_truthy
  end

  it 'raises when the expected and found versions differ' do
    expect { service.send(:verify_version_id, '/mypath/myfile', 1, 2) }.to raise_exception('Version mismatch in /mypath/myfile, expected 1, found 2')
  end
end

specify '.vmfile_version_id' do
metadata_dir = fixtures.join('workspace/dd/116/zh/0343/dd116zh0343/metadata')
vmfile = metadata_dir.join('versionMetadata.xml')
expect(described_class.vmfile_version_id(vmfile)).to eq 2
# vmfile_version_id is a (private) instance method, hence the '#' prefix and the use of #send.
describe '#vmfile_version_id' do
  let(:service) { described_class.new(cocina_object) }

  it 'returns the version' do
    metadata_dir = fixtures.join('workspace/dd/116/zh/0343/dd116zh0343/metadata')
    vmfile = metadata_dir.join('versionMetadata.xml')
    expect(service.send(:vmfile_version_id, vmfile)).to eq 2
  end
end
end

0 comments on commit 2d3013e

Please sign in to comment.