Skip to content

Commit

Permalink
add method to return s3 key for filename
Browse files Browse the repository at this point in the history
  • Loading branch information
peetucket committed Sep 30, 2024
1 parent 7cb472c commit 788f3e3
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 10 deletions.
5 changes: 5 additions & 0 deletions lib/dor/text_extraction/speech_to_text.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ def filenames_to_stt
stt_files.map(&:filename)
end

# Builds the S3 key for one of this object's files.
# The key is the job_id followed by the filename, combined via File.join.
# @param filename [String] name of the file to locate
# @return [String] the S3 key, i.e. "<job_id>/<filename>"
def s3_location(filename)
  key_prefix = job_id
  File.join(key_prefix, filename)
end

# return the job_id for the stt job, defined as the druid-version of the object
def job_id
"#{bare_druid}-v#{cocina_object.version}"
Expand Down
14 changes: 4 additions & 10 deletions lib/robots/dor_repo/speech_to_text/fetch_files.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,15 @@ def initialize

# available from LyberCore::Robot: druid, bare_druid, workflow_service, object_client, cocina_object, logger
def perform_work
sttable_filenames.each do |filename|
raise "Unable to fetch #{filename} for #{druid}" unless file_fetcher.write_file_with_retries(filename:, location: aws_provider.bucket.object(File.join(job_id, filename)), max_tries: 3)
speech_to_text.filenames_to_stt.each do |filename|
raise "Unable to fetch #{filename} for #{druid}" unless file_fetcher.write_file_with_retries(filename:, location: aws_provider.bucket.object(speech_to_text.s3_location(filename)), max_tries: 3)
end
end

private

def sttable_filenames
Dor::TextExtraction::SpeechToText.new(cocina_object:).filenames_to_stt
end

# this will be the base of the S3 key for the files sent (to namespace them in the bucket)
# it is the same as the job_id when we send the SQS message
def job_id
@job_id ||= Dor::TextExtraction::SpeechToText.new(cocina_object:).job_id
def speech_to_text
@speech_to_text ||= Dor::TextExtraction::SpeechToText.new(cocina_object:)
end

def file_fetcher
Expand Down
9 changes: 9 additions & 0 deletions spec/lib/dor/text_extraction/speech_to_text_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@ def build_file(sdr_preserve, shelve, filename)
end
end

# Verifies that #s3_location returns "<bare_druid>-v<version>/<filename>".
describe '#s3_location' do
# Stubbed DRO pinned to the version below; `druid`, `bare_druid`, `object_type` and `stt`
# are not defined in this group -- presumably provided by enclosing lets (confirm in the full spec).
let(:cocina_object) { instance_double(Cocina::Models::DRO, version:, externalIdentifier: druid, dro?: true, type: object_type) }
let(:version) { 3 }

it 'returns the s3 filename key for a given filename' do
# The expected key is the bare druid, "-v" and the version, then "/" and the filename.
expect(stt.s3_location('text.xml')).to eq("#{bare_druid}-v#{version}/text.xml")
end
end

describe '#job_id' do
let(:cocina_object) { instance_double(Cocina::Models::DRO, version:, externalIdentifier: druid, dro?: true, type: object_type) }
let(:version) { 3 }
Expand Down
2 changes: 2 additions & 0 deletions spec/robots/dor_repo/speech_to_text/fetch_files_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
allow(Aws::S3::Client).to receive(:new).and_return(aws_client)
allow(Aws::S3::Object).to receive(:new).with(bucket_name: Settings.aws.speech_to_text.base_s3_bucket, key: "#{job_id}/file1.mov", client: aws_client).and_return(mov_location)
allow(Aws::S3::Object).to receive(:new).with(bucket_name: Settings.aws.speech_to_text.base_s3_bucket, key: "#{job_id}/file2.mp3", client: aws_client).and_return(mp3_location)
allow(stt).to receive(:s3_location).with('file1.mov').and_return("#{job_id}/file1.mov")
allow(stt).to receive(:s3_location).with('file2.mp3').and_return("#{job_id}/file2.mp3")
end

context 'when fetching files is successful' do
Expand Down

0 comments on commit 788f3e3

Please sign in to comment.