Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPIKE] Decouple DSA from Solr #4578

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions app/models/dro.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ class Dro < RepositoryRecord
validates :content_type, :access, :administrative, :description,
:identification, :structural, presence: true

# Items governed by the given admin policy, reduced to their external identifiers and sorted by them.
# The druid is supplied as a bind parameter instead of being interpolated into the SQL string,
# so quoting is handled by the database adapter and a malformed value cannot alter the query.
scope :has_admin_policy, ->(admin_policy_druid) { where("administrative ->> 'hasAdminPolicy' = ?", admin_policy_druid).select(:external_identifier).order(:external_identifier) }
# Items whose first hasMemberOrders entry lists the given druid among its members.
# NOTE(review): a named bind (:druid) is presumably used here because the JSONB `?` operator
# would otherwise be mistaken for a positional bind marker -- confirm before changing.
scope :in_virtual_objects, ->(member_druid) { where("structural #> '{hasMemberOrders,0}' -> 'members' ? :druid", druid: member_druid) }
# Items whose structural isMemberOf contains the given collection druid; only the identifier,
# version, and content type columns are selected.
scope :members_of_collection, ->(collection_druid) { where("structural -> 'isMemberOf' ? :druid", druid: collection_druid).select(:external_identifier, :version, :content_type) }
# Items whose embargo release date is at or before the current time; only the identifier is selected.
scope :embargoed_and_releaseable, -> { where("(access -> 'embargo' ->> 'releaseDate')::timestamp <= ?", Time.zone.now).select(:external_identifier) }
Comment on lines +8 to +11
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If nothing else, I learned a bit more about how to do fancy JSONB queries with our crazy nested JSON.


# Looks up a single record by the sourceId stored in its identification JSON.
#
# @param source_id [String] the source identifier to match
# @return whatever find_by yields for the condition (a record, or nil when nothing matches)
def self.find_by_source_id(source_id)
  source_id_condition = "identification->>'sourceId' = ?"
  find_by(source_id_condition, source_id)
end
Expand Down
14 changes: 2 additions & 12 deletions app/reports/apo_catkey.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

require 'csv'

# Find items that are goverened by the provided APO and then return all catkeys and refresh status.
# Find items that are governed by the provided APO and then return all catkeys and refresh status.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo.

# https://github.com/sul-dlss/dor-services-app/issues/4373
# Invoke via:
# bin/rails r -e production "ApoCatkey.report('druid:bx911tp9024')"
Expand All @@ -12,17 +12,7 @@ def self.report(apo_druid)

CSV.open(output_file, 'w') do |csv|
csv << %w[druid catkey refresh]
query = "is_governed_by_ssim:\"info:fedora/#{apo_druid}\"&objectType_ssim:\"item\""
druids = []
# borrowed from bin/generate-druid-list
loop do
results = SolrService.query('*:*', fl: 'id', rows: 10000, fq: query, start: druids.length, sort: 'id asc')
break if results.empty?

results.each { |r| druids << r['id'] }
sleep(0.5)
end

druids = Dro.has_admin_policy(apo_druid).map(&:external_identifier)
num_dros = druids.size
puts "Found #{num_dros} objects that are governed by APO #{apo_druid}"
druids.each_with_index do |druid, i|
Expand Down
1 change: 0 additions & 1 deletion app/services/delete_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
class DeleteService
# Tries to remove any existence of the object in our systems
# Does the following:
# - Removes item from Fedora/Solr
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cruft.

# - Removes content from dor workspace
# - Removes content from assembly workspace
# - Removes content from sdr export area
Expand Down
16 changes: 6 additions & 10 deletions app/services/embargo_release_service.rb
Original file line number Diff line number Diff line change
@@ -1,27 +1,23 @@
# frozen_string_literal: true

# Finds objects where the embargo release date has passed for embargoed items
# Builds list of candidate objects by doing a Solr query
# Builds list of candidate objects by querying the database
#
# Should run once a day from cron
class EmbargoReleaseService
RELEASEABLE_NOW_QUERY = 'embargo_status_ssim:"embargoed" AND embargo_release_dtsim:[* TO NOW]'

def self.release_all
# Find objects to process
Rails.logger.info("***** Querying solr: #{RELEASEABLE_NOW_QUERY}")
solr = SolrService.get(RELEASEABLE_NOW_QUERY, 'rows' => '5000', 'fl' => 'id')
embargoed_items_to_release = Dro.embargoed_and_releaseable

num_found = solr['response']['numFound'].to_i
if num_found.zero?
if embargoed_items_to_release.none?
Rails.logger.info('No objects to process')
return
end
Rails.logger.info("Found #{num_found} objects")
Rails.logger.info("Found #{embargoed_items_to_release.count} objects")

count = 0
solr['response']['docs'].each do |doc|
release(doc['id'])
embargoed_items_to_release.each do |item|
release(item.external_identifier)
count += 1
end

Expand Down
36 changes: 27 additions & 9 deletions app/services/member_service.rb
Original file line number Diff line number Diff line change
@@ -1,19 +1,37 @@
# frozen_string_literal: true

# Finds the members of a collection by using Solr
# Finds the members of a collection
class MemberService
# @param [String] druid the identifier of the collection
# @param [Boolean] only_published when true, restrict to only published items
# @param [Boolean] exclude_opened when true, exclude opened items
# @return [Array<Hash<String,String>>] the members of this collection
def self.for(druid, only_published: false, exclude_opened: false)
query = "is_member_of_collection_ssim:\"info:fedora/#{druid}\""
query += ' published_dttsim:[* TO *]' if only_published
query += ' -processing_status_text_ssi:Opened' if exclude_opened
args = {
fl: 'id,objectType_ssim',
rows: 100_000_000
}
SolrService.query query, args
Dro
.members_of_collection(druid)
.then { |members| reject_opened_members(members, exclude_opened) }
.then { |members| select_published_members(members, only_published) }
.map do |member|
{
'id' => member.external_identifier,
'objectType' => member.content_type == Cocina::Models::ObjectType.agreement ? 'agreement' : 'item'
}
end
end

# Filters out members whose workflow status displays as 'Opened'.
#
# @param members [Enumerable] objects responding to external_identifier and version
# @param exclude_opened [Boolean] when falsy, members is returned untouched
# @return [Enumerable] members as given, or the result of rejecting the opened ones
def self.reject_opened_members(members, exclude_opened)
  return members unless exclude_opened

  # Build the workflow client once and reuse it, rather than constructing one per member.
  workflow_client = WorkflowClientFactory.build
  members.reject do |member|
    workflow_client.status(druid: member.external_identifier, version: member.version).display_simplified == 'Opened'
  end
end

def self.select_published_members(members, only_published)
return members unless only_published

members.select do |member|
WorkflowClientFactory.build.lifecycle(druid: member.external_identifier, milestone_name: 'published', version: member.version).present?
end
Comment on lines +12 to +35
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the sluggish part.

end
end
6 changes: 3 additions & 3 deletions app/services/publish/public_desc_metadata_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def add_doi
# expand constituent relations into relatedItem references -- see JUMBO-18
# @return [Void]
def add_constituent_relations!
VirtualObject.for(druid: cocina_object.externalIdentifier).each do |solr_doc|
VirtualObject.for(druid: cocina_object.externalIdentifier).each do |virtual_object_hash|
# create the MODS relation
relatedItem = doc.create_element('relatedItem', xmlns: MODS_NS)
relatedItem['type'] = 'host'
Expand All @@ -66,14 +66,14 @@ def add_constituent_relations!
# load the title from the virtual object's DC.title
titleInfo = doc.create_element('titleInfo', xmlns: MODS_NS)
title = doc.create_element('title', xmlns: MODS_NS)
title.content = solr_doc.fetch(:title)
title.content = virtual_object_hash.fetch(:title)
titleInfo << title
relatedItem << titleInfo

# point to the PURL for the virtual object
location = doc.create_element('location', xmlns: MODS_NS)
url = doc.create_element('url', xmlns: MODS_NS)
url.content = purl_url(solr_doc.fetch(:id))
url.content = purl_url(virtual_object_hash.fetch(:id))
location << url
relatedItem << location

Expand Down
4 changes: 2 additions & 2 deletions app/services/published_relationships_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ def collections
def virtual_objects
return unless cocina_object.dro?

VirtualObject.for(druid: cocina_object.externalIdentifier).map do |solr_doc|
"<fedora:isConstituentOf rdf:resource=\"info:fedora/#{solr_doc.fetch(:id)}\"/>"
VirtualObject.for(druid: cocina_object.externalIdentifier).map do |virtual_object_hash|
"<fedora:isConstituentOf rdf:resource=\"info:fedora/#{virtual_object_hash.fetch(:id)}\"/>"
end.join(INDENT)
end
end
38 changes: 0 additions & 38 deletions app/services/solr_service.rb

This file was deleted.

9 changes: 5 additions & 4 deletions app/services/virtual_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ class VirtualObject
# @param [String] druid
# @return [Array<Hash>] a list of results with ids and titles
def self.for(druid:)
query = "has_constituents_ssim:#{druid.sub(':', '\:')}"
response = SolrService.get(query, { fl: 'id sw_display_title_tesim' })
response.fetch('response').fetch('docs').map do |row|
{ id: row.fetch('id'), title: row.fetch('sw_display_title_tesim').first }
Dro.in_virtual_objects(druid).map do |dro|
{
id: dro.external_identifier,
title: Cocina::Models::Builders::TitleBuilder.build(dro.to_cocina.description.title)
}
end
end
end
4 changes: 3 additions & 1 deletion bin/clean-druid-list
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ count = 0
File.open(options[:output], 'w') do |file|
druids.each_with_index do |druid, index|
puts "Finding #{druid} (#{index + 1})"
next if SolrService.query('*:*', fl: 'id', rows: 1, fq: "id:\"#{druid}\"").empty?
next unless AdminPolicy.exists?(external_identifier: druid) ||
Collection.exists?(external_identifier: druid) ||
Dro.exists?(external_identifier: druid)

file.write("#{druid}\n")
count += 1
Expand Down
2 changes: 2 additions & 0 deletions bin/generate-druid-list
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
require_relative '../config/environment'
require 'optparse'

# TODO: Figure out if we still want this or not, given how tightly coupled this functionality is to Solr
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment.


options = { output: 'druids.txt', quiet: false }
parser = OptionParser.new do |option_parser|
option_parser.banner = 'Usage: bin/generate-druid-list \'<QUERY, e.g., project_tag_ssim:"Naxos : born digital audio">\' [options]'
Expand Down
2 changes: 2 additions & 0 deletions lib/tasks/missing_druids.rake
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

# TODO: Figure out if we still want this or not, given how tightly coupled this functionality is to Solr
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment.


namespace :missing_druids do
desc 'Find unindexed druids'
task unindexed_objects: :environment do
Expand Down