Skip to content

Commit

Permalink
Merge pull request #28 from hathitrust/DEV-838-full-marc
Browse files Browse the repository at this point in the history
DEV-838: Full MARC record output
  • Loading branch information
aelkiss authored May 24, 2024
2 parents ded649c + e6b3651 commit 0fdf9f0
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 4 deletions.
18 changes: 18 additions & 0 deletions config/extra_description.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<description>
<eprints xmlns="http://www.openarchives.org/OAI/1.1/eprints" xsi:schemaLocation="http://www.openarchives.org/OAI/1.1/eprints http://www.openarchives.org/OAI/1.1/eprints.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<content>
<URL>https://www.hathitrust.org/member-libraries/resources-for-librarians/data-resources/oai-feed/</URL>
<text>The Open Archives Initiative Protocol for Metadata Harvesting (OAI-PMH) is a protocol used in libraries and archives for the automated delivery of structured bibliographic metadata. You can use this option to retrieve metadata in MARC21 or unqualified Dublin Core formats in XML structure. The OAI feed allows you to access new and updated records and (for the full set of records) discover if any have been deleted. For best practices related to OAI, and a list of potential harvesters, see https://www.ideals.illinois.edu/items/50369.</text>
</content>
<metadataPolicy>
<URL>https://www.hathitrust.org/the-collection/terms-conditions/metadata-sharing-and-use-policy/#bibliographic-metadata-sharing-policy</URL>
<text>Metadata is provided under the terms of the HathiTrust Bibliographic Metadata Sharing Policy. See details at the above URL.</text>
</metadataPolicy>
<dataPolicy>
<URL>https://www.hathitrust.org/the-collection/search-access/access-use-policy/</URL>
<text>HathiTrust is a collaborative library initiative. Users are encouraged to cite and link to digital content and are free to do so without asking for permission. Depending on the source of the digitized work, licenses or other contractual terms may restrict further distribution or other uses. For volume-specific information, please consult the &lt;dc:rights&gt; element (oai_dc) or 856$r (marc21). You need to make your own assessment of the copyright or other legal concerns related to uses beyond those provided by HathiTrust for particular works.

The possible Access and Use statements that apply to each book are listed at the URL above.</text>
</dataPolicy>
</eprints>
</description>
26 changes: 26 additions & 0 deletions lib/oai_solr/marc21_full.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
require "marc"
require "oai"

module OAISolr
  # Metadata format object that serializes a record's MARC data as MARCXML,
  # leaving out the fields whose tags are listed in ZEPHIR_FIELDS.
  class Marc21Full
    # Tags of the fields that are dropped before serialization. Frozen so the
    # shared constant cannot be modified at runtime.
    ZEPHIR_FIELDS = %w[DAT CAT CID HOL FMT].to_set.freeze

    # @return [String] the prefix identifying this format
    def prefix
      "marc21_full"
    end

    # @return [String] URL of the MARCXML schema
    def schema
      "http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
    end

    # @return [String] the MARCXML namespace URI
    def namespace
      "http://www.loc.gov/MARC21/slim"
    end

    # Serializes the MARC record of +record+ to a MARCXML string without the
    # ZEPHIR_FIELDS.
    #
    # The surviving fields are appended to a fresh MARC::Record instead of
    # being deleted from +record.marc_record+ in place, so the caller's record
    # keeps all of its fields after encoding.
    #
    # @param _ unused
    # @param record [#marc_record] object exposing the MARC record to encode
    # @return [String] the MARCXML document
    def encode(_, record)
      source = record.marc_record

      filtered = MARC::Record.new
      filtered.leader = source.leader
      source.fields.each do |field|
        filtered.append(field) unless ZEPHIR_FIELDS.include?(field.tag)
      end

      filtered.to_xml_string(fast_but_unsafe: true, include_namespace: true)
    end
  end
end
6 changes: 6 additions & 0 deletions lib/oai_solr/provider.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
require "oai"
require "oai_solr/settings"
require "oai_solr/model"
require "oai_solr/marc21"
# Not currently enabled
# require "oai_solr/marc21_full"
require "oai_solr/dublin_core"

module OAISolr
Expand All @@ -11,8 +14,11 @@ class Provider < OAI::Provider::Base
admin_email Settings.admin_email
source_model OAISolr::Model.new
register_format OAISolr::Marc21.new
# Not currently enabled
# register_format OAISolr::Marc21Full.new
register_format OAISolr::DublinCore.instance
sample_id Settings.sample_identifier
update_granularity OAI::Const::Granularity::LOW
extra_description File.read("config/extra_description.xml")
end
end
59 changes: 59 additions & 0 deletions spec/oai_solr_marc21_full_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
require "spec_helper"
require "oai_solr/record"
require "oai_solr/marc21_full"
require "json"
require "nokogiri"

# Exercises OAISolr::Marc21Full#encode against sample documents stored under
# spec/data: the output must validate against the MARC21slim schema, keep the
# 974 fields and drop the Zephir-specific fields.
RSpec.describe OAISolr::Marc21Full do
  shared_examples_for "full marc record" do |file|
    # Parsed JSON fixture for the record under test.
    let(:sdoc) { JSON.parse(File.read("spec/data/#{file}")) }
    let(:oai_record) { OAISolr::Record.new(sdoc) }
    # MARCXML string produced by the format under test.
    let(:full_marc_xml) { described_class.new.encode(nil, oai_record) }
    # The same output parsed back into a MARC record for field-level checks.
    let(:full_marc_record) { MARC::XMLReader.new(StringIO.new(full_marc_xml)).first }
    let(:slim_schema) do
      # Block form of File.open so the schema file handle is closed as soon as
      # Nokogiri has read it, rather than left open for every example.
      File.open(File.join(__dir__, "schemas", "MARC21slim.xsd")) do |xsd|
        Nokogiri::XML::Schema(xsd)
      end
    end

    describe "#encode" do
      it "provides valid marc for #{file}" do
        parsed = Nokogiri::XML::Document.parse(full_marc_xml)
        expect(slim_schema.valid?(parsed)).to be true
      end

      it "has 974s for #{file}" do
        orig = oai_record.marc_record
        expect(orig.fields("974").count).to be > 0
        expect(orig.fields("974").count).to eq(full_marc_record.fields("974").count)
      end

      it "has an 008 for #{file}" do
        expect(full_marc_record["008"]).not_to be_nil
      end

      it "does not have special zephir fields" do
        %w[CID DAT CAT FMT HOL].each do |zephir_field|
          expect(full_marc_record[zephir_field]).to be_nil
        end
      end

      it "has a title field" do
        expect(full_marc_record["245"].count).to be > 0
      end

      it "has a subject field" do
        expect(full_marc_record["650"].count).to be > 0
      end

      # These indicator values hold for the two sample records below; they are
      # not guaranteed for every record.
      it "has indicators for title field" do
        title = full_marc_record["245"]
        expect(title.indicator1).to eq("1")
        expect(title.indicator2).to eq("0")
      end
    end
  end

  it_behaves_like "full marc record", "000004150.json"
  it_behaves_like "full marc record", "000007599.json"
end
4 changes: 0 additions & 4 deletions spec/oai_solr_marc21_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@
slimmed = marc21.slim_marc(rec.marc_record)
parsed = Nokogiri::XML::Document.parse(slimmed.to_xml.to_s)
expect(slim_schema.valid?(parsed)).to be true
# valid? is missing from the MARC gem, but it only checks for
# ControlField/DataField discrepancies anyway
# expect(rec.marc_record.valid?).to be true
# expect(marc21.slim_marc(rec.marc_record).valid?).to be true
end

it "replaces the 974s with 856s for #{file}" do
Expand Down
29 changes: 29 additions & 0 deletions spec/oai_solr_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ def doc
describe "Identify" do
before(:each) { get oai_endpoint, verb: "Identify" }
it_behaves_like "valid oai response"

it "references metadata policy" do
expect(last_response.body).to include("/metadata-sharing-and-use-policy")
end
end

describe "ListMetadataFormats" do
Expand All @@ -75,6 +79,14 @@ def doc
it "claims to support marc21" do
expect(doc.xpath("//xmlns:metadataPrefix").map { |mp| mp.content }).to include("marc21")
end

xit "includes marc21_full (currently disabled)" do
expect(doc.xpath("//xmlns:metadataPrefix").map { |mp| mp.content }).to include("marc21_full")
end

it "does not include marc21_full" do
expect(doc.xpath("//xmlns:metadataPrefix").map { |mp| mp.content }).not_to include("marc21_full")
end
end

describe "ListSets" do
Expand Down Expand Up @@ -286,6 +298,23 @@ def doc
end
end

describe "GetRecord full MARC" do
before(:each) { get oai_endpoint, verb: "GetRecord", metadataPrefix: "marc21_full", identifier: existing_record["id"] }
let(:response_record) { MARC::XMLReader.new(StringIO.new(last_response.body)).first }

xcontext "when enabled (currently disabled)" do
it_behaves_like "valid oai response"

it "can get a record as MARC" do
expect(response_record.leader).to match(/[\dA-Za-z ]{23}/)
end
end

it "returns an error" do
expect(doc.xpath("count(//xmlns:error[@code='cannotDisseminateFormat'])")).to eq(1)
end
end

describe "GetRecord with nonexistent identifier" do
it "returns oai idDoesNotExist error" do
get oai_endpoint, verb: "GetRecord", metadataPrefix: "oai_dc", identifier: "nonexistent"
Expand Down

0 comments on commit 0fdf9f0

Please sign in to comment.