Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not index SCSB items if they are private #2393

Merged
merged 1 commit into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions marc_to_solr/lib/princeton_marc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -700,15 +700,16 @@ def process_holdings(record)
all_holdings = {}
holdings_helpers = ProcessHoldingsHelpers.new(record:)
holdings_helpers.fields_852_alma_or_scsb.each do |field_852|
next if holdings_helpers.includes_only_private_scsb_items?(field_852)
holding_id = holdings_helpers.holding_id(field_852)
# Calculate the permanent holding
holding = holdings_helpers.build_holding(field_852, permanent: true)
items_by_holding_id = holdings_helpers.items_by_holding_id(holding_id)
items_by_holding = holdings_helpers.items_by_852(field_852)
group_866_867_868_fields = holdings_helpers.group_866_867_868_on_holding_perm_id(holding_id, field_852)
# if there are items (876 fields)
if items_by_holding_id.present?
add_permanent_items_to_holdings(items_by_holding_id, field_852, holdings_helpers, all_holdings, holding)
add_temporary_items_to_holdings(items_by_holding_id, field_852, holdings_helpers, all_holdings)
if items_by_holding.present?
add_permanent_items_to_holdings(items_by_holding, field_852, holdings_helpers, all_holdings, holding)
add_temporary_items_to_holdings(items_by_holding, field_852, holdings_helpers, all_holdings)
else
# if there are no items (876 fields), create the holding by using the 852 field
all_holdings[holding_id] = remove_empty_call_number_fields(holding) unless holding_id.nil? || invalid_location?(holding['location_code'])
Expand All @@ -718,19 +719,21 @@ def process_holdings(record)
all_holdings
end

def add_permanent_items_to_holdings(items_by_holding_id, field_852, holdings_helpers, all_holdings, holding)
locations = holdings_helpers.select_permanent_location_876(items_by_holding_id, field_852)
def add_permanent_items_to_holdings(items_by_holding, field_852, holdings_helpers, all_holdings, holding)
locations = holdings_helpers.select_permanent_location_876(items_by_holding, field_852)

locations.each do |field_876|
holding_key = holdings_helpers.holding_id(field_852)
add_item_to_holding(field_852, field_876, holding_key, holdings_helpers, all_holdings, holding)
end
end

def add_temporary_items_to_holdings(items_by_holding_id, field_852, holdings_helpers, all_holdings)
locations = holdings_helpers.select_temporary_location_876(items_by_holding_id, field_852)
def add_temporary_items_to_holdings(items_by_holding, field_852, holdings_helpers, all_holdings)
locations = holdings_helpers.select_temporary_location_876(items_by_holding, field_852)

locations.each do |field_876|
next if holdings_helpers.includes_only_private_scsb_items?(field_852)

if holdings_helpers.current_location_code(field_876) == 'RES_SHARE$IN_RS_REQ'
holding = holdings_helpers.build_holding(field_852, permanent: true)
holding_key = holdings_helpers.holding_id(field_852)
Expand Down
16 changes: 14 additions & 2 deletions marc_to_solr/lib/process_holdings_helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,14 @@ def group_866_867_868_on_holding_perm_id(holding_perm_id, field_852)
end
end

def items_by_holding_id(holding_id)
record.fields("876").select { |f| f["0"] == holding_id }
# Collects the item (876) fields attached to the holding described by an 852
# field, leaving out every item that private_scsb_item? flags as private.
#
# @param field_852 the 852 (holding) field whose items are wanted
# @return [Array] the 876 fields whose $0 equals the holding id of field_852,
#   excluding private SCSB items; empty when nothing matches
def items_by_852(field_852)
  # Distinct local name so it does not shadow the holding_id helper method.
  target_holding_id = holding_id(field_852)

  record.fields("876")
        .select { |field_876| field_876["0"] == target_holding_id }
        .reject { |field_876| private_scsb_item?(field_876, field_852) }
end

# Tells whether an item should be treated as a private SCSB item.
#
# @param field_876 the item (876) field; its $x subfield is compared to 'Private'
# @param field_852 the holding (852) field, passed to scsb?
# @return false when $x is not 'Private'; otherwise whatever scsb?(field_852) returns
def private_scsb_item?(field_876, field_852)
  # Only consult scsb? once the item is known to be marked Private.
  return false unless field_876['x'] == 'Private'

  scsb?(field_852)
end

# Select 852 fields from an Alma or SCSB record
Expand Down Expand Up @@ -84,6 +90,12 @@ def build_call_number(field_852)
call_number.present? ? call_number.join(' ').strip : []
end

# Reports whether an SCSB holding is left with no items once items_by_852 has
# done its filtering.
#
# @param field_852 the holding (852) field to inspect
# @return [Boolean] false for any holding that scsb? rejects; otherwise true
#   exactly when items_by_852(field_852) is empty
# NOTE(review): an SCSB holding with no matching 876 fields at all also yields
# true here, not just one whose items are all private - confirm that is intended.
def includes_only_private_scsb_items?(field_852)
  scsb?(field_852) ? items_by_852(field_852).empty? : false
end

# Builds the holding, without any item-specific information
# @returns [Hash]
def build_holding(field_852, field_876 = nil, permanent:)
Expand Down
10 changes: 10 additions & 0 deletions marc_to_solr/lib/traject_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1242,6 +1242,16 @@
accumulator[0] = all_holdings.to_json.to_s unless all_holdings.empty?
end

# Skip SCSB records that include only private items
each_record do |record, context|
recap_notes = process_recap_notes(record)
next if recap_notes.empty?
next if recap_notes.map { |note| note.include?("P") }.include?(false)

id = id_extractor.extract(record).first
context.skip!("Skipped #{id} because record includes only private items.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool!

end

## for recap notes
to_field 'recap_notes_display' do |record, accumulator|
recap_notes = process_recap_notes(record)
Expand Down
151 changes: 151 additions & 0 deletions spec/fixtures/marc_to_solr/scsb_harvard_multiple.mrx
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
<?xml version="1.0" encoding="UTF-8"?><marcxml:collection xmlns:marcxml="http://www.loc.gov/MARC21/slim">
<marcxml:record>
<marcxml:leader>01793cam a2200289Ma 4500</marcxml:leader>
<marcxml:controlfield tag="001">SCSB-9879609</marcxml:controlfield>
<marcxml:controlfield tag="005">20210108133627.0</marcxml:controlfield>
<marcxml:controlfield tag="008">080503s2007 ua 000 0 ara d</marcxml:controlfield>
<marcxml:controlfield tag="009">990115251640203941</marcxml:controlfield>
<marcxml:datafield tag="035" ind1=" " ind2=" ">
<marcxml:subfield code="a">(OCoLC)227281073</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="040" ind1=" " ind2=" ">
<marcxml:subfield code="a">LEILA</marcxml:subfield>
<marcxml:subfield code="b">eng</marcxml:subfield>
<marcxml:subfield code="c">LEILA</marcxml:subfield>
<marcxml:subfield code="d">HVL</marcxml:subfield>
<marcxml:subfield code="d">OCLCF</marcxml:subfield>
<marcxml:subfield code="d">OCLCQ</marcxml:subfield>
<marcxml:subfield code="d">OCLCO</marcxml:subfield>
<marcxml:subfield code="d">OCLCA</marcxml:subfield>
<marcxml:subfield code="d">HUL</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="043" ind1=" " ind2=" ">
<marcxml:subfield code="a">f-ua---</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="050" ind1=" " ind2="4">
<marcxml:subfield code="a">KRM2754</marcxml:subfield>
<marcxml:subfield code="b">.B36 2007x</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="100" ind1="1" ind2=" ">
<marcxml:subfield code="6">880-01</marcxml:subfield>
<marcxml:subfield code="a">Bannā, Maḥmūd ʻāṭif.</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="880" ind1="1" ind2=" ">
<marcxml:subfield code="6">100-01//r</marcxml:subfield>
<marcxml:subfield code="a">بنا، محمود عاطف.</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="245" ind1="1" ind2="3">
<marcxml:subfield code="6">880-02</marcxml:subfield>
<marcxml:subfield code="a">al-ʻUqūd al-idārīyah /</marcxml:subfield>
<marcxml:subfield code="c">Maḥmūd ʻāṭif al-Bannā.</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="880" ind1="1" ind2="2">
<marcxml:subfield code="6">245-02//r</marcxml:subfield>
<marcxml:subfield code="a">العقود الادارية /</marcxml:subfield>
<marcxml:subfield code="c">محمود عاطف البنا.</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="250" ind1=" " ind2=" ">
<marcxml:subfield code="6">880-03</marcxml:subfield>
<marcxml:subfield code="a">al-Ṭabʻah 1.</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="880" ind1=" " ind2=" ">
<marcxml:subfield code="6">250-03//r</marcxml:subfield>
<marcxml:subfield code="a">الطبعة 1.</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="260" ind1=" " ind2=" ">
<marcxml:subfield code="6">880-04</marcxml:subfield>
<marcxml:subfield code="a">Madīnat Naṣr, al-Qāhirah :</marcxml:subfield>
<marcxml:subfield code="b">Dār al-Fikr al-ʻArabī,</marcxml:subfield>
<marcxml:subfield code="c">2007.</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="880" ind1=" " ind2=" ">
<marcxml:subfield code="6">260-04//r</marcxml:subfield>
<marcxml:subfield code="a">مدينة نصر، القاهرة :</marcxml:subfield>
<marcxml:subfield code="b">دار الفكر العربي،</marcxml:subfield>
<marcxml:subfield code="c">2007.</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="300" ind1=" " ind2=" ">
<marcxml:subfield code="a">375 pages ;</marcxml:subfield>
<marcxml:subfield code="c">24 cm</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="336" ind1=" " ind2=" ">
<marcxml:subfield code="a">text</marcxml:subfield>
<marcxml:subfield code="b">txt</marcxml:subfield>
<marcxml:subfield code="2">rdacontent</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="337" ind1=" " ind2=" ">
<marcxml:subfield code="a">unmediated</marcxml:subfield>
<marcxml:subfield code="b">n</marcxml:subfield>
<marcxml:subfield code="2">rdamedia</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="338" ind1=" " ind2=" ">
<marcxml:subfield code="a">volume</marcxml:subfield>
<marcxml:subfield code="b">nc</marcxml:subfield>
<marcxml:subfield code="2">rdacarrier</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="650" ind1=" " ind2="0">
<marcxml:subfield code="a">Public contracts</marcxml:subfield>
<marcxml:subfield code="z">Egypt.</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="650" ind1=" " ind2="7">
<marcxml:subfield code="a">Public contracts</marcxml:subfield>
<marcxml:subfield code="2">fast</marcxml:subfield>
<marcxml:subfield code="0">(OCoLC)fst01082170</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="651" ind1=" " ind2="7">
<marcxml:subfield code="a">Egypt</marcxml:subfield>
<marcxml:subfield code="2">fast</marcxml:subfield>
<marcxml:subfield code="0">(OCoLC)fst01208755</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="583" ind1="1" ind2=" ">
<marcxml:subfield code="a">committed to retain</marcxml:subfield>
<marcxml:subfield code="c">20181001</marcxml:subfield>
<marcxml:subfield code="d">in perpetuity</marcxml:subfield>
<marcxml:subfield code="f">ReCAP Shared Collection</marcxml:subfield>
<marcxml:subfield code="5">HUL</marcxml:subfield>
<marcxml:subfield code="8">222123425660003941</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="852" ind1="0" ind2=" ">
<marcxml:subfield code="c">HD</marcxml:subfield>
<marcxml:subfield code="h">KRM2754</marcxml:subfield>
<marcxml:subfield code="i">.B36 2007x</marcxml:subfield>
<marcxml:subfield code="8">222123425660003941</marcxml:subfield>
<marcxml:subfield code="0">10615482</marcxml:subfield>
<marcxml:subfield code="b">scsbhl</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="876" ind1=" " ind2=" ">
<marcxml:subfield code="0">10615482</marcxml:subfield>
<marcxml:subfield code="3"/>
<marcxml:subfield code="a">16302174</marcxml:subfield>
<marcxml:subfield code="h"/>
<marcxml:subfield code="j">Available</marcxml:subfield>
<marcxml:subfield code="k">LAW</marcxml:subfield>
<marcxml:subfield code="p">32044123007148</marcxml:subfield>
<marcxml:subfield code="t"/>
<marcxml:subfield code="x">Private</marcxml:subfield>
<marcxml:subfield code="z">HL</marcxml:subfield>
<marcxml:subfield code="l">HD</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="852" ind1="0" ind2=" ">
<marcxml:subfield code="c">HD</marcxml:subfield>
<marcxml:subfield code="h">KRM2754</marcxml:subfield>
<marcxml:subfield code="i">.B36 2007x</marcxml:subfield>
<marcxml:subfield code="8">222123425660003941</marcxml:subfield>
<marcxml:subfield code="0">10615483</marcxml:subfield>
<marcxml:subfield code="b">scsbhl</marcxml:subfield>
</marcxml:datafield>
<marcxml:datafield tag="876" ind1=" " ind2=" ">
<marcxml:subfield code="0">10615483</marcxml:subfield>
<marcxml:subfield code="3"/>
<marcxml:subfield code="a">16302175</marcxml:subfield>
<marcxml:subfield code="h"/>
<marcxml:subfield code="j">Available</marcxml:subfield>
<marcxml:subfield code="k">LAW</marcxml:subfield>
<marcxml:subfield code="p">32044123007149</marcxml:subfield>
<marcxml:subfield code="t"/>
<marcxml:subfield code="x">Open</marcxml:subfield>
<marcxml:subfield code="z">HL</marcxml:subfield>
<marcxml:subfield code="l">HD</marcxml:subfield>
</marcxml:datafield>
</marcxml:record>
</marcxml:collection>
16 changes: 13 additions & 3 deletions spec/marc_to_solr/lib/config_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def fixture_record(fixture_name, indexer: @indexer)
@scsb_nypl = @indexer.map_record(fixture_record('SCSB-8157262'))
@scsb_alt_title = @indexer.map_record(fixture_record('scsb_cul_alt_title'))
@scsb_private = @indexer.map_record(fixture_record('scsb_harvard_private'))
@scsb_multiple = @indexer.map_record(fixture_record('scsb_harvard_multiple'))
@scsb_committed = @indexer.map_record(fixture_record('scsb_harvard_committed'))
@scsb_uncommittable = @indexer.map_record(fixture_record('scsb_harvard_uncommittable'))
@recap_record = @indexer.map_record(fixture_record('994081873506421'))
Expand Down Expand Up @@ -1259,13 +1260,22 @@ def fixture_record(fixture_name, indexer: @indexer)
end
end

# Checks that SCSB items marked private are kept out of the indexed output.
describe 'private recap items' do
# @scsb_private is the result of mapping the 'scsb_harvard_private' fixture;
# a nil result means the whole record was skipped.
it "skips indexing record if only item is private" do
expect(@scsb_private).to be nil
end
# @scsb_multiple is the result of mapping the 'scsb_harvard_multiple' fixture,
# which has two 852 holdings: 10615482 (its 876 has $x Private) and
# 10615483 (its 876 has $x Open).
it "skips indexing private items" do
# Bare `be` passes for any truthy value, i.e. the record itself was not skipped.
expect(@scsb_multiple).to be
holdings = JSON.parse(@scsb_multiple["holdings_1display"].first)
public_holding_id = "10615483"
# Only the holding whose item is Open should remain in the holdings JSON.
expect(holdings.keys).to match_array([public_holding_id])
end
end

describe 'recap_notes_display' do
it "skips indexing for Princeton Recap records" do
expect(@recap_record["recap_notes_display"]).to be nil
end
it "Indexes H - P, if a private SCSB record" do
expect(@scsb_private["recap_notes_display"]).to eq ["H - P"]
end
it "Indexes C - S, if a shared SCSB record" do
expect(@scsb_alt_title["recap_notes_display"]).to eq ["C - S"]
end
Expand Down
10 changes: 10 additions & 0 deletions spec/marc_to_solr/lib/princeton_marc_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,10 @@ def fixture_record(fixture_name)
@holdings_scsb_hl = JSON.parse(@record_scsb_hl["holdings_1display"][0])
@holding_id_scsb_hl = "10615189"
@holdings_scsb_hl_block = @holdings_scsb_hl[@holding_id_scsb_hl]

@record_scsb_mixed = @indexer.map_record(fixture_record('scsb_harvard_multiple'))
@holdings_scsb_mixed = JSON.parse(@record_scsb_mixed["holdings_1display"][0])
@holding_id_scsb_mixed_public = "10615483"
end

it 'indexes location if it exists' do
Expand Down Expand Up @@ -848,6 +852,12 @@ def fixture_record(fixture_name)
it "indexes 876$l for scsb" do
expect(@holdings_scsb_hl_block['items'][0]['storage_location']).to eq("HD")
end

# Uses the 'scsb_harvard_multiple' fixture, whose two holdings carry one
# Private item (holding 10615482) and one Open item (holding 10615483).
context 'with a record with both public and private holdings' do
it 'indexes only public items for SCSB' do
# @holdings_scsb_mixed is the parsed holdings_1display JSON; its keys are holding ids,
# so only the holding with the Open item is expected to be present.
expect(@holdings_scsb_mixed.keys).to match_array([@holding_id_scsb_mixed_public])
end
end
end
end
end
Loading