From be23a4f1cd441281b8fccabdff220ffde4e996c0 Mon Sep 17 00:00:00 2001
From: Ray Schwartz <schwartzr2@wpunj.edu>
Date: Fri, 23 Aug 2024 08:45:58 -0400
Subject: [PATCH 1/3] Add files via upload

---
 marc3h.py | 414 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 414 insertions(+)
 create mode 100644 marc3h.py

diff --git a/marc3h.py b/marc3h.py
new file mode 100644
index 0000000..49ddbad
--- /dev/null
+++ b/marc3h.py
@@ -0,0 +1,414 @@
+import re
+import pymarc
+from pymarc import Subfield, Record, Field, MARCWriter
+from datetime import datetime
+from libgutenberg import GutenbergDatabase
+from libgutenberg.DublinCoreMapping import DublinCoreObject
+from os.path import join
+
+
+def stub(dc):
+    """Build and return a pymarc.Record populated from the book data held by ``dc``."""
+    record = pymarc.Record()
+    now = datetime.now()
+
+    # Leader: 05 c - Corrected or revised, 06 a - Language material,
+    # 07 m - Monograph/Item, 17 3 - Abbreviated level, 18 u - Unknown
+    record.leader[5] = 'c'
+    record.leader[6] = 'a'
+    record.leader[7] = 'm'
+    record.leader[17] = '3'
+    record.leader[18] = 'u'
+    # 001 - Control number (the Project Gutenberg id); 003 - Control number identifier.
+    field001 = pymarc.Field(tag='001', data=str(dc.project_gutenberg_id))
+    record.add_ordered_field(field001)
+
+    field003 = pymarc.Field(tag='003', data='UtSlPG')
+    record.add_ordered_field(field003)
+
+    # 006: m - Computer file/Electronic resource - Coded data elements relating
+    # to either a computer file or an electronic resource in form.
+    field006 = pymarc.Field(tag='006', data='m')
+    record.add_ordered_field(field006)
+
+    # 007: c - Electronic resource, r - Remote, n - Not applicable
+    field007 = pymarc.Field(tag='007', data='cr n')
+    record.add_ordered_field(field007)
+
+    # 008 - Fixed-length data elements (40 characters).  00-05 date entered;
+    # 06 's' + 07-10 year when a 906 or 260 attribute holds a four-digit year,
+    # otherwise '|' (no attempt to code) through 14; 15-17 'xx ' place unknown;
+    # 23 'o' online; 39 'd' other cataloging source.  Language (35-37) is left
+    # uncoded because the database stores ISO 639-1 codes; see field 041.
+    date_entered = now.strftime('%y%m%d')
+    dates = '|||||||||'
+    for att in dc.book.attributes:
+        if att.fk_attriblist in (906, 260):
+            # Search the attribute text (not its numeric id) for the year.
+            year = re.search(r'\b\d{4}\b', str(att.text))
+            if year:
+                dates = 's' + year.group() + '||||'
+                break
+    field008 = pymarc.Field(tag='008', data=date_entered + dates + 'xx |||||o|||||||||||||| d')
+    record.add_ordered_field(field008)
+
+      
+    for att in dc.book.attributes:
+     if att.fk_attriblist == 10:
+        # MARC 010 (Library of Congress Control Number): one field per matching attribute.
+        field010 = pymarc.Field(
+            tag='010',
+            indicators=[' ', ' '],
+            subfields=[
+               Subfield(code='a', value=str(att.text)),
+               ]
+               )
+        record.add_ordered_field(field010)
+
+
+    field040 = pymarc.Field(
+            tag='040',
+            indicators=[' ', ' '],
+            subfields=[
+               Subfield(code='a', value='UtSlPG'),  # $a - original cataloging agency
+                ]
+               )
+    record.add_ordered_field(field040)
+
+    # MARC 041 (language code); second indicator '7' means the code source is named in $2.
+    if len(dc.languages):
+        # One $a per language id in dc.languages, followed by a single $2.
+        field041 = pymarc.Field(
+            tag='041',
+            indicators=[' ', '7'],
+            subfields=[
+                    Subfield(code='a', value=str(lang.id)) for lang in dc.languages
+                ] + [
+                    Subfield(code='2', value='iso639-1')
+                ]
+            )
+        record.add_ordered_field(field041)
+
+
+    for att in dc.book.attributes:
+     if att.fk_attriblist == 240:
+        # MARC 240 (uniform title); the second indicator is taken from att.nonfiling.
+        field240 = pymarc.Field(
+            tag='240',
+            indicators=['1', str(att.nonfiling)],
+            subfields=[
+               Subfield(code='a', value=str(att.text)),
+               ]
+               )
+        record.add_ordered_field(field240)
+
+    for att in dc.book.attributes:
+     if att.fk_attriblist == 246:
+        # MARC 246 (varying form of title).
+        field246 = pymarc.Field(
+            tag='246',
+            indicators=['1', ' '],
+            subfields=[
+               Subfield(code='a', value=str(att.text)),
+               ]
+               )
+        record.add_ordered_field(field246)
+
+    for att in dc.book.attributes:
+     if att.fk_attriblist == 250:
+        # MARC 250 (edition statement).
+        field250 = pymarc.Field(
+            tag='250',
+            indicators=[' ', ' '],
+            subfields=[
+               Subfield(code='a', value=str(att.text)),
+               ]
+               )
+        record.add_ordered_field(field250)
+
+    for att in dc.book.attributes:
+     if att.fk_attriblist == 300:
+        # MARC 300 (physical description).
+        field300 = pymarc.Field(
+            tag='300',
+            indicators=[' ', ' '],
+            subfields=[
+               Subfield(code='a', value=str(att.text)),
+               ]
+               )
+        record.add_ordered_field(field300)
+
+    for att in dc.book.attributes:
+     if att.fk_attriblist == 440:
+        # Attribute 440 rows are written out as MARC 490 (series statement).
+        field490 = pymarc.Field(
+            tag='490',
+            indicators=['1', ' '],
+            subfields=[
+               Subfield(code='a', value=str(att.text)),
+               ]
+               )
+        record.add_ordered_field(field490)
+
+    for att in dc.book.attributes:
+     if att.fk_attriblist == 440:
+        # The same attribute 440 rows also produce a MARC 830 (series added entry).
+        field830 = pymarc.Field(
+            tag='830',
+            indicators=[' ', '0'],
+            subfields=[
+               Subfield(code='a', value=str(att.text)),
+               ]
+               )
+        record.add_ordered_field(field830)
+
+    # MARC 500 (general note) text can span several lines; each newline is replaced by a space.
+
+    for att in dc.book.attributes:
+     if att.fk_attriblist == 500:
+        # One 500 field per matching attribute row.
+        field500 = pymarc.Field(
+            tag='500',
+            indicators=[' ', " "],
+            subfields=[
+               Subfield(code='a', value=re.sub('\n', ' ', str(att.text))),
+               ]
+               )
+        record.add_ordered_field(field500)
+
+    for att in dc.book.attributes:
+     if att.fk_attriblist == 505:
+        # MARC 505 (formatted contents note), first indicator '0'.
+        field505 = pymarc.Field(
+            tag='505',
+            indicators=['0', ' '],
+            subfields=[
+               Subfield(code='a', value=str(att.text)),
+               ]
+               )
+        record.add_ordered_field(field505)
+
+
+    for att in dc.book.attributes:
+     if att.fk_attriblist == 508:
+        # MARC 508 (creation/production credits note).
+        field508 = pymarc.Field(
+            tag='508',
+            indicators=[' ', ' '],
+            subfields=[
+               Subfield(code='a', value=str(att.text)),
+               ]
+               )
+        record.add_ordered_field(field508)
+
+    for subject in dc.subjects:
+    
+        field653 = pymarc.Field(
+            tag='653',
+            indicators=[' ', ' '],
+            subfields=[
+               Subfield(code='a', value=str(subject.subject)),
+               ]
+               )
+        record.add_ordered_field(field653)
+
+
+    # MARC 856 (electronic location and access); the URL belongs in $u, the
+    # URI subfield ($a is reserved for a host name).
+    # The ebook's own landing page does not depend on any attribute row, so it
+    # is emitted exactly once per record (second indicator '0' - the resource).
+    field856 = pymarc.Field(
+        tag='856',
+        indicators=['4', '0'],
+        subfields=[
+            Subfield(code='u', value=f"https://www.gutenberg.org/ebooks/{dc.project_gutenberg_id}"),
+        ],
+    )
+    record.add_ordered_field(field856)
+
+    # Each 904 attribute supplies one further link, taken verbatim from its text.
+    for att in dc.book.attributes:
+        if att.fk_attriblist == 904:
+            field856 = pymarc.Field(
+                tag='856',
+                indicators=['4', ' '],
+                subfields=[
+                    Subfield(code='u', value=str(att.text)),
+                ],
+            )
+            record.add_ordered_field(field856)
+
+
+    # Authors: the first entry of dc.authors becomes the 100 main entry and
+    # every later one a 700 added entry carrying the relator term in $e.
+    for position, auth in enumerate(dc.authors):
+        name = Subfield(code='a', value=dc.format_author_date(auth))  # Can do better
+        if position == 0:
+            field100 = pymarc.Field(
+                tag='100',
+                indicators=['1', ' '],
+                subfields=[name],
+            )
+            record.add_ordered_field(field100)
+            continue
+
+        field = pymarc.Field(
+            tag='700',
+            indicators=['1', ' '],
+            subfields=[
+                name,
+                Subfield(code='e', value='joint author.'),
+            ],
+        )
+        record.add_ordered_field(field)
+
+
+ # Add Subfield to 245 indicating format
+ 
+ 
+    for att in dc.book.attributes:
+      if att.fk_attriblist == 245:
+          # MARC 245 (title statement).  $a is dc.title_no_subtitle and $h
+          # flags the electronic format.  When dc.title spans several lines,
+          # everything after the first line goes into $b, with the remaining
+          # newlines turned into spaces.
+          title_subfields = [Subfield(code='a', value=dc.title_no_subtitle)]
+          if '\n' in dc.title:
+              remainder = re.sub(r'^[^\n]*\n', '', dc.title).replace('\n', ' ')
+              title_subfields += [
+                  Subfield(code='h', value='[electronic resource] :'),
+                  Subfield(code='b', value=remainder),
+              ]
+          else:
+              title_subfields.append(
+                  Subfield(code='h', value='[electronic resource]')
+              )
+
+          # Both cases take the second indicator from this row's att.nonfiling.
+          # Looping over dc.book.attributes again at this point would rebind
+          # `att`; with several 245 rows each field would then carry the
+          # nonfiling value of the last 245 row instead of its own.
+          field245 = pymarc.Field(
+              tag='245',
+              indicators=['1', str(att.nonfiling)],
+              subfields=title_subfields,
+          )
+          record.add_ordered_field(field245)
+
+    # Publisher, date
+  for att in dc.book.attributes:
+    if att.fk_attriblist == 260:
+        field260 = Field(
+            tag='260',
+            indicators=[' ', ' '],
+            subfields=[
+                Subfield(code='a', value=f"{dc.pubinfo.place} :"),
+                Subfield(code='b', value=f"{dc.pubinfo.publisher},"),
+                Subfield(code='c', value=str(dc.pubinfo.years).replace('[(\'copyright\', \'', 'c').replace('\'), (\'pubdate\', \'', ', ').replace('\'), (\'copyright\', \'', ', c').replace('\')]', '.')),
+            ]
+        )
+    elif att.fk_attriblist == 906:
+        field260 = Field(
+            tag='260',
+            indicators=[' ', ' '],
+            subfields=[
+                Subfield(code='a', value='[S.l. :'),
+                Subfield(code='b', value='s.n.]'),
+                Subfield(code='c', value=str(att.text)),
+            ]
+        )
+    else:
+        field260 = Field(
+            tag='260',
+            indicators=[' ', ' '],
+            subfields=[
+                Subfield(code='a', value='[S.l. :'),
+                Subfield(code='b', value='s.n.]'),
+            ]
+        )
+    record.add_ordered_field(field260)
+
+    add_license(record, dc)
+
+    return record
+
+
+def add_license(record, dc):
+    """Append a 540 (terms governing use) field to record when dc.rights is set."""
+    if not dc.rights:
+        return
+    rights_note = Subfield(code='a', value=dc.rights)
+    field540 = pymarc.Field(
+        tag='540',
+        indicators=[' ', ' '],
+        subfields=[rights_note],
+    )
+    record.add_ordered_field(field540)
+
+
+def add_subject(record, dc):
+    """Add one 653 (uncontrolled index term) field per entry in dc.subjects."""
+    # Mirrors the inline 653 loop in stub(); Subfield's keyword is `value`, not `data`.
+    for subject in dc.subjects or ():
+        field653 = pymarc.Field(
+            tag='653',
+            indicators=[' ', ' '],
+            subfields=[Subfield(code='a', value=str(subject.subject))],
+        )
+        record.add_ordered_field(field653)
+
+
+# Generate 100 records (books 1-100) as readable text
+booknums = list(range(1, 101))  # Replace with your actual book numbers
+all_records = []  # Create a list to store all records
+for booknum in booknums[:100]:
+    dc = DublinCoreObject()
+    dc.load_from_database(booknum)
+    all_records.append(stub(dc))  # Append each record to the list
+
+# Write all records to one file.  The name lives in one variable so the
+# confirmation message cannot drift from the file actually written.
+output_name = "combined_output.txt100f"
+with open(output_name, "w", encoding="utf-8") as text_file:
+    for record in all_records:
+        text_file.write(str(record) + "\n")  # Separate records with a newline
+print(f"Combined records written to {output_name}")
+
+
+# Generate 100 records (the first 100 ids of the range) as readable text
+booknums = list(range(68775, 69195))  # Replace with your actual book numbers
+all_records = []  # Create a list to store all records
+for booknum in booknums[:100]:
+    dc = DublinCoreObject()
+    dc.load_from_database(booknum)
+    all_records.append(stub(dc))  # Append each record to the list
+
+# Write all records to one file.  The name lives in one variable so the
+# confirmation message cannot drift from the file actually written.
+output_name = "combined_output.txt69000f"
+with open(output_name, "w", encoding="utf-8") as text_file:
+    for record in all_records:
+        text_file.write(str(record) + "\n")  # Separate records with a newline
+print(f"Combined records written to {output_name}")
+
+
+# Build the binary MARC file from the first 100 ids of the range
+booknums = list(range(68995, 69195))  # Replace with your actual book numbers
+
+all_records = []  # Create a list to store all records
+for booknum in booknums[:100]:
+    dc = DublinCoreObject()
+    dc.load_from_database(booknum)
+    all_records.append(stub(dc))  # Append each record to the list
+
+# Write all records to one MARC file
+with open("combined_output.mrc", "wb") as marc_file:
+    writer = MARCWriter(marc_file)
+    for record in all_records:
+        writer.write(record)
+    # NOTE(review): MARCWriter.close() appears to close marc_file as well
+    # unless close_fh=False is passed - confirm against the pymarc version in use.
+    writer.close()
+
+print("Combined records written to combined_output.mrc")
+

From cc7cb7a7910d744fb3efa1753579601663fab56d Mon Sep 17 00:00:00 2001
From: Ray Schwartz <schwartzr2@wpunj.edu>
Date: Fri, 23 Aug 2024 22:01:41 -0400
Subject: [PATCH 2/3] updated file

---
 marc3h.py | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/marc3h.py b/marc3h.py
index 49ddbad..5fbec21 100644
--- a/marc3h.py
+++ b/marc3h.py
@@ -296,8 +296,8 @@ def stub(dc):
           record.add_ordered_field(field245)
 
     # Publisher, date
-  for att in dc.book.attributes:
-    if att.fk_attriblist == 260:
+    for att in dc.book.attributes:
+      if att.fk_attriblist == 260:
         field260 = Field(
             tag='260',
             indicators=[' ', ' '],
@@ -307,18 +307,11 @@ def stub(dc):
                 Subfield(code='c', value=str(dc.pubinfo.years).replace('[(\'copyright\', \'', 'c').replace('\'), (\'pubdate\', \'', ', ').replace('\'), (\'copyright\', \'', ', c').replace('\')]', '.')),
             ]
         )
-    elif att.fk_attriblist == 906:
-        field260 = Field(
-            tag='260',
-            indicators=[' ', ' '],
-            subfields=[
-                Subfield(code='a', value='[S.l. :'),
-                Subfield(code='b', value='s.n.]'),
-                Subfield(code='c', value=str(att.text)),
-            ]
-        )
+        record.add_ordered_field(field260)
+        break
+
     else:
-        field260 = Field(
+      field260 = Field(
             tag='260',
             indicators=[' ', ' '],
             subfields=[
@@ -326,7 +319,7 @@ def stub(dc):
                 Subfield(code='b', value='s.n.]'),
             ]
         )
-    record.add_ordered_field(field260)
+      record.add_ordered_field(field260)
 
     add_license(record, dc)
 

From d413eed86d10d03ac5a744d9bdf1274fa2172530 Mon Sep 17 00:00:00 2001
From: Ray Schwartz <schwartzr2@wpunj.edu>
Date: Sat, 24 Aug 2024 17:42:13 -0400
Subject: [PATCH 3/3] Another update.  Added 020, 520 and 546 tags.

---
 marc3h.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/marc3h.py b/marc3h.py
index 5fbec21..4e0e58e 100644
--- a/marc3h.py
+++ b/marc3h.py
@@ -66,6 +66,19 @@ def stub(dc):
         record.add_ordered_field(field010)
 
 
+    for att in dc.book.attributes:
+        if att.fk_attriblist == 20:
+            # MARC 020 (ISBN).  Bound to field020, the name passed to add_ordered_field below.
+            field020 = pymarc.Field(
+                tag='020',
+                indicators=[' ', ' '],
+                subfields=[
+                    Subfield(code='a', value=str(att.text)),
+                ],
+            )
+            record.add_ordered_field(field020)
+
+
     field040 = pymarc.Field(
             tag='040',
             indicators=[' ', ' '],
@@ -201,6 +214,32 @@ def stub(dc):
                )
         record.add_ordered_field(field508)
 
+    for att in dc.book.attributes:
+        if att.fk_attriblist == 520:
+            # MARC 520 (summary note).  Bound to field520, the name passed to add_ordered_field below.
+            field520 = pymarc.Field(
+                tag='520',
+                indicators=[' ', ' '],
+                subfields=[
+                    Subfield(code='a', value=str(att.text)),
+                ],
+            )
+            record.add_ordered_field(field520)
+
+    for att in dc.book.attributes:
+        if att.fk_attriblist == 546:
+            # MARC 546 (language note).  Bound to field546, the name passed to add_ordered_field below.
+            field546 = pymarc.Field(
+                tag='546',
+                indicators=[' ', ' '],
+                subfields=[
+                    Subfield(code='a', value=str(att.text)),
+                ],
+            )
+            record.add_ordered_field(field546)
+
+
+
     for subject in dc.subjects:
     
         field653 = pymarc.Field(