From 592aa08c85990f972c010af6e07c1f08f5667ea0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Wed, 5 Feb 2025 09:03:19 +0100 Subject: [PATCH 01/10] Add fix variations of mf runner examples #598 Reuse all fix workflows from #654 and bring folders together as suggested by @blackwinter in https://github.com/metafacture/metafacture-core/pull/654#issuecomment-2633506344 and in https://github.com/metafacture/metafacture-core/pull/662 --- .../examples/beacon/create/create.fix.flux | 36 ++ .../create/{create.flux => create.morph.flux} | 0 .../dist/examples/beacon/create/extract.fix | 50 +++ .../dist/examples/beacon/create/output.fix | 8 + .../count/gnd/count-gnd-types.fix.flux | 11 + ...-types.flux => count-gnd-types.morph.flux} | 0 .../main/dist/examples/count/gnd/gnd-type.fix | 6 + ...s.flux => metrics-gnd-subjects.morph.flux} | 0 .../examples/count/subjects/references.fix | 8 + .../count/subjects/references.fix.flux | 19 + ...{references.flux => references.morph.flux} | 1 - .../src/main/dist/examples/filter/filter.fix | 9 + .../main/dist/examples/filter/filter.fix.flux | 11 + .../filter-morph.flux => filter.morph.flux} | 2 +- .../{morph/filter-morph.xml => filter.xml} | 0 .../filter/{morph => }/gnd-sample.pica | 0 .../Ts1-Tg1-without-crisscross.fix.flux | 20 + ... 
=> Ts1-Tg1-without-crisscross.morph.flux} | 0 .../gnd/crisscross-connections/extract.fix | 216 +++++++++++ .../gnd/crisscross-connections/output.fix | 38 ++ .../gnd/gnd-and-beacons/tp2index.fix.flux | 38 ++ .../{tp2index.flux => tp2index.morph.flux} | 6 +- .../examples/gnd/gnd-and-beacons/tp2json.fix | 52 +++ .../gnd/gnd-and-beacons/tp2json.fix.flux | 33 ++ .../{tp2json.flux => tp2json.morph.flux} | 0 .../main/dist/examples/gnd/json/tp2json.fix | 45 +++ .../dist/examples/gnd/json/tp2json.fix.flux | 10 + .../json/{tp2json.flux => tp2json.morph.flux} | 0 .../examples/gnd/references/format-gnd.fix | 193 ++++++++++ .../gnd/references/format-gnd.fix.flux | 15 + ...{format-gnd.flux => format-gnd.morph.flux} | 0 .../examples/gnd/references/merge.fix.flux | 23 ++ .../{merge.flux => merge.morph.flux} | 0 .../dist/examples/gnd/references/output.fix | 60 +++ .../examples/gnd/references/references.fix | 8 + .../gnd/references/references.fix.flux | 23 ++ ...{references.flux => references.morph.flux} | 0 .../references2/count-gnd-references.fix.flux | 21 ++ ...s.flux => count-gnd-references.morph.flux} | 0 .../dist/examples/gnd/references2/extract.fix | 216 +++++++++++ .../dist/examples/gnd/references2/output.fix | 38 ++ .../examples/marc21-to-edm/MARC21-EDM.fix | 345 ++++++++++++++++++ .../marc21-to-edm/MARC21-EDM.fix.flux | 19 + ...{MARC21-EDM.flux => MARC21-EDM.morph.flux} | 0 .../examples/morph/marc21/morph-marc21.xml | 15 - .../examples/morph/morph-marcxml-online.flux | 10 - .../dist/examples/sort/gnd-pref-label.fix | 129 +++++++ .../main/dist/examples/sort/sort-gnd.fix.flux | 11 + .../{sort-gnd.flux => sort-gnd.morph.flux} | 0 .../{morph => transform}/marc21/10.marc21 | 0 .../dist/examples/transform/marc21/marc21.fix | 7 + .../examples/transform/marc21/marc21.fix.flux | 10 + .../marc21/marc21.morph.flux} | 0 .../dist/examples/transform/marc21/marc21.xml | 13 + .../pica-to-marcxml/nonNormalized.pica | 0 .../pica-to-marcxml/pica-to-marcxml.fix | 21 ++ 
.../pica-to-marcxml/pica-to-marcxml.fix.flux | 13 + .../pica-to-marcxml.morph.flux} | 2 +- .../pica-to-marcxml/pica-to-marcxml.xml} | 0 59 files changed, 1780 insertions(+), 31 deletions(-) create mode 100644 metafacture-runner/src/main/dist/examples/beacon/create/create.fix.flux rename metafacture-runner/src/main/dist/examples/beacon/create/{create.flux => create.morph.flux} (100%) create mode 100644 metafacture-runner/src/main/dist/examples/beacon/create/extract.fix create mode 100644 metafacture-runner/src/main/dist/examples/beacon/create/output.fix create mode 100644 metafacture-runner/src/main/dist/examples/count/gnd/count-gnd-types.fix.flux rename metafacture-runner/src/main/dist/examples/count/gnd/{count-gnd-types.flux => count-gnd-types.morph.flux} (100%) create mode 100644 metafacture-runner/src/main/dist/examples/count/gnd/gnd-type.fix rename metafacture-runner/src/main/dist/examples/count/metrics/{metrics-gnd-subjects.flux => metrics-gnd-subjects.morph.flux} (100%) create mode 100644 metafacture-runner/src/main/dist/examples/count/subjects/references.fix create mode 100644 metafacture-runner/src/main/dist/examples/count/subjects/references.fix.flux rename metafacture-runner/src/main/dist/examples/count/subjects/{references.flux => references.morph.flux} (99%) create mode 100644 metafacture-runner/src/main/dist/examples/filter/filter.fix create mode 100644 metafacture-runner/src/main/dist/examples/filter/filter.fix.flux rename metafacture-runner/src/main/dist/examples/filter/{morph/filter-morph.flux => filter.morph.flux} (85%) rename metafacture-runner/src/main/dist/examples/filter/{morph/filter-morph.xml => filter.xml} (100%) rename metafacture-runner/src/main/dist/examples/filter/{morph => }/gnd-sample.pica (100%) create mode 100644 metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/Ts1-Tg1-without-crisscross.fix.flux rename metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/{Ts1-Tg1-without-crisscross.flux => 
Ts1-Tg1-without-crisscross.morph.flux} (100%) create mode 100644 metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/extract.fix create mode 100644 metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/output.fix create mode 100644 metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2index.fix.flux rename metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/{tp2index.flux => tp2index.morph.flux} (85%) create mode 100644 metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2json.fix create mode 100644 metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2json.fix.flux rename metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/{tp2json.flux => tp2json.morph.flux} (100%) create mode 100644 metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix create mode 100644 metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix.flux rename metafacture-runner/src/main/dist/examples/gnd/json/{tp2json.flux => tp2json.morph.flux} (100%) create mode 100644 metafacture-runner/src/main/dist/examples/gnd/references/format-gnd.fix create mode 100644 metafacture-runner/src/main/dist/examples/gnd/references/format-gnd.fix.flux rename metafacture-runner/src/main/dist/examples/gnd/references/{format-gnd.flux => format-gnd.morph.flux} (100%) create mode 100644 metafacture-runner/src/main/dist/examples/gnd/references/merge.fix.flux rename metafacture-runner/src/main/dist/examples/gnd/references/{merge.flux => merge.morph.flux} (100%) create mode 100644 metafacture-runner/src/main/dist/examples/gnd/references/output.fix create mode 100644 metafacture-runner/src/main/dist/examples/gnd/references/references.fix create mode 100644 metafacture-runner/src/main/dist/examples/gnd/references/references.fix.flux rename metafacture-runner/src/main/dist/examples/gnd/references/{references.flux => references.morph.flux} (100%) create mode 100644 
metafacture-runner/src/main/dist/examples/gnd/references2/count-gnd-references.fix.flux rename metafacture-runner/src/main/dist/examples/gnd/references2/{count-gnd-references.flux => count-gnd-references.morph.flux} (100%) create mode 100644 metafacture-runner/src/main/dist/examples/gnd/references2/extract.fix create mode 100644 metafacture-runner/src/main/dist/examples/gnd/references2/output.fix create mode 100644 metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix create mode 100644 metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix.flux rename metafacture-runner/src/main/dist/examples/marc21-to-edm/{MARC21-EDM.flux => MARC21-EDM.morph.flux} (100%) delete mode 100644 metafacture-runner/src/main/dist/examples/morph/marc21/morph-marc21.xml delete mode 100644 metafacture-runner/src/main/dist/examples/morph/morph-marcxml-online.flux create mode 100644 metafacture-runner/src/main/dist/examples/sort/gnd-pref-label.fix create mode 100644 metafacture-runner/src/main/dist/examples/sort/sort-gnd.fix.flux rename metafacture-runner/src/main/dist/examples/sort/{sort-gnd.flux => sort-gnd.morph.flux} (100%) rename metafacture-runner/src/main/dist/examples/{morph => transform}/marc21/10.marc21 (100%) create mode 100644 metafacture-runner/src/main/dist/examples/transform/marc21/marc21.fix create mode 100644 metafacture-runner/src/main/dist/examples/transform/marc21/marc21.fix.flux rename metafacture-runner/src/main/dist/examples/{morph/marc21/morph-marc21.flux => transform/marc21/marc21.morph.flux} (100%) create mode 100644 metafacture-runner/src/main/dist/examples/transform/marc21/marc21.xml rename metafacture-runner/src/main/dist/examples/{morph => transform}/pica-to-marcxml/nonNormalized.pica (100%) create mode 100644 metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix create mode 100644 metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix.flux rename 
metafacture-runner/src/main/dist/examples/{morph/pica-to-marcxml/morph-pica-to-marcxml.flux => transform/pica-to-marcxml/pica-to-marcxml.morph.flux} (90%) rename metafacture-runner/src/main/dist/examples/{morph/pica-to-marcxml/morph-pica-to-marcxml.xml => transform/pica-to-marcxml/pica-to-marcxml.xml} (100%) diff --git a/metafacture-runner/src/main/dist/examples/beacon/create/create.fix.flux b/metafacture-runner/src/main/dist/examples/beacon/create/create.fix.flux new file mode 100644 index 000000000..29c0660aa --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/beacon/create/create.fix.flux @@ -0,0 +1,36 @@ +//creates a beacon file based on a pica+ dump of the DNB CBS data. + +default type = "ALL"; +default out = dump + "-" + type + ".beacon"; +default header = FLUX_DIR + "header.txt"; + + +//read header +"reading header " + header | write("stdout"); +header|open-file|as-lines|@Y; + +//count references +"counting references in " + dump | write("stdout"); + +dump| +open-file| +as-lines| +catch-object-exception| +decode-pica| +batch-log(batchsize="100000")| +fix(FLUX_DIR + "extract.fix", *)| +stream-to-triples(redirect="true")| +sort-triples(by="subject")| +collect-triples| +fix(FLUX_DIR + "output.fix")| +batch-log("merged ${totalRecords}", batchsize="100000")| +stream-to-triples| +template("${s}")| +@Y; + +@Y| +wait-for-inputs("2")| +write(out); + + + diff --git a/metafacture-runner/src/main/dist/examples/beacon/create/create.flux b/metafacture-runner/src/main/dist/examples/beacon/create/create.morph.flux similarity index 100% rename from metafacture-runner/src/main/dist/examples/beacon/create/create.flux rename to metafacture-runner/src/main/dist/examples/beacon/create/create.morph.flux diff --git a/metafacture-runner/src/main/dist/examples/beacon/create/extract.fix b/metafacture-runner/src/main/dist/examples/beacon/create/extract.fix new file mode 100644 index 000000000..1256b344c --- /dev/null +++ 
b/metafacture-runner/src/main/dist/examples/beacon/create/extract.fix @@ -0,0 +1,50 @@ +# 002@ not repeatable + +if any_match("002@.0", "^Tp.*$") + copy_field("002@.0","ok") +end + +# DBSM: +# (006U $0 “04p01*”) or (017A $a “yy”) +if any_match("006U.0","04p01.*") + add_field("@value","DBSM|ALL") +elsif any_equal("017A.a","yy") + add_field("@value","DBSM|ALL") + +# DEA: +# (001@ $a “2”) or (209A $f “Exilarchiv” or 209A $f “HB/EB”) +elsif any_equal("001@.a","2") + add_field("@value","DEA|ALL") +elsif any_equal("209A.f","HB/EB") + add_field("@value","DEA|ALL") + +# DMA: +# (002@ $0 “G*” or 002@ $0 “M*”) or (006U $0 “10,P01*”) + +elsif any_match("002@.0","^[GM].*") + add_field("@value","DMA|ALL") + +elsif any_match("006U.0","^10,P01.*") + add_field("@value","DMA|ALL") +else + add_field("@value","ALL") +end + +# Test if type variable fits + +if any_contain("@value","$[type]") + add_field("@value","$[type]") +else + remove_field("@value") +end + +do list(path: "041A*|028A*|029B*|028C*|028Q*|028P*|028F*|028M*|028D*|028E*", "var":"$i") + trim("$i.9") + to_var("$i.9","ref") + if exists("$i.9") + copy_field("@value","{to:$[ref]}refed") + end +end + +retain("{to*","ok") + diff --git a/metafacture-runner/src/main/dist/examples/beacon/create/output.fix b/metafacture-runner/src/main/dist/examples/beacon/create/output.fix new file mode 100644 index 000000000..38bbaadf3 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/beacon/create/output.fix @@ -0,0 +1,8 @@ +unless exists("refed") + remove_field("ok","") +end + +unless exists("ok") + remove_field("ok","") +end + diff --git a/metafacture-runner/src/main/dist/examples/count/gnd/count-gnd-types.fix.flux b/metafacture-runner/src/main/dist/examples/count/gnd/count-gnd-types.fix.flux new file mode 100644 index 000000000..a493302de --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/count/gnd/count-gnd-types.fix.flux @@ -0,0 +1,11 @@ +default fileName = FLUX_DIR + "gnd-sample.pica"; + +fileName| +open-file| 
+as-lines| +decode-pica| +fix(FLUX_DIR + "gnd-type.fix")| +stream-to-triples| +count-triples(countBy="object")| +template("${s}\t${o}")| +write("stdout"); diff --git a/metafacture-runner/src/main/dist/examples/count/gnd/count-gnd-types.flux b/metafacture-runner/src/main/dist/examples/count/gnd/count-gnd-types.morph.flux similarity index 100% rename from metafacture-runner/src/main/dist/examples/count/gnd/count-gnd-types.flux rename to metafacture-runner/src/main/dist/examples/count/gnd/count-gnd-types.morph.flux diff --git a/metafacture-runner/src/main/dist/examples/count/gnd/gnd-type.fix b/metafacture-runner/src/main/dist/examples/count/gnd/gnd-type.fix new file mode 100644 index 000000000..6bc35d2e6 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/count/gnd/gnd-type.fix @@ -0,0 +1,6 @@ +if any_match("002@.0","...*") + replace_all("002@.0","^(..).*","$1") #only keep the first two letters + retain("002@.0") # only keep the relevant element +else + reject() +end diff --git a/metafacture-runner/src/main/dist/examples/count/metrics/metrics-gnd-subjects.flux b/metafacture-runner/src/main/dist/examples/count/metrics/metrics-gnd-subjects.morph.flux similarity index 100% rename from metafacture-runner/src/main/dist/examples/count/metrics/metrics-gnd-subjects.flux rename to metafacture-runner/src/main/dist/examples/count/metrics/metrics-gnd-subjects.morph.flux diff --git a/metafacture-runner/src/main/dist/examples/count/subjects/references.fix b/metafacture-runner/src/main/dist/examples/count/subjects/references.fix new file mode 100644 index 000000000..b819df067 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/count/subjects/references.fix @@ -0,0 +1,8 @@ +do list(path:"041A*|041A","var":"$i") # Until https://github.com/metafacture/metafacture-core/issues/651 is fixed one has to add "041A" + copy_field("$i.9","relevantField.$append") +end + +trim("relevantField.*") +uniq("relevantField") + +retain("relevantField") diff --git 
a/metafacture-runner/src/main/dist/examples/count/subjects/references.fix.flux b/metafacture-runner/src/main/dist/examples/count/subjects/references.fix.flux new file mode 100644 index 000000000..a6277f31f --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/count/subjects/references.fix.flux @@ -0,0 +1,19 @@ + +default counts="myflux/counts.dat"; +default catalogue = FLUX_DIR + "10.pica"; + +//count references +"counting references in " + catalogue | write("stdout"); + +catalogue| +open-file| +as-lines| +catch-object-exception| +decode-pica| +fix(FLUX_DIR + "references.fix")| +stream-to-triples| +count-triples(countBy="object")| + +write("subjects.dat"); + + diff --git a/metafacture-runner/src/main/dist/examples/count/subjects/references.flux b/metafacture-runner/src/main/dist/examples/count/subjects/references.morph.flux similarity index 99% rename from metafacture-runner/src/main/dist/examples/count/subjects/references.flux rename to metafacture-runner/src/main/dist/examples/count/subjects/references.morph.flux index be70c4075..ccc5fc719 100644 --- a/metafacture-runner/src/main/dist/examples/count/subjects/references.flux +++ b/metafacture-runner/src/main/dist/examples/count/subjects/references.morph.flux @@ -13,7 +13,6 @@ decode-pica| morph(FLUX_DIR + "references.xml")| stream-to-triples| count-triples(countBy="object")| - write("subjects.dat"); diff --git a/metafacture-runner/src/main/dist/examples/filter/filter.fix b/metafacture-runner/src/main/dist/examples/filter/filter.fix new file mode 100644 index 000000000..5f023c675 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/filter/filter.fix @@ -0,0 +1,9 @@ +if any_match("002@.0","^Tp.*") + if any_match("041R.a",".*[Aa][Rr][Zz][Tt].*") + nothing() + else + reject() + end +else + reject() +end diff --git a/metafacture-runner/src/main/dist/examples/filter/filter.fix.flux b/metafacture-runner/src/main/dist/examples/filter/filter.fix.flux new file mode 100644 index 000000000..08f7d81f2 --- 
/dev/null +++ b/metafacture-runner/src/main/dist/examples/filter/filter.fix.flux @@ -0,0 +1,11 @@ +// opens file 'fileName', interprets the content as pica and filters the results + +default fileName = FLUX_DIR + "gnd-sample.pica"; + +fileName| +open-file| +as-lines| +decode-pica| +fix(FLUX_DIR + "filter.fix")| // Fix does not use the filter function but has its own filter mechanism within fix. +encode-formeta(style="verbose")| +write("stdout"); diff --git a/metafacture-runner/src/main/dist/examples/filter/morph/filter-morph.flux b/metafacture-runner/src/main/dist/examples/filter/filter.morph.flux similarity index 85% rename from metafacture-runner/src/main/dist/examples/filter/morph/filter-morph.flux rename to metafacture-runner/src/main/dist/examples/filter/filter.morph.flux index f91ff4e9b..20e6d630a 100644 --- a/metafacture-runner/src/main/dist/examples/filter/morph/filter-morph.flux +++ b/metafacture-runner/src/main/dist/examples/filter/filter.morph.flux @@ -6,6 +6,6 @@ fileName| open-file| as-lines| decode-pica| -filter(FLUX_DIR + "filter-morph.xml")| +filter(FLUX_DIR + "filter.xml")| encode-formeta(style="verbose")| write("stdout"); diff --git a/metafacture-runner/src/main/dist/examples/filter/morph/filter-morph.xml b/metafacture-runner/src/main/dist/examples/filter/filter.xml similarity index 100% rename from metafacture-runner/src/main/dist/examples/filter/morph/filter-morph.xml rename to metafacture-runner/src/main/dist/examples/filter/filter.xml diff --git a/metafacture-runner/src/main/dist/examples/filter/morph/gnd-sample.pica b/metafacture-runner/src/main/dist/examples/filter/gnd-sample.pica similarity index 100% rename from metafacture-runner/src/main/dist/examples/filter/morph/gnd-sample.pica rename to metafacture-runner/src/main/dist/examples/filter/gnd-sample.pica diff --git a/metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/Ts1-Tg1-without-crisscross.fix.flux 
b/metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/Ts1-Tg1-without-crisscross.fix.flux new file mode 100644 index 000000000..0a37efb16 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/Ts1-Tg1-without-crisscross.fix.flux @@ -0,0 +1,20 @@ +default base = ""; +default dump = FLUX_DIR + "10.pica"; +default out = base + "Ts1-Tg1-without-crisscross.txt"; + +"counting references in " + dump | write("stdout"); + +dump| +open-file| +as-lines| +catch-object-exception| +decode-pica| +batch-log(batchsize="100000")| +fix(FLUX_DIR + "extract.fix")| +stream-to-triples(redirect="true")| +sort-triples(by="subject")| +collect-triples| +fix(FLUX_DIR + "output.fix")| +batch-log(batchsize="100000")| +encode-csv(noquotes="true",separator=";")| +write(out); diff --git a/metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/Ts1-Tg1-without-crisscross.flux b/metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/Ts1-Tg1-without-crisscross.morph.flux similarity index 100% rename from metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/Ts1-Tg1-without-crisscross.flux rename to metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/Ts1-Tg1-without-crisscross.morph.flux diff --git a/metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/extract.fix b/metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/extract.fix new file mode 100644 index 000000000..d43c27077 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/extract.fix @@ -0,0 +1,216 @@ +do put_macro("gndPersonCombinedLabel") # in contrast to morph this is not normalizing utf 8 + paste("$[field].@combinedLabel","$[field].P", "$[field].a", "~, ", "$[field].d", join_char:"") + replace_all("$[field].@combinedLabel",", $","") + if exists("$[field].c") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].c") + end + 
copy_field("$[field].n","$[field].add.$append") + copy_field("$[field].l","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") +end + +do put_macro("gndCorporationCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","b") + copy_field("$[field].n","$[field].add.$append") + copy_field("$[field].l","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + copy_field("$[field].a","$[field].@combinedLabel") + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + if exists("$[field].b") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "~ / ", "$[field].b", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +do put_macro("gndConferenceCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","f") + copy_field("$[field].g","$[field].add.$append") + copy_field("$[field].n","$[field].add.$append") + copy_field("$[field].d","$[field].add.$append") + copy_field("$[field].c","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].b") + paste("$[field].@combinedLabel", "$[field].a", "~ / ", "$[field].b", join_char:"") + else + copy_field("$[field].a","$[field].@combinedLabel") + end + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + + +do put_macro("gndGeolocationCombinedLabel") # in contrast to morph this is not 
normalizing utf 8 + if any_equal("@type","g") + copy_field("$[field].z","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].b") + paste("$[field].@combinedLabel", "$[field].a", "~ / ", "$[field].x", join_char:"") + else + copy_field("$[field].a","$[field].@combinedLabel") + end + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +do put_macro("gndSubjectCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","s") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + copy_field("$[field].a","$[field].@combinedLabel") + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +do put_macro("gndWorkCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","u") + copy_field("$[field].f","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + copy_field("$[field].n","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].p") + paste("$[field].@combinedLabel", "$[field].a", "~ / ", "$[field].p", join_char:"") + else + copy_field("$[field].a","$[field].@combinedLabel") + end + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +copy_field("002@.0","@type") +substring("@type","1","1") + +# pn +do list(path:"028A","var":"PERSONNAME") + call_macro("gndPersonCombinedLabel",field:"PERSONNAME",out:"@name") +end +do 
list(path:"028@","var":"PERSONNAME") + call_macro("gndPersonCombinedLabel",field:"PERSONNAME",out:"@syn.$append") +end + +do list(path:"029A","var":"CORPORATIONNAME") + call_macro("gndCorporationCombinedLabel",field:"CORPORATIONNAME",out:"@name") +end +do list(path:"029@","var":"CORPORATIONNAME") + call_macro("gndCorporationCombinedLabel",field:"CORPORATIONNAME",out:"@syn.$append") +end + +#f +do list(path:"030A","var":"CONFERENCENAME") + call_macro("gndConferenceCombinedLabel",field:"CONFERENCENAME",out:"@name") +end +do list(path:"030@","var":"CONFERENCENAME") + call_macro("gndConferenceCombinedLabel",field:"CONFERENCENAME",out:"@syn.$append") +end + +#g +do list(path:"065A","var":"GEONAME") + call_macro("gndGeolocationCombinedLabel",field:"GEONAME",out:"@name") +end +do list(path:"065@","var":"GEONAME") + call_macro("gndGeolocationCombinedLabel",field:"GEONAME",out:"@syn.$append") +end + +# s +do list(path:"041A","var":"SUBJECTNAME") + call_macro("gndSubjectCombinedLabel",field:"SUBJECTNAME",out:"@name") +end +do list(path:"041@","var":"SUBJECTNAME") + call_macro("gndSubjectCombinedLabel",field:"SUBJECTNAME",out:"@syn.$append") +end + +#u +do list(path:"022A","var":"WORKNAME") + call_macro("gndWorkCombinedLabel",field:"WORKNAME",out:"@name") +end +do list(path:"022@","var":"WORKNAME") + call_macro("gndWorkCombinedLabel",field:"WORKNAME",out:"@syn.$append") +end + +# copy_field("008A.a","teilbest") + +do list(path:"041A*|041A","var":"$i") # Until https://github.com/metafacture/metafacture-core/issues/651 is fixed one has to add "041A" + trim("$i.9") + to_var("$i.9","ref") + if exists("$i.9") + add_field("{to:$[ref]}refed","") + end +end + +copy_field("@name","gnd.name") +copy_field("002@.0","gnd.katlevel") +substring("gnd.katlevel","2","1") +copy_field("002@.0","gnd.satzart") +substring("gnd.satzart","0","2") + +# copy_field("003@.0","gnd.id") +copy_field("007K.0","gnd.nid") +do list(path:"042A", "var":"$i") + do list(path:"$i.a","var":"$a") + 
copy_field("$a","gnd.sys.$append") + end +end +replace_all("gnd.sys.*",";",",") +join_field("gnd.sys"," | ") +unless exists("gnd.sys") + add_field("gnd.sys","") +end + +do list(path:"004B", "var":"$i") + do list(path:"$i.a","var":"$a") + copy_field("$a","gnd.ent.$append") + end +end +join_field("gnd.ent"," | ") +unless exists("gnd.ent") + add_field("gnd.ent","") +end + + +copy_field("@syn","gnd.syn") +join_field("gnd.syn"," | ") +unless exists("gnd.syn") + add_field("gnd.syn","") +end + +unless any_match("004B.a", "gib|gio|giw") + copy_field("004B.a","filter1") +end + +if any_match("002@.0", "Ts1|Tg1") + copy_field("002@.0","filter2") +end + +unless exists("037G.c") + add_field("filter3","ok") +end + + +retain("{to*","gnd","filter*","@*") diff --git a/metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/output.fix b/metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/output.fix new file mode 100644 index 000000000..a90bf5520 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/output.fix @@ -0,0 +1,38 @@ +unless exists("refed") + reject() +end + +unless exists("gnd.nid") + reject() +end + +if is_array("refed") + copy_field("refed","count") +else + copy_field("refed","count.$append") +end + +count("count") + +copy_field("_id","id") + + +copy_field("gnd.nid","nid") +copy_field("gnd.satzart","satzart") +copy_field("gnd.katlevel","katlevel") +copy_field("gnd.name","name") +copy_field("gnd.sys","sys") +copy_field("gnd.ent","ent") +copy_field("gnd.syn","syn") + +retain("count", + "id", + "nid", + "katlevel", + "satzart", + "name", + "sys", + "ent", + "syn" +) + diff --git a/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2index.fix.flux b/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2index.fix.flux new file mode 100644 index 000000000..211bf09e6 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2index.fix.flux @@ -0,0 +1,38 @@ +// 
REQUIRES THE METAFACTURE-SEARCH PLUGIN + +default gnd = FLUX_DIR + "Tp-200.pica.gz"; +default beaconDir = FLUX_DIR + "beacons"; + +"reading beacons from " + beaconDir | write("stdout"); + +beaconDir| +read-dir| +log-object| +catch-object-exception| +open-file| +read-beacon(metadatafilter="name|description|institution")| +stream-to-triples| +@X; + +"reading GND dump from " + gnd | write("stdout"); + +gnd| +open-file| +as-lines| +object-batch-log(batchSize="100000")| +decode-pica| +stream-to-triples| +@X; + +@X| +sort-triples(by="subject")| +collect-triples| +fix( + "unless any_match('002@.0', '.p.*') # Accept only Tp records + reject() + end" +)| +encode-formeta| +write("stdout"); +//stream-to-index(FLUX_DIR + "id.xml", indexpath="Tp_ix"); + diff --git a/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2index.flux b/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2index.morph.flux similarity index 85% rename from metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2index.flux rename to metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2index.morph.flux index 0512db493..0a80968e2 100644 --- a/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2index.flux +++ b/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2index.morph.flux @@ -28,7 +28,7 @@ stream-to-triples| sort-triples(by="subject")| collect-triples| filter(FLUX_DIR + "filter.xml")| -//encode-formeta| -//write("stdout"); -stream-to-index(FLUX_DIR + "id.xml", indexpath="Tp_ix"); +encode-formeta| +write("stdout"); +//stream-to-index(FLUX_DIR + "id.xml", indexpath="Tp_ix"); diff --git a/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2json.fix b/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2json.fix new file mode 100644 index 000000000..cbad4a7ee --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2json.fix @@ -0,0 +1,52 @@ + +# +unless any_match("002@.0", ".p.*") 
+ reject() +end + +do put_macro("gndPersonCombinedLabel") # in contrast to morph this is not normalizing utf 8 + paste("$[field].@combinedLabel","$[field].P", "$[field].a", "~, ", "$[field].d", join_char:"") + replace_all("$[field].@combinedLabel",", $","") + if exists("$[field].c") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].c") + end + copy_field("$[field].n","$[field].pre.$append") + copy_field("$[field].l","$[field].pre.$append") + copy_field("$[field].g","$[field].pre.$append") + join_field("$[field].pre",", ") + replace_all("$[field].pre","^(.*)$"," <$1>") + if exists("$[field].pre") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].pre", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") +end + + +copy_field("_id","id") +add_field("type","person") +add_field("thumbnail.link","http://art-eater.com/wp-content/uploads/2012/08/ecce_mono.jpg") +add_field("thumbnail.caption","Fresco by Cecilia Giménez (2012)") + +move_field("seeAlso","seeAlso[]") + +call_macro("gndPersonCombinedLabel", field:"028A", out:"person.name") + +copy_field("050G.b","person.description") + +do list(path:"028@","var": "VARNAME") + call_macro("gndPersonCombinedLabel", field:"VARNAME", out:"person.variantname[].$append") +end + +copy_field("0032Aa.a","person.birth") +replace_all("person.birth"," ","") +replace_all("person.birth","^((\\d+?))-","$1") +replace_all("person.death"," ","") +replace_all("person.death","-((\\d+?))-$","$1") + +do list(path:"032Fa","var":"OCCUPATION") + do list(path:"OCCUPATION.a","var":"$a") + copy_field("$a","person.occupation[].$append") + end +end + +retain("id","type","thumbnail","seeAlso[]","person") diff --git a/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2json.fix.flux b/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2json.fix.flux new file mode 100644 index 000000000..5566cf5ae --- /dev/null +++ 
b/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2json.fix.flux @@ -0,0 +1,33 @@ +default gnd = FLUX_DIR + "Tp-200.pica.gz"; +default beaconDir = FLUX_DIR + "beacons"; +default out = "stdout"; + + +"reading beacons from " + beaconDir | write("stdout"); + +beaconDir| +read-dir| +log-object| +catch-object-exception| +open-file| +read-beacon(metadatafilter="name|description")| +stream-to-triples| +@X; + + +"reading GND dump from " + gnd | write("stdout"); + +gnd| +open-file| +as-lines| +object-batch-log(batchSize="100000")| +decode-pica| +stream-to-triples| +@X; + +@X| +sort-triples(by="subject")| +collect-triples| +fix(FLUX_DIR + "tp2json.fix")| // Fix does not use the filter function but has its own filter mechanism within fix. +encode-json| +write(out); diff --git a/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2json.flux b/metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2json.morph.flux similarity index 100% rename from metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2json.flux rename to metafacture-runner/src/main/dist/examples/gnd/gnd-and-beacons/tp2json.morph.flux diff --git a/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix b/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix new file mode 100644 index 000000000..b4a931618 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix @@ -0,0 +1,45 @@ +do put_macro("gndPersonCombinedLabel") # in contrast to morph this is not normalizing utf 8 + paste("$[field].@combinedLabel","$[field].P", "$[field].a", "~, ", "$[field].d", join_char:"") + replace_all("$[field].@combinedLabel",", $","") + if exists("$[field].c") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].c") + end + copy_field("$[field].n","$[field].pre.$append") + copy_field("$[field].l","$[field].pre.$append") + copy_field("$[field].g","$[field].pre.$append") + join_field("$[field].pre",", ") + 
replace_all("$[field].pre","^(.*)$"," <$1>") + if exists("$[field].pre") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].pre", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") +end + + +copy_field("_id","id") +add_field("type","person") +add_field("thumbnail.link","http://art-eater.com/wp-content/uploads/2012/08/ecce_mono.jpg") +add_field("thumbnail.caption","Fresco by Cecilia Giménez (2012)") + + +call_macro("gndPersonCombinedLabel", field:"028A", out:"person.name") + +copy_field("050G.b","person.description") + +do list(path:"028@","var": "VARNAME") + call_macro("gndPersonCombinedLabel", field:"VARNAME", out:"person.variantname[].$append") +end + +copy_field("032Aa.a","person.birth") +replace_all("person.birth"," ","") +replace_all("person.birth","^((\\d+?))-","$1") +replace_all("person.death"," ","") +replace_all("person.death","-((\\d+?))-$","$1") + +do list(path:"032Fa","var":"OCCUPATION") + do list(path:"OCCUPATION.a","var":"$a") + copy_field("$a","person.occupation[].$append") + end +end + +retain("id","type","thumbnail","person") diff --git a/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix.flux b/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix.flux new file mode 100644 index 000000000..70659b0f9 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix.flux @@ -0,0 +1,10 @@ +default gnd = FLUX_DIR + "Tp-200.pica.gz"; + +gnd| +open-file| +as-lines| +decode-pica(normalizeutf8="true", normalizedserialization="true")| +fix(FLUX_DIR + "tp2json.fix")| +normalize-unicode-stream| +encode-json| +write(FLUX_DIR + "test.txt"); diff --git a/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.flux b/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.morph.flux similarity index 100% rename from metafacture-runner/src/main/dist/examples/gnd/json/tp2json.flux rename to metafacture-runner/src/main/dist/examples/gnd/json/tp2json.morph.flux diff --git 
a/metafacture-runner/src/main/dist/examples/gnd/references/format-gnd.fix b/metafacture-runner/src/main/dist/examples/gnd/references/format-gnd.fix new file mode 100644 index 000000000..7f584c092 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/references/format-gnd.fix @@ -0,0 +1,193 @@ +do put_macro("gndPersonCombinedLabel") # in contrast to morph this is not normalizing utf 8 + paste("$[field].@combinedLabel","$[field].P", "$[field].a", "~, ", "$[field].d", join_char:"") + replace_all("$[field].@combinedLabel",", $","") + if exists("$[field].c") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].c") + end + copy_field("$[field].n","$[field].add.$append") + copy_field("$[field].l","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") +end + +do put_macro("gndCorporationCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","b") + copy_field("$[field].n","$[field].add.$append") + copy_field("$[field].l","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + copy_field("$[field].a","$[field].@combinedLabel") + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + if exists("$[field].b") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "~ / ", "$[field].b", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +do put_macro("gndConferenceCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","f") + copy_field("$[field].g","$[field].add.$append") + 
copy_field("$[field].n","$[field].add.$append") + copy_field("$[field].d","$[field].add.$append") + copy_field("$[field].c","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].b") + paste("$[field].@combinedLabel", "$[field].a", "~ / ", "$[field].b", join_char:"") + else + copy_field("$[field].a","$[field].@combinedLabel") + end + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + + +do put_macro("gndGeolocationCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","g") + copy_field("$[field].z","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].x") # guard on the same subfield that is pasted below + paste("$[field].@combinedLabel", "$[field].a", "~ / ", "$[field].x", join_char:"") + else + copy_field("$[field].a","$[field].@combinedLabel") + end + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +do put_macro("gndSubjectCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","s") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + copy_field("$[field].a","$[field].@combinedLabel") + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +do put_macro("gndWorkCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","u") + copy_field("$[field].f","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + 
copy_field("$[field].n","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].p") + paste("$[field].@combinedLabel", "$[field].a", "~ / ", "$[field].p", join_char:"") + else + copy_field("$[field].a","$[field].@combinedLabel") + end + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +copy_field("002@.0","@type") +substring("@type","1","1") + + +copy_field("008A.a","teilbest") + +copy_field("002@.0","satzart") +substring("satzart","0","2") +copy_field("002@.0","katlevel") +substring("katlevel","2","1") + +# copy_field("003@.0","id") + + +copy_field("007K.0","nid") + +do list(path:"042A", "var":"$i") + do list(path:"$i.a","var":"$a") + copy_field("$a","sysnummer.$append") + end +end +join_field("sysnummer","; ") + + +do list(path:"004B", "var":"$i") + do list(path:"$i.a","var":"$a") + copy_field("$a","entcode.$append") + end +end +join_field("entcode"," | ") +unless exists("entcode") + add_field("entcode","") +end + +# pn +do list(path:"028A","var":"PERSONNAME") + call_macro("gndPersonCombinedLabel",field:"PERSONNAME",out:"name") +end +do list(path:"028@","var":"PERSONNAME") + call_macro("gndPersonCombinedLabel",field:"PERSONNAME",out:"@syn.$append") +end + +do list(path:"029A","var":"CORPORATIONNAME") + call_macro("gndCorporationCombinedLabel",field:"CORPORATIONNAME",out:"name") +end +do list(path:"029@","var":"CORPORATIONNAME") + call_macro("gndCorporationCombinedLabel",field:"CORPORATIONNAME",out:"@syn.$append") +end + +#f +do list(path:"030A","var":"CONFERENCENAME") + call_macro("gndConferenceCombinedLabel",field:"CONFERENCENAME",out:"name") +end +do list(path:"030@","var":"CONFERENCENAME") + call_macro("gndConferenceCombinedLabel",field:"CONFERENCENAME",out:"@syn.$append") +end + +#s +do list(path:"065A","var":"GEONAME") + 
call_macro("gndGeolocationCombinedLabel",field:"GEONAME",out:"name") +end +do list(path:"065@","var":"GEONAME") + call_macro("gndGeolocationCombinedLabel",field:"GEONAME",out:"@syn.$append") +end + +# g +do list(path:"041A","var":"SUBJECTNAME") + call_macro("gndSubjectCombinedLabel",field:"SUBJECTNAME",out:"name") +end +do list(path:"041@","var":"SUBJECTNAME") + call_macro("gndSubjectCombinedLabel",field:"SUBJECTNAME",out:"@syn.$append") +end + +#u +do list(path:"022A","var":"WORKNAME") + call_macro("gndWorkCombinedLabel",field:"WORKNAME",out:"name") +end +do list(path:"022@","var":"WORKNAME") + call_macro("gndWorkCombinedLabel",field:"WORKNAME",out:"@syn.$append") +end + +copy_field("@syn","syn") +join_field("syn"," | ") + + +vacuum() +retain("name","syn","nid","entcode","teilbest", "satzart","katlevel","sysnummer") diff --git a/metafacture-runner/src/main/dist/examples/gnd/references/format-gnd.fix.flux b/metafacture-runner/src/main/dist/examples/gnd/references/format-gnd.fix.flux new file mode 100644 index 000000000..2e54ce0a1 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/references/format-gnd.fix.flux @@ -0,0 +1,15 @@ + +default base = ""; +default gndsimple = base + "gnd-simplified.dat"; + +gnd| +open-file| +as-lines| +decode-pica| +fix(FLUX_DIR + "format-gnd.fix")| +encode-formeta(style="concise")| +write(gndsimple); + + + + diff --git a/metafacture-runner/src/main/dist/examples/gnd/references/format-gnd.flux b/metafacture-runner/src/main/dist/examples/gnd/references/format-gnd.morph.flux similarity index 100% rename from metafacture-runner/src/main/dist/examples/gnd/references/format-gnd.flux rename to metafacture-runner/src/main/dist/examples/gnd/references/format-gnd.morph.flux diff --git a/metafacture-runner/src/main/dist/examples/gnd/references/merge.fix.flux b/metafacture-runner/src/main/dist/examples/gnd/references/merge.fix.flux new file mode 100644 index 000000000..5ace5778b --- /dev/null +++ 
b/metafacture-runner/src/main/dist/examples/gnd/references/merge.fix.flux @@ -0,0 +1,23 @@ + +default base = ""; +default counts= base + "counts.dat"; +default gndsimple = base + "gnd-simplified.dat"; +default out = base + "gnd-references.csv"; + +//merge and output +"merging information" | write("stdout"); + +counts + "," + gndsimple| +decode-string(",")| +open-file| +as-lines| +decode-formeta| +batch-log("records read: ${totalRecords}",batchsize="100000")| +stream-to-triples| +decouple| +sort-triples(by="subject")| +collect-triples| +fix(FLUX_DIR + "output.fix")| +batch-log(batchsize="100000")| +encode-csv(separator=";",noQuotes="true")| +write(out); diff --git a/metafacture-runner/src/main/dist/examples/gnd/references/merge.flux b/metafacture-runner/src/main/dist/examples/gnd/references/merge.morph.flux similarity index 100% rename from metafacture-runner/src/main/dist/examples/gnd/references/merge.flux rename to metafacture-runner/src/main/dist/examples/gnd/references/merge.morph.flux diff --git a/metafacture-runner/src/main/dist/examples/gnd/references/output.fix b/metafacture-runner/src/main/dist/examples/gnd/references/output.fix new file mode 100644 index 000000000..0fb5aeebe --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/references/output.fix @@ -0,0 +1,60 @@ +unless exists("name") + reject() +end + +# Undo preexisting order for setting order for CSV. 
+move_field("count","@count") +move_field("nid","@nid") +move_field("katlevel","@katlevel") +move_field("satzart","@satzart") +move_field("name","@name") +move_field("sysnummer","@sysnummer") +move_field("entcode","@entcode") +move_field("syn","@syn") + + +if exists("@count") + copy_field("@count","count") +else + add_field("count","0") +end + +copy_field("_id","id") + + +copy_field("@nid","nid") +copy_field("@satzart","satzart") +copy_field("@katlevel","katlevel") + +if exists("@name") + copy_field("@name","name") +else + add_field("name","") +end +if exists("@sysnummer") + copy_field("@sysnummer","sys") + replace_all("sys",";",",") +else + add_field("sys","") +end +if exists("@entcode") + copy_field("@entcode","ent") +else + add_field("ent","") +end +if exists("@syn") + copy_field("@syn","syn") +else + add_field("syn","") +end + +retain("count", + "id", + "nid", + "katlevel", + "satzart", + "name", + "sys", + "ent", + "syn" +) diff --git a/metafacture-runner/src/main/dist/examples/gnd/references/references.fix b/metafacture-runner/src/main/dist/examples/gnd/references/references.fix new file mode 100644 index 000000000..b819df067 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/references/references.fix @@ -0,0 +1,8 @@ +do list(path:"041A*|041A","var":"$i") # Until https://github.com/metafacture/metafacture-core/issues/651 is fixed, one has to add "041A" explicitly + copy_field("$i.9","relevantField.$append") +end + +trim("relevantField.*") +uniq("relevantField") + +retain("relevantField") diff --git a/metafacture-runner/src/main/dist/examples/gnd/references/references.fix.flux b/metafacture-runner/src/main/dist/examples/gnd/references/references.fix.flux new file mode 100644 index 000000000..4654a0d8b --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/references/references.fix.flux @@ -0,0 +1,23 @@ + +default base = ""; +default counts= base + "counts.dat"; +default dump = FLUX_DIR + "10.pica"; + +//count references +"counting references in 
" + dump | write("stdout"); + +dump| +open-file| +as-lines| +catch-object-exception| +decode-pica| +batch-log(batchsize="100000")| +fix(FLUX_DIR + "references.fix")| +stream-to-triples| +decouple| +count-triples(countBy="object")| +collect-triples| +encode-formeta(style="concise")| +write(counts); + + diff --git a/metafacture-runner/src/main/dist/examples/gnd/references/references.flux b/metafacture-runner/src/main/dist/examples/gnd/references/references.morph.flux similarity index 100% rename from metafacture-runner/src/main/dist/examples/gnd/references/references.flux rename to metafacture-runner/src/main/dist/examples/gnd/references/references.morph.flux diff --git a/metafacture-runner/src/main/dist/examples/gnd/references2/count-gnd-references.fix.flux b/metafacture-runner/src/main/dist/examples/gnd/references2/count-gnd-references.fix.flux new file mode 100644 index 000000000..741f121c2 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/references2/count-gnd-references.fix.flux @@ -0,0 +1,21 @@ + +default base = ""; +default dump = FLUX_DIR + "10.pica"; +default out = base + "gnd-references.txt"; + +"counting references in " + dump | write("stdout"); + +dump| +open-file| +as-lines| +catch-object-exception| +decode-pica| +batch-log(batchsize="100000")| +fix(FLUX_DIR + "extract.fix")| +stream-to-triples(redirect="true")| +sort-triples(by="subject")| +collect-triples| +fix(FLUX_DIR + "output.fix")| +batch-log(batchsize="100000")| +encode-csv(noquotes="true",separator=";")| +write(out); diff --git a/metafacture-runner/src/main/dist/examples/gnd/references2/count-gnd-references.flux b/metafacture-runner/src/main/dist/examples/gnd/references2/count-gnd-references.morph.flux similarity index 100% rename from metafacture-runner/src/main/dist/examples/gnd/references2/count-gnd-references.flux rename to metafacture-runner/src/main/dist/examples/gnd/references2/count-gnd-references.morph.flux diff --git 
a/metafacture-runner/src/main/dist/examples/gnd/references2/extract.fix b/metafacture-runner/src/main/dist/examples/gnd/references2/extract.fix new file mode 100644 index 000000000..d43c27077 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/references2/extract.fix @@ -0,0 +1,216 @@ +do put_macro("gndPersonCombinedLabel") # in contrast to morph this is not normalizing utf 8 + paste("$[field].@combinedLabel","$[field].P", "$[field].a", "~, ", "$[field].d", join_char:"") + replace_all("$[field].@combinedLabel",", $","") + if exists("$[field].c") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].c") + end + copy_field("$[field].n","$[field].add.$append") + copy_field("$[field].l","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") +end + +do put_macro("gndCorporationCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","b") + copy_field("$[field].n","$[field].add.$append") + copy_field("$[field].l","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + copy_field("$[field].a","$[field].@combinedLabel") + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + if exists("$[field].b") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "~ / ", "$[field].b", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +do put_macro("gndConferenceCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","f") + copy_field("$[field].g","$[field].add.$append") + 
copy_field("$[field].n","$[field].add.$append") + copy_field("$[field].d","$[field].add.$append") + copy_field("$[field].c","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].b") + paste("$[field].@combinedLabel", "$[field].a", "~ / ", "$[field].b", join_char:"") + else + copy_field("$[field].a","$[field].@combinedLabel") + end + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + + +do put_macro("gndGeolocationCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","g") + copy_field("$[field].z","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].x") # guard on the same subfield that is pasted below + paste("$[field].@combinedLabel", "$[field].a", "~ / ", "$[field].x", join_char:"") + else + copy_field("$[field].a","$[field].@combinedLabel") + end + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +do put_macro("gndSubjectCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","s") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + copy_field("$[field].a","$[field].@combinedLabel") + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +do put_macro("gndWorkCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","u") + copy_field("$[field].f","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + 
copy_field("$[field].n","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].p") + paste("$[field].@combinedLabel", "$[field].a", "~ / ", "$[field].p", join_char:"") + else + copy_field("$[field].a","$[field].@combinedLabel") + end + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +copy_field("002@.0","@type") +substring("@type","1","1") + +# pn +do list(path:"028A","var":"PERSONNAME") + call_macro("gndPersonCombinedLabel",field:"PERSONNAME",out:"@name") +end +do list(path:"028@","var":"PERSONNAME") + call_macro("gndPersonCombinedLabel",field:"PERSONNAME",out:"@syn.$append") +end + +do list(path:"029A","var":"CORPORATIONNAME") + call_macro("gndCorporationCombinedLabel",field:"CORPORATIONNAME",out:"@name") +end +do list(path:"029@","var":"CORPORATIONNAME") + call_macro("gndCorporationCombinedLabel",field:"CORPORATIONNAME",out:"@syn.$append") +end + +#f +do list(path:"030A","var":"CONFERENCENAME") + call_macro("gndConferenceCombinedLabel",field:"CONFERENCENAME",out:"@name") +end +do list(path:"030@","var":"CONFERENCENAME") + call_macro("gndConferenceCombinedLabel",field:"CONFERENCENAME",out:"@syn.$append") +end + +#s +do list(path:"065A","var":"GEONAME") + call_macro("gndGeolocationCombinedLabel",field:"GEONAME",out:"@name") +end +do list(path:"065@","var":"GEONAME") + call_macro("gndGeolocationCombinedLabel",field:"GEONAME",out:"@syn.$append") +end + +# g +do list(path:"041A","var":"SUBJECTNAME") + call_macro("gndSubjectCombinedLabel",field:"SUBJECTNAME",out:"@name") +end +do list(path:"041@","var":"SUBJECTNAME") + call_macro("gndSubjectCombinedLabel",field:"SUBJECTNAME",out:"@syn.$append") +end + +#u +do list(path:"022A","var":"WORKNAME") + call_macro("gndWorkCombinedLabel",field:"WORKNAME",out:"@name") +end +do list(path:"022@","var":"WORKNAME") + 
call_macro("gndWorkCombinedLabel",field:"WORKNAME",out:"@syn.$append") +end + +# copy_field("008A.a","teilbest") + +do list(path:"041A*|041A","var":"$i") # Until https://github.com/metafacture/metafacture-core/issues/651 is fixed, one has to add "041A" explicitly + trim("$i.9") + to_var("$i.9","ref") + if exists("$i.9") + add_field("{to:$[ref]}refed","") + end +end + +copy_field("@name","gnd.name") +copy_field("002@.0","gnd.katlevel") +substring("gnd.katlevel","2","1") +copy_field("002@.0","gnd.satzart") +substring("gnd.satzart","0","2") + +# copy_field("003@.0","gnd.id") +copy_field("007K.0","gnd.nid") +do list(path:"042A", "var":"$i") + do list(path:"$i.a","var":"$a") + copy_field("$a","gnd.sys.$append") + end +end +replace_all("gnd.sys.*",";",",") +join_field("gnd.sys"," | ") +unless exists("gnd.sys") + add_field("gnd.sys","") +end + +do list(path:"004B", "var":"$i") + do list(path:"$i.a","var":"$a") + copy_field("$a","gnd.ent.$append") + end +end +join_field("gnd.ent"," | ") +unless exists("gnd.ent") + add_field("gnd.ent","") +end + + +copy_field("@syn","gnd.syn") +join_field("gnd.syn"," | ") +unless exists("gnd.syn") + add_field("gnd.syn","") +end + +unless any_match("004B.a", "gib|gio|giw") + copy_field("004B.a","filter1") +end + +if any_match("002@.0", "Ts1|Tg1") + copy_field("002@.0","filter2") +end + +unless exists("037G.c") + add_field("filter3","ok") +end + + +retain("{to*","gnd","filter*","@*") diff --git a/metafacture-runner/src/main/dist/examples/gnd/references2/output.fix b/metafacture-runner/src/main/dist/examples/gnd/references2/output.fix new file mode 100644 index 000000000..a90bf5520 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/gnd/references2/output.fix @@ -0,0 +1,38 @@ +unless exists("refed") + reject() +end + +unless exists("gnd.nid") + reject() +end + +if is_array("refed") + copy_field("refed","count") +else + copy_field("refed","count.$append") +end + +count("count") + +copy_field("_id","id") + + +copy_field("gnd.nid","nid") 
+copy_field("gnd.satzart","satzart") +copy_field("gnd.katlevel","katlevel") +copy_field("gnd.name","name") +copy_field("gnd.sys","sys") +copy_field("gnd.ent","ent") +copy_field("gnd.syn","syn") + +retain("count", + "id", + "nid", + "katlevel", + "satzart", + "name", + "sys", + "ent", + "syn" +) + diff --git a/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix b/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix new file mode 100644 index 000000000..b6b428ef5 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix @@ -0,0 +1,345 @@ +# Hint: This is a translated version of the morph transformation in metafacture-runner/src/main/dist/examples/morph/marc21-to-edm/MARC21-EDM.xml +# Result is the same. The fix transformation could be adjusted to be even more efficient. + +do once("maps") + put_map("rights", + "DE-101": "http://www.europeana.eu/rights/rr-f/" + ) + put_map("isils", + "DE-101": "Deutsche Nationalbibliothek" + ) + put_map("urls", + "DE-101": "http://d-nb.info/" + ) + put_map("suffix", + "__default":"" + ) + put_map("mediatype", + "__default": "unknown", + "mediatype_003" : "Text" + ) + + put_filemap("$[FLUX_DIR]timevoc.txt", "timevoc", sep_char:"\t") + + put_var("dnb_subject","") +end + +copy_field("leader","@leader6") +substring("@leader6","6","1") +copy_field("leader","@leader7") +substring("@leader7","7","1") +copy_field("leader","@leader19") +substring("@leader19","19","1") + +copy_field("041? 
.a","@lang") + +paste("aggregation_id","~http://www.dnb.de/","001", join_char:"") + +if exists("003") + copy_field("003", "@isil") +else + copy_field("040","@isil") +end + + +copy_field("@isil", "edm:dataProvider$de") +lookup("edm:dataProvider$de","isils") + + +paste("#edm:dataProvider", "~http://ld.zdb-services.de/data/organisations/", "@isil", join_char:"") + + +add_field("edm:provider", "Deutsche Digitale Bibliothek") + + +if any_equal("@leader7", "m") + if any_match("@leader19","[ b]") + add_field("@mat","Monografie") + end +end + +if any_equal("@leader7", "s") + add_field("@mat","fortlaufendes Sammelwerk") +end + + +if any_match("@leader19","a") + add_field("@mat","Mehrbändiges Werk") +end + + +if any_match("@leader19","c") + add_field("@mat","Band") +end + +copy_field("@isil","@isShownByUrl") +lookup("@isShownByUrl","urls") + +copy_field("@isil","@isShownBySuffix") +lookup("@isShownBySuffix","suffix") + +paste("edm:isShownAt","@isShownByUrl","001","@isShownBySuffix", join_char:"") + +# +# + +do list(path:"650?7|60017|61017|61017|630??|655?7|651?7|648??","var":"SUBJECT") + do list(path:"SUBJECT.0","var":"SUBJECT_ID") + if any_match("SUBJECT_ID","^\\(DE-588\\)(.*)$") + copy_field("SUBJECT.a","skos:Concept.$append.skos:prefLabel$de") + copy_field("SUBJECT_ID","skos:Concept.$last.~rdf:about") + replace_all("skos:Concept.$last.~rdf:about","^\\(DE-588\\)(.*)$","http://d-nb.info/gnd/$1") + end + end +end + +do list(path:"2603 ","var":"$i") + do list(path:"$i.b","var":"$b") + unless exists("@publisherID") + copy_field("2603 .b", "260bcount.$append") + copy_field("@publisherList","@publisherCount") + if is_array("@publisherCount") + count("@publisherCount") + else + add_field("@publisherCount","1") + end + paste("@publisherID", "~#/agent/","001","~-","@publisherCount", join_char:"") + end + end +end + + +paste("@orgID","~http://ld.zdb-services.de/data/organisations/","@isil",join_char:"") + +unless str_equal("$[sector]","") + 
add_field("@sectorID","http://ddb.vocnet.org/sparte/sparte$[sector]") +end + +replace_all("@sectorID","sec_","") + +copy_field("@sectorID","edm:Agent.$append.#rdf:type") +copy_field("@orgID","edm:Agent.$last.~rdf:about") +copy_field("@isil","edm:Agent.$last.skos:prefLabel$de") +lookup("edm:Agent.$last.skos:prefLabel$de","isils") + +copy_field("@sectorID", "skos:Concept.$append.~rdf:about") +unless str_equal("$[sector]","") + add_field("skos:Concept.$last.skos:notation","$[sector]") +end + +paste("@cEventID", "~#/event/", "001", "~-c", join_char:"") + +paste("@pEventID", "~#/event/", "001", "~-p", join_char:"") + + +copy_field("@publisherID","edm:Agent.$append.~rdf:about") +do list(path:"2603 .b","var":"$i") + unless exists("edm:Agent.$last.skos:prefLabel$de") + copy_field("$i","edm:Agent.$last.skos:prefLabel$de") + copy_field("@pEventID","edm:Agent.$last.#edm:wasPresent") + end +end + +do list(path:"2603 ","var":"$i") + do list(path:"$i.a","var":"$a") + unless exists("@place") + copy_field("$a","@place") + copy_field("$a","@placeList.$append") + copy_field("@placeList","@placeCount") + if is_array("@placeCount") + count("@placeCount") + else + add_field("@placeCount","1") + end + end + end +end +replace_all("@place"," \\[?u.a.\\]?.*$","") + +if exists("@place") + paste("@placeID","~#/place/","001","~_place","@placeCount",join_char:"") +end + +do list(path:"260[ 3] ","var":"$i") + do list(path:"$i.c","var":"$c") + copy_field("$c","@timespanList.$append") + end +end + +if exists("@timespanList") + unless exists("@timespanList.2") + copy_field("@timespanList.1","@timespan") + paste("@timeID","~#/timespan/","001","~_timespan1",join_char:"") + end +end + +add_field("@mediatypeID","$[media_type]") +lookup("@mediatypeID","mediatype") +prepend("@mediatypeID","http://ddb.vocnet.org/medientyp/") + +copy_field("@isil","@recordIDUrl") +lookup("@recordIDUrl","urls") + +copy_field("@isil","@recordIDSuffix") +lookup("@recordIDSuffix","suffix") + 
+paste("@recordID","@recordIDUrl","001","@recordIDSuffix", join_char:"") + + +do list(path:"1001 |1101 |7001 |7101", "var":"CREATOR") + do list(path: "CREATOR.0", "var":"CREATORID") + if any_match("CREATORID", "^\\(DE-588\\)(.*)$") + copy_field("CREATORID","edm:Agent.$append.~rdf:about") + replace_all("edm:Agent.$last.~rdf:about", "^\\(DE-588\\)(.*)$","http://d-nb.info/gnd/$1") + copy_field("CREATOR.a","edm:Agent.$last.skos:prefLabel$de") + copy_field("@cEventID","edm:Agent.$last.#edm:wasPresent") + end + end +end + + +copy_field("@cEventID","edm:Event.$append.~rdf:about") +add_field("edm:Event.$last.edm:hasType", "http://terminology.lido-schema.org/lido00012") +do list(path:"edm:Agent","var":"AGENT") + if any_match("AGENT.~rdf:about", "http://d-nb.info/gnd/.*") + copy_field("AGENT.~rdf:about", "edm:Event.$last.#crm:P11_had_participant.$append") + end +end + +copy_field("@pEventID","edm:Event.$append.~rdf:about") +add_field("edm:Event.$last.edm:hasType", "http://terminology.lido-schema.org/lido00228") +copy_field("@publisherID", "edm:Event.$last.#crm:P11_had_participant.$append") +copy_field("@timeID", "edm:Event.$last.#edm:occuredAt") +copy_field("@placeID", "edm:Event.$last.#edm:happenedAt") + + + + +copy_field("@placeID","edm:Place.~rdf:about") +copy_field("@place","edm:Place.skos:prefLabel") + + +if exists("@timeID") + copy_field("@timeID","edm:TimeSpan.~rdf:about") + do list(path:"2603 ","var":"$i") + copy_field("$i.c","edm:TimeSpan.skos:notation") + end +end + + + + +copy_field("@mediatypeID","skos:Concept.$append.~rdf:about") +unless str_equal("$[media_type]","") + add_field("skos:Concept.$last.skos:notation","$[media_type]") +end + + +copy_field("@lang","dcterms:LinguisticSystem.rdf:value") +paste("dcterms:LinguisticSystem.~rdf:about","~http://id.loc.gov/vocabulary/iso639-2/","@lang", join_char:"") + + +copy_field("@isil","edm:rights") +lookup("edm:rights","rights") + +add_field("#dcterms:rights","http://creativecommons.org/publicdomain/zero/1.0/") + 
+copy_field("@recordID","edm:ProvidedCHO.~rdf:about") + +copy_field("@cEventID","edm:ProvidedCHO.#edm:wasPresent.$append") +copy_field("@pEventID","edm:ProvidedCHO.#edm:wasPresent.$append") + +copy_field("@mat","edm:ProvidedCHO.dc:type") + +add_field("edm:ProvidedCHO.edm:type","TEXT") + +copy_field("2603 .b","edm:ProvidedCHO.dc:publisher") + +unless any_equal("300??.a","Online-Ressource") + copy_field("300??.a","edm:ProvidedCHO.dcterms:extent") +end + + +copy_field("@lang","edm:ProvidedCHO.dc:language") + +paste("edm:ProvidedCHO.#dcterms:language","~http://id.loc.gov/vocabulary/iso639-2/","@lang",join_char:"") + + +if exists("490??") + paste("edm:ProvidedCHO.dcterms:bibliographicCitation","490??.a","~ ; ","490??.v", join_char:"") +end + +copy_field("1[10]01 .a","edm:ProvidedCHO.dc:creator.$append") + +copy_field("7[10]01 .a","edm:ProvidedCHO.dc:contributor.$append") + + +do list(path:"830??|800??|810??|811??","var":"$i") + do list(path:"$i.w","var":"$j") + if any_match("$j","^\\(DE-101\\)(.*)$") + copy_field("$j","edm:ProvidedCHO.#dcterms:isPartOf.$append") + end + end +end + +do list(path:"650?7|60017|61017|61017|630??|655?7|651?7|648??","var":"$SUBJECT") + copy_field("$SUBJECT.a","edm:ProvidedCHO.dc:subject.$append") + do list(path:"$SUBJECT.0","var":"$SUBJECT_ID") + if any_match("$SUBJECT_ID","^\\(DE-588\\)(.*)$") + copy_field("$SUBJECT_ID","edm:ProvidedCHO.#dc:subject.$append") + replace_all("edm:ProvidedCHO.#dc:subject.$last","^\\(DE-588\\)(.*)$","http://d-nb.info/gnd/$1") + end + end +end + +replace_all("edm:ProvidedCHO.#dcterms:isPartOf.*","^\\(DE-101\\)(.*)$","http://d-nb.info/$1") + + +paste("edm:ProvidedCHO.dc:title","24510.a","24510.b",join_char:" : ") + +if exists("@timeID") + copy_field("260[ 3] .c","edm:ProvidedCHO.dcterms:issued") +end + +copy_field("250 .a","edm:ProvidedCHO.dc:description.$append") +copy_field("502??.a","edm:ProvidedCHO.dc:description.$append") + +do list(path:"245 ?|490 ?|800 ?|810 ?|811 ?|830 ?","var":"$i") + copy_field("$i.[nv]", 
"@position") +end + +unless exists("@position") + add_field("@position","-1") +end + +paste("edm:ProvidedCHO.ddb:techinfo","~aggregation_entity:false; position:", "@position",join_char:"") + +unless str_equal("$[dnb_subject]","") + add_field("edm:ProvidedCHO.#dc:subject", "$[dnb_subject]") + add_field("@dnbSubjectID","$[dnb_subject]") +end + +do list(path:"85640","var":"$i") + if any_match("$i.x","^Langzeitarchivierung.*") + copy_field("$i.u","@urn.~rdf:about") + elsif any_equal("$i.x","Resolving-System") + copy_field("$i.u","@urn.~rdf:about") + end + + if any_match("$i.x","^Langzeitarchivierung.*") + copy_field("$i.u","edm:WebResource.$append.~rdf:about") + elsif any_equal("$i.x","Resolving-System") + copy_field("$i.u","edm:WebResource.$append.~rdf:about") + end + copy_field("@mediatypeID","edm:WebResource.$last.dc:type") + add_field("edm:WebResource.$last.dcterms:rights", "http://www.deutsche-digitale-bibliothek.de/lizenzen/rv-fz/") +end + +retain("edm*","dc*","skos*", "aggregation_id", "#*") diff --git a/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix.flux b/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix.flux new file mode 100644 index 000000000..7b338684a --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix.flux @@ -0,0 +1,19 @@ +default out = "stdout"; +default file = FLUX_DIR + "Test_DNB_Mono.xml"; +default id = "1025374754"; +default sector = "sec_002"; +default media_type = "mediatype_003"; + +file| +open-file| +//"1025374754"| +//id| +//template("https://portal.dnb.de/opac.htm?method=requestMarcXml&idn=${o}")| +//open-http| +decode-xml| +handle-marcxml| +fix(FLUX_DIR + "MARC21-EDM.fix", *)| +add-oreaggregation| +rdf-macros(referenceMarker="#")| +encode-xml(roottag="rdf:RDF", recordtag="", namespacefile= FLUX_DIR+"edm-namespaces.properties")| +write(out); diff --git a/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.flux 
b/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.morph.flux similarity index 100% rename from metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.flux rename to metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.morph.flux diff --git a/metafacture-runner/src/main/dist/examples/morph/marc21/morph-marc21.xml b/metafacture-runner/src/main/dist/examples/morph/marc21/morph-marc21.xml deleted file mode 100644 index 82762f5da..000000000 --- a/metafacture-runner/src/main/dist/examples/morph/marc21/morph-marc21.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - diff --git a/metafacture-runner/src/main/dist/examples/morph/morph-marcxml-online.flux b/metafacture-runner/src/main/dist/examples/morph/morph-marcxml-online.flux deleted file mode 100644 index 6bddef2c0..000000000 --- a/metafacture-runner/src/main/dist/examples/morph/morph-marcxml-online.flux +++ /dev/null @@ -1,10 +0,0 @@ -default idn = "1021191485"; - -idn| -template("https://portal.dnb.de/opac.htm?method=requestMarcXml&idn=${o}")| -open-http| -decode-xml| -handle-marcxml| -morph(morphDef)| -encode-formeta(style="multiline")| -write("stdout"); diff --git a/metafacture-runner/src/main/dist/examples/sort/gnd-pref-label.fix b/metafacture-runner/src/main/dist/examples/sort/gnd-pref-label.fix new file mode 100644 index 000000000..7c790df32 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/sort/gnd-pref-label.fix @@ -0,0 +1,129 @@ +do put_macro("gndPersonCombinedLabel") # in contrast to morph this is not normalizing utf 8 + paste("$[field].@combinedLabel","$[field].P", "$[field].a", "~, ", "$[field].d", join_char:"") + replace_all("$[field].@combinedLabel",", $","") + if exists("$[field].c") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].c") + end + copy_field("$[field].n","$[field].add.$append") + copy_field("$[field].l","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + 
join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") +end + +do put_macro("gndCorporationCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","b") + copy_field("$[field].n","$[field].add.$append") + copy_field("$[field].l","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + copy_field("$[field].a","$[field].@combinedLabel") + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + if exists("$[field].b") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "~ / ", "$[field].b", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +do put_macro("gndConferenceCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","f") + copy_field("$[field].g","$[field].add.$append") + copy_field("$[field].n","$[field].add.$append") + copy_field("$[field].d","$[field].add.$append") + copy_field("$[field].c","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].b") + paste("$[field].@combinedLabel", "$[field].a", "~ / ", "$[field].b", join_char:"") + else + copy_field("$[field].a","$[field].@combinedLabel") + end + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + + +do put_macro("gndGeolocationCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","g") + copy_field("$[field].z","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + 
join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].x") + paste("$[field].@combinedLabel", "$[field].a", "~ / ", "$[field].x", join_char:"") + else + copy_field("$[field].a","$[field].@combinedLabel") + end + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +do put_macro("gndSubjectCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","s") + copy_field("$[field].g","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + copy_field("$[field].a","$[field].@combinedLabel") + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +do put_macro("gndWorkCombinedLabel") # in contrast to morph this is not normalizing utf 8 + if any_equal("@type","u") + copy_field("$[field].f","$[field].add.$append") + copy_field("$[field].g","$[field].add.$append") + copy_field("$[field].n","$[field].add.$append") + join_field("$[field].add",", ") + replace_all("$[field].add","^(.*)$"," <$1>") + if exists("$[field].p") + paste("$[field].@combinedLabel", "$[field].a", "~ / ", "$[field].p", join_char:"") + else + copy_field("$[field].a","$[field].@combinedLabel") + end + if exists("$[field].add") + paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"") + end + copy_field("$[field].@combinedLabel", "$[out]") + end +end + +copy_field("002@.0","@type") +substring("@type","1","1") + +do list(path:"028A","var":"PERSONNAME") + call_macro("gndPersonCombinedLabel",field:"PERSONNAME",out:"name") +end +do list(path:"029A","var":"CORPORATIONNAME") + call_macro("gndCorporationCombinedLabel",field:"CORPORATIONNAME",out:"name") +end +do
list(path:"030A","var":"CONFERENCENAME") + call_macro("gndConferenceCombinedLabel",field:"CONFERENCENAME",out:"name") +end +do list(path:"065A","var":"GEONAME") + call_macro("gndGeolocationCombinedLabel",field:"GEONAME",out:"name") +end +do list(path:"041A","var":"SUBJECTNAME") + call_macro("gndSubjectCombinedLabel",field:"SUBJECTNAME",out:"name") +end +do list(path:"022A","var":"WORKNAME") + call_macro("gndWorkCombinedLabel",field:"WORKNAME",out:"name") +end + +retain("name") diff --git a/metafacture-runner/src/main/dist/examples/sort/sort-gnd.fix.flux b/metafacture-runner/src/main/dist/examples/sort/sort-gnd.fix.flux new file mode 100644 index 000000000..398904295 --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/sort/sort-gnd.fix.flux @@ -0,0 +1,11 @@ +default fileName = FLUX_DIR + "gnd-sample.pica"; + +fileName| +open-file| +as-lines| +decode-pica| +fix(FLUX_DIR + "gnd-pref-label.fix")| +stream-to-triples| +sort-triples(by="object")| +template("${s}\t${o}")| +write("stdout"); diff --git a/metafacture-runner/src/main/dist/examples/sort/sort-gnd.flux b/metafacture-runner/src/main/dist/examples/sort/sort-gnd.morph.flux similarity index 100% rename from metafacture-runner/src/main/dist/examples/sort/sort-gnd.flux rename to metafacture-runner/src/main/dist/examples/sort/sort-gnd.morph.flux diff --git a/metafacture-runner/src/main/dist/examples/morph/marc21/10.marc21 b/metafacture-runner/src/main/dist/examples/transform/marc21/10.marc21 similarity index 100% rename from metafacture-runner/src/main/dist/examples/morph/marc21/10.marc21 rename to metafacture-runner/src/main/dist/examples/transform/marc21/10.marc21 diff --git a/metafacture-runner/src/main/dist/examples/transform/marc21/marc21.fix b/metafacture-runner/src/main/dist/examples/transform/marc21/marc21.fix new file mode 100644 index 000000000..035e8182d --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/transform/marc21/marc21.fix @@ -0,0 +1,7 @@ +if exists("24510.b") + 
paste("dc:title","24510.a","~:","24510.b") +else + copy_field("24510.a","dc:title") +end + +retain("dc:title") diff --git a/metafacture-runner/src/main/dist/examples/transform/marc21/marc21.fix.flux b/metafacture-runner/src/main/dist/examples/transform/marc21/marc21.fix.flux new file mode 100644 index 000000000..f0a3f2a2d --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/transform/marc21/marc21.fix.flux @@ -0,0 +1,10 @@ +default file = FLUX_DIR + "10.marc21"; + +file| +open-file| +as-lines| +decode-marc21| +fix(FLUX_DIR + "marc21.fix")| +stream-to-triples| +template("${o}")| +write("stdout"); diff --git a/metafacture-runner/src/main/dist/examples/morph/marc21/morph-marc21.flux b/metafacture-runner/src/main/dist/examples/transform/marc21/marc21.morph.flux similarity index 100% rename from metafacture-runner/src/main/dist/examples/morph/marc21/morph-marc21.flux rename to metafacture-runner/src/main/dist/examples/transform/marc21/marc21.morph.flux diff --git a/metafacture-runner/src/main/dist/examples/transform/marc21/marc21.xml b/metafacture-runner/src/main/dist/examples/transform/marc21/marc21.xml new file mode 100644 index 000000000..5cddc7eaa --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/transform/marc21/marc21.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/metafacture-runner/src/main/dist/examples/morph/pica-to-marcxml/nonNormalized.pica b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/nonNormalized.pica similarity index 100% rename from metafacture-runner/src/main/dist/examples/morph/pica-to-marcxml/nonNormalized.pica rename to metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/nonNormalized.pica diff --git a/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix new file mode 100644 index 000000000..81883f354 --- /dev/null +++
b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix @@ -0,0 +1,21 @@ +# maps PICA+ to marc21Xml --> +# setting the encoding scheme is mandatory. This sets it to utf8. --> +add_field("leader.characterCodingScheme","a") +# directly invoke "data" creats "controlfields" --> + +if exists("003@.0") + copy_field("003@.0","001") +else + add_field("001","no_id") +end +# entities triggers creation of "datafields" following the pattern: --> +# 'entity.name="$fieldname$ind1$ind2"' => '' --> +# and 'source.name="$subfield" => --> + +do list(path:"044N","var":"$004N") + do list(path:"$004N.a","var":"$a") + copy_field("$a","650 0.$append.a") + end +end + +retain("leader","001","650??") diff --git a/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix.flux b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix.flux new file mode 100644 index 000000000..de475895d --- /dev/null +++ b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix.flux @@ -0,0 +1,13 @@ +// opens file 'fileName', interprets the content as non normalized serialized +// pica+, encode as marc, morphs to marcxml and writes to standard out + +default fileName = FLUX_DIR + "nonNormalized.pica"; + +fileName| +open-file| +as-lines| +lines-to-records| +decode-pica(normalizedSerialization="false", ignoreMissingIdn="true")| +fix(FLUX_DIR + "pica-to-marcxml.fix")| +encode-marcxml(ensurecorrectmarc21xml="true")| +write("stdout"); diff --git a/metafacture-runner/src/main/dist/examples/morph/pica-to-marcxml/morph-pica-to-marcxml.flux b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.morph.flux similarity index 90% rename from metafacture-runner/src/main/dist/examples/morph/pica-to-marcxml/morph-pica-to-marcxml.flux rename to metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.morph.flux index 1d9d18a2f..ec5ac0c7c 100644 
--- a/metafacture-runner/src/main/dist/examples/morph/pica-to-marcxml/morph-pica-to-marcxml.flux +++ b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.morph.flux @@ -8,7 +8,7 @@ open-file| as-lines| lines-to-records| decode-pica(normalizedSerialization="false", ignoreMissingIdn="true")| -morph(FLUX_DIR + "morph-pica-to-marcxml.xml")| +morph(FLUX_DIR + "pica-to-marcxml.xml")| encode-marc21| decode-marc21(emitLeaderAsWhole="true", ignoreMissingId="true")| encode-marcxml| diff --git a/metafacture-runner/src/main/dist/examples/morph/pica-to-marcxml/morph-pica-to-marcxml.xml b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.xml similarity index 100% rename from metafacture-runner/src/main/dist/examples/morph/pica-to-marcxml/morph-pica-to-marcxml.xml rename to metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.xml From 926037e2e78d2d8820d4b1fe2736f159680413b9 Mon Sep 17 00:00:00 2001 From: TobiasNx <61879957+TobiasNx@users.noreply.github.com> Date: Wed, 5 Feb 2025 11:07:49 +0100 Subject: [PATCH 02/10] Adjust commet as suggested by reviewer #651 by @blackwinter Co-authored-by: Jens Wille --- .../src/main/dist/examples/gnd/references/references.fix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metafacture-runner/src/main/dist/examples/gnd/references/references.fix b/metafacture-runner/src/main/dist/examples/gnd/references/references.fix index b819df067..cf73f9ed4 100644 --- a/metafacture-runner/src/main/dist/examples/gnd/references/references.fix +++ b/metafacture-runner/src/main/dist/examples/gnd/references/references.fix @@ -1,4 +1,4 @@ -do list(path:"041A*|041A","var":"$i") # Until https://github.com/metafacture/metafacture-core/issues/651 is fixed one hass to add "041A" +do list(path:"041A*|041A","var":"$i") # Until issue #651 is fixed one has to add "041A" copy_field("$i.9","relevantField.$append") end From 
f11ddb99c8a2193118aff298013c033f51834e69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Wed, 5 Feb 2025 15:57:01 +0100 Subject: [PATCH 03/10] Delete empty line #598 --- .../src/main/dist/examples/gnd/references/references.morph.flux | 1 - 1 file changed, 1 deletion(-) diff --git a/metafacture-runner/src/main/dist/examples/gnd/references/references.morph.flux b/metafacture-runner/src/main/dist/examples/gnd/references/references.morph.flux index bcdccb6e7..76868a4be 100644 --- a/metafacture-runner/src/main/dist/examples/gnd/references/references.morph.flux +++ b/metafacture-runner/src/main/dist/examples/gnd/references/references.morph.flux @@ -20,4 +20,3 @@ collect-triples| encode-formeta(style="concise")| write(counts); - From a5f6eec225d3d0da0aaf8723144c98679e9cb1aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Wed, 5 Feb 2025 16:06:26 +0100 Subject: [PATCH 04/10] Use generic transform #589 --- .../examples/transform/pica-to-marcxml/pica-to-marcxml.fix.flux | 2 +- .../transform/pica-to-marcxml/pica-to-marcxml.morph.flux | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix.flux b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix.flux index de475895d..05d38a498 100644 --- a/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix.flux +++ b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix.flux @@ -1,5 +1,5 @@ // opens file 'fileName', interprets the content as non normalized serialized -// pica+, encode as marc, morphs to marcxml and writes to standard out +// pica+, encode as marc, transforms to marcxml and writes to standard out default fileName = FLUX_DIR + "nonNormalized.pica"; diff --git a/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.morph.flux 
b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.morph.flux index ec5ac0c7c..8afc92e2f 100644 --- a/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.morph.flux +++ b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.morph.flux @@ -1,5 +1,5 @@ // opens file 'fileName', interprets the content as non normalized serialized -// pica+, encode as marc, morphs to marcxml and writes to standard out +// pica+, encode as marc, transforms to marcxml and writes to standard out default fileName = FLUX_DIR + "nonNormalized.pica"; From d9aec253a0623e3c35cd8a0f7521c3d3803b0148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Wed, 5 Feb 2025 16:27:35 +0100 Subject: [PATCH 05/10] Adjust fix transformation to match the morph result #589 --- .../src/main/dist/examples/count/gnd/gnd-type.fix | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/metafacture-runner/src/main/dist/examples/count/gnd/gnd-type.fix b/metafacture-runner/src/main/dist/examples/count/gnd/gnd-type.fix index 6bc35d2e6..7f6db18b7 100644 --- a/metafacture-runner/src/main/dist/examples/count/gnd/gnd-type.fix +++ b/metafacture-runner/src/main/dist/examples/count/gnd/gnd-type.fix @@ -1,6 +1,3 @@ -if any_match("002@.0","...*") - replace_all("002@.0","^(..).*","$1") #only keep the first two letters - retain("002@.0") # only keep the relevent element -else - reject() -end +replace_all("002@.0","^.(.).*","$1") #only keep the second letter +copy_field("002@.0","") +retain("") # only keep the relevent element From 6077d6f22215d5bd3cf1e6c06fa3981119c77358 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Wed, 5 Feb 2025 16:53:08 +0100 Subject: [PATCH 06/10] Improve mapping for 490 #589 --- .../dist/examples/marc21-to-edm/MARC21-EDM.fix | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git 
a/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix b/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix index b6b428ef5..a7080956a 100644 --- a/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix +++ b/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix @@ -273,7 +273,19 @@ paste("edm:ProvidedCHO.#dcterms:language","~http://id.loc.gov/vocabulary/iso639- if exists("490??") - paste("edm:ProvidedCHO.dcterms:bibliographicCitation","490??.a","~ ; ","490??.v", join_char:"") + do list(path:"490??","var":"$i") + do list(path:"$i.a", "var":"$j") + copy_field("$j", "$i.label.$append") + end + join_field("$i.label", " / ") + + do list(path:"$i.v", "var":"$j") + copy_field("$j", "$i.numbering.$append") + end + join_field("$i.numbering") + prepend("$i.numbering"," ; ") + paste("edm:ProvidedCHO.dcterms:bibliographicCitation","$i.label","$i.numbering", join_char:"") + end end copy_field("1[10]01 .a","edm:ProvidedCHO.dc:creator.$append") From 44d5471a2bc1c5b9ef12b5ff041aa21d4c6fdaf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Wed, 5 Feb 2025 17:02:55 +0100 Subject: [PATCH 07/10] Delete unnecessary comment #589 --- .../src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix | 3 --- 1 file changed, 3 deletions(-) diff --git a/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix b/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix index a7080956a..c286e7e92 100644 --- a/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix +++ b/metafacture-runner/src/main/dist/examples/marc21-to-edm/MARC21-EDM.fix @@ -1,6 +1,3 @@ -# Hint: This is a translated version of the morph transformation in metafacture-runner/src/main/dist/examples/morph/marc21-to-edm/MARC21-EDM.xml -# Result is the same. The fix transformation could be adjusted to be even more efficient. 
- do once("maps") put_map("rights", "DE-101": "http://www.europeana.eu/rights/rr-f/" From 77e15f31b047b2a133e11718d88888052620f5b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Wed, 5 Feb 2025 17:45:04 +0100 Subject: [PATCH 08/10] Adjust fix transformation to match the morph result #589 --- .../examples/transform/pica-to-marcxml/pica-to-marcxml.fix | 7 ++----- .../transform/pica-to-marcxml/pica-to-marcxml.fix.flux | 4 +++- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix index 81883f354..27bdfc173 100644 --- a/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix +++ b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix @@ -1,13 +1,10 @@ # maps PICA+ to marc21Xml --> # setting the encoding scheme is mandatory. This sets it to utf8. 
--> add_field("leader.characterCodingScheme","a") + # directly invoke "data" creats "controlfields" --> +copy_field("003@.0","001") -if exists("003@.0") - copy_field("003@.0","001") -else - add_field("001","no_id") -end # entities triggers creation of "datafields" following the pattern: --> # 'entity.name="$fieldname$ind1$ind2"' => '' --> # and 'source.name="$subfield" => --> diff --git a/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix.flux b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix.flux index 05d38a498..894007e37 100644 --- a/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix.flux +++ b/metafacture-runner/src/main/dist/examples/transform/pica-to-marcxml/pica-to-marcxml.fix.flux @@ -9,5 +9,7 @@ as-lines| lines-to-records| decode-pica(normalizedSerialization="false", ignoreMissingIdn="true")| fix(FLUX_DIR + "pica-to-marcxml.fix")| -encode-marcxml(ensurecorrectmarc21xml="true")| +encode-marc21| +decode-marc21(emitLeaderAsWhole="true", ignoreMissingId="true")| +encode-marcxml| write("stdout"); From 2fa6dbc1ae79aa6fd720ff7d57f12d23b78097d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Wed, 5 Feb 2025 18:30:53 +0100 Subject: [PATCH 09/10] Adjust fix flux workflow to match the morph workflow #589 --- .../src/main/dist/examples/gnd/json/tp2json.fix.flux | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix.flux b/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix.flux index 70659b0f9..2147f4c4e 100644 --- a/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix.flux +++ b/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix.flux @@ -3,8 +3,7 @@ default gnd = FLUX_DIR + "Tp-200.pica.gz"; gnd| open-file| as-lines| -decode-pica(normalizeutf8="true", normalizedserialization="true")| +decode-pica| fix(FLUX_DIR + "tp2json.fix")| 
-normalize-unicode-stream| encode-json| write(FLUX_DIR + "test.txt"); From b0c99c033cf03759b3e32d36ededb12a4267435d Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Thu, 6 Feb 2025 12:43:45 +0100 Subject: [PATCH 10/10] Align Flux workflow examples so that both variants match as closely as possible. (#663) --- .../src/main/dist/examples/count/subjects/references.morph.flux | 1 + .../src/main/dist/examples/gnd/json/tp2json.fix.flux | 2 +- .../src/main/dist/examples/gnd/references/references.morph.flux | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/metafacture-runner/src/main/dist/examples/count/subjects/references.morph.flux b/metafacture-runner/src/main/dist/examples/count/subjects/references.morph.flux index ccc5fc719..be70c4075 100644 --- a/metafacture-runner/src/main/dist/examples/count/subjects/references.morph.flux +++ b/metafacture-runner/src/main/dist/examples/count/subjects/references.morph.flux @@ -13,6 +13,7 @@ decode-pica| morph(FLUX_DIR + "references.xml")| stream-to-triples| count-triples(countBy="object")| + write("subjects.dat"); diff --git a/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix.flux b/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix.flux index 2147f4c4e..8fd064317 100644 --- a/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix.flux +++ b/metafacture-runner/src/main/dist/examples/gnd/json/tp2json.fix.flux @@ -6,4 +6,4 @@ as-lines| decode-pica| fix(FLUX_DIR + "tp2json.fix")| encode-json| -write(FLUX_DIR + "test.txt"); +write("stdout"); diff --git a/metafacture-runner/src/main/dist/examples/gnd/references/references.morph.flux b/metafacture-runner/src/main/dist/examples/gnd/references/references.morph.flux index 76868a4be..bcdccb6e7 100644 --- a/metafacture-runner/src/main/dist/examples/gnd/references/references.morph.flux +++ b/metafacture-runner/src/main/dist/examples/gnd/references/references.morph.flux @@ -20,3 +20,4 @@ collect-triples| encode-formeta(style="concise")| 
write(counts); +