Skip to content

Commit 592aa08

Browse files
committed
Add fix variations of mf runner examples #598
Reuse all fix workflows from #654 and bring folders together as suggested by @blackwinter in #654 (comment) and in #662
1 parent 16a41b0 commit 592aa08

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+1780
-31
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//creates a beacon file based on a pica+ dump of the DNB CBS data.
2+
3+
default type = "ALL";
4+
default out = dump + "-" + type + ".beacon";
5+
default header = FLUX_DIR + "header.txt";
6+
7+
8+
//read header
9+
"reading header " + header | write("stdout");
10+
header|open-file|as-lines|@Y;
11+
12+
//count references
13+
"counting references in " + dump | write("stdout");
14+
15+
dump|
16+
open-file|
17+
as-lines|
18+
catch-object-exception|
19+
decode-pica|
20+
batch-log(batchsize="100000")|
21+
fix(FLUX_DIR + "extract.fix", *)|
22+
stream-to-triples(redirect="true")|
23+
sort-triples(by="subject")|
24+
collect-triples|
25+
fix(FLUX_DIR + "output.fix")|
26+
batch-log("merged ${totalRecords}", batchsize="100000")|
27+
stream-to-triples|
28+
template("${s}")|
29+
@Y;
30+
31+
@Y|
32+
wait-for-inputs("2")|
33+
write(out);
34+
35+
36+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# 002@ not repeatable
2+
3+
# 002@ $0 holds the record type code; only records whose type starts with "Tp" are marked.
if any_match("002@.0", "^Tp.*$")
4+
copy_field("[email protected]","ok")
5+
end
6+
7+
# DBSM:
8+
# (006U $0 “04p01*”) or (017A $a “yy”)
9+
if any_match("006U.0","04p01.*")
10+
add_field("@value","DBSM|ALL")
11+
elsif any_equal("017A.a","yy")
12+
add_field("@value","DBSM|ALL")
13+
14+
# DEA:
15+
# (001@ $a “2”) or (209A $f “Exilarchiv” or 209A $f “HB/EB”)
16+
# DEA, first criterion: 001@ $a equals "2".
elsif any_equal("001@.a","2")
17+
add_field("@value","DEA|ALL")
18+
elsif any_equal("209A.f","HB/EB")
19+
add_field("@value","DEA|ALL")
20+
21+
# DMA:
22+
# (002@ $0 “G*” or 002@ $0 “M*”) or (006U $0 “10,P01*”)
23+
24+
# DMA, first criterion: 002@ $0 starts with "G" or "M".
# The pattern is a regular expression, so any_match is required; any_equal
# would compare against the literal string "^[GM].*" and never succeed.
elsif any_match("002@.0","^[GM].*")
25+
add_field("@value","DMA|ALL")
26+
27+
# DMA, second criterion: 006U $0 starts with "10,P01".
# The pattern is a regular expression, so any_match is required; any_equal
# would compare against the literal string "^10,P01.*" and never succeed.
elsif any_match("006U.0","^10,P01.*")
28+
add_field("@value","DMA|ALL")
29+
else
30+
add_field("@value","ALL")
31+
end
32+
33+
# Test if type variable fits
34+
35+
if any_contain("@value","$[type]")
36+
add_field("@value","$[type]")
37+
else
38+
remove_field("@value")
39+
end
40+
41+
do list(path: "041A*|028A*|029B*|028C*|028Q*|028P*|028F*|028M*|028D*|028E*", "var":"$i")
42+
trim("$i.9")
43+
to_var("$i.9","ref")
44+
if exists("$i.9")
45+
copy_field("@value","{to:$[ref]}refed")
46+
end
47+
end
48+
49+
retain("{to*","ok")
50+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
unless exists("refed")
2+
remove_field("ok","")
3+
end
4+
5+
unless exists("ok")
6+
remove_field("ok","")
7+
end
8+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
default fileName = FLUX_DIR + "gnd-sample.pica";
2+
3+
fileName|
4+
open-file|
5+
as-lines|
6+
decode-pica|
7+
fix(FLUX_DIR + "gnd-type.fix")|
8+
stream-to-triples|
9+
count-triples(countBy="object")|
10+
template("${s}\t${o}")|
11+
write("stdout");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Only handle records whose 002@ $0 type code has at least two characters.
if any_match("002@.0","...*")
2+
replace_all("002@.0","^(..).*","$1") # only keep the first two letters
3+
retain("002@.0") # only keep the relevant element
4+
else
5+
reject()
6+
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
do list(path:"041A*|041A","var":"$i") # Until https://github.com/metafacture/metafacture-core/issues/651 is fixed, one has to add "041A" explicitly
2+
copy_field("$i.9","relevantField.$append")
3+
end
4+
5+
trim("relevantField.*")
6+
uniq("relevantField")
7+
8+
retain("relevantField")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
2+
default counts="myflux/counts.dat";
3+
default catalogue = FLUX_DIR + "10.pica";
4+
5+
//count references
6+
"counting references in " + catalogue | write("stdout");
7+
8+
catalogue|
9+
open-file|
10+
as-lines|
11+
catch-object-exception|
12+
decode-pica|
13+
fix(FLUX_DIR + "references.fix")|
14+
stream-to-triples|
15+
count-triples(countBy="object")|
16+
17+
write("subjects.dat");
18+
19+

metafacture-runner/src/main/dist/examples/count/subjects/references.flux renamed to metafacture-runner/src/main/dist/examples/count/subjects/references.morph.flux

-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ decode-pica|
1313
morph(FLUX_DIR + "references.xml")|
1414
stream-to-triples|
1515
count-triples(countBy="object")|
16-
1716
write("subjects.dat");
1817

1918

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Keep only records whose 002@ $0 type code starts with "Tp"; everything else is rejected below.
if any_match("002@.0","^Tp.*")
2+
if any_match("041R.a",".*[Aa][Rr][Zz][Tt].*")
3+
nothing()
4+
else
5+
reject()
6+
end
7+
else
8+
reject()
9+
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// opens file 'fileName', interprets the content as pica and filters the results
2+
3+
default fileName = FLUX_DIR + "gnd-sample.pica";
4+
5+
fileName|
6+
open-file|
7+
as-lines|
8+
decode-pica|
9+
fix(FLUX_DIR + "filter.fix")| // Fix does not use the filter function but has its own filter mechanism within fix.
10+
encode-formeta(style="verbose")|
11+
write("stdout");

metafacture-runner/src/main/dist/examples/filter/morph/filter-morph.flux renamed to metafacture-runner/src/main/dist/examples/filter/filter.morph.flux

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ fileName|
66
open-file|
77
as-lines|
88
decode-pica|
9-
filter(FLUX_DIR + "filter-morph.xml")|
9+
filter(FLUX_DIR + "filter.xml")|
1010
encode-formeta(style="verbose")|
1111
write("stdout");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
default base = "";
2+
default dump = FLUX_DIR + "10.pica";
3+
default out = base + "Ts1-Tg1-without-crisscross.txt";
4+
5+
"counting references in " + dump | write("stdout");
6+
7+
dump|
8+
open-file|
9+
as-lines|
10+
catch-object-exception|
11+
decode-pica|
12+
batch-log(batchsize="100000")|
13+
fix(FLUX_DIR + "extract.fix")|
14+
stream-to-triples(redirect="true")|
15+
sort-triples(by="subject")|
16+
collect-triples|
17+
fix(FLUX_DIR + "output.fix")|
18+
batch-log(batchsize="100000")|
19+
encode-csv(noquotes="true",separator=";")|
20+
write(out);

0 commit comments

Comments
 (0)