Skip to content

Commit a2463d3

Browse files
committed
Adjust formatting
Reset formatting to old morph style and some changes in the old fluxes and adjust formatting in fix files.
1 parent 32bfc99 commit a2463d3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+775
-158
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//creates a beacon file based on a pica+ dump of the DNB CBS data.
2+
3+
default type = "ALL";
4+
default out = dump + "-" + type + ".beacon";
5+
default header = FLUX_DIR + "header.txt";
6+
7+
8+
//read header
9+
"reading header " + header | write("stdout");
10+
header|open-file|as-lines|@Y;
11+
12+
//count references
13+
"counting references in " + dump | write("stdout");
14+
15+
dump|
16+
open-file|
17+
as-lines|
18+
catch-object-exception|
19+
decode-pica|
20+
batch-log(batchsize="100000")|
21+
fix(FLUX_DIR + "extract.fix", *)|
22+
stream-to-triples(redirect="true")|
23+
sort-triples(by="subject")|
24+
collect-triples|
25+
fix(FLUX_DIR + "output.fix")|
26+
batch-log("merged ${totalRecords}", batchsize="100000")|
27+
stream-to-triples|
28+
template("${s}")|
29+
@Y;
30+
31+
@Y|
32+
wait-for-inputs("2")|
33+
write(out);
34+
35+
36+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//creates a beacon file based on a pica+ dump of the DNB CBS data.
2+
3+
default type = "ALL";
4+
default out = dump + "-" + type + ".beacon";
5+
default header = FLUX_DIR + "header.txt";
6+
7+
8+
//read header
9+
"reading header " + header | write("stdout");
10+
header|open-file|as-lines|@Y;
11+
12+
//count references
13+
"counting references in " + dump | write("stdout");
14+
15+
dump|
16+
open-file|
17+
as-lines|
18+
catch-object-exception|
19+
decode-pica|
20+
batch-log(batchsize="100000")|
21+
morph(FLUX_DIR + "extract.xml", *)|
22+
stream-to-triples(redirect="true")|
23+
sort-triples(by="subject")|
24+
collect-triples|
25+
morph(FLUX_DIR + "output.xml")|
26+
batch-log("merged ${totalRecords}", batchsize="100000")|
27+
stream-to-triples|
28+
template("${s}")|
29+
@Y;
30+
31+
@Y|
32+
wait-for-inputs("2")|
33+
write(out);
34+
35+
36+

metafacture-runner/src/main/dist/examples/beacon/create/extract.fix

+21-21
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,49 @@
11
# 002@ not repeatable
22

33
if any_match("002@.0", "^Tp.*$")
4-
copy_field("[email protected]","ok")
4+
copy_field("[email protected]","ok")
55
end
66

7-
# <!-- DBSM: -->
8-
# <!-- (006U $0 “04p01*”) or (017A $a “yy”) -->
7+
# DBSM:
8+
# (006U $0 “04p01*”) or (017A $a “yy”)
99
if any_match("006U.0","04p01.*")
10-
add_field("@value","DBSM|ALL")
10+
add_field("@value","DBSM|ALL")
1111
elsif any_equal("017A.a","yy")
12-
add_field("@value","DBSM|ALL")
12+
add_field("@value","DBSM|ALL")
1313

14-
# <!-- DEA: -->
15-
# <!-- (001@ $a 2””) or (209A $f “Exilarchiv” or 209A $f “HB/EB”) -->
14+
# DEA:
15+
# (001@ $a 2””) or (209A $f “Exilarchiv” or 209A $f “HB/EB”)
1616
elsif any_equal("001@.a","2")
17-
add_field("@value","DEA|ALL")
17+
add_field("@value","DEA|ALL")
1818
elsif any_equal("209A.f","HB/EB")
19-
add_field("@value","DEA|ALL")
19+
add_field("@value","DEA|ALL")
2020

21-
# <!-- DMA: -->
22-
# <!-- (002@ $0 “G*” or 002@ $0 “M*”) or (006U $0 “10,P01*”) -->
21+
# DMA:
22+
# (002@ $0 “G*” or 002@ $0 “M*”) or (006U $0 “10,P01*”)
2323

2424
elsif any_equal("002@.0","^[GM].*")
25-
add_field("@value","DMA|ALL")
25+
add_field("@value","DMA|ALL")
2626

2727
elsif any_equal("006U.0","^10,P01.*")
28-
add_field("@value","DMA|ALL")
28+
add_field("@value","DMA|ALL")
2929
else
30-
add_field("@value","ALL")
30+
add_field("@value","ALL")
3131
end
3232

3333
# Test if type variable fits
3434

3535
if any_contain("@value","$[type]")
36-
add_field("@value","$[type]")
36+
add_field("@value","$[type]")
3737
else
38-
remove_field("@value")
38+
remove_field("@value")
3939
end
4040

4141
do list(path: "041A*|028A*|029B*|028C*|028Q*|028P*|028F*|028M*|028D*|028E*", "var":"$i")
42-
trim("$i.9")
43-
to_var("$i.9","ref")
44-
if exists("$i.9")
45-
copy_field("@value","{to:$[ref]}refed")
46-
end
42+
trim("$i.9")
43+
to_var("$i.9","ref")
44+
if exists("$i.9")
45+
copy_field("@value","{to:$[ref]}refed")
46+
end
4747
end
4848

4949
retain("{to*","ok")

metafacture-runner/src/main/dist/examples/beacon/create/output.fix

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
unless exists("refed")
2-
remove_field("ok","")
2+
remove_field("ok","")
33
end
44

55
unless exists("ok")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
default fileName = FLUX_DIR + "gnd-sample.pica";
2+
3+
fileName|
4+
open-file|
5+
as-lines|
6+
decode-pica|
7+
fix(FLUX_DIR + "gnd-type.fix")|
8+
stream-to-triples|
9+
count-triples(countBy="object")|
10+
template("${s}\t${o}")|
11+
write("stdout");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
default fileName = FLUX_DIR + "gnd-sample.pica";
2+
3+
fileName|
4+
open-file|
5+
as-lines|
6+
decode-pica|
7+
morph(FLUX_DIR + "gnd-type.xml")|
8+
stream-to-triples|
9+
count-triples(countBy="object")|
10+
template("${s}\t${o}")|
11+
write("stdout");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
2+
default counts= FLUX_DIR + "counts.dat";
3+
default catalogue = FLUX_DIR + "10.pica";
4+
5+
//count references
6+
"counting references in " + catalogue | write("stdout");
7+
8+
catalogue|
9+
open-file|
10+
as-lines|
11+
catch-object-exception|
12+
decode-pica|
13+
fix(FLUX_DIR + "references.fix")|
14+
stream-to-triples|
15+
count-triples(countBy="object")|
16+
write(counts);
17+
18+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
2+
default counts= FLUX_DIR + "counts.dat";
3+
default catalogue = FLUX_DIR + "10.pica";
4+
5+
//count references
6+
"counting references in " + catalogue | write("stdout");
7+
8+
catalogue|
9+
open-file|
10+
as-lines|
11+
catch-object-exception|
12+
decode-pica|
13+
morph(FLUX_DIR + "references.xml")|
14+
stream-to-triples|
15+
count-triples(countBy="object")|
16+
17+
write("subjects.dat");
18+
19+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// opens file 'fileName', interprets the content as pica and filters the results
2+
3+
default fileName = FLUX_DIR + "gnd-sample.pica";
4+
5+
fileName|
6+
open-file|
7+
as-lines|
8+
decode-pica|
9+
fix(FLUX_DIR + "filter.fix")| // Pipe must precede this comment, otherwise it is commented out. Fix does not use the filter function but has its own filter mechanism within fix.
10+
encode-formeta(style="verbose")|
11+
write("stdout");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// opens file 'fileName', interprets the content as pica and filters the results
2+
3+
default fileName = FLUX_DIR + "gnd-sample.pica";
4+
5+
fileName|
6+
open-file|
7+
as-lines|
8+
decode-pica|
9+
filter(FLUX_DIR + "filter.xml")|
10+
encode-formeta(style="verbose")|
11+
write("stdout");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<metamorph xmlns="http://www.culturegraph.org/metamorph"
3+
version="1" entityMarker=".">
4+
<meta>
5+
<name>Filter Morph</name>
6+
<annotation>Filters Tp records of physicians (Arzt)</annotation>
7+
</meta>
8+
<rules>
9+
10+
<combine name="OK" value="OK">
11+
<data source="002@.0">
12+
<substring start="0" end="2" />
13+
<equals string="Tp" />
14+
</data>
15+
<data source="041R.a">
16+
<case to="lower" />
17+
<regexp match="arzt" />
18+
</data>
19+
</combine>
20+
21+
</rules>
22+
23+
<maps>
24+
</maps>
25+
</metamorph>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
default base = "";
2+
default dump = FLUX_DIR + "10.pica";
3+
default out = base + "Ts1-Tg1-without-crisscross.txt";
4+
5+
"counting references in " + dump | write("stdout");
6+
7+
dump|
8+
open-file|
9+
as-lines|
10+
catch-object-exception|
11+
decode-pica|
12+
batch-log(batchsize="100000")|
13+
fix(FLUX_DIR + "extract.fix")|
14+
stream-to-triples(redirect="true")|
15+
sort-triples(by="subject")|
16+
collect-triples|
17+
fix(FLUX_DIR + "output.fix")|
18+
batch-log(batchsize="100000")|
19+
encode-csv(noquotes="true",separator=";")|
20+
write(out);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
default base = "";
2+
default dump = FLUX_DIR + "10.pica";
3+
default out = base + "Ts1-Tg1-without-crisscross.txt";
4+
5+
"counting references in " + dump | write("stdout");
6+
7+
dump|
8+
open-file|
9+
as-lines|
10+
catch-object-exception|
11+
decode-pica|
12+
batch-log(batchsize="100000")|
13+
morph(FLUX_DIR + "extract.xml")|
14+
stream-to-triples(redirect="true")|
15+
sort-triples(by="subject")|
16+
collect-triples|
17+
morph(FLUX_DIR + "output.xml")|
18+
batch-log(batchsize="100000")|
19+
stream-to-triples|
20+
template("${o}")|
21+
write(out);

metafacture-runner/src/main/dist/examples/gnd/crisscross-connections/extract.fix

+19-19
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
do put_macro("gndPersonCombinedLabel") # in contrast to morph this is not normalizing utf 8
2-
paste("$[field].@combinedLabel","$[field].P", "$[field].a", "~, ", "$[field].d", join_char:"")
3-
replace_all("$[field].@combinedLabel",", $","")
4-
if exists("$[field].c")
5-
paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].c")
6-
end
7-
copy_field("$[field].n","$[field].add.$append")
8-
copy_field("$[field].l","$[field].add.$append")
9-
copy_field("$[field].g","$[field].add.$append")
10-
join_field("$[field].add",", ")
11-
replace_all("$[field].add","^(.*)$"," <$1>")
12-
if exists("$[field].add")
13-
paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"")
14-
end
15-
copy_field("$[field].@combinedLabel", "$[out]")
2+
paste("$[field].@combinedLabel","$[field].P", "$[field].a", "~, ", "$[field].d", join_char:"")
3+
replace_all("$[field].@combinedLabel",", $","")
4+
if exists("$[field].c")
5+
paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].c")
6+
end
7+
copy_field("$[field].n","$[field].add.$append")
8+
copy_field("$[field].l","$[field].add.$append")
9+
copy_field("$[field].g","$[field].add.$append")
10+
join_field("$[field].add",", ")
11+
replace_all("$[field].add","^(.*)$"," <$1>")
12+
if exists("$[field].add")
13+
paste("$[field].@combinedLabel", "$[field].@combinedLabel", "$[field].add", join_char:"")
14+
end
15+
copy_field("$[field].@combinedLabel", "$[out]")
1616
end
1717

1818
do put_macro("gndCorporationCombinedLabel") # in contrast to morph this is not normalizing utf 8
@@ -157,11 +157,11 @@ end
157157
# copy_field("008A.a","teilbest")
158158

159159
do list(path:"041A*|041A","var":"$i") # Until https://github.com/metafacture/metafacture-core/issues/651 is fixed one has to add "041A"
160-
trim("$i.9")
161-
to_var("$i.9","ref")
162-
if exists("$i.9")
163-
add_field("{to:$[ref]}refed","")
164-
end
160+
trim("$i.9")
161+
to_var("$i.9","ref")
162+
if exists("$i.9")
163+
add_field("{to:$[ref]}refed","")
164+
end
165165
end
166166

167167
copy_field("@name","gnd.name")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// REQUIRES THE METAFACTURE-SEARCH PLUGIN
2+
3+
default gnd = FLUX_DIR + "Tp-200.pica.gz";
4+
default beaconDir = FLUX_DIR + "beacons";
5+
6+
"reading beacons from " + beaconDir | write("stdout");
7+
8+
beaconDir|
9+
read-dir|
10+
log-object|
11+
catch-object-exception|
12+
open-file|
13+
read-beacon(metadatafilter="name|description|institution")|
14+
stream-to-triples|
15+
@X;
16+
17+
"reading GND dump from " + gnd | write("stdout");
18+
19+
gnd|
20+
open-file|
21+
as-lines|
22+
object-batch-log(batchSize="100000")|
23+
decode-pica|
24+
stream-to-triples|
25+
@X;
26+
27+
@X|
28+
sort-triples(by="subject")|
29+
collect-triples|
30+
fix(
31+
"unless any_match('002@.0', '.p.*') # Accept only Tp records
32+
reject()
33+
end"
34+
)|
35+
encode-formeta|
36+
write("stdout");
37+
//stream-to-index(FLUX_DIR + "id.xml", indexpath="Tp_ix");
38+

0 commit comments

Comments
 (0)