diff --git a/nonbreakablespace/README.md b/nonbreakablespace/README.md new file mode 100644 index 00000000..7f534958 --- /dev/null +++ b/nonbreakablespace/README.md @@ -0,0 +1,33 @@ +# Non-breakable space filter + +This filter replaces regular spaces with non-breakable spaces according to +predefined conditions. + +Rules for space replacement are defined for two languages: English and Czech +(default is English) in `prefixes` tables. Also, non-breakable spaces are +inserted in front of dashes and in front of numbers. Rules for inserting +non-breakable spaces in English are not as firm as in the author's native language +(Czech), but some typographic conventions suggest inserting a non-breakable space +after the words "I", "the", "The", "a", "A". Any suggestions regarding improvement +of English support in this filter are highly welcome. +Some extra effort is taken in detecting these patterns in *not-fully* parsed +strings (for example, if this filter is used after some macro-replacing +filter). + +In this regard this filter functions similarly to the TeX `vlna` preprocessor +(only Czech) or the LuaTeX `luavlna` package (international). + +The default settings can be changed easily by user customization in the filter file +`pandocVlna.lua` by changing the contents of the `prefixes` or `dashes` tables. + +Currently supported formats are: + +* LaTeX and ConTeXt +* Open Office Document +* MS Word +* HTML + +For other formats, the filter defaults to inserting the escaped Unicode sequence `\u{a0}`. + +**NOTE**: Using this filter increases strain on line-breaking patterns. Whenever +possible, consider allowing hyphenation. diff --git a/nonbreakablespace/expectedCZ.html b/nonbreakablespace/expectedCZ.html new file mode 100644 index 00000000..ef5332d8 --- /dev/null +++ b/nonbreakablespace/expectedCZ.html @@ -0,0 +1,12 @@ +

Tests

+

Basic test

+

a test i test k test o test s test u test v test z test A test I test K test O test S test U test V test Z test – test – test

+

Test with numbers

+

Test 19 test “19” test

+

Test of double prefixes.

+

A i test, i v test, a k test, a v test.

+

Test of block code

+
a = 5
+k = "test"
+

Test of inline code

+

Test a = 5 test

diff --git a/nonbreakablespace/expectedEN.html b/nonbreakablespace/expectedEN.html new file mode 100644 index 00000000..c1f2c504 --- /dev/null +++ b/nonbreakablespace/expectedEN.html @@ -0,0 +1,12 @@ +

Tests

+

Basic test

+

a test i test A test I test the test The test – test – test

+

Test with numbers

+

Test 19 test “19” test

+

Test of double prefixes.

+

A i test, i v test, a k test, a v test.

+

Test of block code

+
a = 5
+k = "test"
+

Test of inline code

+

Test a = 5 test

diff --git a/nonbreakablespace/makefile b/nonbreakablespace/makefile new file mode 100644 index 00000000..3f634b1c --- /dev/null +++ b/nonbreakablespace/makefile @@ -0,0 +1,6 @@ +DIFF ?= diff --strip-trailing-cr -u + +test: + @pandoc --lua-filter=pandocVlna.lua sampleCZ.md | $(DIFF) expectedCZ.html - + @pandoc --lua-filter=pandocVlna.lua sampleEN.md | $(DIFF) expectedEN.html - +.PHONY: test diff --git a/nonbreakablespace/pandocVlna.lua b/nonbreakablespace/pandocVlna.lua new file mode 100644 index 00000000..0bbce484 --- /dev/null +++ b/nonbreakablespace/pandocVlna.lua @@ -0,0 +1,183 @@ +--[[ +pandocVlna.lua - Filter to automatically insert non-breakable spaces in specific +locations in text. + +Currently supports Czech and English languages, with default being set to +English. PRs or suggestions leading to improvement of current features or +adding support for other languages are highly welcome. +Inspired by similar tools in the TeX toolchain: `luavlna` and `vlna`. + +Author: Tomas Krulis (with substantial help from Albert Krewinkel) +License: MIT - more details in LICENSE file in repository root directory +--]] + +local utils = require 'pandoc.utils' +local stringify = utils.stringify + +--[[ +Tables of short prefix words after which '\160' should be inserted. +Verbose, but can be changed per user requirements. 
--]]

-- UTF-8 encoded NO-BREAK SPACE (U+00A0). Used wherever the replacement has to
-- live inside a plain string (a `Str` text) instead of being its own inline.
local NBSP = '\u{a0}'

-- Active prefix table; `Meta` points it at one of the language tables below.
local prefixes = {}

-- English words after which a line break is discouraged.
local prefixesEN = {
  ['I'] = true,
  ['a'] = true,
  ['A'] = true,
  ['the'] = true,
  ['The'] = true
}

-- Czech one-letter prepositions and conjunctions (lower and upper case).
local prefixesCZ = {
  ['a'] = true,
  ['i'] = true,
  ['k'] = true,
  ['o'] = true,
  ['s'] = true,
  ['u'] = true,
  ['v'] = true,
  ['z'] = true,
  ['A'] = true,
  ['I'] = true,
  ['K'] = true,
  ['O'] = true,
  ['S'] = true,
  ['U'] = true,
  ['V'] = true,
  ['Z'] = true
}

--[[
Set `prefixes` according to the `lang` metadata value.

* `meta` - document metadata; only `meta.lang` is read.

Czech prefixes are selected for `cs` and for region/script qualified tags such
as `cs-CZ`; anything else (including a missing `lang`) falls back to English.
Returns nothing, so the metadata itself is left unchanged.
--]]
function Meta(meta)
  prefixes = prefixesEN -- default to english prefixes

  if meta.lang then
    -- `local` keeps the value from leaking into the global environment
    local lang = stringify(meta.lang)
    if lang == 'cs' or lang:match('^cs[-_]') then
      prefixes = prefixesCZ
    end
  end
end

--[[
Some languages (czech among them) require nonbreakable space *before* long dash
--]]

local dashes = {
  ['--'] = true,
  ['–'] = true
}

--[[
Table of replacement elements, keyed by output format.
The HTML value is spelled as an entity so that it cannot be mistaken for (or
silently turned into) an ordinary space.
--]]

local nonbreakablespaces = {
  html = '&nbsp;',
  latex = '~',
  context = '~'
}

--[[
Build the inline element that replaces a `Space` for the given output format.

* `format` - output format name (the filter passes pandoc's `FORMAT`).

Returns a raw inline for HTML, LaTeX and ConTeXt; every other format gets a
`Str` holding the Unicode non-breakable space.
--]]

function insert_nonbreakable_space(format)
  if format == 'html' then
    return pandoc.RawInline('html', nonbreakablespaces.html)
  elseif format:match 'latex' then
    return pandoc.RawInline('tex', nonbreakablespaces.latex)
  elseif format:match 'context' then
    -- use the ConTeXt entry, not the LaTeX one, so the two can be customized
    -- independently
    return pandoc.RawInline('tex', nonbreakablespaces.context)
  else
    -- fallback to inserting non-breakable space unicode symbol
    -- pandoc.Str '\xc2\xa0' -- also works
    return pandoc.Str(NBSP)
  end
end

--[[
Decide whether the whitespace between two neighbouring inlines must become
non-breakable.

* `previous`  - inline directly before the whitespace
* `following` - inline directly after the whitespace

True when the whitespace follows a known prefix, or precedes a dash or a `Str`
that contains at least one digit. The digit test is deliberately loose: such
elements might not be fully parsed if other filters ran before this one, so
digits wrapped in other characters count as well.
--]]
local function needs_nonbreakable_space(previous, following)
  if previous.t == 'Str' and prefixes[previous.text] then
    return true
  end

  if following.t == 'Str' then
    if dashes[following.text] then
      return true
    end
    if following.text:find('%d') then
      return true
    end
  end

  return false
end

--[[
Replace the whitespace after every prefix found *inside* a string.

* `text` - content of a `Str` element

A `Str` normally contains no whitespace; it does only when a preceding filter
created it. Each whitespace-delimited word that is a known prefix gets its
trailing whitespace replaced with a non-breakable space; everything else is
kept as is. Returns the (possibly unchanged) string.
--]]
local function protect_prefixes_in_text(text)
  -- extra parentheses drop the substitution count returned by gsub
  return (text:gsub('(%S+)%s+', function (word)
    if prefixes[word] then
      return word .. NBSP
    end
    return nil -- nil keeps the matched text untouched
  end))
end

--[[
Core filter function:

* It iterates over all inline elements in block
* If it finds a Space or SoftBreak element, it uses the helpers above to look
for `prefixes` before it, or `dashes` / digits after it, and replaces the
element with the format specific non-breakable space
* If it finds a Str element, prefixes embedded in its text are protected too
* Returns modified list of inlines
--]]

function Inlines (inlines)

  -- variable holding replacement value for the non-breakable space
  local nbsp = insert_nonbreakable_space(FORMAT)

  for i = 1, #inlines do
    local current = inlines[i]
    local previous = inlines[i - 1]  -- nil for the first element
    local following = inlines[i + 1] -- nil for the last element

    if current.t == 'Space' or current.t == 'SoftBreak' then
      -- leading/trailing whitespace has nothing to be glued to
      if previous and following
        and needs_nonbreakable_space(previous, following) then
        inlines[i] = nbsp
      end

    elseif current.t == 'Str' then
      -- Checked for every Str, including the first and the last one, so a
      -- lone unparsed string is handled as well.
      local protected = protect_prefixes_in_text(current.text)
      if protected ~= current.text then
        current.text = protected
        inlines[i] = current
      end
    end
  end

  return inlines
end

-- The filter list returned below fixes the order of execution: Meta first (to
-- select the prefixes), then Inlines.
+return { + {Meta = Meta}, + {Inlines = Inlines}, +} diff --git a/nonbreakablespace/sampleCZ.md b/nonbreakablespace/sampleCZ.md new file mode 100644 index 00000000..0aaf8988 --- /dev/null +++ b/nonbreakablespace/sampleCZ.md @@ -0,0 +1,29 @@ +--- +lang: cs +--- + +# Tests + +## Basic test + +a test i test k test o test s test u test v test z test A test I test K test O +test S test U test V test Z test -- test – test + +## Test with numbers + +Test 19 test "19" test + +## Test of double prefixes. + +A i test, i v test, a k test, a v test. + +## Test of block code + +``` +a = 5 +k = "test" +``` + +## Test of inline code + +Test `a = 5` test diff --git a/nonbreakablespace/sampleEN.md b/nonbreakablespace/sampleEN.md new file mode 100644 index 00000000..5dc87fa1 --- /dev/null +++ b/nonbreakablespace/sampleEN.md @@ -0,0 +1,24 @@ +# Tests + +## Basic test + +a test i test A test I test the test The test -- test – test + +## Test with numbers + +Test 19 test "19" test + +## Test of double prefixes. + +A i test, i v test, a k test, a v test. + +## Test of block code + +``` +a = 5 +k = "test" +``` + +## Test of inline code + +Test `a = 5` test