From be2aa539a921cc1e90ab97ee74657109f0af2ccc Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Tue, 19 Dec 2017 15:29:06 +0100 Subject: [PATCH 1/5] Make import of std.internal.unicode_tables lazy. There's no need to open and parse the gigantic `std.internal.unicode_tables` if it's never needed. --- std/uni.d | 188 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 153 insertions(+), 35 deletions(-) diff --git a/std/uni.d b/std/uni.d index 7ff90f363dd..75286953c8b 100644 --- a/std/uni.d +++ b/std/uni.d @@ -6079,12 +6079,16 @@ else return findUnicodeSet!table(name) >= 0; } -template SetSearcher(alias table, string kind) +template SetSearcher(string tableName, string kind) { /// Run-time checked search. static auto opCall(C)(in C[] name) if (is(C : dchar)) { + // lazily import the big unicode_table + mixin(q{ + import std.internal.unicode_tables; + alias table = } ~ tableName ~ ";"); import std.conv : to; CodepointSet set; if (loadUnicodeSet!table(name, set)) @@ -6095,6 +6099,10 @@ template SetSearcher(alias table, string kind) /// Compile-time checked search. 
static @property auto opDispatch(string name)() { + // lazily import the big unicode_table + mixin(q{ + import std.internal.unicode_tables; + alias table = } ~ tableName ~ ";"); static if (findSetName!table(name)) { CodepointSet set; @@ -6760,8 +6768,7 @@ auto caseEnclose(CodepointSet set) */ struct block { - import std.internal.unicode_tables : blocks; // generated file - mixin SetSearcher!(blocks.tab, "block"); + mixin SetSearcher!("blocks.tab", "block"); } /// @@ -6779,8 +6786,7 @@ auto caseEnclose(CodepointSet set) */ struct script { - import std.internal.unicode_tables : scripts; // generated file - mixin SetSearcher!(scripts.tab, "script"); + mixin SetSearcher!("scripts.tab", "script"); } /// @@ -6810,8 +6816,7 @@ auto caseEnclose(CodepointSet set) */ struct hangulSyllableType { - import std.internal.unicode_tables : hangul; // generated file - mixin SetSearcher!(hangul.tab, "hangul syllable type"); + mixin SetSearcher!("hangul.tab", "hangul syllable type"); } /// @@ -8771,20 +8776,27 @@ else // trusted -> avoid bounds check @trusted pure nothrow @nogc private { - import std.internal.unicode_tables; // : toLowerTable, toTitleTable, toUpperTable; // generated file - // hide template instances behind functions (Bugzilla 13232) ushort toLowerIndex(dchar c) { return toLowerIndexTrie[c]; } ushort toLowerSimpleIndex(dchar c) { return toLowerSimpleIndexTrie[c]; } - dchar toLowerTab(size_t idx) { return toLowerTable[idx]; } + dchar toLowerTab()(size_t idx) { + import std.internal.unicode_tables : toLowerTable; // generated file + return toLowerTable[idx]; + } ushort toTitleIndex(dchar c) { return toTitleIndexTrie[c]; } ushort toTitleSimpleIndex(dchar c) { return toTitleSimpleIndexTrie[c]; } - dchar toTitleTab(size_t idx) { return toTitleTable[idx]; } + dchar toTitleTab()(size_t idx) { + import std.internal.unicode_tables : toTitleTable; // generated file + return toTitleTable[idx]; + } ushort toUpperIndex(dchar c) { return toUpperIndexTrie[c]; } ushort 
toUpperSimpleIndex(dchar c) { return toUpperSimpleIndexTrie[c]; } - dchar toUpperTab(size_t idx) { return toUpperTable[idx]; } + dchar toUpperTab()(size_t idx) { + import std.internal.unicode_tables : toUpperTable; // generated file + return toUpperTable[idx]; + } } public: @@ -8888,8 +8900,11 @@ private dchar toTitlecase(dchar c) return c; } -private alias UpperTriple = AliasSeq!(toUpperIndex, MAX_SIMPLE_UPPER, toUpperTab); -private alias LowerTriple = AliasSeq!(toLowerIndex, MAX_SIMPLE_LOWER, toLowerTab); +enum MAX_SIMPLE_UPPER = 1051; +alias UpperTriple = AliasSeq!(toUpperIndex, MAX_SIMPLE_UPPER, toUpperTab); + +enum MAX_SIMPLE_LOWER = 1043; +alias LowerTriple = AliasSeq!(toLowerIndex, MAX_SIMPLE_LOWER, toLowerTab); // generic toUpper/toLower on whole string, creates new or returns as is private S toCase(alias indexFn, uint maxIdx, alias tableFn, alias asciiConvert, S)(S s) @trusted pure @@ -10320,30 +10335,98 @@ private: return CodepointSet.fromIntervals(decompressIntervals(compressed)); } -@safe pure nothrow auto asTrie(T...)(in TrieEntry!T e) +auto asTrie(A)(in A a) { - return const(CodepointTrie!T)(e.offsets, e.sizes, e.data); + import std.internal.unicode_tables : TrieEntry; // generated file + static auto asTrieImpl(T...)(in TrieEntry!T e) + { + return const(CodepointTrie!T)(e.offsets, e.sizes, e.data); + } + return asTrieImpl(a); } @safe pure nothrow @nogc @property { - import std.internal.unicode_tables; // generated file - // It's important to use auto return here, so that the compiler // only runs semantic on the return type if the function gets // used. Also these are functions rather than templates to not // increase the object size of the caller. 
- auto lowerCaseTrie() { static immutable res = asTrie(lowerCaseTrieEntries); return res; } - auto upperCaseTrie() { static immutable res = asTrie(upperCaseTrieEntries); return res; } - auto simpleCaseTrie() { static immutable res = asTrie(simpleCaseTrieEntries); return res; } - auto fullCaseTrie() { static immutable res = asTrie(fullCaseTrieEntries); return res; } - auto alphaTrie() { static immutable res = asTrie(alphaTrieEntries); return res; } - auto markTrie() { static immutable res = asTrie(markTrieEntries); return res; } - auto numberTrie() { static immutable res = asTrie(numberTrieEntries); return res; } - auto punctuationTrie() { static immutable res = asTrie(punctuationTrieEntries); return res; } - auto symbolTrie() { static immutable res = asTrie(symbolTrieEntries); return res; } - auto graphicalTrie() { static immutable res = asTrie(graphicalTrieEntries); return res; } - auto nonCharacterTrie() { static immutable res = asTrie(nonCharacterTrieEntries); return res; } + auto lowerCaseTrie() + { + import std.internal.unicode_tables : lowerCaseTrieEntries; // generated file + static immutable res = asTrie(lowerCaseTrieEntries); + return res; + } + + auto upperCaseTrie() + { + import std.internal.unicode_tables : upperCaseTrieEntries; // generated file + static immutable res = asTrie(upperCaseTrieEntries); + return res; + } + + auto simpleCaseTrie() + { + import std.internal.unicode_tables : simpleCaseTrieEntries; // generated file + static immutable res = asTrie(simpleCaseTrieEntries); + return res; + } + + auto fullCaseTrie() + { + import std.internal.unicode_tables : fullCaseTrieEntries; // generated file + static immutable res = asTrie(fullCaseTrieEntries); + return res; + } + + auto alphaTrie() + { + import std.internal.unicode_tables : alphaTrieEntries; // generated file + static immutable res = asTrie(alphaTrieEntries); + return res; + } + + auto markTrie() + { + import std.internal.unicode_tables : markTrieEntries; // generated file + static immutable 
res = asTrie(markTrieEntries); + return res; + } + + auto numberTrie() + { + import std.internal.unicode_tables : numberTrieEntries; // generated file + static immutable res = asTrie(numberTrieEntries); + return res; + } + + auto punctuationTrie() + { + import std.internal.unicode_tables : punctuationTrieEntries; // generated file + static immutable res = asTrie(punctuationTrieEntries); + return res; + } + + auto symbolTrie() + { + import std.internal.unicode_tables : symbolTrieEntries; // generated file + static immutable res = asTrie(symbolTrieEntries); + return res; + } + + auto graphicalTrie() + { + import std.internal.unicode_tables : graphicalTrieEntries; // generated file + static immutable res = asTrie(graphicalTrieEntries); + return res; + } + + auto nonCharacterTrie() + { + import std.internal.unicode_tables : nonCharacterTrieEntries; // generated file + static immutable res = asTrie(nonCharacterTrieEntries); + return res; + } //normalization quick-check tables auto nfcQCTrie() @@ -10433,13 +10516,48 @@ private: } //case conversion tables - auto toUpperIndexTrie() { static immutable res = asTrie(toUpperIndexTrieEntries); return res; } - auto toLowerIndexTrie() { static immutable res = asTrie(toLowerIndexTrieEntries); return res; } - auto toTitleIndexTrie() { static immutable res = asTrie(toTitleIndexTrieEntries); return res; } + auto toUpperIndexTrie() + { + import std.internal.unicode_tables : toUpperIndexTrieEntries; // generated file + static immutable res = asTrie(toUpperIndexTrieEntries); + return res; + } + + auto toLowerIndexTrie() + { + import std.internal.unicode_tables : toLowerIndexTrieEntries; // generated file + static immutable res = asTrie(toLowerIndexTrieEntries); + return res; + } + + auto toTitleIndexTrie() + { + import std.internal.unicode_tables : toTitleIndexTrieEntries; // generated file + static immutable res = asTrie(toTitleIndexTrieEntries); + return res; + } + //simple case conversion tables - auto toUpperSimpleIndexTrie() { 
static immutable res = asTrie(toUpperSimpleIndexTrieEntries); return res; } - auto toLowerSimpleIndexTrie() { static immutable res = asTrie(toLowerSimpleIndexTrieEntries); return res; } - auto toTitleSimpleIndexTrie() { static immutable res = asTrie(toTitleSimpleIndexTrieEntries); return res; } + auto toUpperSimpleIndexTrie() + { + import std.internal.unicode_tables : toUpperSimpleIndexTrieEntries; // generated file + static immutable res = asTrie(toUpperSimpleIndexTrieEntries); + return res; + } + + auto toLowerSimpleIndexTrie() + { + import std.internal.unicode_tables : toLowerSimpleIndexTrieEntries; // generated file + static immutable res = asTrie(toLowerSimpleIndexTrieEntries); + return res; + } + + auto toTitleSimpleIndexTrie() + { + import std.internal.unicode_tables : toTitleSimpleIndexTrieEntries; // generated file + static immutable res = asTrie(toTitleSimpleIndexTrieEntries); + return res; + } } From 39c6d2591e108a350093e6be006715d5dadfbe75 Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sat, 23 Dec 2017 21:04:51 +0100 Subject: [PATCH 2/5] Make MAX_SIMPLE_{UPPER,LOWER} import lazy --- std/uni.d | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/std/uni.d b/std/uni.d index 75286953c8b..f1f63036fbe 100644 --- a/std/uni.d +++ b/std/uni.d @@ -8900,11 +8900,25 @@ private dchar toTitlecase(dchar c) return c; } -enum MAX_SIMPLE_UPPER = 1051; -alias UpperTriple = AliasSeq!(toUpperIndex, MAX_SIMPLE_UPPER, toUpperTab); - -enum MAX_SIMPLE_LOWER = 1043; -alias LowerTriple = AliasSeq!(toLowerIndex, MAX_SIMPLE_LOWER, toLowerTab); +/* +Needs to be a function to make the import of std.internal.unicode_tables lazy +As of 2.078.0 DMD +- doesn't allow templated structs with zero arguments nor +- doesn't allow tuples (AliasSeq) as return value +- doesn't perform imports in structs lazily +*/ +private auto Triples() +{ + static struct TriplesImpl + { + import std.internal.unicode_tables : MAX_SIMPLE_UPPER, 
MAX_SIMPLE_LOWER; + //enum MAX_SIMPLE_UPPER = 1041; + alias UpperTriple = AliasSeq!(toUpperIndex, MAX_SIMPLE_UPPER, toUpperTab); + //enum MAX_SIMPLE_LOWER = 1043; + alias LowerTriple = AliasSeq!(toLowerIndex, MAX_SIMPLE_LOWER, toLowerTab); + } + return TriplesImpl(); +} // generic toUpper/toLower on whole string, creates new or returns as is private S toCase(alias indexFn, uint maxIdx, alias tableFn, alias asciiConvert, S)(S s) @trusted pure @@ -9080,7 +9094,7 @@ if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) && else { static import std.ascii; - return toCaser!(LowerTriple, std.ascii.toLower)(str); + return toCaser!(Triples.LowerTriple, std.ascii.toLower)(str); } } @@ -9099,7 +9113,7 @@ if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) && else { static import std.ascii; - return toCaser!(UpperTriple, std.ascii.toUpper)(str); + return toCaser!(Triples.UpperTriple, std.ascii.toUpper)(str); } } @@ -9292,11 +9306,11 @@ if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) && import std.utf : byDchar; // Decode first - return toCapitalizer!UpperTriple(str.byDchar); + return toCapitalizer!(Triples.UpperTriple)(str.byDchar); } else { - return toCapitalizer!UpperTriple(str); + return toCapitalizer!(Triples.UpperTriple)(str); } } @@ -9557,7 +9571,7 @@ private template toCaseLength(alias indexFn, uint maxIdx, alias tableFn) @safe unittest { - alias toLowerLength = toCaseLength!(LowerTriple); + alias toLowerLength = toCaseLength!(Triples.LowerTriple); assert(toLowerLength("abcd") == 4); assert(toLowerLength("аБВгд456") == 10+3); } @@ -9628,7 +9642,7 @@ private template toCaseInPlaceAlloc(alias indexFn, uint maxIdx, alias tableFn) void toLowerInPlace(C)(ref C[] s) @trusted pure if (is(C == char) || is(C == wchar) || is(C == dchar)) { - toCaseInPlace!(LowerTriple)(s); + toCaseInPlace!(Triples.LowerTriple)(s); } // overloads for the most common cases to reduce compile time @safe pure /*TODO nothrow*/ @@ -9650,7 +9664,7 @@ if (is(C == 
char) || is(C == wchar) || is(C == dchar)) void toUpperInPlace(C)(ref C[] s) @trusted pure if (is(C == char) || is(C == wchar) || is(C == dchar)) { - toCaseInPlace!(UpperTriple)(s); + toCaseInPlace!(Triples.UpperTriple)(s); } // overloads for the most common cases to reduce compile time/code size @safe pure /*TODO nothrow*/ @@ -9699,7 +9713,7 @@ S toLower(S)(S s) @trusted pure if (isSomeString!S) { static import std.ascii; - return toCase!(LowerTriple, std.ascii.toLower)(s); + return toCase!(Triples.LowerTriple, std.ascii.toLower)(s); } // overloads for the most common cases to reduce compile time @safe pure /*TODO nothrow*/ @@ -9881,7 +9895,7 @@ S toUpper(S)(S s) @trusted pure if (isSomeString!S) { static import std.ascii; - return toCase!(UpperTriple, std.ascii.toUpper)(s); + return toCase!(Triples.UpperTriple, std.ascii.toUpper)(s); } // overloads for the most common cases to reduce compile time @safe pure /*TODO nothrow*/ From 746684630bd2e729c3041548248dc099313c65e4 Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sat, 23 Dec 2017 21:05:29 +0100 Subject: [PATCH 3/5] Avoid string lambdas as they import the entire world --- std/uni.d | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/std/uni.d b/std/uni.d index f1f63036fbe..5d05efb2981 100644 --- a/std/uni.d +++ b/std/uni.d @@ -5935,7 +5935,7 @@ else import std.algorithm.iteration : map; import std.range : assumeSorted; auto range = assumeSorted!((a,b) => propertyNameLess(a,b)) - (table.map!"a.name"()); + (table.map!(a => a.name)()); size_t idx = range.lowerBound(name).length; if (idx < range.length && comparePropertyName(range[idx], name) == 0) return idx; From 8d58cc8a74c7861c64270cac0f20a177d7b4d1fa Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sat, 23 Dec 2017 21:21:32 +0100 Subject: [PATCH 4/5] Make std.exception imports local --- std/uni.d | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/std/uni.d b/std/uni.d index 5d05efb2981..73eb89872d4 100644 --- 
a/std/uni.d +++ b/std/uni.d @@ -6239,7 +6239,6 @@ auto caseEnclose(CodepointSet set) @safe struct UnicodeSetParser(Range) { - import std.exception : enforce; import std.typecons : tuple, Tuple; Range range; bool casefold_; @@ -6257,6 +6256,7 @@ auto caseEnclose(CodepointSet set) //also fetches next set operation Tuple!(CodepointSet,Operator) parseCharTerm() { + import std.exception : enforce; import std.range : drop; enum privateUseStart = '\U000F0000', privateUseEnd ='\U000FFFFD'; enum State{ Start, Char, Escape, CharDash, CharDashEscape, @@ -6563,6 +6563,7 @@ auto caseEnclose(CodepointSet set) CodepointSet parseSet() { + import std.exception : enforce; ValStack vstack; OpStack opstack; import std.functional : unaryFun; @@ -6695,7 +6696,6 @@ auto caseEnclose(CodepointSet set) */ @safe public struct unicode { - import std.exception : enforce; /** Performs the lookup of set of $(CODEPOINTS) with compile-time correctness checking. @@ -6833,6 +6833,7 @@ auto caseEnclose(CodepointSet set) //parse control code of form \cXXX, c assumed to be the current symbol static package dchar parseControlCode(Parser)(ref Parser p) { + import std.exception : enforce; with(p) { popFront(); @@ -6849,6 +6850,7 @@ auto caseEnclose(CodepointSet set) static package CodepointSet parsePropertySpec(Range)(ref Range p, bool negated, bool casefold) { + import std.exception : enforce; static import std.ascii; with(p) { @@ -7392,7 +7394,6 @@ if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar)) +/ @trusted struct Grapheme { - import std.exception : enforce; import std.traits : isDynamicArray; public: From 8336444c568aeb805a31354dccc65f929c31efab Mon Sep 17 00:00:00 2001 From: Sebastian Wilzbach Date: Sat, 23 Dec 2017 21:22:34 +0100 Subject: [PATCH 5/5] Templatize std.uni for lazy imports --- std/uni.d | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/std/uni.d b/std/uni.d index 73eb89872d4..95014a8ef17 100644 --- a/std/uni.d +++ b/std/uni.d @@ -1588,7 
+1588,7 @@ private auto packedArrayView(T)(inout(size_t)* ptr, size_t items) @trusted pure // Partially unrolled binary search using Shar's method //============================================================================ -string genUnrolledSwitchSearch(size_t size) @safe pure nothrow +auto genUnrolledSwitchSearch(size_t size) @safe pure nothrow { import core.bitop : bsr; import std.array : replace; @@ -2760,7 +2760,7 @@ public: } --- */ - string toSourceCode(string funcName="") + string toSourceCode()(string funcName="") { import std.array : array; auto range = byInterval.array(); @@ -6135,7 +6135,7 @@ package @trusted auto memoizeExpr(string expr)() } //property for \w character class -package @property @safe CodepointSet wordCharacter() +package @property @safe auto wordCharacter() { return memoizeExpr!("unicode.Alphabetic | unicode.Mn | unicode.Mc | unicode.Me | unicode.Nd | unicode.Pc")(); @@ -6212,7 +6212,7 @@ package dchar parseUniHex(Range)(ref Range str, size_t maxDigit) .canFind("invalid codepoint")); } -auto caseEnclose(CodepointSet set) +auto caseEnclose()(CodepointSet set) { auto cased = set & unicode.LC; foreach (dchar ch; cased.byCodepoint) @@ -6226,7 +6226,7 @@ auto caseEnclose(CodepointSet set) /+ fetch codepoint set corresponding to a name (InBlock or binary property) +/ -@trusted CodepointSet getUnicodeSet(in char[] name, bool negated, bool casefold) +@trusted CodepointSet getUnicodeSet()(in char[] name, bool negated, bool casefold) { CodepointSet s = unicode(name); //FIXME: caseEnclose for new uni as Set | CaseEnclose(SET && LC)