@@ -656,47 +656,189 @@ private void addHighlightRule(String reference, List<String> parts, boolean case
656
656
Styles .StyleCompiler sh = new Styles .StyleCompiler (spec , true );
657
657
AttributedStyle style = new StyleResolver (sh ::getStyle ).resolve ("." + reference );
658
658
659
- if (HighlightRule .evalRuleType (parts ) == HighlightRule .RuleType .PATTERN ) {
660
- if (parts .size () == 2 ) {
661
- highlightRules .get (tokenName ).add (new HighlightRule (style , doPattern (".*" , caseInsensitive )));
662
- } else {
663
- for (int i = 2 ; i < parts .size (); i ++) {
664
- highlightRules
665
- .get (tokenName )
666
- .add (new HighlightRule (style , doPattern (parts .get (i ), caseInsensitive )));
659
+ try {
660
+ if (HighlightRule .evalRuleType (parts ) == HighlightRule .RuleType .PATTERN ) {
661
+ if (parts .size () == 2 ) {
662
+ highlightRules .get (tokenName ).add (new HighlightRule (style , doPattern (".*" , caseInsensitive )));
663
+ } else {
664
+ for (int i = 2 ; i < parts .size (); i ++) {
665
+ highlightRules
666
+ .get (tokenName )
667
+ .add (new HighlightRule (style , doPattern (parts .get (i ), caseInsensitive )));
668
+ }
667
669
}
670
+ } else if (HighlightRule .evalRuleType (parts ) == HighlightRule .RuleType .START_END ) {
671
+ String s = parts .get (2 );
672
+ String e = parts .get (3 );
673
+ highlightRules
674
+ .get (tokenName )
675
+ .add (new HighlightRule (
676
+ style ,
677
+ doPattern (s .substring (7 , s .length () - 1 ), caseInsensitive ),
678
+ doPattern (e .substring (5 , e .length () - 1 ), caseInsensitive )));
679
+ } else if (HighlightRule .evalRuleType (parts ) == HighlightRule .RuleType .PARSER_START_WITH ) {
680
+ highlightRules
681
+ .get (tokenName )
682
+ .add (new HighlightRule (
683
+ HighlightRule .RuleType .PARSER_START_WITH ,
684
+ style ,
685
+ parts .get (2 ).substring (10 )));
686
+ } else if (HighlightRule .evalRuleType (parts ) == HighlightRule .RuleType .PARSER_CONTINUE_AS ) {
687
+ highlightRules
688
+ .get (tokenName )
689
+ .add (new HighlightRule (
690
+ HighlightRule .RuleType .PARSER_CONTINUE_AS ,
691
+ style ,
692
+ parts .get (2 ).substring (11 )));
668
693
}
669
- } else if (HighlightRule .evalRuleType (parts ) == HighlightRule .RuleType .START_END ) {
670
- String s = parts .get (2 );
671
- String e = parts .get (3 );
672
- highlightRules
673
- .get (tokenName )
674
- .add (new HighlightRule (
675
- style ,
676
- doPattern (s .substring (7 , s .length () - 1 ), caseInsensitive ),
677
- doPattern (e .substring (5 , e .length () - 1 ), caseInsensitive )));
678
- } else if (HighlightRule .evalRuleType (parts ) == HighlightRule .RuleType .PARSER_START_WITH ) {
679
- highlightRules
680
- .get (tokenName )
681
- .add (new HighlightRule (
682
- HighlightRule .RuleType .PARSER_START_WITH ,
683
- style ,
684
- parts .get (2 ).substring (10 )));
685
- } else if (HighlightRule .evalRuleType (parts ) == HighlightRule .RuleType .PARSER_CONTINUE_AS ) {
686
- highlightRules
687
- .get (tokenName )
688
- .add (new HighlightRule (
689
- HighlightRule .RuleType .PARSER_CONTINUE_AS ,
690
- style ,
691
- parts .get (2 ).substring (11 )));
694
+ } catch (PatternSyntaxException e ) {
695
+ Log .warn ("Invalid highlight regex" , reference , parts , e );
696
+ } catch (Exception e ) {
697
+ Log .warn ("Failure while handling highlight regex" , reference , parts , e );
692
698
}
693
699
}
694
700
695
701
private Pattern doPattern (String regex , boolean caseInsensitive ) {
702
+ regex = posixToJavaRegex (regex );
696
703
return caseInsensitive ? Pattern .compile (regex , Pattern .CASE_INSENSITIVE ) : Pattern .compile (regex );
697
704
}
698
705
}
699
706
707
+ /**
708
+ * Posix regex is different from Java regex. This function parses the given Posix regex and escapes according to these rules:
709
+ *
710
+ * <p>The first {@code ]} in a bracket expression does not need to be escaped in Posix,translate to {@code \]}.
711
+ *
712
+ * <p>Same as above for a negating bracket expression like {@code [^][]}, translate to {@code [^\]\[]}.
713
+ *
714
+ * <p>Any {@code [} in a bracket expression does not need to be escaped in Posix, translate to {@code \[}.
715
+ *
716
+ * <p>Any {@code ]} not in a bracket expression is valid in both Posix and Java, no translation.
717
+ *
718
+ * <p>A backslash before the closing bracket like {@code [.f\]} is not an escape of the closing bracket,
719
+ * the backslash needs to be escaped for Java, translate to {@code [.f\\]}.
720
+ *
721
+ * <p>Do not perform the above translations within an escape via {@code \}.
722
+ *
723
+ * <p>Do not perform the above translations for Posix "classes" like {@code [[:word:]]} or {@code [[:digit:]]}
724
+ * and their negation {@code [-[:word]]}.
725
+ *
726
+ * <p>Do not perform the above translations for single-bracket Posix classes like {@code [:digit:]},
727
+ * and handle the case of single-bracket Posix classes inside bracket expressions, like
728
+ * @code {[[:digit:]-.]}.
729
+ *
730
+ * @param posix Posix regex
731
+ * @return Java regex
732
+ */
733
+ static String posixToJavaRegex (String posix ) {
734
+ int len = posix .length ();
735
+ StringBuilder java = new StringBuilder ();
736
+
737
+ boolean inBracketExpression = false ;
738
+
739
+ int i = 0 ;
740
+ char next ;
741
+ try {
742
+ for (; i < len ; i ++) {
743
+ char c = posix .charAt (i );
744
+
745
+ switch (c ) {
746
+ case '\\' :
747
+ next = posix .charAt (++i );
748
+ // Don't translate anything after the \ character escape
749
+ if (inBracketExpression && next == ']' ) {
750
+ inBracketExpression = false ;
751
+ java .append ("\\ \\ " ).append (next );
752
+ } else {
753
+ java .append (c ).append (next );
754
+ }
755
+ break ;
756
+ case '[' :
757
+ if (i == len - 1 ) {
758
+ throw new IllegalArgumentException ("Lone [ at the end of (index " + i + "): " + posix );
759
+ }
760
+ // Handle "double bracket" Posix "classes" like [[:word:]] or [[:digit:]] and their negations
761
+ // starting with [-[:
762
+ if (posix .regionMatches (i , "[[:" , 0 , 3 ) || posix .regionMatches (i , "[-[:" , 0 , 4 )) {
763
+ int afterClass = nextAfterClass (posix , i + 3 );
764
+ if (posix .regionMatches (afterClass , ":]]" , 0 , 3 )) {
765
+ java .append (posix , i , afterClass + 3 );
766
+ i = afterClass + 2 ;
767
+ break ;
768
+ } else if (posix .regionMatches (afterClass , ":]" , 0 , 2 )) {
769
+ if (inBracketExpression ) {
770
+ throw new IllegalArgumentException ("Unclear bracket expression" );
771
+ }
772
+ // Handles character patterns like [[:alpha:]_-]
773
+ java .append (posix , i , afterClass + 2 );
774
+ i = afterClass + 1 ;
775
+ inBracketExpression = true ;
776
+ break ;
777
+ } else {
778
+ throw new IllegalArgumentException ("Invalid character class" );
779
+ }
780
+ }
781
+ // Handle "single bracket" Posix "classes" like [:word:]
782
+ else if (posix .charAt (i + 1 ) == ':' ) {
783
+ int afterClass = nextAfterClass (posix , i + 2 );
784
+ if (!posix .regionMatches (afterClass , ":]" , 0 , 2 )) {
785
+ java .append ("[:" );
786
+ i ++;
787
+ inBracketExpression = true ;
788
+ } else {
789
+ java .append (posix , i , afterClass + 2 );
790
+ i = afterClass + 1 ;
791
+ }
792
+ break ;
793
+ }
794
+ if (inBracketExpression ) {
795
+ // Translate lone [ to \[
796
+ java .append ('\\' ).append (c );
797
+ } else {
798
+ inBracketExpression = true ;
799
+ java .append (c );
800
+ next = posix .charAt (i + 1 );
801
+ if (next == ']' ) {
802
+ i ++;
803
+ java .append ("\\ ]" );
804
+ } else if (next == '^' && posix .charAt (i + 2 ) == ']' ) {
805
+ i += 2 ;
806
+ java .append ("^\\ ]" );
807
+ }
808
+ }
809
+ break ;
810
+ case ']' :
811
+ if (inBracketExpression ) {
812
+ inBracketExpression = false ;
813
+ }
814
+ java .append (c );
815
+ break ;
816
+ default :
817
+ java .append (c );
818
+ break ;
819
+ }
820
+ }
821
+ } catch (Exception e ) {
822
+ throw new IllegalArgumentException (
823
+ "Posix-to-Java regex translation failed around index " + i + " of: " + posix , e );
824
+ }
825
+ return java .toString ();
826
+ }
827
+
828
+ private static int nextAfterClass (String s , int idx ) {
829
+ if (s .charAt (idx ) == ':' ) {
830
+ idx ++;
831
+ }
832
+ while (true ) {
833
+ char c = s .charAt (idx );
834
+ if (!Character .isLetterOrDigit (c )) {
835
+ break ;
836
+ }
837
+ idx ++;
838
+ }
839
+ return idx ;
840
+ }
841
+
700
842
protected static class RuleSplitter {
701
843
protected static List <String > split (String s ) {
702
844
List <String > out = new ArrayList <>();
0 commit comments