Skip to content

Commit 342a025

Browse files
committed
Rebase with origin/main, fix JUnit 5 migration errors
1 parent 49f51ef commit 342a025

File tree

2 files changed

+192
-112
lines changed

2 files changed

+192
-112
lines changed

opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,15 @@
3333
import javax.xml.xpath.XPathExpressionException;
3434
import javax.xml.xpath.XPathFactory;
3535

36+
import org.w3c.dom.Document;
3637
import org.w3c.dom.Element;
3738
import org.w3c.dom.Node;
3839
import org.w3c.dom.NodeList;
3940
import org.w3c.dom.Text;
4041
import org.xml.sax.SAXException;
4142

43+
import opennlp.tools.sentdetect.segment.LanguageRule;
44+
import opennlp.tools.sentdetect.segment.Rule;
4245
import opennlp.tools.util.InvalidFormatException;
4346
import opennlp.tools.util.XmlUtil;
4447
import opennlp.tools.util.model.ArtifactSerializer;
@@ -521,4 +524,79 @@ public boolean getBool(String name, boolean defValue) throws InvalidFormatExcept
521524
*/
522525
public abstract AdaptiveFeatureGenerator create() throws InvalidFormatException;
523526
}
527+
528+
public static Map<String, LanguageRule> getLanguageRules(InputStream xmlDescriptionIn) throws IOException {
529+
Document xmlDocument = createDOM(xmlDescriptionIn);
530+
Element element = xmlDocument.getDocumentElement();
531+
String tagName = element.getTagName();
532+
533+
Map<String, LanguageRule> mapping = new HashMap<>();
534+
if ("languageRules".equals(tagName)) {
535+
NodeList nodes = element.getChildNodes();
536+
for (int i = 0; i < nodes.getLength(); i++) {
537+
if (nodes.item(i) instanceof Element) {
538+
Element childElem = (Element)nodes.item(i);
539+
if ("languageRule".equals(childElem.getTagName())) {
540+
getRules(mapping, childElem);
541+
}
542+
}
543+
}
544+
}
545+
return mapping;
546+
}
547+
548+
static void getRules(Map<String, LanguageRule> map, Element element) {
549+
String name = element.getAttribute("name");
550+
if (name != null) {
551+
LanguageRule languageRule = new LanguageRule(name);
552+
NodeList nodes = element.getChildNodes();
553+
for (int i = 0; i < nodes.getLength(); i++) {
554+
if (nodes.item(i) instanceof Element) {
555+
Element childElem = (Element)nodes.item(i);
556+
if ("rule".equals(childElem.getTagName())) {
557+
getRule(languageRule, childElem);
558+
}
559+
}
560+
}
561+
map.put(name, languageRule);
562+
}
563+
}
564+
565+
static void getRule(LanguageRule languageRule, Element element) {
566+
String breaking = element.getAttribute("break");
567+
String beforeBreak = "";
568+
String afterBreak = "";
569+
if (breaking != null) {
570+
NodeList nodes = element.getChildNodes();
571+
for (int i = 0; i < nodes.getLength(); i++) {
572+
if (nodes.item(i) instanceof Element) {
573+
Element childElem = (Element)nodes.item(i);
574+
if ("beforeBreak".equals(childElem.getTagName())) {
575+
Node firstChild = childElem.getFirstChild();
576+
Text text = (Text) firstChild;
577+
if (text != null) {
578+
beforeBreak = text.getWholeText();
579+
} else {
580+
beforeBreak = "";
581+
}
582+
}
583+
if ("afterBreak".equals(childElem.getTagName())) {
584+
Node firstChild = childElem.getFirstChild();
585+
Text text = (Text) firstChild;
586+
if (text != null) {
587+
afterBreak = text.getWholeText();
588+
} else {
589+
afterBreak = "";
590+
}
591+
}
592+
}
593+
}
594+
if ("yes".equals(breaking)) {
595+
languageRule.addRule(new Rule(true, beforeBreak, afterBreak));
596+
}
597+
if ("no".equals(breaking)) {
598+
languageRule.addRule(new Rule(false, beforeBreak, afterBreak));
599+
}
600+
}
601+
}
524602
}

0 commit comments

Comments
 (0)