diff --git a/src/etc/htmldocck.py b/src/etc/htmldocck.py
index 8647db5a45dc8..db2f378300e92 100644
--- a/src/etc/htmldocck.py
+++ b/src/etc/htmldocck.py
@@ -110,72 +110,9 @@
import re
import shlex
from collections import namedtuple
-try:
- from html.parser import HTMLParser
-except ImportError:
- from HTMLParser import HTMLParser
-try:
- from xml.etree import cElementTree as ET
-except ImportError:
- from xml.etree import ElementTree as ET
-
-try:
- from html.entities import name2codepoint
-except ImportError:
- from htmlentitydefs import name2codepoint
-
-# "void elements" (no closing tag) from the HTML Standard section 12.1.2
-VOID_ELEMENTS = {'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen',
- 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr'}
-
-# Python 2 -> 3 compatibility
-try:
- unichr
-except NameError:
- unichr = chr
-
+from parsel import Selector
channel = os.environ["DOC_RUST_LANG_ORG_CHANNEL"]
-
-class CustomHTMLParser(HTMLParser):
- """simplified HTML parser.
-
- this is possible because we are dealing with very regular HTML from
- rustdoc; we only have to deal with i) void elements and ii) empty
- attributes."""
- def __init__(self, target=None):
- HTMLParser.__init__(self)
- self.__builder = target or ET.TreeBuilder()
-
- def handle_starttag(self, tag, attrs):
- attrs = {k: v or '' for k, v in attrs}
- self.__builder.start(tag, attrs)
- if tag in VOID_ELEMENTS:
- self.__builder.end(tag)
-
- def handle_endtag(self, tag):
- self.__builder.end(tag)
-
- def handle_startendtag(self, tag, attrs):
- attrs = {k: v or '' for k, v in attrs}
- self.__builder.start(tag, attrs)
- self.__builder.end(tag)
-
- def handle_data(self, data):
- self.__builder.data(data)
-
- def handle_entityref(self, name):
- self.__builder.data(unichr(name2codepoint[name]))
-
- def handle_charref(self, name):
- code = int(name[1:], 16) if name.startswith(('x', 'X')) else int(name, 10)
- self.__builder.data(unichr(code))
-
- def close(self):
- HTMLParser.close(self)
- return self.__builder.close()
-
-
Command = namedtuple('Command', 'negated cmd args lineno context')
@@ -256,29 +193,15 @@ def get_commands(template):
yield Command(negated=negated, cmd=cmd, args=args, lineno=lineno+1, context=line)
-def _flatten(node, acc):
- if node.text:
- acc.append(node.text)
- for e in node:
- _flatten(e, acc)
- if e.tail:
- acc.append(e.tail)
-
-
-def flatten(node):
- acc = []
- _flatten(node, acc)
- return ''.join(acc)
-
-
def normalize_xpath(path):
+    """Substitute the `{{channel}}` placeholder in `path` and return the result.
+
+    Raises InvalidCheck when the substituted path does not start with `//`.
+    """
path = path.replace("{{channel}}", channel)
- if path.startswith('//'):
- return '.' + path # avoid warnings
- elif path.startswith('.//'):
- return path
- else:
+ if not path.startswith('//'):
raise InvalidCheck('Non-absolute XPath is not supported due to implementation issues')
+ return path
class CachedFiles(object):
@@ -323,7 +242,7 @@ def get_tree(self, path):
with io.open(abspath, encoding='utf-8') as f:
try:
- tree = ET.fromstringlist(f.readlines(), CustomHTMLParser())
+ tree = Selector(text=f.read())
except Exception as e:
raise RuntimeError('Cannot parse an HTML file {!r}: {}'.format(path, e))
self.trees[path] = tree
@@ -351,7 +270,7 @@ def check_string(data, pat, regexp):
def check_tree_attr(tree, path, attr, pat, regexp):
path = normalize_xpath(path)
ret = False
- for e in tree.findall(path):
+ for e in tree.xpath(path):
if attr in e.attrib:
value = e.attrib[attr]
else:
@@ -363,19 +282,21 @@ def check_tree_attr(tree, path, attr, pat, regexp):
return ret
+def flatten(elem):
+    """Return the joined text of everything `::text` selects on `elem`."""
+    text_nodes = elem.css('::text').getall()
+    return ''.join(text_nodes)
+
+
def check_tree_text(tree, path, pat, regexp):
path = normalize_xpath(path)
ret = False
try:
- for e in tree.findall(path):
- try:
- value = flatten(e)
- except KeyError:
- continue
- else:
- ret = check_string(value, pat, regexp)
- if ret:
- break
+        # Stop at the first matched node whose flattened text passes the check.
+        for node in tree.xpath(path):
+            ret = check_string(flatten(node), pat, regexp)
+            if ret:
+                break
except Exception:
print('Failed to get path "{}"'.format(path))
raise
@@ -384,7 +303,9 @@ def check_tree_text(tree, path, pat, regexp):
def get_tree_count(tree, path):
+    """Return how many nodes of `tree` the normalized XPath `path` selects."""
path = normalize_xpath(path)
- return len(tree.findall(path))
+    matches = tree.xpath(path)
+    return len(matches)
def stderr(*args):
diff --git a/src/test/rustdoc/fn-type.rs b/src/test/rustdoc/fn-type.rs
index 3959aeb6cfb7f..11811ffe307ee 100644
--- a/src/test/rustdoc/fn-type.rs
+++ b/src/test/rustdoc/fn-type.rs
@@ -8,6 +8,7 @@ pub struct Foo<'a, T> {
pub hrtb_lifetime: for<'b, 'c> fn(one: &'b i32, two: &'c &'b i32) -> (&'b i32, &'c i32),
}
-// @has 'foo/struct.Foo.html' '//span[@id="structfield.generic"]' "generic: fn(val: &T) -> T"
-// @has 'foo/struct.Foo.html' '//span[@id="structfield.lifetime"]' "lifetime: fn(val: &'a i32) -> i32"
-// @has 'foo/struct.Foo.html' '//span[@id="structfield.hrtb_lifetime"]' "hrtb_lifetime: for<'b, 'c> fn(one: &'b i32, two: &'c &'b i32) -> (&'b i32, &'c i32)"
+// @has 'foo/struct.Foo.html'
+// @has - '//span[@id="structfield.generic"]' "generic: fn(val: &T) -> T"
+// @has - '//span[@id="structfield.lifetime"]' "lifetime: fn(val: &'a i32) -> i32"
+// @has - '//span[@id="structfield.hrtb_lifetime"]' "hrtb_lifetime: for<'b, 'c> fn(one: &'b i32, two: &'c &'b i32) -> (&'b i32, &'c i32)"
diff --git a/src/test/rustdoc/inline_cross/renamed-via-module.rs b/src/test/rustdoc/inline_cross/renamed-via-module.rs
index cdedbf0707985..d053d659c5228 100644
--- a/src/test/rustdoc/inline_cross/renamed-via-module.rs
+++ b/src/test/rustdoc/inline_cross/renamed-via-module.rs
@@ -7,16 +7,16 @@
extern crate foo;
// @has foo/iter/index.html
-// @has - '//a/[@href="struct.DeprecatedStepBy.html"]' "DeprecatedStepBy"
-// @has - '//a/[@href="struct.StepBy.html"]' "StepBy"
+// @has - '//a[@href="struct.DeprecatedStepBy.html"]' "DeprecatedStepBy"
+// @has - '//a[@href="struct.StepBy.html"]' "StepBy"
// @has foo/iter/struct.DeprecatedStepBy.html
// @has - '//h1' "Struct foo::iter::DeprecatedStepBy"
// @has foo/iter/struct.StepBy.html
// @has - '//h1' "Struct foo::iter::StepBy"
// @has bar/iter/index.html
-// @has - '//a/[@href="struct.DeprecatedStepBy.html"]' "DeprecatedStepBy"
-// @has - '//a/[@href="struct.StepBy.html"]' "StepBy"
+// @has - '//a[@href="struct.DeprecatedStepBy.html"]' "DeprecatedStepBy"
+// @has - '//a[@href="struct.StepBy.html"]' "StepBy"
// @has bar/iter/struct.DeprecatedStepBy.html
// @has - '//h1' "Struct bar::iter::DeprecatedStepBy"
// @has bar/iter/struct.StepBy.html
diff --git a/src/test/rustdoc/intra-doc/private.rs b/src/test/rustdoc/intra-doc/private.rs
index 2756a7998e8ea..0b5882e40fcd0 100644
--- a/src/test/rustdoc/intra-doc/private.rs
+++ b/src/test/rustdoc/intra-doc/private.rs
@@ -4,9 +4,9 @@
// make sure to update `rustdoc-ui/intra-doc/private.rs` if you update this file
/// docs [DontDocMe] [DontDocMe::f] [DontDocMe::x]
-// @has private/struct.DocMe.html '//*a[@href="struct.DontDocMe.html"]' 'DontDocMe'
-// @has private/struct.DocMe.html '//*a[@href="struct.DontDocMe.html#method.f"]' 'DontDocMe::f'
-// @has private/struct.DocMe.html '//*a[@href="struct.DontDocMe.html#structfield.x"]' 'DontDocMe::x'
+// @has private/struct.DocMe.html '//a[@href="struct.DontDocMe.html"]' 'DontDocMe'
+// @has private/struct.DocMe.html '//a[@href="struct.DontDocMe.html#method.f"]' 'DontDocMe::f'
+// @has private/struct.DocMe.html '//a[@href="struct.DontDocMe.html#structfield.x"]' 'DontDocMe::x'
pub struct DocMe;
struct DontDocMe {
x: usize,
diff --git a/src/test/rustdoc/primitive/no_std.rs b/src/test/rustdoc/primitive/no_std.rs
index f0f70cb6c1881..adcc9556f819d 100644
--- a/src/test/rustdoc/primitive/no_std.rs
+++ b/src/test/rustdoc/primitive/no_std.rs
@@ -2,8 +2,8 @@
#![deny(warnings)]
#![deny(rustdoc::broken_intra_doc_links)]
-// @has no_std/fn.foo.html '//a/[@href="{{channel}}/core/primitive.u8.html"]' 'u8'
-// @has no_std/fn.foo.html '//a/[@href="{{channel}}/core/primitive.u8.html"]' 'primitive link'
+// @has no_std/fn.foo.html '//a[@href="{{channel}}/core/primitive.u8.html"]' 'u8'
+// @has no_std/fn.foo.html '//a[@href="{{channel}}/core/primitive.u8.html"]' 'primitive link'
/// Link to [primitive link][u8]
pub fn foo() -> u8 {}
diff --git a/src/test/rustdoc/proc-macro.rs b/src/test/rustdoc/proc-macro.rs
index f6d1f2cf91b5f..c8507a625db36 100644
--- a/src/test/rustdoc/proc-macro.rs
+++ b/src/test/rustdoc/proc-macro.rs
@@ -6,7 +6,7 @@
#![crate_name="some_macros"]
// @has some_macros/index.html
-// @has - '//a/[@href="attr.some_proc_attr.html"]' 'some_proc_attr'
+// @has - '//a[@href="attr.some_proc_attr.html"]' 'some_proc_attr'
//! include a link to [some_proc_macro] to make sure it works.
diff --git a/src/test/rustdoc/raw-ident-eliminate-r-hashtag.rs b/src/test/rustdoc/raw-ident-eliminate-r-hashtag.rs
index ad19036126760..b8133cbf2168d 100644
--- a/src/test/rustdoc/raw-ident-eliminate-r-hashtag.rs
+++ b/src/test/rustdoc/raw-ident-eliminate-r-hashtag.rs
@@ -8,13 +8,13 @@ pub mod internal {
///
/// [name]: mod
/// [other name]: crate::internal::mod
- // @has 'raw_ident_eliminate_r_hashtag/internal/struct.B.html' '//*a[@href="struct.mod.html"]' 'name'
- // @has 'raw_ident_eliminate_r_hashtag/internal/struct.B.html' '//*a[@href="struct.mod.html"]' 'other name'
+ // @has 'raw_ident_eliminate_r_hashtag/internal/struct.B.html' '//a[@href="struct.mod.html"]' 'name'
+ // @has 'raw_ident_eliminate_r_hashtag/internal/struct.B.html' '//a[@href="struct.mod.html"]' 'other name'
pub struct B;
}
/// See [name].
///
/// [name]: internal::mod
-// @has 'raw_ident_eliminate_r_hashtag/struct.A.html' '//*a[@href="internal/struct.mod.html"]' 'name'
+// @has 'raw_ident_eliminate_r_hashtag/struct.A.html' '//a[@href="internal/struct.mod.html"]' 'name'
pub struct A;