Skip to content

Commit 6ae5de0

Browse files
committed
auto merge of #12231 : wycats/rust/url_path_parse, r=alexcrichton
It is sometimes useful to parse just the path portion of a URL (path, query string and fragment) rather than the entire URL. In theory I could have made Url embed a Path, but that would be a breaking change and I assume that Servo uses this API. I would be happy to update the PR to embed Path in Url if that's what people wanted.
2 parents 1228fb0 + 4667c49 commit 6ae5de0

File tree

1 file changed

+115
-0
lines changed

1 file changed

+115
-0
lines changed

src/libextra/url.rs

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,17 @@ pub struct Url {
5555
fragment: Option<~str>
5656
}
5757

58+
/// The path-relative portion of a URL: the path itself together with its
/// query and optional fragment, without scheme or authority.
#[deriving(Clone, Eq)]
59+
pub struct Path {
60+
/// The path component of a URL, for example `/foo/bar`.
61+
path: ~str,
62+
/// The query component of a URL. `~[(~"baz", ~"qux")]` represents the
63+
/// query string `baz=qux`, as in `/foo/bar?baz=qux`.
64+
query: Query,
65+
/// The fragment component, such as `quz`. Doesn't include the leading `#` character.
66+
fragment: Option<~str>
67+
}
68+
5869
/// An optional subcomponent of a URI authority component.
5970
#[deriving(Clone, Eq)]
6071
pub struct UserInfo {
@@ -88,6 +99,19 @@ impl Url {
8899
}
89100
}
90101

102+
impl Path {
103+
/// Assembles a `Path` from already-separated components.
///
/// The arguments are stored exactly as given; no parsing, decoding or
/// validation happens here. Use `path_from_str` to parse a raw string.
pub fn new(path: ~str,
104+
query: Query,
105+
fragment: Option<~str>)
106+
-> Path {
107+
Path {
108+
path: path,
109+
query: query,
110+
fragment: fragment,
111+
}
112+
}
113+
}
114+
91115
impl UserInfo {
92116
#[inline]
93117
pub fn new(user: ~str, pass: Option<~str>) -> UserInfo {
@@ -727,6 +751,21 @@ pub fn from_str(rawurl: &str) -> Result<Url, ~str> {
727751
Ok(Url::new(scheme, userinfo, host, port, path, query, fragment))
728752
}
729753

754+
/// Parses the path portion of a URL (path, then query and fragment) from
/// `rawpath`.
///
/// Returns `Ok(Path)` on success. If either `get_path` or
/// `get_query_fragment` fails, its error message is returned unchanged as
/// `Err`.
pub fn path_from_str(rawpath: &str) -> Result<Path, ~str> {
755+
// Split off the leading path; `rest` is whatever follows it.
// NOTE(review): the `false` flag presumably tells `get_path` that no
// authority precedes the path -- confirm against `get_path`.
let (path, rest) = match get_path(rawpath, false) {
756+
Ok(val) => val,
757+
Err(e) => return Err(e)
758+
};
759+
760+
// The remainder holds the query and fragment, parsed together.
761+
let (query, fragment) = match get_query_fragment(rest) {
762+
Ok(val) => val,
763+
Err(e) => return Err(e),
764+
};
765+
766+
Ok(Path{ path: path, query: query, fragment: fragment })
767+
}
768+
730769
impl FromStr for Url {
731770
fn from_str(s: &str) -> Option<Url> {
732771
match from_str(s) {
@@ -736,6 +775,15 @@ impl FromStr for Url {
736775
}
737776
}
738777

778+
impl FromStr for Path {
779+
/// Parses `s` with `path_from_str`, mapping success to `Some` and any
/// failure to `None`; the error message is discarded.
fn from_str(s: &str) -> Option<Path> {
780+
match path_from_str(s) {
781+
Ok(path) => Some(path),
782+
Err(_) => None
783+
}
784+
}
785+
}
786+
739787
/**
740788
* Converts a URL from `Url` to string representation.
741789
*
@@ -780,18 +828,45 @@ pub fn to_str(url: &Url) -> ~str {
780828
format!("{}:{}{}{}{}", url.scheme, authority, url.path, query, fragment)
781829
}
782830

831+
/// Converts a `Path` to its string representation.
///
/// The output is the path, followed by `?` and the serialized query when
/// the query is non-empty, followed by `#` and the encoded fragment when a
/// fragment is present.
pub fn path_to_str(path: &Path) -> ~str {
832+
// An empty query contributes nothing, not even the `?` separator.
let query = if path.query.is_empty() {
833+
~""
834+
} else {
835+
format!("?{}", query_to_str(&path.query))
836+
};
837+
838+
// NOTE(review): `\\#` presumably escapes `#` for `format!` so that a
// literal `#` is emitted -- confirm against the `std::fmt` syntax in use.
let fragment = match path.fragment {
839+
Some(ref fragment) => format!("\\#{}", encode_component(*fragment)),
840+
None => ~"",
841+
};
842+
843+
// NOTE(review): `path.path` is written as stored, while the fragment goes
// through `encode_component`; confirm whether the path should be encoded too.
format!("{}{}{}", path.path, query, fragment)
844+
}
845+
783846
impl ToStr for Url {
784847
/// Delegates to the free function `to_str`.
fn to_str(&self) -> ~str {
785848
to_str(self)
786849
}
787850
}
788851

852+
impl ToStr for Path {
853+
/// Delegates to the free function `path_to_str`.
fn to_str(&self) -> ~str {
854+
path_to_str(self)
855+
}
856+
}
857+
789858
impl IterBytes for Url {
790859
/// Forwards to `iter_bytes` on the URL's string form (`self.to_str()`),
/// passing `lsb0` and `f` through unchanged.
fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) -> bool {
791860
self.to_str().iter_bytes(lsb0, f)
792861
}
793862
}
794863

864+
impl IterBytes for Path {
865+
/// Forwards to `iter_bytes` on the path's string form (`self.to_str()`),
/// passing `lsb0` and `f` through unchanged.
fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) -> bool {
866+
self.to_str().iter_bytes(lsb0, f)
867+
}
868+
}
869+
795870
// Put a few tests outside of the 'test' module so they can test the internal
796871
// functions and those functions don't need 'pub'
797872

@@ -899,6 +974,17 @@ mod tests {
899974
assert_eq!(&u.fragment, &Some(~"something"));
900975
}
901976
977+
// A path with both a query and a fragment is split into all three fields.
#[test]
978+
fn test_path_parse() {
979+
let path = ~"/doc/~u?s=v#something";
980+
981+
let up = path_from_str(path);
982+
let u = up.unwrap();
983+
assert_eq!(&u.path, &~"/doc/~u");
984+
assert_eq!(&u.query, &~[(~"s", ~"v")]);
985+
assert_eq!(&u.fragment, &Some(~"something"));
986+
}
987+
902988
#[test]
903989
fn test_url_parse_host_slash() {
904990
let urlstr = ~"http://0.42.42.42/";
@@ -907,6 +993,13 @@ mod tests {
907993
assert!(url.path == ~"/");
908994
}
909995
996+
// A bare `/` parses successfully and is kept as the path.
#[test]
997+
fn test_path_parse_host_slash() {
998+
let pathstr = ~"/";
999+
let path = path_from_str(pathstr).unwrap();
1000+
assert!(path.path == ~"/");
1001+
}
1002+
9101003
#[test]
9111004
fn test_url_host_with_port() {
9121005
let urlstr = ~"scheme://host:1234";
@@ -930,13 +1023,27 @@ mod tests {
9301023
assert!(url.path == ~"/file_name.html");
9311024
}
9321025
1026+
// Underscores in a path segment are accepted and preserved.
#[test]
1027+
fn test_path_with_underscores() {
1028+
let pathstr = ~"/file_name.html";
1029+
let path = path_from_str(pathstr).unwrap();
1030+
assert!(path.path == ~"/file_name.html");
1031+
}
1032+
9331033
// Dashes in the path of a full URL are accepted and preserved.
#[test]
9341034
fn test_url_with_dashes() {
9351035
let urlstr = ~"http://dotcom.com/file-name.html";
9361036
let url = from_str(urlstr).unwrap();
9371037
assert!(url.path == ~"/file-name.html");
9381038
}
9391039
1040+
// Dashes in a standalone path are accepted and preserved.
#[test]
1041+
fn test_path_with_dashes() {
1042+
let pathstr = ~"/file-name.html";
1043+
let path = path_from_str(pathstr).unwrap();
1044+
assert!(path.path == ~"/file-name.html");
1045+
}
1046+
9401047
#[test]
9411048
fn test_no_scheme() {
9421049
assert!(get_scheme("noschemehere.html").is_err());
@@ -1017,6 +1124,14 @@ mod tests {
10171124
assert!(u.query == ~[(~"ba%d ", ~"#&+")]);
10181125
}
10191126
1127+
// Percent-escapes are decoded in both the path and the query key/value.
#[test]
1128+
fn test_path_component_encoding() {
1129+
let path = ~"/doc%20uments?ba%25d%20=%23%26%2B";
1130+
let p = path_from_str(path).unwrap();
1131+
assert!(p.path == ~"/doc uments");
1132+
assert!(p.query == ~[(~"ba%d ", ~"#&+")]);
1133+
}
1134+
10201135
#[test]
10211136
fn test_url_without_authority() {
10221137
let url = ~"mailto:test@email.com";

0 commit comments

Comments
 (0)