Skip to content

Commit 945f984

Browse files
committed
warmup: skeleton able to serve the basic HTML5 of articles
1 parent fcca0e1 commit 945f984

File tree

6 files changed

+254
-0
lines changed

6 files changed

+254
-0
lines changed

.gitignore

+5
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,8 @@ Cargo.lock
88

99
# These are backup files generated by rustfmt
1010
**/*.rs.bk
11+
12+
13+
# Added by cargo
14+
15+
/target

.rustfmt.toml

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Detailed instructions at: https://github.com/rust-lang/rustfmt/blob/master/Configurations.md
2+
tab_spaces = 2

Cargo.toml

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
[package]
2+
name = "ar5iv"
3+
version = "0.1.0"
4+
edition = "2018"
5+
6+
[dependencies]
7+
lazy_static = "1.4.0"
8+
regex = "1.5.4"
9+
zip = "0.5"
10+
11+
12+
[dependencies.rocket]
13+
git="https://github.com/SergioBenitez/Rocket"
14+
version="0.5.0-rc.1"
15+
16+
[dependencies.rocket_dyn_templates]
17+
git="https://github.com/SergioBenitez/Rocket"
18+
version = "0.1.0-rc.1"
19+
features = ["tera"]

assets/ar5iv.png

15.6 KB
Loading

src/main.rs

+221
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
#![feature(proc_macro_hygiene, decl_macro)]
2+
3+
#[macro_use]
4+
extern crate rocket;
5+
// use rocket::data::{Data, ToByteUnit};
6+
// use rocket::http::uri::Absolute;
7+
// use rocket::response::content::RawText;
8+
use rocket::config::Config;
9+
use rocket::http::Status;
10+
use rocket::response::{content, status};
11+
use rocket::fs::NamedFile;
12+
use rocket::{Build, Request, Rocket};
13+
use rocket_dyn_templates::{context, Metadata, Template};
14+
15+
#[macro_use]
16+
extern crate lazy_static;
17+
use regex::Regex;
18+
use std::fs::File;
19+
use std::collections::HashMap;
20+
use std::env;
21+
use std::io::{Read,BufReader};
22+
use std::path::Path;
23+
use zip::ZipArchive;
24+
use std::borrow::Cow;
25+
26+
lazy_static! {
  /// Directory under which the converted papers are stored; taken from the
  /// `AR5IV_PAPERS_ROOT_DIR` environment variable, falling back to `/data/arxmliv`.
  static ref AR5IV_PAPERS_ROOT_DIR: String = match env::var("AR5IV_PAPERS_ROOT_DIR") {
    Ok(configured_dir) => configured_dir,
    Err(_) => String::from("/data/arxmliv"),
  };

  /// Matches a `.pdf` extension at the very end of a requested id.
  static ref TRAILING_PDF_EXT: Regex = Regex::new("[.]pdf$").unwrap();

  /// Matches the opening tag of the page-content wrapper.
  static ref START_PAGE_CONTENT: Regex =
    Regex::new("<div class=\"ltx_page_content\">").unwrap();

  /// Matches the closing `</head>` tag.
  static ref END_HEAD: Regex = Regex::new("</head>").unwrap();

  /// Matches the closing `</article>` tag.
  static ref END_ARTICLE: Regex = Regex::new("</article>").unwrap();

  /// Matches the closing `</body>` tag.
  static ref END_BODY: Regex = Regex::new("</body>").unwrap();
}
35+
36+
#[get("/")]
37+
fn about() -> Template {
38+
let map: HashMap<String, String> = HashMap::new();
39+
Template::render("ar5iv", &map)
40+
}
41+
42+
#[get("/abs/<field>/<id>")]
43+
async fn abs_field(field: String, id: String) -> content::RawHtml<String> {
44+
assemble_paper(Some(field), id).await
45+
}
46+
#[get("/abs/<id>")]
47+
async fn abs(id: String) -> content::RawHtml<String> {
48+
assemble_paper(None, id).await
49+
}
50+
51+
#[get("/pdf/<field>/<id>")]
52+
async fn pdf_field(field: String, id: String) -> content::RawHtml<String> {
53+
let id_core: String = (*TRAILING_PDF_EXT.replace(&id, "")).to_owned();
54+
assemble_paper(Some(field), id_core).await
55+
}
56+
#[get("/pdf/<id>")]
57+
async fn pdf(id: String) -> content::RawHtml<String> {
58+
let id_core: String = (*TRAILING_PDF_EXT.replace(&id, "")).to_owned();
59+
assemble_paper(None, id_core).await
60+
}
61+
62+
#[get("/assets/<name>")]
63+
async fn assets(name: String) -> Option<NamedFile> {
64+
NamedFile::open(Path::new("assets/").join(name)).await.ok()
65+
}
66+
67+
#[catch(404)]
68+
fn general_not_found() -> content::RawHtml<&'static str> {
69+
content::RawHtml(
70+
r#"
71+
<p>Hmm... What are you looking for?</p>
72+
Say <a href="/hello/Sergio/100">hello!</a>
73+
"#,
74+
)
75+
}
76+
77+
#[catch(default)]
78+
fn default_catcher(status: Status, req: &Request<'_>) -> status::Custom<String> {
79+
let msg = format!("{} ({})", status, req.uri());
80+
status::Custom(status, msg)
81+
}
82+
83+
async fn assemble_paper(field_opt: Option<String>, id: String) -> content::RawHtml<String> {
84+
// Option<File>
85+
let id_base = &id[0..4];
86+
let id_arxiv = if let Some(ref field) = field_opt {
87+
format!("{}/{}", field, id)
88+
} else {
89+
id.clone()
90+
};
91+
let field = field_opt.unwrap_or_default();
92+
let paper_path_str = format!(
93+
"{}/{}/{}{}/tex_to_html.zip",
94+
*AR5IV_PAPERS_ROOT_DIR, id_base, field, id
95+
);
96+
let paper_path = Path::new(&paper_path_str);
97+
if paper_path.exists() {
98+
// TODO: Can the tokio::fs::File be swapped in here for some benefit? Does the ZIP crate allow for that?
99+
// I couldn't easily understand the answer from what I found online.
100+
let zipf = File::open(&paper_path).unwrap();
101+
let reader = BufReader::new(zipf);
102+
let mut zip = ZipArchive::new(reader).unwrap();
103+
104+
let mut log = String::new();
105+
let mut html = String::new();
106+
let mut doc_assets = HashMap::new();
107+
for i in 0..zip.len() {
108+
if let Ok(mut file) = zip.by_index(i) {
109+
if file.is_file() {
110+
let mut asset = None;
111+
match file.name() {
112+
"cortex.log" => {
113+
file.read_to_string(&mut log).unwrap();
114+
}
115+
name if name.ends_with(".html") => {
116+
file.read_to_string(&mut html).unwrap();
117+
}
118+
other => {
119+
asset = Some(other.to_string());
120+
} // record assets for later management4
121+
}
122+
if let Some(asset_name) = asset {
123+
let mut file_contents = Vec::new();
124+
file.read_to_end(&mut file_contents).unwrap();
125+
doc_assets.insert(asset_name, file_contents);
126+
}
127+
}
128+
}
129+
}
130+
131+
content::RawHtml(prepare_ar5iv_document(html, log, id_arxiv, doc_assets))
132+
} else {
133+
content::RawHtml(format!("paper id {}{} is not available on disk. ", field, id))
134+
}
135+
}
136+
137+
#[launch]
138+
fn rocket() -> _ {
139+
rocket::custom(Config::figment().merge(("template_dir", "templates")))
140+
.attach(Template::fairing())
141+
.mount("/", routes![abs, abs_field, pdf, pdf_field, about, assets])
142+
.register("/", catchers![general_not_found, default_catcher])
143+
}
144+
145+
146+
147+
/// Escapes the characters `&`, `<`, `>`, `"` and `'` so that `text` can be
/// placed inside HTML element content or a quoted attribute value.
fn escape_html(text: &str) -> String {
  let mut escaped = String::with_capacity(text.len());
  for ch in text.chars() {
    match ch {
      '&' => escaped.push_str("&amp;"),
      '<' => escaped.push_str("&lt;"),
      '>' => escaped.push_str("&gt;"),
      '"' => escaped.push_str("&quot;"),
      '\'' => escaped.push_str("&#39;"),
      other => escaped.push(other),
    }
  }
  escaped
}

/// Post-processes a converted paper into the page that is sent to the browser.
///
/// * `main_content` - the paper's HTML; when empty, a minimal
///   "No content available" page is substituted.
/// * `conversion_report` - the conversion log; when non-empty it is appended
///   (one paragraph per line) together with the ar5iv/arXiv links, just before
///   the first `</article>`.
/// * `id_arxiv` - the arXiv identifier used for the "View original paper" link.
/// * `data_url_map` - asset file contents keyed by file name; currently unused.
///
/// Additionally a stylesheet link is inserted before the first `</head>` and a
/// MathJax fallback script before the first `</body>`. Returns the final HTML.
///
/// All insertions are literal: text taken from the log or the id is HTML-escaped
/// and is never interpreted as a replacement pattern, so characters such as `$`
/// survive unchanged.
fn prepare_ar5iv_document(
  mut main_content: String,
  conversion_report: String,
  id_arxiv: String,
  data_url_map: HashMap<String, Vec<u8>>,
) -> String {
  // Fall back to an empty skeleton so the insertions below always have anchors.
  if main_content.is_empty() {
    main_content = String::from(
      r###"
<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html" />
<meta charset="utf-8" />
<title> No content available </title>
<meta name="language" content="English">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
<div class="ltx_page_main">
<div class="ltx_page_content">
<article class="ltx_document">
</article>
</div>
</div>
</body>
</html>
"###,
    );
  }

  // TODO: Add all assets as data URLs, i.e. rewrite every `src="<filename>"`
  // attribute in `main_content` to point at a data URL built from the asset.
  let _ = &data_url_map;

  // If a conversion log is present, attach it as a trailing section.
  if !conversion_report.is_empty() {
    let report_paragraphs = conversion_report
      .split('\n')
      .map(escape_html)
      .collect::<Vec<_>>()
      .join("</p><p class=\"ltx_p\">");
    let html_report = format!(
      r###"
<div class="ar5iv-logos">
<a href="/"><img height="64" src="/assets/ar5iv.png"></a>
&nbsp;&nbsp;&nbsp;
<a href="https://arxiv.org/abs/{id}" class="arxiv-button">View original paper on arXiv</a>
</div>

<section id="latexml-conversion-report" class="ltx_section ltx_conversion_report">
<h2 class="ltx_title ltx_title_section">CorTeX Conversion Report</h2>
<div id="S1.p1" class="ltx_para">
<p class="ltx_p">
{report}
</p>
</div>
</section>
</article>
"###,
      id = escape_html(&id_arxiv),
      report = report_paragraphs
    );
    main_content = main_content.replacen("</article>", &html_report, 1);
  }

  // Browsers without native MathML get MathJax loaded on demand.
  let maybe_mathjax_js = r###"
<script>
var canMathML = typeof(MathMLElement) == "function";
if (!canMathML) {
var el = document.createElement("script");
el.src = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js";
document.querySelector("head").appendChild(el); }
</script>
</body>"###;

  // NOTE(review): the `[email protected]` part of this URL looks like an
  // email-obfuscation artifact of a `<package>@<version>` segment - confirm the
  // intended stylesheet location.
  let arxmliv_css = r###"
<link media="all" rel="stylesheet" href="//cdn.jsdelivr.net/gh/dginev/[email protected]/css/arxmliv.css">
</head>"###;

  main_content = main_content.replacen("</head>", arxmliv_css, 1);
  main_content.replacen("</body>", maybe_mathjax_js, 1)
}

templates/ar5iv.html.tera

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<html>
2+
<head>
3+
</head>
4+
<body>
5+
test
6+
</body>
7+
</html>

0 commit comments

Comments
 (0)