1
+ #![ feature( proc_macro_hygiene, decl_macro) ]
2
+
3
+ #[ macro_use]
4
+ extern crate rocket;
5
+ // use rocket::data::{Data, ToByteUnit};
6
+ // use rocket::http::uri::Absolute;
7
+ // use rocket::response::content::RawText;
8
+ use rocket:: config:: Config ;
9
+ use rocket:: http:: Status ;
10
+ use rocket:: response:: { content, status} ;
11
+ use rocket:: fs:: NamedFile ;
12
+ use rocket:: { Build , Request , Rocket } ;
13
+ use rocket_dyn_templates:: { context, Metadata , Template } ;
14
+
15
+ #[ macro_use]
16
+ extern crate lazy_static;
17
+ use regex:: Regex ;
18
+ use std:: fs:: File ;
19
+ use std:: collections:: HashMap ;
20
+ use std:: env;
21
+ use std:: io:: { Read , BufReader } ;
22
+ use std:: path:: Path ;
23
+ use zip:: ZipArchive ;
24
+ use std:: borrow:: Cow ;
25
+
26
+ lazy_static ! {
27
+ static ref TRAILING_PDF_EXT : Regex = Regex :: new( "[.]pdf$" ) . unwrap( ) ;
28
+ static ref END_ARTICLE : Regex = Regex :: new( "</article>" ) . unwrap( ) ;
29
+ static ref END_HEAD : Regex = Regex :: new( "</head>" ) . unwrap( ) ;
30
+ static ref START_PAGE_CONTENT : Regex = Regex :: new( "<div class=\" ltx_page_content\" >" ) . unwrap( ) ;
31
+ static ref END_BODY : Regex = Regex :: new( "</body>" ) . unwrap( ) ;
32
+ static ref AR5IV_PAPERS_ROOT_DIR : String =
33
+ env:: var( "AR5IV_PAPERS_ROOT_DIR" ) . unwrap_or_else( |_| String :: from( "/data/arxmliv" ) ) ;
34
+ }
35
+
36
+ #[ get( "/" ) ]
37
+ fn about ( ) -> Template {
38
+ let map: HashMap < String , String > = HashMap :: new ( ) ;
39
+ Template :: render ( "ar5iv" , & map)
40
+ }
41
+
42
+ #[ get( "/abs/<field>/<id>" ) ]
43
+ async fn abs_field ( field : String , id : String ) -> content:: RawHtml < String > {
44
+ assemble_paper ( Some ( field) , id) . await
45
+ }
46
+ #[ get( "/abs/<id>" ) ]
47
+ async fn abs ( id : String ) -> content:: RawHtml < String > {
48
+ assemble_paper ( None , id) . await
49
+ }
50
+
51
+ #[ get( "/pdf/<field>/<id>" ) ]
52
+ async fn pdf_field ( field : String , id : String ) -> content:: RawHtml < String > {
53
+ let id_core: String = ( * TRAILING_PDF_EXT . replace ( & id, "" ) ) . to_owned ( ) ;
54
+ assemble_paper ( Some ( field) , id_core) . await
55
+ }
56
+ #[ get( "/pdf/<id>" ) ]
57
+ async fn pdf ( id : String ) -> content:: RawHtml < String > {
58
+ let id_core: String = ( * TRAILING_PDF_EXT . replace ( & id, "" ) ) . to_owned ( ) ;
59
+ assemble_paper ( None , id_core) . await
60
+ }
61
+
62
+ #[ get( "/assets/<name>" ) ]
63
+ async fn assets ( name : String ) -> Option < NamedFile > {
64
+ NamedFile :: open ( Path :: new ( "assets/" ) . join ( name) ) . await . ok ( )
65
+ }
66
+
67
+ #[ catch( 404 ) ]
68
+ fn general_not_found ( ) -> content:: RawHtml < & ' static str > {
69
+ content:: RawHtml (
70
+ r#"
71
+ <p>Hmm... What are you looking for?</p>
72
+ Say <a href="/hello/Sergio/100">hello!</a>
73
+ "# ,
74
+ )
75
+ }
76
+
77
+ #[ catch( default ) ]
78
+ fn default_catcher ( status : Status , req : & Request < ' _ > ) -> status:: Custom < String > {
79
+ let msg = format ! ( "{} ({})" , status, req. uri( ) ) ;
80
+ status:: Custom ( status, msg)
81
+ }
82
+
83
+ async fn assemble_paper ( field_opt : Option < String > , id : String ) -> content:: RawHtml < String > {
84
+ // Option<File>
85
+ let id_base = & id[ 0 ..4 ] ;
86
+ let id_arxiv = if let Some ( ref field) = field_opt {
87
+ format ! ( "{}/{}" , field, id)
88
+ } else {
89
+ id. clone ( )
90
+ } ;
91
+ let field = field_opt. unwrap_or_default ( ) ;
92
+ let paper_path_str = format ! (
93
+ "{}/{}/{}{}/tex_to_html.zip" ,
94
+ * AR5IV_PAPERS_ROOT_DIR , id_base, field, id
95
+ ) ;
96
+ let paper_path = Path :: new ( & paper_path_str) ;
97
+ if paper_path. exists ( ) {
98
+ // TODO: Can the tokio::fs::File be swapped in here for some benefit? Does the ZIP crate allow for that?
99
+ // I couldn't easily understand the answer from what I found online.
100
+ let zipf = File :: open ( & paper_path) . unwrap ( ) ;
101
+ let reader = BufReader :: new ( zipf) ;
102
+ let mut zip = ZipArchive :: new ( reader) . unwrap ( ) ;
103
+
104
+ let mut log = String :: new ( ) ;
105
+ let mut html = String :: new ( ) ;
106
+ let mut doc_assets = HashMap :: new ( ) ;
107
+ for i in 0 ..zip. len ( ) {
108
+ if let Ok ( mut file) = zip. by_index ( i) {
109
+ if file. is_file ( ) {
110
+ let mut asset = None ;
111
+ match file. name ( ) {
112
+ "cortex.log" => {
113
+ file. read_to_string ( & mut log) . unwrap ( ) ;
114
+ }
115
+ name if name. ends_with ( ".html" ) => {
116
+ file. read_to_string ( & mut html) . unwrap ( ) ;
117
+ }
118
+ other => {
119
+ asset = Some ( other. to_string ( ) ) ;
120
+ } // record assets for later management4
121
+ }
122
+ if let Some ( asset_name) = asset {
123
+ let mut file_contents = Vec :: new ( ) ;
124
+ file. read_to_end ( & mut file_contents) . unwrap ( ) ;
125
+ doc_assets. insert ( asset_name, file_contents) ;
126
+ }
127
+ }
128
+ }
129
+ }
130
+
131
+ content:: RawHtml ( prepare_ar5iv_document ( html, log, id_arxiv, doc_assets) )
132
+ } else {
133
+ content:: RawHtml ( format ! ( "paper id {}{} is not available on disk. " , field, id) )
134
+ }
135
+ }
136
+
137
+ #[ launch]
138
+ fn rocket ( ) -> _ {
139
+ rocket:: custom ( Config :: figment ( ) . merge ( ( "template_dir" , "templates" ) ) )
140
+ . attach ( Template :: fairing ( ) )
141
+ . mount ( "/" , routes ! [ abs, abs_field, pdf, pdf_field, about, assets] )
142
+ . register ( "/" , catchers ! [ general_not_found, default_catcher] )
143
+ }
144
+
145
+
146
+
147
/// Decorates a converted paper's HTML for serving and returns the final page.
///
/// * `main_content` - the paper HTML; when empty, a minimal "No content
///   available" skeleton is used instead.
/// * `conversion_report` - the conversion log; when non-empty, the ar5iv logo
///   block and a "CorTeX Conversion Report" section (one paragraph per log line)
///   are inserted before the first `</article>`.
/// * `id_arxiv` - the arXiv identifier used for the "View original paper" link.
/// * `data_url_map` - asset bytes keyed by file name; currently unused (see TODO).
///
/// A stylesheet link is inserted before the first `</head>` and a MathJax
/// fallback script before the first `</body>`.
///
/// The splices use literal string replacement: a regex replacement string would
/// expand `$name` groups, which mangles logs containing `$`. The log text and
/// `id_arxiv` are HTML-escaped because both originate outside this program.
fn prepare_ar5iv_document(
    mut main_content: String,
    conversion_report: String,
    id_arxiv: String,
    data_url_map: HashMap<String, Vec<u8>>,
) -> String {
    // Escapes the characters that are significant in HTML text and attribute values.
    fn escape_html(text: &str) -> String {
        let mut escaped = String::with_capacity(text.len());
        for ch in text.chars() {
            match ch {
                '&' => escaped.push_str("&amp;"),
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                '"' => escaped.push_str("&quot;"),
                '\'' => escaped.push_str("&#39;"),
                other => escaped.push(other),
            }
        }
        escaped
    }

    // Fall back to an empty document skeleton so the splices below have anchors.
    if main_content.is_empty() {
        main_content = String::from(
            r###"
<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html" />
<meta charset="utf-8" />
<title> No content available </title>
<meta name="language" content="English">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
<div class="ltx_page_main">
<div class="ltx_page_content">
<article class="ltx_document">
</article>
</div>
</div>
</body>
</html>
"###,
        );
    }

    // TODO: Add all assets as data URLs, i.e. rewrite each `src="<filename>"`
    // attribute in `main_content` to point at a data URL built from the asset bytes.
    // Until that exists the assets are accepted and discarded.
    drop(data_url_map);

    // If a conversion log is present, attach it as a trailing section.
    if !conversion_report.is_empty() {
        let escaped_id = escape_html(&id_arxiv);
        let report_paragraphs = conversion_report
            .split('\n')
            .map(escape_html)
            .collect::<Vec<_>>()
            .join(r#"</p><p class="ltx_p">"#);
        let html_report = [
            r###"
<div class="ar5iv-logos">
<a href="/"><img height="64" src="/assets/ar5iv.png"></a>

<a href="https://arxiv.org/abs/"###,
            escaped_id.as_str(),
            r###"" class="arxiv-button">View original paper on arXiv</a>
</div>
"###,
            r###"
<section id="latexml-conversion-report" class="ltx_section ltx_conversion_report">
<h2 class="ltx_title ltx_title_section">CorTeX Conversion Report</h2>
<div id="S1.p1" class="ltx_para">
<p class="ltx_p">
"###,
            report_paragraphs.as_str(),
            r###"
</p>
</div>
</section>
</article>
"###,
        ]
        .concat();
        main_content = main_content.replacen("</article>", &html_report, 1);
    }

    // Loads MathJax only in browsers without native MathML support.
    let maybe_mathjax_js = r###"
<script>
var canMathML = typeof(MathMLElement) == "function";
if (!canMathML) {
var el = document.createElement("script");
el.src = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js";
document.querySelector("head").appendChild(el); }
</script>
</body>"###;

    // NOTE(review): the stylesheet URL below appears to have been garbled by an
    // email-obfuscation filter ("[email protected] "); restore the real
    // `<repository>@<version>` path from the upstream project before deploying.
    let arxmliv_css = r###"
<link media="all" rel="stylesheet" href="//cdn.jsdelivr.net/gh/dginev/[email protected] /css/arxmliv.css">
</head>"###;

    main_content = main_content.replacen("</head>", arxmliv_css, 1);
    main_content = main_content.replacen("</body>", maybe_mathjax_js, 1);

    main_content
}
0 commit comments