Skip to content

Commit 756f7a7

Browse files
authored
Interactive atom cleaning for DCR (guardian#24088)
* Clean interactives prior to DCR send At the moment we do this cleaning in DCR but it is very slow. Jsoup is able to do the minimal cleaning we require very quickly though so we now prefer that. * Log DCR request timeouts Previously, these were identified (wrongly) as CAPI timeouts. A separate exception type has been introduced to distinguish between them.
1 parent 271834d commit 756f7a7

File tree

4 files changed

+26
-7
lines changed

4 files changed

+26
-7
lines changed

common/app/common/package.scala

+9-4
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import play.twirl.api.Html
1818
import model.ApplicationContext
1919
import http.ResultWithPreconnectPreload
2020
import http.HttpPreconnections
21-
import renderers.DCRLocalConnectException
21+
import renderers.{DCRLocalConnectException, DCRTimeoutException}
2222

2323
object `package`
2424
extends implicits.Strings
@@ -59,13 +59,18 @@ object `package`
5959
log.info(s"Got a 410 while calling content api: $message, path: ${request.path}")
6060
NoCache(Gone)
6161
}
62+
63+
// Custom DCR exceptions to distinguish from CAPI/other backend errors.
64+
case error: DCRLocalConnectException =>
65+
throw error
66+
case timeout: DCRTimeoutException =>
67+
log.error(s"Got a timeout while calling DCR: ${timeout.getMessage}, path: ${request.path}", timeout)
68+
NoCache(GatewayTimeout(timeout.getMessage))
69+
6270
case timeout: TimeoutException =>
6371
log.error(s"Got a timeout while calling content api: ${timeout.getMessage}, path: ${request.path}", timeout)
6472
NoCache(GatewayTimeout(timeout.getMessage))
6573

66-
// This is thrown when locally unable to connect to DCR.
67-
case error: DCRLocalConnectException => throw error
68-
6974
case error =>
7075
log.error(s"Content api exception: ${error.getMessage}", error)
7176
Option(error.getCause).foreach { cause =>

common/app/model/dotcomrendering/DotcomRenderingUtils.scala

+1
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ object DotcomRenderingUtils {
164164
calloutsUrl,
165165
article.elements.thumbnail,
166166
edition,
167+
article.trail.webPublicationDate,
167168
),
168169
)
169170
.filter(PageElement.isSupported)

common/app/model/dotcomrendering/pageElements/PageElement.scala

+11-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ import common.Edition
1515
import conf.Configuration
1616
import layout.ContentWidths.DotcomRenderingImageRoleWidthByBreakpointMapping
1717
import model.content._
18+
import model.dotcomrendering.InteractiveSwitchOver
1819
import model.{ImageAsset, ImageElement, ImageMedia, VideoAsset}
20+
import org.joda.time.DateTime
1921
import org.jsoup.Jsoup
2022
import play.api.libs.json._
2123
import views.support.cleaner.SoundcloudHelper
@@ -793,6 +795,7 @@ object PageElement {
793795
calloutsUrl: Option[String],
794796
overrideImage: Option[ImageElement],
795797
edition: Edition,
798+
webPublicationDate: DateTime,
796799
): List[PageElement] = {
797800

798801
def extractAtom: Option[Atom] =
@@ -1062,12 +1065,19 @@ object PageElement {
10621065
}
10631066

10641067
case Some(interactive: InteractiveAtom) => {
1068+
val isLegacy = InteractiveSwitchOver.date.isAfter(webPublicationDate)
10651069
val encodedId = URLEncoder.encode(interactive.id, "UTF-8")
10661070
Some(
10671071
InteractiveAtomBlockElement(
10681072
id = interactive.id,
10691073
url = s"${Configuration.ajax.url}/embed/atom/interactive/$encodedId",
1070-
html = Some(interactive.html),
1074+
// Note, we parse legacy interactives to do minimal cleaning of
1075+
// the HTML (e.g. to ensure all tags are closed). Some break
1076+
// without this. E.g.
1077+
// https://www.theguardian.com/info/ng-interactive/2021/mar/17/make-sense-of-the-week-with-australia-weekend.
1078+
html =
1079+
if (isLegacy) Some(Jsoup.parseBodyFragment(interactive.html).outerHtml)
1080+
else Some(interactive.html),
10711081
css = Some(interactive.css),
10721082
js = interactive.mainJS,
10731083
placeholderUrl = interactive.placeholderUrl,

common/app/renderers/DotcomRenderingService.scala

+5-2
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@ import http.ResultWithPreconnectPreload
2121
import http.HttpPreconnections
2222

2323
import java.net.ConnectException
24+
import java.util.concurrent.TimeoutException
2425

25-
// Introduced as CAPI error handling elsewhere would smother a regular ConnectException.
26+
// Introduced as CAPI error handling elsewhere would smother these otherwise
2627
case class DCRLocalConnectException(message: String) extends ConnectException(message)
28+
case class DCRTimeoutException(message: String) extends TimeoutException(message)
2729

2830
class DotcomRenderingService extends GuLogging with ResultWithPreconnectPreload {
2931

@@ -62,7 +64,8 @@ class DotcomRenderingService extends GuLogging with ResultWithPreconnectPreload
6264
|To get started with dotcom-rendering, see:
6365
|
6466
| https://github.com/guardian/dotcom-rendering""".stripMargin
65-
Future.failed(new DCRLocalConnectException(msg))
67+
Future.failed(DCRLocalConnectException(msg))
68+
case t: TimeoutException => Future.failed(DCRTimeoutException(t.getMessage))
6669
})
6770
}
6871

0 commit comments

Comments
 (0)