Skip to content

Commit e46adfb

Browse files
authored
Fix <figcaption> Conversion (#197)
* Fix <figcaption> Conversion No longer discard parsed child elements when adding figure number reference. Change FigureCaptionExpression from inheriting PhrasingElementExpression to BlockElementExpression to support more complex <figcaption> contents. <figcaption> can only be the first or last element of <figure>. Do not set keepnext if <figcaption> is last.
1 parent 90bf035 commit e46adfb

File tree

1 file changed

+132
-81
lines changed

1 file changed

+132
-81
lines changed
Lines changed: 132 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,81 +1,132 @@
1-
/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
2-
*
3-
* This source is subject to the Microsoft Permissive License.
4-
* Please see the License.txt file for more information.
5-
* All other rights reserved.
6-
*
7-
* THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
8-
* KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
9-
* IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
10-
* PARTICULAR PURPOSE.
11-
*/
12-
using System.Collections.Generic;
13-
using System.Globalization;
14-
using System.Linq;
15-
using AngleSharp.Html.Dom;
16-
using DocumentFormat.OpenXml;
17-
using DocumentFormat.OpenXml.Wordprocessing;
18-
19-
namespace HtmlToOpenXml.Expressions;
20-
21-
/// <summary>
22-
/// Process the parsing of a <c>figcaption</c> element, which is used to describe an image.
23-
/// </summary>
24-
sealed class FigureCaptionExpression(IHtmlElement node) : PhrasingElementExpression(node)
{
    /// <summary>Field instruction shared by every auto-numbered "Figure" sequence field.</summary>
    private const string FigureSequenceInstruction = " SEQ Figure \\* ARABIC ";

    /// <summary>Key under which the running figure count is kept in the parsing context.</summary>
    private const string FigureCounterKey = "figCaptionRef";

    /// <summary>
    /// Converts the <c>figcaption</c> into a single caption paragraph made of the
    /// "Figure N" reference followed by the parsed content of the element.
    /// </summary>
    /// <param name="context">The parsing context of the current conversion.</param>
    /// <returns>
    /// One caption paragraph, or an empty sequence when the element produced no content.
    /// </returns>
    public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
    {
        ComposeStyles(context);

        // Materialise once: the result is inspected, patched and then appended below.
        var childElements = Interpret(context.CreateChild(this), node.ChildNodes).ToList();
        if (childElements.Count == 0)
        {
            return [];
        }

        var p = new Paragraph (
            new Run(
                new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve }
            ),
            new SimpleField(
                new Run(
                    new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture)))
            ) { Instruction = FigureSequenceInstruction }
        ) {
            ParagraphProperties = new ParagraphProperties {
                ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle),
                KeepNext = new KeepNext()
            }
        };

        if (childElements[0] is Run run) // any caption?
        {
            Text? t = run.GetFirstChild<Text>();
            if (t != null)
            {
                // Separate the caption text from the figure number. The leading
                // space is only significant when whitespace is marked as preserved.
                t.Text = " " + t.InnerText;
                t.Space = SpaceProcessingModeValues.Preserve;
            }
        }

        // Attach the parsed caption content; without this the caption text is
        // silently dropped and only "Figure N" reaches the document.
        p.Append(childElements);

        return [p];
    }

    /// <summary>
    /// Add a new figure caption to the document.
    /// </summary>
    /// <param name="context">The parsing context holding the running figure count.</param>
    /// <returns>Returns the id of the new figure caption.</returns>
    private static int AddFigureCaption(ParsingContext context)
    {
        var figCaptionRef = context.Properties<int?>(FigureCounterKey);
        if (!figCaptionRef.HasValue)
        {
            // First caption of this conversion: continue after the figure
            // sequence fields that already exist in the document.
            figCaptionRef = context.MainPart.Document
                .Descendants<SimpleField>()
                .Count(field => field.Instruction == FigureSequenceInstruction);
        }
        figCaptionRef++;

        context.Properties(FigureCounterKey, figCaptionRef);
        return figCaptionRef.Value;
    }
}
1+
/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
2+
*
3+
* This source is subject to the Microsoft Permissive License.
4+
* Please see the License.txt file for more information.
5+
* All other rights reserved.
6+
*
7+
* THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
8+
* KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
9+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
10+
* PARTICULAR PURPOSE.
11+
*/
12+
using System.Collections.Generic;
13+
using System.Globalization;
14+
using System.Linq;
15+
using AngleSharp.Dom;
16+
using AngleSharp.Html.Dom;
17+
using DocumentFormat.OpenXml;
18+
using DocumentFormat.OpenXml.Wordprocessing;
19+
20+
namespace HtmlToOpenXml.Expressions;
21+
22+
/// <summary>
23+
/// Process the parsing of a <c>figcaption</c> element, which is used to describe an image.
24+
/// </summary>
25+
sealed class FigureCaptionExpression(IHtmlElement node) : BlockElementExpression(node)
{
    /// <summary>Field instruction shared by every auto-numbered "Figure" sequence field.</summary>
    private const string FigureSequenceInstruction = " SEQ Figure \\* ARABIC ";

    /// <summary>Key under which the running figure count is kept in the parsing context.</summary>
    private const string FigureCounterKey = "figCaptionRef";

    /// <summary>
    /// Converts the <c>figcaption</c> into caption-styled block elements, with the
    /// "Figure N" reference placed at the start of the first paragraph.
    /// </summary>
    /// <param name="context">The parsing context of the current conversion.</param>
    /// <returns>
    /// The parsed block elements of the caption. When the first element is not a
    /// paragraph (or the caption is empty) a paragraph holding only the figure
    /// reference is prepended.
    /// </returns>
    public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
    {
        ComposeStyles(context);

        // Materialise once: the elements are inspected and mutated several times
        // below, which must always operate on the same instances.
        List<OpenXmlElement> childElements =
            Interpret(context.CreateChild(this), node.ChildNodes).ToList();

        var figureLabel = new Run(
            new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve }
        );
        var figureNumber = new SimpleField(
            new Run(
                new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture)))
        )
        { Instruction = FigureSequenceInstruction };

        if (childElements.FirstOrDefault() is Paragraph firstParagraph)
        {
            // Add the figure number reference to the start of the first paragraph.
            // Each element is inserted right after the paragraph properties, hence
            // the reverse order: the result reads label, number, separator space.
            // NOTE(review): when the paragraph has no properties the reference child
            // is null, which the Open XML SDK treats as "prepend" - confirm.
            var properties = firstParagraph.GetFirstChild<ParagraphProperties>();
            firstParagraph.InsertAfter(new Run(
                new Text(" ") { Space = SpaceProcessingModeValues.Preserve }
            ), properties);
            firstParagraph.InsertAfter(figureNumber, properties);
            firstParagraph.InsertAfter(figureLabel, properties);
        }
        else
        {
            // The caption is empty, or its first child is a table or something else:
            // prepend a new paragraph carrying only the figure number reference.
            childElements.Insert(0, new Paragraph(figureLabel, figureNumber));
        }

        var captionParagraphs = childElements.OfType<Paragraph>().ToList();
        for (int i = 0; i < captionParagraphs.Count; i++)
        {
            var paragraph = captionParagraphs[i];
            bool isLast = i == captionParagraphs.Count - 1;

            paragraph.ParagraphProperties ??= new ParagraphProperties();
            paragraph.ParagraphProperties.ParagraphStyleId ??= context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle);

            // Keep caption paragraphs together; only the last one depends on
            // whether something follows the caption inside its parent.
            paragraph.ParagraphProperties.KeepNext = isLast
                ? DetermineKeepNext(node)
                : new KeepNext();
        }

        return childElements;
    }

    /// <summary>
    /// Add a new figure caption to the document.
    /// </summary>
    /// <param name="context">The parsing context holding the running figure count.</param>
    /// <returns>Returns the id of the new figure caption.</returns>
    private static int AddFigureCaption(ParsingContext context)
    {
        var figCaptionRef = context.Properties<int?>(FigureCounterKey);
        if (!figCaptionRef.HasValue)
        {
            // First caption of this conversion: continue after the figure
            // sequence fields that already exist in the document.
            figCaptionRef = context.MainPart.Document
                .Descendants<SimpleField>()
                .Count(field => field.Instruction == FigureSequenceInstruction);
        }
        figCaptionRef++;

        context.Properties(FigureCounterKey, figCaptionRef);
        return figCaptionRef.Value;
    }

    /// <summary>
    /// Determines whether the KeepNext property should apply to this caption.
    /// </summary>
    /// <param name="node">The <c>figcaption</c> element being converted.</param>
    /// <returns>
    /// A new <see cref="KeepNext"/> when another element follows the caption,
    /// otherwise <see langword="null"/>.
    /// </returns>
    private static KeepNext? DetermineKeepNext(IHtmlElement node)
    {
        // A caption at the end of a figure will have no next sibling, so there is
        // nothing to keep it attached to.
        return node.NextElementSibling is null ? null : new KeepNext();
    }
}

0 commit comments

Comments
 (0)