1
- /* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
2
- *
3
- * This source is subject to the Microsoft Permissive License.
4
- * Please see the License.txt file for more information.
5
- * All other rights reserved.
6
- *
7
- * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
8
- * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
9
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
10
- * PARTICULAR PURPOSE.
11
- */
12
- using System . Collections . Generic ;
13
- using System . Globalization ;
14
- using System . Linq ;
15
- using AngleSharp . Html . Dom ;
16
- using DocumentFormat . OpenXml ;
17
- using DocumentFormat . OpenXml . Wordprocessing ;
18
-
19
- namespace HtmlToOpenXml . Expressions ;
20
-
21
- /// <summary>
22
- /// Process the parsing of a <c>figcaption</c> element, which is used to describe an image, into a single caption paragraph.
23
- /// </summary>
24
- sealed class FigureCaptionExpression ( IHtmlElement node ) : PhrasingElementExpression ( node )
25
- {
26
-
27
- /// <inheritdoc/>
28
- public override IEnumerable < OpenXmlElement > Interpret ( ParsingContext context )
29
- {
30
- ComposeStyles ( context ) ;
31
- var childElements = Interpret ( context . CreateChild ( this ) , node . ChildNodes ) ;
32
- if ( ! childElements . Any ( ) )
33
- return [ ] ;
34
-
35
- var p = new Paragraph (
36
- new Run (
37
- new Text ( "Figure " ) { Space = SpaceProcessingModeValues . Preserve }
38
- ) ,
39
- new SimpleField (
40
- new Run (
41
- new Text ( AddFigureCaption ( context ) . ToString ( CultureInfo . InvariantCulture ) ) )
42
- ) { Instruction = " SEQ Figure \\ * ARABIC " }
43
- ) {
44
- ParagraphProperties = new ParagraphProperties {
45
- ParagraphStyleId = context . DocumentStyle . GetParagraphStyle ( context . DocumentStyle . DefaultStyles . CaptionStyle ) ,
46
- KeepNext = new KeepNext ( )
47
- }
48
- } ;
49
-
50
- if ( childElements . First ( ) is Run run ) // does the caption start with a run? NOTE(review): the run is edited below but is not visibly added to p in this block - confirm
51
- {
52
- Text ? t = run . GetFirstChild < Text > ( ) ;
53
- if ( t != null )
54
- t . Text = " " + t . InnerText ; // prefix a space so the caption text does not touch the figure number
55
- }
56
-
57
- return [ p ] ;
58
- }
59
-
60
- /// <summary>
61
- /// Allocates the number of the next figure caption. On first use the counter is seeded with the count of figure sequence fields already present in the document.
62
- /// </summary>
63
- /// <returns>The number assigned to the new figure caption.</returns>
64
- private static int AddFigureCaption ( ParsingContext context )
65
- {
66
- var figCaptionRef = context . Properties < int ? > ( "figCaptionRef" ) ;
67
- if ( ! figCaptionRef . HasValue )
68
- {
69
- figCaptionRef = 0 ;
70
- foreach ( var p in context . MainPart . Document . Descendants < SimpleField > ( ) )
71
- {
72
- if ( p . Instruction == " SEQ Figure \\ * ARABIC " )
73
- figCaptionRef ++ ;
74
- }
75
- }
76
- figCaptionRef ++ ;
77
-
78
- context . Properties ( "figCaptionRef" , figCaptionRef ) ;
79
- return figCaptionRef . Value ;
80
- }
81
- }
1
+ /* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
2
+ *
3
+ * This source is subject to the Microsoft Permissive License.
4
+ * Please see the License.txt file for more information.
5
+ * All other rights reserved.
6
+ *
7
+ * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
8
+ * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
9
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
10
+ * PARTICULAR PURPOSE.
11
+ */
12
+ using System . Collections . Generic ;
13
+ using System . Globalization ;
14
+ using System . Linq ;
15
+ using AngleSharp . Dom ;
16
+ using AngleSharp . Html . Dom ;
17
+ using DocumentFormat . OpenXml ;
18
+ using DocumentFormat . OpenXml . Wordprocessing ;
19
+
20
+ namespace HtmlToOpenXml . Expressions ;
21
+
22
/// <summary>
/// Process the parsing of a <c>figcaption</c> element, which is used to describe an image.
/// The caption content is rendered as block elements; the "Figure n" label is merged
/// into the first paragraph, or emitted as its own paragraph when the caption does not
/// start with one.
/// </summary>
sealed class FigureCaptionExpression(IHtmlElement node) : BlockElementExpression(node)
{
    /// <summary>
    /// Instruction of the sequence field that numbers the figures. The <c>\* ARABIC</c>
    /// switch must be written without a space between the backslash and the asterisk.
    /// </summary>
    private const string FigureSequenceInstruction = " SEQ Figure \\* ARABIC ";

    /// <summary>Key under which the last allocated figure number is stored in the parsing context.</summary>
    private const string FigureCounterKey = "figCaptionRef";

    /// <inheritdoc/>
    public override IEnumerable<OpenXmlElement> Interpret(ParsingContext context)
    {
        ComposeStyles(context);

        // Materialise once: the sequence is inspected and mutated several times below,
        // which would be unreliable against a lazily evaluated enumerable.
        List<OpenXmlElement> childElements =
            [.. Interpret(context.CreateChild(this), node.ChildNodes)];

        // The figure number is allocated even when the caption has no content.
        var labelRun = new Run(
            new Text("Figure ") { Space = SpaceProcessingModeValues.Preserve });
        var numberField = new SimpleField(
            new Run(
                new Text(AddFigureCaption(context).ToString(CultureInfo.InvariantCulture))))
        {
            Instruction = FigureSequenceInstruction
        };

        if (childElements.Count == 0)
        {
            // Empty caption: only the "Figure n" label is produced.
            return
            [
                new Paragraph(labelRun, numberField)
                {
                    ParagraphProperties = new ParagraphProperties
                    {
                        ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle),
                        KeepNext = DetermineKeepNext(node),
                    }
                }
            ];
        }

        if (childElements[0] is Paragraph firstParagraph)
        {
            // Add the figure number reference to the start of the first paragraph.
            // Every insertion uses the same reference child, so the elements are inserted
            // back-to-front to end up as: label, number, separating space.
            // NOTE(review): when the paragraph has no properties the reference is null;
            // the Open XML SDK is expected to prepend in that case - confirm.
            var firstProperties = firstParagraph.GetFirstChild<ParagraphProperties>();
            firstParagraph.InsertAfter(
                new Run(new Text(" ") { Space = SpaceProcessingModeValues.Preserve }),
                firstProperties);
            firstParagraph.InsertAfter(numberField, firstProperties);
            firstParagraph.InsertAfter(labelRun, firstProperties);
        }
        else
        {
            // The first child of the figure caption is a table or something.
            // Just prepend a new paragraph with the figure number reference.
            childElements.Insert(0, new Paragraph(labelRun, numberField));
        }

        ParagraphProperties? lastProperties = null;
        foreach (var paragraph in childElements.OfType<Paragraph>())
        {
            var properties = paragraph.ParagraphProperties ??= new ParagraphProperties();

            // The style id is requested per paragraph (not hoisted) so that no element
            // instance is shared between two parents.
            properties.ParagraphStyleId ??= context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.CaptionStyle);

            // Keep caption paragraphs together.
            properties.KeepNext = new KeepNext();
            lastProperties = properties;
        }

        // Only the last caption paragraph depends on what follows the figcaption.
        if (lastProperties is not null)
        {
            lastProperties.KeepNext = DetermineKeepNext(node);
        }

        return childElements;
    }

    /// <summary>
    /// Add a new figure caption to the document. On first use, the counter is seeded
    /// with the number of figure sequence fields already present in the main document.
    /// </summary>
    /// <param name="context">The parsing context that stores the figure counter.</param>
    /// <returns>Returns the id of the new figure caption.</returns>
    private static int AddFigureCaption(ParsingContext context)
    {
        int? figCaptionRef = context.Properties<int?>(FigureCounterKey);
        figCaptionRef ??= context.MainPart.Document
            .Descendants<SimpleField>()
            .Count(field => field.Instruction == FigureSequenceInstruction);
        figCaptionRef++;

        context.Properties(FigureCounterKey, figCaptionRef);
        return figCaptionRef.Value;
    }

    /// <summary>
    /// Determines whether the KeepNext property should apply to this caption.
    /// </summary>
    /// <param name="node">The <c>figcaption</c> element being processed.</param>
    /// <returns>
    /// A new <see cref="KeepNext"/> when the element has a next element sibling;
    /// otherwise <see langword="null"/>.
    /// </returns>
    private static KeepNext? DetermineKeepNext(IHtmlElement node)
    {
        // A caption at the end of a figure will have no next sibling.
        return node.NextElementSibling is null ? null : new KeepNext();
    }
}
0 commit comments