13
13
using System . Net . Http . Headers ;
14
14
using System . ServiceModel ;
15
15
using System . Text ;
16
+ using System . Threading ;
16
17
using System . Threading . Tasks ;
18
+ using System . Xml . Linq ;
17
19
using System . Xml . Serialization ;
18
20
19
21
class TextkernelParser : ITextkernelParser
@@ -23,7 +25,6 @@ class TextkernelParser : ITextkernelParser
23
25
string account ;
24
26
string username ;
25
27
string password ;
26
- string environment ;
27
28
28
29
entry [ ] entries = new entry [ 0 ] ;
29
30
@@ -39,21 +40,17 @@ public TextkernelParser(ILoggerFactory logger, string account, string username,
39
40
40
41
this . serializer . UnknownElement += this . UnknownElement ;
41
42
this . serializer . UnknownAttribute += this . UnknownAttribute ;
42
- this . serializer . UnknownNode += this . UnknownNode ;
43
43
this . serializer . UnreferencedObject += this . UnreferencedObject ;
44
44
}
45
45
46
- void UnreferencedObject ( object sender , UnreferencedObjectEventArgs e ) =>
46
+ void UnreferencedObject ( object sender , UnreferencedObjectEventArgs e ) =>
47
47
this . logger . LogWarning ( "Unreferenced Object: {ID} {Object}" , e . UnreferencedId , e . UnreferencedObject ) ;
48
48
49
- void UnknownNode ( object sender , XmlNodeEventArgs e ) =>
50
- this . logger . LogWarning ( "Unknown Node - n:{LineNumber}/p:{LinePosition}, {Node}" , e . LineNumber , e . LinePosition , e . Name ) ;
51
-
52
49
void UnknownAttribute ( object sender , XmlAttributeEventArgs e ) =>
53
- this . logger . LogWarning ( "Unknown Attribute - n:{LineNumber}/p:{LinePosition}, {Attr}" , e . LineNumber , e . LinePosition , e . Attr ) ;
50
+ this . logger . LogWarning ( "Unknown Attribute - n:{LineNumber}/p:{LinePosition}, {Attr}" , e . LineNumber , e . LinePosition , e . Attr . Name ) ;
54
51
55
52
void UnknownElement ( object sender , XmlElementEventArgs e ) =>
56
- this . logger . LogWarning ( "Unknown Element - n:{LineNumber}/p:{LinePosition}, {Element}" , e . LineNumber , e . LinePosition , e . Element ) ;
53
+ this . logger . LogWarning ( "Unknown Element - n:{LineNumber}/p:{LinePosition}, {Element}" , e . LineNumber , e . LinePosition , e . Element . Name ) ;
57
54
58
55
async Task < Profile > ITextkernelParser . Parse ( byte [ ] file )
59
56
{
@@ -66,10 +63,10 @@ async Task<Profile> ITextkernelParser.Parse(byte[] file)
66
63
var result = await extractService . extractAdvancedAsync ( this . account , this . username , this . password , this . entries , null , file , null , null , null ) ;
67
64
sw . Stop ( ) ;
68
65
string rawResult = result . @return ;
69
- this . logger . LogInformation ( "Textkernel Extract Response {Chars}chars in {Duration}ms" , rawResult . Length , sw . ElapsedMilliseconds ) ;
70
-
66
+ this . logger . LogInformation ( "Textkernel Extract Response {Chars}chars in {ServiceDuration}ms" , rawResult . Length , sw . ElapsedMilliseconds ) ;
71
67
72
68
sw . Restart ( ) ;
69
+
73
70
Profile p ;
74
71
75
72
using ( var stream = new MemoryStream ( ) )
@@ -79,10 +76,17 @@ async Task<Profile> ITextkernelParser.Parse(byte[] file)
79
76
writer . Flush ( ) ;
80
77
stream . Position = 0 ;
81
78
82
- p = this . serializer . Deserialize ( stream ) as Profile ;
79
+ // Textkernel emits every node even when it is empty, which deserialises into lots of empty strings,
80
+ // so strip every element with no text content (attribute-only elements included) before deserialising.
81
+ var cleaner = await XDocument . LoadAsync ( stream , LoadOptions . None , CancellationToken . None ) ;
82
+ cleaner . Descendants ( ) . Where ( e => string . IsNullOrEmpty ( e . Value ) ) . Remove ( ) ;
83
+
84
+ // Create a reader for the cleaned XML and deserialise
85
+ using ( var reader = cleaner . CreateReader ( ) )
86
+ p = this . serializer . Deserialize ( reader ) as Profile ;
83
87
}
84
88
sw . Stop ( ) ;
85
- this . logger . LogInformation ( "Textkernel Parsed: {CurrentJob} in {Duration }ms" , p ? . Summary ? . CurrentJob , sw . ElapsedMilliseconds ) ;
89
+ this . logger . LogInformation ( "Textkernel Parsed: {CurrentJob} in {ParseDuration }ms" , p ? . Summary ? . CurrentJob , sw . ElapsedMilliseconds ) ;
86
90
87
91
return p ;
88
92
}
0 commit comments