Skip to content

Commit 759357c

Browse files
committed
Improved parsing
1 parent b8bdecf commit 759357c

File tree

2 files changed

+22
-12
lines changed

2 files changed

+22
-12
lines changed

Models/Profile.cs

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,5 +39,11 @@ public class Profile
3939

4040
[XmlElement("Performance")]
4141
public Performance Performance { get; set; }
42+
43+
[XmlAttribute("lang")]
44+
public string Language { get; set; }
45+
46+
[XmlAttribute("file")]
47+
public string File { get; set; }
4248
}
4349
}

TextkernelParser.cs

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,9 @@
1313
using System.Net.Http.Headers;
1414
using System.ServiceModel;
1515
using System.Text;
16+
using System.Threading;
1617
using System.Threading.Tasks;
18+
using System.Xml.Linq;
1719
using System.Xml.Serialization;
1820

1921
class TextkernelParser : ITextkernelParser
@@ -23,7 +25,6 @@ class TextkernelParser : ITextkernelParser
2325
string account;
2426
string username;
2527
string password;
26-
string environment;
2728

2829
entry[] entries = new entry[0];
2930

@@ -39,21 +40,17 @@ public TextkernelParser(ILoggerFactory logger, string account, string username,
3940

4041
this.serializer.UnknownElement += this.UnknownElement;
4142
this.serializer.UnknownAttribute += this.UnknownAttribute;
42-
this.serializer.UnknownNode += this.UnknownNode;
4343
this.serializer.UnreferencedObject += this.UnreferencedObject;
4444
}
4545

46-
void UnreferencedObject(object sender, UnreferencedObjectEventArgs e) =>
46+
void UnreferencedObject(object sender, UnreferencedObjectEventArgs e) =>
4747
this.logger.LogWarning("Unreferenced Object: {ID} {Object}", e.UnreferencedId, e.UnreferencedObject);
4848

49-
void UnknownNode(object sender, XmlNodeEventArgs e) =>
50-
this.logger.LogWarning("Unknown Node - n:{LineNumber}/p:{LinePosition}, {Node}", e.LineNumber, e.LinePosition, e.Name);
51-
5249
void UnknownAttribute(object sender, XmlAttributeEventArgs e) =>
53-
this.logger.LogWarning("Unknown Attribute - n:{LineNumber}/p:{LinePosition}, {Attr}", e.LineNumber, e.LinePosition, e.Attr);
50+
this.logger.LogWarning("Unknown Attribute - n:{LineNumber}/p:{LinePosition}, {Attr}", e.LineNumber, e.LinePosition, e.Attr.Name);
5451

5552
void UnknownElement(object sender, XmlElementEventArgs e) =>
56-
this.logger.LogWarning("Unknown Element - n:{LineNumber}/p:{LinePosition}, {Element}", e.LineNumber, e.LinePosition, e.Element);
53+
this.logger.LogWarning("Unknown Element - n:{LineNumber}/p:{LinePosition}, {Element}", e.LineNumber, e.LinePosition, e.Element.Name);
5754

5855
async Task<Profile> ITextkernelParser.Parse(byte[] file)
5956
{
@@ -66,10 +63,10 @@ async Task<Profile> ITextkernelParser.Parse(byte[] file)
6663
var result = await extractService.extractAdvancedAsync(this.account, this.username, this.password, this.entries, null, file, null, null, null);
6764
sw.Stop();
6865
string rawResult = result.@return;
69-
this.logger.LogInformation("Textkernel Extract Response {Chars}chars in {Duration}ms", rawResult.Length, sw.ElapsedMilliseconds);
70-
66+
this.logger.LogInformation("Textkernel Extract Response {Chars}chars in {ServiceDuration}ms", rawResult.Length, sw.ElapsedMilliseconds);
7167

7268
sw.Restart();
69+
7370
Profile p;
7471

7572
using (var stream = new MemoryStream())
@@ -79,10 +76,17 @@ async Task<Profile> ITextkernelParser.Parse(byte[] file)
7976
writer.Flush();
8077
stream.Position = 0;
8178

82-
p = this.serializer.Deserialize(stream) as Profile;
79+
// Textkernel includes all empty nodes, which results in lots of serialised empty strings,
80+
// strip empty nodes out first
81+
var cleaner = await XDocument.LoadAsync(stream, LoadOptions.None, CancellationToken.None);
82+
cleaner.Descendants().Where(e => string.IsNullOrEmpty(e.Value)).Remove();
83+
84+
// Create a reader for the cleaned XML and deserialise
85+
using (var reader = cleaner.CreateReader())
86+
p = this.serializer.Deserialize(reader) as Profile;
8387
}
8488
sw.Stop();
85-
this.logger.LogInformation("Textkernel Parsed: {CurrentJob} in {Duration}ms", p?.Summary?.CurrentJob, sw.ElapsedMilliseconds);
89+
this.logger.LogInformation("Textkernel Parsed: {CurrentJob} in {ParseDuration}ms", p?.Summary?.CurrentJob, sw.ElapsedMilliseconds);
8690

8791
return p;
8892
}

0 commit comments

Comments
 (0)