Skip to content

Commit 977ea6e

Browse files
committed
Change to AngleSharp
1 parent a3a1e91 commit 977ea6e

File tree

4 files changed

+80
-53
lines changed

4 files changed

+80
-53
lines changed

Functions.cs

+66-50
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,28 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.Linq;
34
using System.Net;
45
using System.Net.Http;
56
using System.Text;
67
using System.Threading.Tasks;
78
using System.Xml;
9+
using AngleSharp;
10+
using AngleSharp.Dom;
11+
using AngleSharp.XPath;
812
using ExcelDna.Integration;
913
using ExcelDna.Registration;
10-
using HtmlAgilityPack;
1114

1215
namespace ImportFunctions
1316
{
1417
public static class Functions
1518
{
16-
// We will be using the single HttpClient from multiple threads,
17-
// which is OK as long as we're not changing the default request headers.
18-
static readonly HttpClient _httpClient;
19+
//// We will be using the single HttpClient from multiple threads,
20+
//// which is OK as long as we're not changing the default request headers.
21+
//static readonly HttpClient _httpClient;
1922

2023
static Functions()
2124
{
22-
_httpClient = new HttpClient();
25+
// _httpClient = new HttpClient();
2326
ServicePointManager.SecurityProtocol =
2427
SecurityProtocolType.Tls |
2528
SecurityProtocolType.Tls11 |
@@ -44,12 +47,22 @@ public static async Task<object> ImportXml(string url, string xpathQuery)
4447

4548
try
4649
{
47-
var response = await _httpClient.GetStringAsync(url);
48-
var doc = new HtmlDocument();
49-
doc.LoadHtml(response);
50+
IConfiguration config = Configuration.Default.WithDefaultLoader();
51+
IBrowsingContext context = BrowsingContext.New(config);
52+
IDocument document = await context.OpenAsync(url);
5053

51-
var node = doc.DocumentNode.SelectSingleNode(xpathQuery);
52-
return node?.InnerText ?? "Error: No data found for the given XPath query";
54+
var nodes = document.Body.SelectNodes(xpathQuery);
55+
56+
if (nodes == null || nodes.Count == 0)
57+
return "Error: No data found for the given XPath query";
58+
59+
// Return a two-dimensional object array with a single column containing the TextContent of each matched node
60+
var resultArray = new object[nodes.Count, 1];
61+
for (int i = 0; i < nodes.Count; i++)
62+
{
63+
resultArray[i, 0] = nodes[i].TextContent;
64+
}
65+
return resultArray;
5366
}
5467
catch (HttpRequestException rex)
5568
{
@@ -88,38 +101,17 @@ public static async Task<object> ImportHtml(
88101

89102
try
90103
{
91-
var response = await _httpClient.GetStringAsync(url);
92-
var doc = new HtmlDocument();
93-
doc.LoadHtml(response);
104+
IConfiguration config = Configuration.Default.WithDefaultLoader();
105+
IBrowsingContext context = BrowsingContext.New(config);
106+
IDocument document = await context.OpenAsync(url);
94107

108+
object result;
95109
if (dataType == "table")
96-
return ExtractTable(doc, index);
110+
result = ExtractTable(document, index);
97111
else
98-
return ExtractList(doc, index);
99-
}
100-
catch (HttpRequestException rex)
101-
{
102-
return $"Error: Unable to fetch data from the URL - {rex.Message}";
103-
}
104-
catch (Exception ex)
105-
{
106-
return $"Error: {ex.Message}";
107-
}
108-
}
112+
result = ExtractList(document, index);
109113

110-
[ExcelFunction(Description = "Imports data from a given URL")]
111-
public static async Task<object> HttpGet(string url)
112-
{
113-
if (string.IsNullOrWhiteSpace(url))
114-
{
115-
return "Error: URL is required";
116-
// return ExcelError.ExcelErrorValue;
117-
}
118-
119-
try
120-
{
121-
var response = await _httpClient.GetStringAsync(url);
122-
return response;
114+
return result;
123115
}
124116
catch (HttpRequestException rex)
125117
{
@@ -131,21 +123,45 @@ public static async Task<object> HttpGet(string url)
131123
}
132124
}
133125

134-
static object ExtractTable(HtmlDocument doc, int indexOneBased)
126+
//[ExcelFunction(Description = "Imports data from a given URL")]
127+
//public static async Task<object> HttpGet(string url)
128+
//{
129+
// if (string.IsNullOrWhiteSpace(url))
130+
// {
131+
// return "Error: URL is required";
132+
// // return ExcelError.ExcelErrorValue;
133+
// }
134+
135+
// try
136+
// {
137+
// var response = await _httpClient.GetStringAsync(url);
138+
// return response;
139+
// }
140+
// catch (HttpRequestException rex)
141+
// {
142+
// return $"Error: Unable to fetch data from the URL - {rex.Message}";
143+
// }
144+
// catch (Exception ex)
145+
// {
146+
// return $"Error: {ex.Message}";
147+
// }
148+
//}
149+
150+
static object ExtractTable(IDocument document, int indexOneBased)
135151
{
136-
var tables = doc.DocumentNode.SelectNodes("//table");
152+
var tables = document.Body.SelectNodes("//table");
137153
if (tables == null || tables.Count < indexOneBased)
138154
return "Error: Table not found";
139155

140-
var table = tables[indexOneBased - 1];
156+
var table = (IElement)tables[indexOneBased - 1];
141157

142158
var results = new List<List<string>>();
143-
foreach (var row in table.SelectNodes(".//tr"))
159+
foreach (var row in table.SelectNodes(".//tr").Cast<IElement>())
144160
{
145161
var rowResult = new List<string>();
146-
foreach (var cell in row.SelectNodes(".//th|.//td"))
162+
foreach (var cell in row.SelectNodes(".//th|.//td").Cast<IElement>())
147163
{
148-
rowResult.Add(cell.InnerText.Trim());
164+
rowResult.Add(cell.TextContent);
149165
}
150166
results.Add(rowResult);
151167
}
@@ -162,21 +178,21 @@ static object ExtractTable(HtmlDocument doc, int indexOneBased)
162178
resultArray[i, j] = results[i][j];
163179
}
164180
}
165-
return results;
181+
return resultArray;
166182
}
167183

168-
static object ExtractList(HtmlDocument doc, int indexOneBased)
184+
static object ExtractList(IDocument document, int indexOneBased)
169185
{
170-
var lists = doc.DocumentNode.SelectNodes("//ul | //ol");
186+
var lists = document.Body.SelectNodes("//ul | //ol");
171187
if (lists == null || lists.Count < indexOneBased)
172188
return "Error: List not found";
173189

174-
var list = lists[indexOneBased-1];
190+
var list = (IElement)lists[indexOneBased - 1];
175191

176192
var results = new List<string>();
177193
foreach (var item in list.SelectNodes(".//li"))
178194
{
179-
results.Add(item.InnerText.Trim());
195+
results.Add(item.TextContent);
180196
}
181197

182198
// Convert results to a 2D object array with a single column
@@ -186,7 +202,7 @@ static object ExtractList(HtmlDocument doc, int indexOneBased)
186202
resultArray[i, 0] = results[i];
187203
}
188204

189-
return results;
205+
return resultArray;
190206
}
191207
}
192208
}

ImportFunctions-Example.xlsx

985 Bytes
Binary file not shown.

ImportFunctions.csproj

+13-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<Project Sdk="Microsoft.NET.Sdk">
1+
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
44
<TargetFramework>net48</TargetFramework>
@@ -13,11 +13,22 @@
1313
<PackageReference Include="ExcelDna.AddIn" Version="1.7.0" />
1414
<PackageReference Include="ExcelDna.IntelliSense" Version="1.7.0" />
1515
<PackageReference Include="ExcelDna.Registration" Version="1.7.0" />
16-
<PackageReference Include="HtmlAgilityPack" Version="1.11.55" />
16+
<PackageReference Include="AngleSharp" Version="1.0.7" />
17+
<PackageReference Include="AngleSharp.XPath" Version="2.0.3" />
1718
</ItemGroup>
1819

1920
<ItemGroup>
2021
<Reference Include="System.Net.Http" />
2122
</ItemGroup>
23+
24+
<!-- For .NET Framework, add all the .dll files in the output dir to the packing list -->
25+
<Target Name="PackedReferences" AfterTargets="AfterBuild" BeforeTargets="ExcelDnaBuild">
26+
<ItemGroup>
27+
<References Include="$(OutDir)*.dll" Exclude="$(OutDir)$(TargetFileName)"/>
28+
</ItemGroup>
29+
<PropertyGroup>
30+
<ExcelAddInInclude>@(References)</ExcelAddInInclude>
31+
</PropertyGroup>
32+
</Target>
2233

2334
</Project>

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ The initial functions are:
77
* IMPORTXML
88
* IMPORTHTML
99

10-
The add-in is developed in C# based on the Excel-DNA library, and uses the HtmlAgilityPack as a helper.
10+
The add-in is developed in C# based on the Excel-DNA library, and uses [AngleSharp](https://github.com/AngleSharp/AngleSharp) (with the AngleSharp.XPath extension) to load and parse HTML documents and evaluate XPath queries.
1111

1212
The add-in targets .NET Framework 4.8 and Excel 2007 or later (Windows only).
1313

0 commit comments

Comments
 (0)