1
1
using System ;
2
2
using System . Collections . Generic ;
3
+ using System . Linq ;
3
4
using System . Net ;
4
5
using System . Net . Http ;
5
6
using System . Text ;
6
7
using System . Threading . Tasks ;
7
8
using System . Xml ;
9
+ using AngleSharp ;
10
+ using AngleSharp . Dom ;
11
+ using AngleSharp . XPath ;
8
12
using ExcelDna . Integration ;
9
13
using ExcelDna . Registration ;
10
- using HtmlAgilityPack ;
11
14
12
15
namespace ImportFunctions
13
16
{
14
17
public static class Functions
15
18
{
16
- // We will be using the single HttpClient from multiple threads,
17
- // which is OK as long as we're not changing the default request headers.
18
- static readonly HttpClient _httpClient ;
19
+ //// We will be using the single HttpClient from multiple threads,
20
+ //// which is OK as long as we're not changing the default request headers.
21
+ // static readonly HttpClient _httpClient;
19
22
20
23
static Functions ( )
21
24
{
22
- _httpClient = new HttpClient ( ) ;
25
+ // _httpClient = new HttpClient();
23
26
ServicePointManager . SecurityProtocol =
24
27
SecurityProtocolType . Tls |
25
28
SecurityProtocolType . Tls11 |
@@ -44,12 +47,22 @@ public static async Task<object> ImportXml(string url, string xpathQuery)
44
47
45
48
try
46
49
{
47
- var response = await _httpClient . GetStringAsync ( url ) ;
48
- var doc = new HtmlDocument ( ) ;
49
- doc . LoadHtml ( response ) ;
50
+ IConfiguration config = Configuration . Default . WithDefaultLoader ( ) ;
51
+ IBrowsingContext context = BrowsingContext . New ( config ) ;
52
+ IDocument document = await context . OpenAsync ( url ) ;
50
53
51
- var node = doc . DocumentNode . SelectSingleNode ( xpathQuery ) ;
52
- return node ? . InnerText ?? "Error: No data found for the given XPath query" ;
54
+ var nodes = document . Body . SelectNodes ( xpathQuery ) ;
55
+
56
+ if ( nodes == null || nodes . Count == 0 )
57
+ return "Error: No data found for the given XPath query" ;
58
+
59
+ // Return a 2D object[,] array with a single column containing the TextContent of each matched node
60
+ var resultArray = new object [ nodes . Count , 1 ] ;
61
+ for ( int i = 0 ; i < nodes . Count ; i ++ )
62
+ {
63
+ resultArray [ i , 0 ] = nodes [ i ] . TextContent ;
64
+ }
65
+ return resultArray ;
53
66
}
54
67
catch ( HttpRequestException rex )
55
68
{
@@ -88,38 +101,17 @@ public static async Task<object> ImportHtml(
88
101
89
102
try
90
103
{
91
- var response = await _httpClient . GetStringAsync ( url ) ;
92
- var doc = new HtmlDocument ( ) ;
93
- doc . LoadHtml ( response ) ;
104
+ IConfiguration config = Configuration . Default . WithDefaultLoader ( ) ;
105
+ IBrowsingContext context = BrowsingContext . New ( config ) ;
106
+ IDocument document = await context . OpenAsync ( url ) ;
94
107
108
+ object result ;
95
109
if ( dataType == "table" )
96
- return ExtractTable ( doc , index ) ;
110
+ result = ExtractTable ( document , index ) ;
97
111
else
98
- return ExtractList ( doc , index ) ;
99
- }
100
- catch ( HttpRequestException rex )
101
- {
102
- return $ "Error: Unable to fetch data from the URL - { rex . Message } ";
103
- }
104
- catch ( Exception ex )
105
- {
106
- return $ "Error: { ex . Message } ";
107
- }
108
- }
112
+ result = ExtractList ( document , index ) ;
109
113
110
- [ ExcelFunction ( Description = "Imports data from a given URL" ) ]
111
- public static async Task < object > HttpGet ( string url )
112
- {
113
- if ( string . IsNullOrWhiteSpace ( url ) )
114
- {
115
- return "Error: URL is required" ;
116
- // return ExcelError.ExcelErrorValue;
117
- }
118
-
119
- try
120
- {
121
- var response = await _httpClient . GetStringAsync ( url ) ;
122
- return response ;
114
+ return result ;
123
115
}
124
116
catch ( HttpRequestException rex )
125
117
{
@@ -131,21 +123,45 @@ public static async Task<object> HttpGet(string url)
131
123
}
132
124
}
133
125
134
- static object ExtractTable ( HtmlDocument doc , int indexOneBased )
126
+ //[ExcelFunction(Description = "Imports data from a given URL")]
127
+ //public static async Task<object> HttpGet(string url)
128
+ //{
129
+ // if (string.IsNullOrWhiteSpace(url))
130
+ // {
131
+ // return "Error: URL is required";
132
+ // // return ExcelError.ExcelErrorValue;
133
+ // }
134
+
135
+ // try
136
+ // {
137
+ // var response = await _httpClient.GetStringAsync(url);
138
+ // return response;
139
+ // }
140
+ // catch (HttpRequestException rex)
141
+ // {
142
+ // return $"Error: Unable to fetch data from the URL - {rex.Message}";
143
+ // }
144
+ // catch (Exception ex)
145
+ // {
146
+ // return $"Error: {ex.Message}";
147
+ // }
148
+ //}
149
+
150
+ static object ExtractTable ( IDocument document , int indexOneBased )
135
151
{
136
- var tables = doc . DocumentNode . SelectNodes ( "//table" ) ;
152
+ var tables = document . Body . SelectNodes ( "//table" ) ;
137
153
if ( tables == null || tables . Count < indexOneBased )
138
154
return "Error: Table not found" ;
139
155
140
- var table = tables [ indexOneBased - 1 ] ;
156
+ var table = ( IElement ) tables [ indexOneBased - 1 ] ;
141
157
142
158
var results = new List < List < string > > ( ) ;
143
- foreach ( var row in table . SelectNodes ( ".//tr" ) )
159
+ foreach ( var row in table . SelectNodes ( ".//tr" ) . Cast < IElement > ( ) )
144
160
{
145
161
var rowResult = new List < string > ( ) ;
146
- foreach ( var cell in row . SelectNodes ( ".//th|.//td" ) )
162
+ foreach ( var cell in row . SelectNodes ( ".//th|.//td" ) . Cast < IElement > ( ) )
147
163
{
148
- rowResult . Add ( cell . InnerText . Trim ( ) ) ;
164
+ rowResult . Add ( cell . TextContent ) ;
149
165
}
150
166
results . Add ( rowResult ) ;
151
167
}
@@ -162,21 +178,21 @@ static object ExtractTable(HtmlDocument doc, int indexOneBased)
162
178
resultArray [ i , j ] = results [ i ] [ j ] ;
163
179
}
164
180
}
165
- return results ;
181
+ return resultArray ;
166
182
}
167
183
168
- static object ExtractList ( HtmlDocument doc , int indexOneBased )
184
+ static object ExtractList ( IDocument document , int indexOneBased )
169
185
{
170
- var lists = doc . DocumentNode . SelectNodes ( "//ul | //ol" ) ;
186
+ var lists = document . Body . SelectNodes ( "//ul | //ol" ) ;
171
187
if ( lists == null || lists . Count < indexOneBased )
172
188
return "Error: List not found" ;
173
189
174
- var list = lists [ indexOneBased - 1 ] ;
190
+ var list = ( IElement ) lists [ indexOneBased - 1 ] ;
175
191
176
192
var results = new List < string > ( ) ;
177
193
foreach ( var item in list . SelectNodes ( ".//li" ) )
178
194
{
179
- results . Add ( item . InnerText . Trim ( ) ) ;
195
+ results . Add ( item . TextContent ) ;
180
196
}
181
197
182
198
// Convert results to a 2D object array with a single column
@@ -186,7 +202,7 @@ static object ExtractList(HtmlDocument doc, int indexOneBased)
186
202
resultArray [ i , 0 ] = results [ i ] ;
187
203
}
188
204
189
- return results ;
205
+ return resultArray ;
190
206
}
191
207
}
192
208
}
0 commit comments