Skip to content

Commit a3a1e91

Browse files
committed
Tweak ImportHtml ExtractX functions
1 parent c2c20e3 commit a3a1e91

File tree

2 files changed

+62
-22
lines changed

2 files changed

+62
-22
lines changed

Functions.cs

+62-18
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Collections.Generic;
23
using System.Net;
34
using System.Net.Http;
45
using System.Text;
@@ -70,7 +71,7 @@ public static async Task<object> ImportHtml(
7071
string url,
7172
[ExcelArgument(Description = "Type of data to import. Accepts either 'table' for HTML tables or 'list' for HTML lists (ul/ol).")]
7273
string dataType,
73-
[ExcelArgument(Description = "Zero-based index of the table or list to import from the HTML page. For example, 0 for the first table/list, 1 for the second, and so on.")]
74+
[ExcelArgument(Description = "One-based index of the table or list to import from the HTML page. For example, 1 for the first table/list, 2 for the second, and so on.")]
7475
int index)
7576
{
7677
if (string.IsNullOrWhiteSpace(url))
@@ -106,43 +107,86 @@ public static async Task<object> ImportHtml(
106107
}
107108
}
108109

109-
static object ExtractTable(HtmlDocument doc, int index)
110+
/// <summary>
/// Worksheet function that issues a GET request through the shared
/// <c>_httpClient</c> and returns the response body as text.
/// </summary>
/// <param name="url">Address to fetch; must not be null, empty, or whitespace.</param>
/// <returns>
/// The response body on success; otherwise a string beginning with "Error:"
/// that describes what went wrong.
/// </returns>
[ExcelFunction(Description = "Imports data from a given URL")]
public static async Task<object> HttpGet(string url)
{
    // Guard clause: a blank URL is reported as an error string rather than thrown.
    if (string.IsNullOrWhiteSpace(url))
    {
        return "Error: URL is required";
    }

    try
    {
        return await _httpClient.GetStringAsync(url);
    }
    catch (HttpRequestException requestFailure)
    {
        // Request-level failures get a more specific message than the generic fallback.
        return "Error: Unable to fetch data from the URL - " + requestFailure.Message;
    }
    catch (Exception unexpected)
    {
        return "Error: " + unexpected.Message;
    }
}
133+
134+
/// <summary>
/// Extracts the text of the Nth <c>table</c> element in the document as a
/// rectangular 2D array (rows x columns).
/// </summary>
/// <param name="doc">Parsed HTML document to search.</param>
/// <param name="indexOneBased">One-based position of the table among all <c>//table</c> matches.</param>
/// <returns>
/// An <c>object[,]</c> of trimmed cell text, or a string beginning with "Error:"
/// when the table does not exist or contains no cells.
/// </returns>
static object ExtractTable(HtmlDocument doc, int indexOneBased)
{
    var tables = doc.DocumentNode.SelectNodes("//table");
    // Reject indexes below 1 as well as past the end; otherwise indexOneBased - 1 is negative.
    if (tables == null || indexOneBased < 1 || tables.Count < indexOneBased)
        return "Error: Table not found";

    var table = tables[indexOneBased - 1];

    // SelectNodes yields null (not an empty collection) when nothing matches.
    var rows = table.SelectNodes(".//tr");
    if (rows == null)
        return "Error: No data found in the table";

    var results = new List<List<string>>();
    int columnCount = 0;
    foreach (var row in rows)
    {
        var rowResult = new List<string>();
        var cells = row.SelectNodes(".//th|.//td");
        if (cells != null)
        {
            foreach (var cell in cells)
            {
                rowResult.Add(cell.InnerText.Trim());
            }
        }
        results.Add(rowResult);
        // Track the widest row so the array can hold every cell of every row.
        columnCount = Math.Max(columnCount, rowResult.Count);
    }

    if (results.Count == 0 || columnCount == 0)
        return "Error: No data found in the table";

    // Convert results to a rectangular 2D object array; short rows are padded with empty strings.
    var resultArray = new object[results.Count, columnCount];
    for (int i = 0; i < results.Count; i++)
    {
        for (int j = 0; j < columnCount; j++)
        {
            resultArray[i, j] = j < results[i].Count ? results[i][j] : string.Empty;
        }
    }
    // Return the array that was built, not the intermediate list of lists.
    return resultArray;
}
130167

131-
static object ExtractList(HtmlDocument doc, int index)
168+
/// <summary>
/// Extracts the item text of the Nth <c>ul</c>/<c>ol</c> element in the document
/// as a single-column 2D array.
/// </summary>
/// <param name="doc">Parsed HTML document to search.</param>
/// <param name="indexOneBased">One-based position of the list among all <c>//ul | //ol</c> matches.</param>
/// <returns>
/// An <c>object[n, 1]</c> of trimmed item text, or a string beginning with "Error:"
/// when the list does not exist or has no items.
/// </returns>
static object ExtractList(HtmlDocument doc, int indexOneBased)
{
    var lists = doc.DocumentNode.SelectNodes("//ul | //ol");
    // Reject indexes below 1 as well as past the end; otherwise indexOneBased - 1 is negative.
    if (lists == null || indexOneBased < 1 || lists.Count < indexOneBased)
        return "Error: List not found";

    var list = lists[indexOneBased - 1];

    // SelectNodes yields null (not an empty collection) when nothing matches.
    var items = list.SelectNodes(".//li");
    if (items == null || items.Count == 0)
        return "Error: No data found in the list";

    // Build a 2D object array with a single column, one row per list item.
    var resultArray = new object[items.Count, 1];
    for (int i = 0; i < items.Count; i++)
    {
        resultArray[i, 0] = items[i].InnerText.Trim();
    }

    // Return the array that was built, not an intermediate list.
    return resultArray;
}
147191
}
148192
}

ImportFunctions.csproj

-4
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,7 @@
33
<PropertyGroup>
44
<TargetFramework>net48</TargetFramework>
55

6-
<AssemblyTitle>Excel-DNA ImportFunctions Add-In</AssemblyTitle>
7-
<Product>ExcelDna.ImportFunctions</Product>
8-
96
<ExcelAddInExplicitRegistration>true</ExcelAddInExplicitRegistration>
10-
117
<ExcelDnaPackXllSuffix></ExcelDnaPackXllSuffix>
128
<ExcelDnaPack32BitXllName>ExcelDna.ImportFunctions32</ExcelDnaPack32BitXllName>
139
<ExcelDnaPack64BitXllName>ExcelDna.ImportFunctions64</ExcelDnaPack64BitXllName>

0 commit comments

Comments
 (0)