Skip to content

Commit bc306e5

Browse files
DatabaseLoader sample loading an IDataView from SQL Server localdb (dotnet#617)
* DatabaseLoader sample - Baseline * DatabaseLoader working against localdb attached from DB path file. * Removed unneeded old project file * Added TBD README.md file * Removed internal NuGet feeds from the nuget.config * Added a shorter name for the localdb database instead of one based on the file path * Added retry logic policy to the connection string when using Azure SQL Database * Simplified so it doesn't use the ProviderFactory code but simply SqlClientFactory.Instance * Updated to public previews * Removed unneeded CsvParser reference * Comments update for conn-strings
1 parent 6282487 commit bc306e5

File tree

11 files changed

+610
-6
lines changed

11 files changed

+610
-6
lines changed

.gitignore

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -246,11 +246,6 @@ UpgradeLog*.XML
246246
UpgradeLog*.htm
247247
ServiceFabricBackup/
248248

249-
# SQL Server files
250-
*.mdf
251-
*.ldf
252-
*.ndf
253-
254249
# Business Intelligence projects
255250
*.rdl.data
256251
*.bim.layout
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
Microsoft Visual Studio Solution File, Format Version 12.00
3+
# Visual Studio Version 16
4+
VisualStudioVersion = 16.0.28803.452
5+
MinimumVisualStudioVersion = 10.0.40219.1
6+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DatabaseLoaderConsoleApp", "DatabaseLoaderConsoleApp\DatabaseLoaderConsoleApp.csproj", "{9058A1BB-87A6-4A9A-A3B2-F2C81CB9B9EA}"
7+
EndProject
8+
Global
9+
GlobalSection(SolutionConfigurationPlatforms) = preSolution
10+
Debug|Any CPU = Debug|Any CPU
11+
Release|Any CPU = Release|Any CPU
12+
EndGlobalSection
13+
GlobalSection(ProjectConfigurationPlatforms) = postSolution
14+
{9058A1BB-87A6-4A9A-A3B2-F2C81CB9B9EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15+
{9058A1BB-87A6-4A9A-A3B2-F2C81CB9B9EA}.Debug|Any CPU.Build.0 = Debug|Any CPU
16+
{9058A1BB-87A6-4A9A-A3B2-F2C81CB9B9EA}.Release|Any CPU.ActiveCfg = Release|Any CPU
17+
{9058A1BB-87A6-4A9A-A3B2-F2C81CB9B9EA}.Release|Any CPU.Build.0 = Release|Any CPU
18+
EndGlobalSection
19+
GlobalSection(SolutionProperties) = preSolution
20+
HideSolutionNode = FALSE
21+
EndGlobalSection
22+
GlobalSection(ExtensibilityGlobals) = postSolution
23+
SolutionGuid = {5B3ACA76-DA8A-40E8-A097-B51EF1BB9D12}
24+
EndGlobalSection
25+
EndGlobal

samples/csharp/getting-started/DatabaseLoader/DatabaseLoaderConsoleApp/Common/ConsoleHelper.cs

Lines changed: 293 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text;
4+
5+
namespace DatabaseLoaderConsoleApp.DataModels
6+
{
7+
/// <summary>
/// Output type of the prediction engine built in <c>Program.Main</c>
/// (<c>CreatePredictionEngine&lt;UrlClick, ClickPrediction&gt;</c>).
/// The members are public fields on purpose; their names are kept exactly as-is.
/// </summary>
public class ClickPrediction
{
    /// <summary>Boolean label predicted by the trained model.</summary>
    public bool PredictedLabel;

    /// <summary>
    /// Raw model score. <c>Program.Main</c> passes it through <c>Program.Sigmoid</c>
    /// before printing it.
    /// </summary>
    public float Score;
}
13+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text;
4+
5+
namespace DatabaseLoaderConsoleApp.DataModels
6+
{
7+
/// <summary>
/// Row type handed to <c>CreateDatabaseLoader&lt;UrlClick&gt;()</c> in <c>Program.Main</c>,
/// which reads the result of <c>SELECT * from URLClicks</c>.
/// Every member is exposed as a <see cref="string"/>; the training pipeline in
/// <c>Program.Main</c> does the conversions.
/// </summary>
public class UrlClick
{
    /// <summary>Target column; converted to Boolean by the pipeline in <c>Program.Main</c>.</summary>
    public string Label { get; set; }

    // Feat01..Feat13: each one is run through FeaturizeText in Program.Main.
    public string Feat01 { get; set; }
    public string Feat02 { get; set; }
    public string Feat03 { get; set; }
    public string Feat04 { get; set; }
    public string Feat05 { get; set; }
    public string Feat06 { get; set; }
    public string Feat07 { get; set; }
    public string Feat08 { get; set; }
    public string Feat09 { get; set; }
    public string Feat10 { get; set; }
    public string Feat11 { get; set; }
    public string Feat12 { get; set; }
    public string Feat13 { get; set; }

    // Cat14..Cat39: each one is one-hot encoded (binary output kind) in Program.Main.
    public string Cat14 { get; set; }
    public string Cat15 { get; set; }
    public string Cat16 { get; set; }
    public string Cat17 { get; set; }
    public string Cat18 { get; set; }
    public string Cat19 { get; set; }
    public string Cat20 { get; set; }
    public string Cat21 { get; set; }
    public string Cat22 { get; set; }
    public string Cat23 { get; set; }
    public string Cat24 { get; set; }
    public string Cat25 { get; set; }
    public string Cat26 { get; set; }
    public string Cat27 { get; set; }
    public string Cat28 { get; set; }
    public string Cat29 { get; set; }
    public string Cat30 { get; set; }
    public string Cat31 { get; set; }
    public string Cat32 { get; set; }
    public string Cat33 { get; set; }
    public string Cat34 { get; set; }
    public string Cat35 { get; set; }
    public string Cat36 { get; set; }
    public string Cat37 { get; set; }
    public string Cat38 { get; set; }
    public string Cat39 { get; set; }
}
50+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<RestoreSources>
6+
$(RestoreSources);
7+
</RestoreSources>
8+
<TargetFramework>netcoreapp2.1</TargetFramework>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<PackageReference Include="Microsoft.ML" Version="1.4.0-preview" />
13+
<PackageReference Include="Microsoft.ML.Experimental" Version="0.16.0-preview" />
14+
<PackageReference Include="Microsoft.ML.LightGBM" Version="1.4.0-preview" />
15+
<PackageReference Include="Microsoft.ML.FastTree" Version="1.4.0-preview" />
16+
<PackageReference Include="System.Data.SqlClient" Version="4.6.1" />
17+
</ItemGroup>
18+
19+
<ItemGroup>
20+
<Folder Include="SqlLocalDb\" />
21+
</ItemGroup>
22+
23+
<ItemGroup>
24+
<None Update="SqlLocalDb\Criteo-100k-rows.mdf">
25+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
26+
</None>
27+
<None Update="SqlLocalDb\Criteo-100k-rows_log.ldf">
28+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
29+
</None>
30+
</ItemGroup>
31+
32+
</Project>
Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
using Common;
2+
using DatabaseLoaderConsoleApp.DataModels;
3+
using Microsoft.ML;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Transforms;
6+
using System;
7+
using System.Collections;
8+
using System.Data;
9+
using System.Data.Common;
10+
using System.Data.SqlClient;
11+
using System.Diagnostics;
12+
using System.Drawing;
13+
using System.IO;
14+
using System.Linq;
15+
16+
namespace DatabaseLoaderConsoleApp
17+
{
18+
public class Program
19+
{
20+
/// <summary>
/// Sample entry point. Streams the rows of the <c>URLClicks</c> table from a SQL Server
/// LocalDB database through a <c>DatabaseLoader</c>, trains a field-aware factorization
/// machine binary classifier on the train split, evaluates it on the test split, runs one
/// sample prediction and finally detaches the database file from LocalDB.
/// </summary>
public static void Main()
{
    var mlContext = new MLContext();

    // localdb SQL database connection string using a filepath to attach the database file into localdb
    string dbFilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "SqlLocalDb", "Criteo-100k-rows.mdf");
    string connectionString = $"Data Source = (LocalDB)\\MSSQLLocalDB;AttachDbFilename={dbFilePath};Database=Criteo-100k-rows;Integrated Security = True";

    // ConnString Example: localdb SQL database connection string for 'localdb default location' (usually files located at /Users/YourUser/)
    //string connectionString = @"Data Source=(localdb)\MSSQLLocalDb;Initial Catalog=YOUR_DATABASE;Integrated Security=True;Pooling=False";
    //
    // ConnString Example: on-premises SQL Server Database (Integrated security)
    //string connectionString = @"Data Source=YOUR_SERVER;Initial Catalog=YOUR_DATABASE;Integrated Security=True;Pooling=False";
    //
    // ConnString Example: Azure SQL Database connection string
    //string connectionString = @"Server=tcp:yourserver.database.windows.net,1433; Initial Catalog = YOUR_DATABASE; Persist Security Info = False; User ID = YOUR_USER; Password = YOUR_PASSWORD; MultipleActiveResultSets = False; Encrypt = True; TrustServerCertificate = False; Connection Timeout = 60; ConnectRetryCount = 5; ConnectRetryInterval = 10;";

    string commandText = "SELECT * from URLClicks";

    DatabaseLoader loader = mlContext.Data.CreateDatabaseLoader<UrlClick>();

    DatabaseSource dbSource = new DatabaseSource(SqlClientFactory.Instance,
                                                 connectionString,
                                                 commandText);

    IDataView dataView = loader.Load(dbSource);

    var trainTestData = mlContext.Data.TrainTestSplit(dataView);

    // Do the transformations on the IDataView.
    // Convert the label to Boolean and transform the categorical features into binary-encoded vectors.
    var categoriesTransformer = mlContext.Transforms.Conversion.ConvertType(nameof(UrlClick.Label), outputKind: Microsoft.ML.Data.DataKind.Boolean)
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(new[] {
            new InputOutputColumnPair("Cat14Encoded", "Cat14"),
            new InputOutputColumnPair("Cat15Encoded", "Cat15"),
            new InputOutputColumnPair("Cat16Encoded", "Cat16"),
            new InputOutputColumnPair("Cat17Encoded", "Cat17"),
            new InputOutputColumnPair("Cat18Encoded", "Cat18"),
            new InputOutputColumnPair("Cat19Encoded", "Cat19"),
            new InputOutputColumnPair("Cat20Encoded", "Cat20"),
            new InputOutputColumnPair("Cat21Encoded", "Cat21"),
            new InputOutputColumnPair("Cat22Encoded", "Cat22"),
            new InputOutputColumnPair("Cat23Encoded", "Cat23"),
            new InputOutputColumnPair("Cat24Encoded", "Cat24"),
            new InputOutputColumnPair("Cat25Encoded", "Cat25"),
            new InputOutputColumnPair("Cat26Encoded", "Cat26"),
            new InputOutputColumnPair("Cat27Encoded", "Cat27"),
            new InputOutputColumnPair("Cat28Encoded", "Cat28"),
            new InputOutputColumnPair("Cat29Encoded", "Cat29"),
            new InputOutputColumnPair("Cat30Encoded", "Cat30"),
            new InputOutputColumnPair("Cat31Encoded", "Cat31"),
            new InputOutputColumnPair("Cat32Encoded", "Cat32"),
            new InputOutputColumnPair("Cat33Encoded", "Cat33"),
            new InputOutputColumnPair("Cat34Encoded", "Cat34"),
            new InputOutputColumnPair("Cat35Encoded", "Cat35"),
            new InputOutputColumnPair("Cat36Encoded", "Cat36"),
            new InputOutputColumnPair("Cat37Encoded", "Cat37"),
            new InputOutputColumnPair("Cat38Encoded", "Cat38"),
            new InputOutputColumnPair("Cat39Encoded", "Cat39")
        }, OneHotEncodingEstimator.OutputKind.Binary));

    // Featurize each of the 13 "Feat" columns into its own "<name>Featurized" column.
    var featuresTransformer = categoriesTransformer
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Feat01Featurized", inputColumnName: nameof(UrlClick.Feat01)))
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Feat02Featurized", inputColumnName: nameof(UrlClick.Feat02)))
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Feat03Featurized", inputColumnName: nameof(UrlClick.Feat03)))
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Feat04Featurized", inputColumnName: nameof(UrlClick.Feat04)))
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Feat05Featurized", inputColumnName: nameof(UrlClick.Feat05)))
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Feat06Featurized", inputColumnName: nameof(UrlClick.Feat06)))
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Feat07Featurized", inputColumnName: nameof(UrlClick.Feat07)))
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Feat08Featurized", inputColumnName: nameof(UrlClick.Feat08)))
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Feat09Featurized", inputColumnName: nameof(UrlClick.Feat09)))
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Feat10Featurized", inputColumnName: nameof(UrlClick.Feat10)))
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Feat11Featurized", inputColumnName: nameof(UrlClick.Feat11)))
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Feat12Featurized", inputColumnName: nameof(UrlClick.Feat12)))
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Feat13Featurized", inputColumnName: nameof(UrlClick.Feat13)));

    // Concatenate every featurized/encoded column into the single "Features" vector.
    // Fix: "Feat12Featurized" used to be listed twice, so "Feat13Featurized" never reached the trainer.
    var finalTransformerPipeLine = featuresTransformer.Append(mlContext.Transforms.Concatenate("Features",
        "Feat01Featurized", "Feat02Featurized", "Feat03Featurized", "Feat04Featurized", "Feat05Featurized",
        "Feat06Featurized", "Feat07Featurized", "Feat08Featurized", "Feat09Featurized", "Feat10Featurized",
        "Feat11Featurized", "Feat12Featurized", "Feat13Featurized",
        "Cat14Encoded", "Cat15Encoded", "Cat16Encoded", "Cat17Encoded", "Cat18Encoded", "Cat19Encoded",
        "Cat20Encoded", "Cat21Encoded", "Cat22Encoded", "Cat23Encoded", "Cat24Encoded", "Cat25Encoded",
        "Cat26Encoded", "Cat27Encoded", "Cat28Encoded", "Cat29Encoded", "Cat30Encoded", "Cat31Encoded",
        "Cat32Encoded", "Cat33Encoded", "Cat34Encoded", "Cat35Encoded", "Cat36Encoded", "Cat37Encoded",
        "Cat38Encoded", "Cat39Encoded"));

    // Apply the ML algorithm
    var trainingPipeLine = finalTransformerPipeLine.Append(mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(labelColumnName: "Label", featureColumnName: "Features"));

    Console.WriteLine("Training the ML model while streaming data from a SQL database...");
    Stopwatch watch = Stopwatch.StartNew();

    var model = trainingPipeLine.Fit(trainTestData.TrainSet);

    watch.Stop();
    // Report fractional seconds; the previous integer division truncated sub-second timings to 0.
    Console.WriteLine("Elapsed time for training the model = {0:F1} seconds", watch.Elapsed.TotalSeconds);

    Console.WriteLine("Evaluating the model...");
    Stopwatch watch2 = Stopwatch.StartNew();

    var predictions = model.Transform(trainTestData.TestSet);
    // Now that we have the test predictions, calculate the metrics of those predictions and output the results.
    var metrics = mlContext.BinaryClassification.Evaluate(predictions);

    watch2.Stop();
    Console.WriteLine("Elapsed time for evaluating the model = {0:F1} seconds", watch2.Elapsed.TotalSeconds);

    ConsoleHelper.PrintBinaryClassificationMetrics("==== Evaluation Metrics training from a Database ====", metrics);

    Console.WriteLine("Trying a single prediction:");

    var predictionEngine = mlContext.Model.CreatePredictionEngine<UrlClick, ClickPrediction>(model);

    UrlClick sampleData = new UrlClick()
    {
        Label = String.Empty,
        Feat01 = "32", Feat02 = "3", Feat03 = "5", Feat04 = "NULL", Feat05 = "1",
        Feat06 = "0", Feat07 = "0", Feat08 = "61", Feat09 = "5", Feat10 = "0",
        Feat11 = "1", Feat12 = "3157", Feat13 = "5",
        Cat14 = "e5f3fd8d", Cat15 = "a0aaffa6", Cat16 = "aa15d56f", Cat17 = "da8a3421",
        Cat18 = "cd69f233", Cat19 = "6fcd6dcb", Cat20 = "ab16ed81", Cat21 = "43426c29",
        Cat22 = "1df5e154", Cat23 = "00c5ffb7", Cat24 = "be4ee537", Cat25 = "f3bbfe99",
        Cat26 = "7de9c0a9", Cat27 = "6652dc64", Cat28 = "99eb4e27", Cat29 = "4cdc3efa",
        Cat30 = "d20856aa", Cat31 = "a1eb1511", Cat32 = "9512c20b", Cat33 = "febfd863",
        Cat34 = "a3323ca1", Cat35 = "c8e1ee56", Cat36 = "1752e9e8", Cat37 = "75350c8a",
        Cat38 = "991321ea", Cat39 = "b757e957"
    };

    var clickPrediction = predictionEngine.Predict(sampleData);

    // The stray Color.YellowGreen argument was dropped: Console.WriteLine(string, object) only
    // treated it as an unused format argument, it never coloured the output.
    Console.WriteLine($"Predicted Label: {clickPrediction.PredictedLabel} - Score:{Sigmoid(clickPrediction.Score)}");
    Console.WriteLine();

    //*** Detach database from localdb only if you used a conn-string with a filepath to attach the database file into localdb ***
    Console.WriteLine("... Detaching database from SQL localdb ...");
    DetachDatabase(connectionString);

    Console.WriteLine("=============== Press any key ===============");
    Console.ReadKey();
}
162+
163+
/// <summary>
/// Maps <paramref name="x"/> through the logistic function and scales the result by 100,
/// i.e. returns <c>100 / (1 + e^(-x))</c> narrowed to <see cref="float"/>.
/// </summary>
/// <param name="x">Input value (in <c>Main</c>, the raw prediction score).</param>
/// <returns>A value between 0 and 100.</returns>
public static float Sigmoid(float x) => (float)(100 / (1 + Math.Exp(-x)));
167+
168+
/// <summary>
/// Detaches from SQL Server LocalDB the database that <paramref name="userConnectionString"/>
/// connects to. The database name is read from an opened connection, then a second connection
/// to <c>(LocalDB)\MSSQLLocalDB</c> (no database specified) runs the ALTER DATABASE statements
/// followed by the <c>sp_detach_db</c> stored procedure.
/// </summary>
/// <param name="userConnectionString">Connection string of the attached database to detach.</param>
public static void DetachDatabase(string userConnectionString)
{
    // Open the user's connection only long enough to learn the database name.
    string dbName;
    using (SqlConnection userSqlDatabaseConnection = new SqlConnection(userConnectionString))
    {
        userSqlDatabaseConnection.Open();
        dbName = userSqlDatabaseConnection.Database;
    }

    // Bracket-quoted identifier for the ALTER DATABASE text; a literal ']' has to be doubled.
    string quotedDbName = "[" + dbName.Replace("]", "]]") + "]";

    string masterConnString = "Data Source = (LocalDB)\\MSSQLLocalDB;Integrated Security = True";
    using (SqlConnection sqlDatabaseConnection = new SqlConnection(masterConnString))
    {
        sqlDatabaseConnection.Open();

        string prepareDbcommandString = $"ALTER DATABASE {quotedDbName} SET OFFLINE WITH ROLLBACK IMMEDIATE ALTER DATABASE {quotedDbName} SET SINGLE_USER WITH ROLLBACK IMMEDIATE";
        //(ALTERNATIVE) string prepareDbcommandString = $"ALTER DATABASE [{dbName}] SET SINGLE_USER WITH ROLLBACK IMMEDIATE";

        // SqlCommand is IDisposable; dispose each command deterministically.
        using (SqlCommand sqlPrepareCommand = new SqlCommand(prepareDbcommandString, sqlDatabaseConnection))
        {
            sqlPrepareCommand.ExecuteNonQuery();
        }

        using (SqlCommand sqlDetachCommand = new SqlCommand("sp_detach_db", sqlDatabaseConnection))
        {
            sqlDetachCommand.CommandType = CommandType.StoredProcedure;
            // Passed as a parameter, so the raw (unquoted) name is the right value here.
            sqlDetachCommand.Parameters.AddWithValue("@dbname", dbName);
            sqlDetachCommand.ExecuteNonQuery();
        }
    }
}
194+
}
195+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
2+
TBD

samples/nuget.config

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
<packageSources>
44
<clear />
55
<add key="nuget.org" value="https://api.nuget.org/v3/index.json" />
6-
<add key="dotnet-core MyGet" value="https://dotnet.myget.org/F/dotnet-core/api/v3/index.json" />
76
</packageSources>
87
<disabledPackageSources>
98
</disabledPackageSources>

0 commit comments

Comments
 (0)