diff --git a/doc/proposals/2025/gsoc/images/dashboard 2.png b/doc/proposals/2025/gsoc/images/dashboard 2.png new file mode 100644 index 000000000..ceabf42c0 Binary files /dev/null and b/doc/proposals/2025/gsoc/images/dashboard 2.png differ diff --git a/doc/proposals/2025/gsoc/images/dashboard1.png b/doc/proposals/2025/gsoc/images/dashboard1.png new file mode 100644 index 000000000..d951996b7 Binary files /dev/null and b/doc/proposals/2025/gsoc/images/dashboard1.png differ diff --git a/doc/proposals/2025/gsoc/images/results.png b/doc/proposals/2025/gsoc/images/results.png new file mode 100644 index 000000000..cca3cc98c Binary files /dev/null and b/doc/proposals/2025/gsoc/images/results.png differ diff --git a/doc/proposals/2025/gsoc/poc_harsh_panchal_AI_API_EVAL.md b/doc/proposals/2025/gsoc/poc_harsh_panchal_AI_API_EVAL.md new file mode 100644 index 000000000..e11cc908c --- /dev/null +++ b/doc/proposals/2025/gsoc/poc_harsh_panchal_AI_API_EVAL.md @@ -0,0 +1,63 @@ +# AI API Evaluation Framework - Proof of Concept +This is a Proof of Concept (PoC) for AI API evaluation on a structured framework. It benchmarks AI language models on various performance metrics and provides actionable insights by means of easy-to-interpret visualizations. The PoC involves end-to-end integration from API calls to result analysis. + +## Objectives + +Benchmark the AI models (Falcon 7B, LLaMA 3.2) using metrics such as BLEU-4, ROUGE-L, BERTScore, and METEOR. + +Use radar charts to provide a visual comparison. + +Facilitate effective monitoring of model performance through real-time latency and cost measurement. + +--- +## Key Features Implemented +Backend (FastAPI) +Model Evaluation Endpoint: Tests AI models against given data sets. + +Scores such as BLEU-4, ROUGE-L, BERTScore, and METEOR are computed for Hugging Face models. + +Real-Time Performance Metrics: Tracks latency, cost, and processing time. + +Frontend (Flutter) +Interactive Dashboard: Displays model scores in radar charts. 
+ +Real-Time Data Display: Presents results of evaluation in a formatted way. + +Model Selection: Enables users to select from amongst available AI models. + +## Screenshots and Visuals +![alt text](https://github.com/GANGSTER0910/apidash/blob/8fc7298824670397b07d0b42307bb1dd533af1fe/doc/proposals/2025/gsoc/images/dashboard1.png) + +![alt text](https://github.com/GANGSTER0910/apidash/blob/8fc7298824670397b07d0b42307bb1dd533af1fe/doc/proposals/2025/gsoc/images/dashboard%202.png) + +![alt text](https://github.com/GANGSTER0910/apidash/blob/8fc7298824670397b07d0b42307bb1dd533af1fe/doc/proposals/2025/gsoc/images/results.png) + +--- +## Proof of Concept Details +Models Evaluated: LLaMA 3.2 (3B) and Falcon 7B. + +Dataset: CNN Dailymail (3.0.0) used for benchmarking assessment. + +Evaluation Metrics + +BLEU-4: Scores n-gram overlap. + +ROUGE-L: Checks the longest consecutive matches. + +BERTScore: Scores contextual embeddings. + +METEOR: Considers synonyms, stemming, and grammar. + +To run the code +### Step 1: Clone the Repository +``` +# Clone AI API Evaluation Repository + +git clone https://github.com/GANGSTER0910/AI_API_EVAL.git +cd AI_API_EVAL + +# Install required Python packages +pip install -r requirements.txt +Provide your Hugging Face API token in the FastAPI.py file +run python FastAPI.py +``` diff --git a/lib/api_evalution/evalution.dart b/lib/api_evalution/evalution.dart new file mode 100644 index 000000000..578f721aa --- /dev/null +++ b/lib/api_evalution/evalution.dart @@ -0,0 +1,428 @@ +import 'package:flutter/material.dart'; +import 'package:fl_chart/fl_chart.dart'; +import 'dart:convert'; +import 'package:http/http.dart' as http; + +class EvaluationDashboard extends StatefulWidget { + const EvaluationDashboard({super.key}); + @override + State createState() => _EvaluationDashboardState(); +} + +class _EvaluationDashboardState extends State { + int _selectedIndex = 0; + String? 
_selectedModel; // Store selected model name + + final List _availableModels = [ + "falcon-7b", + "Llama-3.2-3B" + ]; // Add available models + final List _results = []; + bool _isLoading = false; + Future fetchModelResults() async { + if (_selectedModel == null) { + // print("No model selected!"); + return; + } + + final url = Uri.parse( + "http://localhost:8000/evaluate/$_selectedModel"); // Use selected model + + try { + final response = await http.get( + url, + headers: {'Content-Type': 'application/json'}, + ); + + if (response.statusCode == 200) { + final data = jsonDecode(response.body); + // print("Received Data: $data"); + + setState(() => _isLoading = true); + { + try { + _results.clear(); + for (var item in data) { + _results.add(ModelResult( + modelName: item["model_name"], + scores: Map.from(item["scores"]), + latency: item["latency"].toDouble(), + cost: item["cost"].toDouble(), + timestamp: DateTime.parse(item["timestamp"]), + )); + } + } finally { + setState(() => _isLoading = false); + } + } + } else { + print("Error: ${response.statusCode}"); + } + } catch (e) { + print("Exception: $e"); + } + } + + @override + Widget build(BuildContext context) { + return Scaffold( + appBar: AppBar( + title: Text('AI API Evaluator'), + actions: [ + // Model selection dropdown in app bar + Padding( + padding: EdgeInsets.only(right: 16), + child: DropdownButton( + value: _selectedModel, + hint: Text("Select Model"), + items: _availableModels.map((String model) { + return DropdownMenuItem( + value: model, + child: Text(model), + ); + }).toList(), + onChanged: (String? newValue) { + setState(() { + _selectedModel = newValue; + }); + }, + ), + ), + IconButton( + icon: Icon(Icons.refresh), + onPressed: fetchModelResults, + ), + ], + ), + body: _isLoading + ? 
Center(child: CircularProgressIndicator()) + : _buildBody(), // Use your dashboard layout + floatingActionButton: FloatingActionButton.extended( + onPressed: _startNewEvaluation, + icon: Icon(Icons.add), + label: Text("New Evaluation"), + backgroundColor: Colors.deepPurple, + ), + bottomNavigationBar: BottomNavigationBar( + currentIndex: _selectedIndex, + onTap: _onItemTapped, + items: [ + BottomNavigationBarItem( + icon: Icon(Icons.dashboard), label: 'Dashboard'), + BottomNavigationBarItem( + icon: Icon(Icons.assessment), label: 'Reports'), + BottomNavigationBarItem(icon: Icon(Icons.compare), label: 'Compare'), + ], + ), + ); + } + + Widget _buildBody() { + if (_results.isEmpty) { + return Center(child: Text("Select a model and evaluate")); + } + switch (_selectedIndex) { + case 0: + return SingleChildScrollView( + padding: EdgeInsets.all(16), + child: Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Text('Model Performance Overview', + style: Theme.of(context).textTheme.headlineSmall), + SizedBox(height: 16), + _buildMetricCards(), + SizedBox(height: 24), + _buildRadarChart(), + SizedBox(height: 24), + _buildRecentEvaluations(), + ], + ), + ); + case 1: + return Center(child: Text('Detailed Reports')); + case 2: + return Center(child: Text('Model Comparison')); + default: + return Container(); + } + } + + Widget _buildMetricCards() { + return GridView.count( + crossAxisCount: 2, + shrinkWrap: true, + physics: NeverScrollableScrollPhysics(), + childAspectRatio: 1.5, + crossAxisSpacing: 16, + mainAxisSpacing: 16, + children: [ + _buildMetricCard("Best Performing Model", _selectedModel ?? "N/A", + Icons.emoji_events, Colors.amber), + _buildMetricCard( + "Average BERT-4", + _results.isEmpty + ? "N/A" + : _results.length == 1 + ? _results[0].scores['BERTScore']?.toStringAsFixed(2) ?? "N/A" + : _results[1].scores['BERTScore']?.toStringAsFixed(2) ?? "N/A", + Icons.score, + Colors.blue, + ), + _buildMetricCard("Fastest Model", _selectedModel ?? 
"N/A", Icons.speed, + Colors.green), + _buildMetricCard("Cost Efficiency", _selectedModel ?? "N/A", + Icons.attach_money, Colors.purple), + ], + ); + } + + Widget _buildMetricCard( + String title, String value, IconData icon, Color color) { + return Card( + elevation: 4, + shape: RoundedRectangleBorder(borderRadius: BorderRadius.circular(12)), + child: Padding( + padding: EdgeInsets.all(16), + child: Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Icon(icon, size: 32, color: color), + SizedBox(height: 8), + Text(title, + style: TextStyle(fontSize: 14, color: Colors.grey[600])), + SizedBox(height: 8), + Text(value, + style: TextStyle(fontSize: 20, fontWeight: FontWeight.bold)), + ], + ), + ), + ); + } + +// Replace the _buildRadarChart and related methods with this: + + Widget _buildRadarChart() { + return Card( + elevation: 4, + shape: RoundedRectangleBorder(borderRadius: BorderRadius.circular(12)), + child: Padding( + padding: const EdgeInsets.all(16.0), + child: Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Text( + 'Model Comparison', + style: TextStyle(fontSize: 18, fontWeight: FontWeight.bold), + ), + const SizedBox(height: 16), + + // Show empty state if no results + if (_results.isEmpty) + Container( + height: 300, + alignment: Alignment.center, + child: Text( + 'No evaluation data available\nSelect a model and evaluate', + textAlign: TextAlign.center, + style: TextStyle(color: Colors.grey[600]), + ), + ) + else + SizedBox( + height: 300, + child: RadarChart( + RadarChartData( + dataSets: _createRadarData(), + radarBackgroundColor: Colors.transparent, + radarBorderData: const BorderSide(color: Colors.grey), + titlePositionPercentageOffset: 0.2, + titleTextStyle: const TextStyle( + color: Colors.black, + fontSize: 12, + ), + tickCount: 5, + ticksTextStyle: const TextStyle( + color: Colors.black54, + fontSize: 10, + ), + radarShape: RadarShape.polygon, + getTitle: (index, angle) { + final metrics = [ + 'BLEU-4', 
+ 'ROUGE-L', + 'BERTScore', + 'METEOR', + 'CIDEr', + 'SPICE', + ]; + return RadarChartTitle( + text: metrics[index], + angle: angle, + ); + }, + ), + swapAnimationDuration: const Duration(milliseconds: 500), + ), + ), + + // Only show legend if we have data + if (_results.isNotEmpty) _buildRadarLegend(), + ], + ), + ), + ); + } + + List _createRadarData() { + if (_results.isEmpty) return []; + return [ + RadarDataSet( + dataEntries: [ + RadarEntry(value: _results[0].scores["BLEU-4"] ?? 0.0), + RadarEntry(value: _results[0].scores["ROUGE-L"] ?? 0.0), + RadarEntry(value: _results[0].scores["BERTScore"] ?? 0.0), + RadarEntry(value: _results[0].scores["METEOR"] ?? 0.0), + // RadarEntry(value: _results[0].scores["CIDEr"] ?? 0.0), + // RadarEntry(value: _results[0].scores["SPICE"] ?? 0.0), + ], + fillColor: Colors.deepPurple.withOpacity(0.3), + borderColor: Colors.deepPurple, + entryRadius: 2, + borderWidth: 2, + ), + if (_results.length > 1) + RadarDataSet( + dataEntries: [ + RadarEntry(value: _results[1].scores["BLEU-4"] ?? 0.0), + RadarEntry(value: _results[1].scores["ROUGE-L"] ?? 0.0), + RadarEntry(value: _results[1].scores["BERTScore"] ?? 0.0), + RadarEntry(value: _results[1].scores["METEOR"] ?? 0.0), + // RadarEntry(value: _results[1].scores["CIDEr"] ?? 0.0), + // RadarEntry(value: _results[1].scores["SPICE"] ?? 
0.0), + ], + fillColor: Colors.blue.withOpacity(0.3), + borderColor: Colors.blue, + entryRadius: 2, + borderWidth: 2, + ), + ]; + } + + Widget _buildRadarLegend() { + return Row( + mainAxisAlignment: MainAxisAlignment.center, + children: [ + // First model legend (always show if we have at least 1 result) + Padding( + padding: const EdgeInsets.symmetric(horizontal: 8.0), + child: Row( + children: [ + Container( + width: 12, + height: 12, + color: Colors.deepPurple, + ), + const SizedBox(width: 4), + Text(_results[0].modelName), + ], + ), + ), + + // Second model legend (only show if we have 2+ results) + if (_results.length > 1) + Padding( + padding: const EdgeInsets.symmetric(horizontal: 8.0), + child: Row( + children: [ + Container( + width: 12, + height: 12, + color: Colors.blue, + ), + const SizedBox(width: 4), + Text(_results[1].modelName), + ], + ), + ), + ], + ); + } + + Widget _buildRecentEvaluations() { + return Card( + elevation: 4, + shape: RoundedRectangleBorder(borderRadius: BorderRadius.circular(12)), + child: Padding( + padding: EdgeInsets.all(16), + child: Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Text('Recent Evaluations', + style: TextStyle(fontSize: 18, fontWeight: FontWeight.bold)), + SizedBox(height: 16), + DataTable( + columns: [ + DataColumn(label: Text('Model')), + DataColumn(label: Text('BLEU-4'), numeric: true), + DataColumn(label: Text('ROUGE-L'), numeric: true), + DataColumn(label: Text('BERTScore'), numeric: true), + DataColumn(label: Text('METEOR'), numeric: true), + DataColumn(label: Text('Time')), + ], + rows: _results + .map((result) => DataRow( + cells: [ + DataCell(Text(result.modelName)), + DataCell(Text( + result.scores["BLEU-4"]!.toStringAsFixed(2))), + DataCell(Text( + result.scores["ROUGE-L"]!.toStringAsFixed(2))), + DataCell(Text( + result.scores["BERTScore"]!.toStringAsFixed(2))), + DataCell(Text( + result.scores["METEOR"]!.toStringAsFixed(2))), + DataCell(Text("${result.latency/1000} sec")), + 
], + )) + .toList(), + ), + ], + ), + ), + ); + } + + void _onItemTapped(int index) { + setState(() { + _selectedIndex = index; + }); + } + + void _startNewEvaluation() { + // Navigation to new evaluation screen + } +} + +class ModelResult { + final String modelName; + final Map scores; + final double latency; // in ms + final double cost; // in dollars + final DateTime timestamp; + + ModelResult({ + required this.modelName, + required this.scores, + required this.latency, + required this.cost, + required this.timestamp, + }); +} + +class LinearScore { + final String metric; + final double value; + + LinearScore(this.metric, this.value); +} diff --git a/lib/app.dart b/lib/app.dart index fcc83b50e..1a5c85e8d 100644 --- a/lib/app.dart +++ b/lib/app.dart @@ -9,6 +9,7 @@ import 'widgets/widgets.dart' show WindowCaption, WorkspaceSelector; import 'providers/providers.dart'; import 'services/services.dart'; import 'screens/screens.dart'; +import 'package:apidash/api_evalution/evalution.dart'; import 'consts.dart'; class App extends ConsumerStatefulWidget { @@ -101,6 +102,63 @@ class _AppState extends ConsumerState with WindowListener { } } +// class DashApp extends ConsumerWidget { +// const DashApp({super.key}); + +// @override +// Widget build(BuildContext context, WidgetRef ref) { +// final isDarkMode = +// ref.watch(settingsProvider.select((value) => value.isDark)); +// final workspaceFolderPath = ref +// .watch(settingsProvider.select((value) => value.workspaceFolderPath)); +// final showWorkspaceSelector = kIsDesktop && (workspaceFolderPath == null); +// return Portal( +// child: MaterialApp( +// debugShowCheckedModeBanner: false, +// theme: kLightMaterialAppTheme, +// darkTheme: kDarkMaterialAppTheme, +// themeMode: isDarkMode ? ThemeMode.dark : ThemeMode.light, +// routes: { +// '/evaluation': (context) => const EvaluationDashboard(), // Your screen +// }, +// home: showWorkspaceSelector +// ? 
WorkspaceSelector( +// onContinue: (val) async { +// await initHiveBoxes(kIsDesktop, val); +// ref +// .read(settingsProvider.notifier) +// .update(workspaceFolderPath: val); +// }, +// onCancel: () async { +// try { +// await windowManager.destroy(); +// } catch (e) { +// debugPrint(e.toString()); +// } +// }, +// ) +// : Stack( +// children: [ +// !kIsLinux && !kIsMobile +// ? const App() +// : context.isMediumWindow +// ? const MobileDashboard() +// : const Dashboard(), +// if (kIsWindows) +// SizedBox( +// height: 29, +// child: WindowCaption( +// backgroundColor: Colors.transparent, +// brightness: +// isDarkMode ? Brightness.dark : Brightness.light, +// ), +// ), +// ], +// ), +// ), +// ); +// } +// } class DashApp extends ConsumerWidget { const DashApp({super.key}); @@ -111,6 +169,7 @@ class DashApp extends ConsumerWidget { final workspaceFolderPath = ref .watch(settingsProvider.select((value) => value.workspaceFolderPath)); final showWorkspaceSelector = kIsDesktop && (workspaceFolderPath == null); + return Portal( child: MaterialApp( debugShowCheckedModeBanner: false, @@ -135,11 +194,14 @@ class DashApp extends ConsumerWidget { ) : Stack( children: [ + // Existing APIDash UI layers !kIsLinux && !kIsMobile ? const App() : context.isMediumWindow ? const MobileDashboard() : const Dashboard(), + + // Windows-specific caption (keep existing) if (kIsWindows) SizedBox( height: 29, @@ -149,6 +211,14 @@ class DashApp extends ConsumerWidget { isDarkMode ? 
Brightness.dark : Brightness.light, ), ), + + // Your EvaluationDashboard added here + Positioned( + right: 16, // 16 pixels from right + bottom: 16, // 16 pixels from bottom + width: 300, // Set a fixed width or use constraints + child: const EvaluationDashboard(), // Your widget + ), ], ), ), diff --git a/lib/screens/dashboard.dart b/lib/screens/dashboard.dart index 428ffaebc..3bfb4a4c8 100644 --- a/lib/screens/dashboard.dart +++ b/lib/screens/dashboard.dart @@ -5,6 +5,7 @@ import 'package:apidash/providers/providers.dart'; import 'package:apidash/widgets/widgets.dart'; import 'package:apidash/consts.dart'; import 'package:apidash/dashbot/dashbot.dart'; +import 'package:apidash/api_evalution/evalution.dart'; import 'common_widgets/common_widgets.dart'; import 'envvar/environment_page.dart'; import 'home_page/home_page.dart'; @@ -68,6 +69,14 @@ class Dashboard extends ConsumerWidget { 'History', style: Theme.of(context).textTheme.labelSmall, ), + kVSpacer10, + IconButton( + isSelected: railIdx == 3, // Use index 4 (assuming 3 is Settings) + onPressed: () => ref.read(navRailIndexStateProvider.notifier).state = 3, + icon: const Icon(Icons.assessment_outlined), + selectedIcon: const Icon(Icons.assessment), + ), + Text('Evaluation', style: Theme.of(context).textTheme.labelSmall), ], ), Expanded( @@ -118,6 +127,7 @@ class Dashboard extends ConsumerWidget { HomePage(), EnvironmentPage(), HistoryPage(), + EvaluationDashboard(), SettingsPage(), ], ), diff --git a/pubspec.lock b/pubspec.lock index b16f69dd4..6b227aa2a 100644 --- a/pubspec.lock +++ b/pubspec.lock @@ -494,6 +494,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.1.1" + fl_chart: + dependency: "direct main" + description: + name: fl_chart + sha256: "5276944c6ffc975ae796569a826c38a62d2abcf264e26b88fa6f482e107f4237" + url: "https://pub.dev" + source: hosted + version: "0.70.2" flex_color_scheme: dependency: "direct main" description: @@ -776,7 +784,7 @@ packages: source: hosted version: "2.0.0" 
http: - dependency: transitive + dependency: "direct main" description: name: http sha256: fe7ab022b76f3034adc518fb6ea04a82387620e19977665ea18d30a1cf43442f diff --git a/pubspec.yaml b/pubspec.yaml index af67db989..87f803aa7 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -17,6 +17,8 @@ dependencies: code_builder: ^4.10.0 csv: ^6.0.0 data_table_2: 2.5.16 + http: ^1.1.0 + fl_chart: ^0.70.2 dart_style: ^3.0.1 desktop_drop: ^0.5.0 extended_text_field: ^16.0.0