@@ -1271,7 +1271,7 @@ void BiPartitioningPartialLegalizer::spread_over_windows(std::vector<SpreadingWi
1271
1271
num_blocks_partitioned_ += window.contained_blocks .size ();
1272
1272
1273
1273
// 2) Partition the window.
1274
- auto partitioned_window = partition_window (window);
1274
+ auto partitioned_window = partition_window (window, group_id );
1275
1275
1276
1276
// 3) Partition the blocks.
1277
1277
partition_blocks_in_window (window, partitioned_window, group_id, p_placement);
@@ -1311,61 +1311,115 @@ void BiPartitioningPartialLegalizer::spread_over_windows(std::vector<SpreadingWi
1311
1311
VTR_ASSERT_SAFE (density_manager_->verify ());
1312
1312
}
1313
1313
1314
- PartitionedWindow BiPartitioningPartialLegalizer::partition_window (SpreadingWindow& window) {
1314
+ PartitionedWindow BiPartitioningPartialLegalizer::partition_window (
1315
+ SpreadingWindow& window,
1316
+ ModelGroupId group_id) {
1317
+
1318
+ // Search for the ideal partition line on the window. Here, we attempt each
1319
+ // partition and measure how well this cuts the capacity of the region in
1320
+ // half. Cutting the capacity of the region in half should allow the blocks
1321
+ // within the region to also be cut in half (assuming a good initial window
1322
+ // was chosen). This should allow the spreader to spread things more evenly
1323
+ // and converge faster. Hence, it is worth spending more time trying to find
1324
+ // better partition lines.
1325
+ //
1326
+ // Here, we compute the score of a partition as a number between 0 and 1
1327
+ // which represents how balanced the partition is. 0 means that all of the
1328
+ // capacity is on one side of the partition, 1 means that the capacities of
1329
+ // the two partitions are perfectly balanced (equal on both sides).
1330
+ float best_score = -1 .0f ;
1315
1331
PartitionedWindow partitioned_window;
1332
+ const std::vector<int >& model_indices = model_grouper_.get_models_in_group (group_id);
1316
1333
1317
- // Select the partition direction.
1318
- // To keep it simple, we partition the direction which would cut the
1319
- // region the most.
1320
- // TODO: Should explore making the partition line based on the capacity
1321
- // of the two partitioned regions. We may want to cut the
1322
- // region in half such that the mass of the atoms contained within
1323
- // the two future regions is equal.
1324
- partitioned_window.partition_dir = e_partition_dir::VERTICAL;
1325
- if (window.region .height () > window.region .width ())
1326
- partitioned_window.partition_dir = e_partition_dir::HORIZONTAL;
1327
-
1328
- // To keep it simple, just cut the space in half.
1329
- // TODO: Should investigate other cutting techniques. Cutting perfectly
1330
- // in half may not be the most efficient technique.
1331
- SpreadingWindow& lower_window = partitioned_window.lower_window ;
1332
- SpreadingWindow& upper_window = partitioned_window.upper_window ;
1333
- partitioned_window.pivot_pos = 0 .f ;
1334
- if (partitioned_window.partition_dir == e_partition_dir::VERTICAL) {
1335
- // Find the x-coordinate of a cut line directly in the middle of the
1336
- // region. We floor this to prevent fractional cut lines.
1337
- double pivot_x = std::floor ((window.region .xmin () + window.region .xmax ()) / 2.0 );
1334
+ // First, try all of the vertical partitions.
1335
+ double min_pivot_x = std::floor (window.region .xmin ()) + 1.0 ;
1336
+ double max_pivot_x = std::ceil (window.region .xmax ()) - 1.0 ;
1337
+ for (double pivot_x = min_pivot_x; pivot_x <= max_pivot_x; pivot_x++) {
1338
+ // Cut the region at this cut line.
1339
+ auto lower_region = vtr::Rect <double >(vtr::Point <double >(window.region .xmin (),
1340
+ window.region .ymin ()),
1341
+ vtr::Point <double >(pivot_x,
1342
+ window.region .ymax ()));
1343
+
1344
+ auto upper_region = vtr::Rect <double >(vtr::Point <double >(pivot_x,
1345
+ window.region .ymin ()),
1346
+ vtr::Point <double >(window.region .xmax (),
1347
+ window.region .ymax ()));
1348
+
1349
+ // Compute the capacity of each partition for the models that we care
1350
+ // about.
1351
+ // TODO: This can be made better by looking at the mass of all blocks
1352
+ // within the window and scaling the capacity based on that.
1353
+ float lower_window_capacity = capacity_prefix_sum_.get_sum (model_indices,
1354
+ lower_region).manhattan_norm ();
1355
+ lower_window_capacity = std::max (lower_window_capacity, 0 .0f );
1356
+ float upper_window_capacity = capacity_prefix_sum_.get_sum (model_indices,
1357
+ upper_region).manhattan_norm ();
1358
+ upper_window_capacity = std::max (upper_window_capacity, 0 .0f );
1359
+
1360
+ // Compute the score of this partition line. The score is simply just
1361
+ // the minimum of the two capacities divided by the maximum of the
1362
+ // two capacities.
1363
+ float smaller_capacity = std::min (lower_window_capacity, upper_window_capacity);
1364
+ float larger_capacity = std::max (lower_window_capacity, upper_window_capacity);
1365
+ float cut_score = smaller_capacity / larger_capacity;
1366
+
1367
+ // If this is the best cut we have ever seen, save it as the result.
1368
+ if (cut_score > best_score) {
1369
+ best_score = cut_score;
1370
+ partitioned_window.partition_dir = e_partition_dir::VERTICAL;
1371
+ partitioned_window.pivot_pos = pivot_x;
1372
+ partitioned_window.lower_window .region = lower_region;
1373
+ partitioned_window.upper_window .region = upper_region;
1374
+ }
1375
+ }
1338
1376
1377
+ // Next, try all of the horizontal partitions.
1378
+ double min_pivot_y = std::floor (window.region .ymin ()) + 1.0 ;
1379
+ double max_pivot_y = std::ceil (window.region .ymax ()) - 1.0 ;
1380
+ for (double pivot_y = min_pivot_y; pivot_y <= max_pivot_y; pivot_y++) {
1339
1381
// Cut the region at this cut line.
1340
- lower_window.region = vtr::Rect <double >(vtr::Point <double >(window.region .xmin (),
1341
- window.region .ymin ()),
1342
- vtr::Point <double >(pivot_x,
1343
- window.region .ymax ()));
1344
-
1345
- upper_window.region = vtr::Rect <double >(vtr::Point <double >(pivot_x,
1346
- window.region .ymin ()),
1347
- vtr::Point <double >(window.region .xmax (),
1348
- window.region .ymax ()));
1349
- partitioned_window.pivot_pos = pivot_x;
1350
- } else {
1351
- VTR_ASSERT (partitioned_window.partition_dir == e_partition_dir::HORIZONTAL);
1352
- // Similarly in the y direction, find the non-fractional y coordinate
1353
- // to make a horizontal cut.
1354
- double pivot_y = std::floor ((window.region .ymin () + window.region .ymax ()) / 2.0 );
1355
-
1356
- // Then cut the window.
1357
- lower_window.region = vtr::Rect <double >(vtr::Point <double >(window.region .xmin (),
1358
- window.region .ymin ()),
1359
- vtr::Point <double >(window.region .xmax (),
1360
- pivot_y));
1361
-
1362
- upper_window.region = vtr::Rect <double >(vtr::Point <double >(window.region .xmin (),
1363
- pivot_y),
1364
- vtr::Point <double >(window.region .xmax (),
1365
- window.region .ymax ()));
1366
- partitioned_window.pivot_pos = pivot_y;
1382
+ auto lower_region = vtr::Rect <double >(vtr::Point <double >(window.region .xmin (),
1383
+ window.region .ymin ()),
1384
+ vtr::Point <double >(window.region .xmax (),
1385
+ pivot_y));
1386
+
1387
+ auto upper_region = vtr::Rect <double >(vtr::Point <double >(window.region .xmin (),
1388
+ pivot_y),
1389
+ vtr::Point <double >(window.region .xmax (),
1390
+ window.region .ymax ()));
1391
+
1392
+ // Compute the capacity of each partition for the models that we care
1393
+ // about.
1394
+ // TODO: This can be made better by looking at the mass of all blocks
1395
+ // within the window and scaling the capacity based on that.
1396
+ float lower_window_capacity = capacity_prefix_sum_.get_sum (model_indices,
1397
+ lower_region).manhattan_norm ();
1398
+ lower_window_capacity = std::max (lower_window_capacity, 0 .0f );
1399
+ float upper_window_capacity = capacity_prefix_sum_.get_sum (model_indices,
1400
+ upper_region).manhattan_norm ();
1401
+ upper_window_capacity = std::max (upper_window_capacity, 0 .0f );
1402
+
1403
+ // Compute the score of this partition line. The score is simply just
1404
+ // the minimum of the two capacities divided by the maximum of the
1405
+ // two capacities.
1406
+ float smaller_capacity = std::min (lower_window_capacity, upper_window_capacity);
1407
+ float larger_capacity = std::max (lower_window_capacity, upper_window_capacity);
1408
+ float cut_score = smaller_capacity / larger_capacity;
1409
+
1410
+ // If this is the best cut we have ever seen, save it as the result.
1411
+ if (cut_score > best_score) {
1412
+ best_score = cut_score;
1413
+ partitioned_window.partition_dir = e_partition_dir::HORIZONTAL;
1414
+ partitioned_window.pivot_pos = pivot_y;
1415
+ partitioned_window.lower_window .region = lower_region;
1416
+ partitioned_window.upper_window .region = upper_region;
1417
+ }
1367
1418
}
1368
1419
1420
+ VTR_ASSERT_MSG (best_score >= 0 .0f ,
1421
+ " Could not find a partition line for given window" );
1422
+
1369
1423
return partitioned_window;
1370
1424
}
1371
1425
@@ -1475,7 +1529,7 @@ void BiPartitioningPartialLegalizer::partition_blocks_in_window(
1475
1529
// NOTE: This needs to be an int in case the pivot is 0.
1476
1530
for (int i = window.contained_blocks .size () - 1 ; i >= (int )pivot; i--) {
1477
1531
const PrimitiveVector& blk_mass = density_manager_->mass_calculator ().get_block_mass (window.contained_blocks [i]);
1478
- VTR_ASSERT_SAFE (lower_window_underfill .is_non_negative ());
1532
+ VTR_ASSERT_SAFE (upper_window_underfill .is_non_negative ());
1479
1533
upper_window_underfill -= blk_mass;
1480
1534
if (upper_window_underfill.is_non_negative ())
1481
1535
upper_window.contained_blocks .push_back (window.contained_blocks [i]);
@@ -1490,8 +1544,6 @@ void BiPartitioningPartialLegalizer::partition_blocks_in_window(
1490
1544
// windows. To do this we sort the unplaced blocks by largest mass to
1491
1545
// smallest mass. Then we place each block in the bin with the highest
1492
1546
// underfill.
1493
- // FIXME: Above was the intuition; however, after experimentation, found that
1494
- // sorting by smallest mass to largest mass worked better...
1495
1547
// FIXME: I think large blocks (like carry chains) need to be handled specially
1496
1548
// early on. If they are put into a partition too late, they may have
1497
1549
// to create overfill! Perhaps the partitions can hold two lists.
@@ -1500,20 +1552,20 @@ void BiPartitioningPartialLegalizer::partition_blocks_in_window(
1500
1552
[&](APBlockId a, APBlockId b) {
1501
1553
const auto & blk_a_mass = density_manager_->mass_calculator ().get_block_mass (a);
1502
1554
const auto & blk_b_mass = density_manager_->mass_calculator ().get_block_mass (b);
1503
- return blk_a_mass.manhattan_norm () < blk_b_mass.manhattan_norm ();
1555
+ return blk_a_mass.manhattan_norm () > blk_b_mass.manhattan_norm ();
1504
1556
});
1505
1557
for (APBlockId blk_id : unplaced_blocks) {
1506
1558
// Project the underfill from each window onto the mass. This gives us
1507
1559
// the overfill in the dimensions the mass cares about.
1508
1560
const PrimitiveVector& blk_mass = density_manager_->mass_calculator ().get_block_mass (blk_id);
1509
1561
PrimitiveVector projected_lower_window_underfill = lower_window_underfill;
1510
- lower_window_underfill .project (blk_mass);
1562
+ projected_lower_window_underfill .project (blk_mass);
1511
1563
PrimitiveVector projected_upper_window_underfill = upper_window_underfill;
1512
- upper_window_underfill .project (blk_mass);
1564
+ projected_upper_window_underfill .project (blk_mass);
1513
1565
// Put the block in the window with a higher underfill. This tries to
1514
1566
// balance the overfill as much as possible. This works even if the
1515
1567
// overfill becomes negative.
1516
- if (projected_lower_window_underfill.manhattan_norm () >= projected_upper_window_underfill.manhattan_norm ()) {
1568
+ if (projected_lower_window_underfill.sum () >= projected_upper_window_underfill.sum ()) {
1517
1569
lower_window.contained_blocks .push_back (blk_id);
1518
1570
lower_window_underfill -= blk_mass;
1519
1571
} else {
0 commit comments