diff --git a/vpr/src/analytical_place/partial_legalizer.cpp b/vpr/src/analytical_place/partial_legalizer.cpp
index 7021734f8d5..1f5103ba02b 100644
--- a/vpr/src/analytical_place/partial_legalizer.cpp
+++ b/vpr/src/analytical_place/partial_legalizer.cpp
@@ -1271,7 +1271,7 @@ void BiPartitioningPartialLegalizer::spread_over_windows(std::vector
     verify());
 }
-PartitionedWindow BiPartitioningPartialLegalizer::partition_window(SpreadingWindow& window) {
+PartitionedWindow BiPartitioningPartialLegalizer::partition_window(
+    SpreadingWindow& window,
+    ModelGroupId group_id) {
+
+    // Search for the ideal partition line on the window. Here, we attempt each
+    // partition and measure how well this cuts the capacity of the region in
+    // half. Cutting the capacity of the region in half should allow the blocks
+    // within the region to also be cut in half (assuming a good initial window
+    // was chosen). This should allow the spreader to spread things more evenly
+    // and converge faster. Hence, it is worth spending more time trying to find
+    // better partition lines.
+    //
+    // Here, we compute the score of a partition as a number between 0 and 1
+    // which represents how balanced the partition is. 0 means that all of the
+    // capacity is on one side of the partition, 1 means that the capacities of
+    // the two partitions are perfectly balanced (equal on both sides).
+    float best_score = -1.0f;
     PartitionedWindow partitioned_window;
+    const std::vector& model_indices = model_grouper_.get_models_in_group(group_id);
-    // Select the partition direction.
-    // To keep it simple, we partition the direction which would cut the
-    // region the most.
-    // TODO: Should explore making the partition line based on the capacity
-    //       of the two partitioned regions. We may want to cut the
-    //       region in half such that the mass of the atoms contained within
-    //       the two future regions is equal.
-    partitioned_window.partition_dir = e_partition_dir::VERTICAL;
-    if (window.region.height() > window.region.width())
-        partitioned_window.partition_dir = e_partition_dir::HORIZONTAL;
-
-    // To keep it simple, just cut the space in half.
-    // TODO: Should investigate other cutting techniques. Cutting perfectly
-    //       in half may not be the most efficient technique.
-    SpreadingWindow& lower_window = partitioned_window.lower_window;
-    SpreadingWindow& upper_window = partitioned_window.upper_window;
-    partitioned_window.pivot_pos = 0.f;
-    if (partitioned_window.partition_dir == e_partition_dir::VERTICAL) {
-        // Find the x-coordinate of a cut line directly in the middle of the
-        // region. We floor this to prevent fractional cut lines.
-        double pivot_x = std::floor((window.region.xmin() + window.region.xmax()) / 2.0);
+    // First, try all of the vertical partitions.
+    double min_pivot_x = std::floor(window.region.xmin()) + 1.0;
+    double max_pivot_x = std::ceil(window.region.xmax()) - 1.0;
+    for (double pivot_x = min_pivot_x; pivot_x <= max_pivot_x; pivot_x++) {
+        // Cut the region at this cut line.
+        auto lower_region = vtr::Rect(vtr::Point(window.region.xmin(),
+                                                 window.region.ymin()),
+                                      vtr::Point(pivot_x,
+                                                 window.region.ymax()));
+
+        auto upper_region = vtr::Rect(vtr::Point(pivot_x,
+                                                 window.region.ymin()),
+                                      vtr::Point(window.region.xmax(),
+                                                 window.region.ymax()));
+
+        // Compute the capacity of each partition for the models that we care
+        // about.
+        // TODO: This can be made better by looking at the mass of all blocks
+        //       within the window and scaling the capacity based on that.
+        float lower_window_capacity = capacity_prefix_sum_.get_sum(model_indices, lower_region).manhattan_norm();
+        lower_window_capacity = std::max(lower_window_capacity, 0.0f);
+        float upper_window_capacity = capacity_prefix_sum_.get_sum(model_indices, upper_region).manhattan_norm();
+        upper_window_capacity = std::max(upper_window_capacity, 0.0f);
+
+        // Compute the score of this partition line. The score is simply the
+        // minimum of the two capacities divided by the maximum of the two
+        // capacities.
+        float smaller_capacity = std::min(lower_window_capacity, upper_window_capacity);
+        float larger_capacity = std::max(lower_window_capacity, upper_window_capacity);
+        float cut_score = smaller_capacity / larger_capacity;
+
+        // If this is the best cut we have ever seen, save it as the result.
+        if (cut_score > best_score) {
+            best_score = cut_score;
+            partitioned_window.partition_dir = e_partition_dir::VERTICAL;
+            partitioned_window.pivot_pos = pivot_x;
+            partitioned_window.lower_window.region = lower_region;
+            partitioned_window.upper_window.region = upper_region;
+        }
+    }
+    // Next, try all of the horizontal partitions.
+    double min_pivot_y = std::floor(window.region.ymin()) + 1.0;
+    double max_pivot_y = std::ceil(window.region.ymax()) - 1.0;
+    for (double pivot_y = min_pivot_y; pivot_y <= max_pivot_y; pivot_y++) {
         // Cut the region at this cut line.
-        lower_window.region = vtr::Rect(vtr::Point(window.region.xmin(),
-                                                   window.region.ymin()),
-                                        vtr::Point(pivot_x,
-                                                   window.region.ymax()));
-
-        upper_window.region = vtr::Rect(vtr::Point(pivot_x,
-                                                   window.region.ymin()),
-                                        vtr::Point(window.region.xmax(),
-                                                   window.region.ymax()));
-        partitioned_window.pivot_pos = pivot_x;
-    } else {
-        VTR_ASSERT(partitioned_window.partition_dir == e_partition_dir::HORIZONTAL);
-        // Similarly in the y direction, find the non-fractional y coordinate
-        // to make a horizontal cut.
-        double pivot_y = std::floor((window.region.ymin() + window.region.ymax()) / 2.0);
-
-        // Then cut the window.
-        lower_window.region = vtr::Rect(vtr::Point(window.region.xmin(),
-                                                   window.region.ymin()),
-                                        vtr::Point(window.region.xmax(),
-                                                   pivot_y));
-
-        upper_window.region = vtr::Rect(vtr::Point(window.region.xmin(),
-                                                   pivot_y),
-                                        vtr::Point(window.region.xmax(),
-                                                   window.region.ymax()));
-        partitioned_window.pivot_pos = pivot_y;
+        auto lower_region = vtr::Rect(vtr::Point(window.region.xmin(),
+                                                 window.region.ymin()),
+                                      vtr::Point(window.region.xmax(),
+                                                 pivot_y));
+
+        auto upper_region = vtr::Rect(vtr::Point(window.region.xmin(),
+                                                 pivot_y),
+                                      vtr::Point(window.region.xmax(),
+                                                 window.region.ymax()));
+
+        // Compute the capacity of each partition for the models that we care
+        // about.
+        // TODO: This can be made better by looking at the mass of all blocks
+        //       within the window and scaling the capacity based on that.
+        float lower_window_capacity = capacity_prefix_sum_.get_sum(model_indices, lower_region).manhattan_norm();
+        lower_window_capacity = std::max(lower_window_capacity, 0.0f);
+        float upper_window_capacity = capacity_prefix_sum_.get_sum(model_indices, upper_region).manhattan_norm();
+        upper_window_capacity = std::max(upper_window_capacity, 0.0f);
+
+        // Compute the score of this partition line. The score is simply the
+        // minimum of the two capacities divided by the maximum of the two
+        // capacities.
+        float smaller_capacity = std::min(lower_window_capacity, upper_window_capacity);
+        float larger_capacity = std::max(lower_window_capacity, upper_window_capacity);
+        float cut_score = smaller_capacity / larger_capacity;
+
+        // If this is the best cut we have ever seen, save it as the result.
+        if (cut_score > best_score) {
+            best_score = cut_score;
+            partitioned_window.partition_dir = e_partition_dir::HORIZONTAL;
+            partitioned_window.pivot_pos = pivot_y;
+            partitioned_window.lower_window.region = lower_region;
+            partitioned_window.upper_window.region = upper_region;
+        }
     }
+    VTR_ASSERT_MSG(best_score >= 0.0f,
+                   "Could not find a partition line for given window");
+
     return partitioned_window;
 }
@@ -1475,7 +1525,7 @@ void BiPartitioningPartialLegalizer::partition_blocks_in_window(
     // NOTE: This needs to be an int in case the pivot is 0.
     for (int i = window.contained_blocks.size() - 1; i >= (int)pivot; i--) {
         const PrimitiveVector& blk_mass = density_manager_->mass_calculator().get_block_mass(window.contained_blocks[i]);
-        VTR_ASSERT_SAFE(lower_window_underfill.is_non_negative());
+        VTR_ASSERT_SAFE(upper_window_underfill.is_non_negative());
         upper_window_underfill -= blk_mass;
         if (upper_window_underfill.is_non_negative())
             upper_window.contained_blocks.push_back(window.contained_blocks[i]);
@@ -1490,8 +1540,6 @@ void BiPartitioningPartialLegalizer::partition_blocks_in_window(
     // windows. To do this we sort the unplaced blocks by largest mass to
     // smallest mass. Then we place each block in the bin with the highest
     // underfill.
-    // FIXME: Above was the intuition; however, after experimentation, found that
-    //        sorting by smallest mass to largest mass worked better...
     // FIXME: I think large blocks (like carry chains) need to be handled special
     //        early on. If they are put into a partition too late, they may have
     //        to create overfill! Perhaps the partitions can hold two lists.
@@ -1500,20 +1548,20 @@ void BiPartitioningPartialLegalizer::partition_blocks_in_window(
               [&](APBlockId a, APBlockId b) {
                   const auto& blk_a_mass = density_manager_->mass_calculator().get_block_mass(a);
                   const auto& blk_b_mass = density_manager_->mass_calculator().get_block_mass(b);
-                  return blk_a_mass.manhattan_norm() < blk_b_mass.manhattan_norm();
+                  return blk_a_mass.manhattan_norm() > blk_b_mass.manhattan_norm();
               });
     for (APBlockId blk_id : unplaced_blocks) {
         // Project the underfill from each window onto the mass. This gives us
         // the overfill in the dimensions the mass cares about.
         const PrimitiveVector& blk_mass = density_manager_->mass_calculator().get_block_mass(blk_id);
         PrimitiveVector projected_lower_window_underfill = lower_window_underfill;
-        lower_window_underfill.project(blk_mass);
+        projected_lower_window_underfill.project(blk_mass);
         PrimitiveVector projected_upper_window_underfill = upper_window_underfill;
-        upper_window_underfill.project(blk_mass);
+        projected_upper_window_underfill.project(blk_mass);
         // Put the block in the window with a higher underfill. This tries to
         // balance the overfill as much as possible. This works even if the
         // overfill becomes negative.
-        if (projected_lower_window_underfill.manhattan_norm() >= projected_upper_window_underfill.manhattan_norm()) {
+        if (projected_lower_window_underfill.sum() >= projected_upper_window_underfill.sum()) {
             lower_window.contained_blocks.push_back(blk_id);
             lower_window_underfill -= blk_mass;
         } else {
diff --git a/vpr/src/analytical_place/partial_legalizer.h b/vpr/src/analytical_place/partial_legalizer.h
index 2eb4e5771da..5f82c787724 100644
--- a/vpr/src/analytical_place/partial_legalizer.h
+++ b/vpr/src/analytical_place/partial_legalizer.h
@@ -483,7 +483,7 @@ class BiPartitioningPartialLegalizer : public PartialLegalizer {
      * the direction of the partition (vertical / horizontal) and the position
      * of the cut.
      */
-    PartitionedWindow partition_window(SpreadingWindow& window);
+    PartitionedWindow partition_window(SpreadingWindow& window, ModelGroupId group_id);
 
     /**
      * @brief Partition the blocks in the given window into the partitioned
diff --git a/vpr/src/analytical_place/primitive_vector.h b/vpr/src/analytical_place/primitive_vector.h
index d76ae8b509d..52b0cb8a560 100644
--- a/vpr/src/analytical_place/primitive_vector.h
+++ b/vpr/src/analytical_place/primitive_vector.h
@@ -266,6 +266,20 @@ class PrimitiveVector {
         return mag;
     }
 
+    /**
+     * @brief Computes the sum across all dimensions of the vector.
+     *
+     * This is similar to manhattan_norm; however, it does not take the
+     * absolute value of each dimension.
+     */
+    inline float sum() const {
+        float sum = 0.f;
+        for (const auto& p : data_) {
+            sum += p.second;
+        }
+        return sum;
+    }
+
     /**
      * @brief Project this vector onto the given vector.
      *
diff --git a/vpr/test/test_ap_primitive_vector.cpp b/vpr/test/test_ap_primitive_vector.cpp
index 425f4e20f35..d3455d7cfe7 100644
--- a/vpr/test/test_ap_primitive_vector.cpp
+++ b/vpr/test/test_ap_primitive_vector.cpp
@@ -241,6 +241,24 @@ TEST_CASE("test_ap_primitive_vector_verify", "[vpr_ap]") {
     vec2 *= -1.f;
     REQUIRE(vec2.manhattan_norm() == vec1.manhattan_norm());
 
+    // sum:
+    vec1.clear();
+    // Sum of the zero vector is zero.
+    REQUIRE(vec1.sum() == 0.f);
+    // Sum of a non-negative vector is the sum of its dims.
+    vec1.set_dim_val(0, 1.f);
+    REQUIRE(vec1.sum() == 1.f);
+    vec1.set_dim_val(1, 2.f);
+    vec1.set_dim_val(2, 3.f);
+    vec1.set_dim_val(3, 4.f);
+    vec1.set_dim_val(4, 5.f);
+    REQUIRE(vec1.sum() == 15.f);
+    // Sum of an all-negative vector is the negation of the sum of the
+    // absolute values of its dims.
+    vec2 = vec1;
+    vec2 *= -1.f;
+    REQUIRE(vec2.sum() == -1.f * vec1.sum());
+
    // Projection:
    // Basic example:
    vec1.clear();
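Reviewer note: the cut-scoring loop added to partition_window() can be read on its own. Each candidate cut line is scored by min(lower_capacity, upper_capacity) / max(lower_capacity, upper_capacity), which lies in [0, 1] and equals 1 only when the two sides hold equal capacity; the highest-scoring cut wins. Below is a minimal standalone sketch of that selection rule; choose_best_cut and its inputs are hypothetical names for illustration and are not part of the VPR code, which instead sums a PrimitiveVector capacity prefix sum over each candidate region.

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical helper (not in VPR): given the capacity to the left of each
// candidate vertical cut line and the total capacity of the window, return
// the index of the most balanced cut. This mirrors the scoring rule in
// partition_window(): score = min(lower, upper) / max(lower, upper).
std::size_t choose_best_cut(const std::vector<float>& capacity_left_of_cut,
                            float total_capacity) {
    float best_score = -1.0f;
    std::size_t best_cut = 0;
    for (std::size_t i = 0; i < capacity_left_of_cut.size(); i++) {
        float lower = std::max(capacity_left_of_cut[i], 0.0f);
        float upper = std::max(total_capacity - lower, 0.0f);
        float larger = std::max(lower, upper);
        // Guard against an empty window; treat it as the worst possible cut.
        float score = (larger > 0.0f) ? std::min(lower, upper) / larger : 0.0f;
        if (score > best_score) {
            best_score = score;
            best_cut = i;
        }
    }
    return best_cut;
}

For example, with capacity_left_of_cut = {1, 3, 5, 7} and total_capacity = 8, the helper returns index 1, a 3-versus-5 split of the window's capacity.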
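Reviewer note: the switch from manhattan_norm() to sum() when comparing the projected underfills matters once a window's underfill goes negative. manhattan_norm() takes the absolute value of every dimension, so a window that is already two units over capacity looks exactly as attractive as one that still has two units of room; sum() keeps the sign, so overfilled windows are penalized. A minimal sketch of the difference follows; ToyVector is a stand-in invented for illustration, not the real PrimitiveVector API.

#include <cmath>
#include <cstdio>
#include <map>

// Stand-in for PrimitiveVector, just enough to contrast the two reductions.
struct ToyVector {
    std::map<int, float> data;

    float manhattan_norm() const {
        float mag = 0.f;
        for (const auto& p : data)
            mag += std::abs(p.second);
        return mag;
    }

    float sum() const {
        float s = 0.f;
        for (const auto& p : data)
            s += p.second;
        return s;
    }
};

int main() {
    // Window A still has 2 units of room in the block's dimension;
    // window B is already 2 units over capacity in that dimension.
    ToyVector underfill_a{{{0, 2.f}}};
    ToyVector underfill_b{{{0, -2.f}}};

    // manhattan_norm() cannot tell the windows apart (both report 2),
    // while sum() prefers the window that actually has room.
    std::printf("norm: A=%.1f B=%.1f\n", underfill_a.manhattan_norm(), underfill_b.manhattan_norm());
    std::printf("sum:  A=%.1f B=%.1f\n", underfill_a.sum(), underfill_b.sum());
    return 0;
}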