diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 5635a933e3d57..2a7d020909ea8 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -338,16 +338,23 @@ void context_impl::removeAssociatedDeviceGlobal(const void *DeviceGlobalPtr) { void context_impl::addDeviceGlobalInitializer( ur_program_handle_t Program, const std::vector &Devs, const RTDeviceBinaryImage *BinImage) { + if (BinImage->getDeviceGlobals().empty()) + return; std::lock_guard Lock(MDeviceGlobalInitializersMutex); for (const device &Dev : Devs) { auto Key = std::make_pair(Program, getSyclObjImpl(Dev)->getHandleRef()); - MDeviceGlobalInitializers.emplace(Key, BinImage); + auto [Iter, Inserted] = MDeviceGlobalInitializers.emplace(Key, BinImage); + if (Inserted && !Iter->second.MDeviceGlobalsFullyInitialized) + ++MDeviceGlobalNotInitializedCnt; } } std::vector context_impl::initializeDeviceGlobals( ur_program_handle_t NativePrg, const std::shared_ptr &QueueImpl) { + if (!MDeviceGlobalNotInitializedCnt.load(std::memory_order_acquire)) + return {}; + const AdapterPtr &Adapter = getAdapter(); const DeviceImplPtr &DeviceImpl = QueueImpl->getDeviceImplPtr(); std::lock_guard NativeProgramLock(MDeviceGlobalInitializersMutex); @@ -369,7 +376,6 @@ std::vector context_impl::initializeDeviceGlobals( [&Adapter](const ur_event_handle_t &Event) { return get_event_info( Event, Adapter) == info::event_command_status::complete; - return false; }); // Release the removed events. for (auto EventIt = NewEnd; EventIt != InitEventsRef.end(); ++EventIt) @@ -377,8 +383,10 @@ std::vector context_impl::initializeDeviceGlobals( // Remove them from the collection. InitEventsRef.erase(NewEnd, InitEventsRef.end()); // If there are no more events, we can mark it as fully initialized. - if (InitEventsRef.empty()) + if (InitEventsRef.empty()) { InitRef.MDeviceGlobalsFullyInitialized = true; + --MDeviceGlobalNotInitializedCnt; + } return InitEventsRef; } else if (InitRef.MDeviceGlobalsFullyInitialized) { // MDeviceGlobalsFullyInitialized could have been set while we were @@ -387,7 +395,7 @@ std::vector context_impl::initializeDeviceGlobals( } // There were no events and it was not set as fully initialized, so this is - // responsible for intializing the device globals. + // responsible for initializing the device globals. auto DeviceGlobals = InitRef.MBinImage->getDeviceGlobals(); std::vector DeviceGlobalIds; DeviceGlobalIds.reserve(DeviceGlobals.size()); @@ -402,6 +410,7 @@ std::vector context_impl::initializeDeviceGlobals( // globals are trivially fully initialized and we can end early. if (DeviceGlobalEntries.empty()) { InitRef.MDeviceGlobalsFullyInitialized = true; + --MDeviceGlobalNotInitializedCnt; return {}; } diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 872790f07b886..25d1b1ab105c4 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -307,10 +307,21 @@ class context_impl { std::vector MDeviceGlobalInitEvents; }; - std::map, - DeviceGlobalInitializer> + using HandleDevicePair = std::pair; + + struct HandleDevicePairHash { + std::size_t operator()(const HandleDevicePair &Key) const { + return std::hash{}(Key.first) ^ + std::hash{}(Key.second); + } + }; + + std::unordered_map MDeviceGlobalInitializers; std::mutex MDeviceGlobalInitializersMutex; + // The number of device globals that have not been initialized yet. + std::atomic MDeviceGlobalNotInitializedCnt = 0; // For device_global variables that are not used in any kernel code we still // allow copy operations on them. MDeviceGlobalUnregisteredData stores the