Skip to content

Commit 808dd43

Browse files
committed
tests: add asynchronous Eigen threadpool runtime implementation
1 parent 6468abc commit 808dd43

File tree

4 files changed

+114
-17
lines changed

4 files changed

+114
-17
lines changed

cmake/Threadpool.cmake

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,28 @@ if("${DNNL_CPU_THREADING_RUNTIME}" STREQUAL "THREADPOOL")
3232
endif()
3333
endif()
3434

35-
if("${_DNNL_TEST_THREADPOOL_IMPL}" STREQUAL "EIGEN")
35+
if(_DNNL_TEST_THREADPOOL_IMPL MATCHES "^(EIGEN|EIGEN_ASYNC)$")
3636
find_package(Eigen3 5.0 REQUIRED NO_MODULE)
3737
if(Eigen3_FOUND)
3838
list(APPEND EXTRA_STATIC_LIBS Eigen3::Eigen)
3939
message(STATUS "Found Eigen: ${PACKAGE_PREFIX_DIR}")
4040
endif()
4141

42-
find_package(absl REQUIRED CONFIG)
42+
# Note: not every UNIX OS searches in lib64 by default, but this path is
43+
# generated by absl packaging procedure.
44+
find_package(absl REQUIRED CONFIG PATH_SUFFIXES "lib64/cmake/absl")
4345
if(absl_FOUND)
4446
list(APPEND EXTRA_STATIC_LIBS absl::synchronization)
4547
message(STATUS "Found abseil-cpp: ${PACKAGE_PREFIX_DIR}")
4648
endif()
49+
50+
if("${_DNNL_TEST_THREADPOOL_IMPL}" STREQUAL "EIGEN_ASYNC")
51+
find_package(xla REQUIRED CONFIG PATH_SUFFIXES "lib64/cmake/xla")
52+
if(xla_FOUND)
53+
list(APPEND EXTRA_STATIC_LIBS xla)
54+
message(STATUS "Found XLA: ${PACKAGE_PREFIX_DIR}")
55+
endif()
56+
endif()
4757
endif()
4858

4959
if("${_DNNL_TEST_THREADPOOL_IMPL}" STREQUAL "STANDALONE")

cmake/options.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,8 +266,8 @@ endif()
266266
set(_DNNL_TEST_THREADPOOL_IMPL "STANDALONE" CACHE STRING
267267
"specifies which threadpool implementation to use when
268268
DNNL_CPU_RUNTIME=THREADPOOL is selected. Valid values: STANDALONE, EIGEN,
269-
TBB")
270-
if(NOT "${_DNNL_TEST_THREADPOOL_IMPL}" MATCHES "^(STANDALONE|TBB|EIGEN)$")
269+
EIGEN_ASYNC, TBB")
270+
if(NOT "${_DNNL_TEST_THREADPOOL_IMPL}" MATCHES "^(STANDALONE|TBB|EIGEN|EIGEN_ASYNC)$")
271271
message(FATAL_ERROR
272272
"Unsupported threadpool implementation: ${_DNNL_TEST_THREADPOOL_IMPL}")
273273
endif()

doc/build/build_options.md

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -236,29 +236,38 @@ oneDNN has functional limitations if built with TBB:
236236

237237
#### Threadpool
238238
To build oneDNN with support for threadpool threading, set `ONEDNN_CPU_RUNTIME`
239-
to `THREADPOOL`
239+
to `THREADPOOL`:
240240

241241
~~~sh
242242
$ cmake -DONEDNN_CPU_RUNTIME=THREADPOOL ..
243243
~~~
244244

245+
Threadpool threading support has the same limitations as TBB plus more:
246+
* As threadpools are attached to streams which are only passed during primitive
247+
execution, work decomposition is performed statically at primitive creation
248+
time. At the primitive execution time, the threadpool is responsible for
249+
balancing this decomposition across available worker threads.
250+
251+
##### Threadpool validation
245252
The `_ONEDNN_TEST_THREADPOOL_IMPL` CMake variable controls which of the four
246-
threadpool implementations would be used for testing: `STANDALONE`, `TBB`, or
247-
`EIGEN`. The latter two require also passing `TBBROOT` or `Eigen3_DIR` paths
248-
to CMake. For example:
253+
threadpool implementations would be used for testing: `STANDALONE`, `TBB`,
254+
`EIGEN`, or `EIGEN_ASYNC`.
255+
256+
The `TBB` implementation requires passing `TBBROOT` to CMake so that it can find the package.
257+
258+
The `EIGEN` implementation requires the Eigen (5.0 or higher) and Abseil-CPP packages to be
259+
discoverable by CMake.
249260

261+
The `EIGEN_ASYNC` implementation has the same requirements as `EIGEN` and additionally requires
262+
the OpenXLA threadpool package. However, additional actions might be required to
263+
compile tests since this threadpool implementation relies on internal OpenXLA
264+
headers.
265+
266+
For example:
250267
~~~sh
251-
$ cmake -DONEDNN_CPU_RUNTIME=THREADPOOL -D_ONEDNN_TEST_THREADPOOL_IMPL=EIGEN -DEigen3_DIR=/path/to/eigen/share/eigen3/cmake ..
268+
$ cmake -DONEDNN_CPU_RUNTIME=THREADPOOL -D_ONEDNN_TEST_THREADPOOL_IMPL=EIGEN -DCMAKE_PREFIX_PATH="/path/to/eigen/share/eigen3/cmake;/path/to/absl/lib64/cmake" ..
252269
~~~
253270

254-
Threadpool threading support is experimental and has the same limitations as
255-
TBB plus more:
256-
* As threadpools are attached to streams which are only passed during
257-
primitive execution, work decomposition is performed statically at the
258-
primitive creation time. At the primitive execution time, the threadpool is
259-
responsible for balancing the static decomposition from the previous item
260-
across available worker threads.
261-
262271
### AArch64 Options
263272

264273
oneDNN includes experimental support for Arm 64-bit Architecture (AArch64).

tests/test_thread.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,84 @@ class threadpool_t : public dnnl::threadpool_interop::threadpool_iface {
195195
} // namespace testing
196196
} // namespace dnnl
197197

198+
#elif defined(DNNL_TEST_THREADPOOL_USE_EIGEN_ASYNC)
199+
200+
// absl sources define their own version of the `CHECK` macro. oneDNN's version
201+
// is not needed further in the file, so undefine it to let the file compile.
202+
#undef CHECK
203+
204+
#define EIGEN_USE_THREADS
205+
#include "Eigen/ThreadPool"
206+
207+
#include "xla/backends/cpu/runtime/work_queue.h"
208+
#include "xla/tsl/concurrency/async_value_ref.h"
209+
#include "xla/tsl/concurrency/chain.h"
210+
211+
#include <cstddef>
212+
#include <cstdint>
213+
#include <functional>
214+
215+
namespace dnnl {
216+
namespace testing {
217+
218+
// Returns a reference (via `AsRef()`) to a function-local static `tsl::Chain`
// async value that is built once with `tsl::MakeAvailableAsyncValueRef`.
// Presumably this is an event that is already in the "available" state, so it
// can seed a chain of dependent work -- confirm against the tsl documentation.
// NOTE(review): `std::unique_ptr`/`std::make_unique` are used here, but
// `<memory>` is not among the includes visible in this `#elif` branch; it is
// presumably provided by an earlier or transitive include -- confirm.
static tsl::AsyncValueRef<tsl::Chain> OkDoneEventSingleton() {
219+
// The owning reference is a static, so it is created on the first call only
// and every later call hands out a reference to the same value.
static std::unique_ptr<tsl::AsyncValueOwningRef<tsl::Chain>> singleton =
220+
[] {
221+
// Backing storage for the value; kept in a static of its own so it stays
// alive for the owning reference created from it below.
static auto storage = std::make_unique<
222+
tsl::internal::AsyncValueStorage<tsl::Chain>>();
223+
return std::make_unique<tsl::AsyncValueOwningRef<tsl::Chain>>(
224+
tsl::MakeAvailableAsyncValueRef<tsl::Chain>(*storage));
225+
}();
226+
return singleton->AsRef();
227+
}
228+
229+
// Test threadpool that implements the oneDNN `threadpool_iface` on top of an
// `Eigen::ThreadPool` and reports itself as ASYNCHRONOUS: `parallel_for()`
// chains work onto `done_event_` instead of running it inline, and `wait()`
// blocks until that event is ready.
class threadpool_t : public dnnl::threadpool_interop::threadpool_iface {
230+
private:
231+
// Original `OneDnnThreadPool` at
232+
// `xla/backends/cpu/runtime/onednn/onednn_threadpool.h` takes
233+
// `Eigen::ThreadPoolInterface` instead. `Eigen::ThreadPool` (an alias to
234+
// `NonBlockingThreadPool`) is presumably an implementation of that
235+
// interface -- not a parent of it -- so it fits the need. TODO confirm.
236+
std::unique_ptr<Eigen::ThreadPool> thread_pool_;
237+
238+
// Async value that signals completion of the last scheduled parallel loop.
239+
// NOTE(review): earlier wording mentioned an `is_async_` flag; no such member
// is visible in this class, and get_flags() always returns ASYNCHRONOUS.
240+
tsl::AsyncValueRef<tsl::Chain> done_event_;
241+
242+
public:
243+
// Creates the pool with `num_threads` workers. A non-positive value (the
// default) makes the count come from read_num_threads_from_env(), which is
// defined outside this block. `done_event_` starts from the shared singleton
// returned by OkDoneEventSingleton().
explicit threadpool_t(int num_threads = 0) {
244+
if (num_threads <= 0) num_threads = read_num_threads_from_env();
245+
thread_pool_.reset(new Eigen::ThreadPool(num_threads));
246+
done_event_ = OkDoneEventSingleton();
247+
}
248+
// Number of threads reported by the underlying Eigen pool.
int get_num_threads() const override { return thread_pool_->NumThreads(); }
249+
// Always reports "not in a parallel region".
bool get_in_parallel() const override { return false; }
250+
// Advertises this implementation as asynchronous.
uint64_t get_flags() const override { return ASYNCHRONOUS; }
251+
// Schedules `fn(i, n)` for each index `i` handed out by
// `xla::cpu::Worker::Parallelize` (presumably every i in [0, n) -- confirm
// against the XLA work-queue documentation) and returns without waiting.
void parallel_for(int n, const std::function<void(int, int)> &fn) override {
252+
// If we are using oneDNN with async support, we need to schedule the
253+
// parallel loop using the done_event_. This allows us to return
254+
// immediately and not block the caller thread.
255+
// `fn` is captured by copy because the caller's reference may not outlive
// this call. NOTE(review): `this` is captured as well, so the object
// presumably has to stay alive until the scheduled work completes -- confirm
// that callers invoke wait() before destroying the threadpool.
auto parallelize = [this, n, fn](tsl::Chain) {
256+
return xla::cpu::Worker::Parallelize(thread_pool_.get(),
257+
thread_pool_->NumThreads(), n,
258+
[fn, n](size_t i) { fn(static_cast<int>(i), n); });
259+
};
260+
261+
// Replace the tracked event with the one produced by chaining the new loop
// onto the previous event (presumably the loop starts once the previous
// event becomes available -- confirm FlatMap semantics).
done_event_ = done_event_.FlatMap(parallelize);
262+
}
263+
// Blocks the calling thread until `done_event_` is ready.
void wait() override {
264+
// While performing asynchronous execution, wait() method is needed to
265+
// notify the user that the output is ready. oneDNN will not call wait()
266+
// inside the library to avoid deadlock.
267+
tsl::BlockUntilReady(done_event_);
268+
}
269+
270+
// Returns a copy of the reference to the currently tracked completion event.
tsl::AsyncValueRef<tsl::Chain> done_event() const { return done_event_; }
271+
};
272+
273+
} // namespace testing
274+
} // namespace dnnl
275+
198276
#elif defined(DNNL_TEST_THREADPOOL_USE_TBB)
199277
#include "tbb/parallel_for.h"
200278
#include "tbb/task_arena.h"

0 commit comments

Comments
 (0)