Skip to content

Commit

Permalink
Move sterf to CPU; add experimental parallelism for sterf
Browse files Browse the repository at this point in the history
  • Loading branch information
m.dvizov committed Aug 22, 2022
1 parent ca6b50d commit def0e2a
Show file tree
Hide file tree
Showing 6 changed files with 820 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)

# User-configurable build switches. Each option() creates a boolean cache
# entry; the last argument is the default used when the user does not pass
# -D<NAME>=ON/OFF on the command line.
option(ROCSOLVER_EMBED_FMT "Hide libfmt symbols" OFF)
option(OPTIMAL "Build specialized kernels for small matrix sizes" ON)
# HYBRID_CPU: when ON, library/src/CMakeLists.txt adds a PRIVATE compile
# definition of the same name to the rocsolver target.
# NOTE(review): presumably enables the code paths that run sterf on the CPU
# (per the commit title) -- confirm against the sources guarded by HYBRID_CPU.
option(HYBRID_CPU "Build hybrid schema with CPU using" ON)
# EXPERIMENTAL: when ON, library/src/CMakeLists.txt adds a PRIVATE compile
# definition of the same name to the rocsolver target; rocauxiliary_sterf.cpp
# tests it with #ifdef to select the parallel sterf workspace/execution path.
# Off by default.
option(EXPERIMENTAL "Experimental parallelization" OFF)
option(ROCSOLVER_FIND_PACKAGE_LAPACK_CONFIG "Skip module mode search for LAPACK" ON)

# Add our CMake helper files to the lookup path
Expand Down
9 changes: 9 additions & 0 deletions library/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,15 @@ if(OPTIMAL)
target_compile_definitions(rocsolver PRIVATE OPTIMAL)
endif()

# Forward the HYBRID_CPU build option to the compiler as a preprocessor
# definition. PRIVATE: applied only when compiling rocsolver's own sources,
# not propagated to targets that link against rocsolver.
if(HYBRID_CPU)
target_compile_definitions(rocsolver PRIVATE HYBRID_CPU)
endif()

# Forward the EXPERIMENTAL build option to the compiler as a preprocessor
# definition (checked with #ifdef EXPERIMENTAL in the library sources).
# PRIVATE: applied only when compiling rocsolver's own sources, not
# propagated to targets that link against rocsolver.
if(EXPERIMENTAL)
target_compile_definitions(rocsolver PRIVATE EXPERIMENTAL)
endif()


target_compile_definitions(rocsolver PRIVATE
ROCM_USE_FLOAT16
ROCBLAS_INTERNAL_API
Expand Down
21 changes: 21 additions & 0 deletions library/src/auxiliary/rocauxiliary_sterf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,26 @@ rocblas_status
rocblas_stride strideE = 0;
rocblas_int batch_count = 1;

#ifdef EXPERIMENTAL
// additional memory for internal kernels (parallel sterf)
size_t size_ranges;
rocsolver_sterf_parallel_getMemorySize<T>(n, &size_ranges);

if(rocblas_is_device_memory_size_query(handle))
return rocblas_set_optimal_device_memory_size(handle, size_ranges);

// memory workspace allocation
void* ranges;
rocblas_device_malloc mem_range(handle, size_ranges);
if(!mem_range)
return rocblas_status_memory_error;

ranges = mem_range[0];

// execution
return rocsolver_sterf_template<T>(handle, n, D, shiftD, strideD, E, shiftE, strideE, info,
batch_count, (rocblas_int*)ranges);
#else
// memory workspace sizes:
// size for lasrt stack
size_t size_stack;
Expand All @@ -46,6 +66,7 @@ rocblas_status
// execution
return rocsolver_sterf_template<T>(handle, n, D, shiftD, strideD, E, shiftE, strideE, info,
batch_count, (rocblas_int*)stack);
#endif
}

/*
Expand Down
Loading

0 comments on commit def0e2a

Please sign in to comment.