From 61d518cc75dbca4be1a5a6e329cfbaea8bb34a6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pekka=20J=C3=A4=C3=A4skel=C3=A4inen?= <pekka.jaaskelainen@parmance.com> Date: Mon, 16 Oct 2017 17:39:05 +0200 Subject: [PATCH 1/4] [HSABE] indexing of program scope functions Previously the binary image index contained only kernels, but program scope functions are also needed there for indirect call implementations, so that the HSA Runtime API can be used to query their addresses. This patch mainly renames the corresponding structs and APIs that have 'kernel' in their name to 'function' and adds a separate flag to the function_index (formerly kernel_index) for separating host-callable kernels from program scope functions (whose addresses can be queried by the HSA runtime). --- gcc/hsa-brig.c | 282 ++++++++++++++++++++++-------------- gcc/hsa-common.c | 123 +++++++++------- gcc/hsa-common.h | 41 ++++-- gcc/hsa-gen.c | 28 ++-- gcc/ipa-hsa.c | 8 +- gcc/omp-expand.c | 6 +- libgomp/plugin/plugin-hsa.c | 167 ++++++++++++--------- 7 files changed, 394 insertions(+), 261 deletions(-) diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c index d15ce261ed2..82b21b2c0cb 100644 --- a/gcc/hsa-brig.c +++ b/gcc/hsa-brig.c @@ -2144,86 +2144,128 @@ hsa_output_global_variables () return global_vars_table; } -/* Create __hsa_host_functions and __hsa_kernels that contain - all informations consumed by libgomp to register all kernels - in the BRIG binary. */ +/* Create __hsa_host_functions and __hsa_functions that contain all information + consumed by libgomp to register all HSA functions stored in the BRIG binary. + If HOST_FUNC_TABLE is non-NULL, then fill it with declarations of all CPU + functions that have HSA implementations and FUNCTIONS with information + about those HSA implementations. If HOST_FUNC_TABLE is NULL then + ignore it and fill in FUNCTIONS with information about functions that do not + have a CPU counterpart. 
In both cases, return the number of descriptors + stored in the one or both of the tables. */ -static void -hsa_output_kernels (tree *host_func_table, tree *kernels) +static unsigned +hsa_output_functions (tree *host_func_table, tree *functions) { - unsigned map_count = hsa_get_number_decl_kernel_mappings (); - - tree int_num_of_kernels; - int_num_of_kernels = build_int_cst (uint32_type_node, map_count); - tree kernel_num_index_type = build_index_type (int_num_of_kernels); - tree host_functions_array_type = build_array_type (ptr_type_node, - kernel_num_index_type); - TYPE_ARTIFICIAL (host_functions_array_type) = 1; + char tmp_name[64]; + unsigned map_count = hsa_get_number_decl_function_mappings (); - vec<constructor_elt, va_gc> *host_functions_vec = NULL; + unsigned out_count = 0; for (unsigned i = 0; i < map_count; ++i) + if (host_func_table) + { + if (hsa_get_host_function (hsa_get_decl_function_mapping_decl (i))) + out_count++; + } + else + { + if (!hsa_get_host_function (hsa_get_decl_function_mapping_decl (i))) + out_count++; + } + + tree int_num_of_functions = build_int_cst (uint32_type_node, out_count); + tree function_num_index_type = build_index_type (int_num_of_functions); + + if (host_func_table) { - tree decl = hsa_get_decl_kernel_mapping_decl (i); - tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl)); - CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn); + tree host_functions_array_type + = build_array_type (ptr_type_node, function_num_index_type); + TYPE_ARTIFICIAL (host_functions_array_type) = 1; + + vec<constructor_elt, va_gc> *host_functions_vec = NULL; + for (unsigned i = 0; i < map_count; ++i) + { + tree decl = hsa_get_decl_function_mapping_decl (i); + tree host_fn = hsa_get_host_function (decl); + if (!host_fn) + continue; + host_fn = build_fold_addr_expr (host_fn); + CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn); + } + tree host_functions_ctor = build_constructor (host_functions_array_type, + 
host_functions_vec); + ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1); + tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (tmp_name), + host_functions_array_type); + TREE_STATIC (hsa_host_func_table) = 1; + TREE_READONLY (hsa_host_func_table) = 1; + TREE_PUBLIC (hsa_host_func_table) = 0; + DECL_ARTIFICIAL (hsa_host_func_table) = 1; + DECL_IGNORED_P (hsa_host_func_table) = 1; + DECL_EXTERNAL (hsa_host_func_table) = 0; + TREE_CONSTANT (hsa_host_func_table) = 1; + DECL_INITIAL (hsa_host_func_table) = host_functions_ctor; + varpool_node::finalize_decl (hsa_host_func_table); + *host_func_table = hsa_host_func_table; } - tree host_functions_ctor = build_constructor (host_functions_array_type, - host_functions_vec); - char tmp_name[64]; - ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1); - tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (tmp_name), - host_functions_array_type); - TREE_STATIC (hsa_host_func_table) = 1; - TREE_READONLY (hsa_host_func_table) = 1; - TREE_PUBLIC (hsa_host_func_table) = 0; - DECL_ARTIFICIAL (hsa_host_func_table) = 1; - DECL_IGNORED_P (hsa_host_func_table) = 1; - DECL_EXTERNAL (hsa_host_func_table) = 0; - TREE_CONSTANT (hsa_host_func_table) = 1; - DECL_INITIAL (hsa_host_func_table) = host_functions_ctor; - varpool_node::finalize_decl (hsa_host_func_table); - *host_func_table = hsa_host_func_table; - - /* Following code emits list of kernel_info structures. */ - - tree kernel_info_type = make_node (RECORD_TYPE); + + /* Following code emits a list of function_info structures. 
*/ + + tree func_info_type = make_node (RECORD_TYPE); tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("name"), ptr_type_node); DECL_CHAIN (id_f1) = NULL_TREE; + tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("omp_data_size"), unsigned_type_node); DECL_CHAIN (id_f2) = id_f1; + tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("gridified_kernel_p"), + get_identifier ("kernel_p"), boolean_type_node); DECL_CHAIN (id_f3) = id_f2; + tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("kernel_dependencies_count"), - unsigned_type_node); + get_identifier ("gridified_kernel_p"), + boolean_type_node); DECL_CHAIN (id_f4) = id_f3; + tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("kernel_dependencies_count"), + unsigned_type_node); + DECL_CHAIN (id_f5) = id_f4; + + tree id_f6 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("kernel_dependencies"), build_pointer_type (build_pointer_type (char_type_node))); - DECL_CHAIN (id_f5) = id_f4; - finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5, + DECL_CHAIN (id_f6) = id_f5; + + finish_builtin_struct (func_info_type, "__hsa_function_info", id_f6, NULL_TREE); - int_num_of_kernels = build_int_cstu (uint32_type_node, map_count); - tree kernel_info_vector_type - = build_array_type (kernel_info_type, - build_index_type (int_num_of_kernels)); - TYPE_ARTIFICIAL (kernel_info_vector_type) = 1; + tree func_info_vector_type + = build_array_type (func_info_type, function_num_index_type); + TYPE_ARTIFICIAL (func_info_vector_type) = 1; - vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL; - tree kernel_dependencies_vector_type = NULL; + vec<constructor_elt, va_gc> *func_info_vector_vec = NULL; + tree function_dependencies_vector_type = NULL; for (unsigned i = 0; i < map_count; ++i) { - tree kernel = hsa_get_decl_kernel_mapping_decl (i); - char *name = hsa_get_decl_kernel_mapping_name (i); + 
tree decl = hsa_get_decl_function_mapping_decl (i); + tree host_fn = hsa_get_host_function (decl); + if (host_func_table) + { + if (!host_fn) + continue; + } + else if (host_fn) + continue; + + tree func = hsa_get_decl_function_mapping_decl (i); + char *name = hsa_get_decl_function_mapping_name (i); unsigned len = strlen (name); char *copy = XNEWVEC (char, len + 2); copy[0] = '&'; @@ -2231,31 +2273,34 @@ hsa_output_kernels (tree *host_func_table, tree *kernels) copy[len + 1] = '\0'; len++; - tree kern_name = build_string (len, copy); - TREE_TYPE (kern_name) + tree func_name = build_string (len, copy); + TREE_TYPE (func_name) = build_array_type (char_type_node, build_index_type (size_int (len))); free (copy); unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i); tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size); - bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i); + bool kernel_p = hsa_get_decl_function_mapping_kernel_p (i); + tree kernel_p_tree = build_int_cstu (boolean_type_node, kernel_p); + bool gridified_kernel_p + = hsa_get_decl_function_mapping_gridified_p (i); tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node, gridified_kernel_p); unsigned count = 0; - vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL; - if (hsa_decl_kernel_dependencies) + vec<constructor_elt, va_gc> *function_dependencies_vec = NULL; + if (hsa_decl_function_dependencies) { vec<const char *> **slot; - slot = hsa_decl_kernel_dependencies->get (kernel); + slot = hsa_decl_function_dependencies->get (func); if (slot) { vec <const char *> *dependencies = *slot; count = dependencies->length (); - kernel_dependencies_vector_type + function_dependencies_vector_type = build_array_type (build_pointer_type (char_type_node), build_index_type (size_int (count))); - TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1; + TYPE_ARTIFICIAL (function_dependencies_vector_type) = 1; for (unsigned j = 0; j < count; j++) { @@ -2267,7 +2312,7 @@ 
hsa_output_kernels (tree *host_func_table, tree *kernels) build_index_type (size_int (len))); CONSTRUCTOR_APPEND_ELT - (kernel_dependencies_vec, NULL_TREE, + (function_dependencies_vec, NULL_TREE, build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (dependency_name)), dependency_name)); @@ -2277,24 +2322,25 @@ hsa_output_kernels (tree *host_func_table, tree *kernels) tree dependencies_count = build_int_cstu (unsigned_type_node, count); - vec<constructor_elt, va_gc> *kernel_info_vec = NULL; - CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, + vec<constructor_elt, va_gc> *func_info_vec = NULL; + CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE, build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE - (kern_name)), - kern_name)); - CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size); - CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, + (func_name)), + func_name)); + CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE, omp_data_size); + CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE, kernel_p_tree); + CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE, gridified_kernel_p_tree); - CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count); + CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE, dependencies_count); if (count > 0) { ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i); - gcc_checking_assert (kernel_dependencies_vector_type); + gcc_checking_assert (function_dependencies_vector_type); tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (tmp_name), - kernel_dependencies_vector_type); + function_dependencies_vector_type); TREE_STATIC (dependencies_list) = 1; TREE_READONLY (dependencies_list) = 1; @@ -2304,57 +2350,63 @@ hsa_output_kernels (tree *host_func_table, tree *kernels) DECL_EXTERNAL (dependencies_list) = 0; TREE_CONSTANT (dependencies_list) = 1; DECL_INITIAL (dependencies_list) - = build_constructor (kernel_dependencies_vector_type, - kernel_dependencies_vec); + = build_constructor 
(function_dependencies_vector_type, + function_dependencies_vec); varpool_node::finalize_decl (dependencies_list); - CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, + CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE, build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (dependencies_list)), dependencies_list)); } else - CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node); + CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE, null_pointer_node); - tree kernel_info_ctor = build_constructor (kernel_info_type, - kernel_info_vec); + tree func_info_ctor = build_constructor (func_info_type, + func_info_vec); - CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE, - kernel_info_ctor); + CONSTRUCTOR_APPEND_ELT (func_info_vector_vec, NULL_TREE, + func_info_ctor); } - ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1); - tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (tmp_name), - kernel_info_vector_type); - - TREE_STATIC (hsa_kernels) = 1; - TREE_READONLY (hsa_kernels) = 1; - TREE_PUBLIC (hsa_kernels) = 0; - DECL_ARTIFICIAL (hsa_kernels) = 1; - DECL_IGNORED_P (hsa_kernels) = 1; - DECL_EXTERNAL (hsa_kernels) = 0; - TREE_CONSTANT (hsa_kernels) = 1; - DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type, - kernel_info_vector_vec); - varpool_node::finalize_decl (hsa_kernels); - *kernels = hsa_kernels; + /* __hsa_host_mapped_functions are functions or kernels which have a + matching host function. __hsa_only_functions are functions with only HSA + versions available. */ + ASM_GENERATE_INTERNAL_LABEL (tmp_name, + host_func_table ? 
"__hsa_host_mapped_functions" + : "__hsa_only_functions", 1); + tree hsa_functions = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (tmp_name), + func_info_vector_type); + + TREE_STATIC (hsa_functions) = 1; + TREE_READONLY (hsa_functions) = 1; + TREE_PUBLIC (hsa_functions) = 0; + DECL_ARTIFICIAL (hsa_functions) = 1; + DECL_IGNORED_P (hsa_functions) = 1; + DECL_EXTERNAL (hsa_functions) = 0; + TREE_CONSTANT (hsa_functions) = 1; + DECL_INITIAL (hsa_functions) = build_constructor (func_info_vector_type, + func_info_vector_vec); + varpool_node::finalize_decl (hsa_functions); + *functions = hsa_functions; + return out_count; } -/* Create a static constructor that will register out brig stuff with +/* Create a static constructor that will register our BRIG indexes with libgomp. */ static void hsa_output_libgomp_mapping (tree brig_decl) { - unsigned kernel_count = hsa_get_number_decl_kernel_mappings (); unsigned global_variable_count = hsa_global_variable_symbols->elements (); - tree kernels; + tree hsa_funcs; tree host_func_table; - hsa_output_kernels (&host_func_table, &kernels); + unsigned hsa_func_count = hsa_output_functions (&host_func_table, + &hsa_funcs); tree global_vars = hsa_output_global_variables (); tree hsa_image_desc_type = make_node (RECORD_TYPE); @@ -2362,12 +2414,12 @@ hsa_output_libgomp_mapping (tree brig_decl) get_identifier ("brig_module"), ptr_type_node); DECL_CHAIN (id_f1) = NULL_TREE; tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("kernel_count"), + get_identifier ("mapped_function_count"), unsigned_type_node); DECL_CHAIN (id_f2) = id_f1; tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("hsa_kernel_infos"), + get_identifier ("omp_kernel_infos"), ptr_type_node); DECL_CHAIN (id_f3) = id_f2; tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL, @@ -2378,7 +2430,16 @@ hsa_output_libgomp_mapping (tree brig_decl) get_identifier ("hsa_global_variable_infos"), ptr_type_node); DECL_CHAIN 
(id_f5) = id_f4; - finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5, + tree id_f6 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("hsa_only_function_count"), + unsigned_type_node); + DECL_CHAIN (id_f6) = id_f5; + tree id_f7 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("hsa_only_function_infos"), + ptr_type_node); + DECL_CHAIN (id_f7) = id_f6; + + finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f7, NULL_TREE); TYPE_ARTIFICIAL (hsa_image_desc_type) = 1; @@ -2386,11 +2447,11 @@ hsa_output_libgomp_mapping (tree brig_decl) CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, build_fold_addr_expr (brig_decl)); CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, - build_int_cstu (unsigned_type_node, kernel_count)); + build_int_cstu (unsigned_type_node, hsa_func_count)); CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, build1 (ADDR_EXPR, - build_pointer_type (TREE_TYPE (kernels)), - kernels)); + build_pointer_type (TREE_TYPE (hsa_funcs)), + hsa_funcs)); CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, build_int_cstu (unsigned_type_node, global_variable_count)); @@ -2399,6 +2460,15 @@ hsa_output_libgomp_mapping (tree brig_decl) build_pointer_type (TREE_TYPE (global_vars)), global_vars)); + unsigned hsa_only_func_count = hsa_output_functions (NULL, &hsa_funcs); + CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, + build_int_cstu (unsigned_type_node, + hsa_only_func_count)); + CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, + build1 (ADDR_EXPR, + build_pointer_type (TREE_TYPE (hsa_funcs)), + hsa_funcs)); + tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec); char tmp_name[64]; @@ -2425,7 +2495,7 @@ hsa_output_libgomp_mapping (tree brig_decl) CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, host_func_table_addr); offset_int func_table_size - = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count; + = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * hsa_func_count; 
CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (host_func_table_addr), @@ -2599,7 +2669,7 @@ hsa_output_brig (void) hsa_output_libgomp_mapping (brig_decl); - hsa_free_decl_kernel_mapping (); + hsa_free_decl_function_mapping (); brig_release_data (); hsa_deinit_compilation_unit_data (); diff --git a/gcc/hsa-common.c b/gcc/hsa-common.c index c8c12afb04c..4f0b88fde5b 100644 --- a/gcc/hsa-common.c +++ b/gcc/hsa-common.c @@ -47,29 +47,31 @@ along with GCC; see the file COPYING3. If not see function. */ class hsa_function_representation *hsa_cfun; -/* Element of the mapping vector between a host decl and an HSA kernel. */ +/* Element of the mapping vector between a host decl and an HSA function. */ -struct GTY(()) hsa_decl_kernel_map_element +struct GTY(()) hsa_decl_function_map_element { /* The decl of the host function. */ tree decl; - /* Name of the HSA kernel in BRIG. */ + /* Name of the HSA function in BRIG. */ char * GTY((skip)) name; /* Size of OMP data, if the kernel contains a kernel dispatch. */ unsigned omp_data_size; - /* True if the function is gridified kernel. */ + /* True if the function is a host-callable kernel. */ + bool kernel_p; + /* True if the function is a gridified kernel. */ bool gridified_kernel_p; }; /* Mapping between decls and corresponding HSA kernels in this compilation unit. */ -static GTY (()) vec<hsa_decl_kernel_map_element, va_gc> - *hsa_decl_kernel_mapping; +static GTY (()) vec<hsa_decl_function_map_element, va_gc> + *hsa_decl_function_mapping; /* Mapping between decls and corresponding HSA kernels called by the function. */ -hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies; +hash_map <tree, vec <const char *> *> *hsa_decl_function_dependencies; /* Hash function to lookup a symbol for a decl. 
*/ hash_table <hsa_noop_symbol_hasher> *hsa_global_variable_symbols; @@ -649,87 +651,97 @@ hsa_destroy_operand (hsa_op_base *op) op->~hsa_op_base (); } -/* Create a mapping between the original function DECL and kernel name NAME. */ +/* Create a mapping between the original function DECL and HSA function + named NAME. */ void -hsa_add_kern_decl_mapping (tree decl, char *name, unsigned omp_data_size, - bool gridified_kernel_p) +hsa_add_function_decl_mapping (tree decl, char *name, unsigned omp_data_size, + struct hsa_function_summary *s) { - hsa_decl_kernel_map_element dkm; + hsa_decl_function_map_element dkm; dkm.decl = decl; dkm.name = name; dkm.omp_data_size = omp_data_size; - dkm.gridified_kernel_p = gridified_kernel_p; - vec_safe_push (hsa_decl_kernel_mapping, dkm); + dkm.kernel_p = s->m_kind == HSA_KERNEL; + dkm.gridified_kernel_p = s->m_gridified_kernel_p; + vec_safe_push (hsa_decl_function_mapping, dkm); } -/* Return the number of kernel decl name mappings. */ +/* Return the number of function decl name mappings. */ unsigned -hsa_get_number_decl_kernel_mappings (void) +hsa_get_number_decl_function_mappings (void) { - return vec_safe_length (hsa_decl_kernel_mapping); + return vec_safe_length (hsa_decl_function_mapping); } -/* Return the decl in the Ith kernel decl name mapping. */ +/* Return the decl in the Ith function decl name mapping. */ tree -hsa_get_decl_kernel_mapping_decl (unsigned i) +hsa_get_decl_function_mapping_decl (unsigned i) { - return (*hsa_decl_kernel_mapping)[i].decl; + return (*hsa_decl_function_mapping)[i].decl; } -/* Return the name in the Ith kernel decl name mapping. */ +/* Return the name in the Ith function decl name mapping. */ char * -hsa_get_decl_kernel_mapping_name (unsigned i) +hsa_get_decl_function_mapping_name (unsigned i) { - return (*hsa_decl_kernel_mapping)[i].name; + return (*hsa_decl_function_mapping)[i].name; } -/* Return maximum OMP size for kernel decl name mapping. 
*/ +/* Return maximum OMP size for function decl name mapping. */ unsigned hsa_get_decl_kernel_mapping_omp_size (unsigned i) { - return (*hsa_decl_kernel_mapping)[i].omp_data_size; + return (*hsa_decl_function_mapping)[i].omp_data_size; } -/* Return if the function is gridified kernel in decl name mapping. */ +/* Return if the function is a gridified kernel in decl name mapping. */ bool -hsa_get_decl_kernel_mapping_gridified (unsigned i) +hsa_get_decl_function_mapping_gridified_p (unsigned i) { - return (*hsa_decl_kernel_mapping)[i].gridified_kernel_p; + return (*hsa_decl_function_mapping)[i].gridified_kernel_p; } -/* Free the mapping between original decls and kernel names. */ +/* Return true if the function is a host-callable kernel. */ + +bool +hsa_get_decl_function_mapping_kernel_p (unsigned i) +{ + return (*hsa_decl_function_mapping)[i].kernel_p; +} + +/* Free the mapping between original decls and function names. */ void -hsa_free_decl_kernel_mapping (void) +hsa_free_decl_function_mapping (void) { - if (hsa_decl_kernel_mapping == NULL) + if (hsa_decl_function_mapping == NULL) return; - for (unsigned i = 0; i < hsa_decl_kernel_mapping->length (); ++i) - free ((*hsa_decl_kernel_mapping)[i].name); - ggc_free (hsa_decl_kernel_mapping); + for (unsigned i = 0; i < hsa_decl_function_mapping->length (); ++i) + free ((*hsa_decl_function_mapping)[i].name); + ggc_free (hsa_decl_function_mapping); } -/* Add new kernel dependency. */ +/* Add new function dependency. 
*/ void -hsa_add_kernel_dependency (tree caller, const char *called_function) +hsa_add_function_dependency (tree caller, const char *called_function) { - if (hsa_decl_kernel_dependencies == NULL) - hsa_decl_kernel_dependencies = new hash_map<tree, vec<const char *> *> (); + if (hsa_decl_function_dependencies == NULL) + hsa_decl_function_dependencies = new hash_map<tree, vec<const char *> *> (); vec <const char *> *s = NULL; - vec <const char *> **slot = hsa_decl_kernel_dependencies->get (caller); + vec <const char *> **slot = hsa_decl_function_dependencies->get (caller); if (slot == NULL) { s = new vec <const char *> (); - hsa_decl_kernel_dependencies->put (caller, s); + hsa_decl_function_dependencies->put (caller, s); } else s = *slot; @@ -816,8 +828,8 @@ hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host, gpu_summary->m_kind = kind; host_summary->m_kind = kind; - gpu_summary->m_gpu_implementation_p = true; - host_summary->m_gpu_implementation_p = false; + gpu_summary->m_hsa_implementation_p = true; + host_summary->m_hsa_implementation_p = false; gpu_summary->m_gridified_kernel_p = gridified_kernel_p; host_summary->m_gridified_kernel_p = gridified_kernel_p; @@ -827,32 +839,45 @@ hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host, process_gpu_implementation_attributes (gpu->decl); - /* Create reference between a kernel and a corresponding host implementation - to quarantee LTO streaming to a same LTRANS. */ - if (kind == HSA_KERNEL) - gpu->create_reference (host, IPA_REF_ADDR); + /* Create reference between an HSA function and a corresponding host + implementation to guarantee LTO streaming to the same LTRANS. 
*/ + gpu->create_reference (host, IPA_REF_ADDR); +} + +void +hsa_summary_t::mark_hsa_only_implementation (cgraph_node *node, + hsa_function_kind kind) +{ + hsa_function_summary *gpu_summary = get (node); + gpu_summary->m_kind = kind; + gpu_summary->m_hsa_implementation_p = true; + gcc_assert (!gpu_summary->m_bound_function); + process_gpu_implementation_attributes (node->decl); } /* Add a HOST function to HSA summaries. */ void -hsa_register_kernel (cgraph_node *host) +hsa_register_function (cgraph_node *host, bool kernel_p) { if (hsa_summaries == NULL) hsa_summaries = new hsa_summary_t (symtab); hsa_function_summary *s = hsa_summaries->get (host); - s->m_kind = HSA_KERNEL; + s->m_kind = kernel_p ? HSA_KERNEL : HSA_FUNCTION; } /* Add a pair of functions to HSA summaries. GPU is an HSA implementation of a HOST function. */ void -hsa_register_kernel (cgraph_node *gpu, cgraph_node *host) +hsa_register_function (cgraph_node *gpu, cgraph_node *host, bool kernel_p) { if (hsa_summaries == NULL) hsa_summaries = new hsa_summary_t (symtab); - hsa_summaries->link_functions (gpu, host, HSA_KERNEL, true); + if (kernel_p) + hsa_summaries->link_functions (gpu, host, HSA_KERNEL, true); + else + hsa_summaries->link_functions (gpu, host, HSA_FUNCTION, true); } /* Return true if expansion of the current HSA function has already failed. */ diff --git a/gcc/hsa-common.h b/gcc/hsa-common.h index 3075163a020..9df3a5f6442 100644 --- a/gcc/hsa-common.h +++ b/gcc/hsa-common.h @@ -1223,11 +1223,14 @@ struct hsa_function_summary /* Pointer to a cgraph node which is a HSA implementation of the function. In case of the function is a HSA function, the bound function points - to the host function. */ + to the host function. + This can also be NULL if there is no counterpart, which can happen for GPU + implementations if they are functions marked with hsa_kernel or + hsa_function attributes. */ cgraph_node *m_bound_function; - /* Identifies if the function is an HSA function or a host function. 
*/ - bool m_gpu_implementation_p; + /* Identifies if the function is an HSA function. */ + bool m_hsa_implementation_p; /* True if the function is a gridified kernel. */ bool m_gridified_kernel_p; @@ -1235,7 +1238,7 @@ struct hsa_function_summary inline hsa_function_summary::hsa_function_summary (): m_kind (HSA_NONE), - m_bound_function (NULL), m_gpu_implementation_p (false) + m_bound_function (NULL), m_hsa_implementation_p (false) { } @@ -1254,6 +1257,13 @@ public: void link_functions (cgraph_node *gpu, cgraph_node *host, hsa_function_kind kind, bool gridified_kernel_p); + /* Mark a specific function NODE as a standalone HSA implementation (that has + no CPU counterpart). KIND determines whether this is a host-invokable + kernel or an agent-callable function. */ + + void mark_hsa_only_implementation (cgraph_node *node, + hsa_function_kind kind); + private: void process_gpu_implementation_attributes (tree gdecl); }; @@ -1314,7 +1324,7 @@ hsa_internal_fn_hasher::equal (const value_type a, const compare_type b) /* in hsa-common.c */ extern struct hsa_function_representation *hsa_cfun; -extern hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies; +extern hash_map <tree, vec <const char *> *> *hsa_decl_function_dependencies; extern hsa_summary_t *hsa_summaries; extern hsa_symbol *hsa_num_threads; extern unsigned hsa_kernel_calls_counter; @@ -1343,13 +1353,15 @@ BrigAlignment8_t hsa_object_alignment (tree t); unsigned hsa_byte_alignment (BrigAlignment8_t alignment); void hsa_destroy_operand (hsa_op_base *op); void hsa_destroy_insn (hsa_insn_basic *insn); -void hsa_add_kern_decl_mapping (tree decl, char *name, unsigned, bool); -unsigned hsa_get_number_decl_kernel_mappings (void); -tree hsa_get_decl_kernel_mapping_decl (unsigned i); -char *hsa_get_decl_kernel_mapping_name (unsigned i); +void hsa_add_function_decl_mapping (tree decl, char *name, unsigned, + struct hsa_function_summary *s); +unsigned hsa_get_number_decl_function_mappings (void); +tree 
hsa_get_decl_function_mapping_decl (unsigned i); +char *hsa_get_decl_function_mapping_name (unsigned i); unsigned hsa_get_decl_kernel_mapping_omp_size (unsigned i); -bool hsa_get_decl_kernel_mapping_gridified (unsigned i); -void hsa_free_decl_kernel_mapping (void); +bool hsa_get_decl_function_mapping_gridified_p (unsigned i); +bool hsa_get_decl_function_mapping_kernel_p (unsigned i); +void hsa_free_decl_function_mapping (void); tree *hsa_get_ctor_statements (void); tree *hsa_get_dtor_statements (void); tree *hsa_get_kernel_dispatch_type (void); @@ -1357,8 +1369,9 @@ void hsa_add_kernel_dependency (tree caller, const char *called_function); void hsa_sanitize_name (char *p); char *hsa_brig_function_name (const char *p); const char *hsa_get_declaration_name (tree decl); -void hsa_register_kernel (cgraph_node *host); -void hsa_register_kernel (cgraph_node *gpu, cgraph_node *host); +void hsa_register_function (cgraph_node *host, bool kernel_p); +void hsa_register_function (cgraph_node *gpu, cgraph_node *host, + bool kernel_p); bool hsa_seen_error (void); void hsa_fail_cfun (void); @@ -1409,7 +1422,7 @@ hsa_gpu_implementation_p (tree decl) hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (decl)); - return s->m_gpu_implementation_p; + return s->m_hsa_implementation_p; } #endif /* HSA_H */ diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index b5a8c73731a..9d681a8bef3 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -233,11 +233,6 @@ hsa_function_representation::hsa_function_representation (hsa_internal_fn *fn) hsa_function_representation::~hsa_function_representation () { - /* Kernel names are deallocated at the end of BRIG output when deallocating - hsa_decl_kernel_mapping. 
*/ - if (!m_kern_p || m_seen_error) - free (m_name); - for (unsigned i = 0; i < m_input_args.length (); i++) delete m_input_args[i]; m_input_args.release (); @@ -952,7 +947,9 @@ get_symbol_for_decl (tree decl) } /* For a given HSA function declaration, return a host - function declaration. */ + function declaration, NULL if the declaration is an HSA-only + function. +*/ tree hsa_get_host_function (tree decl) @@ -960,7 +957,10 @@ hsa_get_host_function (tree decl) hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (decl)); gcc_assert (s->m_kind != HSA_NONE); - gcc_assert (s->m_gpu_implementation_p); + gcc_assert (s->m_hsa_implementation_p); + + if (s->m_kind == HSA_KERNEL) + return NULL; return s->m_bound_function ? s->m_bound_function->decl : NULL; } @@ -974,7 +974,7 @@ get_brig_function_name (tree decl) hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (d)); if (s->m_kind != HSA_NONE - && s->m_gpu_implementation_p + && s->m_hsa_implementation_p && s->m_bound_function) d = s->m_bound_function->decl; @@ -5877,7 +5877,7 @@ init_prologue (void) hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); /* Create a magic number that is going to be printed by libgomp. */ - unsigned index = hsa_get_number_decl_kernel_mappings (); + unsigned index = hsa_get_number_decl_function_mappings (); /* Emit store to debug argument. 
*/ if (PARAM_VALUE (PARAM_HSA_GEN_DEBUG_STORES) > 0) @@ -6549,14 +6549,10 @@ generate_hsa (bool kernel) if (hsa_cfun->m_kernel_dispatch_count) init_hsa_num_threads (); - if (hsa_cfun->m_kern_p) - { - hsa_function_summary *s - = hsa_summaries->get (cgraph_node::get (hsa_cfun->m_decl)); - hsa_add_kern_decl_mapping (current_function_decl, hsa_cfun->m_name, + hsa_add_function_decl_mapping (current_function_decl, hsa_cfun->m_name, hsa_cfun->m_maximum_omp_data_size, - s->m_gridified_kernel_p); - } + hsa_summaries->get + (cgraph_node::get (hsa_cfun->m_decl))); if (flag_checking) { diff --git a/gcc/ipa-hsa.c b/gcc/ipa-hsa.c index c02dadaa016..08948ba9b43 100644 --- a/gcc/ipa-hsa.c +++ b/gcc/ipa-hsa.c @@ -131,10 +131,10 @@ process_hsa_functions (void) while (e) { hsa_function_summary *src = hsa_summaries->get (node); - if (src->m_kind != HSA_NONE && src->m_gpu_implementation_p) + if (src->m_kind != HSA_NONE && src->m_hsa_implementation_p) { hsa_function_summary *dst = hsa_summaries->get (e->callee); - if (dst->m_kind != HSA_NONE && !dst->m_gpu_implementation_p) + if (dst->m_kind != HSA_NONE && !dst->m_hsa_implementation_p) { e->redirect_callee (dst->m_bound_function); if (dump_file) @@ -197,7 +197,7 @@ ipa_hsa_write_summary (void) bp = bitpack_create (ob->main_stream); bp_pack_value (&bp, s->m_kind, 2); - bp_pack_value (&bp, s->m_gpu_implementation_p, 1); + bp_pack_value (&bp, s->m_hsa_implementation_p, 1); bp_pack_value (&bp, s->m_bound_function != NULL, 1); streamer_write_bitpack (&bp); if (s->m_bound_function) @@ -248,7 +248,7 @@ ipa_hsa_read_section (struct lto_file_decl_data *file_data, const char *data, struct bitpack_d bp = streamer_read_bitpack (&ib_main); s->m_kind = (hsa_function_kind) bp_unpack_value (&bp, 2); - s->m_gpu_implementation_p = bp_unpack_value (&bp, 1); + s->m_hsa_implementation_p = bp_unpack_value (&bp, 1); bool has_tree = bp_unpack_value (&bp, 1); if (has_tree) diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index 0f45563c57c..f159d87b974 100644 
--- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -724,7 +724,7 @@ expand_parallel_call (struct omp_region *region, basic_block bb, && parallel_needs_hsa_kernel_p (region)) { cgraph_node *child_cnode = cgraph_node::get (child_fndecl); - hsa_register_kernel (child_cnode); + hsa_register_function (child_cnode, true); } } @@ -7770,7 +7770,7 @@ grid_expand_target_grid_body (struct omp_region *target) OMP_CLAUSE__GRIDDIM_)); cgraph_node *n = cgraph_node::get (orig_child_fndecl); - hsa_register_kernel (n); + hsa_register_function (n, true); return; } @@ -7886,7 +7886,7 @@ grid_expand_target_grid_body (struct omp_region *target) kcn->mark_force_output (); cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl); - hsa_register_kernel (kcn, orig_child); + hsa_register_function (kcn, orig_child, true); cgraph_node::add_new_function (kern_fndecl, true); push_cfun (kern_cfun); diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c index fc08f5df058..fc160b37164 100644 --- a/libgomp/plugin/plugin-hsa.c +++ b/libgomp/plugin/plugin-hsa.c @@ -310,13 +310,14 @@ hsa_error (const char *str, hsa_status_t status) return false; } -struct hsa_kernel_description +struct hsa_function_description { const char *name; unsigned omp_data_size; + bool kernel_p; bool gridified_kernel_p; - unsigned kernel_dependencies_count; - const char **kernel_dependencies; + unsigned function_dependencies_count; + const char **function_dependencies; }; struct global_var_info @@ -331,26 +332,31 @@ struct global_var_info struct brig_image_desc { hsa_ext_module_t brig_module; - const unsigned kernel_count; - struct hsa_kernel_description *kernel_infos; + const unsigned function_count; + struct hsa_function_description *function_infos; const unsigned global_variable_count; struct global_var_info *global_variables; + /* Functions/kernels that do not have a host-side version. 
*/ + const unsigned hsa_only_function_count; + struct hsa_function_description *hsa_only_function_infos; }; struct agent_info; /* Information required to identify, finalize and run any given kernel. */ -struct kernel_info -{ - /* Name of the kernel, required to locate it within the brig module. */ - const char *name; +struct function_info + { + /* Name of the function, required to locate it within the BRIG module. */ + const char *name; + /* True if the function is a kernel. */ + bool kernel_p; /* Size of memory space for OMP data. */ unsigned omp_data_size; /* The specific agent the kernel has been or will be finalized for and run on. */ struct agent_info *agent; - /* The specific module where the kernel takes place. */ + /* The module where the function resides. */ struct module_info *module; /* Mutex enforcing that at most once thread ever initializes a kernel for use. A thread should have locked agent->modules_rwlock for reading before @@ -369,7 +375,7 @@ struct kernel_info uint32_t group_segment_size; /* Required size of private segment. */ uint32_t private_segment_size; - /* List of all kernel dependencies. */ + /* List of all function dependencies. */ const char **dependencies; /* Number of dependencies. */ unsigned dependencies_count; @@ -388,11 +394,11 @@ struct module_info /* The description with which the program has registered the image. */ struct brig_image_desc *image_desc; - /* Number of kernels in this module. */ - int kernel_count; - /* An array of kernel_info structures describing each kernel in this - module. */ - struct kernel_info kernels[]; + /* Number of functions (or kernels) in this module. */ + int function_count; + /* An array of function_info structures describing each function in this + module. */ + struct function_info functions[]; }; /* Information about shared brig library. */ @@ -513,16 +519,16 @@ init_hsa_runtime_functions (void) /* Find kernel for an AGENT by name provided in KERNEL_NAME. 
*/ -static struct kernel_info * -get_kernel_for_agent (struct agent_info *agent, const char *kernel_name) +static struct function_info * +get_function_for_agent (struct agent_info *agent, const char *func_name) { struct module_info *module = agent->first_module; while (module) { - for (unsigned i = 0; i < module->kernel_count; i++) - if (strcmp (module->kernels[i].name, kernel_name) == 0) - return &module->kernels[i]; + for (unsigned i = 0; i < module->function_count; i++) + if (strcmp (module->functions[i].name, func_name) == 0) + return &module->functions[i]; module = module->next; } @@ -835,29 +841,30 @@ destroy_hsa_program (struct agent_info *agent) for (module = agent->first_module; module; module = module->next) { int i; - for (i = 0; i < module->kernel_count; i++) - module->kernels[i].initialized = false; + for (i = 0; i < module->function_count; i++) + module->functions[i].initialized = false; } agent->prog_finalized = false; return true; } -/* Initialize KERNEL from D and other parameters. Return true on success. */ +/* Initialize FUNCTION from D and other parameters. Return true on success. 
*/ static bool -init_basic_kernel_info (struct kernel_info *kernel, - struct hsa_kernel_description *d, - struct agent_info *agent, - struct module_info *module) +init_basic_function_info (struct function_info *function, + struct hsa_function_description *d, + struct agent_info *agent, + struct module_info *module) { - kernel->agent = agent; - kernel->module = module; - kernel->name = d->name; - kernel->omp_data_size = d->omp_data_size; - kernel->gridified_kernel_p = d->gridified_kernel_p; - kernel->dependencies_count = d->kernel_dependencies_count; - kernel->dependencies = d->kernel_dependencies; - if (pthread_mutex_init (&kernel->init_mutex, NULL)) + function->agent = agent; + function->module = module; + function->name = d->name; + function->omp_data_size = d->omp_data_size; + function->kernel_p = d->kernel_p; + function->gridified_kernel_p = d->gridified_kernel_p; + function->dependencies_count = d->function_dependencies_count; + function->dependencies = d->function_dependencies; + if (pthread_mutex_init (&function->init_mutex, NULL)) { GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex"); return false; @@ -885,8 +892,17 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, struct agent_info *agent; struct addr_pair *pair; struct module_info *module; - struct kernel_info *kernel; - int kernel_count = image_desc->kernel_count; + struct function_info *function; + int host_mapped_function_count = image_desc->function_count; + int hsa_only_function_count; + + if (version == GOMP_VERSION_HSA) + hsa_only_function_count = image_desc->hsa_only_function_count; + else + hsa_only_function_count = 0; + + int total_function_count + = host_mapped_function_count + hsa_only_function_count; agent = get_agent_info (ord); if (!agent) @@ -897,33 +913,46 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock"); return -1; } - if (agent->prog_finalized - && 
!destroy_hsa_program (agent)) + if (agent->prog_finalized && !destroy_hsa_program (agent)) return -1; - HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count); - pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair)); + HSA_DEBUG ("Encountered %d mapped and %d HSA-only functions in the " + "image\n", host_mapped_function_count, hsa_only_function_count); + pair = GOMP_PLUGIN_malloc (host_mapped_function_count * sizeof (struct addr_pair)); *target_table = pair; module = (struct module_info *) GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info) - + kernel_count * sizeof (struct kernel_info)); + + (total_function_count + * sizeof (struct function_info))); module->image_desc = image_desc; - module->kernel_count = kernel_count; + module->function_count = total_function_count; - kernel = &module->kernels[0]; + function = &module->functions[0]; - /* Allocate memory for kernel dependencies. */ - for (unsigned i = 0; i < kernel_count; i++) + for (unsigned i = 0; i < host_mapped_function_count; i++) { - pair->start = (uintptr_t) kernel; - pair->end = (uintptr_t) (kernel + 1); + pair->start = (uintptr_t) function; + pair->end = (uintptr_t) (function + 1); - struct hsa_kernel_description *d = &image_desc->kernel_infos[i]; - if (!init_basic_kernel_info (kernel, d, agent, module)) + struct hsa_function_description *d = &image_desc->function_infos[i]; + if (!init_basic_function_info (function, d, agent, module)) return -1; - kernel++; + HSA_DEBUG ("Initialized host-mapped function with name '%s' " + "to info struct %p\n", function->name, function); + function++; pair++; } + for (unsigned i = 0; i < hsa_only_function_count; i++) + { + struct hsa_function_description *d + = &image_desc->hsa_only_function_infos[i]; + if (!init_basic_function_info (function, d, agent, module)) + return -1; + HSA_DEBUG ("Initialized HSA-only function with name '%s' to info " + "struct %p\n", function->name, function); + + function++; + } add_module_to_agent (agent, module); 
if (pthread_rwlock_unlock (&agent->modules_rwlock)) @@ -931,7 +960,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock"); return -1; } - return kernel_count; + return host_mapped_function_count; } /* Add a shared BRIG library from a FILE_NAME to an AGENT. */ @@ -1112,7 +1141,7 @@ final: /* Create kernel dispatch data structure for given KERNEL. */ static struct GOMP_hsa_kernel_dispatch * -create_single_kernel_dispatch (struct kernel_info *kernel, +create_single_kernel_dispatch (struct function_info *kernel, unsigned omp_data_size) { struct agent_info *agent = kernel->agent; @@ -1176,7 +1205,7 @@ release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow) to calculate maximum necessary memory for OMP data allocation. */ static void -init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size) +init_single_kernel (struct function_info *kernel, unsigned *max_omp_data_size) { hsa_status_t status; struct agent_info *agent = kernel->agent; @@ -1227,8 +1256,8 @@ init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size) for (unsigned i = 0; i < kernel->dependencies_count; i++) { - struct kernel_info *dependency - = get_kernel_for_agent (agent, kernel->dependencies[i]); + struct function_info *dependency + = get_function_for_agent (agent, kernel->dependencies[i]); if (dependency == NULL) { @@ -1301,7 +1330,7 @@ print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned inden dependencies. */ static struct GOMP_hsa_kernel_dispatch * -create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size) +create_kernel_dispatch (struct function_info *kernel, unsigned omp_data_size) { struct GOMP_hsa_kernel_dispatch *shadow = create_single_kernel_dispatch (kernel, omp_data_size); @@ -1310,11 +1339,11 @@ create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size) shadow->omp_level = kernel->gridified_kernel_p ? 
1 : 0; /* Create kernel dispatch data structures. We do not allow to have - a kernel dispatch with depth bigger than one. */ + a kernel dispatch with depth larger than one. */ for (unsigned i = 0; i < kernel->dependencies_count; i++) { - struct kernel_info *dependency - = get_kernel_for_agent (kernel->agent, kernel->dependencies[i]); + struct function_info *dependency + = get_function_for_agent (kernel->agent, kernel->dependencies[i]); shadow->children_dispatches[i] = create_single_kernel_dispatch (dependency, omp_data_size); shadow->children_dispatches[i]->queue @@ -1330,7 +1359,7 @@ create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size) create_and_finalize_hsa_program. */ static void -init_kernel (struct kernel_info *kernel) +init_kernel (struct function_info *kernel) { if (pthread_mutex_lock (&kernel->init_mutex)) GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex"); @@ -1445,7 +1474,7 @@ get_group_size (uint32_t ndim, uint32_t grid, uint32_t group) bool GOMP_OFFLOAD_can_run (void *fn_ptr) { - struct kernel_info *kernel = (struct kernel_info *) fn_ptr; + struct function_info *kernel = (struct function_info *) fn_ptr; struct agent_info *agent = kernel->agent; create_and_finalize_hsa_program (agent); @@ -1474,10 +1503,10 @@ packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest) } /* Run KERNEL on its agent, pass VARS to it as arguments and take - launchattributes from KLA. */ + launch attributes from KLA. */ void -run_kernel (struct kernel_info *kernel, void *vars, +run_kernel (struct function_info *kernel, void *vars, struct GOMP_kernel_launch_attributes *kla) { struct agent_info *agent = kernel->agent; @@ -1608,13 +1637,13 @@ run_kernel (struct kernel_info *kernel, void *vars, /* Part of the libgomp plugin interface. Run a kernel on device N (the number is actually ignored, we assume the FN_PTR has been mapped using the correct device) and pass it an array of pointers in VARS as a parameter. 
The kernel - is identified by FN_PTR which must point to a kernel_info structure. */ + is identified by FN_PTR which must point to a function_info structure. */ void GOMP_OFFLOAD_run (int n __attribute__((unused)), void *fn_ptr, void *vars, void **args) { - struct kernel_info *kernel = (struct kernel_info *) fn_ptr; + struct function_info *kernel = (struct function_info *) fn_ptr; struct GOMP_kernel_launch_attributes def; struct GOMP_kernel_launch_attributes *kla; if (!parse_target_attributes (args, &def, &kla)) @@ -1690,8 +1719,8 @@ static bool destroy_module (struct module_info *module) { int i; - for (i = 0; i < module->kernel_count; i++) - if (pthread_mutex_destroy (&module->kernels[i].init_mutex)) + for (i = 0; i < module->function_count; i++) + if (pthread_mutex_destroy (&module->functions[i].init_mutex)) { GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization " "mutex"); -- GitLab From 42d9393041d3f30a12d1ab090e6916d13bd86157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pekka=20J=C3=A4=C3=A4skel=C3=A4inen?= <pekka.jaaskelainen@parmance.com> Date: Fri, 27 Oct 2017 11:38:13 +0300 Subject: [PATCH 2/4] Fix libgomp issues found by running in Carrizo. --- gcc/hsa-gen.c | 3 --- libgomp/plugin/plugin-hsa.c | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index 9d681a8bef3..d93eb756cb9 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -959,9 +959,6 @@ hsa_get_host_function (tree decl) gcc_assert (s->m_kind != HSA_NONE); gcc_assert (s->m_hsa_implementation_p); - if (s->m_kind == HSA_KERNEL) - return NULL; - return s->m_bound_function ? 
s->m_bound_function->decl : NULL; } diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c index fc160b37164..d7aef62e804 100644 --- a/libgomp/plugin/plugin-hsa.c +++ b/libgomp/plugin/plugin-hsa.c @@ -896,7 +896,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, int host_mapped_function_count = image_desc->function_count; int hsa_only_function_count; - if (version == GOMP_VERSION_HSA) + if (GOMP_VERSION_DEV (version) == GOMP_VERSION_HSA) hsa_only_function_count = image_desc->hsa_only_function_count; else hsa_only_function_count = 0; -- GitLab From af94eed932afaeb2f2495949104ca03d67c4b867 Mon Sep 17 00:00:00 2001 From: Martin Jambor <mjambor@suse.cz> Date: Fri, 27 Oct 2017 16:59:29 +0300 Subject: [PATCH 3/4] [hsa] Add missing guard in OMP gridification --- gcc/omp-grid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/omp-grid.c b/gcc/omp-grid.c index a7b6f60aeaf..31beb144b3b 100644 --- a/gcc/omp-grid.c +++ b/gcc/omp-grid.c @@ -1315,6 +1315,7 @@ grid_attempt_target_gridification (gomp_target *target, n1 = fold_convert (itype, n1); n2 = fold_convert (itype, n2); + tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2); tree step = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i)); @@ -1327,7 +1328,9 @@ grid_attempt_target_gridification (gomp_target *target, fold_build1 (NEGATE_EXPR, itype, t), fold_build1 (NEGATE_EXPR, itype, step)); else t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + /* Guard: yield zero when the N1 COND_CODE N2 test fails. */ + t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype)); if (grid.tiling) { if (cond_code == GT_EXPR) -- GitLab From 7d4434c43c720800b2bdb1e74202def02b22d38e Mon Sep 17 00:00:00 2001 From: Martin Jambor <mjambor@suse.cz> Date: Fri, 27 Oct 2017 16:59:29 +0300 Subject: [PATCH 4/4] [hsa] Add missing guard in OMP gridification --- gcc/omp-grid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/omp-grid.c b/gcc/omp-grid.c index a7b6f60aeaf..31beb144b3b
100644 --- a/gcc/omp-grid.c +++ b/gcc/omp-grid.c @@ -1315,6 +1315,7 @@ grid_attempt_target_gridification (gomp_target *target, n1 = fold_convert (itype, n1); n2 = fold_convert (itype, n2); + tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2); tree step = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i)); @@ -1327,7 +1328,9 @@ grid_attempt_target_gridification (gomp_target *target, fold_build1 (NEGATE_EXPR, itype, t), fold_build1 (NEGATE_EXPR, itype, step)); else t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + /* Guard: yield zero when the N1 COND_CODE N2 test fails. */ + t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype)); if (grid.tiling) { if (cond_code == GT_EXPR) -- GitLab