From 61d518cc75dbca4be1a5a6e329cfbaea8bb34a6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pekka=20J=C3=A4=C3=A4skel=C3=A4inen?=
 <pekka.jaaskelainen@parmance.com>
Date: Mon, 16 Oct 2017 17:39:05 +0200
Subject: [PATCH 1/4] [HSABE] indexing of program scope functions

Previously the binary image index contained only kernels,
but program scope functions are also needed for indirect call
implementations: the HSA Runtime API can be used to query
their addresses.

This patch mainly renames the corresponding structs and APIs that
have 'kernel' in their name to 'function' and adds a separate
flag to the function_index (formerly kernel_index) for separating
host-callable kernels from program scope functions (whose
addresses can be queried through the HSA runtime).
---
 gcc/hsa-brig.c              | 282 ++++++++++++++++++++++--------------
 gcc/hsa-common.c            | 123 +++++++++-------
 gcc/hsa-common.h            |  41 ++++--
 gcc/hsa-gen.c               |  28 ++--
 gcc/ipa-hsa.c               |   8 +-
 gcc/omp-expand.c            |   6 +-
 libgomp/plugin/plugin-hsa.c | 167 ++++++++++++---------
 7 files changed, 394 insertions(+), 261 deletions(-)

diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c
index d15ce261ed2..82b21b2c0cb 100644
--- a/gcc/hsa-brig.c
+++ b/gcc/hsa-brig.c
@@ -2144,86 +2144,128 @@ hsa_output_global_variables ()
   return global_vars_table;
 }
 
-/* Create __hsa_host_functions and __hsa_kernels that contain
-   all informations consumed by libgomp to register all kernels
-   in the BRIG binary.  */
+/* Create the tables that contain all information consumed by libgomp to
+   register the HSA functions stored in the BRIG binary.  If HOST_FUNC_TABLE
+   is non-NULL, fill it with the addresses of all CPU functions that have HSA
+   implementations (__hsa_host_functions) and fill FUNCTIONS with descriptors
+   of those HSA implementations (__hsa_host_mapped_functions).  If
+   HOST_FUNC_TABLE is NULL, fill FUNCTIONS with descriptors of the functions
+   that have no CPU counterpart (__hsa_only_functions).  In both cases, return
+   the number of descriptors stored in FUNCTIONS.  */
 
-static void
-hsa_output_kernels (tree *host_func_table, tree *kernels)
+static unsigned
+hsa_output_functions (tree *host_func_table, tree *functions)
 {
-  unsigned map_count = hsa_get_number_decl_kernel_mappings ();
-
-  tree int_num_of_kernels;
-  int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
-  tree kernel_num_index_type = build_index_type (int_num_of_kernels);
-  tree host_functions_array_type = build_array_type (ptr_type_node,
-						     kernel_num_index_type);
-  TYPE_ARTIFICIAL (host_functions_array_type) = 1;
+  char tmp_name[64];
+  unsigned map_count = hsa_get_number_decl_function_mappings ();
 
-  vec<constructor_elt, va_gc> *host_functions_vec = NULL;
+  unsigned out_count = 0;
   for (unsigned i = 0; i < map_count; ++i)
+    if (host_func_table)
+      {
+	if (hsa_get_host_function (hsa_get_decl_function_mapping_decl (i)))
+	  out_count++;
+      }
+    else
+      {
+	if (!hsa_get_host_function (hsa_get_decl_function_mapping_decl (i)))
+	  out_count++;
+      }
+
+  tree int_num_of_functions = build_int_cst (uint32_type_node, out_count);
+  tree function_num_index_type = build_index_type (int_num_of_functions);
+
+  if (host_func_table)
     {
-      tree decl = hsa_get_decl_kernel_mapping_decl (i);
-      tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
-      CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
+      tree host_functions_array_type
+	= build_array_type (ptr_type_node, function_num_index_type);
+      TYPE_ARTIFICIAL (host_functions_array_type) = 1;
+
+      vec<constructor_elt, va_gc> *host_functions_vec = NULL;
+      for (unsigned i = 0; i < map_count; ++i)
+	{
+	  tree decl = hsa_get_decl_function_mapping_decl (i);
+	  tree host_fn = hsa_get_host_function (decl);
+	  if (!host_fn)
+	    continue;
+	  host_fn = build_fold_addr_expr (host_fn);
+	  CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
+	}
+      tree host_functions_ctor = build_constructor (host_functions_array_type,
+						    host_functions_vec);
+      ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
+      tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+					     get_identifier (tmp_name),
+					     host_functions_array_type);
+      TREE_STATIC (hsa_host_func_table) = 1;
+      TREE_READONLY (hsa_host_func_table) = 1;
+      TREE_PUBLIC (hsa_host_func_table) = 0;
+      DECL_ARTIFICIAL (hsa_host_func_table) = 1;
+      DECL_IGNORED_P (hsa_host_func_table) = 1;
+      DECL_EXTERNAL (hsa_host_func_table) = 0;
+      TREE_CONSTANT (hsa_host_func_table) = 1;
+      DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
+      varpool_node::finalize_decl (hsa_host_func_table);
+      *host_func_table = hsa_host_func_table;
     }
-  tree host_functions_ctor = build_constructor (host_functions_array_type,
-						host_functions_vec);
-  char tmp_name[64];
-  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
-  tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
-					 get_identifier (tmp_name),
-					 host_functions_array_type);
-  TREE_STATIC (hsa_host_func_table) = 1;
-  TREE_READONLY (hsa_host_func_table) = 1;
-  TREE_PUBLIC (hsa_host_func_table) = 0;
-  DECL_ARTIFICIAL (hsa_host_func_table) = 1;
-  DECL_IGNORED_P (hsa_host_func_table) = 1;
-  DECL_EXTERNAL (hsa_host_func_table) = 0;
-  TREE_CONSTANT (hsa_host_func_table) = 1;
-  DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
-  varpool_node::finalize_decl (hsa_host_func_table);
-  *host_func_table = hsa_host_func_table;
-
-  /* Following code emits list of kernel_info structures.  */
-
-  tree kernel_info_type = make_node (RECORD_TYPE);
+
+  /* Following code emits a list of function_info structures.  */
+
+  tree func_info_type = make_node (RECORD_TYPE);
   tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
 			   get_identifier ("name"), ptr_type_node);
   DECL_CHAIN (id_f1) = NULL_TREE;
+
   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
 			   get_identifier ("omp_data_size"),
 			   unsigned_type_node);
   DECL_CHAIN (id_f2) = id_f1;
+
   tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
-			   get_identifier ("gridified_kernel_p"),
+			   get_identifier ("kernel_p"),
 			   boolean_type_node);
   DECL_CHAIN (id_f3) = id_f2;
+
   tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
-			   get_identifier ("kernel_dependencies_count"),
-			   unsigned_type_node);
+			   get_identifier ("gridified_kernel_p"),
+			   boolean_type_node);
   DECL_CHAIN (id_f4) = id_f3;
+
   tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+			   get_identifier ("kernel_dependencies_count"),
+			   unsigned_type_node);
+  DECL_CHAIN (id_f5) = id_f4;
+
+  tree id_f6 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
 			   get_identifier ("kernel_dependencies"),
 			   build_pointer_type (build_pointer_type
 					       (char_type_node)));
-  DECL_CHAIN (id_f5) = id_f4;
-  finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
+  DECL_CHAIN (id_f6) = id_f5;
+
+  finish_builtin_struct (func_info_type, "__hsa_function_info", id_f6,
 			 NULL_TREE);
 
-  int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
-  tree kernel_info_vector_type
-    = build_array_type (kernel_info_type,
-			build_index_type (int_num_of_kernels));
-  TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
+  tree func_info_vector_type
+    = build_array_type (func_info_type, function_num_index_type);
+  TYPE_ARTIFICIAL (func_info_vector_type) = 1;
 
-  vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
-  tree kernel_dependencies_vector_type = NULL;
+  vec<constructor_elt, va_gc> *func_info_vector_vec = NULL;
+  tree function_dependencies_vector_type = NULL;
 
   for (unsigned i = 0; i < map_count; ++i)
     {
-      tree kernel = hsa_get_decl_kernel_mapping_decl (i);
-      char *name = hsa_get_decl_kernel_mapping_name (i);
+      tree decl = hsa_get_decl_function_mapping_decl (i);
+      tree host_fn = hsa_get_host_function (decl);
+      if (host_func_table)
+	{
+	  if (!host_fn)
+	    continue;
+	}
+      else if (host_fn)
+	continue;
+
+      tree func = hsa_get_decl_function_mapping_decl (i);
+      char *name = hsa_get_decl_function_mapping_name (i);
       unsigned len = strlen (name);
       char *copy = XNEWVEC (char, len + 2);
       copy[0] = '&';
@@ -2231,31 +2273,34 @@ hsa_output_kernels (tree *host_func_table, tree *kernels)
       copy[len + 1] = '\0';
       len++;
 
-      tree kern_name = build_string (len, copy);
-      TREE_TYPE (kern_name)
+      tree func_name = build_string (len, copy);
+      TREE_TYPE (func_name)
 	= build_array_type (char_type_node, build_index_type (size_int (len)));
       free (copy);
 
       unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
       tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
-      bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
+      bool kernel_p = hsa_get_decl_function_mapping_kernel_p (i);
+      tree kernel_p_tree = build_int_cstu (boolean_type_node, kernel_p);
+      bool gridified_kernel_p
+	= hsa_get_decl_function_mapping_gridified_p (i);
       tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
 						     gridified_kernel_p);
       unsigned count = 0;
-      vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
-      if (hsa_decl_kernel_dependencies)
+      vec<constructor_elt, va_gc> *function_dependencies_vec = NULL;
+      if (hsa_decl_function_dependencies)
 	{
 	  vec<const char *> **slot;
-	  slot = hsa_decl_kernel_dependencies->get (kernel);
+	  slot = hsa_decl_function_dependencies->get (func);
 	  if (slot)
 	    {
 	      vec <const char *> *dependencies = *slot;
 	      count = dependencies->length ();
 
-	      kernel_dependencies_vector_type
+	      function_dependencies_vector_type
 		= build_array_type (build_pointer_type (char_type_node),
 				    build_index_type (size_int (count)));
-	      TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
+	      TYPE_ARTIFICIAL (function_dependencies_vector_type) = 1;
 
 	      for (unsigned j = 0; j < count; j++)
 		{
@@ -2267,7 +2312,7 @@ hsa_output_kernels (tree *host_func_table, tree *kernels)
 					build_index_type (size_int (len)));
 
 		  CONSTRUCTOR_APPEND_ELT
-		    (kernel_dependencies_vec, NULL_TREE,
+		    (function_dependencies_vec, NULL_TREE,
 		     build1 (ADDR_EXPR,
 			     build_pointer_type (TREE_TYPE (dependency_name)),
 			     dependency_name));
@@ -2277,24 +2322,25 @@ hsa_output_kernels (tree *host_func_table, tree *kernels)
 
       tree dependencies_count = build_int_cstu (unsigned_type_node, count);
 
-      vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
-      CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
+      vec<constructor_elt, va_gc> *func_info_vec = NULL;
+      CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE,
 			      build1 (ADDR_EXPR,
 				      build_pointer_type (TREE_TYPE
-							  (kern_name)),
-				      kern_name));
-      CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
-      CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
+							  (func_name)),
+				      func_name));
+      CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE, omp_data_size);
+      CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE, kernel_p_tree);
+      CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE,
 			      gridified_kernel_p_tree);
-      CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
+      CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE, dependencies_count);
 
       if (count > 0)
 	{
 	  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
-	  gcc_checking_assert (kernel_dependencies_vector_type);
+	  gcc_checking_assert (function_dependencies_vector_type);
 	  tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
 					       get_identifier (tmp_name),
-					       kernel_dependencies_vector_type);
+					       function_dependencies_vector_type);
 
 	  TREE_STATIC (dependencies_list) = 1;
 	  TREE_READONLY (dependencies_list) = 1;
@@ -2304,57 +2350,63 @@ hsa_output_kernels (tree *host_func_table, tree *kernels)
 	  DECL_EXTERNAL (dependencies_list) = 0;
 	  TREE_CONSTANT (dependencies_list) = 1;
 	  DECL_INITIAL (dependencies_list)
-	    = build_constructor (kernel_dependencies_vector_type,
-				 kernel_dependencies_vec);
+	    = build_constructor (function_dependencies_vector_type,
+				 function_dependencies_vec);
 	  varpool_node::finalize_decl (dependencies_list);
 
-	  CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
+	  CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE,
 				  build1 (ADDR_EXPR,
 					  build_pointer_type
 					    (TREE_TYPE (dependencies_list)),
 					  dependencies_list));
 	}
       else
-	CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
+	CONSTRUCTOR_APPEND_ELT (func_info_vec, NULL_TREE, null_pointer_node);
 
-      tree kernel_info_ctor = build_constructor (kernel_info_type,
-						 kernel_info_vec);
+      tree func_info_ctor = build_constructor (func_info_type,
+					       func_info_vec);
 
-      CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
-			      kernel_info_ctor);
+      CONSTRUCTOR_APPEND_ELT (func_info_vector_vec, NULL_TREE,
+			      func_info_ctor);
     }
 
-  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
-  tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
-				 get_identifier (tmp_name),
-				 kernel_info_vector_type);
-
-  TREE_STATIC (hsa_kernels) = 1;
-  TREE_READONLY (hsa_kernels) = 1;
-  TREE_PUBLIC (hsa_kernels) = 0;
-  DECL_ARTIFICIAL (hsa_kernels) = 1;
-  DECL_IGNORED_P (hsa_kernels) = 1;
-  DECL_EXTERNAL (hsa_kernels) = 0;
-  TREE_CONSTANT (hsa_kernels) = 1;
-  DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
-						  kernel_info_vector_vec);
-  varpool_node::finalize_decl (hsa_kernels);
-  *kernels = hsa_kernels;
+  /* __hsa_host_mapped_functions are functions or kernels which have a
+     matching host function.  __hsa_only_functions are functions with only
+     HSA versions available.  */
+  ASM_GENERATE_INTERNAL_LABEL (tmp_name,
+			       host_func_table ? "__hsa_host_mapped_functions"
+			       : "__hsa_only_functions", 1);
+  tree hsa_functions = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+				   get_identifier (tmp_name),
+				   func_info_vector_type);
+
+  TREE_STATIC (hsa_functions) = 1;
+  TREE_READONLY (hsa_functions) = 1;
+  TREE_PUBLIC (hsa_functions) = 0;
+  DECL_ARTIFICIAL (hsa_functions) = 1;
+  DECL_IGNORED_P (hsa_functions) = 1;
+  DECL_EXTERNAL (hsa_functions) = 0;
+  TREE_CONSTANT (hsa_functions) = 1;
+  DECL_INITIAL (hsa_functions) = build_constructor (func_info_vector_type,
+						    func_info_vector_vec);
+  varpool_node::finalize_decl (hsa_functions);
+  *functions = hsa_functions;
+  return out_count;
 }
 
-/* Create a static constructor that will register out brig stuff with
+/* Create a static constructor that will register our BRIG indexes with
    libgomp.  */
 
 static void
 hsa_output_libgomp_mapping (tree brig_decl)
 {
-  unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
   unsigned global_variable_count = hsa_global_variable_symbols->elements ();
 
-  tree kernels;
+  tree hsa_funcs;
   tree host_func_table;
 
-  hsa_output_kernels (&host_func_table, &kernels);
+  unsigned hsa_func_count = hsa_output_functions (&host_func_table,
+						  &hsa_funcs);
   tree global_vars = hsa_output_global_variables ();
 
   tree hsa_image_desc_type = make_node (RECORD_TYPE);
@@ -2362,12 +2414,12 @@ hsa_output_libgomp_mapping (tree brig_decl)
 			   get_identifier ("brig_module"), ptr_type_node);
   DECL_CHAIN (id_f1) = NULL_TREE;
   tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
-			   get_identifier ("kernel_count"),
+			   get_identifier ("mapped_function_count"),
 			   unsigned_type_node);
 
   DECL_CHAIN (id_f2) = id_f1;
   tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
-			   get_identifier ("hsa_kernel_infos"),
+			   get_identifier ("omp_kernel_infos"),
 			   ptr_type_node);
   DECL_CHAIN (id_f3) = id_f2;
   tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
@@ -2378,7 +2430,16 @@ hsa_output_libgomp_mapping (tree brig_decl)
 			   get_identifier ("hsa_global_variable_infos"),
 			   ptr_type_node);
   DECL_CHAIN (id_f5) = id_f4;
-  finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
+  tree id_f6 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+			   get_identifier ("hsa_only_function_count"),
+			   unsigned_type_node);
+  DECL_CHAIN (id_f6) = id_f5;
+  tree id_f7 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+			   get_identifier ("hsa_only_function_infos"),
+			   ptr_type_node);
+  DECL_CHAIN (id_f7) = id_f6;
+
+  finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f7,
 			 NULL_TREE);
   TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
 
@@ -2386,11 +2447,11 @@ hsa_output_libgomp_mapping (tree brig_decl)
   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
 			  build_fold_addr_expr (brig_decl));
   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
-			  build_int_cstu (unsigned_type_node, kernel_count));
+			  build_int_cstu (unsigned_type_node, hsa_func_count));
   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
 			  build1 (ADDR_EXPR,
-				  build_pointer_type (TREE_TYPE (kernels)),
-				  kernels));
+				  build_pointer_type (TREE_TYPE (hsa_funcs)),
+				  hsa_funcs));
   CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
 			  build_int_cstu (unsigned_type_node,
 					  global_variable_count));
@@ -2399,6 +2460,15 @@ hsa_output_libgomp_mapping (tree brig_decl)
 				  build_pointer_type (TREE_TYPE (global_vars)),
 				  global_vars));
 
+  unsigned hsa_only_func_count = hsa_output_functions (NULL, &hsa_funcs);
+  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
+			  build_int_cstu (unsigned_type_node,
+					  hsa_only_func_count));
+  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
+			  build1 (ADDR_EXPR,
+				  build_pointer_type (TREE_TYPE (hsa_funcs)),
+				  hsa_funcs));
+
   tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
 
   char tmp_name[64];
@@ -2425,7 +2495,7 @@ hsa_output_libgomp_mapping (tree brig_decl)
   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
 			  host_func_table_addr);
   offset_int func_table_size
-    = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
+    = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * hsa_func_count;
   CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
 			  fold_build2 (POINTER_PLUS_EXPR,
 				       TREE_TYPE (host_func_table_addr),
@@ -2599,7 +2669,7 @@ hsa_output_brig (void)
 
   hsa_output_libgomp_mapping (brig_decl);
 
-  hsa_free_decl_kernel_mapping ();
+  hsa_free_decl_function_mapping ();
   brig_release_data ();
   hsa_deinit_compilation_unit_data ();
 
diff --git a/gcc/hsa-common.c b/gcc/hsa-common.c
index c8c12afb04c..4f0b88fde5b 100644
--- a/gcc/hsa-common.c
+++ b/gcc/hsa-common.c
@@ -47,29 +47,31 @@ along with GCC; see the file COPYING3.  If not see
    function.  */
 class hsa_function_representation *hsa_cfun;
 
-/* Element of the mapping vector between a host decl and an HSA kernel.  */
+/* Element of the mapping vector between a host decl and an HSA function.  */
 
-struct GTY(()) hsa_decl_kernel_map_element
+struct GTY(()) hsa_decl_function_map_element
 {
   /* The decl of the host function.  */
   tree decl;
-  /* Name of the HSA kernel in BRIG.  */
+  /* Name of the HSA function in BRIG.  */
   char * GTY((skip)) name;
   /* Size of OMP data, if the kernel contains a kernel dispatch.  */
   unsigned omp_data_size;
-  /* True if the function is gridified kernel.  */
+  /* True if the function is a host-callable kernel.  */
+  bool kernel_p;
+  /* True if the function is a gridified kernel.  */
   bool gridified_kernel_p;
 };
 
 /* Mapping between decls and corresponding HSA kernels in this compilation
    unit.  */
 
-static GTY (()) vec<hsa_decl_kernel_map_element, va_gc>
-  *hsa_decl_kernel_mapping;
+static GTY (()) vec<hsa_decl_function_map_element, va_gc>
+  *hsa_decl_function_mapping;
 
 /* Mapping between decls and corresponding HSA kernels
    called by the function.  */
-hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies;
+hash_map <tree, vec <const char *> *> *hsa_decl_function_dependencies;
 
 /* Hash function to lookup a symbol for a decl.  */
 hash_table <hsa_noop_symbol_hasher> *hsa_global_variable_symbols;
@@ -649,87 +651,97 @@ hsa_destroy_operand (hsa_op_base *op)
     op->~hsa_op_base ();
 }
 
-/* Create a mapping between the original function DECL and kernel name NAME.  */
+/* Create a mapping between the original function DECL and HSA function
+   named NAME.  */
 
 void
-hsa_add_kern_decl_mapping (tree decl, char *name, unsigned omp_data_size,
-			   bool gridified_kernel_p)
+hsa_add_function_decl_mapping (tree decl, char *name, unsigned omp_data_size,
+			       struct hsa_function_summary *s)
 {
-  hsa_decl_kernel_map_element dkm;
+  hsa_decl_function_map_element dkm;
   dkm.decl = decl;
   dkm.name = name;
   dkm.omp_data_size = omp_data_size;
-  dkm.gridified_kernel_p = gridified_kernel_p;
-  vec_safe_push (hsa_decl_kernel_mapping, dkm);
+  dkm.kernel_p = s->m_kind == HSA_KERNEL;
+  dkm.gridified_kernel_p = s->m_gridified_kernel_p;
+  vec_safe_push (hsa_decl_function_mapping, dkm);
 }
 
-/* Return the number of kernel decl name mappings.  */
+/* Return the number of function decl name mappings.  */
 
 unsigned
-hsa_get_number_decl_kernel_mappings (void)
+hsa_get_number_decl_function_mappings (void)
 {
-  return vec_safe_length (hsa_decl_kernel_mapping);
+  return vec_safe_length (hsa_decl_function_mapping);
 }
 
-/* Return the decl in the Ith kernel decl name mapping.  */
+/* Return the decl in the Ith function decl name mapping.  */
 
 tree
-hsa_get_decl_kernel_mapping_decl (unsigned i)
+hsa_get_decl_function_mapping_decl (unsigned i)
 {
-  return (*hsa_decl_kernel_mapping)[i].decl;
+  return (*hsa_decl_function_mapping)[i].decl;
 }
 
-/* Return the name in the Ith kernel decl name mapping.  */
+/* Return the name in the Ith function decl name mapping.  */
 
 char *
-hsa_get_decl_kernel_mapping_name (unsigned i)
+hsa_get_decl_function_mapping_name (unsigned i)
 {
-  return (*hsa_decl_kernel_mapping)[i].name;
+  return (*hsa_decl_function_mapping)[i].name;
 }
 
-/* Return maximum OMP size for kernel decl name mapping.  */
+/* Return maximum OMP size for function decl name mapping.  */
 
 unsigned
 hsa_get_decl_kernel_mapping_omp_size (unsigned i)
 {
-  return (*hsa_decl_kernel_mapping)[i].omp_data_size;
+  return (*hsa_decl_function_mapping)[i].omp_data_size;
 }
 
-/* Return if the function is gridified kernel in decl name mapping.  */
+/* Return true if the Ith function decl name mapping is a gridified kernel.  */
 
 bool
-hsa_get_decl_kernel_mapping_gridified (unsigned i)
+hsa_get_decl_function_mapping_gridified_p (unsigned i)
 {
-  return (*hsa_decl_kernel_mapping)[i].gridified_kernel_p;
+  return (*hsa_decl_function_mapping)[i].gridified_kernel_p;
 }
 
-/* Free the mapping between original decls and kernel names.  */
+/* Return true if the function is a host-callable kernel.  */
+
+bool
+hsa_get_decl_function_mapping_kernel_p (unsigned i)
+{
+  return (*hsa_decl_function_mapping)[i].kernel_p;
+}
+
+/* Free the mapping between original decls and function names.  */
 
 void
-hsa_free_decl_kernel_mapping (void)
+hsa_free_decl_function_mapping (void)
 {
-  if (hsa_decl_kernel_mapping == NULL)
+  if (hsa_decl_function_mapping == NULL)
     return;
 
-  for (unsigned i = 0; i < hsa_decl_kernel_mapping->length (); ++i)
-    free ((*hsa_decl_kernel_mapping)[i].name);
-  ggc_free (hsa_decl_kernel_mapping);
+  for (unsigned i = 0; i < hsa_decl_function_mapping->length (); ++i)
+    free ((*hsa_decl_function_mapping)[i].name);
+  ggc_free (hsa_decl_function_mapping);
 }
 
-/* Add new kernel dependency.  */
+/* Add new function dependency.  */
 
 void
-hsa_add_kernel_dependency (tree caller, const char *called_function)
+hsa_add_function_dependency (tree caller, const char *called_function)
 {
-  if (hsa_decl_kernel_dependencies == NULL)
-    hsa_decl_kernel_dependencies = new hash_map<tree, vec<const char *> *> ();
+  if (hsa_decl_function_dependencies == NULL)
+    hsa_decl_function_dependencies = new hash_map<tree, vec<const char *> *> ();
 
   vec <const char *> *s = NULL;
-  vec <const char *> **slot = hsa_decl_kernel_dependencies->get (caller);
+  vec <const char *> **slot = hsa_decl_function_dependencies->get (caller);
   if (slot == NULL)
     {
       s = new vec <const char *> ();
-      hsa_decl_kernel_dependencies->put (caller, s);
+      hsa_decl_function_dependencies->put (caller, s);
     }
   else
     s = *slot;
@@ -816,8 +828,8 @@ hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host,
   gpu_summary->m_kind = kind;
   host_summary->m_kind = kind;
 
-  gpu_summary->m_gpu_implementation_p = true;
-  host_summary->m_gpu_implementation_p = false;
+  gpu_summary->m_hsa_implementation_p = true;
+  host_summary->m_hsa_implementation_p = false;
 
   gpu_summary->m_gridified_kernel_p = gridified_kernel_p;
   host_summary->m_gridified_kernel_p = gridified_kernel_p;
@@ -827,32 +839,45 @@ hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host,
 
   process_gpu_implementation_attributes (gpu->decl);
 
-  /* Create reference between a kernel and a corresponding host implementation
-     to quarantee LTO streaming to a same LTRANS.  */
-  if (kind == HSA_KERNEL)
-    gpu->create_reference (host, IPA_REF_ADDR);
+  /* Create reference between an HSA function and a corresponding host
+     implementation to guarantee LTO streaming to the same LTRANS.  */
+  gpu->create_reference (host, IPA_REF_ADDR);
+}
+
+void
+hsa_summary_t::mark_hsa_only_implementation (cgraph_node *node,
+					     hsa_function_kind kind)
+{
+  hsa_function_summary *gpu_summary = get (node);
+  gpu_summary->m_kind = kind;
+  gpu_summary->m_hsa_implementation_p = true;
+  gcc_assert (!gpu_summary->m_bound_function);
+  process_gpu_implementation_attributes (node->decl);
 }
 
 /* Add a HOST function to HSA summaries.  */
 
 void
-hsa_register_kernel (cgraph_node *host)
+hsa_register_function (cgraph_node *host, bool kernel_p)
 {
   if (hsa_summaries == NULL)
     hsa_summaries = new hsa_summary_t (symtab);
   hsa_function_summary *s = hsa_summaries->get (host);
-  s->m_kind = HSA_KERNEL;
+  s->m_kind = kernel_p ? HSA_KERNEL : HSA_FUNCTION;
 }
 
 /* Add a pair of functions to HSA summaries.  GPU is an HSA implementation of
    a HOST function.  */
 
 void
-hsa_register_kernel (cgraph_node *gpu, cgraph_node *host)
+hsa_register_function (cgraph_node *gpu, cgraph_node *host, bool kernel_p)
 {
   if (hsa_summaries == NULL)
     hsa_summaries = new hsa_summary_t (symtab);
-  hsa_summaries->link_functions (gpu, host, HSA_KERNEL, true);
+  if (kernel_p)
+    hsa_summaries->link_functions (gpu, host, HSA_KERNEL, true);
+  else
+    hsa_summaries->link_functions (gpu, host, HSA_FUNCTION, true);
 }
 
 /* Return true if expansion of the current HSA function has already failed.  */
diff --git a/gcc/hsa-common.h b/gcc/hsa-common.h
index 3075163a020..9df3a5f6442 100644
--- a/gcc/hsa-common.h
+++ b/gcc/hsa-common.h
@@ -1223,11 +1223,14 @@ struct hsa_function_summary
 
   /* Pointer to a cgraph node which is a HSA implementation of the function.
      In case of the function is a HSA function, the bound function points
-     to the host function.  */
+     to the host function.
+     This can also be NULL if there is no counterpart, which can happen for GPU
+     implementations if they are functions marked with hsa_kernel or
+     hsa_function attributes.  */
   cgraph_node *m_bound_function;
 
-  /* Identifies if the function is an HSA function or a host function.  */
-  bool m_gpu_implementation_p;
+  /* Identifies if the function is an HSA function.  */
+  bool m_hsa_implementation_p;
 
   /* True if the function is a gridified kernel.  */
   bool m_gridified_kernel_p;
@@ -1235,7 +1238,7 @@ struct hsa_function_summary
 
 inline
 hsa_function_summary::hsa_function_summary (): m_kind (HSA_NONE),
-  m_bound_function (NULL), m_gpu_implementation_p (false)
+  m_bound_function (NULL), m_hsa_implementation_p (false)
 {
 }
 
@@ -1254,6 +1257,13 @@ public:
   void link_functions (cgraph_node *gpu, cgraph_node *host,
 		       hsa_function_kind kind, bool gridified_kernel_p);
 
+  /* Mark a specific function NODE as a standalone HSA implementation (that has
+     no CPU counterpart).  KIND determines whether this is a host-invokable
+     kernel or an agent-callable function.  */
+
+  void mark_hsa_only_implementation (cgraph_node *node,
+				     hsa_function_kind kind);
+
 private:
   void process_gpu_implementation_attributes (tree gdecl);
 };
@@ -1314,7 +1324,7 @@ hsa_internal_fn_hasher::equal (const value_type a, const compare_type b)
 
 /* in hsa-common.c */
 extern struct hsa_function_representation *hsa_cfun;
-extern hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies;
+extern hash_map <tree, vec <const char *> *> *hsa_decl_function_dependencies;
 extern hsa_summary_t *hsa_summaries;
 extern hsa_symbol *hsa_num_threads;
 extern unsigned hsa_kernel_calls_counter;
@@ -1343,13 +1353,15 @@ BrigAlignment8_t hsa_object_alignment (tree t);
 unsigned hsa_byte_alignment (BrigAlignment8_t alignment);
 void hsa_destroy_operand (hsa_op_base *op);
 void hsa_destroy_insn (hsa_insn_basic *insn);
-void hsa_add_kern_decl_mapping (tree decl, char *name, unsigned, bool);
-unsigned hsa_get_number_decl_kernel_mappings (void);
-tree hsa_get_decl_kernel_mapping_decl (unsigned i);
-char *hsa_get_decl_kernel_mapping_name (unsigned i);
+void hsa_add_function_decl_mapping (tree decl, char *name, unsigned,
+				    struct hsa_function_summary *s);
+unsigned hsa_get_number_decl_function_mappings (void);
+tree hsa_get_decl_function_mapping_decl (unsigned i);
+char *hsa_get_decl_function_mapping_name (unsigned i);
 unsigned hsa_get_decl_kernel_mapping_omp_size (unsigned i);
-bool hsa_get_decl_kernel_mapping_gridified (unsigned i);
-void hsa_free_decl_kernel_mapping (void);
+bool hsa_get_decl_function_mapping_gridified_p (unsigned i);
+bool hsa_get_decl_function_mapping_kernel_p (unsigned i);
+void hsa_free_decl_function_mapping (void);
 tree *hsa_get_ctor_statements (void);
 tree *hsa_get_dtor_statements (void);
 tree *hsa_get_kernel_dispatch_type (void);
@@ -1357,8 +1369,9 @@ void hsa_add_kernel_dependency (tree caller, const char *called_function);
 void hsa_sanitize_name (char *p);
 char *hsa_brig_function_name (const char *p);
 const char *hsa_get_declaration_name (tree decl);
-void hsa_register_kernel (cgraph_node *host);
-void hsa_register_kernel (cgraph_node *gpu, cgraph_node *host);
+void hsa_register_function (cgraph_node *host, bool kernel_p);
+void hsa_register_function (cgraph_node *gpu, cgraph_node *host,
+			    bool kernel_p);
 bool hsa_seen_error (void);
 void hsa_fail_cfun (void);
 
@@ -1409,7 +1422,7 @@ hsa_gpu_implementation_p (tree decl)
 
   hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (decl));
 
-  return s->m_gpu_implementation_p;
+  return s->m_hsa_implementation_p;
 }
 
 #endif /* HSA_H */
diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index b5a8c73731a..9d681a8bef3 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -233,11 +233,6 @@ hsa_function_representation::hsa_function_representation (hsa_internal_fn *fn)
 
 hsa_function_representation::~hsa_function_representation ()
 {
-  /* Kernel names are deallocated at the end of BRIG output when deallocating
-     hsa_decl_kernel_mapping.  */
-  if (!m_kern_p || m_seen_error)
-    free (m_name);
-
   for (unsigned i = 0; i < m_input_args.length (); i++)
     delete m_input_args[i];
   m_input_args.release ();
@@ -952,7 +947,9 @@ get_symbol_for_decl (tree decl)
 }
 
 /* For a given HSA function declaration, return a host
-   function declaration.  */
+   function declaration.
+
+   Return NULL if the declaration is an HSA-only function.  */
 
 tree
 hsa_get_host_function (tree decl)
@@ -960,7 +957,10 @@ hsa_get_host_function (tree decl)
   hsa_function_summary *s
     = hsa_summaries->get (cgraph_node::get_create (decl));
   gcc_assert (s->m_kind != HSA_NONE);
-  gcc_assert (s->m_gpu_implementation_p);
+  gcc_assert (s->m_hsa_implementation_p);
+
+  if (s->m_kind == HSA_KERNEL)
+    return NULL;
 
   return s->m_bound_function ? s->m_bound_function->decl : NULL;
 }
@@ -974,7 +974,7 @@ get_brig_function_name (tree decl)
 
   hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (d));
   if (s->m_kind != HSA_NONE
-      && s->m_gpu_implementation_p
+      && s->m_hsa_implementation_p
       && s->m_bound_function)
     d = s->m_bound_function->decl;
 
@@ -5877,7 +5877,7 @@ init_prologue (void)
   hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
 
   /* Create a magic number that is going to be printed by libgomp.  */
-  unsigned index = hsa_get_number_decl_kernel_mappings ();
+  unsigned index = hsa_get_number_decl_function_mappings ();
 
   /* Emit store to debug argument.  */
   if (PARAM_VALUE (PARAM_HSA_GEN_DEBUG_STORES) > 0)
@@ -6549,14 +6549,10 @@ generate_hsa (bool kernel)
   if (hsa_cfun->m_kernel_dispatch_count)
     init_hsa_num_threads ();
 
-  if (hsa_cfun->m_kern_p)
-    {
-      hsa_function_summary *s
-	= hsa_summaries->get (cgraph_node::get (hsa_cfun->m_decl));
-      hsa_add_kern_decl_mapping (current_function_decl, hsa_cfun->m_name,
+  hsa_add_function_decl_mapping (current_function_decl, hsa_cfun->m_name,
 				 hsa_cfun->m_maximum_omp_data_size,
-				 s->m_gridified_kernel_p);
-    }
+				 hsa_summaries->get
+				 (cgraph_node::get (hsa_cfun->m_decl)));
 
   if (flag_checking)
     {
diff --git a/gcc/ipa-hsa.c b/gcc/ipa-hsa.c
index c02dadaa016..08948ba9b43 100644
--- a/gcc/ipa-hsa.c
+++ b/gcc/ipa-hsa.c
@@ -131,10 +131,10 @@ process_hsa_functions (void)
       while (e)
 	{
 	  hsa_function_summary *src = hsa_summaries->get (node);
-	  if (src->m_kind != HSA_NONE && src->m_gpu_implementation_p)
+	  if (src->m_kind != HSA_NONE && src->m_hsa_implementation_p)
 	    {
 	      hsa_function_summary *dst = hsa_summaries->get (e->callee);
-	      if (dst->m_kind != HSA_NONE && !dst->m_gpu_implementation_p)
+	      if (dst->m_kind != HSA_NONE && !dst->m_hsa_implementation_p)
 		{
 		  e->redirect_callee (dst->m_bound_function);
 		  if (dump_file)
@@ -197,7 +197,7 @@ ipa_hsa_write_summary (void)
 
 	  bp = bitpack_create (ob->main_stream);
 	  bp_pack_value (&bp, s->m_kind, 2);
-	  bp_pack_value (&bp, s->m_gpu_implementation_p, 1);
+	  bp_pack_value (&bp, s->m_hsa_implementation_p, 1);
 	  bp_pack_value (&bp, s->m_bound_function != NULL, 1);
 	  streamer_write_bitpack (&bp);
 	  if (s->m_bound_function)
@@ -248,7 +248,7 @@ ipa_hsa_read_section (struct lto_file_decl_data *file_data, const char *data,
 
       struct bitpack_d bp = streamer_read_bitpack (&ib_main);
       s->m_kind = (hsa_function_kind) bp_unpack_value (&bp, 2);
-      s->m_gpu_implementation_p = bp_unpack_value (&bp, 1);
+      s->m_hsa_implementation_p = bp_unpack_value (&bp, 1);
       bool has_tree = bp_unpack_value (&bp, 1);
 
       if (has_tree)
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index 0f45563c57c..f159d87b974 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -724,7 +724,7 @@ expand_parallel_call (struct omp_region *region, basic_block bb,
       && parallel_needs_hsa_kernel_p (region))
     {
       cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
-      hsa_register_kernel (child_cnode);
+      hsa_register_function (child_cnode, true);
     }
 }
 
@@ -7770,7 +7770,7 @@ grid_expand_target_grid_body (struct omp_region *target)
 				    OMP_CLAUSE__GRIDDIM_));
       cgraph_node *n = cgraph_node::get (orig_child_fndecl);
 
-      hsa_register_kernel (n);
+      hsa_register_function (n, true);
       return;
     }
 
@@ -7886,7 +7886,7 @@ grid_expand_target_grid_body (struct omp_region *target)
   kcn->mark_force_output ();
   cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
 
-  hsa_register_kernel (kcn, orig_child);
+  hsa_register_function (kcn, orig_child, true);
 
   cgraph_node::add_new_function (kern_fndecl, true);
   push_cfun (kern_cfun);
diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
index fc08f5df058..fc160b37164 100644
--- a/libgomp/plugin/plugin-hsa.c
+++ b/libgomp/plugin/plugin-hsa.c
@@ -310,13 +310,14 @@ hsa_error (const char *str, hsa_status_t status)
   return false;
 }
 
-struct hsa_kernel_description
+struct hsa_function_description
 {
   const char *name;
   unsigned omp_data_size;
+  bool kernel_p;
   bool gridified_kernel_p;
-  unsigned kernel_dependencies_count;
-  const char **kernel_dependencies;
+  unsigned function_dependencies_count;
+  const char **function_dependencies;
 };
 
 struct global_var_info
@@ -331,26 +332,31 @@ struct global_var_info
 struct brig_image_desc
 {
   hsa_ext_module_t brig_module;
-  const unsigned kernel_count;
-  struct hsa_kernel_description *kernel_infos;
+  const unsigned function_count;
+  struct hsa_function_description *function_infos;
   const unsigned global_variable_count;
   struct global_var_info *global_variables;
+  /* Functions/kernels that do not have a host-side version.  */
+  const unsigned hsa_only_function_count;
+  struct hsa_function_description *hsa_only_function_infos;
 };
 
 struct agent_info;
 
 /* Information required to identify, finalize and run any given kernel.  */
 
-struct kernel_info
+struct function_info
 {
-  /* Name of the kernel, required to locate it within the brig module.  */
+  /* Name of the function, required to locate it within the BRIG module.  */
   const char *name;
+  /* True if the function is a kernel.  */
+  bool kernel_p;
   /* Size of memory space for OMP data.  */
   unsigned omp_data_size;
   /* The specific agent the kernel has been or will be finalized for and run
      on.  */
   struct agent_info *agent;
-  /* The specific module where the kernel takes place.  */
+  /* The module where the function resides.  */
   struct module_info *module;
   /* Mutex enforcing that at most once thread ever initializes a kernel for
      use.  A thread should have locked agent->modules_rwlock for reading before
@@ -369,7 +375,7 @@ struct kernel_info
   uint32_t group_segment_size;
   /* Required size of private segment.  */
   uint32_t private_segment_size;
-  /* List of all kernel dependencies.  */
+  /* List of all function dependencies.  */
   const char **dependencies;
   /* Number of dependencies.  */
   unsigned dependencies_count;
@@ -388,11 +394,11 @@ struct module_info
   /* The description with which the program has registered the image.  */
   struct brig_image_desc *image_desc;
 
-  /* Number of kernels in this module.  */
-  int kernel_count;
-  /* An array of kernel_info structures describing each kernel in this
+  /* Number of functions (or kernels) in this module.  */
+  int function_count;
+  /* An array of function_info structures describing each function in this
      module.  */
-  struct kernel_info kernels[];
+  struct function_info functions[];
 };
 
 /* Information about shared brig library.  */
 
 /* Information about shared brig library.  */
@@ -513,16 +519,16 @@ init_hsa_runtime_functions (void)
 
-/* Find kernel for an AGENT by name provided in KERNEL_NAME.  */
+/* Find function for an AGENT by name provided in FUNC_NAME.  */
 
-static struct kernel_info *
-get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
+static struct function_info *
+get_function_for_agent (struct agent_info *agent, const char *func_name)
 {
   struct module_info *module = agent->first_module;
 
   while (module)
     {
-      for (unsigned i = 0; i < module->kernel_count; i++)
-	if (strcmp (module->kernels[i].name, kernel_name) == 0)
-	  return &module->kernels[i];
+      for (unsigned i = 0; i < module->function_count; i++)
+	if (strcmp (module->functions[i].name, func_name) == 0)
+	  return &module->functions[i];
 
       module = module->next;
     }
@@ -835,29 +841,30 @@ destroy_hsa_program (struct agent_info *agent)
   for (module = agent->first_module; module; module = module->next)
     {
       int i;
-      for (i = 0; i < module->kernel_count; i++)
-	module->kernels[i].initialized = false;
+      for (i = 0; i < module->function_count; i++)
+	module->functions[i].initialized = false;
     }
   agent->prog_finalized = false;
   return true;
 }
 
-/* Initialize KERNEL from D and other parameters.  Return true on success. */
+/* Initialize FUNCTION from D and other parameters.  Return true on success. */
 
 static bool
-init_basic_kernel_info (struct kernel_info *kernel,
-			struct hsa_kernel_description *d,
-			struct agent_info *agent,
-			struct module_info *module)
+init_basic_function_info (struct function_info *function,
+			  struct hsa_function_description *d,
+			  struct agent_info *agent,
+			  struct module_info *module)
 {
-  kernel->agent = agent;
-  kernel->module = module;
-  kernel->name = d->name;
-  kernel->omp_data_size = d->omp_data_size;
-  kernel->gridified_kernel_p = d->gridified_kernel_p;
-  kernel->dependencies_count = d->kernel_dependencies_count;
-  kernel->dependencies = d->kernel_dependencies;
-  if (pthread_mutex_init (&kernel->init_mutex, NULL))
+  function->agent = agent;
+  function->module = module;
+  function->name = d->name;
+  function->omp_data_size = d->omp_data_size;
+  function->kernel_p = d->kernel_p;
+  function->gridified_kernel_p = d->gridified_kernel_p;
+  function->dependencies_count = d->function_dependencies_count;
+  function->dependencies = d->function_dependencies;
+  if (pthread_mutex_init (&function->init_mutex, NULL))
     {
       GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
       return false;
@@ -885,8 +892,17 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
   struct agent_info *agent;
   struct addr_pair *pair;
   struct module_info *module;
-  struct kernel_info *kernel;
-  int kernel_count = image_desc->kernel_count;
+  struct function_info *function;
+  int host_mapped_function_count = image_desc->function_count;
+  int hsa_only_function_count;
+
+  if (version == GOMP_VERSION_HSA)
+    hsa_only_function_count = image_desc->hsa_only_function_count;
+  else
+    hsa_only_function_count = 0;
+
+  int total_function_count
+    = host_mapped_function_count + hsa_only_function_count;
 
   agent = get_agent_info (ord);
   if (!agent)
@@ -897,33 +913,46 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
       GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
       return -1;
     }
-  if (agent->prog_finalized
-      && !destroy_hsa_program (agent))
+  if (agent->prog_finalized && !destroy_hsa_program (agent))
     return -1;
 
-  HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
-  pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
+  HSA_DEBUG ("Encountered %d mapped and %d HSA-only functions in the "
+	     "image\n", host_mapped_function_count, hsa_only_function_count);
+  pair = GOMP_PLUGIN_malloc (host_mapped_function_count * sizeof (struct addr_pair));
   *target_table = pair;
   module = (struct module_info *)
     GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
-				+ kernel_count * sizeof (struct kernel_info));
+				+ (total_function_count
+				   * sizeof (struct function_info)));
   module->image_desc = image_desc;
-  module->kernel_count = kernel_count;
+  module->function_count = total_function_count;
 
-  kernel = &module->kernels[0];
+  function = &module->functions[0];
 
-  /* Allocate memory for kernel dependencies.  */
-  for (unsigned i = 0; i < kernel_count; i++)
+  for (unsigned i = 0; i < host_mapped_function_count; i++)
     {
-      pair->start = (uintptr_t) kernel;
-      pair->end = (uintptr_t) (kernel + 1);
+      pair->start = (uintptr_t) function;
+      pair->end = (uintptr_t) (function + 1);
 
-      struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
-      if (!init_basic_kernel_info (kernel, d, agent, module))
+      struct hsa_function_description *d = &image_desc->function_infos[i];
+      if (!init_basic_function_info (function, d, agent, module))
 	return -1;
-      kernel++;
+      HSA_DEBUG ("Initialized host-mapped function with name '%s' "
+		 "to info struct %p\n", function->name, function);
+      function++;
       pair++;
     }
+  for (unsigned i = 0; i < hsa_only_function_count; i++)
+    {
+      struct hsa_function_description *d
+	= &image_desc->hsa_only_function_infos[i];
+      if (!init_basic_function_info (function, d, agent, module))
+	return -1;
+      HSA_DEBUG ("Initialized HSA-only function with name '%s' to info "
+		 "struct %p\n", function->name, function);
+
+      function++;
+    }
 
   add_module_to_agent (agent, module);
   if (pthread_rwlock_unlock (&agent->modules_rwlock))
@@ -931,7 +960,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
       GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
       return -1;
     }
-  return kernel_count;
+  return host_mapped_function_count;
 }
 
 /* Add a shared BRIG library from a FILE_NAME to an AGENT.  */
@@ -1112,7 +1141,7 @@ final:
 /* Create kernel dispatch data structure for given KERNEL.  */
 
 static struct GOMP_hsa_kernel_dispatch *
-create_single_kernel_dispatch (struct kernel_info *kernel,
+create_single_kernel_dispatch (struct function_info *kernel,
 			       unsigned omp_data_size)
 {
   struct agent_info *agent = kernel->agent;
@@ -1176,7 +1205,7 @@ release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
    to calculate maximum necessary memory for OMP data allocation.  */
 
 static void
-init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
+init_single_kernel (struct function_info *kernel, unsigned *max_omp_data_size)
 {
   hsa_status_t status;
   struct agent_info *agent = kernel->agent;
@@ -1227,8 +1256,8 @@ init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
 
   for (unsigned i = 0; i < kernel->dependencies_count; i++)
     {
-      struct kernel_info *dependency
-	= get_kernel_for_agent (agent, kernel->dependencies[i]);
+      struct function_info *dependency
+	= get_function_for_agent (agent, kernel->dependencies[i]);
 
       if (dependency == NULL)
 	{
@@ -1301,7 +1330,7 @@ print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned inden
    dependencies.  */
 
 static struct GOMP_hsa_kernel_dispatch *
-create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
+create_kernel_dispatch (struct function_info *kernel, unsigned omp_data_size)
 {
   struct GOMP_hsa_kernel_dispatch *shadow
     = create_single_kernel_dispatch (kernel, omp_data_size);
@@ -1310,11 +1339,11 @@ create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
   shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
 
   /* Create kernel dispatch data structures.  We do not allow to have
-     a kernel dispatch with depth bigger than one.  */
+     a kernel dispatch with depth larger than one.  */
   for (unsigned i = 0; i < kernel->dependencies_count; i++)
     {
-      struct kernel_info *dependency
-	= get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
+      struct function_info *dependency
+	= get_function_for_agent (kernel->agent, kernel->dependencies[i]);
       shadow->children_dispatches[i]
 	= create_single_kernel_dispatch (dependency, omp_data_size);
       shadow->children_dispatches[i]->queue
@@ -1330,7 +1359,7 @@ create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
    create_and_finalize_hsa_program.  */
 
 static void
-init_kernel (struct kernel_info *kernel)
+init_kernel (struct function_info *kernel)
 {
   if (pthread_mutex_lock (&kernel->init_mutex))
     GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
@@ -1445,7 +1474,7 @@ get_group_size (uint32_t ndim, uint32_t grid, uint32_t group)
 bool
 GOMP_OFFLOAD_can_run (void *fn_ptr)
 {
-  struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
+  struct function_info *kernel = (struct function_info *) fn_ptr;
   struct agent_info *agent = kernel->agent;
   create_and_finalize_hsa_program (agent);
 
@@ -1474,10 +1503,10 @@ packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest)
 }
 
 /* Run KERNEL on its agent, pass VARS to it as arguments and take
-   launchattributes from KLA.  */
+   launch attributes from KLA.  */
 
 void
-run_kernel (struct kernel_info *kernel, void *vars,
+run_kernel (struct function_info *kernel, void *vars,
 	    struct GOMP_kernel_launch_attributes *kla)
 {
   struct agent_info *agent = kernel->agent;
@@ -1608,13 +1637,13 @@ run_kernel (struct kernel_info *kernel, void *vars,
 /* Part of the libgomp plugin interface.  Run a kernel on device N (the number
    is actually ignored, we assume the FN_PTR has been mapped using the correct
    device) and pass it an array of pointers in VARS as a parameter.  The kernel
-   is identified by FN_PTR which must point to a kernel_info structure.  */
+   is identified by FN_PTR which must point to a function_info structure.  */
 
 void
 GOMP_OFFLOAD_run (int n __attribute__((unused)),
 		  void *fn_ptr, void *vars, void **args)
 {
-  struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
+  struct function_info *kernel = (struct function_info *) fn_ptr;
   struct GOMP_kernel_launch_attributes def;
   struct GOMP_kernel_launch_attributes *kla;
   if (!parse_target_attributes (args, &def, &kla))
@@ -1690,8 +1719,8 @@ static bool
 destroy_module (struct module_info *module)
 {
   int i;
-  for (i = 0; i < module->kernel_count; i++)
-    if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
+  for (i = 0; i < module->function_count; i++)
+    if (pthread_mutex_destroy (&module->functions[i].init_mutex))
       {
 	GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
 			   "mutex");
-- 
GitLab


From 42d9393041d3f30a12d1ab090e6916d13bd86157 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pekka=20J=C3=A4=C3=A4skel=C3=A4inen?=
 <pekka.jaaskelainen@parmance.com>
Date: Fri, 27 Oct 2017 11:38:13 +0300
Subject: [PATCH 2/4] Fix libgomp issues found by running on Carrizo.

---
 gcc/hsa-gen.c               | 3 ---
 libgomp/plugin/plugin-hsa.c | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index 9d681a8bef3..d93eb756cb9 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -959,9 +959,6 @@ hsa_get_host_function (tree decl)
   gcc_assert (s->m_kind != HSA_NONE);
   gcc_assert (s->m_hsa_implementation_p);
 
-  if (s->m_kind == HSA_KERNEL)
-    return NULL;
-
   return s->m_bound_function ? s->m_bound_function->decl : NULL;
 }
 
diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
index fc160b37164..d7aef62e804 100644
--- a/libgomp/plugin/plugin-hsa.c
+++ b/libgomp/plugin/plugin-hsa.c
@@ -896,7 +896,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
   int host_mapped_function_count = image_desc->function_count;
   int hsa_only_function_count;
 
-  if (version == GOMP_VERSION_HSA)
+  if (GOMP_VERSION_DEV (version) == GOMP_VERSION_HSA)
     hsa_only_function_count = image_desc->hsa_only_function_count;
   else
     hsa_only_function_count = 0;
-- 
GitLab


From af94eed932afaeb2f2495949104ca03d67c4b867 Mon Sep 17 00:00:00 2001
From: Martin Jambor <mjambor@suse.cz>
Date: Fri, 27 Oct 2017 16:59:29 +0300
Subject: [PATCH 3/4] [hsa] Add missing guard in OMP gridification

---
 gcc/omp-grid.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/omp-grid.c b/gcc/omp-grid.c
index a7b6f60aeaf..31beb144b3b 100644
--- a/gcc/omp-grid.c
+++ b/gcc/omp-grid.c
@@ -1315,6 +1315,7 @@ grid_attempt_target_gridification (gomp_target *target,
       n1 = fold_convert (itype, n1);
       n2 = fold_convert (itype, n2);
 
+      tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);
       tree step
 	= omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
 
@@ -1327,7 +1328,8 @@ grid_attempt_target_gridification (gomp_target *target,
 			 fold_build1 (NEGATE_EXPR, itype, t),
 			 fold_build1 (NEGATE_EXPR, itype, step));
       else
 	t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
+      t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
       if (grid.tiling)
 	{
 	  if (cond_code == GT_EXPR)
-- 
GitLab


From 7d4434c43c720800b2bdb1e74202def02b22d38e Mon Sep 17 00:00:00 2001
From: Martin Jambor <mjambor@suse.cz>
Date: Fri, 27 Oct 2017 16:59:29 +0300
Subject: [PATCH 4/4] [hsa] Add missing guard in OMP gridification

---
 gcc/omp-grid.c | 2 ++
 1 file changed, 2 insertions(+)

NOTE(review): this patch has the same subject, author date and index
line (a7b6f60aeaf..31beb144b3b) as patch 3/4 of this series and touches
the same two hunks.  It looks like a duplicate and is not expected to
apply cleanly on top of 3/4 -- confirm and drop one of the two.

diff --git a/gcc/omp-grid.c b/gcc/omp-grid.c
index a7b6f60aeaf..31beb144b3b 100644
--- a/gcc/omp-grid.c
+++ b/gcc/omp-grid.c
@@ -1315,6 +1315,7 @@ grid_attempt_target_gridification (gomp_target *target,
       n1 = fold_convert (itype, n1);
       n2 = fold_convert (itype, n2);
 
+      tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);
       tree step
 	= omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
 
@@ -1327,7 +1328,8 @@ grid_attempt_target_gridification (gomp_target *target,
 			 fold_build1 (NEGATE_EXPR, itype, t),
 			 fold_build1 (NEGATE_EXPR, itype, step));
       else
 	t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
+      t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
       if (grid.tiling)
 	{
 	  if (cond_code == GT_EXPR)
-- 
GitLab