/*
Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#ifndef _VX_EXT_AMD_H_
#define _VX_EXT_AMD_H_

#include <VX/vx.h>
#include <VX/vx_khr_nn.h>

#ifdef __cplusplus
#include <string>
#include <vector>
#include <map>
#endif

/*! \brief AMD internal parameters. [TODO: This needs to be moved to ago_internal.h]
*/
#define AGO_MAX_PARAMS                  8
#define AGO_MERGE_RULE_MAX_FIND         4
#define AGO_MERGE_RULE_MAX_REPLACE      4
#define AGO_MERGE_RULE_SOLITARY_FLAG    0x20

/*! \brief Maximum size of scalar string buffer. The local buffers used for accessing scalar strings
* should be of size VX_MAX_STRING_BUFFER_SIZE_AMD and the maximum allowed string length is
* VX_MAX_STRING_BUFFER_SIZE_AMD-1.
* \ingroup group_scalar
*/
#define VX_MAX_STRING_BUFFER_SIZE_AMD   128

/*! \brief The Neural Network activation functions vx_nn_activation_function_e extension.
*/
#define VX_NN_ACTIVATION_LEAKY_RELU (VX_ENUM_BASE(VX_ID_AMD, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x9)

/*! \brief The type enumeration lists all the AMD specific types in OpenVX.
*/
enum ago_type_public_e {
    /*! \brief AMD data types
    */
    VX_TYPE_FLOAT16    = 0x00F, // 16-bit float data type
    VX_TYPE_STRING_AMD = 0x011, // scalar data type for string
};

/*! \brief The AMD context attributes list.
*/
enum vx_context_attribute_amd_e {
    /*! \brief Set a text macro definition. Use a <tt>\ref AgoContextTextMacroInfo</tt> parameter. */
    VX_CONTEXT_ATTRIBUTE_AMD_SET_TEXT_MACRO = VX_ATTRIBUTE_BASE(VX_ID_AMD, VX_TYPE_CONTEXT) + 0x03,
    /*! \brief Set a node merge rule. Use a <tt>\ref AgoNodeMergeRule</tt> parameter. */
    VX_CONTEXT_ATTRIBUTE_AMD_SET_MERGE_RULE = VX_ATTRIBUTE_BASE(VX_ID_AMD, VX_TYPE_CONTEXT) + 0x04,
    /*! \brief Maximum number of tensor dimensions supported by the hardware. */
    VX_CONTEXT_MAX_TENSOR_DIMENSIONS = VX_ATTRIBUTE_BASE(VX_ID_AMD, VX_TYPE_CONTEXT) + 0x05,
};
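
/*! \brief Usage sketch (illustrative addition, not part of the original header):
* the VX_CONTEXT_MAX_TENSOR_DIMENSIONS attribute can be read back through the
* standard <tt>\ref vxQueryContext</tt> entry point. A valid vx_context created
* elsewhere is assumed, and the vx_size value type used below is an assumption.
* \code
* vx_size max_dims = 0;
* vx_status status = vxQueryContext(context, VX_CONTEXT_MAX_TENSOR_DIMENSIONS,
*                                   &max_dims, sizeof(max_dims));
* if (status == VX_SUCCESS) {
*     // max_dims now reports the maximum tensor dimensionality supported by the HW
* }
* \endcode
*/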

/*! \brief The AMD kernel attributes list.
*/
enum vx_kernel_attribute_amd_e {
    /*! \brief kernel callback for node regeneration. Use a <tt>\ref amd_kernel_node_regen_callback_f</tt> parameter.*/
    VX_KERNEL_ATTRIBUTE_AMD_NODE_REGEN_CALLBACK = VX_ATTRIBUTE_BASE(VX_ID_AMD, VX_TYPE_KERNEL) + 0x03,
};

/*! \brief The AMD graph attributes list.
*/
enum vx_graph_attribute_amd_e {
    /*! \brief graph optimizer flags. Use a <tt>\ref vx_uint32</tt> parameter.*/
    VX_GRAPH_ATTRIBUTE_AMD_OPTIMIZER_FLAGS = VX_ATTRIBUTE_BASE(VX_ID_AMD, VX_TYPE_GRAPH) + 0x04,
};

/*! \brief tensor Data attributes.
* \ingroup group_tensor
*/
enum vx_tensor_attribute_amd_e {
    VX_TENSOR_MEMORY_TYPE          = VX_ATTRIBUTE_BASE(VX_ID_AMD, VX_TYPE_TENSOR) + 0x8,
    VX_TENSOR_MEMORY_HANDLE        = VX_ATTRIBUTE_BASE(VX_ID_AMD, VX_TYPE_TENSOR) + 0x9,
    VX_TENSOR_MEMORY_PHYS_ADDR     = VX_ATTRIBUTE_BASE(VX_ID_AMD, VX_TYPE_TENSOR) + 0xa,
    VX_TENSOR_MEMORY_VIRT_ADDR     = VX_ATTRIBUTE_BASE(VX_ID_AMD, VX_TYPE_TENSOR) + 0xb,
    VX_TENSOR_MEMORY_SIZE          = VX_ATTRIBUTE_BASE(VX_ID_AMD, VX_TYPE_TENSOR) + 0xc,
    VX_TENSOR_FIXED_POINT_POSITION = VX_ATTRIBUTE_BASE(VX_ID_AMD, VX_TYPE_TENSOR) + 0xd,
};

/*! \brief The image color space list used by the <tt>\ref VX_IMAGE_SPACE</tt> attribute of a <tt>\ref vx_image</tt>.
* \ingroup group_image
*/
enum vx_color_space_amd_e {
    /*! \brief Use to indicate that the BT.2020 coefficients are used for conversions. */
    VX_COLOR_SPACE_BT2020 = VX_ENUM_BASE(VX_ID_AMD, VX_ENUM_COLOR_SPACE) + 0x1,
};

enum vx_action_vpu_e {
    VX_ACTION_VPU_RSZ = VX_ENUM_BASE(VX_ID_AMD, VX_ENUM_ACTION) + 0x10,
    VX_ACTION_VPU_MBG = VX_ENUM_BASE(VX_ID_AMD, VX_ENUM_ACTION) + 0x11,
};

/*! \brief The multidimensional data object (Tensor).
* \see vxCreateTensor
* \ingroup group_tensor
* \extends vx_reference
*/
typedef struct _vx_tensor_t * vx_tensor;

/*! \brief AMD data structure to set a text macro.
*/
typedef struct {
    vx_char   macroName[256];
    vx_char * text;
} AgoContextTextMacroInfo;
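
/*! \brief Usage sketch (illustrative addition): registering a text macro on a
* context through the standard <tt>\ref vxSetContextAttribute</tt> entry point and
* the VX_CONTEXT_ATTRIBUTE_AMD_SET_TEXT_MACRO attribute declared above. The macro
* name "myMacro" and the macro body are hypothetical placeholders; a valid
* vx_context created elsewhere is assumed.
* \code
* const vx_char * macroText = "";  // macro body text supplied by the application
* AgoContextTextMacroInfo macro = { "myMacro", (vx_char *)macroText };
* vx_status status = vxSetContextAttribute(context,
*     VX_CONTEXT_ATTRIBUTE_AMD_SET_TEXT_MACRO, &macro, sizeof(macro));
* \endcode
*/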

/*! \brief AMD data structure to import a graph from text.
** text:
**   "macro <macro-name>" to use a pre-defined macro
**   "file <file-name>" to load from a file
**   otherwise, the text is used as is
*/
typedef struct {
    vx_char * text;
    vx_uint32 num_ref;
    vx_reference * ref;
    vx_int32 dumpToConsole;
    void (VX_CALLBACK * data_registry_callback_f) (void * obj, vx_reference ref, const char * name, const char * app_params);
    void * data_registry_callback_obj;
} AgoGraphImportInfo;

/*! \brief AMD data structure to export a graph to text.
*/
typedef struct {
    vx_char fileName[256];
    vx_uint32 num_ref;
    vx_reference * ref;
    vx_char comment[64];
} AgoGraphExportInfo;

/*! \brief AMD data structure to get internal performance data.
*/
typedef struct {
    vx_uint64 kernel_enqueue;
    vx_uint64 kernel_wait;
    vx_uint64 buffer_read;
    vx_uint64 buffer_write;
} AgoGraphPerfInternalInfo;

/*! \brief AMD data structure to specify node merge rule.
*/
typedef struct AgoNodeMergeRule_t {
    struct {
        vx_enum   kernel_id;
        vx_uint32 arg_spec[AGO_MAX_PARAMS];
    } find[AGO_MERGE_RULE_MAX_FIND];
    struct {
        vx_enum   kernel_id;
        vx_uint32 arg_spec[AGO_MAX_PARAMS];
    } replace[AGO_MERGE_RULE_MAX_REPLACE];
} AgoNodeMergeRule;
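
/*! \brief Usage sketch (illustrative addition): a populated <tt>\ref AgoNodeMergeRule</tt>
* is installed on a context with the standard <tt>\ref vxSetContextAttribute</tt> call and
* the VX_CONTEXT_ATTRIBUTE_AMD_SET_MERGE_RULE attribute declared above. The encoding of
* kernel_id and arg_spec entries is runtime specific and is not shown here; a valid
* vx_context created elsewhere is assumed.
* \code
* AgoNodeMergeRule rule = { 0 };
* // ... fill rule.find[] with the node pattern to match and rule.replace[]
* //     with the replacement pattern (encoding is runtime specific) ...
* vx_status status = vxSetContextAttribute(context,
*     VX_CONTEXT_ATTRIBUTE_AMD_SET_MERGE_RULE, &rule, sizeof(rule));
* \endcode
*/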

#ifdef __cplusplus
/*! \brief AMD usernode callback for regenerating a node.
*/
typedef vx_status(VX_CALLBACK * amd_drama_add_node_f)(vx_node node, vx_enum kernel_id, vx_reference * paramList, vx_uint32 paramCount);
typedef vx_status(VX_CALLBACK * amd_kernel_node_regen_callback_f)(vx_node node, amd_drama_add_node_f add_node_f, vx_bool& replace_original);
#endif

#ifdef __cplusplus
extern "C" {
#endif

/**
* \brief Retrieve the name of a reference.
* \ingroup vx_framework_reference
*
* This function is used to retrieve the name of a reference.
*
* \param [in]  ref  The reference.
* \param [out] name The buffer into which the reference name is copied.
* \param [in]  size The size of the name buffer.
* \return A \ref vx_status_e enumeration.
* \retval VX_SUCCESS No errors.
* \retval VX_ERROR_INVALID_REFERENCE The reference is not valid.
*/
VX_API_ENTRY vx_status VX_API_CALL vxGetReferenceName(vx_reference ref, vx_char name[], vx_size size);
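
/*! \brief Usage sketch (illustrative addition): retrieving the name of a reference
* into a local buffer; VX_MAX_STRING_BUFFER_SIZE_AMD is used here as a convenient
* buffer size. The vx_graph cast to vx_reference stands in for any valid reference
* created elsewhere.
* \code
* vx_char name[VX_MAX_STRING_BUFFER_SIZE_AMD] = { 0 };
* vx_status status = vxGetReferenceName((vx_reference)graph, name, sizeof(name));
* if (status == VX_SUCCESS) {
*     // name now holds a NUL-terminated reference name
* }
* \endcode
*/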

/* Tensor */
VX_API_ENTRY vx_tensor VX_API_CALL vxCreateGraphTensorFromHandle(vx_graph graph, vx_size number_of_dims, const vx_size* dims, vx_enum data_type, vx_int8 fixed_point_position, const vx_size* stride, void* ptr, vx_enum memory_type);
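
/*! \brief Usage sketch (illustrative addition): creating a graph-scoped tensor over an
* application-owned memory block with vxCreateGraphTensorFromHandle. The 224x224x3x1
* shape, the packed byte strides, VX_TYPE_INT16 with a Q8 fixed-point position, and
* VX_MEMORY_TYPE_HOST are placeholder choices; host_buffer and graph are assumed to
* exist, and the exact requirements (e.g. alignment) are runtime specific.
* \code
* vx_size dims[4]   = { 224, 224, 3, 1 };
* vx_size stride[4] = { sizeof(vx_int16),
*                       224 * sizeof(vx_int16),
*                       224 * 224 * sizeof(vx_int16),
*                       3 * 224 * 224 * sizeof(vx_int16) };
* void * ptr = host_buffer;  // hypothetical application-owned buffer
* vx_tensor tensor = vxCreateGraphTensorFromHandle(graph, 4, dims, VX_TYPE_INT16,
*                                                  8, stride, ptr, VX_MEMORY_TYPE_HOST);
* \endcode
*/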

/* Extensions for the NNA device */
VX_API_ENTRY vx_graph VX_API_CALL vxGetScopeGraph(vx_reference reference);
VX_API_ENTRY vx_bool VX_API_CALL vxGraphProfEnable(vx_graph graph);
VX_API_ENTRY vx_size VX_API_CALL vxAllocateDeviceMemory(vx_context context, vx_size size);
VX_API_ENTRY vx_status VX_API_CALL vxFreeDeviceMemory(vx_context context, vx_size handle);
VX_API_ENTRY vx_size VX_API_CALL vxGetDeviceMemorySize(vx_size handle);
VX_API_ENTRY vx_size VX_API_CALL vxGetDeviceMemoryPhysicalAddress(vx_size handle);
VX_API_ENTRY vx_size VX_API_CALL vxGetDeviceMemoryVirtualAddress(vx_size handle);
VX_API_ENTRY vx_status VX_API_CALL vxInvalidateDeviceMemoryCache(vx_context context, vx_size handle);
VX_API_ENTRY vx_status VX_API_CALL vxInvalidateDeviceMemoryCacheAll(vx_context context);
VX_API_ENTRY vx_status VX_API_CALL vxFlushDeviceMemoryCache(vx_context context, vx_size handle);
VX_API_ENTRY vx_status VX_API_CALL vxFlushDeviceMemoryCacheAll(vx_context context);
VX_API_ENTRY vx_status VX_API_CALL vxDeviceMemoryInfo(vx_context context, std::vector<vx_size>& mem_size, std::vector<vx_size>& virt_addr, std::vector<vx_size>& phys_addr);
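
/*! \brief Usage sketch (illustrative addition): a typical device-memory round trip with
* the functions declared above. The 1 MiB size is a placeholder, the non-zero check for
* allocation failure is an assumption, and the exact cache-maintenance points depend on
* the application and device.
* \code
* vx_size handle = vxAllocateDeviceMemory(context, 1024 * 1024);
* if (handle != 0) {
*     vx_size phys = vxGetDeviceMemoryPhysicalAddress(handle);
*     vx_size virt = vxGetDeviceMemoryVirtualAddress(handle);
*     // ... CPU writes through the virtual address ...
*     vxFlushDeviceMemoryCache(context, handle);       // make CPU writes visible to the device
*     // ... device reads and writes via the physical address ...
*     vxInvalidateDeviceMemoryCache(context, handle);  // make device writes visible to the CPU
*     vxFreeDeviceMemory(context, handle);
* }
* \endcode
*/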

VX_API_ENTRY vx_status VX_API_CALL vxConfigGraphCommandMemory(vx_graph graph, vx_size handle, vx_bool dynamic_params);
VX_API_ENTRY vx_status VX_API_CALL vxConfigGraphMemory(vx_graph graph, vx_size wt_handle, bool on_chip_mem, vx_size io_handle_size, vx_size io_handle);
VX_API_ENTRY vx_int32 VX_API_CALL vxGetGraphInOutMemorySize(vx_graph graph);
VX_API_ENTRY vx_status VX_API_CALL vxConfigInOutSharedMemory(vx_context context, vx_size handle);
VX_API_ENTRY vx_status VX_API_CALL vxLockInOutSharedMemory(vx_graph graph);
VX_API_ENTRY vx_status VX_API_CALL vxUnLockInOutSharedMemory(vx_graph graph);
VX_API_ENTRY vx_status VX_API_CALL vxSetGraphCommandParams(vx_graph graph, std::vector<int>& cmd_offset, std::vector<int>& cmd_length);
VX_API_ENTRY vx_status VX_API_CALL vxSetGraphCommandPacketParams(vx_graph graph, int cmd_blk_num, int first_cmd_len, int offset, int length, bool on_chip_mem_enable, bool next_soft_kernel);
VX_API_ENTRY vx_status VX_API_CALL vxSetGraphLayerBlockCommandParams(vx_graph graph, std::vector<int>& block_cmd_first, std::vector<int>& block_cmd_count, std::vector<int>& layers_of_block);
VX_API_ENTRY vx_int32 VX_API_CALL vxGetGraphLayerBlockCount(vx_graph graph);
VX_API_ENTRY vx_status VX_API_CALL vxQueueCommand(vx_context context, vx_graph graph);
VX_API_ENTRY vx_status VX_API_CALL vxProcessCommand(vx_context context, vx_graph graph);
VX_API_ENTRY vx_status VX_API_CALL vxPushGraphSoftBlockPerf(vx_graph graph, int time_eval);
VX_API_ENTRY vx_status VX_API_CALL vxSetGraphBlockType(vx_graph graph, int node_type, int block_idx);
VX_API_ENTRY vx_size VX_API_CALL vxCreateHandleFromDeviceMemory(vx_context context, vx_size offset, vx_size length, vx_size dev_mem_handle);
VX_API_ENTRY vx_status VX_API_CALL vxReleaseDeviceMemoryHandle(vx_context context, vx_size dev_mem_handle);
VX_API_ENTRY vx_status VX_API_CALL vxVerifyDeviceMemoryHandle(vx_context context, vx_size dev_mem_handle);
VX_API_ENTRY vx_status VX_API_CALL vxSetGraphInputTensor(vx_graph graph, int batch_idx, std::string name, std::vector<int>& dims, std::vector<int>& channels, int align_mode, vx_tensor tensor);
VX_API_ENTRY vx_status VX_API_CALL vxGetGraphInputBlob(vx_graph graph, std::map<std::string, std::vector<std::vector<void*>>>& input_blobs);
VX_API_ENTRY vx_status VX_API_CALL vxSetGraphOutputTensor(vx_graph graph, int batch_idx, bool final_output, std::string name, std::vector<int>& dims, std::vector<int>& channels, int align_mode, vx_tensor tensor);
VX_API_ENTRY vx_status VX_API_CALL vxGetGraphOutputBlob(vx_graph graph, std::map<std::string, std::vector<std::vector<void*>>>& output_blobs);
VX_API_ENTRY vx_status VX_API_CALL vxSetGraphInputBlobPosition(vx_graph graph, std::map<std::string, std::vector<std::pair<int, int>>>& input_data_pos, std::map<std::string, std::vector<std::vector<std::tuple<int, int, int>>>>& input_cmd_pos);
VX_API_ENTRY vx_status VX_API_CALL vxSetGraphInputHandle(vx_graph graph, std::string& name, vx_size handle, int batch_idx);
VX_API_ENTRY vx_status VX_API_CALL vxResetGraphInputHandle(vx_graph graph, std::string& name, int batch_idx);
VX_API_ENTRY vx_status VX_API_CALL vxSetGraphQuantizationParam(vx_graph graph, std::string& name, bool qen);
VX_API_ENTRY vx_uint64 VX_API_CALL vxGetSysTime();
VX_API_ENTRY vx_int32 VX_API_CALL vxGetDeviceID(vx_context context, const vx_char* dev_name);
VX_API_ENTRY vx_int32 VX_API_CALL vxGetDeviceVersion(vx_context context);
VX_API_ENTRY vx_status VX_API_CALL vxSetDeviceFreq(vx_context context, const vx_char* dev_name, int freq);
VX_API_ENTRY vx_status VX_API_CALL vxGetDeviceFreq(vx_context context, const vx_char* dev_name, int* freq);
VX_API_ENTRY vx_status VX_API_CALL vxSetDeviceTimeOut(vx_context context, const vx_char * dev_name, int timeout);
VX_API_ENTRY vx_status VX_API_CALL vxGetDeviceTimeOut(vx_context context, const vx_char * dev_name, int * timeout);
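
/*! \brief Usage sketch (illustrative addition): reading and adjusting device frequency
* and timeout through the accessors above. The device name "nna0" is a hypothetical
* placeholder; valid device names, frequency units, and timeout units are runtime
* specific, and a valid vx_context created elsewhere is assumed.
* \code
* int freq = 0;
* if (vxGetDeviceFreq(context, "nna0", &freq) == VX_SUCCESS) {
*     vxSetDeviceFreq(context, "nna0", freq);        // write back the current frequency
* }
* int timeout = 0;
* if (vxGetDeviceTimeOut(context, "nna0", &timeout) == VX_SUCCESS) {
*     vxSetDeviceTimeOut(context, "nna0", timeout);  // write back the current timeout
* }
* \endcode
*/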

VX_API_ENTRY vx_status VX_API_CALL vxNpuDeviceRun(vx_context context, int cmd_blk_num, int first_cmd_len, int warp_in_align64, vx_size internal_phys_base_addr, vx_size input_phys_base_addr, vx_size output_phys_base_addr, vx_size size_of_cmd, vx_uint32* cmd);
VX_API_ENTRY vx_status VX_API_CALL vxVpuDeviceRun(vx_context context, vx_enum vpu_op, void* cfgs, vx_size size_of_cfgs);
VX_API_ENTRY vx_status VX_API_CALL vxProfileGraph(vx_graph graph);
VX_API_ENTRY vx_status VX_API_CALL vxExecuteCommand(vx_context context);
VX_API_ENTRY vx_status VX_API_CALL vxFinish(vx_context context);
VX_API_ENTRY vx_status VX_API_CALL vxFlush(vx_context context);
VX_API_ENTRY vx_status VX_API_CALL vxWaitSync(vx_context context, vx_graph graph);
VX_API_ENTRY vx_status VX_API_CALL vxLockContext(vx_context context);   // internal use.
VX_API_ENTRY vx_status VX_API_CALL vxUnLockContext(vx_context context); // internal use.

#ifdef __cplusplus
}
#endif

#endif // _VX_EXT_AMD_H_