diff options
author | 2020-07-24 20:22:25 +0200 | |
---|---|---|
committer | 2020-07-24 20:22:25 +0200 | |
commit | 860e73b49cd933c708e3e1e1e07cdea81b6acd1c (patch) | |
tree | d79deee0e36096f56cadcb9abb0be9864bac9e7c /include | |
parent | Merge tag 'icc-5.9-rc1' of https://git.linaro.org/people/georgi.djakov/linux into char-misc-next (diff) | |
parent | habanalabs: Fix memory leak in error flow of context initialization (diff) | |
download | wireguard-linux-860e73b49cd933c708e3e1e1e07cdea81b6acd1c.tar.xz wireguard-linux-860e73b49cd933c708e3e1e1e07cdea81b6acd1c.zip |
Merge tag 'misc-habanalabs-next-2020-07-24' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next
Oded writes:
This tag contains the following changes for kernel 5.9-rc1:
- Remove rate limiters from GAUDI configuration (no longer needed).
- Set maximum amount of in-flight CS per ASIC type and increase
the maximum amount for GAUDI.
- Refactor signal/wait command submissions code
- Calculate trace frequency from PLLs to show accurate profiling data
- Rephrase error messages to make them more clear to the common user
- Add statistics of dropped CS (counter per possible reason for drop)
- Get ECC information from firmware
- Remove support for partial SoC reset in Gaudi
- Halt device CPU only when reset is certain to happen. Sometimes we abort
the reset procedure and in that case we can't leave device CPU in halt
mode.
- set each CQ to its own work queue to prevent a race between completions
on different CQs.
- Use queue pi/ci in order to determine queue occupancy. This is done to
make the code reusable between current and future ASICs.
- Add more validations for user inputs.
- Refactor PCIe controller configuration to make the code reusable between
current and future ASICs.
- Update firmware interface headers to latest version
- Move all common code to a dedicated common sub-folder
* tag 'misc-habanalabs-next-2020-07-24' of git://people.freedesktop.org/~gabbayo/linux: (28 commits)
habanalabs: Fix memory leak in error flow of context initialization
habanalabs: use no flags on MMU cache invalidation
habanalabs: enable device before hw_init()
habanalabs: create internal CB pool
habanalabs: update hl_boot_if.h from firmware
habanalabs: create common folder
habanalabs: check for DMA errors when clearing memory
habanalabs: verify queue can contain all cs jobs
habanalabs: Assign each CQ with its own work queue
habanalabs: halt device CPU only upon certain reset
habanalabs: remove unused hash
habanalabs: use queue pi/ci in order to determine queue occupancy
habanalabs: configure maximum queues per asic
habanalabs: remove soft-reset support from GAUDI
habanalabs: PCIe iATU refactoring
habanalabs: Extract ECC information from FW
habanalabs: Add dropped cs statistics info struct
habanalabs: extract cpu boot status lookup
habanalabs: rephrase error messages
habanalabs: Increase queues depth
...
Diffstat (limited to 'include')
-rw-r--r-- | include/uapi/misc/habanalabs.h | 27 |
1 files changed, 24 insertions, 3 deletions
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index f6267a8d7416..d5c4f983b7a8 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -263,6 +263,7 @@ enum hl_device_status { * time the driver was loaded. * HL_INFO_TIME_SYNC - Retrieve the device's time alongside the host's time * for synchronization. + * HL_INFO_CS_COUNTERS - Retrieve command submission counters */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -274,6 +275,7 @@ enum hl_device_status { #define HL_INFO_CLK_RATE 8 #define HL_INFO_RESET_COUNT 9 #define HL_INFO_TIME_SYNC 10 +#define HL_INFO_CS_COUNTERS 11 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -338,6 +340,25 @@ struct hl_info_time_sync { __u64 host_time; }; +/** + * struct hl_info_cs_counters - command submission counters + * @out_of_mem_drop_cnt: dropped due to memory allocation issue + * @parsing_drop_cnt: dropped due to error in packet parsing + * @queue_full_drop_cnt: dropped due to queue full + * @device_in_reset_drop_cnt: dropped due to device in reset + */ +struct hl_cs_counters { + __u64 out_of_mem_drop_cnt; + __u64 parsing_drop_cnt; + __u64 queue_full_drop_cnt; + __u64 device_in_reset_drop_cnt; +}; + +struct hl_info_cs_counters { + struct hl_cs_counters cs_counters; + struct hl_cs_counters ctx_cs_counters; +}; + struct hl_info_args { /* Location of relevant struct in userspace */ __u64 return_pointer; @@ -530,13 +551,13 @@ union hl_wait_cs_args { struct hl_wait_cs_out out; }; -/* Opcode to alloc device memory */ +/* Opcode to allocate device memory */ #define HL_MEM_OP_ALLOC 0 /* Opcode to free previously allocated device memory */ #define HL_MEM_OP_FREE 1 -/* Opcode to map host memory */ +/* Opcode to map host and device memory */ #define HL_MEM_OP_MAP 2 -/* Opcode to unmap previously mapped host memory */ +/* Opcode to unmap previously mapped host and device memory */ #define HL_MEM_OP_UNMAP 3 /* Memory flags */ |