aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/common/habanalabs.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/common/habanalabs.h')
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h219
1 files changed, 164 insertions, 55 deletions
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 018d9d67e8e6..6ed974d2def0 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -8,21 +8,33 @@
#ifndef HABANALABSP_H_
#define HABANALABSP_H_
-#include "../include/common/armcp_if.h"
+#include "../include/common/cpucp_if.h"
#include "../include/common/qman_if.h"
#include <uapi/misc/habanalabs.h>
#include <linux/cdev.h>
#include <linux/iopoll.h>
#include <linux/irqreturn.h>
-#include <linux/dma-fence.h>
#include <linux/dma-direction.h>
#include <linux/scatterlist.h>
#include <linux/hashtable.h>
+#include <linux/bitfield.h>
#define HL_NAME "habanalabs"
-#define HL_MMAP_CB_MASK (0x8000000000000000ull >> PAGE_SHIFT)
+/* Use upper bits of mmap offset to store habana driver specific information.
+ * bits[63:62] - Encode mmap type
+ * bits[45:0] - mmap offset value
+ *
+ * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
+ * defines are w.r.t to PAGE_SIZE
+ */
+#define HL_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
+#define HL_MMAP_TYPE_MASK (0x3ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_CB (0x2ull << HL_MMAP_TYPE_SHIFT)
+
+#define HL_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFull >> PAGE_SHIFT)
+#define HL_MMAP_OFFSET_VALUE_GET(off) (off & HL_MMAP_OFFSET_VALUE_MASK)
#define HL_PENDING_RESET_PER_SEC 30
@@ -34,8 +46,8 @@
#define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */
-#define HL_ARMCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
-#define HL_ARMCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
+#define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
+#define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */
@@ -66,6 +78,8 @@
#define HL_PCI_NUM_BARS 6
+#define HL_MAX_DCORES 4
+
/**
* struct pgt_info - MMU hop page info.
* @node: hash linked-list node for the pgts shadow hash of pgts.
@@ -222,12 +236,15 @@ enum hl_device_hw_state {
* @hop2_shift: shift of hop 2 mask.
* @hop3_shift: shift of hop 3 mask.
* @hop4_shift: shift of hop 4 mask.
+ * @hop5_shift: shift of hop 5 mask.
* @hop0_mask: mask to get the PTE address in hop 0.
* @hop1_mask: mask to get the PTE address in hop 1.
* @hop2_mask: mask to get the PTE address in hop 2.
* @hop3_mask: mask to get the PTE address in hop 3.
* @hop4_mask: mask to get the PTE address in hop 4.
+ * @hop5_mask: mask to get the PTE address in hop 5.
* @page_size: default page size used to allocate memory.
+ * @num_hops: The amount of hops supported by the translation table.
*/
struct hl_mmu_properties {
u64 start_addr;
@@ -237,18 +254,21 @@ struct hl_mmu_properties {
u64 hop2_shift;
u64 hop3_shift;
u64 hop4_shift;
+ u64 hop5_shift;
u64 hop0_mask;
u64 hop1_mask;
u64 hop2_mask;
u64 hop3_mask;
u64 hop4_mask;
+ u64 hop5_mask;
u32 page_size;
+ u32 num_hops;
};
/**
* struct asic_fixed_properties - ASIC specific immutable properties.
* @hw_queues_props: H/W queues properties.
- * @armcp_info: received various information from ArmCP regarding the H/W, e.g.
+ * @cpucp_info: received various information from CPU-CP regarding the H/W, e.g.
* available sensors.
* @uboot_ver: F/W U-boot version.
* @preboot_ver: F/W Preboot version.
@@ -271,6 +291,10 @@ struct hl_mmu_properties {
* @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register.
* @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
* @mmu_dram_default_page_addr: DRAM default page physical address.
+ * @cb_va_start_addr: virtual start address of command buffers which are mapped
+ * to the device's MMU.
+ * @cb_va_end_addr: virtual end address of command buffers which are mapped to
+ * the device's MMU.
* @mmu_pgt_size: MMU page tables total size.
* @mmu_pte_size: PTE size in MMU page tables.
* @mmu_hop_table_size: MMU hop table size.
@@ -292,12 +316,16 @@ struct hl_mmu_properties {
* @max_queues: maximum amount of queues in the system
* @sync_stream_first_sob: first sync object available for sync stream use
* @sync_stream_first_mon: first monitor available for sync stream use
+ * @first_available_user_sob: first sob available for the user
+ * @first_available_user_mon: first monitor available for the user
* @tpc_enabled_mask: which TPCs are enabled.
* @completion_queues_count: number of completion queues.
+ * @fw_security_disabled: true if security measures are disabled in firmware,
+ * false otherwise
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
- struct armcp_info armcp_info;
+ struct cpucp_info cpucp_info;
char uboot_ver[VERSION_MAX_LEN];
char preboot_ver[VERSION_MAX_LEN];
struct hl_mmu_properties dmmu;
@@ -317,6 +345,8 @@ struct asic_fixed_properties {
u64 pcie_aux_dbi_reg_addr;
u64 mmu_pgt_addr;
u64 mmu_dram_default_page_addr;
+ u64 cb_va_start_addr;
+ u64 cb_va_end_addr;
u32 mmu_pgt_size;
u32 mmu_pte_size;
u32 mmu_hop_table_size;
@@ -338,13 +368,29 @@ struct asic_fixed_properties {
u32 max_queues;
u16 sync_stream_first_sob;
u16 sync_stream_first_mon;
+ u16 first_available_user_sob[HL_MAX_DCORES];
+ u16 first_available_user_mon[HL_MAX_DCORES];
u8 tpc_enabled_mask;
u8 completion_queues_count;
+ u8 fw_security_disabled;
+};
+
+/**
+ * struct hl_fence - software synchronization primitive
+ * @completion: fence is implemented using completion
+ * @refcount: refcount for this fence
+ * @error: mark this fence with error
+ *
+ */
+struct hl_fence {
+ struct completion completion;
+ struct kref refcount;
+ int error;
};
/**
* struct hl_cs_compl - command submission completion object.
- * @base_fence: kernel fence object.
+ * @base_fence: hl fence object.
* @lock: spinlock to protect fence.
* @hdev: habanalabs device structure.
* @hw_sob: the H/W SOB used in this signal/wait CS.
@@ -353,7 +399,7 @@ struct asic_fixed_properties {
* @sob_val: the SOB value that is used in this signal/wait CS.
*/
struct hl_cs_compl {
- struct dma_fence base_fence;
+ struct hl_fence base_fence;
spinlock_t lock;
struct hl_device *hdev;
struct hl_hw_sob *hw_sob;
@@ -380,36 +426,41 @@ struct hl_cb_mgr {
* struct hl_cb - describes a Command Buffer.
* @refcount: reference counter for usage of the CB.
* @hdev: pointer to device this CB belongs to.
+ * @ctx: pointer to the CB owner's context.
* @lock: spinlock to protect mmap/cs flows.
* @debugfs_list: node in debugfs list of command buffers.
* @pool_list: node in pool list of command buffers.
+ * @va_block_list: list of virtual addresses blocks of the CB if it is mapped to
+ * the device's MMU.
+ * @id: the CB's ID.
* @kernel_address: Holds the CB's kernel virtual address.
* @bus_address: Holds the CB's DMA address.
* @mmap_size: Holds the CB's size that was mmaped.
* @size: holds the CB's size.
- * @id: the CB's ID.
* @cs_cnt: holds number of CS that this CB participates in.
- * @ctx_id: holds the ID of the owner's context.
* @mmap: true if the CB is currently mmaped to user.
* @is_pool: true if CB was acquired from the pool, false otherwise.
* @is_internal: internaly allocated
+ * @is_mmu_mapped: true if the CB is mapped to the device's MMU.
*/
struct hl_cb {
struct kref refcount;
struct hl_device *hdev;
+ struct hl_ctx *ctx;
spinlock_t lock;
struct list_head debugfs_list;
struct list_head pool_list;
- u64 kernel_address;
+ struct list_head va_block_list;
+ u64 id;
+ void *kernel_address;
dma_addr_t bus_address;
u32 mmap_size;
u32 size;
- u32 id;
u32 cs_cnt;
- u32 ctx_id;
u8 mmap;
u8 is_pool;
u8 is_internal;
+ u8 is_mmu_mapped;
};
@@ -435,7 +486,7 @@ struct hl_cs_job;
#define HL_EQ_LENGTH 64
#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
-/* Host <-> ArmCP shared memory size */
+/* Host <-> CPU-CP shared memory size */
#define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M
/**
@@ -464,7 +515,7 @@ struct hl_hw_queue {
struct hl_hw_sob hw_sob[HL_RSVD_SOBS];
struct hl_cs_job **shadow_queue;
enum hl_queue_type queue_type;
- u64 kernel_address;
+ void *kernel_address;
dma_addr_t bus_address;
u32 pi;
atomic_t ci;
@@ -493,7 +544,7 @@ struct hl_hw_queue {
*/
struct hl_cq {
struct hl_device *hdev;
- u64 kernel_address;
+ void *kernel_address;
dma_addr_t bus_address;
u32 cq_idx;
u32 hw_queue_id;
@@ -511,7 +562,7 @@ struct hl_cq {
*/
struct hl_eq {
struct hl_device *hdev;
- u64 kernel_address;
+ void *kernel_address;
dma_addr_t bus_address;
u32 ci;
};
@@ -617,7 +668,7 @@ enum div_select_defs {
* @debugfs_read32: debug interface for reading u32 from DRAM/SRAM.
* @debugfs_write32: debug interface for writing u32 to DRAM/SRAM.
* @add_device_attr: add ASIC specific device attributes.
- * @handle_eqe: handle event queue entry (IRQ) from ArmCP.
+ * @handle_eqe: handle event queue entry (IRQ) from CPU-CP.
* @set_pll_profile: change PLL profile (manual/automatic).
* @get_events_stat: retrieve event queue entries histogram.
* @read_pte: read MMU page table entry from DRAM.
@@ -626,7 +677,7 @@ enum div_select_defs {
* (L1 only) or hard (L0 & L1) flush.
* @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
* ASID-VA-size mask.
- * @send_heartbeat: send is-alive packet to ArmCP and verify response.
+ * @send_heartbeat: send is-alive packet to CPU-CP and verify response.
* @set_clock_gating: enable/disable clock gating per engine according to
* clock gating mask in hdev
* @disable_clock_gating: disable clock gating completely
@@ -644,8 +695,6 @@ enum div_select_defs {
* ASIC
* @get_hw_state: retrieve the H/W state
* @pci_bars_map: Map PCI BARs.
- * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns
- * old address the bar pointed to or U64_MAX for failure
* @init_iatu: Initialize the iATU unit inside the PCI controller.
* @rreg: Read a register. Needed for simulator support.
* @wreg: Write a register. Needed for simulator support.
@@ -679,7 +728,7 @@ struct hl_asic_funcs {
int (*suspend)(struct hl_device *hdev);
int (*resume)(struct hl_device *hdev);
int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
- u64 kaddress, phys_addr_t paddress, u32 size);
+ void *cpu_addr, dma_addr_t dma_addr, size_t size);
void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
struct hl_bd *bd);
@@ -708,7 +757,7 @@ struct hl_asic_funcs {
u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
struct sg_table *sgt);
void (*add_end_of_cb_packets)(struct hl_device *hdev,
- u64 kernel_address, u32 len,
+ void *kernel_address, u32 len,
u64 cq_addr, u32 cq_val, u32 msix_num,
bool eb);
void (*update_eq_ci)(struct hl_device *hdev, u32 val);
@@ -736,7 +785,7 @@ struct hl_asic_funcs {
void (*set_clock_gating)(struct hl_device *hdev);
void (*disable_clock_gating)(struct hl_device *hdev);
int (*debug_coresight)(struct hl_device *hdev, void *data);
- bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
+ bool (*is_device_idle)(struct hl_device *hdev, u64 *mask,
struct seq_file *s);
int (*soft_reset_late_init)(struct hl_device *hdev);
void (*hw_queues_lock)(struct hl_device *hdev);
@@ -748,7 +797,6 @@ struct hl_asic_funcs {
u16 len, u32 timeout, long *result);
enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
int (*pci_bars_map)(struct hl_device *hdev);
- u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
int (*init_iatu)(struct hl_device *hdev);
u32 (*rreg)(struct hl_device *hdev, u32 reg);
void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
@@ -800,7 +848,7 @@ struct hl_va_range {
* @hdev: pointer to the device structure.
* @refcount: reference counter for the context. Context is released only when
* this hits 0l. It is incremented on CS and CS_WAIT.
- * @cs_pending: array of DMA fence objects representing pending CS.
+ * @cs_pending: array of hl fence objects representing pending CS.
* @host_va_range: holds available virtual addresses for host mappings.
* @host_huge_va_range: holds available virtual addresses for host mappings
* with huge pages.
@@ -809,6 +857,8 @@ struct hl_va_range {
* @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
* MMU hash or walking the PGT requires talking this lock.
* @debugfs_list: node in debugfs list of contexts.
+ * @cb_va_pool: device VA pool for command buffers which are mapped to the
+ * device's MMU.
* @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
* to user so user could inquire about CS. It is used as
* index to cs_pending array.
@@ -832,7 +882,7 @@ struct hl_ctx {
struct hl_fpriv *hpriv;
struct hl_device *hdev;
struct kref refcount;
- struct dma_fence **cs_pending;
+ struct hl_fence **cs_pending;
struct hl_va_range *host_va_range;
struct hl_va_range *host_huge_va_range;
struct hl_va_range *dram_va_range;
@@ -840,6 +890,7 @@ struct hl_ctx {
struct mutex mmu_lock;
struct list_head debugfs_list;
struct hl_cs_counters cs_counters;
+ struct gen_pool *cb_va_pool;
u64 cs_sequence;
u64 *dram_default_hops;
spinlock_t cs_lock;
@@ -919,8 +970,8 @@ struct hl_cs {
struct list_head job_list;
spinlock_t job_lock;
struct kref refcount;
- struct dma_fence *fence;
- struct dma_fence *signal_fence;
+ struct hl_fence *fence;
+ struct hl_fence *signal_fence;
struct work_struct finish_work;
struct delayed_work work_tdr;
struct list_head mirror_node;
@@ -1331,13 +1382,13 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
for (;;) { \
/* Verify we read updates done by other cores or by device */ \
mb(); \
- (val) = *((u32 *) (uintptr_t) (addr)); \
+ (val) = *((u32 *)(addr)); \
if (mem_written_by_device) \
(val) = le32_to_cpu(*(__le32 *) &(val)); \
if (cond) \
break; \
if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
- (val) = *((u32 *) (uintptr_t) (addr)); \
+ (val) = *((u32 *)(addr)); \
if (mem_written_by_device) \
(val) = le32_to_cpu(*(__le32 *) &(val)); \
break; \
@@ -1395,6 +1446,44 @@ struct hl_device_idle_busy_ts {
ktime_t busy_to_idle_ts;
};
+
+/**
+ * struct hl_mmu_priv - used for holding per-device mmu internal information.
+ * @mmu_pgt_pool: pool of page tables used by MMU for allocating hops.
+ * @mmu_shadow_hop0: shadow array of hop0 tables.
+ */
+struct hl_mmu_priv {
+ struct gen_pool *mmu_pgt_pool;
+ void *mmu_shadow_hop0;
+};
+
+/**
+ * struct hl_mmu_funcs - Device related MMU functions.
+ * @init: initialize the MMU module.
+ * @fini: release the MMU module.
+ * @ctx_init: Initialize a context for using the MMU module.
+ * @ctx_fini: disable a ctx from using the mmu module.
+ * @map: maps a virtual address to physical address for a context.
+ * @unmap: unmap a virtual address of a context.
+ * @flush: flush all writes from all cores to reach device MMU.
+ * @swap_out: marks all mapping of the given context as swapped out.
+ * @swap_in: marks all mapping of the given context as swapped in.
+ */
+struct hl_mmu_funcs {
+ int (*init)(struct hl_device *hdev);
+ void (*fini)(struct hl_device *hdev);
+ int (*ctx_init)(struct hl_ctx *ctx);
+ void (*ctx_fini)(struct hl_ctx *ctx);
+ int (*map)(struct hl_ctx *ctx,
+ u64 virt_addr, u64 phys_addr, u32 page_size,
+ bool is_dram_addr);
+ int (*unmap)(struct hl_ctx *ctx,
+ u64 virt_addr, bool is_dram_addr);
+ void (*flush)(struct hl_ctx *ctx);
+ void (*swap_out)(struct hl_ctx *ctx);
+ void (*swap_in)(struct hl_ctx *ctx);
+};
+
/**
* struct hl_device - habanalabs device structure.
* @pdev: pointer to PCI device, can be NULL in case of simulator device.
@@ -1407,8 +1496,8 @@ struct hl_device_idle_busy_ts {
* @dev: related kernel basic device structure.
* @dev_ctrl: related kernel device structure for the control device
* @work_freq: delayed work to lower device frequency if possible.
- * @work_heartbeat: delayed work for ArmCP is-alive check.
- * @asic_name: ASIC specific nmae.
+ * @work_heartbeat: delayed work for CPU-CP is-alive check.
+ * @asic_name: ASIC specific name.
* @asic_type: ASIC specific type.
* @completion_queue: array of hl_cq.
* @cq_wq: work queues of completion queues for executing work in process
@@ -1419,22 +1508,20 @@ struct hl_device_idle_busy_ts {
* @hw_queues_mirror_list: CS mirror list for TDR.
* @hw_queues_mirror_lock: protects hw_queues_mirror_list.
* @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
- * @event_queue: event queue for IRQ from ArmCP.
+ * @event_queue: event queue for IRQ from CPU-CP.
* @dma_pool: DMA pool for small allocations.
- * @cpu_accessible_dma_mem: Host <-> ArmCP shared memory CPU address.
- * @cpu_accessible_dma_address: Host <-> ArmCP shared memory DMA address.
- * @cpu_accessible_dma_pool: Host <-> ArmCP shared memory pool.
+ * @cpu_accessible_dma_mem: Host <-> CPU-CP shared memory CPU address.
+ * @cpu_accessible_dma_address: Host <-> CPU-CP shared memory DMA address.
+ * @cpu_accessible_dma_pool: Host <-> CPU-CP shared memory pool.
* @asid_bitmap: holds used/available ASIDs.
* @asid_mutex: protects asid_bitmap.
- * @send_cpu_message_lock: enforces only one message in Host <-> ArmCP queue.
+ * @send_cpu_message_lock: enforces only one message in Host <-> CPU-CP queue.
* @debug_lock: protects critical section of setting debug mode for device
* @asic_prop: ASIC specific immutable properties.
* @asic_funcs: ASIC specific functions.
* @asic_specific: ASIC specific information to use only from ASIC files.
- * @mmu_pgt_pool: pool of available MMU hops.
* @vm: virtual memory manager for MMU.
* @mmu_cache_lock: protects MMU cache invalidation as it can serve one context.
- * @mmu_shadow_hop0: shadow mapping of the MMU hop 0 zone.
* @hwmon_dev: H/W monitor device.
* @pm_mng_profile: current power management profile.
* @hl_chip_info: ASIC's sensors information.
@@ -1452,6 +1539,8 @@ struct hl_device_idle_busy_ts {
* @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
* and vice-versa
* @aggregated_cs_counters: aggregated cs counters among all contexts
+ * @mmu_priv: device-specific MMU data.
+ * @mmu_func: device-related MMU functions.
* @dram_used_mem: current DRAM memory consumption.
* @timeout_jiffies: device CS timeout value.
* @max_power: the max power of the device, as configured by the sysadmin. This
@@ -1462,6 +1551,8 @@ struct hl_device_idle_busy_ts {
* details.
* @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile.
+ * @card_type: Various ASICs have several card types. This indicates the card
+ * type of the current device.
* @cs_active_cnt: number of active command submissions on this device (active
* means already in H/W queues)
* @major: habanalabs kernel driver major.
@@ -1469,6 +1560,7 @@ struct hl_device_idle_busy_ts {
* @soft_reset_cnt: number of soft reset since the driver was loaded.
* @hard_reset_cnt: number of hard reset since the driver was loaded.
* @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
+ * @clk_throttling_reason: bitmask represents the current clk throttling reasons
* @id: device minor.
* @id_control: minor of the control device
* @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
@@ -1477,7 +1569,7 @@ struct hl_device_idle_busy_ts {
* @late_init_done: is late init stage was done during initialization.
* @hwmon_initialized: is H/W monitor sensors was initialized.
* @hard_reset_pending: is there a hard reset work pending.
- * @heartbeat: is heartbeat sanity check towards ArmCP enabled.
+ * @heartbeat: is heartbeat sanity check towards CPU-CP enabled.
* @reset_on_lockup: true if a reset should be done in case of stuck CS, false
* otherwise.
* @dram_supports_virtual_memory: is MMU enabled towards DRAM.
@@ -1499,6 +1591,7 @@ struct hl_device_idle_busy_ts {
* @sync_stream_queue_idx: helper index for sync stream queues initialization.
* @supports_coresight: is CoreSight supported.
* @supports_soft_reset: is soft reset supported.
+ * @supports_cb_mapping: is mapping a CB to the device's MMU supported.
*/
struct hl_device {
struct pci_dev *pdev;
@@ -1511,7 +1604,7 @@ struct hl_device {
struct device *dev_ctrl;
struct delayed_work work_freq;
struct delayed_work work_heartbeat;
- char asic_name[16];
+ char asic_name[32];
enum hl_asic_type asic_type;
struct hl_cq *completion_queue;
struct workqueue_struct **cq_wq;
@@ -1533,10 +1626,8 @@ struct hl_device {
struct asic_fixed_properties asic_prop;
const struct hl_asic_funcs *asic_funcs;
void *asic_specific;
- struct gen_pool *mmu_pgt_pool;
struct hl_vm vm;
struct mutex mmu_cache_lock;
- void *mmu_shadow_hop0;
struct device *hwmon_dev;
enum hl_pm_mng_profile pm_mng_profile;
struct hwmon_chip_info *hl_chip_info;
@@ -1560,18 +1651,23 @@ struct hl_device {
struct hl_cs_counters aggregated_cs_counters;
+ struct hl_mmu_priv mmu_priv;
+ struct hl_mmu_funcs mmu_func;
+
atomic64_t dram_used_mem;
u64 timeout_jiffies;
u64 max_power;
u64 clock_gating_mask;
atomic_t in_reset;
enum hl_pll_frequency curr_pll_profile;
+ enum cpucp_card_types card_type;
int cs_active_cnt;
u32 major;
u32 high_pll;
u32 soft_reset_cnt;
u32 hard_reset_cnt;
u32 idle_busy_ts_idx;
+ u32 clk_throttling_reason;
u16 id;
u16 id_control;
u16 cpu_pci_msb_addr;
@@ -1595,6 +1691,7 @@ struct hl_device {
u8 sync_stream_queue_idx;
u8 supports_coresight;
u8 supports_soft_reset;
+ u8 supports_cb_mapping;
/* Parameters for bring-up */
u8 mmu_enable;
@@ -1651,7 +1748,7 @@ struct hl_ioctl_desc {
*
* Return: true if the area is inside the valid range, false otherwise.
*/
-static inline bool hl_mem_area_inside_range(u64 address, u32 size,
+static inline bool hl_mem_area_inside_range(u64 address, u64 size,
u64 range_start_address, u64 range_end_address)
{
u64 end_address = address + size;
@@ -1736,7 +1833,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
void hl_ctx_do_release(struct kref *ref);
void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
int hl_ctx_put(struct hl_ctx *ctx);
-struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
+struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
@@ -1752,7 +1849,7 @@ int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
int hl_build_hwmon_channel_info(struct hl_device *hdev,
- struct armcp_sensor *sensors_arr);
+ struct cpucp_sensor *sensors_arr);
int hl_sysfs_init(struct hl_device *hdev);
void hl_sysfs_fini(struct hl_device *hdev);
@@ -1760,8 +1857,9 @@ void hl_sysfs_fini(struct hl_device *hdev);
int hl_hwmon_init(struct hl_device *hdev);
void hl_hwmon_fini(struct hl_device *hdev);
-int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
- u64 *handle, int ctx_id, bool internal_cb);
+int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
+ struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
+ bool map_cb, u64 *handle);
int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
@@ -1773,11 +1871,15 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
bool internal_cb);
int hl_cb_pool_init(struct hl_device *hdev);
int hl_cb_pool_fini(struct hl_device *hdev);
+int hl_cb_va_pool_init(struct hl_ctx *ctx);
+void hl_cb_va_pool_fini(struct hl_ctx *ctx);
void hl_cs_rollback_all(struct hl_device *hdev);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
void hl_sob_reset_error(struct kref *ref);
+void hl_fence_put(struct hl_fence *fence);
+void hl_fence_get(struct hl_fence *fence);
void goya_set_asic_funcs(struct hl_device *hdev);
void gaudi_set_asic_funcs(struct hl_device *hdev);
@@ -1807,6 +1909,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
bool flush_pte);
void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx);
+int hl_mmu_if_set_funcs(struct hl_device *hdev);
+void hl_mmu_v1_set_funcs(struct hl_device *hdev);
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
void __iomem *dst);
@@ -1822,23 +1926,28 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr);
int hl_fw_send_heartbeat(struct hl_device *hdev);
-int hl_fw_armcp_info_get(struct hl_device *hdev);
+int hl_fw_cpucp_info_get(struct hl_device *hdev);
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
+int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
+ struct hl_info_pci_counters *counters);
+int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
+ u64 *total_energy);
int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
u32 boot_err0_reg, bool skip_bmc,
u32 cpu_timeout, u32 boot_fit_timeout);
+int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg,
+ u32 boot_err0_reg, u32 timeout);
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
bool is_wc[3]);
int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
-int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
- u64 addr);
int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
struct hl_inbound_pci_region *pci_region);
int hl_pci_set_outbound_region(struct hl_device *hdev,
struct hl_outbound_pci_region *pci_region);
-int hl_pci_init(struct hl_device *hdev);
+int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
+ u32 boot_err0_reg, u32 preboot_ver_timeout);
void hl_pci_fini(struct hl_device *hdev);
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
@@ -1858,7 +1967,7 @@ int hl_get_pwm_info(struct hl_device *hdev,
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
long value);
u64 hl_get_max_power(struct hl_device *hdev);
-void hl_set_max_power(struct hl_device *hdev, u64 value);
+void hl_set_max_power(struct hl_device *hdev);
int hl_set_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev,