aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x8
-rw-r--r--Documentation/ABI/testing/sysfs-devices-hisi_ptt61
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-catu.yaml3
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml3
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-dynamic-funnel.yaml3
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-dynamic-replicator.yaml3
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-etb10.yaml3
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-etm.yaml3
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-static-funnel.yaml3
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-static-replicator.yaml3
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-stm.yaml3
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml6
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-tpiu.yaml3
-rw-r--r--Documentation/devicetree/bindings/arm/arm,embedded-trace-extension.yaml3
-rw-r--r--Documentation/trace/coresight/coresight-cpu-debug.rst3
-rw-r--r--Documentation/trace/coresight/coresight-etm4x-reference.rst14
-rw-r--r--Documentation/trace/hisi-ptt.rst298
-rw-r--r--Documentation/trace/index.rst1
-rw-r--r--MAINTAINERS8
-rw-r--r--arch/arm64/include/asm/sysreg.h1
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/hwtracing/Kconfig2
-rw-r--r--drivers/hwtracing/coresight/Kconfig4
-rw-r--r--drivers/hwtracing/coresight/coresight-catu.c27
-rw-r--r--drivers/hwtracing/coresight/coresight-catu.h8
-rw-r--r--drivers/hwtracing/coresight/coresight-core.c28
-rw-r--r--drivers/hwtracing/coresight/coresight-cti-sysfs.c213
-rw-r--r--drivers/hwtracing/coresight/coresight-etb10.c28
-rw-r--r--drivers/hwtracing/coresight/coresight-etm3x-sysfs.c34
-rw-r--r--drivers/hwtracing/coresight/coresight-etm4x-sysfs.c29
-rw-r--r--drivers/hwtracing/coresight/coresight-priv.h74
-rw-r--r--drivers/hwtracing/coresight/coresight-replicator.c10
-rw-r--r--drivers/hwtracing/coresight/coresight-stm.c40
-rw-r--r--drivers/hwtracing/coresight/coresight-tmc-core.c48
-rw-r--r--drivers/hwtracing/coresight/coresight-tmc.h4
-rw-r--r--drivers/hwtracing/ptt/Kconfig12
-rw-r--r--drivers/hwtracing/ptt/Makefile2
-rw-r--r--drivers/hwtracing/ptt/hisi_ptt.c1046
-rw-r--r--drivers/hwtracing/ptt/hisi_ptt.h200
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c21
-rw-r--r--include/linux/coresight.h23
41 files changed, 1974 insertions, 313 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
index 8e53a32f8150..08b1964f27d3 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
@@ -516,3 +516,11 @@ Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (Read) Returns the number of special conditional P1 right-hand keys
that the trace unit can use (0x194). The value is taken
directly from the HW.
+
+What: /sys/bus/coresight/devices/etm<N>/ts_source
+Date: October 2022
+KernelVersion: 6.1
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org> or Suzuki K Poulose <suzuki.poulose@arm.com>
+Description: (Read) When FEAT_TRF is implemented, value of TRFCR_ELx.TS used for
+ trace session. Otherwise -1 indicates an unknown time source. Check
+ trcidr0.tssize to see if a global timestamp is available.
diff --git a/Documentation/ABI/testing/sysfs-devices-hisi_ptt b/Documentation/ABI/testing/sysfs-devices-hisi_ptt
new file mode 100644
index 000000000000..82de6d710266
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-hisi_ptt
@@ -0,0 +1,61 @@
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: This directory contains files for tuning the PCIe link
+ parameters(events). Each file is named after the event
+ of the PCIe link.
+
+ See Documentation/trace/hisi-ptt.rst for more information.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune/qos_tx_cpl
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Controls the weight of Tx completion TLPs, which influence
+ the proportion of outbound completion TLPs on the PCIe link.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune/qos_tx_np
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Controls the weight of Tx non-posted TLPs, which influence
+ the proportion of outbound non-posted TLPs on the PCIe link.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune/qos_tx_p
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Controls the weight of Tx posted TLPs, which influence the
+ proportion of outbound posted TLPs on the PCIe link.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune/rx_alloc_buf_level
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Control the allocated buffer watermark for inbound packets.
+ The packets will be stored in the buffer first and then transmitted
+ either when the watermark reached or when timed out.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune/tx_alloc_buf_level
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Control the allocated buffer watermark of outbound packets.
+ The packets will be stored in the buffer first and then transmitted
+ either when the watermark reached or when timed out.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-catu.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-catu.yaml
index d783d9276124..2bae06eed693 100644
--- a/Documentation/devicetree/bindings/arm/arm,coresight-catu.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-catu.yaml
@@ -61,6 +61,9 @@ properties:
maxItems: 1
description: Address translation error interrupt
+ power-domains:
+ maxItems: 1
+
in-ports:
$ref: /schemas/graph.yaml#/properties/ports
additionalProperties: false
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml
index 72ffe4d1e948..0c5b875cb654 100644
--- a/Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml
@@ -98,6 +98,9 @@ properties:
base cti node if compatible string arm,coresight-cti-v8-arch is used,
or may appear in a trig-conns child node when appropriate.
+ power-domains:
+ maxItems: 1
+
arm,cti-ctm-id:
$ref: /schemas/types.yaml#/definitions/uint32
description:
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-funnel.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-funnel.yaml
index 1eeedc22857c..44a1041cb0fc 100644
--- a/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-funnel.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-funnel.yaml
@@ -54,6 +54,9 @@ properties:
- const: apb_pclk
- const: atclk
+ power-domains:
+ maxItems: 1
+
in-ports:
$ref: /schemas/graph.yaml#/properties/ports
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-replicator.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-replicator.yaml
index a26ed9214e00..03792e9bd97a 100644
--- a/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-replicator.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-dynamic-replicator.yaml
@@ -54,6 +54,9 @@ properties:
- const: apb_pclk
- const: atclk
+ power-domains:
+ maxItems: 1
+
qcom,replicator-loses-context:
type: boolean
description:
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-etb10.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-etb10.yaml
index fd06ede26ceb..90679788e0bf 100644
--- a/Documentation/devicetree/bindings/arm/arm,coresight-etb10.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-etb10.yaml
@@ -54,6 +54,9 @@ properties:
- const: apb_pclk
- const: atclk
+ power-domains:
+ maxItems: 1
+
in-ports:
$ref: /schemas/graph.yaml#/properties/ports
additionalProperties: false
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-etm.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-etm.yaml
index e0377ce48537..01200f67504a 100644
--- a/Documentation/devicetree/bindings/arm/arm,coresight-etm.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-etm.yaml
@@ -73,6 +73,9 @@ properties:
- const: apb_pclk
- const: atclk
+ power-domains:
+ maxItems: 1
+
arm,coresight-loses-context-with-cpu:
type: boolean
description:
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-static-funnel.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-static-funnel.yaml
index 374083956b20..cc8c3baa79b4 100644
--- a/Documentation/devicetree/bindings/arm/arm,coresight-static-funnel.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-static-funnel.yaml
@@ -27,6 +27,9 @@ properties:
compatible:
const: arm,coresight-static-funnel
+ power-domains:
+ maxItems: 1
+
in-ports:
$ref: /schemas/graph.yaml#/properties/ports
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-static-replicator.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-static-replicator.yaml
index a34d8583830c..1892a091ac35 100644
--- a/Documentation/devicetree/bindings/arm/arm,coresight-static-replicator.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-static-replicator.yaml
@@ -27,6 +27,9 @@ properties:
compatible:
const: arm,coresight-static-replicator
+ power-domains:
+ maxItems: 1
+
in-ports:
$ref: /schemas/graph.yaml#/properties/ports
additionalProperties: false
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-stm.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-stm.yaml
index 905008faa012..378380c3f5aa 100644
--- a/Documentation/devicetree/bindings/arm/arm,coresight-stm.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-stm.yaml
@@ -61,6 +61,9 @@ properties:
- const: apb_pclk
- const: atclk
+ power-domains:
+ maxItems: 1
+
out-ports:
$ref: /schemas/graph.yaml#/properties/ports
additionalProperties: false
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml
index 3463b6e53aef..cb8dceaca70e 100644
--- a/Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml
@@ -55,6 +55,12 @@ properties:
- const: apb_pclk
- const: atclk
+ iommus:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
arm,buffer-size:
$ref: /schemas/types.yaml#/definitions/uint32
deprecated: true
diff --git a/Documentation/devicetree/bindings/arm/arm,coresight-tpiu.yaml b/Documentation/devicetree/bindings/arm/arm,coresight-tpiu.yaml
index e80d48200c37..61a0cdc27745 100644
--- a/Documentation/devicetree/bindings/arm/arm,coresight-tpiu.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,coresight-tpiu.yaml
@@ -54,6 +54,9 @@ properties:
- const: apb_pclk
- const: atclk
+ power-domains:
+ maxItems: 1
+
in-ports:
$ref: /schemas/graph.yaml#/properties/ports
additionalProperties: false
diff --git a/Documentation/devicetree/bindings/arm/arm,embedded-trace-extension.yaml b/Documentation/devicetree/bindings/arm/arm,embedded-trace-extension.yaml
index 5f07fb166c56..108460627d9a 100644
--- a/Documentation/devicetree/bindings/arm/arm,embedded-trace-extension.yaml
+++ b/Documentation/devicetree/bindings/arm/arm,embedded-trace-extension.yaml
@@ -33,6 +33,9 @@ properties:
Handle to the cpu this ETE is bound to.
$ref: /schemas/types.yaml#/definitions/phandle
+ power-domains:
+ maxItems: 1
+
out-ports:
description: |
Output connections from the ETE to legacy CoreSight trace bus.
diff --git a/Documentation/trace/coresight/coresight-cpu-debug.rst b/Documentation/trace/coresight/coresight-cpu-debug.rst
index 993dd294b81b..836b35532667 100644
--- a/Documentation/trace/coresight/coresight-cpu-debug.rst
+++ b/Documentation/trace/coresight/coresight-cpu-debug.rst
@@ -117,7 +117,8 @@ divide into below cases:
Device Tree Bindings
--------------------
-See Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt for details.
+See Documentation/devicetree/bindings/arm/arm,coresight-cpu-debug.yaml for
+details.
How to use the module
diff --git a/Documentation/trace/coresight/coresight-etm4x-reference.rst b/Documentation/trace/coresight/coresight-etm4x-reference.rst
index fb7578fd9372..70e34b8c81c1 100644
--- a/Documentation/trace/coresight/coresight-etm4x-reference.rst
+++ b/Documentation/trace/coresight/coresight-etm4x-reference.rst
@@ -71,6 +71,20 @@ the ‘TRC’ prefix.
----
+:File: ``ts_source`` (ro)
+:Trace Registers: None.
+:Notes:
+ When FEAT_TRF is implemented, value of TRFCR_ELx.TS used for trace session. Otherwise -1
+ indicates an unknown time source. Check trcidr0.tssize to see if a global timestamp is
+ available.
+
+:Example:
+ ``$> cat ts_source``
+
+ ``$> 1``
+
+----
+
:File: ``addr_idx`` (rw)
:Trace Registers: None.
:Notes:
diff --git a/Documentation/trace/hisi-ptt.rst b/Documentation/trace/hisi-ptt.rst
new file mode 100644
index 000000000000..4f87d8e21065
--- /dev/null
+++ b/Documentation/trace/hisi-ptt.rst
@@ -0,0 +1,298 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================
+HiSilicon PCIe Tune and Trace device
+======================================
+
+Introduction
+============
+
+HiSilicon PCIe tune and trace device (PTT) is a PCIe Root Complex
+integrated Endpoint (RCiEP) device, providing the capability
+to dynamically monitor and tune the PCIe link's events (tune),
+and trace the TLP headers (trace). The two functions are independent,
+but is recommended to use them together to analyze and enhance the
+PCIe link's performance.
+
+On Kunpeng 930 SoC, the PCIe Root Complex is composed of several
+PCIe cores. Each PCIe core includes several Root Ports and a PTT
+RCiEP, like below. The PTT device is capable of tuning and
+tracing the links of the PCIe core.
+::
+
+ +--------------Core 0-------+
+ | | [ PTT ] |
+ | | [Root Port]---[Endpoint]
+ | | [Root Port]---[Endpoint]
+ | | [Root Port]---[Endpoint]
+ Root Complex |------Core 1-------+
+ | | [ PTT ] |
+ | | [Root Port]---[ Switch ]---[Endpoint]
+ | | [Root Port]---[Endpoint] `-[Endpoint]
+ | | [Root Port]---[Endpoint]
+ +---------------------------+
+
+The PTT device driver registers one PMU device for each PTT device.
+The name of each PTT device is composed of 'hisi_ptt' prefix with
+the id of the SICL and the Core where it locates. The Kunpeng 930
+SoC encapsulates multiple CPU dies (SCCL, Super CPU Cluster) and
+IO dies (SICL, Super I/O Cluster), where there's one PCIe Root
+Complex for each SICL.
+::
+
+ /sys/devices/hisi_ptt<sicl_id>_<core_id>
+
+Tune
+====
+
+PTT tune is designed for monitoring and adjusting PCIe link parameters (events).
+Currently we support events in 2 classes. The scope of the events
+covers the PCIe core to which the PTT device belongs.
+
+Each event is presented as a file under $(PTT PMU dir)/tune, and
+a simple open/read/write/close cycle will be used to tune the event.
+::
+
+ $ cd /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune
+ $ ls
+ qos_tx_cpl qos_tx_np qos_tx_p
+ tx_path_rx_req_alloc_buf_level
+ tx_path_tx_req_alloc_buf_level
+ $ cat qos_tx_dp
+ 1
+ $ echo 2 > qos_tx_dp
+ $ cat qos_tx_dp
+ 2
+
+Current value (numerical value) of the event can be simply read
+from the file, and the desired value written to the file to tune.
+
+1. Tx Path QoS Control
+------------------------
+
+The following files are provided to tune the QoS of the tx path of
+the PCIe core.
+
+- qos_tx_cpl: weight of Tx completion TLPs
+- qos_tx_np: weight of Tx non-posted TLPs
+- qos_tx_p: weight of Tx posted TLPs
+
+The weight influences the proportion of certain packets on the PCIe link.
+For example, for the storage scenario, increase the proportion
+of the completion packets on the link to enhance the performance as
+more completions are consumed.
+
+The available tune data of these events is [0, 1, 2].
+Writing a negative value will return an error, and out of range
+values will be converted to 2. Note that the event value just
+indicates a probable level, but is not precise.
+
+2. Tx Path Buffer Control
+-------------------------
+
+Following files are provided to tune the buffer of tx path of the PCIe core.
+
+- rx_alloc_buf_level: watermark of Rx requested
+- tx_alloc_buf_level: watermark of Tx requested
+
+These events influence the watermark of the buffer allocated for each
+type. Rx means the inbound while Tx means outbound. The packets will
+be stored in the buffer first and then transmitted either when the
+watermark reached or when timed out. For a busy direction, you should
+increase the related buffer watermark to avoid frequently posting and
+thus enhance the performance. In most cases just keep the default value.
+
+The available tune data of above events is [0, 1, 2].
+Writing a negative value will return an error, and out of range
+values will be converted to 2. Note that the event value just
+indicates a probable level, but is not precise.
+
+Trace
+=====
+
+PTT trace is designed for dumping the TLP headers to the memory, which
+can be used to analyze the transactions and usage condition of the PCIe
+Link. You can choose to filter the traced headers by either Requester ID,
+or those downstream of a set of Root Ports on the same core of the PTT
+device. It's also supported to trace the headers of certain type and of
+certain direction.
+
+You can use the perf command `perf record` to set the parameters, start
+trace and get the data. It's also supported to decode the trace
+data with `perf report`. The control parameters for trace is inputted
+as event code for each events, which will be further illustrated later.
+An example usage is like
+::
+
+ $ perf record -e hisi_ptt0_2/filter=0x80001,type=1,direction=1,
+ format=1/ -- sleep 5
+
+This will trace the TLP headers downstream root port 0000:00:10.1 (event
+code for event 'filter' is 0x80001) with type of posted TLP requests,
+direction of inbound and traced data format of 8DW.
+
+1. Filter
+---------
+
+The TLP headers to trace can be filtered by the Root Ports or the Requester ID
+of the Endpoint, which are located on the same core of the PTT device. You can
+set the filter by specifying the `filter` parameter which is required to start
+the trace. The parameter value is 20 bit. Bit 19 indicates the filter type.
+1 for Root Port filter and 0 for Requester filter. Bit[15:0] indicates the
+filter value. The value for a Root Port is a mask of the core port id which is
+calculated from its PCI Slot ID as (slotid & 7) * 2. The value for a Requester
+is the Requester ID (Device ID of the PCIe function). Bit[18:16] is currently
+reserved for extension.
+
+For example, if the desired filter is Endpoint function 0000:01:00.1 the filter
+value will be 0x00101. If the desired filter is Root Port 0000:00:10.0 then
+then filter value is calculated as 0x80001.
+
+Note that multiple Root Ports can be specified at one time, but only one
+Endpoint function can be specified in one trace. Specifying both Root Port
+and function at the same time is not supported. Driver maintains a list of
+available filters and will check the invalid inputs.
+
+Currently the available filters are detected in driver's probe. If the supported
+devices are removed/added after probe, you may need to reload the driver to update
+the filters.
+
+2. Type
+-------
+
+You can trace the TLP headers of certain types by specifying the `type`
+parameter, which is required to start the trace. The parameter value is
+8 bit. Current supported types and related values are shown below:
+
+- 8'b00000001: posted requests (P)
+- 8'b00000010: non-posted requests (NP)
+- 8'b00000100: completions (CPL)
+
+You can specify multiple types when tracing inbound TLP headers, but can only
+specify one when tracing outbound TLP headers.
+
+3. Direction
+------------
+
+You can trace the TLP headers from certain direction, which is relative
+to the Root Port or the PCIe core, by specifying the `direction` parameter.
+This is optional and the default parameter is inbound. The parameter value
+is 4 bit. When the desired format is 4DW, directions and related values
+supported are shown below:
+
+- 4'b0000: inbound TLPs (P, NP, CPL)
+- 4'b0001: outbound TLPs (P, NP, CPL)
+- 4'b0010: outbound TLPs (P, NP, CPL) and inbound TLPs (P, NP, CPL B)
+- 4'b0011: outbound TLPs (P, NP, CPL) and inbound TLPs (CPL A)
+
+When the desired format is 8DW, directions and related values supported are
+shown below:
+
+- 4'b0000: reserved
+- 4'b0001: outbound TLPs (P, NP, CPL)
+- 4'b0010: inbound TLPs (P, NP, CPL B)
+- 4'b0011: inbound TLPs (CPL A)
+
+Inbound completions are classified into two types:
+
+- completion A (CPL A): completion of CHI/DMA/Native non-posted requests, except for CPL B
+- completion B (CPL B): completion of DMA remote2local and P2P non-posted requests
+
+4. Format
+--------------
+
+You can change the format of the traced TLP headers by specifying the
+`format` parameter. The default format is 4DW. The parameter value is 4 bit.
+Current supported formats and related values are shown below:
+
+- 4'b0000: 4DW length per TLP header
+- 4'b0001: 8DW length per TLP header
+
+The traced TLP header format is different from the PCIe standard.
+
+When using the 8DW data format, the entire TLP header is logged
+(Header DW0-3 shown below). For example, the TLP header for Memory
+Reads with 64-bit addresses is shown in PCIe r5.0, Figure 2-17;
+the header for Configuration Requests is shown in Figure 2.20, etc.
+
+In addition, 8DW trace buffer entries contain a timestamp and
+possibly a prefix for a PASID TLP prefix (see Figure 6-20, PCIe r5.0).
+Otherwise this field will be all 0.
+
+The bit[31:11] of DW0 is always 0x1fffff, which can be
+used to distinguish the data format. 8DW format is like
+::
+
+ bits [ 31:11 ][ 10:0 ]
+ |---------------------------------------|-------------------|
+ DW0 [ 0x1fffff ][ Reserved (0x7ff) ]
+ DW1 [ Prefix ]
+ DW2 [ Header DW0 ]
+ DW3 [ Header DW1 ]
+ DW4 [ Header DW2 ]
+ DW5 [ Header DW3 ]
+ DW6 [ Reserved (0x0) ]
+ DW7 [ Time ]
+
+When using the 4DW data format, DW0 of the trace buffer entry
+contains selected fields of DW0 of the TLP, together with a
+timestamp. DW1-DW3 of the trace buffer entry contain DW1-DW3
+directly from the TLP header.
+
+4DW format is like
+::
+
+ bits [31:30] [ 29:25 ][24][23][22][21][ 20:11 ][ 10:0 ]
+ |-----|---------|---|---|---|---|-------------|-------------|
+ DW0 [ Fmt ][ Type ][T9][T8][TH][SO][ Length ][ Time ]
+ DW1 [ Header DW1 ]
+ DW2 [ Header DW2 ]
+ DW3 [ Header DW3 ]
+
+5. Memory Management
+--------------------
+
+The traced TLP headers will be written to the memory allocated
+by the driver. The hardware accepts 4 DMA address with same size,
+and writes the buffer sequentially like below. If DMA addr 3 is
+finished and the trace is still on, it will return to addr 0.
+::
+
+ +->[DMA addr 0]->[DMA addr 1]->[DMA addr 2]->[DMA addr 3]-+
+ +---------------------------------------------------------+
+
+Driver will allocate each DMA buffer of 4MiB. The finished buffer
+will be copied to the perf AUX buffer allocated by the perf core.
+Once the AUX buffer is full while the trace is still on, driver
+will commit the AUX buffer first and then apply for a new one with
+the same size. The size of AUX buffer is default to 16MiB. User can
+adjust the size by specifying the `-m` parameter of the perf command.
+
+6. Decoding
+-----------
+
+You can decode the traced data with `perf report -D` command (currently
+only support to dump the raw trace data). The traced data will be decoded
+according to the format described previously (take 8DW as an example):
+::
+
+ [...perf headers and other information]
+ . ... HISI PTT data: size 4194304 bytes
+ . 00000000: 00 00 00 00 Prefix
+ . 00000004: 01 00 00 60 Header DW0
+ . 00000008: 0f 1e 00 01 Header DW1
+ . 0000000c: 04 00 00 00 Header DW2
+ . 00000010: 40 00 81 02 Header DW3
+ . 00000014: 33 c0 04 00 Time
+ . 00000020: 00 00 00 00 Prefix
+ . 00000024: 01 00 00 60 Header DW0
+ . 00000028: 0f 1e 00 01 Header DW1
+ . 0000002c: 04 00 00 00 Header DW2
+ . 00000030: 40 00 81 02 Header DW3
+ . 00000034: 02 00 00 00 Time
+ . 00000040: 00 00 00 00 Prefix
+ . 00000044: 01 00 00 60 Header DW0
+ . 00000048: 0f 1e 00 01 Header DW1
+ . 0000004c: 04 00 00 00 Header DW2
+ . 00000050: 40 00 81 02 Header DW3
+ [...]
diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst
index 2d73e8697523..ea25a9220f92 100644
--- a/Documentation/trace/index.rst
+++ b/Documentation/trace/index.rst
@@ -33,3 +33,4 @@ Linux Tracing Technologies
coresight/index
user_events
rv/index
+ hisi-ptt
diff --git a/MAINTAINERS b/MAINTAINERS
index 3e0be733e898..9f5c060473c6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9180,6 +9180,14 @@ S: Supported
F: Documentation/admin-guide/perf/hns3-pmu.rst
F: drivers/perf/hisilicon/hns3_pmu.c
+HISILICON PTT DRIVER
+M: Yicong Yang <yangyicong@hisilicon.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: Documentation/ABI/testing/sysfs-devices-hisi_ptt
+F: Documentation/trace/hisi-ptt.rst
+F: drivers/hwtracing/ptt/
+
HISILICON QM DRIVER
M: Weili Qian <qianweili@huawei.com>
M: Zhou Wang <wangzhou1@hisilicon.com>
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 818df938a7ad..4d9d3683a3df 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1021,6 +1021,7 @@
#define SYS_MPIDR_SAFE_VAL (BIT(31))
#define TRFCR_ELx_TS_SHIFT 5
+#define TRFCR_ELx_TS_MASK ((0x3UL) << TRFCR_ELx_TS_SHIFT)
#define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT)
#define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT)
#define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT)
diff --git a/drivers/Makefile b/drivers/Makefile
index 057857258bfd..bdf1c66141c9 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -175,6 +175,7 @@ obj-$(CONFIG_USB4) += thunderbolt/
obj-$(CONFIG_CORESIGHT) += hwtracing/coresight/
obj-y += hwtracing/intel_th/
obj-$(CONFIG_STM) += hwtracing/stm/
+obj-$(CONFIG_HISI_PTT) += hwtracing/ptt/
obj-y += android/
obj-$(CONFIG_NVMEM) += nvmem/
obj-$(CONFIG_FPGA) += fpga/
diff --git a/drivers/hwtracing/Kconfig b/drivers/hwtracing/Kconfig
index 13085835a636..911ee977103c 100644
--- a/drivers/hwtracing/Kconfig
+++ b/drivers/hwtracing/Kconfig
@@ -5,4 +5,6 @@ source "drivers/hwtracing/stm/Kconfig"
source "drivers/hwtracing/intel_th/Kconfig"
+source "drivers/hwtracing/ptt/Kconfig"
+
endmenu
diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig
index 514a9b8086e3..45c1eb5dfcb7 100644
--- a/drivers/hwtracing/coresight/Kconfig
+++ b/drivers/hwtracing/coresight/Kconfig
@@ -193,10 +193,10 @@ config CORESIGHT_TRBE
depends on ARM64 && CORESIGHT_SOURCE_ETM4X
help
This driver provides support for percpu Trace Buffer Extension (TRBE).
- TRBE always needs to be used along with it's corresponding percpu ETE
+ TRBE always needs to be used along with its corresponding percpu ETE
component. ETE generates trace data which is then captured with TRBE.
Unlike traditional sink devices, TRBE is a CPU feature accessible via
- system registers. But it's explicit dependency with trace unit (ETE)
+ system registers. But its explicit dependency with trace unit (ETE)
requires it to be plugged in as a coresight sink device.
To compile this driver as a module, choose M here: the module will be
diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c
index e0740c6dbd54..bc90a03f478f 100644
--- a/drivers/hwtracing/coresight/coresight-catu.c
+++ b/drivers/hwtracing/coresight/coresight-catu.c
@@ -365,26 +365,15 @@ static const struct etr_buf_operations etr_catu_buf_ops = {
.get_data = catu_get_data_etr_buf,
};
-coresight_simple_reg32(struct catu_drvdata, devid, CORESIGHT_DEVID);
-coresight_simple_reg32(struct catu_drvdata, control, CATU_CONTROL);
-coresight_simple_reg32(struct catu_drvdata, status, CATU_STATUS);
-coresight_simple_reg32(struct catu_drvdata, mode, CATU_MODE);
-coresight_simple_reg32(struct catu_drvdata, axictrl, CATU_AXICTRL);
-coresight_simple_reg32(struct catu_drvdata, irqen, CATU_IRQEN);
-coresight_simple_reg64(struct catu_drvdata, sladdr,
- CATU_SLADDRLO, CATU_SLADDRHI);
-coresight_simple_reg64(struct catu_drvdata, inaddr,
- CATU_INADDRLO, CATU_INADDRHI);
-
static struct attribute *catu_mgmt_attrs[] = {
- &dev_attr_devid.attr,
- &dev_attr_control.attr,
- &dev_attr_status.attr,
- &dev_attr_mode.attr,
- &dev_attr_axictrl.attr,
- &dev_attr_irqen.attr,
- &dev_attr_sladdr.attr,
- &dev_attr_inaddr.attr,
+ coresight_simple_reg32(devid, CORESIGHT_DEVID),
+ coresight_simple_reg32(control, CATU_CONTROL),
+ coresight_simple_reg32(status, CATU_STATUS),
+ coresight_simple_reg32(mode, CATU_MODE),
+ coresight_simple_reg32(axictrl, CATU_AXICTRL),
+ coresight_simple_reg32(irqen, CATU_IRQEN),
+ coresight_simple_reg64(sladdr, CATU_SLADDRLO, CATU_SLADDRHI),
+ coresight_simple_reg64(inaddr, CATU_INADDRLO, CATU_INADDRHI),
NULL,
};
diff --git a/drivers/hwtracing/coresight/coresight-catu.h b/drivers/hwtracing/coresight/coresight-catu.h
index 6160c2d75a56..442e034bbfba 100644
--- a/drivers/hwtracing/coresight/coresight-catu.h
+++ b/drivers/hwtracing/coresight/coresight-catu.h
@@ -70,24 +70,24 @@ struct catu_drvdata {
static inline u32 \
catu_read_##name(struct catu_drvdata *drvdata) \
{ \
- return coresight_read_reg_pair(drvdata->base, offset, -1); \
+ return csdev_access_relaxed_read32(&drvdata->csdev->access, offset); \
} \
static inline void \
catu_write_##name(struct catu_drvdata *drvdata, u32 val) \
{ \
- coresight_write_reg_pair(drvdata->base, val, offset, -1); \
+ csdev_access_relaxed_write32(&drvdata->csdev->access, val, offset); \
}
#define CATU_REG_PAIR(name, lo_off, hi_off) \
static inline u64 \
catu_read_##name(struct catu_drvdata *drvdata) \
{ \
- return coresight_read_reg_pair(drvdata->base, lo_off, hi_off); \
+ return csdev_access_relaxed_read_pair(&drvdata->csdev->access, lo_off, hi_off); \
} \
static inline void \
catu_write_##name(struct catu_drvdata *drvdata, u64 val) \
{ \
- coresight_write_reg_pair(drvdata->base, val, lo_off, hi_off); \
+ csdev_access_relaxed_write_pair(&drvdata->csdev->access, val, lo_off, hi_off); \
}
CATU_REG32(control, CATU_CONTROL);
diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
index 1edfec1e9d18..d5dbc67bacb4 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -60,6 +60,34 @@ EXPORT_SYMBOL_GPL(coresight_barrier_pkt);
static const struct cti_assoc_op *cti_assoc_ops;
+ssize_t coresight_simple_show_pair(struct device *_dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct coresight_device *csdev = container_of(_dev, struct coresight_device, dev);
+ struct cs_pair_attribute *cs_attr = container_of(attr, struct cs_pair_attribute, attr);
+ u64 val;
+
+ pm_runtime_get_sync(_dev->parent);
+ val = csdev_access_relaxed_read_pair(&csdev->access, cs_attr->lo_off, cs_attr->hi_off);
+ pm_runtime_put_sync(_dev->parent);
+ return sysfs_emit(buf, "0x%llx\n", val);
+}
+EXPORT_SYMBOL_GPL(coresight_simple_show_pair);
+
+ssize_t coresight_simple_show32(struct device *_dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct coresight_device *csdev = container_of(_dev, struct coresight_device, dev);
+ struct cs_off_attribute *cs_attr = container_of(attr, struct cs_off_attribute, attr);
+ u64 val;
+
+ pm_runtime_get_sync(_dev->parent);
+ val = csdev_access_relaxed_read32(&csdev->access, cs_attr->off);
+ pm_runtime_put_sync(_dev->parent);
+ return sysfs_emit(buf, "0x%llx\n", val);
+}
+EXPORT_SYMBOL_GPL(coresight_simple_show32);
+
void coresight_set_cti_ops(const struct cti_assoc_op *cti_op)
{
cti_assoc_ops = cti_op;
diff --git a/drivers/hwtracing/coresight/coresight-cti-sysfs.c b/drivers/hwtracing/coresight/coresight-cti-sysfs.c
index 7ff7e7780bbf..6d59c815ecf5 100644
--- a/drivers/hwtracing/coresight/coresight-cti-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-cti-sysfs.c
@@ -163,48 +163,82 @@ static struct attribute *coresight_cti_attrs[] = {
/* register based attributes */
-/* macro to access RO registers with power check only (no enable check). */
-#define coresight_cti_reg(name, offset) \
-static ssize_t name##_show(struct device *dev, \
- struct device_attribute *attr, char *buf) \
-{ \
- struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent); \
- u32 val = 0; \
- pm_runtime_get_sync(dev->parent); \
- spin_lock(&drvdata->spinlock); \
- if (drvdata->config.hw_powered) \
- val = readl_relaxed(drvdata->base + offset); \
- spin_unlock(&drvdata->spinlock); \
- pm_runtime_put_sync(dev->parent); \
- return sprintf(buf, "0x%x\n", val); \
-} \
-static DEVICE_ATTR_RO(name)
+/* Read registers with power check only (no enable check). */
+static ssize_t coresight_cti_reg_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+ struct cs_off_attribute *cti_attr = container_of(attr, struct cs_off_attribute, attr);
+ u32 val = 0;
-/* coresight management registers */
-coresight_cti_reg(devaff0, CTIDEVAFF0);
-coresight_cti_reg(devaff1, CTIDEVAFF1);
-coresight_cti_reg(authstatus, CORESIGHT_AUTHSTATUS);
-coresight_cti_reg(devarch, CORESIGHT_DEVARCH);
-coresight_cti_reg(devid, CORESIGHT_DEVID);
-coresight_cti_reg(devtype, CORESIGHT_DEVTYPE);
-coresight_cti_reg(pidr0, CORESIGHT_PERIPHIDR0);
-coresight_cti_reg(pidr1, CORESIGHT_PERIPHIDR1);
-coresight_cti_reg(pidr2, CORESIGHT_PERIPHIDR2);
-coresight_cti_reg(pidr3, CORESIGHT_PERIPHIDR3);
-coresight_cti_reg(pidr4, CORESIGHT_PERIPHIDR4);
+ pm_runtime_get_sync(dev->parent);
+ spin_lock(&drvdata->spinlock);
+ if (drvdata->config.hw_powered)
+ val = readl_relaxed(drvdata->base + cti_attr->off);
+ spin_unlock(&drvdata->spinlock);
+ pm_runtime_put_sync(dev->parent);
+ return sysfs_emit(buf, "0x%x\n", val);
+}
+/* Write registers with power check only (no enable check). */
+static __maybe_unused ssize_t coresight_cti_reg_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent);
+ struct cs_off_attribute *cti_attr = container_of(attr, struct cs_off_attribute, attr);
+ unsigned long val = 0;
+
+ if (kstrtoul(buf, 0, &val))
+ return -EINVAL;
+
+ pm_runtime_get_sync(dev->parent);
+ spin_lock(&drvdata->spinlock);
+ if (drvdata->config.hw_powered)
+ cti_write_single_reg(drvdata, cti_attr->off, val);
+ spin_unlock(&drvdata->spinlock);
+ pm_runtime_put_sync(dev->parent);
+ return size;
+}
+
+#define coresight_cti_reg(name, offset) \
+ (&((struct cs_off_attribute[]) { \
+ { \
+ __ATTR(name, 0444, coresight_cti_reg_show, NULL), \
+ offset \
+ } \
+ })[0].attr.attr)
+
+#define coresight_cti_reg_rw(name, offset) \
+ (&((struct cs_off_attribute[]) { \
+ { \
+ __ATTR(name, 0644, coresight_cti_reg_show, \
+ coresight_cti_reg_store), \
+ offset \
+ } \
+ })[0].attr.attr)
+
+#define coresight_cti_reg_wo(name, offset) \
+ (&((struct cs_off_attribute[]) { \
+ { \
+ __ATTR(name, 0200, NULL, coresight_cti_reg_store), \
+ offset \
+ } \
+ })[0].attr.attr)
+
+/* coresight management registers */
static struct attribute *coresight_cti_mgmt_attrs[] = {
- &dev_attr_devaff0.attr,
- &dev_attr_devaff1.attr,
- &dev_attr_authstatus.attr,
- &dev_attr_devarch.attr,
- &dev_attr_devid.attr,
- &dev_attr_devtype.attr,
- &dev_attr_pidr0.attr,
- &dev_attr_pidr1.attr,
- &dev_attr_pidr2.attr,
- &dev_attr_pidr3.attr,
- &dev_attr_pidr4.attr,
+ coresight_cti_reg(devaff0, CTIDEVAFF0),
+ coresight_cti_reg(devaff1, CTIDEVAFF1),
+ coresight_cti_reg(authstatus, CORESIGHT_AUTHSTATUS),
+ coresight_cti_reg(devarch, CORESIGHT_DEVARCH),
+ coresight_cti_reg(devid, CORESIGHT_DEVID),
+ coresight_cti_reg(devtype, CORESIGHT_DEVTYPE),
+ coresight_cti_reg(pidr0, CORESIGHT_PERIPHIDR0),
+ coresight_cti_reg(pidr1, CORESIGHT_PERIPHIDR1),
+ coresight_cti_reg(pidr2, CORESIGHT_PERIPHIDR2),
+ coresight_cti_reg(pidr3, CORESIGHT_PERIPHIDR3),
+ coresight_cti_reg(pidr4, CORESIGHT_PERIPHIDR4),
NULL,
};
@@ -454,86 +488,11 @@ static ssize_t apppulse_store(struct device *dev,
}
static DEVICE_ATTR_WO(apppulse);
-coresight_cti_reg(triginstatus, CTITRIGINSTATUS);
-coresight_cti_reg(trigoutstatus, CTITRIGOUTSTATUS);
-coresight_cti_reg(chinstatus, CTICHINSTATUS);
-coresight_cti_reg(choutstatus, CTICHOUTSTATUS);
-
/*
* Define CONFIG_CORESIGHT_CTI_INTEGRATION_REGS to enable the access to the
* integration control registers. Normally only used to investigate connection
* data.
*/
-#ifdef CONFIG_CORESIGHT_CTI_INTEGRATION_REGS
-
-/* macro to access RW registers with power check only (no enable check). */
-#define coresight_cti_reg_rw(name, offset) \
-static ssize_t name##_show(struct device *dev, \
- struct device_attribute *attr, char *buf) \
-{ \
- struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent); \
- u32 val = 0; \
- pm_runtime_get_sync(dev->parent); \
- spin_lock(&drvdata->spinlock); \
- if (drvdata->config.hw_powered) \
- val = readl_relaxed(drvdata->base + offset); \
- spin_unlock(&drvdata->spinlock); \
- pm_runtime_put_sync(dev->parent); \
- return sprintf(buf, "0x%x\n", val); \
-} \
- \
-static ssize_t name##_store(struct device *dev, \
- struct device_attribute *attr, \
- const char *buf, size_t size) \
-{ \
- struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent); \
- unsigned long val = 0; \
- if (kstrtoul(buf, 0, &val)) \
- return -EINVAL; \
- \
- pm_runtime_get_sync(dev->parent); \
- spin_lock(&drvdata->spinlock); \
- if (drvdata->config.hw_powered) \
- cti_write_single_reg(drvdata, offset, val); \
- spin_unlock(&drvdata->spinlock); \
- pm_runtime_put_sync(dev->parent); \
- return size; \
-} \
-static DEVICE_ATTR_RW(name)
-
-/* macro to access WO registers with power check only (no enable check). */
-#define coresight_cti_reg_wo(name, offset) \
-static ssize_t name##_store(struct device *dev, \
- struct device_attribute *attr, \
- const char *buf, size_t size) \
-{ \
- struct cti_drvdata *drvdata = dev_get_drvdata(dev->parent); \
- unsigned long val = 0; \
- if (kstrtoul(buf, 0, &val)) \
- return -EINVAL; \
- \
- pm_runtime_get_sync(dev->parent); \
- spin_lock(&drvdata->spinlock); \
- if (drvdata->config.hw_powered) \
- cti_write_single_reg(drvdata, offset, val); \
- spin_unlock(&drvdata->spinlock); \
- pm_runtime_put_sync(dev->parent); \
- return size; \
-} \
-static DEVICE_ATTR_WO(name)
-
-coresight_cti_reg_rw(itchout, ITCHOUT);
-coresight_cti_reg_rw(ittrigout, ITTRIGOUT);
-coresight_cti_reg_rw(itctrl, CORESIGHT_ITCTRL);
-coresight_cti_reg_wo(itchinack, ITCHINACK);
-coresight_cti_reg_wo(ittriginack, ITTRIGINACK);
-coresight_cti_reg(ittrigin, ITTRIGIN);
-coresight_cti_reg(itchin, ITCHIN);
-coresight_cti_reg(itchoutack, ITCHOUTACK);
-coresight_cti_reg(ittrigoutack, ITTRIGOUTACK);
-
-#endif /* CORESIGHT_CTI_INTEGRATION_REGS */
-
static struct attribute *coresight_cti_regs_attrs[] = {
&dev_attr_inout_sel.attr,
&dev_attr_inen.attr,
@@ -544,20 +503,20 @@ static struct attribute *coresight_cti_regs_attrs[] = {
&dev_attr_appset.attr,
&dev_attr_appclear.attr,
&dev_attr_apppulse.attr,
- &dev_attr_triginstatus.attr,
- &dev_attr_trigoutstatus.attr,
- &dev_attr_chinstatus.attr,
- &dev_attr_choutstatus.attr,
+ coresight_cti_reg(triginstatus, CTITRIGINSTATUS),
+ coresight_cti_reg(trigoutstatus, CTITRIGOUTSTATUS),
+ coresight_cti_reg(chinstatus, CTICHINSTATUS),
+ coresight_cti_reg(choutstatus, CTICHOUTSTATUS),
#ifdef CONFIG_CORESIGHT_CTI_INTEGRATION_REGS
- &dev_attr_itctrl.attr,
- &dev_attr_ittrigin.attr,
- &dev_attr_itchin.attr,
- &dev_attr_ittrigout.attr,
- &dev_attr_itchout.attr,
- &dev_attr_itchoutack.attr,
- &dev_attr_ittrigoutack.attr,
- &dev_attr_ittriginack.attr,
- &dev_attr_itchinack.attr,
+ coresight_cti_reg_rw(itctrl, CORESIGHT_ITCTRL),
+ coresight_cti_reg(ittrigin, ITTRIGIN),
+ coresight_cti_reg(itchin, ITCHIN),
+ coresight_cti_reg_rw(ittrigout, ITTRIGOUT),
+ coresight_cti_reg_rw(itchout, ITCHOUT),
+ coresight_cti_reg(itchoutack, ITCHOUTACK),
+ coresight_cti_reg(ittrigoutack, ITTRIGOUTACK),
+ coresight_cti_reg_wo(ittriginack, ITTRIGINACK),
+ coresight_cti_reg_wo(itchinack, ITCHINACK),
#endif
NULL,
};
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
index efa39820acec..8aa6e4f83e42 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -655,27 +655,15 @@ static const struct file_operations etb_fops = {
.llseek = no_llseek,
};
-#define coresight_etb10_reg(name, offset) \
- coresight_simple_reg32(struct etb_drvdata, name, offset)
-
-coresight_etb10_reg(rdp, ETB_RAM_DEPTH_REG);
-coresight_etb10_reg(sts, ETB_STATUS_REG);
-coresight_etb10_reg(rrp, ETB_RAM_READ_POINTER);
-coresight_etb10_reg(rwp, ETB_RAM_WRITE_POINTER);
-coresight_etb10_reg(trg, ETB_TRG);
-coresight_etb10_reg(ctl, ETB_CTL_REG);
-coresight_etb10_reg(ffsr, ETB_FFSR);
-coresight_etb10_reg(ffcr, ETB_FFCR);
-
static struct attribute *coresight_etb_mgmt_attrs[] = {
- &dev_attr_rdp.attr,
- &dev_attr_sts.attr,
- &dev_attr_rrp.attr,
- &dev_attr_rwp.attr,
- &dev_attr_trg.attr,
- &dev_attr_ctl.attr,
- &dev_attr_ffsr.attr,
- &dev_attr_ffcr.attr,
+ coresight_simple_reg32(rdp, ETB_RAM_DEPTH_REG),
+ coresight_simple_reg32(sts, ETB_STATUS_REG),
+ coresight_simple_reg32(rrp, ETB_RAM_READ_POINTER),
+ coresight_simple_reg32(rwp, ETB_RAM_WRITE_POINTER),
+ coresight_simple_reg32(trg, ETB_TRG),
+ coresight_simple_reg32(ctl, ETB_CTL_REG),
+ coresight_simple_reg32(ffsr, ETB_FFSR),
+ coresight_simple_reg32(ffcr, ETB_FFCR),
NULL,
};
diff --git a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
index 68fcbf4ce7a8..fd81eca3ec18 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
@@ -1252,31 +1252,17 @@ static struct attribute *coresight_etm_attrs[] = {
NULL,
};
-#define coresight_etm3x_reg(name, offset) \
- coresight_simple_reg32(struct etm_drvdata, name, offset)
-
-coresight_etm3x_reg(etmccr, ETMCCR);
-coresight_etm3x_reg(etmccer, ETMCCER);
-coresight_etm3x_reg(etmscr, ETMSCR);
-coresight_etm3x_reg(etmidr, ETMIDR);
-coresight_etm3x_reg(etmcr, ETMCR);
-coresight_etm3x_reg(etmtraceidr, ETMTRACEIDR);
-coresight_etm3x_reg(etmteevr, ETMTEEVR);
-coresight_etm3x_reg(etmtssvr, ETMTSSCR);
-coresight_etm3x_reg(etmtecr1, ETMTECR1);
-coresight_etm3x_reg(etmtecr2, ETMTECR2);
-
static struct attribute *coresight_etm_mgmt_attrs[] = {
- &dev_attr_etmccr.attr,
- &dev_attr_etmccer.attr,
- &dev_attr_etmscr.attr,
- &dev_attr_etmidr.attr,
- &dev_attr_etmcr.attr,
- &dev_attr_etmtraceidr.attr,
- &dev_attr_etmteevr.attr,
- &dev_attr_etmtssvr.attr,
- &dev_attr_etmtecr1.attr,
- &dev_attr_etmtecr2.attr,
+ coresight_simple_reg32(etmccr, ETMCCR),
+ coresight_simple_reg32(etmccer, ETMCCER),
+ coresight_simple_reg32(etmscr, ETMSCR),
+ coresight_simple_reg32(etmidr, ETMIDR),
+ coresight_simple_reg32(etmcr, ETMCR),
+ coresight_simple_reg32(etmtraceidr, ETMTRACEIDR),
+ coresight_simple_reg32(etmteevr, ETMTEEVR),
+ coresight_simple_reg32(etmtssvr, ETMTSSCR),
+ coresight_simple_reg32(etmtecr1, ETMTECR1),
+ coresight_simple_reg32(etmtecr2, ETMTECR2),
NULL,
};
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index 6ea8181816fc..9cac848cffaf 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -2306,6 +2306,34 @@ static ssize_t cpu_show(struct device *dev,
}
static DEVICE_ATTR_RO(cpu);
+static ssize_t ts_source_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int val;
+ struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+ if (!drvdata->trfcr) {
+ val = -1;
+ goto out;
+ }
+
+ switch (drvdata->trfcr & TRFCR_ELx_TS_MASK) {
+ case TRFCR_ELx_TS_VIRTUAL:
+ case TRFCR_ELx_TS_GUEST_PHYSICAL:
+ case TRFCR_ELx_TS_PHYSICAL:
+ val = FIELD_GET(TRFCR_ELx_TS_MASK, drvdata->trfcr);
+ break;
+ default:
+ val = -1;
+ break;
+ }
+
+out:
+ return sysfs_emit(buf, "%d\n", val);
+}
+static DEVICE_ATTR_RO(ts_source);
+
static struct attribute *coresight_etmv4_attrs[] = {
&dev_attr_nr_pe_cmp.attr,
&dev_attr_nr_addr_cmp.attr,
@@ -2360,6 +2388,7 @@ static struct attribute *coresight_etmv4_attrs[] = {
&dev_attr_vmid_val.attr,
&dev_attr_vmid_masks.attr,
&dev_attr_cpu.attr,
+ &dev_attr_ts_source.attr,
NULL,
};
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index ff1dd2092ac5..595ce5862056 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -39,32 +39,37 @@
#define ETM_MODE_EXCL_KERN BIT(30)
#define ETM_MODE_EXCL_USER BIT(31)
+struct cs_pair_attribute {
+ struct device_attribute attr;
+ u32 lo_off;
+ u32 hi_off;
+};
+
+struct cs_off_attribute {
+ struct device_attribute attr;
+ u32 off;
+};
-typedef u32 (*coresight_read_fn)(const struct device *, u32 offset);
-#define __coresight_simple_func(type, func, name, lo_off, hi_off) \
-static ssize_t name##_show(struct device *_dev, \
- struct device_attribute *attr, char *buf) \
-{ \
- type *drvdata = dev_get_drvdata(_dev->parent); \
- coresight_read_fn fn = func; \
- u64 val; \
- pm_runtime_get_sync(_dev->parent); \
- if (fn) \
- val = (u64)fn(_dev->parent, lo_off); \
- else \
- val = coresight_read_reg_pair(drvdata->base, \
- lo_off, hi_off); \
- pm_runtime_put_sync(_dev->parent); \
- return scnprintf(buf, PAGE_SIZE, "0x%llx\n", val); \
-} \
-static DEVICE_ATTR_RO(name)
-
-#define coresight_simple_func(type, func, name, offset) \
- __coresight_simple_func(type, func, name, offset, -1)
-#define coresight_simple_reg32(type, name, offset) \
- __coresight_simple_func(type, NULL, name, offset, -1)
-#define coresight_simple_reg64(type, name, lo_off, hi_off) \
- __coresight_simple_func(type, NULL, name, lo_off, hi_off)
+extern ssize_t coresight_simple_show32(struct device *_dev,
+ struct device_attribute *attr, char *buf);
+extern ssize_t coresight_simple_show_pair(struct device *_dev,
+ struct device_attribute *attr, char *buf);
+
+#define coresight_simple_reg32(name, offset) \
+ (&((struct cs_off_attribute[]) { \
+ { \
+ __ATTR(name, 0444, coresight_simple_show32, NULL), \
+ offset \
+ } \
+ })[0].attr.attr)
+
+#define coresight_simple_reg64(name, lo_off, hi_off) \
+ (&((struct cs_pair_attribute[]) { \
+ { \
+ __ATTR(name, 0444, coresight_simple_show_pair, NULL), \
+ lo_off, hi_off \
+ } \
+ })[0].attr.attr)
extern const u32 coresight_barrier_pkt[4];
#define CORESIGHT_BARRIER_PKT_SIZE (sizeof(coresight_barrier_pkt))
@@ -127,25 +132,6 @@ static inline void CS_UNLOCK(void __iomem *addr)
} while (0);
}
-static inline u64
-coresight_read_reg_pair(void __iomem *addr, s32 lo_offset, s32 hi_offset)
-{
- u64 val;
-
- val = readl_relaxed(addr + lo_offset);
- val |= (hi_offset < 0) ? 0 :
- (u64)readl_relaxed(addr + hi_offset) << 32;
- return val;
-}
-
-static inline void coresight_write_reg_pair(void __iomem *addr, u64 val,
- s32 lo_offset, s32 hi_offset)
-{
- writel_relaxed((u32)val, addr + lo_offset);
- if (hi_offset >= 0)
- writel_relaxed((u32)(val >> 32), addr + hi_offset);
-}
-
void coresight_disable_path(struct list_head *path);
int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data);
struct coresight_device *coresight_get_sink(struct list_head *path);
diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c
index b86acbc74cf0..4dd50546d7e4 100644
--- a/drivers/hwtracing/coresight/coresight-replicator.c
+++ b/drivers/hwtracing/coresight/coresight-replicator.c
@@ -196,15 +196,9 @@ static const struct coresight_ops replicator_cs_ops = {
.link_ops = &replicator_link_ops,
};
-#define coresight_replicator_reg(name, offset) \
- coresight_simple_reg32(struct replicator_drvdata, name, offset)
-
-coresight_replicator_reg(idfilter0, REPLICATOR_IDFILTER0);
-coresight_replicator_reg(idfilter1, REPLICATOR_IDFILTER1);
-
static struct attribute *replicator_mgmt_attrs[] = {
- &dev_attr_idfilter0.attr,
- &dev_attr_idfilter1.attr,
+ coresight_simple_reg32(idfilter0, REPLICATOR_IDFILTER0),
+ coresight_simple_reg32(idfilter1, REPLICATOR_IDFILTER1),
NULL,
};
diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
index bb14a3a8a921..463f449cfb79 100644
--- a/drivers/hwtracing/coresight/coresight-stm.c
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -634,22 +634,6 @@ static ssize_t traceid_store(struct device *dev,
}
static DEVICE_ATTR_RW(traceid);
-#define coresight_stm_reg(name, offset) \
- coresight_simple_reg32(struct stm_drvdata, name, offset)
-
-coresight_stm_reg(tcsr, STMTCSR);
-coresight_stm_reg(tsfreqr, STMTSFREQR);
-coresight_stm_reg(syncr, STMSYNCR);
-coresight_stm_reg(sper, STMSPER);
-coresight_stm_reg(spter, STMSPTER);
-coresight_stm_reg(privmaskr, STMPRIVMASKR);
-coresight_stm_reg(spscr, STMSPSCR);
-coresight_stm_reg(spmscr, STMSPMSCR);
-coresight_stm_reg(spfeat1r, STMSPFEAT1R);
-coresight_stm_reg(spfeat2r, STMSPFEAT2R);
-coresight_stm_reg(spfeat3r, STMSPFEAT3R);
-coresight_stm_reg(devid, CORESIGHT_DEVID);
-
static struct attribute *coresight_stm_attrs[] = {
&dev_attr_hwevent_enable.attr,
&dev_attr_hwevent_select.attr,
@@ -660,18 +644,18 @@ static struct attribute *coresight_stm_attrs[] = {
};
static struct attribute *coresight_stm_mgmt_attrs[] = {
- &dev_attr_tcsr.attr,
- &dev_attr_tsfreqr.attr,
- &dev_attr_syncr.attr,
- &dev_attr_sper.attr,
- &dev_attr_spter.attr,
- &dev_attr_privmaskr.attr,
- &dev_attr_spscr.attr,
- &dev_attr_spmscr.attr,
- &dev_attr_spfeat1r.attr,
- &dev_attr_spfeat2r.attr,
- &dev_attr_spfeat3r.attr,
- &dev_attr_devid.attr,
+ coresight_simple_reg32(tcsr, STMTCSR),
+ coresight_simple_reg32(tsfreqr, STMTSFREQR),
+ coresight_simple_reg32(syncr, STMSYNCR),
+ coresight_simple_reg32(sper, STMSPER),
+ coresight_simple_reg32(spter, STMSPTER),
+ coresight_simple_reg32(privmaskr, STMPRIVMASKR),
+ coresight_simple_reg32(spscr, STMSPSCR),
+ coresight_simple_reg32(spmscr, STMSPMSCR),
+ coresight_simple_reg32(spfeat1r, STMSPFEAT1R),
+ coresight_simple_reg32(spfeat2r, STMSPFEAT2R),
+ coresight_simple_reg32(spfeat3r, STMSPFEAT3R),
+ coresight_simple_reg32(devid, CORESIGHT_DEVID),
NULL,
};
diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c
index d0276af82494..07abf28ad725 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-core.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-core.c
@@ -251,41 +251,21 @@ static enum tmc_mem_intf_width tmc_get_memwidth(u32 devid)
return memwidth;
}
-#define coresight_tmc_reg(name, offset) \
- coresight_simple_reg32(struct tmc_drvdata, name, offset)
-#define coresight_tmc_reg64(name, lo_off, hi_off) \
- coresight_simple_reg64(struct tmc_drvdata, name, lo_off, hi_off)
-
-coresight_tmc_reg(rsz, TMC_RSZ);
-coresight_tmc_reg(sts, TMC_STS);
-coresight_tmc_reg(trg, TMC_TRG);
-coresight_tmc_reg(ctl, TMC_CTL);
-coresight_tmc_reg(ffsr, TMC_FFSR);
-coresight_tmc_reg(ffcr, TMC_FFCR);
-coresight_tmc_reg(mode, TMC_MODE);
-coresight_tmc_reg(pscr, TMC_PSCR);
-coresight_tmc_reg(axictl, TMC_AXICTL);
-coresight_tmc_reg(authstatus, TMC_AUTHSTATUS);
-coresight_tmc_reg(devid, CORESIGHT_DEVID);
-coresight_tmc_reg64(rrp, TMC_RRP, TMC_RRPHI);
-coresight_tmc_reg64(rwp, TMC_RWP, TMC_RWPHI);
-coresight_tmc_reg64(dba, TMC_DBALO, TMC_DBAHI);
-
static struct attribute *coresight_tmc_mgmt_attrs[] = {
- &dev_attr_rsz.attr,
- &dev_attr_sts.attr,
- &dev_attr_rrp.attr,
- &dev_attr_rwp.attr,
- &dev_attr_trg.attr,
- &dev_attr_ctl.attr,
- &dev_attr_ffsr.attr,
- &dev_attr_ffcr.attr,
- &dev_attr_mode.attr,
- &dev_attr_pscr.attr,
- &dev_attr_devid.attr,
- &dev_attr_dba.attr,
- &dev_attr_axictl.attr,
- &dev_attr_authstatus.attr,
+ coresight_simple_reg32(rsz, TMC_RSZ),
+ coresight_simple_reg32(sts, TMC_STS),
+ coresight_simple_reg64(rrp, TMC_RRP, TMC_RRPHI),
+ coresight_simple_reg64(rwp, TMC_RWP, TMC_RWPHI),
+ coresight_simple_reg32(trg, TMC_TRG),
+ coresight_simple_reg32(ctl, TMC_CTL),
+ coresight_simple_reg32(ffsr, TMC_FFSR),
+ coresight_simple_reg32(ffcr, TMC_FFCR),
+ coresight_simple_reg32(mode, TMC_MODE),
+ coresight_simple_reg32(pscr, TMC_PSCR),
+ coresight_simple_reg32(devid, CORESIGHT_DEVID),
+ coresight_simple_reg64(dba, TMC_DBALO, TMC_DBAHI),
+ coresight_simple_reg32(axictl, TMC_AXICTL),
+ coresight_simple_reg32(authstatus, TMC_AUTHSTATUS),
NULL,
};
diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
index 6bec20a392b3..66959557cf39 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.h
+++ b/drivers/hwtracing/coresight/coresight-tmc.h
@@ -282,12 +282,12 @@ ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata,
static inline u64 \
tmc_read_##name(struct tmc_drvdata *drvdata) \
{ \
- return coresight_read_reg_pair(drvdata->base, lo_off, hi_off); \
+ return csdev_access_relaxed_read_pair(&drvdata->csdev->access, lo_off, hi_off); \
} \
static inline void \
tmc_write_##name(struct tmc_drvdata *drvdata, u64 val) \
{ \
- coresight_write_reg_pair(drvdata->base, val, lo_off, hi_off); \
+ csdev_access_relaxed_write_pair(&drvdata->csdev->access, val, lo_off, hi_off); \
}
TMC_REG_PAIR(rrp, TMC_RRP, TMC_RRPHI)
diff --git a/drivers/hwtracing/ptt/Kconfig b/drivers/hwtracing/ptt/Kconfig
new file mode 100644
index 000000000000..6d46a09ffeb9
--- /dev/null
+++ b/drivers/hwtracing/ptt/Kconfig
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config HISI_PTT
+ tristate "HiSilicon PCIe Tune and Trace Device"
+ depends on ARM64 || (COMPILE_TEST && 64BIT)
+ depends on PCI && HAS_DMA && HAS_IOMEM && PERF_EVENTS
+ help
+ HiSilicon PCIe Tune and Trace device exists as a PCIe RCiEP
+ device, and it provides support for PCIe traffic tuning and
+ tracing TLP headers to the memory.
+
+ This driver can also be built as a module. If so, the module
+ will be called hisi_ptt.
diff --git a/drivers/hwtracing/ptt/Makefile b/drivers/hwtracing/ptt/Makefile
new file mode 100644
index 000000000000..908c09a98161
--- /dev/null
+++ b/drivers/hwtracing/ptt/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_HISI_PTT) += hisi_ptt.o
diff --git a/drivers/hwtracing/ptt/hisi_ptt.c b/drivers/hwtracing/ptt/hisi_ptt.c
new file mode 100644
index 000000000000..5d5526aa60c4
--- /dev/null
+++ b/drivers/hwtracing/ptt/hisi_ptt.c
@@ -0,0 +1,1046 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for HiSilicon PCIe tune and trace device
+ *
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ * Author: Yicong Yang <yangyicong@hisilicon.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/cpuhotplug.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/sysfs.h>
+#include <linux/vmalloc.h>
+
+#include "hisi_ptt.h"
+
+/* Dynamic CPU hotplug state used by PTT */
+static enum cpuhp_state hisi_ptt_pmu_online;
+
+static bool hisi_ptt_wait_tuning_finish(struct hisi_ptt *hisi_ptt)
+{
+ u32 val;
+
+ return !readl_poll_timeout(hisi_ptt->iobase + HISI_PTT_TUNING_INT_STAT,
+ val, !(val & HISI_PTT_TUNING_INT_STAT_MASK),
+ HISI_PTT_WAIT_POLL_INTERVAL_US,
+ HISI_PTT_WAIT_TUNE_TIMEOUT_US);
+}
+
+static ssize_t hisi_ptt_tune_attr_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
+ struct dev_ext_attribute *ext_attr;
+ struct hisi_ptt_tune_desc *desc;
+ u32 reg;
+ u16 val;
+
+ ext_attr = container_of(attr, struct dev_ext_attribute, attr);
+ desc = ext_attr->var;
+
+ mutex_lock(&hisi_ptt->tune_lock);
+
+ reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
+ reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
+ reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
+ desc->event_code);
+ writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
+
+ /* Write all 1 to indicates it's the read process */
+ writel(~0U, hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
+
+ if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
+ mutex_unlock(&hisi_ptt->tune_lock);
+ return -ETIMEDOUT;
+ }
+
+ reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
+ reg &= HISI_PTT_TUNING_DATA_VAL_MASK;
+ val = FIELD_GET(HISI_PTT_TUNING_DATA_VAL_MASK, reg);
+
+ mutex_unlock(&hisi_ptt->tune_lock);
+ return sysfs_emit(buf, "%u\n", val);
+}
+
+static ssize_t hisi_ptt_tune_attr_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
+ struct dev_ext_attribute *ext_attr;
+ struct hisi_ptt_tune_desc *desc;
+ u32 reg;
+ u16 val;
+
+ ext_attr = container_of(attr, struct dev_ext_attribute, attr);
+ desc = ext_attr->var;
+
+ if (kstrtou16(buf, 10, &val))
+ return -EINVAL;
+
+ mutex_lock(&hisi_ptt->tune_lock);
+
+ reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
+ reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
+ reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
+ desc->event_code);
+ writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
+ writel(FIELD_PREP(HISI_PTT_TUNING_DATA_VAL_MASK, val),
+ hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
+
+ if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
+ mutex_unlock(&hisi_ptt->tune_lock);
+ return -ETIMEDOUT;
+ }
+
+ mutex_unlock(&hisi_ptt->tune_lock);
+ return count;
+}
+
+#define HISI_PTT_TUNE_ATTR(_name, _val, _show, _store) \
+ static struct hisi_ptt_tune_desc _name##_desc = { \
+ .name = #_name, \
+ .event_code = (_val), \
+ }; \
+ static struct dev_ext_attribute hisi_ptt_##_name##_attr = { \
+ .attr = __ATTR(_name, 0600, _show, _store), \
+ .var = &_name##_desc, \
+ }
+
+#define HISI_PTT_TUNE_ATTR_COMMON(_name, _val) \
+ HISI_PTT_TUNE_ATTR(_name, _val, \
+ hisi_ptt_tune_attr_show, \
+ hisi_ptt_tune_attr_store)
+
+/*
+ * The value of the tuning event are composed of two parts: main event code
+ * in BIT[0,15] and subevent code in BIT[16,23]. For example, qox_tx_cpl is
+ * a subevent of 'Tx path QoS control' which for tuning the weight of Tx
+ * completion TLPs. See hisi_ptt.rst documentation for more information.
+ */
+#define HISI_PTT_TUNE_QOS_TX_CPL (0x4 | (3 << 16))
+#define HISI_PTT_TUNE_QOS_TX_NP (0x4 | (4 << 16))
+#define HISI_PTT_TUNE_QOS_TX_P (0x4 | (5 << 16))
+#define HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL (0x5 | (6 << 16))
+#define HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL (0x5 | (7 << 16))
+
+HISI_PTT_TUNE_ATTR_COMMON(qos_tx_cpl, HISI_PTT_TUNE_QOS_TX_CPL);
+HISI_PTT_TUNE_ATTR_COMMON(qos_tx_np, HISI_PTT_TUNE_QOS_TX_NP);
+HISI_PTT_TUNE_ATTR_COMMON(qos_tx_p, HISI_PTT_TUNE_QOS_TX_P);
+HISI_PTT_TUNE_ATTR_COMMON(rx_alloc_buf_level, HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL);
+HISI_PTT_TUNE_ATTR_COMMON(tx_alloc_buf_level, HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL);
+
+static struct attribute *hisi_ptt_tune_attrs[] = {
+ &hisi_ptt_qos_tx_cpl_attr.attr.attr,
+ &hisi_ptt_qos_tx_np_attr.attr.attr,
+ &hisi_ptt_qos_tx_p_attr.attr.attr,
+ &hisi_ptt_rx_alloc_buf_level_attr.attr.attr,
+ &hisi_ptt_tx_alloc_buf_level_attr.attr.attr,
+ NULL,
+};
+
+static struct attribute_group hisi_ptt_tune_group = {
+ .name = "tune",
+ .attrs = hisi_ptt_tune_attrs,
+};
+
+static u16 hisi_ptt_get_filter_val(u16 devid, bool is_port)
+{
+ if (is_port)
+ return BIT(HISI_PCIE_CORE_PORT_ID(devid & 0xff));
+
+ return devid;
+}
+
+static bool hisi_ptt_wait_trace_hw_idle(struct hisi_ptt *hisi_ptt)
+{
+ u32 val;
+
+ return !readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_STS,
+ val, val & HISI_PTT_TRACE_IDLE,
+ HISI_PTT_WAIT_POLL_INTERVAL_US,
+ HISI_PTT_WAIT_TRACE_TIMEOUT_US);
+}
+
+static void hisi_ptt_wait_dma_reset_done(struct hisi_ptt *hisi_ptt)
+{
+ u32 val;
+
+ readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS,
+ val, !val, HISI_PTT_RESET_POLL_INTERVAL_US,
+ HISI_PTT_RESET_TIMEOUT_US);
+}
+
+static void hisi_ptt_trace_end(struct hisi_ptt *hisi_ptt)
+{
+ writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
+ hisi_ptt->trace_ctrl.started = false;
+}
+
+static int hisi_ptt_trace_start(struct hisi_ptt *hisi_ptt)
+{
+ struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
+ u32 val;
+ int i;
+
+ /* Check device idle before start trace */
+ if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt)) {
+ pci_err(hisi_ptt->pdev, "Failed to start trace, the device is still busy\n");
+ return -EBUSY;
+ }
+
+ ctrl->started = true;
+
+ /* Reset the DMA before start tracing */
+ val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
+ val |= HISI_PTT_TRACE_CTRL_RST;
+ writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
+
+ hisi_ptt_wait_dma_reset_done(hisi_ptt);
+
+ val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
+ val &= ~HISI_PTT_TRACE_CTRL_RST;
+ writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
+
+ /* Reset the index of current buffer */
+ hisi_ptt->trace_ctrl.buf_index = 0;
+
+ /* Zero the trace buffers */
+ for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++)
+ memset(ctrl->trace_buf[i].addr, 0, HISI_PTT_TRACE_BUF_SIZE);
+
+ /* Clear the interrupt status */
+ writel(HISI_PTT_TRACE_INT_STAT_MASK, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
+ writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_INT_MASK);
+
+ /* Set the trace control register */
+ val = FIELD_PREP(HISI_PTT_TRACE_CTRL_TYPE_SEL, ctrl->type);
+ val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_RXTX_SEL, ctrl->direction);
+ val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_DATA_FORMAT, ctrl->format);
+ val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_TARGET_SEL, hisi_ptt->trace_ctrl.filter);
+ if (!hisi_ptt->trace_ctrl.is_port)
+ val |= HISI_PTT_TRACE_CTRL_FILTER_MODE;
+
+ /* Start the Trace */
+ val |= HISI_PTT_TRACE_CTRL_EN;
+ writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
+
+ return 0;
+}
+
+static int hisi_ptt_update_aux(struct hisi_ptt *hisi_ptt, int index, bool stop)
+{
+ struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
+ struct perf_output_handle *handle = &ctrl->handle;
+ struct perf_event *event = handle->event;
+ struct hisi_ptt_pmu_buf *buf;
+ size_t size;
+ void *addr;
+
+ buf = perf_get_aux(handle);
+ if (!buf || !handle->size)
+ return -EINVAL;
+
+ addr = ctrl->trace_buf[ctrl->buf_index].addr;
+
+ /*
+ * If we're going to stop, read the size of already traced data from
+ * HISI_PTT_TRACE_WR_STS. Otherwise we're coming from the interrupt,
+ * the data size is always HISI_PTT_TRACE_BUF_SIZE.
+ */
+ if (stop) {
+ u32 reg;
+
+ reg = readl(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS);
+ size = FIELD_GET(HISI_PTT_TRACE_WR_STS_WRITE, reg);
+ } else {
+ size = HISI_PTT_TRACE_BUF_SIZE;
+ }
+
+ memcpy(buf->base + buf->pos, addr, size);
+ buf->pos += size;
+
+ /*
+ * Just commit the traced data if we're going to stop. Otherwise if the
+ * resident AUX buffer cannot contain the data of next trace buffer,
+ * apply a new one.
+ */
+ if (stop) {
+ perf_aux_output_end(handle, buf->pos);
+ } else if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
+ perf_aux_output_end(handle, buf->pos);
+
+ buf = perf_aux_output_begin(handle, event);
+ if (!buf)
+ return -EINVAL;
+
+ buf->pos = handle->head % buf->length;
+ if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
+ perf_aux_output_end(handle, 0);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static irqreturn_t hisi_ptt_isr(int irq, void *context)
+{
+ struct hisi_ptt *hisi_ptt = context;
+ u32 status, buf_idx;
+
+ status = readl(hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
+ if (!(status & HISI_PTT_TRACE_INT_STAT_MASK))
+ return IRQ_NONE;
+
+ buf_idx = ffs(status) - 1;
+
+ /* Clear the interrupt status of buffer @buf_idx */
+ writel(status, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
+
+ /*
+ * Update the AUX buffer and cache the current buffer index,
+ * as we need to know this and save the data when the trace
+ * is ended out of the interrupt handler. End the trace
+ * if the updating fails.
+ */
+ if (hisi_ptt_update_aux(hisi_ptt, buf_idx, false))
+ hisi_ptt_trace_end(hisi_ptt);
+ else
+ hisi_ptt->trace_ctrl.buf_index = (buf_idx + 1) % HISI_PTT_TRACE_BUF_CNT;
+
+ return IRQ_HANDLED;
+}
+
+static void hisi_ptt_irq_free_vectors(void *pdev)
+{
+ pci_free_irq_vectors(pdev);
+}
+
+static int hisi_ptt_register_irq(struct hisi_ptt *hisi_ptt)
+{
+ struct pci_dev *pdev = hisi_ptt->pdev;
+ int ret;
+
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+ if (ret < 0) {
+ pci_err(pdev, "failed to allocate irq vector, ret = %d\n", ret);
+ return ret;
+ }
+
+ ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_irq_free_vectors, pdev);
+ if (ret < 0)
+ return ret;
+
+ ret = devm_request_threaded_irq(&pdev->dev,
+ pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ),
+ NULL, hisi_ptt_isr, 0,
+ DRV_NAME, hisi_ptt);
+ if (ret) {
+ pci_err(pdev, "failed to request irq %d, ret = %d\n",
+ pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ), ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hisi_ptt_init_filters(struct pci_dev *pdev, void *data)
+{
+ struct hisi_ptt_filter_desc *filter;
+ struct hisi_ptt *hisi_ptt = data;
+
+ /*
+ * We won't fail the probe if filter allocation failed here. The filters
+ * should be partial initialized and users would know which filter fails
+ * through the log. Other functions of PTT device are still available.
+ */
+ filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ if (!filter) {
+ pci_err(hisi_ptt->pdev, "failed to add filter %s\n", pci_name(pdev));
+ return -ENOMEM;
+ }
+
+ filter->devid = PCI_DEVID(pdev->bus->number, pdev->devfn);
+
+ if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) {
+ filter->is_port = true;
+ list_add_tail(&filter->list, &hisi_ptt->port_filters);
+
+ /* Update the available port mask */
+ hisi_ptt->port_mask |= hisi_ptt_get_filter_val(filter->devid, true);
+ } else {
+ list_add_tail(&filter->list, &hisi_ptt->req_filters);
+ }
+
+ return 0;
+}
+
+static void hisi_ptt_release_filters(void *data)
+{
+ struct hisi_ptt_filter_desc *filter, *tmp;
+ struct hisi_ptt *hisi_ptt = data;
+
+ list_for_each_entry_safe(filter, tmp, &hisi_ptt->req_filters, list) {
+ list_del(&filter->list);
+ kfree(filter);
+ }
+
+ list_for_each_entry_safe(filter, tmp, &hisi_ptt->port_filters, list) {
+ list_del(&filter->list);
+ kfree(filter);
+ }
+}
+
+static int hisi_ptt_config_trace_buf(struct hisi_ptt *hisi_ptt)
+{
+ struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
+ struct device *dev = &hisi_ptt->pdev->dev;
+ int i;
+
+ ctrl->trace_buf = devm_kcalloc(dev, HISI_PTT_TRACE_BUF_CNT,
+ sizeof(*ctrl->trace_buf), GFP_KERNEL);
+ if (!ctrl->trace_buf)
+ return -ENOMEM;
+
+ for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; ++i) {
+ ctrl->trace_buf[i].addr = dmam_alloc_coherent(dev, HISI_PTT_TRACE_BUF_SIZE,
+ &ctrl->trace_buf[i].dma,
+ GFP_KERNEL);
+ if (!ctrl->trace_buf[i].addr)
+ return -ENOMEM;
+ }
+
+ /* Configure the trace DMA buffer */
+ for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++) {
+ writel(lower_32_bits(ctrl->trace_buf[i].dma),
+ hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_LO_0 +
+ i * HISI_PTT_TRACE_ADDR_STRIDE);
+ writel(upper_32_bits(ctrl->trace_buf[i].dma),
+ hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_HI_0 +
+ i * HISI_PTT_TRACE_ADDR_STRIDE);
+ }
+ writel(HISI_PTT_TRACE_BUF_SIZE, hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_SIZE);
+
+ return 0;
+}
+
+static int hisi_ptt_init_ctrls(struct hisi_ptt *hisi_ptt)
+{
+ struct pci_dev *pdev = hisi_ptt->pdev;
+ struct pci_bus *bus;
+ int ret;
+ u32 reg;
+
+ INIT_LIST_HEAD(&hisi_ptt->port_filters);
+ INIT_LIST_HEAD(&hisi_ptt->req_filters);
+
+ ret = hisi_ptt_config_trace_buf(hisi_ptt);
+ if (ret)
+ return ret;
+
+ /*
+ * The device range register provides the information about the root
+ * ports which the RCiEP can control and trace. The RCiEP and the root
+ * ports which it supports are on the same PCIe core, with same domain
+ * number but maybe different bus number. The device range register
+ * will tell us which root ports we can support, Bit[31:16] indicates
+ * the upper BDF numbers of the root port, while Bit[15:0] indicates
+ * the lower.
+ */
+ reg = readl(hisi_ptt->iobase + HISI_PTT_DEVICE_RANGE);
+ hisi_ptt->upper_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_UPPER, reg);
+ hisi_ptt->lower_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_LOWER, reg);
+
+ bus = pci_find_bus(pci_domain_nr(pdev->bus), PCI_BUS_NUM(hisi_ptt->upper_bdf));
+ if (bus)
+ pci_walk_bus(bus, hisi_ptt_init_filters, hisi_ptt);
+
+ ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_release_filters, hisi_ptt);
+ if (ret)
+ return ret;
+
+ hisi_ptt->trace_ctrl.on_cpu = -1;
+ return 0;
+}
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
+ const cpumask_t *cpumask = cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev));
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask);
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *hisi_ptt_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static const struct attribute_group hisi_ptt_cpumask_attr_group = {
+ .attrs = hisi_ptt_cpumask_attrs,
+};
+
+/*
+ * Bit 19 indicates the filter type, 1 for Root Port filter and 0 for Requester
+ * filter. Bit[15:0] indicates the filter value, for Root Port filter it's
+ * a bit mask of desired ports and for Requester filter it's the Requester ID
+ * of the desired PCIe function. Bit[18:16] is reserved for extension.
+ *
+ * See hisi_ptt.rst documentation for detailed information.
+ */
+PMU_FORMAT_ATTR(filter, "config:0-19");
+PMU_FORMAT_ATTR(direction, "config:20-23");
+PMU_FORMAT_ATTR(type, "config:24-31");
+PMU_FORMAT_ATTR(format, "config:32-35");
+
+static struct attribute *hisi_ptt_pmu_format_attrs[] = {
+ &format_attr_filter.attr,
+ &format_attr_direction.attr,
+ &format_attr_type.attr,
+ &format_attr_format.attr,
+ NULL
+};
+
+static struct attribute_group hisi_ptt_pmu_format_group = {
+ .name = "format",
+ .attrs = hisi_ptt_pmu_format_attrs,
+};
+
+static const struct attribute_group *hisi_ptt_pmu_groups[] = {
+ &hisi_ptt_cpumask_attr_group,
+ &hisi_ptt_pmu_format_group,
+ &hisi_ptt_tune_group,
+ NULL
+};
+
+static int hisi_ptt_trace_valid_direction(u32 val)
+{
+ /*
+ * The direction values have different effects according to the data
+ * format (specified in the parentheses). TLP set A/B means different
+ * set of TLP types. See hisi_ptt.rst documentation for more details.
+ */
+ static const u32 hisi_ptt_trace_available_direction[] = {
+ 0, /* inbound(4DW) or reserved(8DW) */
+ 1, /* outbound(4DW) */
+ 2, /* {in, out}bound(4DW) or inbound(8DW), TLP set A */
+ 3, /* {in, out}bound(4DW) or inbound(8DW), TLP set B */
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_direction); i++) {
+ if (val == hisi_ptt_trace_available_direction[i])
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int hisi_ptt_trace_valid_type(u32 val)
+{
+ /* Different types can be set simultaneously */
+ static const u32 hisi_ptt_trace_available_type[] = {
+ 1, /* posted_request */
+ 2, /* non-posted_request */
+ 4, /* completion */
+ };
+ int i;
+
+ if (!val)
+ return -EINVAL;
+
+ /*
+ * Walk the available list and clear the valid bits of
+ * the config. If there is any resident bit after the
+ * walk then the config is invalid.
+ */
+ for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_type); i++)
+ val &= ~hisi_ptt_trace_available_type[i];
+
+ if (val)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int hisi_ptt_trace_valid_format(u32 val)
+{
+ static const u32 hisi_ptt_trace_availble_format[] = {
+ 0, /* 4DW */
+ 1, /* 8DW */
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_availble_format); i++) {
+ if (val == hisi_ptt_trace_availble_format[i])
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int hisi_ptt_trace_valid_filter(struct hisi_ptt *hisi_ptt, u64 config)
+{
+ unsigned long val, port_mask = hisi_ptt->port_mask;
+ struct hisi_ptt_filter_desc *filter;
+
+ hisi_ptt->trace_ctrl.is_port = FIELD_GET(HISI_PTT_PMU_FILTER_IS_PORT, config);
+ val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, config);
+
+ /*
+ * Port filters are defined as bit mask. For port filters, check
+ * the bits in the @val are within the range of hisi_ptt->port_mask
+ * and whether it's empty or not, otherwise user has specified
+ * some unsupported root ports.
+ *
+ * For Requester ID filters, walk the available filter list to see
+ * whether we have one matched.
+ */
+ if (!hisi_ptt->trace_ctrl.is_port) {
+ list_for_each_entry(filter, &hisi_ptt->req_filters, list) {
+ if (val == hisi_ptt_get_filter_val(filter->devid, filter->is_port))
+ return 0;
+ }
+ } else if (bitmap_subset(&val, &port_mask, BITS_PER_LONG)) {
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void hisi_ptt_pmu_init_configs(struct hisi_ptt *hisi_ptt, struct perf_event *event)
+{
+ struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
+ u32 val;
+
+ val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, event->attr.config);
+ hisi_ptt->trace_ctrl.filter = val;
+
+ val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
+ ctrl->direction = val;
+
+ val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
+ ctrl->type = val;
+
+ val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
+ ctrl->format = val;
+}
+
+static int hisi_ptt_pmu_event_init(struct perf_event *event)
+{
+ struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
+ int ret;
+ u32 val;
+
+ if (event->cpu < 0) {
+ dev_dbg(event->pmu->dev, "Per-task mode not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type)
+ return -ENOENT;
+
+ ret = hisi_ptt_trace_valid_filter(hisi_ptt, event->attr.config);
+ if (ret < 0)
+ return ret;
+
+ val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
+ ret = hisi_ptt_trace_valid_direction(val);
+ if (ret < 0)
+ return ret;
+
+ val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
+ ret = hisi_ptt_trace_valid_type(val);
+ if (ret < 0)
+ return ret;
+
+ val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
+ return hisi_ptt_trace_valid_format(val);
+}
+
+static void *hisi_ptt_pmu_setup_aux(struct perf_event *event, void **pages,
+ int nr_pages, bool overwrite)
+{
+ struct hisi_ptt_pmu_buf *buf;
+ struct page **pagelist;
+ int i;
+
+ if (overwrite) {
+ dev_warn(event->pmu->dev, "Overwrite mode is not supported\n");
+ return NULL;
+ }
+
+ /* If the pages size less than buffers, we cannot start trace */
+ if (nr_pages < HISI_PTT_TRACE_TOTAL_BUF_SIZE / PAGE_SIZE)
+ return NULL;
+
+ buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ pagelist = kcalloc(nr_pages, sizeof(*pagelist), GFP_KERNEL);
+ if (!pagelist)
+ goto err;
+
+ for (i = 0; i < nr_pages; i++)
+ pagelist[i] = virt_to_page(pages[i]);
+
+ buf->base = vmap(pagelist, nr_pages, VM_MAP, PAGE_KERNEL);
+ if (!buf->base) {
+ kfree(pagelist);
+ goto err;
+ }
+
+ buf->nr_pages = nr_pages;
+ buf->length = nr_pages * PAGE_SIZE;
+ buf->pos = 0;
+
+ kfree(pagelist);
+ return buf;
+err:
+ kfree(buf);
+ return NULL;
+}
+
+static void hisi_ptt_pmu_free_aux(void *aux)
+{
+ struct hisi_ptt_pmu_buf *buf = aux;
+
+ vunmap(buf->base);
+ kfree(buf);
+}
+
+static void hisi_ptt_pmu_start(struct perf_event *event, int flags)
+{
+ struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
+ struct perf_output_handle *handle = &hisi_ptt->trace_ctrl.handle;
+ struct hw_perf_event *hwc = &event->hw;
+ struct device *dev = event->pmu->dev;
+ struct hisi_ptt_pmu_buf *buf;
+ int cpu = event->cpu;
+ int ret;
+
+ hwc->state = 0;
+
+ /* Serialize the perf process if user specified several CPUs */
+ spin_lock(&hisi_ptt->pmu_lock);
+ if (hisi_ptt->trace_ctrl.started) {
+ dev_dbg(dev, "trace has already started\n");
+ goto stop;
+ }
+
+ /*
+ * Handle the interrupt on the same cpu which starts the trace to avoid
+ * context mismatch. Otherwise we'll trigger the WARN from the perf
+ * core in event_function_local(). If CPU passed is offline we'll fail
+ * here, just log it since we can do nothing here.
+ */
+ ret = irq_set_affinity(pci_irq_vector(hisi_ptt->pdev, HISI_PTT_TRACE_DMA_IRQ),
+ cpumask_of(cpu));
+ if (ret)
+ dev_warn(dev, "failed to set the affinity of trace interrupt\n");
+
+ hisi_ptt->trace_ctrl.on_cpu = cpu;
+
+ buf = perf_aux_output_begin(handle, event);
+ if (!buf) {
+ dev_dbg(dev, "aux output begin failed\n");
+ goto stop;
+ }
+
+ buf->pos = handle->head % buf->length;
+
+ hisi_ptt_pmu_init_configs(hisi_ptt, event);
+
+ ret = hisi_ptt_trace_start(hisi_ptt);
+ if (ret) {
+ dev_dbg(dev, "trace start failed, ret = %d\n", ret);
+ perf_aux_output_end(handle, 0);
+ goto stop;
+ }
+
+ spin_unlock(&hisi_ptt->pmu_lock);
+ return;
+stop:
+ event->hw.state |= PERF_HES_STOPPED;
+ spin_unlock(&hisi_ptt->pmu_lock);
+}
+
+static void hisi_ptt_pmu_stop(struct perf_event *event, int flags)
+{
+ struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->state & PERF_HES_STOPPED)
+ return;
+
+ spin_lock(&hisi_ptt->pmu_lock);
+ if (hisi_ptt->trace_ctrl.started) {
+ hisi_ptt_trace_end(hisi_ptt);
+
+ if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt))
+ dev_warn(event->pmu->dev, "Device is still busy\n");
+
+ hisi_ptt_update_aux(hisi_ptt, hisi_ptt->trace_ctrl.buf_index, true);
+ }
+ spin_unlock(&hisi_ptt->pmu_lock);
+
+ hwc->state |= PERF_HES_STOPPED;
+ perf_event_update_userpage(event);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int hisi_ptt_pmu_add(struct perf_event *event, int flags)
+{
+ struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int cpu = event->cpu;
+
+ /* Only allow the cpus on the device's node to add the event */
+ if (!cpumask_test_cpu(cpu, cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev))))
+ return 0;
+
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+ if (flags & PERF_EF_START) {
+ hisi_ptt_pmu_start(event, PERF_EF_RELOAD);
+ if (hwc->state & PERF_HES_STOPPED)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void hisi_ptt_pmu_del(struct perf_event *event, int flags)
+{
+ hisi_ptt_pmu_stop(event, PERF_EF_UPDATE);
+}
+
+static void hisi_ptt_remove_cpuhp_instance(void *hotplug_node)
+{
+ cpuhp_state_remove_instance_nocalls(hisi_ptt_pmu_online, hotplug_node);
+}
+
+static void hisi_ptt_unregister_pmu(void *pmu)
+{
+ perf_pmu_unregister(pmu);
+}
+
+static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt)
+{
+ u16 core_id, sicl_id;
+ char *pmu_name;
+ u32 reg;
+ int ret;
+
+ ret = cpuhp_state_add_instance_nocalls(hisi_ptt_pmu_online,
+ &hisi_ptt->hotplug_node);
+ if (ret)
+ return ret;
+
+ ret = devm_add_action_or_reset(&hisi_ptt->pdev->dev,
+ hisi_ptt_remove_cpuhp_instance,
+ &hisi_ptt->hotplug_node);
+ if (ret)
+ return ret;
+
+ mutex_init(&hisi_ptt->tune_lock);
+ spin_lock_init(&hisi_ptt->pmu_lock);
+
+ hisi_ptt->hisi_ptt_pmu = (struct pmu) {
+ .module = THIS_MODULE,
+ .capabilities = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
+ .task_ctx_nr = perf_sw_context,
+ .attr_groups = hisi_ptt_pmu_groups,
+ .event_init = hisi_ptt_pmu_event_init,
+ .setup_aux = hisi_ptt_pmu_setup_aux,
+ .free_aux = hisi_ptt_pmu_free_aux,
+ .start = hisi_ptt_pmu_start,
+ .stop = hisi_ptt_pmu_stop,
+ .add = hisi_ptt_pmu_add,
+ .del = hisi_ptt_pmu_del,
+ };
+
+ reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION);
+ core_id = FIELD_GET(HISI_PTT_CORE_ID, reg);
+ sicl_id = FIELD_GET(HISI_PTT_SICL_ID, reg);
+
+ pmu_name = devm_kasprintf(&hisi_ptt->pdev->dev, GFP_KERNEL, "hisi_ptt%u_%u",
+ sicl_id, core_id);
+ if (!pmu_name)
+ return -ENOMEM;
+
+ ret = perf_pmu_register(&hisi_ptt->hisi_ptt_pmu, pmu_name, -1);
+ if (ret)
+ return ret;
+
+ return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
+ hisi_ptt_unregister_pmu,
+ &hisi_ptt->hisi_ptt_pmu);
+}
+
+/*
+ * The DMA of PTT trace can only use direct mappings due to some
+ * hardware restriction. Check whether there is no IOMMU or the
+ * policy of the IOMMU domain is passthrough, otherwise the trace
+ * cannot work.
+ *
+ * The PTT device is supposed to behind an ARM SMMUv3, which
+ * should have passthrough the device by a quirk.
+ */
+static int hisi_ptt_check_iommu_mapping(struct pci_dev *pdev)
+{
+ struct iommu_domain *iommu_domain;
+
+ iommu_domain = iommu_get_domain_for_dev(&pdev->dev);
+ if (!iommu_domain || iommu_domain->type == IOMMU_DOMAIN_IDENTITY)
+ return 0;
+
+ return -EOPNOTSUPP;
+}
+
+static int hisi_ptt_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct hisi_ptt *hisi_ptt;
+ int ret;
+
+ ret = hisi_ptt_check_iommu_mapping(pdev);
+ if (ret) {
+ pci_err(pdev, "requires direct DMA mappings\n");
+ return ret;
+ }
+
+ hisi_ptt = devm_kzalloc(&pdev->dev, sizeof(*hisi_ptt), GFP_KERNEL);
+ if (!hisi_ptt)
+ return -ENOMEM;
+
+ hisi_ptt->pdev = pdev;
+ pci_set_drvdata(pdev, hisi_ptt);
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ pci_err(pdev, "failed to enable device, ret = %d\n", ret);
+ return ret;
+ }
+
+ ret = pcim_iomap_regions(pdev, BIT(2), DRV_NAME);
+ if (ret) {
+ pci_err(pdev, "failed to remap io memory, ret = %d\n", ret);
+ return ret;
+ }
+
+ hisi_ptt->iobase = pcim_iomap_table(pdev)[2];
+
+ ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret) {
+ pci_err(pdev, "failed to set 64 bit dma mask, ret = %d\n", ret);
+ return ret;
+ }
+
+ pci_set_master(pdev);
+
+ ret = hisi_ptt_register_irq(hisi_ptt);
+ if (ret)
+ return ret;
+
+ ret = hisi_ptt_init_ctrls(hisi_ptt);
+ if (ret) {
+ pci_err(pdev, "failed to init controls, ret = %d\n", ret);
+ return ret;
+ }
+
+ ret = hisi_ptt_register_pmu(hisi_ptt);
+ if (ret) {
+ pci_err(pdev, "failed to register PMU device, ret = %d", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct pci_device_id hisi_ptt_id_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa12e) },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, hisi_ptt_id_tbl);
+
+static struct pci_driver hisi_ptt_driver = {
+ .name = DRV_NAME,
+ .id_table = hisi_ptt_id_tbl,
+ .probe = hisi_ptt_probe,
+};
+
+static int hisi_ptt_cpu_teardown(unsigned int cpu, struct hlist_node *node)
+{
+ struct hisi_ptt *hisi_ptt;
+ struct device *dev;
+ int target, src;
+
+ hisi_ptt = hlist_entry_safe(node, struct hisi_ptt, hotplug_node);
+ src = hisi_ptt->trace_ctrl.on_cpu;
+ dev = hisi_ptt->hisi_ptt_pmu.dev;
+
+ if (!hisi_ptt->trace_ctrl.started || src != cpu)
+ return 0;
+
+ target = cpumask_any_but(cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev)), cpu);
+ if (target >= nr_cpu_ids) {
+ dev_err(dev, "no available cpu for perf context migration\n");
+ return 0;
+ }
+
+ perf_pmu_migrate_context(&hisi_ptt->hisi_ptt_pmu, src, target);
+
+ /*
+ * Also make sure the interrupt bind to the migrated CPU as well. Warn
+ * the user on failure here.
+ */
+ if (irq_set_affinity(pci_irq_vector(hisi_ptt->pdev, HISI_PTT_TRACE_DMA_IRQ),
+ cpumask_of(target)))
+ dev_warn(dev, "failed to set the affinity of trace interrupt\n");
+
+ hisi_ptt->trace_ctrl.on_cpu = target;
+ return 0;
+}
+
+static int __init hisi_ptt_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRV_NAME, NULL,
+ hisi_ptt_cpu_teardown);
+ if (ret < 0)
+ return ret;
+ hisi_ptt_pmu_online = ret;
+
+ ret = pci_register_driver(&hisi_ptt_driver);
+ if (ret)
+ cpuhp_remove_multi_state(hisi_ptt_pmu_online);
+
+ return ret;
+}
+module_init(hisi_ptt_init);
+
+static void __exit hisi_ptt_exit(void)
+{
+ pci_unregister_driver(&hisi_ptt_driver);
+ cpuhp_remove_multi_state(hisi_ptt_pmu_online);
+}
+module_exit(hisi_ptt_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>");
+MODULE_DESCRIPTION("Driver for HiSilicon PCIe tune and trace device");
diff --git a/drivers/hwtracing/ptt/hisi_ptt.h b/drivers/hwtracing/ptt/hisi_ptt.h
new file mode 100644
index 000000000000..5beb1648c93a
--- /dev/null
+++ b/drivers/hwtracing/ptt/hisi_ptt.h
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Driver for HiSilicon PCIe tune and trace device
+ *
+ * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
+ * Author: Yicong Yang <yangyicong@hisilicon.com>
+ */
+
+#ifndef _HISI_PTT_H
+#define _HISI_PTT_H
+
+#include <linux/bits.h>
+#include <linux/cpumask.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/perf_event.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define DRV_NAME "hisi_ptt"
+
+/*
+ * The definition of the device registers and register fields.
+ */
+#define HISI_PTT_TUNING_CTRL 0x0000
+#define HISI_PTT_TUNING_CTRL_CODE GENMASK(15, 0)
+#define HISI_PTT_TUNING_CTRL_SUB GENMASK(23, 16)
+#define HISI_PTT_TUNING_DATA 0x0004
+#define HISI_PTT_TUNING_DATA_VAL_MASK GENMASK(15, 0)
+#define HISI_PTT_TRACE_ADDR_SIZE 0x0800
+#define HISI_PTT_TRACE_ADDR_BASE_LO_0 0x0810
+#define HISI_PTT_TRACE_ADDR_BASE_HI_0 0x0814
+#define HISI_PTT_TRACE_ADDR_STRIDE 0x8
+#define HISI_PTT_TRACE_CTRL 0x0850
+#define HISI_PTT_TRACE_CTRL_EN BIT(0)
+#define HISI_PTT_TRACE_CTRL_RST BIT(1)
+#define HISI_PTT_TRACE_CTRL_RXTX_SEL GENMASK(3, 2)
+#define HISI_PTT_TRACE_CTRL_TYPE_SEL GENMASK(7, 4)
+#define HISI_PTT_TRACE_CTRL_DATA_FORMAT BIT(14)
+#define HISI_PTT_TRACE_CTRL_FILTER_MODE BIT(15)
+#define HISI_PTT_TRACE_CTRL_TARGET_SEL GENMASK(31, 16)
+#define HISI_PTT_TRACE_INT_STAT 0x0890
+#define HISI_PTT_TRACE_INT_STAT_MASK GENMASK(3, 0)
+#define HISI_PTT_TRACE_INT_MASK 0x0894
+#define HISI_PTT_TUNING_INT_STAT 0x0898
+#define HISI_PTT_TUNING_INT_STAT_MASK BIT(0)
+#define HISI_PTT_TRACE_WR_STS 0x08a0
+#define HISI_PTT_TRACE_WR_STS_WRITE GENMASK(27, 0)
+#define HISI_PTT_TRACE_WR_STS_BUFFER GENMASK(29, 28)
+#define HISI_PTT_TRACE_STS 0x08b0
+#define HISI_PTT_TRACE_IDLE BIT(0)
+#define HISI_PTT_DEVICE_RANGE 0x0fe0
+#define HISI_PTT_DEVICE_RANGE_UPPER GENMASK(31, 16)
+#define HISI_PTT_DEVICE_RANGE_LOWER GENMASK(15, 0)
+#define HISI_PTT_LOCATION 0x0fe8
+#define HISI_PTT_CORE_ID GENMASK(15, 0)
+#define HISI_PTT_SICL_ID GENMASK(31, 16)
+
+/* Parameters of PTT trace DMA part. */
+#define HISI_PTT_TRACE_DMA_IRQ 0
+#define HISI_PTT_TRACE_BUF_CNT 4
+#define HISI_PTT_TRACE_BUF_SIZE SZ_4M
+#define HISI_PTT_TRACE_TOTAL_BUF_SIZE (HISI_PTT_TRACE_BUF_SIZE * \
+ HISI_PTT_TRACE_BUF_CNT)
+/* Wait time for hardware DMA to reset */
+#define HISI_PTT_RESET_TIMEOUT_US 10UL
+#define HISI_PTT_RESET_POLL_INTERVAL_US 1UL
+/* Poll timeout and interval for waiting hardware work to finish */
+#define HISI_PTT_WAIT_TUNE_TIMEOUT_US 1000000UL
+#define HISI_PTT_WAIT_TRACE_TIMEOUT_US 100UL
+#define HISI_PTT_WAIT_POLL_INTERVAL_US 10UL
+
+#define HISI_PCIE_CORE_PORT_ID(devfn) ((PCI_SLOT(devfn) & 0x7) << 1)
+
+/* Definition of the PMU configs */
+#define HISI_PTT_PMU_FILTER_IS_PORT BIT(19)
+#define HISI_PTT_PMU_FILTER_VAL_MASK GENMASK(15, 0)
+#define HISI_PTT_PMU_DIRECTION_MASK GENMASK(23, 20)
+#define HISI_PTT_PMU_TYPE_MASK GENMASK(31, 24)
+#define HISI_PTT_PMU_FORMAT_MASK GENMASK(35, 32)
+
+/**
+ * struct hisi_ptt_tune_desc - Describe tune event for PTT tune
+ * @hisi_ptt: PTT device this tune event belongs to
+ * @name: name of this event
+ * @event_code: code of the event
+ */
+struct hisi_ptt_tune_desc {
+ struct hisi_ptt *hisi_ptt;
+ const char *name;
+ u32 event_code;
+};
+
+/**
+ * struct hisi_ptt_dma_buffer - Describe a single trace buffer of PTT trace.
+ * The detail of the data format is described
+ * in the documentation of PTT device.
+ * @dma: DMA address of this buffer visible to the device
+ * @addr: virtual address of this buffer visible to the cpu
+ */
+struct hisi_ptt_dma_buffer {
+ dma_addr_t dma;
+ void *addr;
+};
+
+/**
+ * struct hisi_ptt_trace_ctrl - Control and status of PTT trace
+ * @trace_buf: array of the trace buffers for holding the trace data.
+ * the length will be HISI_PTT_TRACE_BUF_CNT.
+ * @handle: perf output handle of current trace session
+ * @buf_index: the index of current using trace buffer
+ * @on_cpu: current tracing cpu
+ * @started: current trace status, true for started
+ * @is_port: whether we're tracing root port or not
+ * @direction: direction of the TLP headers to trace
+ * @filter: filter value for tracing the TLP headers
+ * @format: format of the TLP headers to trace
+ * @type: type of the TLP headers to trace
+ */
+struct hisi_ptt_trace_ctrl {
+ struct hisi_ptt_dma_buffer *trace_buf;
+ struct perf_output_handle handle;
+ u32 buf_index;
+ int on_cpu;
+ bool started;
+ bool is_port;
+ u32 direction:2;
+ u32 filter:16;
+ u32 format:1;
+ u32 type:4;
+};
+
+/**
+ * struct hisi_ptt_filter_desc - Descriptor of the PTT trace filter
+ * @list: entry of this descriptor in the filter list
+ * @is_port: the PCI device of the filter is a Root Port or not
+ * @devid: the PCI device's devid of the filter
+ */
+struct hisi_ptt_filter_desc {
+ struct list_head list;
+ bool is_port;
+ u16 devid;
+};
+
+/**
+ * struct hisi_ptt_pmu_buf - Descriptor of the AUX buffer of PTT trace
+ * @length: size of the AUX buffer
+ * @nr_pages: number of pages of the AUX buffer
+ * @base: start address of AUX buffer
+ * @pos: position in the AUX buffer to commit traced data
+ */
+struct hisi_ptt_pmu_buf {
+ size_t length;
+ int nr_pages;
+ void *base;
+ long pos;
+};
+
+/**
+ * struct hisi_ptt - Per PTT device data
+ * @trace_ctrl: the control information of PTT trace
+ * @hotplug_node: node for register cpu hotplug event
+ * @hisi_ptt_pmu: the pum device of trace
+ * @iobase: base IO address of the device
+ * @pdev: pci_dev of this PTT device
+ * @tune_lock: lock to serialize the tune process
+ * @pmu_lock: lock to serialize the perf process
+ * @upper_bdf: the upper BDF range of the PCI devices managed by this PTT device
+ * @lower_bdf: the lower BDF range of the PCI devices managed by this PTT device
+ * @port_filters: the filter list of root ports
+ * @req_filters: the filter list of requester ID
+ * @port_mask: port mask of the managed root ports
+ */
+struct hisi_ptt {
+ struct hisi_ptt_trace_ctrl trace_ctrl;
+ struct hlist_node hotplug_node;
+ struct pmu hisi_ptt_pmu;
+ void __iomem *iobase;
+ struct pci_dev *pdev;
+ struct mutex tune_lock;
+ spinlock_t pmu_lock;
+ u32 upper_bdf;
+ u32 lower_bdf;
+
+ /*
+ * The trace TLP headers can either be filtered by certain
+ * root port, or by the requester ID. Organize the filters
+ * by @port_filters and @req_filters here. The mask of all
+ * the valid ports is also cached for doing sanity check
+ * of user input.
+ */
+ struct list_head port_filters;
+ struct list_head req_filters;
+ u16 port_mask;
+};
+
+#define to_hisi_ptt(pmu) container_of(pmu, struct hisi_ptt, hisi_ptt_pmu)
+
+#endif /* _HISI_PTT_H */
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index d32b02336411..71f7edded9cf 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2817,6 +2817,26 @@ static int arm_smmu_dev_disable_feature(struct device *dev,
}
}
+/*
+ * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
+ * PCIe link and save the data to memory by DMA. The hardware is restricted to
+ * use identity mapping only.
+ */
+#define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
+ (pdev)->device == 0xa12e)
+
+static int arm_smmu_def_domain_type(struct device *dev)
+{
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (IS_HISI_PTT_DEVICE(pdev))
+ return IOMMU_DOMAIN_IDENTITY;
+ }
+
+ return 0;
+}
+
static struct iommu_ops arm_smmu_ops = {
.capable = arm_smmu_capable,
.domain_alloc = arm_smmu_domain_alloc,
@@ -2831,6 +2851,7 @@ static struct iommu_ops arm_smmu_ops = {
.sva_unbind = arm_smmu_sva_unbind,
.sva_get_pasid = arm_smmu_sva_get_pasid,
.page_response = arm_smmu_page_response,
+ .def_domain_type = arm_smmu_def_domain_type,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 9f445f09fcfe..1554021231f9 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -372,6 +372,29 @@ static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa,
return csa->read(offset, true, false);
}
+static inline u64 csdev_access_relaxed_read_pair(struct csdev_access *csa,
+ u32 lo_offset, u32 hi_offset)
+{
+ if (likely(csa->io_mem)) {
+ return readl_relaxed(csa->base + lo_offset) |
+ ((u64)readl_relaxed(csa->base + hi_offset) << 32);
+ }
+
+ return csa->read(lo_offset, true, false) | (csa->read(hi_offset, true, false) << 32);
+}
+
+static inline void csdev_access_relaxed_write_pair(struct csdev_access *csa, u64 val,
+ u32 lo_offset, u32 hi_offset)
+{
+ if (likely(csa->io_mem)) {
+ writel_relaxed((u32)val, csa->base + lo_offset);
+ writel_relaxed((u32)(val >> 32), csa->base + hi_offset);
+ } else {
+ csa->write((u32)val, lo_offset, true, false);
+ csa->write((u32)(val >> 32), hi_offset, true, false);
+ }
+}
+
static inline u32 csdev_access_read32(struct csdev_access *csa, u32 offset)
{
if (likely(csa->io_mem))