From 4edb4ffe39c9bdaec50186d0ca583a7ff01143de Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 5 Jan 2022 23:14:45 +0200 Subject: habanalabs/gaudi: disable CGM permanently Due to the need of SynapseAI to configure all TPC engines from a single QMAN, the driver must disable CGM and never allow the user to enable it. Otherwise, the configuration of the TPC engines will fail. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/debugfs.c | 32 +------------------------------- 1 file changed, 1 insertion(+), 31 deletions(-) (limited to 'drivers/misc/habanalabs/common/debugfs.c') diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index fc084ee5106e..e3ee5f45d20c 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -1054,42 +1054,12 @@ static ssize_t hl_device_write(struct file *f, const char __user *buf, static ssize_t hl_clk_gate_read(struct file *f, char __user *buf, size_t count, loff_t *ppos) { - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - char tmp_buf[200]; - ssize_t rc; - - if (*ppos) - return 0; - - sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask); - rc = simple_read_from_buffer(buf, count, ppos, tmp_buf, - strlen(tmp_buf) + 1); - - return rc; + return 0; } static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, size_t count, loff_t *ppos) { - struct hl_dbg_device_entry *entry = file_inode(f)->i_private; - struct hl_device *hdev = entry->hdev; - u64 value; - ssize_t rc; - - if (hdev->reset_info.in_reset) { - dev_warn_ratelimited(hdev->dev, - "Can't change clock gating during reset\n"); - return 0; - } - - rc = kstrtoull_from_user(buf, count, 16, &value); - if (rc) - return rc; - - hdev->clock_gating_mask = value; - hdev->asic_funcs->set_clock_gating(hdev); - return count; } -- cgit v1.2.3 From d01e6cc97b44d8501b5c24a36d60c9cab94640cd Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 12 Jan 2022 20:08:01 +0200 Subject: habanalabs: enable stop-on-error debugfs setting per ASIC On Goya and Gaudi, the stop-on-error configuration can be set via debugfs. However, in future devices, this configuration will always be enabled. Modify the debugfs node to be allowed only for ASICs that support this dynamic configuration. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- Documentation/ABI/testing/debugfs-driver-habanalabs | 1 + drivers/misc/habanalabs/common/debugfs.c | 6 ++++++ drivers/misc/habanalabs/common/habanalabs.h | 2 ++ drivers/misc/habanalabs/gaudi/gaudi.c | 2 ++ drivers/misc/habanalabs/goya/goya.c | 2 ++ 5 files changed, 13 insertions(+) (limited to 'drivers/misc/habanalabs/common/debugfs.c') diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index 783001a574b3..bcf6915987e4 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -222,6 +222,7 @@ KernelVersion: 5.6 Contact: ogabbay@kernel.org Description: Sets the stop-on_error option for the device engines. Value of "0" is for disable, otherwise enable. + Relevant only for GOYA and GAUDI. What: /sys/kernel/debug/habanalabs/hl/timeout_locked Date: Sep 2021 diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index e3ee5f45d20c..9f0aaf0ef43b 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -1071,6 +1071,9 @@ static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf, char tmp_buf[200]; ssize_t rc; + if (!hdev->asic_prop.configurable_stop_on_err) + return -EOPNOTSUPP; + if (*ppos) return 0; @@ -1089,6 +1092,9 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf, u32 value; ssize_t rc; + if (!hdev->asic_prop.configurable_stop_on_err) + return -EOPNOTSUPP; + if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, "Can't change stop on error during reset\n"); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index b06e2b0812b6..93116fe71ef6 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -561,6 +561,7 @@ struct hl_hints_range { * use-case of doing soft-reset in training (due * to the fact that training runs on multiple * devices) + * @configurable_stop_on_err: is stop-on-error option configurable via debugfs. */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -644,6 +645,7 @@ struct asic_fixed_properties { u8 use_get_power_for_reset_history; u8 supports_soft_reset; u8 allow_inference_soft_reset; + u8 configurable_stop_on_err; }; /** diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index f2242aa3baa2..61aa6dce6dde 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -669,6 +669,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->use_get_power_for_reset_history = true; + prop->configurable_stop_on_err = true; + return 0; } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 3785fb33260d..c8143b6616af 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -483,6 +483,8 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->use_get_power_for_reset_history = true; + prop->configurable_stop_on_err = true; + return 0; } -- cgit v1.2.3 From 9c27896ac1bb83ea5c461ce6f7089d02102a2b21 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 24 Feb 2022 14:58:05 +0800 Subject: habanalabs: Add check for pci_enable_device As the potential failure of the pci_enable_device(), it should be better to check the return value and return error if fails. Fixes: 70b2f993ea4a ("habanalabs: create common folder") Signed-off-by: Jiasheng Jiang Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/debugfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/misc/habanalabs/common/debugfs.c') diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 9f0aaf0ef43b..f18495545854 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -890,6 +890,8 @@ static ssize_t hl_set_power_state(struct file *f, const char __user *buf, pci_set_power_state(hdev->pdev, PCI_D0); pci_restore_state(hdev->pdev); rc = pci_enable_device(hdev->pdev); + if (rc < 0) + return rc; } else if (value == 2) { pci_save_state(hdev->pdev); pci_disable_device(hdev->pdev); -- cgit v1.2.3