aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorEran Ben Elisha <eranbe@mellanox.com>2019-01-17 23:59:12 +0200
committerDavid S. Miller <davem@davemloft.net>2019-01-18 14:51:22 -0800
commitc7af343b4e33578b7de91786a3f639c8cfa0d97b (patch)
treee60cab858369020cc94ce5f569e82d53a62e57f0 /include
parentdevlink: Add health reporter create/destroy functionality (diff)
downloadlinux-dev-c7af343b4e33578b7de91786a3f639c8cfa0d97b.tar.xz
linux-dev-c7af343b4e33578b7de91786a3f639c8cfa0d97b.zip
devlink: Add health report functionality
Upon error discover, every driver can report it to the devlink health mechanism via devlink_health_report function, using the appropriate reporter registered to it. Driver can pass error specific context which will be delivered to it as part of the dump / recovery callbacks. Once an error is reported, devlink health will do the following actions: * A log is being send to the kernel trace events buffer * Health status and statistics are being updated for the reporter instance * Object dump is being taken and stored at the reporter instance (as long as there is no other dump which is already stored) * Auto recovery attempt is being done. depends on: - Auto Recovery configuration - Grace period vs. time since last recover Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com> Reviewed-by: Moshe Shemesh <moshe@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/net/devlink.h9
-rw-r--r--include/trace/events/devlink.h62
2 files changed, 71 insertions, 0 deletions
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 7fe30d67678a..a81a1b7a67d7 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -641,6 +641,8 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
void *
devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
+int devlink_health_report(struct devlink_health_reporter *reporter,
+ const char *msg, void *priv_ctx);
#else
static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -979,6 +981,13 @@ devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
{
return NULL;
}
+
+static inline int
+devlink_health_report(struct devlink_health_reporter *reporter,
+ const char *msg, void *priv_ctx)
+{
+ return 0;
+}
#endif
#endif /* _NET_DEVLINK_H_ */
diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h
index 44acfbca1266..7e39d2fc7c75 100644
--- a/include/trace/events/devlink.h
+++ b/include/trace/events/devlink.h
@@ -46,6 +46,65 @@ TRACE_EVENT(devlink_hwmsg,
(int) __entry->len, __get_dynamic_array(buf), __entry->len)
);
+TRACE_EVENT(devlink_health_report,
+ TP_PROTO(const struct devlink *devlink, const char *reporter_name,
+ const char *msg),
+
+ TP_ARGS(devlink, reporter_name, msg),
+
+ TP_STRUCT__entry(
+ __string(bus_name, devlink->dev->bus->name)
+ __string(dev_name, dev_name(devlink->dev))
+ __string(driver_name, devlink->dev->driver->name)
+ __string(reporter_name, msg)
+ __string(msg, msg)
+ ),
+
+ TP_fast_assign(
+ __assign_str(bus_name, devlink->dev->bus->name);
+ __assign_str(dev_name, dev_name(devlink->dev));
+ __assign_str(driver_name, devlink->dev->driver->name);
+ __assign_str(reporter_name, reporter_name);
+ __assign_str(msg, msg);
+ ),
+
+ TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: %s",
+ __get_str(bus_name), __get_str(dev_name),
+ __get_str(driver_name), __get_str(reporter_name),
+ __get_str(msg))
+);
+
+TRACE_EVENT(devlink_health_recover_aborted,
+ TP_PROTO(const struct devlink *devlink, const char *reporter_name,
+ bool health_state, u64 time_since_last_recover),
+
+ TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover),
+
+ TP_STRUCT__entry(
+ __string(bus_name, devlink->dev->bus->name)
+ __string(dev_name, dev_name(devlink->dev))
+ __string(driver_name, devlink->dev->driver->name)
+ __string(reporter_name, reporter_name)
+ __field(bool, health_state)
+ __field(u64, time_since_last_recover)
+ ),
+
+ TP_fast_assign(
+ __assign_str(bus_name, devlink->dev->bus->name);
+ __assign_str(dev_name, dev_name(devlink->dev));
+ __assign_str(driver_name, devlink->dev->driver->name);
+ __assign_str(reporter_name, reporter_name);
+ __entry->health_state = health_state;
+ __entry->time_since_last_recover = time_since_last_recover;
+ ),
+
+ TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: health_state=%d time_since_last_recover = %llu recover aborted",
+ __get_str(bus_name), __get_str(dev_name),
+ __get_str(driver_name), __get_str(reporter_name),
+ __entry->health_state,
+ __entry->time_since_last_recover)
+);
+
#endif /* _TRACE_DEVLINK_H */
/* This part must be outside protection */
@@ -64,6 +123,9 @@ static inline void trace_devlink_hwmsg(const struct devlink *devlink,
{
}
+static inline void trace_devlink_health(const char *msg)
+{
+}
#endif /* _TRACE_DEVLINK_H */
#endif