diff --git a/config.yaml b/config.yaml
index 60924863..a273a583 100644
--- a/config.yaml
+++ b/config.yaml
@@ -21,6 +21,12 @@ options:
     description: |
       Channel to install the DCGM snap if the hardware has NVIDIA GPU. By default, it will install
       from latest/stable
+  smartctl-exporter-snap-channel:
+    type: string
+    default: "latest/stable"
+    description: |
+      Channel from which to install the smartctl-exporter snap if the hardware has S.M.A.R.T. disks. By default,
+      it installs from latest/stable.
   exporter-log-level:
     type: string
     default: "INFO"
diff --git a/src/charm.py b/src/charm.py
index 62ea2a9a..11900a41 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -12,7 +12,7 @@
 from ops.framework import EventBase, StoredState
 from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus
 
-from hw_tools import HWTool, HWToolHelper, detect_available_tools
+from hw_tools import HWTool, HWToolHelper, detect_available_tools, remove_legacy_smartctl_exporter
 from service import BaseExporter, DCGMExporter, ExporterError, HardwareExporter, SmartCtlExporter
 
 logger = logging.getLogger(__name__)
@@ -81,10 +81,10 @@ def exporters(self) -> List[BaseExporter]:
             )
 
         if stored_tools & SmartCtlExporter.hw_tools():
-            exporters.append(SmartCtlExporter(self.charm_dir, self.model.config))
+            exporters.append(SmartCtlExporter(self.model.config))
 
         if stored_tools & DCGMExporter.hw_tools():
-            exporters.append(DCGMExporter(self.model.config))
+            exporters.append(DCGMExporter(self.charm_dir, self.model.config))
 
         return exporters
 
@@ -97,6 +97,8 @@ def get_stored_tools(self) -> Set[HWTool]:
         if not self._stored.stored_tools:  # type: ignore[truthy-function]
             available_tools = detect_available_tools()  # type: ignore[unreachable]
             self._stored.stored_tools = {tool.value for tool in available_tools}
+        if "smartctl" in self._stored.stored_tools:  # type: ignore[operator]
+            self._stored.stored_tools.remove("smartctl")  # type: ignore[attr-defined]
         return {HWTool(value) for value in self._stored.stored_tools}  # type: ignore[attr-defined]
 
     def _on_redetect_hardware(self, event: ops.ActionEvent) -> None:
@@ -130,6 +132,8 @@ def _on_install_or_upgrade(self, event: EventBase) -> None:
         """Install or upgrade charm."""
         self.model.unit.status = MaintenanceStatus("Installing resources...")
 
+        remove_legacy_smartctl_exporter()
+
         stored_tools = self.get_stored_tools()
 
         msg: str
diff --git a/src/config.py b/src/config.py
index 47bcd17b..a5fe935b 100644
--- a/src/config.py
+++ b/src/config.py
@@ -36,20 +36,6 @@ class HardwareExporterSettings(ExporterSettings):  # pylint: disable = too-few-p
 HARDWARE_EXPORTER_SETTINGS = HardwareExporterSettings()
 
 
-class SmartCtlExporterSettings(ExporterSettings):  # pylint: disable = too-few-public-methods
-    """Constant settings for SmartCtl Exporter."""
-
-    name: str = "smartctl-exporter"
-    config_path: Path = Path(f"/etc/{name}-config.yaml")
-    service_path: Path = Path(f"/etc/systemd/system/{name}.service")
-    config_template: str = f"{name}-config.yaml.j2"
-    service_template: str = f"{name}.service.j2"
-    crash_msg: str = "SmartCtl exporter crashed unexpectedly, please refer to systemd logs..."
-
-
-SMARTCTL_EXPORTER_SETTINGS = SmartCtlExporterSettings()
-
-
 class SystemVendor(str, Enum):
     """Different hardware system vendor."""
 
@@ -77,8 +63,7 @@ class HWTool(str, Enum):
     IPMI_SEL = "ipmi_sel"
     IPMI_SENSOR = "ipmi_sensor"
     REDFISH = "redfish"
-    SMARTCTL = "smartctl"
-    SMARTCTL_EXPORTER = "smartctl_exporter"
+    SMARTCTL_EXPORTER = "smartctl-exporter"
     DCGM = "dcgm"
 
 
diff --git a/src/gpu_metrics/dcgm_metrics.csv b/src/gpu_metrics/dcgm_metrics.csv
new file mode 100644
index 00000000..5fb0a4d8
--- /dev/null
+++ b/src/gpu_metrics/dcgm_metrics.csv
@@ -0,0 +1,160 @@
+###############################################################################
+# [ WARNING ]
+# Configuration file maintained by Juju. Local changes may be overwritten.
+###############################################################################
+
+# Selected metrics for dcgm-exporter
+# Default metric list https://github.com/NVIDIA/dcgm-exporter/blob/main/etc/default-counters.csv
+
+# Format
+# If line starts with a '#' it is considered a comment
+# Boolean values decode to: 1 = enabled, 0 = disabled
+# DCGM FIELD, Prometheus metric type, help message
+
+
+
+
+# DEFAULT METRICS
+# Clocks
+DCGM_FI_DEV_SM_CLOCK,    gauge, SM clock frequency (in MHz).
+DCGM_FI_DEV_MEM_CLOCK,   gauge, Memory clock frequency (in MHz).
+
+# Temperature
+DCGM_FI_DEV_MEMORY_TEMP, gauge, Memory temperature (in C).
+DCGM_FI_DEV_GPU_TEMP,    gauge, GPU temperature (in C).
+
+# Power
+DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION, counter, Total energy consumption since boot (in mJ).
+DCGM_FI_DEV_POWER_USAGE,                gauge, Power draw (in W).
+
+# PCIE
+DCGM_FI_PROF_PCIE_TX_BYTES,      counter, Total number of bytes transmitted through PCIe TX via NVML.
+DCGM_FI_PROF_PCIE_RX_BYTES,      counter, Total number of bytes received through PCIe RX via NVML.
+DCGM_FI_DEV_PCIE_REPLAY_COUNTER, counter, Total number of PCIe retries.
+
+# Utilization (the sample period varies depending on the product)
+DCGM_FI_DEV_GPU_UTIL,      gauge, GPU utilization (in %).
+DCGM_FI_DEV_MEM_COPY_UTIL, gauge, Memory utilization (in %).
+DCGM_FI_DEV_ENC_UTIL,      gauge, Encoder utilization (in %).
+DCGM_FI_DEV_DEC_UTIL,      gauge, Decoder utilization (in %).
+
+# Errors and violations
+DCGM_FI_DEV_XID_ERRORS, gauge, Value of the last XID error encountered.
+
+# Memory usage
+DCGM_FI_DEV_FB_FREE, gauge, Frame buffer memory free (in MB).
+DCGM_FI_DEV_FB_USED, gauge, Frame buffer memory used (in MB).
+
+# NVLink
+DCGM_FI_DEV_NVLINK_BANDWIDTH_TOTAL, counter, Total number of NVLink bandwidth counters for all lanes
+
+# VGPU License status 
+DCGM_FI_DEV_VGPU_LICENSE_STATUS, gauge, vGPU License status
+
+# Remapped rows
+DCGM_FI_DEV_UNCORRECTABLE_REMAPPED_ROWS, counter, Number of remapped rows for uncorrectable errors
+DCGM_FI_DEV_CORRECTABLE_REMAPPED_ROWS,   counter, Number of remapped rows for correctable errors
+DCGM_FI_DEV_ROW_REMAP_FAILURE,             gauge, Whether remapping of rows has failed
+
+# Static configuration information and features
+DCGM_FI_DRIVER_VERSION, label, Driver Version
+
+
+
+
+# CUSTOM METRICS
+# Clocks
+DCGM_FI_DEV_VIDEO_CLOCK, gauge, Video encoder/decoder clock (in MHz).
+
+# Temperature
+DCGM_FI_DEV_FAN_SPEED, gauge, Fan speed (in 0-100%)
+
+# Power
+DCGM_FI_DEV_POWER_USAGE_INSTANT, gauge, Current instantaneous power usage (in W).
+
+# Errors and violations
+DCGM_FI_DEV_CLOCK_THROTTLE_REASONS, counter, Throttling reasons bitmask
+DCGM_FI_DEV_POWER_VIOLATION,        counter, Throttling duration due to power constraints (in us).
+DCGM_FI_DEV_THERMAL_VIOLATION,      counter, Throttling duration due to thermal constraints (in us).
+DCGM_FI_DEV_SYNC_BOOST_VIOLATION,   counter, Throttling duration due to sync-boost constraints (in us).
+DCGM_FI_DEV_BOARD_LIMIT_VIOLATION,  counter, Throttling duration due to board limit constraints (in us).
+DCGM_FI_DEV_LOW_UTIL_VIOLATION,     counter, Throttling duration due to low utilization (in us).
+DCGM_FI_DEV_RELIABILITY_VIOLATION,  counter, Throttling duration due to reliability constraints (in us).
+
+# Memory usage
+DCGM_FI_DEV_FB_RESERVED,     gauge, Frame buffer memory reserved (in MB).
+DCGM_FI_DEV_FB_USED_PERCENT, gauge, Frame buffer percentage used (in 0-100%) - Used/(Total - Reserved)
+
+# ECC
+DCGM_FI_DEV_ECC_SBE_VOL_TOTAL, counter, Total number of single-bit volatile ECC errors.
+DCGM_FI_DEV_ECC_DBE_VOL_TOTAL, counter, Total number of double-bit volatile ECC errors.
+DCGM_FI_DEV_ECC_SBE_AGG_TOTAL, counter, Total number of single-bit persistent ECC errors.
+DCGM_FI_DEV_ECC_DBE_AGG_TOTAL, counter, Total number of double-bit persistent ECC errors.
+
+# Retired pages
+DCGM_FI_DEV_RETIRED_SBE,     counter, Total number of retired pages due to single-bit errors.
+DCGM_FI_DEV_RETIRED_DBE,     counter, Total number of retired pages due to double-bit errors.
+DCGM_FI_DEV_RETIRED_PENDING, counter, Total number of pages pending retirement.
+
+# NVLink
+DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL, counter, Total number of NVLink flow-control CRC errors.
+DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_TOTAL, counter, Total number of NVLink data CRC errors.
+DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_TOTAL,   counter, Total number of NVLink retries.
+DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_TOTAL, counter, Total number of NVLink recovery errors.
+
+# VGPU
+DCGM_FI_DEV_VGPU_UTILIZATIONS,   gauge, vGPUs utilization
+
+# Bar
+DCGM_FI_DEV_BAR1_USED,  gauge, Used BAR1 (in MB)
+DCGM_FI_DEV_BAR1_FREE,  gauge, Free BAR1 (in MB)
+
+# DCP metrics
+DCGM_FI_PROF_GR_ENGINE_ACTIVE,   gauge, Ratio of time the graphics engine is active.
+DCGM_FI_PROF_SM_ACTIVE,          gauge, The ratio of cycles an SM has at least 1 warp assigned.
+DCGM_FI_PROF_SM_OCCUPANCY,       gauge, The ratio of number of warps resident on an SM.
+DCGM_FI_PROF_PIPE_TENSOR_ACTIVE, gauge, Ratio of cycles the tensor (HMMA) pipe is active.
+DCGM_FI_PROF_DRAM_ACTIVE,        gauge, Ratio of cycles the device memory interface is active sending or receiving data.
+DCGM_FI_PROF_PIPE_FP64_ACTIVE,   gauge, Ratio of cycles the fp64 pipes are active.
+DCGM_FI_PROF_PIPE_FP32_ACTIVE,   gauge, Ratio of cycles the fp32 pipes are active.
+DCGM_FI_PROF_PIPE_FP16_ACTIVE,   gauge, Ratio of cycles the fp16 pipes are active.
+DCGM_FI_PROF_PCIE_TX_BYTES,      gauge, The rate of data transmitted over the PCIe bus - including both protocol headers and data payloads - in bytes per second.
+DCGM_FI_PROF_PCIE_RX_BYTES,      gauge, The rate of data received over the PCIe bus - including both protocol headers and data payloads - in bytes per second.
+
+# Static configuration information and features
+DCGM_FI_NVML_VERSION,                label, NVML Version
+DCGM_FI_DEV_BRAND,                   label, Device Brand
+DCGM_FI_DEV_SERIAL,                  label, Device Serial Number
+DCGM_FI_DEV_NAME,                    label, Device Name
+DCGM_FI_DEV_MINOR_NUMBER,            label, Device node minor (/dev/nvidia#)
+DCGM_FI_DEV_CUDA_COMPUTE_CAPABILITY, label, Cuda compute capability for the device (The major version is the upper 32 bits and the minor version is the lower 32 bits)
+DCGM_FI_DEV_OEM_INFOROM_VER,         label, OEM inforom version
+DCGM_FI_DEV_ECC_INFOROM_VER,         label, ECC inforom version
+DCGM_FI_DEV_POWER_INFOROM_VER,       label, Power management object inforom version
+DCGM_FI_DEV_INFOROM_IMAGE_VER,       label, Inforom image version
+DCGM_FI_DEV_VBIOS_VERSION,           label, VBIOS version of the device
+
+DCGM_FI_DEV_COMPUTE_MODE,            label, Compute mode
+DCGM_FI_DEV_PERSISTENCE_MODE,        label, Persistence mode (1 or 0)
+DCGM_FI_DEV_CC_MODE,                 label, ConfidentialCompute/AmpereProtectedMemory status (1 or 0)
+DCGM_FI_DEV_ECC_CURRENT,             label, Current ECC mode
+DCGM_FI_DEV_VIRTUAL_MODE,            label, Virtualization mode
+DCGM_FI_DEV_AUTOBOOST,               label, Auto-boost enabled
+
+DCGM_FI_DEV_BAR1_TOTAL,              label, Total BAR1 (in MB)
+
+DCGM_FI_DEV_MAX_SM_CLOCK,            label, Maximum supported SM clock
+DCGM_FI_DEV_MAX_MEM_CLOCK,           label, Maximum supported Memory clock
+
+DCGM_FI_DEV_GPU_MAX_OP_TEMP,         label, Maximum operating temperature
+DCGM_FI_DEV_SLOWDOWN_TEMP,           label, Slowdown temperature
+DCGM_FI_DEV_SHUTDOWN_TEMP,           label, Shutdown temperature
+
+DCGM_FI_DEV_POWER_MGMT_LIMIT,        label, Current Power limit
+DCGM_FI_DEV_POWER_MGMT_LIMIT_MIN,    label, Minimum Power limit
+DCGM_FI_DEV_POWER_MGMT_LIMIT_MAX,    label, Maximum Power limit
+DCGM_FI_DEV_ENFORCED_POWER_LIMIT,    label, Effective Power limit that the driver enforces after taking into account all limiters
+
+DCGM_FI_DEV_FB_TOTAL,                label, Total Frame buffer (in MB)
+
+DCGM_FI_DEV_COUNT,                   label, Number of devices on the node
diff --git a/src/grafana_dashboards/GPU.json b/src/grafana_dashboards/GPU.json
new file mode 100644
index 00000000..74a2fbde
--- /dev/null
+++ b/src/grafana_dashboards/GPU.json
@@ -0,0 +1,1046 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "$$hashKey": "object:192",
+        "builtIn": 1,
+        "datasource": {
+          "type": "datasource",
+          "uid": "grafana"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "description": "This dashboard displays metrics for NVIDIA and AMD GPUs",
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "gnetId": 12239,
+  "graphTooltip": 0,
+  "id": 228,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineStyle": {
+              "fill": "solid"
+            },
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "links": [],
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "celsius"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 18,
+        "x": 0,
+        "y": 0
+      },
+      "id": 12,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "lastNotNull",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "9.5.3",
+      "targets": [
+        {
+          "datasource": {
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "DCGM_FI_DEV_GPU_TEMP{Hostname=~\"$instance\", gpu=~\"$nvidia_gpu\"}",
+          "instant": false,
+          "interval": "",
+          "legendFormat": "NVIDIA GPU: {{gpu}}",
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "label_replace(node_hwmon_temp_celsius{instance=~\"^$instance.*\", chip=~\"$amd_gpu\"} * on(chip) group_right() node_drm_card_info{chip=~\"$amd_gpu\"}, \"gpu\", \"$1\", \"card\", \"card([0-9]+)\")\n",
+          "hide": false,
+          "interval": "",
+          "legendFormat": "AMD GPU: {{gpu}}",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "GPU Temperature",
+      "transformations": [],
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "max": 100,
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "#EAB839",
+                "value": 83
+              },
+              {
+                "color": "red",
+                "value": 87
+              }
+            ]
+          },
+          "unit": "celsius"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 6,
+        "x": 18,
+        "y": 0
+      },
+      "id": 14,
+      "options": {
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "mean"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "showThresholdLabels": false,
+        "showThresholdMarkers": true
+      },
+      "pluginVersion": "9.5.3",
+      "targets": [
+        {
+          "datasource": {
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "avg(DCGM_FI_DEV_GPU_TEMP{Hostname=~\"$instance\", gpu=~\"$nvidia_gpu\"})",
+          "interval": "",
+          "legendFormat": "NVIDIA GPUs",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "avg(node_hwmon_temp_celsius{instance=~\"^$instance.*\", chip=~\"$amd_gpu\"})",
+          "hide": false,
+          "legendFormat": "AMD GPUs",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "GPU Avg. Temp",
+      "type": "gauge"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "links": [],
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "watt"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 18,
+        "x": 0,
+        "y": 8
+      },
+      "id": 10,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "lastNotNull",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "9.5.3",
+      "targets": [
+        {
+          "datasource": {
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "DCGM_FI_DEV_POWER_USAGE{Hostname=~\"$instance\", gpu=~\"$nvidia_gpu\"}",
+          "interval": "",
+          "legendFormat": "NVIDIA GPU: {{gpu}}",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "label_replace(node_hwmon_power_average_watt{agent_hostname=~\"$instance\", chip=~\"$amd_gpu\"} * on(chip) group_right() node_drm_card_info{chip=~\"$amd_gpu\"}, \"gpu\", \"$1\", \"card\", \"card([0-9]+)\")",
+          "hide": false,
+          "legendFormat": "AMD GPU: {{gpu}}",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "GPU Power Usage",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "max": 2400,
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "#EAB839",
+                "value": 1800
+              },
+              {
+                "color": "red",
+                "value": 2200
+              }
+            ]
+          },
+          "unit": "watt"
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "C"
+            },
+            "properties": [
+              {
+                "id": "displayName",
+                "value": "Total"
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 6,
+        "x": 18,
+        "y": 8
+      },
+      "id": 16,
+      "links": [],
+      "options": {
+        "orientation": "vertical",
+        "reduceOptions": {
+          "calcs": [
+            "sum"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "showThresholdLabels": false,
+        "showThresholdMarkers": true
+      },
+      "pluginVersion": "9.5.3",
+      "targets": [
+        {
+          "datasource": {
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "sum(DCGM_FI_DEV_POWER_USAGE{Hostname=~\"$instance\", gpu=~\"$nvidia_gpu\"})",
+          "instant": true,
+          "interval": "",
+          "legendFormat": "NVIDIA GPUs",
+          "range": false,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "exemplar": false,
+          "expr": "sum(node_hwmon_power_average_watt{instance=~\"^$instance.*\", chip=~\"$amd_gpu\"})",
+          "hide": false,
+          "instant": true,
+          "legendFormat": "AMD GPUs",
+          "range": false,
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "name": "Expression",
+            "type": "__expr__",
+            "uid": "__expr__"
+          },
+          "expression": "($A+$B)",
+          "hide": false,
+          "refId": "C",
+          "type": "math"
+        }
+      ],
+      "title": "GPU Total Power",
+      "type": "gauge"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "links": [],
+          "mappings": [],
+          "max": 100,
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 16
+      },
+      "id": 6,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "lastNotNull",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "9.5.3",
+      "targets": [
+        {
+          "datasource": {
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "DCGM_FI_DEV_GPU_UTIL{Hostname=~\"$instance\", gpu=~\"$nvidia_gpu\"}",
+          "interval": "",
+          "legendFormat": "NVIDIA GPU: {{gpu}}",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "label_replace(node_drm_gpu_busy_percent{instance=~\"^$instance.*\"} * on(card) group_right() node_drm_card_info{chip=~\"$amd_gpu\"}, \"gpu\", \"$1\", \"card\", \"card([0-9]+)\")\n",
+          "hide": false,
+          "legendFormat": "AMD GPU: {{gpu}}",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "GPU Utilization",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "links": [],
+          "mappings": [],
+          "max": 100,
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 16
+      },
+      "id": 17,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "lastNotNull",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "9.5.3",
+      "targets": [
+        {
+          "datasource": {
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "DCGM_FI_DEV_FAN_SPEED{Hostname=~\"$instance\", gpu=~\"$nvidia_gpu\"}",
+          "interval": "",
+          "legendFormat": "NVIDIA GPU: {{gpu}}",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "label_replace(\n  (\n    node_hwmon_fan_rpm{instance=~\"^$instance.*\", chip=~\"$amd_gpu\"} * on(chip) group_right() node_drm_card_info{chip=~\"$amd_gpu\"}\n  ) /\n  (\n    node_hwmon_fan_max_rpm{instance=~\"^$instance.*\", chip=~\"$amd_gpu\"} * on(chip) group_right() node_drm_card_info{chip=~\"$amd_gpu\"}\n  ) * 100,\n  \"gpu\", \"$1\", \"card\", \"card([0-9]+)\"\n)",
+          "hide": false,
+          "legendFormat": "AMD GPU: {{gpu}}",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "GPU Fan Speed",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "links": [],
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "hertz"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 24
+      },
+      "id": 2,
+      "interval": "",
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "lastNotNull",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "9.5.3",
+      "targets": [
+        {
+          "datasource": {
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "DCGM_FI_DEV_MEM_CLOCK{Hostname=~\"$instance\", gpu=~\"$nvidia_gpu\"} * 1000000",
+          "format": "time_series",
+          "interval": "",
+          "intervalFactor": 1,
+          "legendFormat": "NVIDIA GPU: {{gpu}}",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "label_replace(node_hwmon_freq_freq_mhz{agent_hostname=~\"^$instance.*\", chip=~\"$amd_gpu\", sensor=\"sclk\"} * on(chip) group_right() node_drm_card_info{chip=~\"$amd_gpu\"} * 1000000, \"gpu\", \"$1\", \"card\", \"card([0-9]+)\")",
+          "hide": false,
+          "legendFormat": "AMD GPU: {{gpu}}",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "GPU Memory Clocks",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "links": [],
+          "mappings": [],
+          "max": 100,
+          "min": 0,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 24
+      },
+      "id": 18,
+      "options": {
+        "legend": {
+          "calcs": [
+            "mean",
+            "lastNotNull",
+            "max"
+          ],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "9.5.3",
+      "targets": [
+        {
+          "datasource": {
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "DCGM_FI_DEV_MEM_COPY_UTIL{Hostname=~\"$instance\", gpu=~\"$nvidia_gpu\"}",
+          "interval": "",
+          "legendFormat": "NVIDIA GPU: {{gpu}}",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "$datasource"
+          },
+          "editorMode": "code",
+          "expr": "label_replace(\n    (\n        node_drm_memory_vram_used_bytes{instance=~\"^$instance.*\"} * on(card) group_right() node_drm_card_info{chip=~\"$amd_gpu\"}\n    ) \n    / \n    (\n        node_drm_memory_vram_size_bytes{instance=~\"^$instance.*\"} * on(card) group_right() node_drm_card_info{chip=~\"$amd_gpu\"}\n    ) * 100,\n    \"gpu\", \"$1\", \"card\", \"card([0-9]+)\"\n)",
+          "hide": false,
+          "legendFormat": "AMD GPU: {{gpu}}",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "GPU Memory Utilization",
+      "type": "timeseries"
+    }
+  ],
+  "refresh": "5s",
+  "schemaVersion": 38,
+  "style": "dark",
+  "tags": [],
+  "templating": {
+    "list": [
+      {
+        "current": {
+          "selected": false,
+          "text": "juju_cos_f8e88307-f0df-40a4-88f1-745e6ab57e8e_prometheus_0",
+          "value": "juju_cos_f8e88307-f0df-40a4-88f1-745e6ab57e8e_prometheus_0"
+        },
+        "hide": 0,
+        "includeAll": false,
+        "multi": false,
+        "name": "datasource",
+        "options": [],
+        "query": "prometheus",
+        "queryValue": "",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "type": "datasource"
+      },
+      {
+        "current": {
+          "selected": true,
+          "text": [
+            "All"
+          ],
+          "value": [
+            "$__all"
+          ]
+        },
+        "datasource": {
+          "uid": "$datasource"
+        },
+        "definition": "label_values(node_hwmon_chip_names,agent_hostname)",
+        "hide": 0,
+        "includeAll": true,
+        "label": "Host",
+        "multi": true,
+        "name": "instance",
+        "options": [],
+        "query": {
+          "query": "label_values(node_hwmon_chip_names,agent_hostname)",
+          "refId": "PrometheusVariableQueryEditor-VariableQuery"
+        },
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "tagValuesQuery": "",
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      },
+      {
+        "current": {
+          "selected": true,
+          "text": [
+            "All"
+          ],
+          "value": [
+            "$__all"
+          ]
+        },
+        "datasource": {
+          "uid": "$datasource"
+        },
+        "definition": "label_values(DCGM_FI_DEV_GPU_TEMP, gpu)",
+        "hide": 2,
+        "includeAll": true,
+        "label": "NVIDIA GPU",
+        "multi": true,
+        "name": "nvidia_gpu",
+        "options": [],
+        "query": "label_values(DCGM_FI_DEV_GPU_TEMP, gpu)",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "tagValuesQuery": "",
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      },
+      {
+        "current": {
+          "selected": true,
+          "text": [
+            "All"
+          ],
+          "value": [
+            "$__all"
+          ]
+        },
+        "datasource": {
+          "uid": "$datasource"
+        },
+        "definition": "label_values(node_drm_card_info,chip)",
+        "hide": 2,
+        "includeAll": true,
+        "label": "AMD GPU",
+        "multi": true,
+        "name": "amd_gpu",
+        "options": [],
+        "query": {
+          "query": "label_values(node_drm_card_info,chip)",
+          "refId": "PrometheusVariableQueryEditor-VariableQuery"
+        },
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "tagValuesQuery": "",
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      }
+    ]
+  },
+  "time": {
+    "from": "now-15m",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": [
+      "5s",
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ]
+  },
+  "timezone": "",
+  "title": "GPU Dashboard",
+  "uid": "Oxed_c6Wzv",
+  "version": 1,
+  "weekStart": ""
+}
diff --git a/src/hw_tools.py b/src/hw_tools.py
index 76e883a8..a3ec7bee 100644
--- a/src/hw_tools.py
+++ b/src/hw_tools.py
@@ -3,21 +3,19 @@
 Define strategy for install, remove and verifier for different hardware.
 """
 
-import io
 import logging
 import os
 import shutil
 import stat
 import subprocess
-import tarfile
 from abc import ABCMeta, abstractmethod
-from http import HTTPStatus
 from pathlib import Path
 from typing import Dict, List, Set, Tuple
 
 import requests
 import urllib3
 from charms.operator_libs_linux.v0 import apt
+from charms.operator_libs_linux.v1 import systemd
 from charms.operator_libs_linux.v2 import snap
 from ops.model import ModelError, Resources
 
@@ -63,14 +61,6 @@ def __init__(self, tool: HWTool, path: Path):
         self.message = f"Tool: {tool} path: {path} size is zero"
 
 
-class ResourceInstallationError(Exception):
-    """Exception raised when a hardware tool installation fails."""
-
-    def __init__(self, tool: HWTool):
-        """Init."""
-        super().__init__(f"Installation failed for tool: {tool}")
-
-
 def copy_to_snap_common_bin(source: Path, filename: str) -> None:
     """Copy file to $SNAP_COMMON/bin folder."""
     Path(f"{SNAP_COMMON}/bin").mkdir(parents=False, exist_ok=True)
@@ -239,6 +229,16 @@ def __init__(self, channel: str) -> None:
         self.channel = channel
 
 
+class SmartCtlExporterStrategy(SnapStrategy):
+    """Snap strategy for the smartctl exporter tool."""
+
+    _name = HWTool.SMARTCTL_EXPORTER
+
+    def __init__(self, channel: str) -> None:
+        """Store the snap channel this strategy is bound to."""
+        self.channel = channel
+
+
 class StorCLIStrategy(TPRStrategyABC):
     """Strategy to install storcli."""
 
@@ -444,75 +444,6 @@ def check(self) -> bool:
         return True
 
 
-class SmartCtlStrategy(APTStrategyABC):
-    """Strategy for installing ipmi."""
-
-    pkg = "smartmontools"
-    _name = HWTool.SMARTCTL
-
-    def install(self) -> None:
-        apt_helpers.add_pkg_with_candidate_version(self.pkg)
-
-    def remove(self) -> None:
-        # Skip removing because this may cause dependency error
-        # for other services on the same machine.
-        logger.info("%s skip removing %s", self._name, self.pkg)
-
-    def check(self) -> bool:
-        """Check package status."""
-        return check_deb_pkg_installed(self.pkg)
-
-
-class SmartCtlExporterStrategy(StrategyABC):  # pylint: disable=R0903
-    """Install smartctl exporter binary."""
-
-    _name = HWTool.SMARTCTL_EXPORTER
-
-    _resource_dir = Path("/opt/SmartCtlExporter/")
-    _release = (
-        "https://github.com/prometheus-community/"
-        "smartctl_exporter/releases/download/v0.12.0/smartctl_exporter-0.12.0.linux-amd64.tar.gz"
-    )
-    _exporter_name = "smartctl_exporter"
-    _exporter_path = Path(_resource_dir / "smartctl_exporter")
-
-    def install(self) -> None:
-        """Install exporter binary from internet."""
-        logger.debug("Installing SmartCtlExporter")
-        self._resource_dir.mkdir(parents=True, exist_ok=True)
-
-        resp = requests.get(self._release, timeout=60)
-        if resp.status_code != HTTPStatus.OK:
-            logger.error("Failed to download smartctl exporter binary.")
-            raise ResourceInstallationError(self._name)
-
-        success = False
-        fileobj = io.BytesIO(resp.content)
-        with tarfile.open(fileobj=fileobj, mode="r:gz") as tar:
-            for member in tar.getmembers():
-                if member.name.endswith(self._exporter_name):
-                    with open(self._exporter_path, "wb") as outfile:
-                        member_file = tar.extractfile(member)
-                        if member_file:
-                            outfile.write(member_file.read())
-                            success = True
-                    if success:
-                        make_executable(self._exporter_path)
-        if not success:
-            logger.error("Failed to install SmartCtlExporter binary.")
-            raise ResourceInstallationError(self._name)
-
-    def remove(self) -> None:
-        """Remove downloaded exporter binary."""
-        logger.debug("Remove SmartCtlExporter")
-        shutil.rmtree(self._resource_dir)
-
-    def check(self) -> bool:
-        """Check package status."""
-        logger.debug("Check SmartCtlExporter resources")
-        return self._exporter_path.is_file()
-
-
 def _raid_hw_verifier_hwinfo() -> Set[HWTool]:
     """Verify if a supported RAID card exists on the machine using the hwinfo command."""
     hwinfo_output = hwinfo("storage")
@@ -650,7 +581,7 @@ def bmc_hw_verifier() -> Set[HWTool]:
 
 def disk_hw_verifier() -> Set[HWTool]:
     """Verify if the disk exists on the machine."""
-    return {HWTool.SMARTCTL} if lshw(class_filter="disk") else set()
+    return {HWTool.SMARTCTL_EXPORTER} if lshw(class_filter="disk") else set()
 
 
 def nvidia_gpu_verifier() -> Set[HWTool]:
@@ -664,6 +595,25 @@ def detect_available_tools() -> Set[HWTool]:
     return raid_hw_verifier() | bmc_hw_verifier() | disk_hw_verifier() | nvidia_gpu_verifier()
 
 
+def remove_legacy_smartctl_exporter() -> None:
+    """Remove the legacy (non-snap) smartctl exporter files left by an older revision.
+
+    Workaround for migrating legacy smartctl exporter to snap package.
+    """
+    name = "smartctl-exporter"
+    smartctl_exporter = Path("/opt/SmartCtlExporter/")  # absolute, not cwd-relative
+    smartctl_exporter_config_path = Path(f"/etc/{name}-config.yaml")
+    smartctl_exporter_service_path = Path(f"/etc/systemd/system/{name}.service")
+    if smartctl_exporter_service_path.exists():
+        systemd.service_stop(name)
+        systemd.service_disable(name)
+        smartctl_exporter_service_path.unlink()
+    if smartctl_exporter_config_path.exists():
+        smartctl_exporter_config_path.unlink()
+    if smartctl_exporter.exists():
+        shutil.rmtree(smartctl_exporter)
+
+
 class HWToolHelper:
     """Helper to install vendor's or hardware related tools."""
 
@@ -680,7 +630,6 @@ def strategies(self) -> List[StrategyABC]:
             IPMIDCMIStrategy(),
             IPMISENSORStrategy(),
             RedFishStrategy(),
-            SmartCtlStrategy(),
         ]
 
     def fetch_tools(  # pylint: disable=W0102
diff --git a/src/prometheus_alert_rules/dcgm.yaml b/src/prometheus_alert_rules/dcgm.yaml
new file mode 100644
index 00000000..97e43cd2
--- /dev/null
+++ b/src/prometheus_alert_rules/dcgm.yaml
@@ -0,0 +1,93 @@
+# The alerts use DCGM_FI_DEV_CLOCK_THROTTLE_REASONS metric to detect throttling events on NVIDIA GPUs,
+# which is a bitmask of throttle reasons found here: https://docs.nvidia.com/datacenter/dcgm/2.1/dcgm-api/group__dcgmFieldConstants.html.
+# The 8 least significant bits are used for the alerts, with each bit representing a different throttle reason.
+
+groups:
+- name: NVIDIA DCGM Throttling Alerts
+  rules:
+    - alert: GPUPowerBrakeThrottle
+      # isolate the least significant 8 bits with % 256
+      # check whether bit 7 (starts from bit 0) has been set with the >= 128 comparison
+      expr: DCGM_FI_DEV_CLOCK_THROTTLE_REASONS % 256 >= 128
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: GPU Hardware Power Brake Slowdown throttling detected. (instance {{ $labels.Hostname }})
+        description: |
+          HW Power Brake Slowdown (reducing the core clocks by a factor of 2 or more) is engaged on NVIDIA GPU: {{ $labels.gpu }}
+          This is an indicator of: 
+              - External Power Brake Assertion being triggered (e.g. by the system power supply)
+            LABELS = {{ $labels }}
+    - alert: GPUThermalHWThrottle
+      # isolate the least significant 7 bits with % 128
+      # check whether bit 6 (starts from bit 0) has been set with the >= 64 comparison
+      expr: DCGM_FI_DEV_CLOCK_THROTTLE_REASONS % 128 >= 64
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: GPU Hardware Thermal throttling detected. (instance {{ $labels.Hostname }})
+        description: |
+          HW Thermal Slowdown (reducing the core clocks by a factor of 2 or more) is engaged on NVIDIA GPU: {{ $labels.gpu }}
+          This is an indicator of:
+              - Temperature being too high
+            LABELS = {{ $labels }}
+    - alert: GPUThermalSWThrottle
+      # isolate the least significant 6 bits with % 64
+      # check whether bit 5 (starts from bit 0) has been set with the >= 32 comparison
+      expr: DCGM_FI_DEV_CLOCK_THROTTLE_REASONS % 64 >= 32
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: GPU Software Thermal throttling detected. (instance {{ $labels.Hostname }})
+        description: |
+          SW Thermal Slowdown is engaged on NVIDIA GPU: {{ $labels.gpu }}
+          This is an indicator of:
+              - Current GPU temperature above the GPU Max Operating Temperature
+              - Current memory temperature above the Memory Max Operating Temperature
+            LABELS = {{ $labels }}
+    - alert: GPUSyncBoostThrottle
+      # isolate the least significant 5 bits with % 32
+      # check whether bit 4 (starts from bit 0) has been set with the >= 16 comparison
+      expr: DCGM_FI_DEV_CLOCK_THROTTLE_REASONS % 32 >= 16
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: GPU Sync Boost throttling detected. (instance {{ $labels.Hostname }})
+        description: |
+          This NVIDIA GPU: {{ $labels.gpu }} has been added to a Sync boost group with nvidia-smi or DCGM in order to maximize performance per watt.
+          All GPUs in the sync boost group will boost to the minimum possible clocks across the entire group.
+          Look at the throttle reasons for other GPUs in the system to see why those GPUs are holding this one at lower clocks.
+            LABELS = {{ $labels }}
+    - alert: GPUSlowdownThrottle
+      # isolate the least significant 4 bits with % 16
+      # check whether bit 3 (starts from bit 0) has been set with the >= 8 comparison
+      expr: DCGM_FI_DEV_CLOCK_THROTTLE_REASONS % 16 >= 8
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: GPU Hardware Slowdown throttling detected. (instance {{ $labels.Hostname }})
+        description: |
+          HW Slowdown (reducing the core clocks by a factor of 2 or more) is engaged on NVIDIA GPU: {{ $labels.gpu }}
+          This is an indicator of:
+              - Temperature being too high
+              - External Power Brake Assertion is triggered (e.g. by the system power supply)
+              - Power draw is too high and Fast Trigger protection is reducing the clocks
+              - May be also reported during PState or clock change
+            LABELS = {{ $labels }}
+    - alert: GPUPowerThrottle
+      # isolate the least significant 3 bits with % 8
+      # check whether bit 2 (starts from bit 0) has been set with the >= 4 comparison
+      expr: DCGM_FI_DEV_CLOCK_THROTTLE_REASONS % 8 >= 4
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: GPU Software Power throttling detected. (instance {{ $labels.Hostname }})
+        description: |
+          SW Power Scaling algorithm is reducing the clocks below requested clocks on NVIDIA GPU: {{ $labels.gpu }}
+            LABELS = {{ $labels }}
diff --git a/src/service.py b/src/service.py
index 154ce865..d71f4e9e 100644
--- a/src/service.py
+++ b/src/service.py
@@ -1,6 +1,7 @@
 """Exporter service helper."""
 
 import os
+import shutil
 from abc import ABC, abstractmethod
 from logging import getLogger
 from pathlib import Path
@@ -17,7 +18,6 @@
 from config import (
     HARDWARE_EXPORTER_COLLECTOR_MAPPING,
     HARDWARE_EXPORTER_SETTINGS,
-    SMARTCTL_EXPORTER_SETTINGS,
     ExporterSettings,
     HWTool,
 )
@@ -309,67 +309,6 @@ def remove_file(path: Path) -> bool:
     return success
 
 
-class SmartCtlExporter(RenderableExporter):
-    """A class representing the smartctl exporter and the metric endpoints."""
-
-    required_config: bool = False
-
-    def __init__(self, charm_dir: Path, config: ConfigData) -> None:
-        """Initialize the Hardware Exporter class."""
-        super().__init__(charm_dir, config, SMARTCTL_EXPORTER_SETTINGS)
-
-        self.port = int(config["smartctl-exporter-port"])
-        self.collect_timeout = int(config["collect-timeout"])
-        self.log_level = str(config["exporter-log-level"])
-        self.strategy = SmartCtlExporterStrategy()
-
-    def render_service(self) -> bool:
-        """Render required files for service."""
-        service_rendered = self._render_service(
-            {
-                "PORT": str(self.port),
-                "LEVEL": self.log_level,
-            }
-        )
-        return service_rendered
-
-    def configure(self) -> bool:
-        """Override base configure to render the service file.
-
-        This is because smartctl_exporter doesn't support providing config file.
-        The config options need to be provided as flags while exectuting
-        smartctl_exporter. So, the service file must be re-rendered when a config
-        value is changed.
-        """
-        service_rendered = self.render_service()
-        if service_rendered:
-            systemd.daemon_reload()
-        return service_rendered
-
-    @staticmethod
-    def hw_tools() -> Set[HWTool]:
-        """Return hardware tools to watch."""
-        return {HWTool.SMARTCTL}
-
-    def install_resources(self) -> bool:
-        restart = False
-        if self.check_active():
-            systemd.service_stop(self.exporter_name)
-            restart = True
-        self.strategy.install()
-        if restart:
-            systemd.service_restart(self.exporter_name)
-        logger.debug("Finish install resources for %s", self.exporter_name)
-        return True
-
-    def resources_exist(self) -> bool:
-        return self.strategy.check()
-
-    def remove_resources(self) -> bool:
-        self.strategy.remove()
-        return True
-
-
 class SnapExporter(BaseExporter):
     """A class representing a snap exporter."""
 
@@ -381,7 +320,11 @@ class SnapExporter(BaseExporter):
     def __init__(self, config: ConfigData):
         """Init."""
         self.config = config
-        self.snap_client = snap.SnapCache()[self.strategy.snap]
+
+    @property
+    def snap_client(self) -> snap.Snap:
+        """Return the snap client, looked up from a new SnapCache on every access."""
+        return snap.SnapCache()[self.strategy.snap]
 
     @staticmethod
     def hw_tools() -> Set[HWTool]:
@@ -395,7 +338,6 @@ def install(self) -> bool:
         """
         try:
             self.strategy.install()
-            # dcgm-exporter is disabled by default
             self.enable_and_start()
             return self.snap_client.present is True
         except Exception:  # pylint: disable=broad-except
@@ -429,6 +371,18 @@ def restart(self) -> None:
         """Restart the exporter daemon."""
         self.snap_client.restart(reload=True)
 
+    def set(self, snap_config: dict) -> bool:
+        """Apply the given options to the snap's configuration.
+
+        Return True when the options were set, False when a SnapError was raised.
+        """
+        try:
+            self.snap_client.set(snap_config, typed=True)
+            return True
+        except snap.SnapError as err:
+            logger.error("Failed to update snap configs %s: %s", self.strategy.snap, err)
+        return False
+
     def check_health(self) -> bool:
         """Check if all services are active.
 
@@ -449,18 +403,66 @@ class DCGMExporter(SnapExporter):
 
     exporter_name: str = "dcgm"
     port: int = 9400
+    snap_common: Path = Path("/var/snap/dcgm/common/")
+    metric_config: str = "dcgm-exporter-metrics-file"
 
-    def __init__(self, config: ConfigData):
+    def __init__(self, charm_dir: Path, config: ConfigData):
         """Init."""
         self.strategy = DCGMExporterStrategy(str(config["dcgm-snap-channel"]))
+        self.charm_dir = charm_dir
+        self.metrics_file = self.charm_dir / "src/gpu_metrics/dcgm_metrics.csv"
+        self.metric_config_value = self.metrics_file.name
         super().__init__(config)
 
+    def install(self) -> bool:
+        """Install the DCGM snap and configure it to use the charm's custom metrics file."""
+        if not super().install():
+            return False
+        # Copy the metrics CSV into the snap's common dir and set the option to its file name.
+        logger.info("Creating a custom metrics file and configuring the DCGM snap to use it")
+        try:
+            shutil.copy(self.metrics_file, self.snap_common)
+            self.snap_client.set({self.metric_config: self.metric_config_value})
+            self.snap_client.restart(reload=True)
+        except Exception as err:  # pylint: disable=broad-except
+            logger.error("Failed to configure custom DCGM metrics: %s", err)
+            return False
+
+        return True
+
     @staticmethod
     def hw_tools() -> Set[HWTool]:
         """Return hardware tools to watch."""
         return {HWTool.DCGM}
 
 
+class SmartCtlExporter(SnapExporter):
+    """Snap-based smartctl exporter and its metrics endpoint settings."""
+
+    exporter_name: str = "smartctl-exporter"
+
+    def __init__(self, config: ConfigData) -> None:
+        """Read the port, log level and snap channel from the charm config."""
+        self.port = int(config["smartctl-exporter-port"])
+        self.log_level = str(config["exporter-log-level"])
+        self.strategy = SmartCtlExporterStrategy(str(config["smartctl-exporter-snap-channel"]))
+        super().__init__(config)
+
+    @staticmethod
+    def hw_tools() -> Set[HWTool]:
+        """Return the set of hardware tools this exporter watches."""
+        return {HWTool.SMARTCTL_EXPORTER}
+
+    def configure(self) -> bool:
+        """Run the parent configure, then set the snap log level and listen address."""
+        parent_configured = super().configure()
+        if not parent_configured:
+            return parent_configured
+        snap_config = {"log.level": self.log_level.lower()}
+        snap_config["web.listen-address"] = f":{self.port}"
+        return self.set(snap_config)
+
+
 class HardwareExporter(RenderableExporter):
     """A class representing the hardware exporter and the metric endpoints."""
 
diff --git a/templates/smartctl-exporter.service.j2 b/templates/smartctl-exporter.service.j2
deleted file mode 100644
index ef9fb374..00000000
--- a/templates/smartctl-exporter.service.j2
+++ /dev/null
@@ -1,20 +0,0 @@
-[Unit]
-Description=smartctl exporter service
-After=network-online.target
-
-[Service]
-Type=simple
-PIDFile=/run/smartctl_exporter.pid
-ExecStart=/opt/SmartCtlExporter/smartctl_exporter --web.listen-address=:{{ PORT }}
-User=root
-Group=root
-SyslogIdentifier=smartctl_exporter
-SyslogLevel={{ LEVEL }}
-Restart=on-failure
-RemainAfterExit=no
-RestartSec=100ms
-StandardOutput=journal
-StandardError=journal
-
-[Install]
-WantedBy=multi-user.target
diff --git a/tests/unit/test_alert_rules/test_dcgm.yaml b/tests/unit/test_alert_rules/test_dcgm.yaml
new file mode 100644
index 00000000..3e860486
--- /dev/null
+++ b/tests/unit/test_alert_rules/test_dcgm.yaml
@@ -0,0 +1,397 @@
+rule_files:
+  - ../../../src/prometheus_alert_rules/dcgm.yaml
+
+evaluation_interval: 1m
+
+tests:
+#  HW Power Brake Throttle active
+- interval: 1m
+  input_series:
+    - series: 'DCGM_FI_DEV_CLOCK_THROTTLE_REASONS{Hostname="ubuntu-0", gpu="0"}'
+      values: '128'
+  alert_rule_test:
+    - eval_time: 5m
+      alertname: GPUPowerBrakeThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-0
+            gpu: 0
+            severity: warning
+          exp_annotations:
+            summary: GPU Hardware Power Brake Slowdown throttling detected. (instance ubuntu-0)
+            description: |
+              HW Power Brake Slowdown (reducing the core clocks by a factor of 2 or more) is engaged on NVIDIA GPU: 0
+              This is an indicator of: 
+                  - External Power Brake Assertion being triggered (e.g. by the system power supply)
+                LABELS = map[Hostname:ubuntu-0 gpu:0]
+    - eval_time: 5m
+      alertname: GPUThermalHWThrottle
+      exp_alerts: # alerts shouldn't fire since bit 6 isn't set
+    - eval_time: 5m
+      alertname: GPUThermalSWThrottle
+      exp_alerts: # alerts shouldn't fire since bit 5 isn't set
+    - eval_time: 5m
+      alertname: GPUSyncBoostThrottle
+      exp_alerts: # alerts shouldn't fire since bit 4 isn't set
+    - eval_time: 5m
+      alertname: GPUSlowdownThrottle
+      exp_alerts: # alerts shouldn't fire since bit 3 isn't set
+    - eval_time: 5m
+      alertname: GPUPowerThrottle
+      exp_alerts: # alerts shouldn't fire since bit 2 isn't set
+
+# HW Thermal Throttle active
+- interval: 1m
+  input_series:
+    - series: 'DCGM_FI_DEV_CLOCK_THROTTLE_REASONS{Hostname="ubuntu-0", gpu="1"}'
+      values: '64'
+  alert_rule_test:
+    - eval_time: 5m
+      alertname: GPUThermalHWThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-0
+            gpu: 1
+            severity: warning
+          exp_annotations:
+            summary: GPU Hardware Thermal throttling detected. (instance ubuntu-0)
+            description: |
+              HW Thermal Slowdown (reducing the core clocks by a factor of 2 or more) is engaged on NVIDIA GPU: 1
+              This is an indicator of:
+                  - Temperature being too high
+                LABELS = map[Hostname:ubuntu-0 gpu:1]
+    - eval_time: 5m
+      alertname: GPUPowerBrakeThrottle
+      exp_alerts: # alerts shouldn't fire since bit 7 isn't set
+    - eval_time: 5m
+      alertname: GPUThermalSWThrottle
+      exp_alerts: # alerts shouldn't fire since bit 5 isn't set
+    - eval_time: 5m
+      alertname: GPUSyncBoostThrottle
+      exp_alerts: # alerts shouldn't fire since bit 4 isn't set
+    - eval_time: 5m
+      alertname: GPUSlowdownThrottle
+      exp_alerts: # alerts shouldn't fire since bit 3 isn't set
+    - eval_time: 5m
+      alertname: GPUPowerThrottle
+      exp_alerts: # alerts shouldn't fire since bit 2 isn't set
+
+# SW Thermal Throttle active
+- interval: 1m
+  input_series:
+    - series: 'DCGM_FI_DEV_CLOCK_THROTTLE_REASONS{Hostname="ubuntu-1", gpu="0"}'
+      values: '32'
+  alert_rule_test:
+    - eval_time: 5m
+      alertname: GPUThermalSWThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-1
+            gpu: 0
+            severity: warning
+          exp_annotations:
+            summary: GPU Software Thermal throttling detected. (instance ubuntu-1)
+            description: |
+              SW Thermal Slowdown is engaged on NVIDIA GPU: 0
+              This is an indicator of:
+                  - Current GPU temperature above the GPU Max Operating Temperature
+                  - Current memory temperature above the Memory Max Operating Temperature
+                LABELS = map[Hostname:ubuntu-1 gpu:0]
+    - eval_time: 5m
+      alertname: GPUPowerBrakeThrottle
+      exp_alerts: # alerts shouldn't fire since bit 7 isn't set
+    - eval_time: 5m
+      alertname: GPUThermalHWThrottle
+      exp_alerts: # alerts shouldn't fire since bit 6 isn't set
+    - eval_time: 5m
+      alertname: GPUSyncBoostThrottle
+      exp_alerts: # alerts shouldn't fire since bit 4 isn't set
+    - eval_time: 5m
+      alertname: GPUSlowdownThrottle
+      exp_alerts: # alerts shouldn't fire since bit 3 isn't set
+    - eval_time: 5m
+      alertname: GPUPowerThrottle
+      exp_alerts: # alerts shouldn't fire since bit 2 isn't set
+
+# Sync Boost Throttle active
+- interval: 1m
+  input_series:
+    - series: 'DCGM_FI_DEV_CLOCK_THROTTLE_REASONS{Hostname="ubuntu-1", gpu="1"}'
+      values: '16'
+  alert_rule_test:
+    - eval_time: 5m
+      alertname: GPUSyncBoostThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-1
+            gpu: 1
+            severity: warning
+          exp_annotations:
+            summary: GPU Sync Boost throttling detected. (instance ubuntu-1)
+            description: |
+              This NVIDIA GPU: 1 has been added to a Sync boost group with nvidia-smi or DCGM in order to maximize performance per watt.
+              All GPUs in the sync boost group will boost to the minimum possible clocks across the entire group.
+              Look at the throttle reasons for other GPUs in the system to see why those GPUs are holding this one at lower clocks.
+                LABELS = map[Hostname:ubuntu-1 gpu:1]
+    - eval_time: 5m
+      alertname: GPUPowerBrakeThrottle
+      exp_alerts: # alerts shouldn't fire since bit 7 isn't set
+    - eval_time: 5m
+      alertname: GPUThermalHWThrottle
+      exp_alerts: # alerts shouldn't fire since bit 6 isn't set
+    - eval_time: 5m
+      alertname: GPUThermalSWThrottle
+      exp_alerts: # alerts shouldn't fire since bit 5 isn't set
+    - eval_time: 5m
+      alertname: GPUSlowdownThrottle
+      exp_alerts: # alerts shouldn't fire since bit 3 isn't set
+    - eval_time: 5m
+      alertname: GPUPowerThrottle
+      exp_alerts: # alerts shouldn't fire since bit 2 isn't set
+
+# HW Slowdown Throttle active
+- interval: 1m
+  input_series:
+    - series: 'DCGM_FI_DEV_CLOCK_THROTTLE_REASONS{Hostname="ubuntu-2", gpu="0"}'
+      values: '8'
+  alert_rule_test:
+    - eval_time: 5m
+      alertname: GPUSlowdownThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-2
+            gpu: 0
+            severity: warning
+          exp_annotations:
+            summary: GPU Hardware Slowdown throttling detected. (instance ubuntu-2)
+            description: |
+              HW Slowdown (reducing the core clocks by a factor of 2 or more) is engaged on NVIDIA GPU: 0
+              This is an indicator of:
+                  - Temperature being too high
+                  - External Power Brake Assertion is triggered (e.g. by the system power supply)
+                  - Power draw is too high and Fast Trigger protection is reducing the clocks
+                  - May be also reported during PState or clock change
+                LABELS = map[Hostname:ubuntu-2 gpu:0]
+    - eval_time: 5m
+      alertname: GPUPowerBrakeThrottle
+      exp_alerts: # alerts shouldn't fire since bit 7 isn't set
+    - eval_time: 5m
+      alertname: GPUThermalHWThrottle
+      exp_alerts: # alerts shouldn't fire since bit 6 isn't set
+    - eval_time: 5m
+      alertname: GPUThermalSWThrottle
+      exp_alerts: # alerts shouldn't fire since bit 5 isn't set
+    - eval_time: 5m
+      alertname: GPUSyncBoostThrottle
+      exp_alerts: # alerts shouldn't fire since bit 4 isn't set
+    - eval_time: 5m
+      alertname: GPUPowerThrottle
+      exp_alerts: # alerts shouldn't fire since bit 2 isn't set
+
+# SW Power Throttle active
+- interval: 1m
+  input_series:
+    - series: 'DCGM_FI_DEV_CLOCK_THROTTLE_REASONS{Hostname="ubuntu-2", gpu="1"}'
+      values: '4'
+  alert_rule_test:
+    - eval_time: 5m
+      alertname: GPUPowerThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-2
+            gpu: 1
+            severity: warning
+          exp_annotations:
+            summary: GPU Software Power throttling detected. (instance ubuntu-2)
+            description: |
+              SW Power Scaling algorithm is reducing the clocks below requested clocks on NVIDIA GPU: 1
+                LABELS = map[Hostname:ubuntu-2 gpu:1]
+    - eval_time: 5m
+      alertname: GPUPowerBrakeThrottle
+      exp_alerts: # alerts shouldn't fire since bit 7 isn't set
+    - eval_time: 5m
+      alertname: GPUThermalHWThrottle
+      exp_alerts: # alerts shouldn't fire since bit 6 isn't set
+    - eval_time: 5m
+      alertname: GPUThermalSWThrottle
+      exp_alerts: # alerts shouldn't fire since bit 5 isn't set
+    - eval_time: 5m
+      alertname: GPUSyncBoostThrottle
+      exp_alerts: # alerts shouldn't fire since bit 4 isn't set
+    - eval_time: 5m
+      alertname: GPUSlowdownThrottle
+      exp_alerts: # alerts shouldn't fire since bit 3 isn't set
+
+# No throttling
+- interval: 1m
+  input_series:
+    - series: 'DCGM_FI_DEV_CLOCK_THROTTLE_REASONS{Hostname="ubuntu-0", gpu="0"}'
+      values: '1'
+  alert_rule_test:
+    - eval_time: 5m
+      alertname: GPUPowerBrakeThrottle
+      exp_alerts: # alerts shouldn't fire since bit 7 isn't set
+    - eval_time: 5m
+      alertname: GPUThermalHWThrottle
+      exp_alerts: # alerts shouldn't fire since bit 6 isn't set
+    - eval_time: 5m
+      alertname: GPUThermalSWThrottle
+      exp_alerts: # alerts shouldn't fire since bit 5 isn't set
+    - eval_time: 5m
+      alertname: GPUSyncBoostThrottle
+      exp_alerts: # alerts shouldn't fire since bit 4 isn't set
+    - eval_time: 5m
+      alertname: GPUSlowdownThrottle
+      exp_alerts: # alerts shouldn't fire since bit 3 isn't set
+    - eval_time: 5m
+      alertname: GPUPowerThrottle
+      exp_alerts: # alerts shouldn't fire since bit 2 isn't set
+
+#  All throttling reasons active
+- interval: 1m
+  input_series:
+    - series: 'DCGM_FI_DEV_CLOCK_THROTTLE_REASONS{Hostname="ubuntu-3", gpu="2"}'
+      values: '511'
+  alert_rule_test:
+    - eval_time: 5m
+      alertname: GPUPowerBrakeThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-3
+            gpu: 2
+            severity: warning
+          exp_annotations:
+            summary: GPU Hardware Power Brake Slowdown throttling detected. (instance ubuntu-3)
+            description: |
+              HW Power Brake Slowdown (reducing the core clocks by a factor of 2 or more) is engaged on NVIDIA GPU: 2
+              This is an indicator of: 
+                  - External Power Brake Assertion being triggered (e.g. by the system power supply)
+                LABELS = map[Hostname:ubuntu-3 gpu:2]
+    - eval_time: 5m
+      alertname: GPUThermalHWThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-3
+            gpu: 2
+            severity: warning
+          exp_annotations:
+            summary: GPU Hardware Thermal throttling detected. (instance ubuntu-3)
+            description: |
+              HW Thermal Slowdown (reducing the core clocks by a factor of 2 or more) is engaged on NVIDIA GPU: 2
+              This is an indicator of:
+                  - Temperature being too high
+                LABELS = map[Hostname:ubuntu-3 gpu:2]
+    - eval_time: 5m
+      alertname: GPUThermalSWThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-3
+            gpu: 2
+            severity: warning
+          exp_annotations:
+            summary: GPU Software Thermal throttling detected. (instance ubuntu-3)
+            description: |
+              SW Thermal Slowdown is engaged on NVIDIA GPU: 2
+              This is an indicator of:
+                  - Current GPU temperature above the GPU Max Operating Temperature
+                  - Current memory temperature above the Memory Max Operating Temperature
+                LABELS = map[Hostname:ubuntu-3 gpu:2]
+    - eval_time: 5m
+      alertname: GPUSyncBoostThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-3
+            gpu: 2
+            severity: warning
+          exp_annotations:
+            summary: GPU Sync Boost throttling detected. (instance ubuntu-3)
+            description: |
+              This NVIDIA GPU: 2 has been added to a Sync boost group with nvidia-smi or DCGM in order to maximize performance per watt.
+              All GPUs in the sync boost group will boost to the minimum possible clocks across the entire group.
+              Look at the throttle reasons for other GPUs in the system to see why those GPUs are holding this one at lower clocks.
+                LABELS = map[Hostname:ubuntu-3 gpu:2]
+    - eval_time: 5m
+      alertname: GPUSlowdownThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-3
+            gpu: 2
+            severity: warning
+          exp_annotations:
+            summary: GPU Hardware Slowdown throttling detected. (instance ubuntu-3)
+            description: |
+              HW Slowdown (reducing the core clocks by a factor of 2 or more) is engaged on NVIDIA GPU: 2
+              This is an indicator of:
+                  - Temperature being too high
+                  - External Power Brake Assertion is triggered (e.g. by the system power supply)
+                  - Power draw is too high and Fast Trigger protection is reducing the clocks
+                  - May be also reported during PState or clock change
+                LABELS = map[Hostname:ubuntu-3 gpu:2]
+    - eval_time: 5m
+      alertname: GPUPowerThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-3
+            gpu: 2
+            severity: warning
+          exp_annotations:
+            summary: GPU Software Power throttling detected. (instance ubuntu-3)
+            description: |
+              SW Power Scaling algorithm is reducing the clocks below requested clocks on NVIDIA GPU: 2
+                LABELS = map[Hostname:ubuntu-3 gpu:2]
+
+# Multiple throttling reasons
+- interval: 1m
+  input_series:
+    - series: 'DCGM_FI_DEV_CLOCK_THROTTLE_REASONS{Hostname="ubuntu-0", gpu="0"}'
+      values: '196'
+  alert_rule_test:
+    - eval_time: 5m
+      alertname: GPUPowerBrakeThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-0
+            gpu: 0
+            severity: warning
+          exp_annotations:
+            summary: GPU Hardware Power Brake Slowdown throttling detected. (instance ubuntu-0)
+            description: |
+              HW Power Brake Slowdown (reducing the core clocks by a factor of 2 or more) is engaged on NVIDIA GPU: 0
+              This is an indicator of: 
+                  - External Power Brake Assertion being triggered (e.g. by the system power supply)
+                LABELS = map[Hostname:ubuntu-0 gpu:0]
+    - eval_time: 5m
+      alertname: GPUThermalHWThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-0
+            gpu: 0
+            severity: warning
+          exp_annotations:
+            summary: GPU Hardware Thermal throttling detected. (instance ubuntu-0)
+            description: |
+              HW Thermal Slowdown (reducing the core clocks by a factor of 2 or more) is engaged on NVIDIA GPU: 0
+              This is an indicator of:
+                  - Temperature being too high
+                LABELS = map[Hostname:ubuntu-0 gpu:0]
+    - eval_time: 5m
+      alertname: GPUPowerThrottle
+      exp_alerts:
+        - exp_labels:
+            Hostname: ubuntu-0
+            gpu: 0
+            severity: warning
+          exp_annotations:
+            summary: GPU Software Power throttling detected. (instance ubuntu-0)
+            description: |
+              SW Power Scaling algorithm is reducing the clocks below requested clocks on NVIDIA GPU: 0
+                LABELS = map[Hostname:ubuntu-0 gpu:0]
+    - eval_time: 5m
+      alertname: GPUThermalSWThrottle
+      exp_alerts: # alerts shouldn't fire since bit 5 isn't set
+    - eval_time: 5m
+      alertname: GPUSyncBoostThrottle
+      exp_alerts: # alerts shouldn't fire since bit 4 isn't set
+    - eval_time: 5m
+      alertname: GPUSlowdownThrottle
+      exp_alerts: # alerts shouldn't fire since bit 3 isn't set
diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py
index 68bc3890..bb643713 100644
--- a/tests/unit/test_charm.py
+++ b/tests/unit/test_charm.py
@@ -15,13 +15,7 @@
 import charm
 from charm import ExporterError, HardwareObserverCharm
 from config import HWTool
-from service import (
-    HARDWARE_EXPORTER_SETTINGS,
-    SMARTCTL_EXPORTER_SETTINGS,
-    DCGMExporter,
-    HardwareExporter,
-    SmartCtlExporter,
-)
+from service import HARDWARE_EXPORTER_SETTINGS, DCGMExporter, HardwareExporter, SmartCtlExporter
 
 
 class TestCharm(unittest.TestCase):
@@ -72,12 +66,12 @@ def test_harness(self) -> None:
             ),
             (
                 "Enable two exporters",
-                {HWTool.IPMI_SEL, HWTool.SMARTCTL},
+                {HWTool.IPMI_SEL, HWTool.SMARTCTL_EXPORTER},
                 {"hardware-exporter", "smartctl-exporter"},
             ),
             (
                 "Enable all exporters",
-                {HWTool.IPMI_SEL, HWTool.SMARTCTL, HWTool.DCGM},
+                {HWTool.IPMI_SEL, HWTool.SMARTCTL_EXPORTER, HWTool.DCGM},
                 {"hardware-exporter", "smartctl-exporter", "dcgm"},
             ),
         ]
@@ -98,7 +92,7 @@ def test_exporters(
         mock_hw_exporter.return_value = hw_exporter
 
         smart_exporter = mock.MagicMock()
-        smart_exporter.exporter_name = SMARTCTL_EXPORTER_SETTINGS.name
+        smart_exporter.exporter_name = SmartCtlExporter.exporter_name
         mock_smart_exporter.hw_tools.return_value = SmartCtlExporter.hw_tools()
         mock_smart_exporter.return_value = smart_exporter
 
@@ -117,7 +111,7 @@ def test_exporters(
             (
                 "happy case",
                 "install",
-                {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL},
+                {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL_EXPORTER},
                 (True, ""),
                 [mock.MagicMock(), mock.MagicMock()],
                 [True, True],
@@ -125,7 +119,7 @@ def test_exporters(
             (
                 "happy case",
                 "upgrade",
-                {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL},
+                {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL_EXPORTER},
                 (True, ""),
                 [mock.MagicMock(), mock.MagicMock()],
                 [True, True],
@@ -133,7 +127,7 @@ def test_exporters(
             (
                 "missing resource",
                 "install",
-                {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL},
+                {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL_EXPORTER},
                 (False, "miss something"),
                 [mock.MagicMock(), mock.MagicMock()],
                 [True, True],
@@ -141,7 +135,7 @@ def test_exporters(
             (
                 "missing resource",
                 "upgrade",
-                {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL},
+                {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL_EXPORTER},
                 (False, "miss something"),
                 [mock.MagicMock(), mock.MagicMock()],
                 [True, True],
@@ -149,7 +143,7 @@ def test_exporters(
             (
                 "Exporter install fail",
                 "install",
-                {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL},
+                {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL_EXPORTER},
                 (True, ""),
                 [mock.MagicMock(), mock.MagicMock()],
                 [False, True],
@@ -157,7 +151,7 @@ def test_exporters(
             (
                 "Exporter install fail",
                 "upgrade",
-                {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL},
+                {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL_EXPORTER},
                 (True, ""),
                 [mock.MagicMock(), mock.MagicMock()],
                 [False, True],
@@ -227,14 +221,14 @@ def test_remove(self):
             self.harness.charm.get_stored_tools.return_value = {
                 HWTool.IPMI_SENSOR,
                 HWTool.IPMI_SEL,
-                HWTool.SMARTCTL,
+                HWTool.SMARTCTL_EXPORTER,
             }
 
             self.harness.charm.on.remove.emit()
 
         self.harness.charm.hw_tool_helper.remove.assert_called_with(
             self.harness.charm.model.resources,
-            {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL},
+            {HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL_EXPORTER},
         )
         for mock_exporter in mock_exporters:
             mock_exporter.uninstall.assert_called()
@@ -792,3 +786,8 @@ def test_validate_configs(
             self.harness.begin()
             result = self.harness.charm.validate_configs()
             self.assertEqual(result, expect)
+
+    def test_get_stored_tools_remove_legacy_smartctl(self):
+        self.harness.begin()  # NOTE(review): confirm setUp patches the legacy-removal helper
+        self.harness.charm._stored.stored_tools = {"smartctl"}  # legacy tool name
+        assert self.harness.charm.get_stored_tools() == set()  # legacy entry is dropped
diff --git a/tests/unit/test_hw_tools.py b/tests/unit/test_hw_tools.py
index 4e51fba8..0def3264 100644
--- a/tests/unit/test_hw_tools.py
+++ b/tests/unit/test_hw_tools.py
@@ -1,8 +1,6 @@
 import stat
 import subprocess
-import tempfile
 import unittest
-from http import HTTPStatus
 from pathlib import Path
 from unittest import mock
 
@@ -31,11 +29,8 @@
     PercCLIStrategy,
     ResourceChecksumError,
     ResourceFileSizeZeroError,
-    ResourceInstallationError,
     SAS2IRCUStrategy,
     SAS3IRCUStrategy,
-    SmartCtlExporterStrategy,
-    SmartCtlStrategy,
     SnapStrategy,
     SSACLIStrategy,
     StorCLIStrategy,
@@ -55,6 +50,7 @@
     raid_hw_verifier,
     redfish_available,
     remove_deb,
+    remove_legacy_smartctl_exporter,
     symlink,
 )
 from keys import HP_KEYS
@@ -729,126 +725,6 @@ def test_remove(self, mock_apt):
         mock_apt.remove_package.assert_not_called()
 
 
-class TestSmartCtlStrategy(unittest.TestCase):
-    @mock.patch("apt_helpers.get_candidate_version")
-    @mock.patch("apt_helpers.apt")
-    def test_install(self, mock_apt, mock_candidate_version):
-        strategy = SmartCtlStrategy()
-        mock_candidate_version.return_value = "some-candidate-version"
-        strategy.install()
-
-        mock_apt.add_package.assert_called_with(
-            "smartmontools", version="some-candidate-version", update_cache=False
-        )
-
-    @mock.patch("hw_tools.apt")
-    def test_remove(self, mock_apt):
-        strategy = SmartCtlStrategy()
-        strategy.remove()
-
-        mock_apt.remove_package.assert_not_called()
-
-    @mock.patch("hw_tools.check_deb_pkg_installed")
-    def test_check(self, mock_check_deb_method):
-        strategy = SmartCtlStrategy()
-        strategy.check()
-
-        mock_check_deb_method.assert_called_with("smartmontools")
-
-
-class TestSmartCtlExporterStrategy(unittest.TestCase):
-    def setUp(self):
-        self.temp_dir = tempfile.TemporaryDirectory()
-        self.tmp_path = Path(self.temp_dir.name)
-
-    def tearDown(self):
-        self.temp_dir.cleanup()
-
-    @mock.patch("hw_tools.requests.get")
-    @mock.patch("hw_tools.tarfile.open")
-    @mock.patch("hw_tools.make_executable")
-    def test_install_success(
-        self,
-        mock_make_executable,
-        mock_tar_open,
-        mock_requests_get,
-    ):
-        strategy = SmartCtlExporterStrategy()
-        strategy._resource_dir = self.tmp_path
-        strategy._exporter_path = self.tmp_path / "smartctl_exporter"
-
-        mock_response = mock.MagicMock(status_code=HTTPStatus.OK)
-        mock_response.content = b"dummy content"
-        mock_requests_get.return_value = mock_response
-        mock_member = mock.MagicMock(name="member")
-        mock_member.name = "smartctl_exporter"
-        mock_member_file = mock.MagicMock()
-        mock_member_file.read.return_value = b"dummy content"
-        mock_tar_open.return_value.__enter__.return_value.getmembers.return_value = [mock_member]
-        mock_tar_open.return_value.__enter__.return_value.extractfile.return_value = (
-            mock_member_file  # noqa: E501
-        )
-
-        strategy.install()
-
-        mock_requests_get.assert_called_with(strategy._release, timeout=60)
-        # mock_tar_open.assert_called_with(fileobj=BytesIO(b"dummy content"), mode="r:gz")
-        mock_make_executable.assert_called_with(strategy._exporter_path)
-        self.assertTrue(strategy._resource_dir.exists())
-
-    @mock.patch("hw_tools.requests.get")
-    def test_install_download_failure(self, mock_requests_get):
-        strategy = SmartCtlExporterStrategy()
-        strategy._resource_dir = self.tmp_path
-        strategy._exporter_path = self.tmp_path / "smartctl_exporter"
-
-        mock_response = mock.MagicMock(status_code=HTTPStatus.NOT_FOUND)
-        mock_requests_get.return_value = mock_response
-
-        with self.assertRaises(ResourceInstallationError):
-            strategy.install()
-
-    @mock.patch("hw_tools.requests.get")
-    @mock.patch("hw_tools.tarfile.open")
-    def test_install_parse_failure(self, mock_tar_open, mock_requests_get):
-        strategy = SmartCtlExporterStrategy()
-        strategy._resource_dir = self.tmp_path
-        strategy._exporter_path = self.tmp_path / "smartctl_exporter"
-
-        mock_response = mock.MagicMock(status_code=HTTPStatus.OK)
-        mock_response.content = b"dummy content"
-        mock_requests_get.return_value = mock_response
-        mock_member = mock.MagicMock(name="member")
-        mock_member.name = "random name"
-        mock_member_file = mock.MagicMock()
-        mock_member_file.read.return_value = b"dummy content"
-        mock_tar_open.return_value.__enter__.return_value.getmembers.return_value = [mock_member]
-        mock_tar_open.return_value.__enter__.return_value.extractfile.return_value = (
-            mock_member_file  # noqa: E501
-        )
-
-        with self.assertRaises(ResourceInstallationError):
-            strategy.install()
-
-    @mock.patch("hw_tools.shutil.rmtree")
-    def test_remove(self, mock_shutil_rmtree):
-        strategy = SmartCtlExporterStrategy()
-
-        strategy.remove()
-
-        mock_shutil_rmtree.assert_called_with(strategy._resource_dir)
-
-    def test_check(self):
-        strategy = SmartCtlExporterStrategy()
-        strategy._exporter_path = mock.MagicMock()
-        strategy._exporter_path.is_file.return_value = True
-
-        result = strategy.check()
-        self.assertTrue(result)
-
-        strategy._exporter_path.is_file.assert_called()
-
-
 @mock.patch("hw_tools.disk_hw_verifier", return_value={7, 8, 9})
 @mock.patch("hw_tools.bmc_hw_verifier", return_value={1, 2, 3})
 @mock.patch("hw_tools.raid_hw_verifier", return_value={4, 5, 6})
@@ -982,7 +858,7 @@ class TestDiskHWVerifier(unittest.TestCase):
     @mock.patch("hw_tools.lshw", return_value=[True])
     def test_disk_available(self, mock_lshw):
         tools = disk_hw_verifier()
-        self.assertEqual(tools, {HWTool.SMARTCTL})
+        self.assertEqual(tools, {HWTool.SMARTCTL_EXPORTER})
 
     @mock.patch("hw_tools.lshw", return_value=[])
     def test_disk_not_available(self, mock_lshw):
@@ -1265,3 +1141,36 @@ def test_snap_strategy_check(snap_exporter, mock_snap_lib, services, expected):
     mock_snap_lib.SnapCache.return_value = {"my-snap": mock_snap_client}
 
     assert snap_exporter.check() is expected
+
+
+@mock.patch("hw_tools.Path.unlink")
+@mock.patch("hw_tools.Path.exists")
+@mock.patch("hw_tools.shutil")
+@mock.patch("hw_tools.systemd")  # innermost patch is passed as the first argument
+def test_remove_legacy_smartctl_exporter_exist(
+    mock_systemd, mock_shutil, mock_path_exists, mock_path_unlink
+):
+    mock_path_exists.return_value = True  # every Path.exists() check reports True
+    remove_legacy_smartctl_exporter()
+
+    mock_systemd.service_stop.assert_called_once()
+    mock_systemd.service_disable.assert_called_once()
+    mock_path_unlink.assert_called()
+    assert mock_path_unlink.call_count == 2  # exactly two files are unlinked
+    mock_shutil.rmtree.assert_called_once()
+
+
+@mock.patch("hw_tools.Path.unlink")
+@mock.patch("hw_tools.Path.exists")
+@mock.patch("hw_tools.shutil")
+@mock.patch("hw_tools.systemd")  # innermost patch is passed as the first argument
+def test_remove_legacy_smartctl_exporter_not_exists(
+    mock_systemd, mock_shutil, mock_path_exists, mock_path_unlink
+):
+    mock_path_exists.return_value = False  # every Path.exists() check reports False
+    remove_legacy_smartctl_exporter()  # expected to touch nothing, per the checks below
+
+    mock_systemd.service_stop.assert_not_called()
+    mock_systemd.service_disable.assert_not_called()
+    mock_path_unlink.assert_not_called()
+    mock_shutil.rmtree.assert_not_called()
diff --git a/tests/unit/test_service.py b/tests/unit/test_service.py
index 312afb46..a0fb8835 100644
--- a/tests/unit/test_service.py
+++ b/tests/unit/test_service.py
@@ -8,6 +8,7 @@
 
 import pytest
 import yaml
+from charms.operator_libs_linux.v2 import snap
 from parameterized import parameterized
 from redfish.rest.v1 import InvalidCredentialsError
 
@@ -714,103 +715,65 @@ def test_hw_tools(self):
         )
 
 
-class TestSmartMetricExporter(unittest.TestCase):
-    """Test SmartCtlExporter's methods."""
+class TestDCGMSnapExporter(unittest.TestCase):
+    """Test DCGM Snap exporter's methods."""
 
     def setUp(self) -> None:
         """Set up harness for each test case."""
-        systemd_lib_patcher = mock.patch.object(service, "systemd")
-        self.mock_systemd = systemd_lib_patcher.start()
-        self.addCleanup(systemd_lib_patcher.stop)
+        snap_lib_patcher = mock.patch.object(service, "snap")  # mock service.snap per test
+        shutil_lib_patcher = mock.patch.object(service, "shutil")  # mock service.shutil per test
+        self.mock_snap = snap_lib_patcher.start()
+        self.mock_shutil = shutil_lib_patcher.start()
+        self.addCleanup(snap_lib_patcher.stop)
+        self.addCleanup(shutil_lib_patcher.stop)
 
         search_path = pathlib.Path(f"{__file__}/../../..").resolve()
         self.mock_config = {
-            "smartctl-exporter-port": 10201,
-            "collect-timeout": 10,
-            "exporter-log-level": "INFO",
+            "dcgm-snap-channel": "latest/stable",
         }
-        self.exporter = service.SmartCtlExporter(search_path, self.mock_config)
-
-    def test_render_service(self):
-        """Test render service."""
-        self.exporter._render_service = mock.MagicMock()
-        self.exporter._render_service.return_value = "some result"
-
-        result = self.exporter.render_service()
-        self.assertEqual(result, "some result")
 
-        self.exporter._render_service.assert_called_with(
-            {
-                "PORT": str(self.exporter.port),
-                "LEVEL": self.exporter.log_level,
-            }
-        )
-
-    @parameterized.expand(
-        [
-            (True,),
-            (False,),
-        ]
-    )
-    def test_set_config(self, service_render_success):
-        """Test render config."""
-        self.exporter.render_service = mock.MagicMock()
-        self.exporter.render_service.return_value = service_render_success
+        self.exporter = service.DCGMExporter(search_path, self.mock_config)
+        self.exporter.strategy = mock.MagicMock()  # stub so strategy calls can be asserted
 
-        result = self.exporter.configure()
-        self.assertEqual(result, service_render_success)
+    def test_exporter_name(self):
+        self.assertEqual(self.exporter.exporter_name, "dcgm")
 
     def test_hw_tools(self):
-        self.assertEqual(self.exporter.hw_tools(), {HWTool.SMARTCTL})
+        self.assertEqual(self.exporter.hw_tools(), {HWTool.DCGM})
 
-    @mock.patch("service.systemd", return_value=mock.MagicMock())
-    def test_install_resource_restart(self, mock_systemd):
-        self.exporter.strategy = mock.MagicMock()
-        self.exporter.check_active = mock.MagicMock()
-        self.exporter.check_active.return_value = True
+    def test_install_failed(self):
+        self.exporter.snap_client.present = False  # snap reported as absent
 
-        self.exporter.install_resources()
+        exporter_install_ok = self.exporter.install()
 
         self.exporter.strategy.install.assert_called()
-        self.exporter.check_active.assert_called()
-        mock_systemd.service_stop.assert_called_with(self.exporter.exporter_name)
-        mock_systemd.service_restart.assert_called_with(self.exporter.exporter_name)
+        self.mock_shutil.copy.assert_not_called()
+        self.assertFalse(exporter_install_ok)
 
-    @mock.patch("service.systemd", return_value=mock.MagicMock())
-    def test_install_resource_no_restart(self, mock_systemd):
-        self.exporter.strategy = mock.MagicMock()
-        self.exporter.check_active = mock.MagicMock()
-        self.exporter.check_active.return_value = False
+    def test_install_success(self):
+        self.exporter.snap_client.present = True  # snap reported as present
 
-        self.exporter.install_resources()
+        exporter_install_ok = self.exporter.install()
 
         self.exporter.strategy.install.assert_called()
-        self.exporter.check_active.assert_called()
-        mock_systemd.service_stop.assert_not_called()
-        mock_systemd.service_restart.assert_not_called()
-
-    def test_resource_exists(self):
-        self.exporter.strategy = mock.MagicMock()
-
-        self.exporter.resources_exist()
-        self.exporter.strategy.check.assert_called()
-
-    def test_resources_exist(self):
-        self.exporter.strategy = mock.MagicMock()
-        self.exporter.strategy.check.return_value = "some result"
-
-        result = self.exporter.resources_exist()
-
-        self.assertEqual(result, "some result")
-        self.exporter.strategy.check.assert_called()
+        self.mock_shutil.copy.assert_called_with(
+            self.exporter.metrics_file, self.exporter.snap_common
+        )
+        self.exporter.snap_client.set.assert_called_with(
+            {self.exporter.metric_config: self.exporter.metric_config_value}
+        )
+        self.exporter.snap_client.restart.assert_called_with(reload=True)
+        self.assertTrue(exporter_install_ok)
 
-    def test_resource_remove(self):
-        self.exporter.strategy = mock.MagicMock()
+    def test_install_metrics_copy_fail(self):
+        self.exporter.snap_client.present = True
+        self.mock_shutil.copy.side_effect = FileNotFoundError  # make the copy step raise
 
-        result = self.exporter.remove_resources()
-        self.assertEqual(result, True)
+        exporter_install_ok = self.exporter.install()
 
-        self.exporter.strategy.remove.assert_called()
+        self.exporter.strategy.install.assert_called()
+        self.exporter.snap_client.restart.assert_not_called()  # no restart after a failed copy
+        self.assertFalse(exporter_install_ok)
 
 
 class TestWriteToFile(unittest.TestCase):
@@ -957,6 +920,19 @@ def test_snap_exporter_restart(snap_exporter):
     snap_exporter.snap_client.restart.assert_called_once_with(reload=True)
 
 
+def test_snap_exporter_set(snap_exporter):
+    """Return True and forward the config to the snap client with typed=True."""
+    assert snap_exporter.set({}) is True
+    snap_exporter.snap_client.set.assert_called_once_with({}, typed=True)
+
+
+def test_snap_exporter_set_failed(snap_exporter):
+    """Return False when the snap client raises SnapError on set."""
+    snap_exporter.snap_client.set.side_effect = snap.SnapError()
+    assert snap_exporter.set({}) is False
+    snap_exporter.snap_client.set.assert_called_once_with({}, typed=True)
+
+
 def test_snap_exporter_check_health(snap_exporter):
     snap_exporter.check_health()
     snap_exporter.strategy.check.assert_called_once()
@@ -971,14 +947,22 @@ def test_snap_exporter_configure(mock_install, snap_exporter, install_result, ex
     mock_install.assert_called_once()
 
 
-def test_dcgm_exporter():
+@pytest.mark.parametrize("result, expected_result", [(True, True), (False, False)])
+@mock.patch("service.SnapExporter.install")
+@mock.patch("service.SnapExporter.set")
+def test_smartctl_exporter_configure(mock_set, mock_install, result, expected_result):
     mock_config = {
-        "dcgm-snap-channel": "latest/stable",
+        "smartctl-exporter-port": "10000",
+        "exporter-log-level": "info",
+        "smartctl-exporter-snap-channel": "latest/stable",
     }
 
-    exporter = service.DCGMExporter(mock_config)
-    assert exporter.exporter_name == "dcgm"
-    assert exporter.hw_tools() == {HWTool.DCGM}
+    mock_set.return_value = result  # set and install always share one outcome here
+    mock_install.return_value = result
+    exporter = service.SmartCtlExporter(mock_config)
+    assert exporter.exporter_name == "smartctl-exporter"
+    assert exporter.hw_tools() == {HWTool.SMARTCTL_EXPORTER}
+    assert exporter.configure() is expected_result
 
 
 if __name__ == "__main__":