diff --git a/README.md b/README.md
index 355a9a8..2e3bd40 100644
--- a/README.md
+++ b/README.md
@@ -19,24 +19,31 @@ The remainder of the charts in the response can be produced from code in the rep
 - Activate conda environment: `conda activate asf_welsh_energy_consultation`
 - Run `make inputs-pull` to pull the zipped supplementary data from S3 and put it in `/inputs/data`. There will be one folder per historical analysis
   containing the supplementary data files as listed in the `Historical analysis` section below.
+
+## Run the script
+
 - Run `python asf_welsh_energy_consultation/analysis/produce_plots_and_stats.py --local_data_dir <YOUR_LOCAL_DIR>`. You need to specify the path to the local
   directory where your local copy of the EPC data is/will be saved by replacing `<YOUR_LOCAL_DIR>` with the path to your "ASF_data" directory or equivalent.
   If you don't have a local directory for ASF core data, you can create a folder called "ASF_data" in your home directory.
   - You can specify which batch of EPC data to download and MCS data to load from S3 by passing the `--epc_batch` and `--mcs_batch` arguments, both
     default to downloading/loading the newest data from S3, respectively.
   - You can specify which supplementary data folder to use by passing the `--supp_data` argument. It defaults to using the latest supplementary data folder.
+  - You can specify which batch of gold MCS-EPC merged data to use with the `--gold_mcs_epc_batch` argument, passing the batch date in YYMMDD format.
+  - If you wish to use a different gold MCS-EPC batch from the preprocessed `hp_installed_gold_[YYMMDD].csv` file in the supplementary data folder
+    in `inputs/data`, you can download and process it from S3 by setting the `--download_gold_data_from_s3` argument to `True`. Note that this download can take ~30 minutes.
+  - Run `python asf_welsh_energy_consultation/analysis/produce_plots_and_stats.py -h` for more info.
   - To recreate the full October 2023 analysis, set the `--calculate_average_installations` argument to `True`. This will calculate some additional numbers on MCS installations per year included in the October 2023 response. For other historical analyses, this argument is not required and defaults to `False`.
   - Run `python asf_welsh_energy_consultation/analysis/produce_plots_and_stats.py -h` for more info.
 
-The script should generate the following seven plots which will be saved in your local repo in `outputs/figures`:
+## The script should generate the following ten plots which will be saved in your local repo in `outputs/figures`:
 
 - `cumulative_retrofits.html`
 - `electric_tenure.html`
-- `installations_by_gas_status.html`
-- `installations_by_rurality.html`
+- `[gold_]installations_by_gas_status.html`
+- `[gold_]installations_by_rurality.html`
 - `new_build_hp_cumulative.html`
 - `new_build_hp_proportion.html`
-- `total_cumulative_installations.html`
+- `[gold_]total_cumulative_installations.html`
 
 It should generate a further 10 plots, five in English and five in Welsh, saved in `outputs/figures/english` and `outputs/figures/welsh`, respectively:
 
@@ -79,6 +86,7 @@ Versions/batches of data used for previous analysis are listed below.
 October 2023 analysis (`/inputs/data/data_202310`):
 
 - EPC: 2023_Q2_complete (preprocessed, and preprocessed and deduplicated)
+- EPC & MCS gold merged: batch 231009
 - mcs_installations_231009.csv
 - mcs_installations_epc_full_231009.csv
 - dwellings_2021.xlsx - [Number of dwellings by housing characteristics in England and Wales 2021 (released 30 March 2023)](https://www.ons.gov.uk/peoplepopulationandcommunity/housing/datasets/numberofdwellingsbyhousingcharacteristicsinenglandandwales)
diff --git a/asf_welsh_energy_consultation/analysis/produce_plots_and_stats.py b/asf_welsh_energy_consultation/analysis/produce_plots_and_stats.py
index 7057df9..734e17f 100644
--- a/asf_welsh_energy_consultation/analysis/produce_plots_and_stats.py
+++ b/asf_welsh_energy_consultation/analysis/produce_plots_and_stats.py
@@ -34,9 +34,11 @@
 
 if __name__ == "__main__":
     # ======================================================
-    # MCS installations, by off-gas status
-
-    total_cumulative_installations = process_data.get_total_cumsums()
+    # Total MCS installations
+    enhanced_mcs = process_data.get_enhanced_combined(mcs_or_gold="mcs")
+    total_cumulative_installations = process_data.get_total_cumsums(
+        data=enhanced_mcs, installation_date_col="commission_date"
+    )
 
     total_cumulative_installations_chart = time_series_comparison(
         data=total_cumulative_installations,
@@ -52,7 +54,10 @@
     # MCS installations, by off-gas status
 
     installations_by_gas_status = process_data.cumsums_by_variable(
-        "off_gas", "Gas status"
+        "off_gas",
+        "Gas status",
+        data=enhanced_mcs,
+        installation_date_col="commission_date",
     )
 
     installations_by_gas_status_chart = time_series_comparison(
@@ -72,7 +77,10 @@
     # MCS installations, by rurality
 
     installations_by_rurality = process_data.cumsums_by_variable(
-        "rurality_2_label", "Rurality"
+        "rurality_2_label",
+        "Rurality",
+        data=enhanced_mcs,
+        installation_date_col="commission_date",
     )
 
     installations_by_rurality_chart = time_series_comparison(
@@ -89,6 +97,70 @@
         output_dir=output_folder,
     )
 
+    # ======================================================
+    # Total MCS and EPC installations
+    enhanced_combined = process_data.get_enhanced_combined(mcs_or_gold="gold")
+    gold_total_cumulative_installations = process_data.get_total_cumsums(
+        data=enhanced_combined, installation_date_col="HP_INSTALL_DATE"
+    )
+
+    gold_total_cumulative_installations_chart = time_series_comparison(
+        data=gold_total_cumulative_installations,
+        title="Cumulative heat pump installations over time",
+        y_var="cumsum:Q",
+        y_title="Number of heat pump installations",
+        color_var="colour:N",
+        filename="gold_total_cumulative_installations",
+        output_dir=output_folder,
+    )
+
+    # ======================================================
+    # MCS and EPC installations, by off-gas status
+
+    gold_installations_by_gas_status = process_data.cumsums_by_variable(
+        "off_gas",
+        "Gas status",
+        data=enhanced_combined,
+        installation_date_col="HP_INSTALL_DATE",
+    )
+
+    gold_installations_by_gas_status_chart = time_series_comparison(
+        data=gold_installations_by_gas_status,
+        title=[
+            "Cumulative number of heat pump installations in Welsh homes",
+            "located in off- and on-gas postcodes",
+        ],
+        y_var="Number of heat pumps:Q",
+        y_title="Number of heat pump installations",
+        color_var="Gas status:N",
+        filename="gold_installations_by_gas_status",
+        output_dir=output_folder,
+    )
+
+    # ======================================================
+    # MCS and EPC installations, by rurality
+
+    gold_installations_by_rurality = process_data.cumsums_by_variable(
+        "rurality_2_label",
+        "Rurality",
+        data=enhanced_combined,
+        installation_date_col="HP_INSTALL_DATE",
+    )
+
+    gold_installations_by_rurality_chart = time_series_comparison(
+        data=gold_installations_by_rurality,
+        title=[
+            "Cumulative number of heat pump installations",
+            "in Welsh homes located in rural vs urban postcodes",
+        ],
+        y_var="Number of heat pumps:Q",
+        y_title="Number of heat pump installations",
+        color_var="Rurality:N",
+        domain_max=installations_by_rurality.date.max(),
+        filename="gold_installations_by_rurality",
+        output_dir=output_folder,
+    )
+
     # ======================================================
     # Proportions of new builds that have heat pumps
 
@@ -148,7 +220,10 @@
 
     mcs_retrofits = process_data.get_mcs_retrofits()
     mcs_retrofit_cumsums = process_data.cumsums_by_variable(
-        "country", "wales_col", data=mcs_retrofits
+        "country",
+        "wales_col",
+        data=mcs_retrofits,
+        installation_date_col="commission_date",
     )
     # this function works without separating by category - 'wales_col' is a whole column of "Wales" (not used)
 
@@ -206,19 +281,24 @@
 
     wales_df = load_wales_df(from_csv=False)
     wales_hp = load_wales_hp(wales_df)
-    wales_mcs = process_data.get_enhanced_mcs()
+    wales_mcs = process_data.get_enhanced_combined(mcs_or_gold="mcs")
 
     # English plots
 
     # Key statistics
     intro = "Summary statistics for heat pumps in Wales\n\n"
-    total_hp = f"Number of heat pumps: {len(wales_hp)}\n"
-    total_epc = f"Number of properties in EPC: {len(wales_df)}\n"
-    hp_perc = "Estimated percentage of properties with a heat pump: \
+    total_epc_hp = f"Number of heat pumps in EPC: {len(wales_hp)}\n"
+    total_epc_properties = f"Number of properties in EPC: {len(wales_df)}\n"
+    hp_perc = "Estimated percentage of EPC properties with a heat pump: \
         {:.2%}\n\n".format(
         len(wales_hp) / len(wales_df)
     )
 
+    total_hp = f"Number of heat pumps in MCS and EPC: {len(enhanced_combined)}\n"
+    total_mcs_installations = (
+        f"Number of MCS-certified heat pump installations: {len(enhanced_mcs)}\n"
+    )
+
     tenure_value_counts = wales_hp.TENURE.value_counts(normalize=True).to_string()
 
     epc_c_or_above_and_good_walls = wales_df.loc[
@@ -262,9 +342,11 @@
         stats_txt.writelines(
             [
                 intro,
-                total_hp,
-                total_epc,
+                total_epc_hp,
+                total_epc_properties,
                 hp_perc,
+                total_hp,
+                total_mcs_installations,
                 tenure_value_counts,
                 epc_c_wall,
                 epc_c_wall_proportion,
diff --git a/asf_welsh_energy_consultation/getters/get_data.py b/asf_welsh_energy_consultation/getters/get_data.py
index 5f294f0..2f37474 100644
--- a/asf_welsh_energy_consultation/getters/get_data.py
+++ b/asf_welsh_energy_consultation/getters/get_data.py
@@ -13,11 +13,16 @@
 
 from asf_core_data.getters.epc.data_batches import get_batch_path
 from asf_core_data.config import base_config
-from asf_core_data.getters.data_getters import download_core_data, logger
+from asf_core_data.getters.data_getters import (
+    download_core_data,
+    logger,
+    download_from_s3,
+)
 
 import pandas as pd
 import numpy as np
 import os
+import dask.dataframe as dd
 
 from argparse import ArgumentParser
 
@@ -71,6 +76,20 @@ def create_argparser():
         type=bool,
     )
 
+    parser.add_argument(
+        "--gold_mcs_epc_batch",
+        help="Specifies which gold merged EPC-MCS_installation-MCS_installer data batch to use. Only date required in YYMMDD format.",
+        type=str,
+    )
+
+    parser.add_argument(
+        "--download_gold_data_from_s3",
+        help="If set to True, downloads specified batch of gold merged EPC-MCS_installation-MCS_installer data from S3 locally. "
+        "Note that this download can take 30 minutes and not recommended if `hp_installed_gold_[YYMMDD]` already in supplementary data folder in `inputs`.",
+        default=False,
+        type=str,
+    )
+
     return parser
 
 
@@ -554,3 +573,73 @@ def load_wales_hp(wales_epc):
     wales_hp = wales_epc.loc[wales_epc.HP_INSTALLED].reset_index(drop=True)
 
     return wales_hp
+
+
+def load_mcs_epc_combined():
+    """
+    Get combined gold MCS-EPC dataset filtered for rows with heat pump installations in domestic dwellings.
+
+    Uses the local preprocessed `hp_installed_gold_[batch].csv` unless `--download_gold_data_from_s3` is
+    set to a true value, in which case the full batch is downloaded from S3, filtered, and saved as that
+    CSV. `HP_INSTALL_DATE` is returned as datetime in both cases.
+
+    Returns:
+        pd.DataFrame: Gold MCS-EPC dataset for domestic dwellings with heat pumps.
+    """
+    args = get_args()
+    batch = args.gold_mcs_epc_batch
+    # The flag is parsed with type=str, so the text "False" would be truthy: compare the text instead.
+    download_data = str(args.download_gold_data_from_s3).lower() in ("true", "1")
+    processed_path = os.path.join(input_data_path, f"hp_installed_gold_{batch}.csv")
+
+    if not download_data:
+        # Parse dates so the cached CSV gives datetimes, as the download branch does.
+        return pd.read_csv(processed_path, parse_dates=["HP_INSTALL_DATE"])
+
+    raw_filename = f"merged_epc_mcs_installations_installers_{batch}.csv"
+    path = f"outputs/gold/{raw_filename}"
+
+    logger.info(f"Loading {path} from S3. This will take a while.")
+    download_from_s3(path_to_file=path, output_path=input_data_path)
+
+    ddf = dd.read_csv(
+        os.path.join(input_data_path, raw_filename),
+        dtype={
+            "HP_INSTALL_DATE": "object",
+            "UPRN": "object",
+            "installation_type": "object",
+        },
+    )
+
+    # Get rows with HP installed only, data already filtered for domestic only
+    hp_installed = ddf[ddf["HP_INSTALLED"] == True]
+    hp_installed = hp_installed[
+        [
+            "POSTCODE",
+            "INSPECTION_DATE",
+            "COUNTRY",
+            "UPRN",
+            "HP_INSTALLED",
+            "HP_TYPE",
+            "HP_INSTALL_DATE",
+            "MCS_AVAILABLE",
+            "EPC_AVAILABLE",
+        ]
+    ]
+
+    hp_installed = hp_installed.rename(columns={"POSTCODE": "postcode"})
+
+    # Convert to pandas df
+    df = hp_installed.compute()
+
+    df["HP_INSTALL_DATE"] = pd.to_datetime(df["HP_INSTALL_DATE"])
+
+    # Batch 231009 contains data from MCS up to 30 June 2023 and data from EPC up to 31 July 2023
+    # Must remove additional month of EPC data for consistency
+    if batch == "231009":
+        df = df[df["HP_INSTALL_DATE"] < "2023-07-01"]
+
+    # Cache the filtered data so later runs can skip the download.
+    df.to_csv(processed_path, index=False)
+
+    return df
diff --git a/asf_welsh_energy_consultation/pipeline/process_data.py b/asf_welsh_energy_consultation/pipeline/process_data.py
index c7aa243..783454a 100644
--- a/asf_welsh_energy_consultation/pipeline/process_data.py
+++ b/asf_welsh_energy_consultation/pipeline/process_data.py
@@ -13,42 +13,48 @@
 # PROCESSING MCS
 
 
-def get_enhanced_mcs():
-    """Get dataset of domestic MCS installations with attached off-gas, country and rurality fields.
-
+def get_enhanced_combined(mcs_or_gold="mcs"):
+    """Get dataset of either MCS installations or gold merged EPC and MCS installations data with attached off-gas, country and rurality fields for Wales only.
+    Args:
+        mcs_or_gold, str: Specifies use of MCS installations data or MCS-EPC gold merged data for creating combined dataset. Defaults to "mcs".
     Returns:
         pd.DataFrame: Dataset as described above.
     """
-    mcs = get_data.get_mcs_domestic()
+    if mcs_or_gold == "gold":
+        df = get_data.load_mcs_epc_combined()
+        df_name = "gold merged EPC-MCS installation"
+    else:
+        df = get_data.get_mcs_domestic()
+        df_name = "MCS installation"
     og = get_data.get_offgas()
     countries = get_data.get_countries()
     rural = get_data.get_rurality_by_oa()
 
     # join with off-gas data
-    mcs = mcs.merge(og, on="postcode", how="left")
-    mcs["off_gas"] = mcs["off_gas"].fillna("On gas").replace({True: "Off gas"})
+    df = df.merge(og, on="postcode", how="left")
+    df["off_gas"] = df["off_gas"].fillna("On gas").replace({True: "Off gas"})
 
     # join with regions in order to filter to Wales
-    mcs = mcs.merge(countries, on="postcode", how="left")
-    if mcs.country.isna().sum() > 0:
+    df = df.merge(countries, on="postcode", how="left")
+    if df.country.isna().sum() > 0:
         logger.warning(
-            f"{mcs.country.isna().sum()} MCS installation records have no country match. "
+            f"{df.country.isna().sum()} {df_name} records have no country match. "
             f"Potential loss of data when filtering for Wales."
         )
-    mcs = mcs.loc[mcs["country"] == "Wales"].reset_index(drop=True)
+    df = df.loc[df["country"] == "Wales"].reset_index(drop=True)
     # There will be records with no match
     # Some will be new postcodes (new build developments)
     # and some may be expired postcodes
 
     # join with rurality data
-    mcs = mcs.merge(rural, on="postcode", how="left")
-    if mcs.rurality_10_code.isna().sum() > 0:
+    df = df.merge(rural, on="postcode", how="left")
+    if df.rurality_10_code.isna().sum() > 0:
         logger.warning(
-            f"Loss of data when using rurality variable: {mcs.rurality_10_code.isna().sum()} Welsh MCS installation records have no rurality code match."
+            f"Loss of data when using rurality variable: {df.rurality_10_code.isna().sum()} Welsh {df_name} records have no rurality code match."
         )
 
     # add custom rurality column (rurality "type 7": all different types of urban mapped to Urban)
-    mcs["rurality_7"] = mcs["rurality_10_label"].replace(
+    df["rurality_7"] = df["rurality_10_label"].replace(
         {
             "Urban city and town": "Urban",
             "Urban major conurbation": "Urban",
@@ -57,40 +63,41 @@ def get_enhanced_mcs():
         }
     )
 
-    return mcs
-
-
-# load enhanced MCS as part of this script, so only needs to be done once
-enhanced_mcs = get_enhanced_mcs()
+    return df
 
 
-def get_total_cumsums():
+def get_total_cumsums(data, installation_date_col):
     """
-    Gets cumulative number of MCS-certified HP installations for Wales.
+    Gets cumulative number of HP installations for Wales.
+
+    Args:
+        data pd.Dataframe: Dataframe of HP installations in Wales.
+        installation_date_col str: Name of column containing HP installation date.
 
     Returns:
-        pd.Dataframe containing cumulative MCS installations for Wales over time.
+        pd.Dataframe containing cumulative number of HP installations for Wales over time.
 
     """
-    mcs = get_enhanced_mcs()
-    mcs["n"] = 1
-    cumulative_total = mcs.groupby("commission_date")["n"].sum().reset_index()
+    data["n"] = 1
+    cumulative_total = data.groupby(installation_date_col)["n"].sum().reset_index()
 
     # Sort by date ascending
-    cumulative_total = cumulative_total.sort_values("commission_date")
+    cumulative_total = cumulative_total.sort_values(installation_date_col)
 
     # Get cumulative total
     cumulative_total["cumsum"] = cumulative_total.n.cumsum()
     cumulative_total = cumulative_total.loc[
-        cumulative_total.commission_date >= "2015-01-01"
+        cumulative_total[installation_date_col] >= "2015-01-01"
     ].reset_index(drop=True)
-    cumulative_total = cumulative_total.rename(columns={"commission_date": "date"})
+    cumulative_total = cumulative_total.rename(columns={installation_date_col: "date"})
     cumulative_total["colour"] = 1  # add single colour category for plotting
 
     return cumulative_total
 
 
-def cumsums_by_variable(variable, new_var_name, data=enhanced_mcs):
+def cumsums_by_variable(
+    variable, new_var_name, data, installation_date_col="HP_INSTALL_DATE"
+):
     """Process data into a form giving the cumulative total of
     installations on each date for each category of a variable.
 
@@ -104,15 +111,15 @@ def cumsums_by_variable(variable, new_var_name, data=enhanced_mcs):
     """
 
     # calculate total number of installations for each date/category pair
-    totals = data.groupby(["commission_date", variable]).size()
+    totals = data.groupby([installation_date_col, variable]).size()
 
     totals = totals.reset_index().rename(columns={0: "sum"})
 
     idx = pd.date_range(
-        totals["commission_date"].min(), totals["commission_date"].max()
+        totals[installation_date_col].min(), totals[installation_date_col].max()
     )
 
-    totals = totals.pivot(index="commission_date", columns=variable).fillna(0)
+    totals = totals.pivot(index=installation_date_col, columns=variable).fillna(0)
 
     totals.index = pd.DatetimeIndex(totals.index)
 
@@ -373,7 +380,7 @@ def get_mcs_retrofits():
     # this makes sense because if they had been built with a HP we would expect them to appear in EPC
     # due to new build EPC requirements
 
-    enhanced_mcs = get_enhanced_mcs()
+    enhanced_mcs = get_enhanced_combined(mcs_or_gold="mcs")
     enhanced_mcs = add_unique_mcs_id(enhanced_mcs)
     mcs_retrofits = enhanced_mcs.loc[
         ~enhanced_mcs["unique_id"].isin(hp_when_built_indices)
@@ -448,7 +455,7 @@ def get_installations_per_year():
         pandas.DataFrame of MCS installations per year in Wales.
 
     """
-    mcs = get_enhanced_mcs()
+    mcs = get_enhanced_combined(mcs_or_gold="mcs")
     mcs["n"] = 1
     mcs["year"] = pd.to_datetime(mcs["commission_date"]).dt.year
     installations_by_year = mcs.groupby("year")["n"].sum().reset_index()
diff --git a/requirements.txt b/requirements.txt
index 28a0ff2..c681af7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,12 +1,13 @@
 altair==4.2.2
-numpy==1.23.4
-pandas==1.5.1
 altair_viewer==0.4.0
 altair_saver==0.5.0
-matplotlib
-odfpy
-selenium==4.2.0
 argparse==1.4.0
+dask==2023.5.0
+matplotlib==3.7.3
+numpy==1.23.4
+odfpy
+pandas==1.5.1
 s3fs>=2023.3.0
+selenium==4.2.0
 asf_core_data@ git+ssh://git@github.com/nestauk/asf_core_data.git
 nesta_ds_utils@ git+ssh://git@github.com/nestauk/nesta_ds_utils.git