From 9f689aa08ddf78dd87a56cde663ff497a42dc920 Mon Sep 17 00:00:00 2001 From: arcsector <26469747+arcsector@users.noreply.github.com> Date: Fri, 17 Mar 2023 23:01:26 -0700 Subject: [PATCH 01/19] init kv features - backup - upgrade - disable - include in vars and post-install steps --- roles/splunk/defaults/main.yml | 4 + roles/splunk/tasks/adhoc_backup_kvstore.yml | 15 ++ roles/splunk/tasks/configure_kvstore.yml | 28 ++++ roles/splunk/tasks/kvstore_disable.yml | 11 ++ roles/splunk/tasks/kvstore_upgrade.yml | 152 ++++++++++++++++++++ roles/splunk/tasks/post_install.yml | 4 + 6 files changed, 214 insertions(+) create mode 100644 roles/splunk/tasks/adhoc_backup_kvstore.yml create mode 100644 roles/splunk/tasks/configure_kvstore.yml create mode 100644 roles/splunk/tasks/kvstore_disable.yml create mode 100644 roles/splunk/tasks/kvstore_upgrade.yml diff --git a/roles/splunk/defaults/main.yml b/roles/splunk/defaults/main.yml index b31589ae..00010dc3 100644 --- a/roles/splunk/defaults/main.yml +++ b/roles/splunk/defaults/main.yml @@ -69,6 +69,10 @@ splunk_shc_target_group: shc splunk_shc_deployer: "{{ groups['shdeployer'] | first }}" # If you manage multiple SHCs, configure the var value in group_vars splunk_shc_uri_list: "{% for h in groups[splunk_shc_target_group] %}https://{{ hostvars[h].ansible_fqdn }}:{{ splunkd_port }}{% if not loop.last %},{% endif %}{% endfor %}" # If you manage multiple SHCs, configure the var value in group_vars start_splunk_handler_fired: false # Do not change; used to prevent unnecessary splunk restarts +splunk_enable_kvstore: true +splunk_kvstore_storage: undefined # Can be defined here or at the group_vars level - accepted values: "wiredTiger" or "undefined", which leaves as default +splunk_kvstore_version: undefined # Can be defined here or at the group_vars level - accepted values: 4.2 or "undefined", which leaves as default1 +splunk_oplog_size: 1000 # Default for Splunk Enterprise - should be changed at the group_vars level only at the 
behest of Splunk support with special care taken # Linux and scripting related vars add_crashlog_script: false # Set to true to install a script and cron job to automatically cleanup splunk crash logs older than 7 days add_diag_script: false # Set to true to install a script and cron job to automatically cleanup splunk diag files older than 30 days diff --git a/roles/splunk/tasks/adhoc_backup_kvstore.yml b/roles/splunk/tasks/adhoc_backup_kvstore.yml new file mode 100644 index 00000000..f350248d --- /dev/null +++ b/roles/splunk/tasks/adhoc_backup_kvstore.yml @@ -0,0 +1,15 @@ +--- +- name: Backup KVStore on desired host + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk backup kvstore {{ archive_name | default("") }} + become: true + become_user: "{{ splunk_nix_user }}" + register: splunk_kvstore_backup_out + changed_when: splunk_kvstore_backup_out.rc == 0 + failed_when: splunk_kvstore_backup_out.rc != 0 + +- name: Check that backup has finished + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk splunk show kvstore-status | grep backupRestoreStatus | sed -r 's/\s+backupRestoreStatus : //g' + register: splunk_kvstore_status_out + until: "{{ splunk_kvstore_status_out.stdout }} == 'Ready'" diff --git a/roles/splunk/tasks/configure_kvstore.yml b/roles/splunk/tasks/configure_kvstore.yml new file mode 100644 index 00000000..d63f6c5a --- /dev/null +++ b/roles/splunk/tasks/configure_kvstore.yml @@ -0,0 +1,28 @@ +--- +- name: Disable KVStore if specified + include_tasks: kvstore_disable.yml + when: not splunk_enable_kvstore + +- name: Configure initial KVStore storage engine in server.conf + community.general.ini_file: + path: "{{ splunk_home }}/etc/system/local/server.conf" + section: kvstore + option: storageEngine + value: "{{ splunk_kvstore_storage }}" + owner: "{{ splunk_nix_user }}" + group: "{{ splunk_nix_group }}" + mode: 0644 + become: true + when: + - splunk_kvstore_storage == "wiredTiger" + - splunk_enable_kvstore + +- name: Configure initial 
KVStore oplog size in server.conf + community.general.ini_file: + path: "{{ splunk_home }}/etc/system/local/server.conf" + section: kvstore + option: oplogSize + value: "{{ splunk_oplog_size }}" + become: true + become_user: "{{ splunk_nix_user }}" + when: splunk_enable_kvstore diff --git a/roles/splunk/tasks/kvstore_disable.yml b/roles/splunk/tasks/kvstore_disable.yml new file mode 100644 index 00000000..718853e7 --- /dev/null +++ b/roles/splunk/tasks/kvstore_disable.yml @@ -0,0 +1,11 @@ +--- +- name: Disable kvstore + community.general.ini_file: + path: "{{ splunk_home }}/etc/system/local/server.conf" + section: kvstore + option: disabled + value: "true" + become: true + become_user: "{{ splunk_nix_user }}" + when: not splunk_enable_kvstore + notify: restart splunk diff --git a/roles/splunk/tasks/kvstore_upgrade.yml b/roles/splunk/tasks/kvstore_upgrade.yml new file mode 100644 index 00000000..90a97dbc --- /dev/null +++ b/roles/splunk/tasks/kvstore_upgrade.yml @@ -0,0 +1,152 @@ +--- +- name: Set fact for kvstore Upgrade + ansible.builtin.set_fact: + splunk_upgrade_server: false + +- name: Check the current kvstore storage backend + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk show kvstore-status --verbose | grep storageEngine | sed -r 's/\s+storageEngine : //g' + become: true + become_user: "{{ splunk_nix_user }}" + register: splunk_kvstore_backend_out + changed_when: splunk_kvstore_backend_out.rc == 0 + failed_when: splunk_kvstore_backend_out.rc != 0 + +- name: Check the current kvstore server version + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk show kvstore-status --verbose | grep serverVersion | sed -r 's/\s+serverVersion : //g' + become: true + become_user: "{{ splunk_nix_user }}" + register: splunk_current_server_version_out + changed_when: splunk_current_server_version_out.rc == 0 + failed_when: splunk_current_server_version_out.rc != 0 + +- name: Debug print kvstore backend engine + ansible.builtin.debug: + var: 
splunk_kvstore_backend_out.stdout + verbosity: 2 + +- name: Debug print kvstore backend engine + ansible.builtin.debug: + var: splunk_current_server_version_out.stdout + verbosity: 2 + +- name: Upgrade KVstore if needed + block: + - name: Backup KVStore + include_tasks: adhoc_backup_kvstore.yml + vars: + archive_name: "-archiveName preAnsibleVersionUpgradeBackup" + + - name: Perform single-install upgrade steps + block: + - name: Perform < 9.0 migration steps + block: + - name: Define storage migration as true in server.conf + community.general.ini_file: + path: "{{ splunk_home }}/etc/system/local/server.conf" + section: kvstore + option: storageEngineMigration + value: "true" + become: true + become_user: "{{ splunk_nix_user }}" + + - name: Start storage engine migration on single instance + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk migrate kvstore-storage-engine --target-engine wiredTiger --enable-compression + register: splunk_migration_single_early_out + changed_when: splunk_migration_single_early_out.rc == 0 + failed_when: splunk_migration_single_early_out.rc != 0 + become: true + become_user: "{{ splunk_nix_user }}" + when: splunk_package_version is version(9.0, '<') + + - name: Perform >= 9.0 upgrade if necessary + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk migrate migrate-kvstore + register: splunk_migration_single_early_out + changed_when: splunk_migration_single_early_out.rc == 0 + failed_when: splunk_migration_single_early_out.rc != 0 + become: true + become_user: "{{ splunk_nix_user }}" + when: + - splunk_package_version is version(9.0, '>=') + - splunk_current_server_version_out.stdout is version(4.2, '<') + when: splunk_shc_target_group not in group_names + + - name: Perform SHC KVStore upgrade + block: + - name: Perform SHC pre-migration Steps + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk start-shcluster-migration kvstore -storageEngine wiredTiger -isDryRun true + register: splunk_shc_pre_steps_out + 
changed_when: splunk_shc_pre_steps_out.rc == 0 + failed_when: splunk_shc_pre_steps_out.rc != 0 + become: true + become_user: "{{ splunk_nix_user }}" + + - name: Start Backend migration + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk start-shcluster-migration kvstore -storageEngine wiredTiger -clusterPerc 50 + register: splunk_shc_kvstore_backend_migration_out + changed_when: splunk_shc_kvstore_backend_migration_out.rc == 0 + failed_when: splunk_shc_kvstore_backend_migration_out.rc != 0 + become: true + become_user: "{{ splunk_nix_user }}" + + - name: Make sure migration is successful + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk show shcluster-kvmigration-status | sed -r 's/\s+migrationStatus : //g' + register: splunk_kvstore_migration_status_out + changed_when: splunk_kvstore_migration_status_out.rc == 0 + failed_when: splunk_kvstore_migration_status_out.rc != 0 + become: true + become_user: "{{ splunk_nix_user }}" + until: "{{ splunk_kvstore_migration_status_out.stdout }} == 'notStarted'" + + - name: Perform SHC pre-upgrade steps + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk start-shcluster-upgrade kvstore -version 4.2 -isDryRun true + register: splunk_kvstore_version_check_out + changed_when: splunk_kvstore_version_check_out.rc == 0 + failed_when: splunk_kvstore_version_check_out.rc != 0 + become: true + become_user: "{{ splunk_nix_user }}" + + - name: Start Version upgrade + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk start-shcluster-upgrade kvstore -version 4.2 + register: splunk_kvstore_version_upgrade_out + changed_when: splunk_kvstore_version_upgrade_out.rc == 0 + failed_when: splunk_kvstore_version_upgrade_out.rc != 0 + become: true + become_user: "{{ splunk_nix_user }}" + + - name: Make sure upgrade is successful + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk show kvstore-status --verbose | sed -r 's/\s+serverVersion : //g' + register: splunk_kvstore_version_status_out + changed_when: 
splunk_kvstore_version_status_out.rc == 0 + failed_when: splunk_kvstore_version_status_out.rc != 0 + become: true + become_user: "{{ splunk_nix_user }}" + until: splunk_kvstore_version_status_out.stdout is version(4.2, '>=') + when: splunk_shc_target_group in group_names + + - name: Clean up older binaries from older versions + ansible.builtin.file: + path: "{{ splunk_home }}/bin/{{ item }}" + state: absent + loop: + - mongod-3.6 + - mongod-4.0 + - mongodump-3.6 + - mongorestore-3.6 + become: true + become_user: "{{ splunk_nix_user }}" + when: + - splunk_enable_kvstore + - splunk_kvstore_storage == "wiredTiger" + - "'full' in group_names" + - splunk_kvstore_backend_out.stdout != "wiredTiger" + - splunk_current_server_version_out.stdout is version(4.2, '<') \ No newline at end of file diff --git a/roles/splunk/tasks/post_install.yml b/roles/splunk/tasks/post_install.yml index 8c52a021..c16b8101 100644 --- a/roles/splunk/tasks/post_install.yml +++ b/roles/splunk/tasks/post_install.yml @@ -30,3 +30,7 @@ - name: Install additional utilities and troubleshooting tools include_tasks: install_utilities.yml when: install_utilities + +- name: Disable KVStore if necessary + include_tasks: kvstore_disable.yml + when: not splunk_enable_kvstore From 2e10ee30887124340dcc97d7f76a563d81476a6f Mon Sep 17 00:00:00 2001 From: arcsector <26469747+arcsector@users.noreply.github.com> Date: Tue, 21 Mar 2023 16:12:06 -0700 Subject: [PATCH 02/19] README and post_install changes --- README.md | 3 +++ roles/splunk/tasks/post_install.yml | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 01b91b29..188606e6 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,7 @@ This section contains additional reference documentation. Note: Any task with an **adhoc** prefix means that it can be used independently as a `deployment_task` in a playbook. 
You can use the tasks to resolve various Splunk problems or perform one-time activities, such as decommissioning an indexer from an indexer cluster. +- **adhoc_backup_kvstore.yml** - Backup your KVStore to a given point - use the var `archive_name` to specify a tar name other than the default. - **adhoc_clean_dispatch.yml** - This task is intended to be used for restoring service to search heads should the dispatch directory become full. You should not need to use this task in a healthy environment, but it is at your disposal should the need arise. The task will stop splunk, remove all files in the dispatch directory, and then start splunk. - **adhoc_configure_hostname** - Configure a Splunk server's hostname using the value from inventory_hostname. It configures the system hostname, serverName in server.conf and host in inputs.conf. All Splunk configuration changes are made using the ini_file module, which will preserve any other existing configurations that may exist in server.conf and/or inputs.conf. - **adhoc_decom_indexer.yml** - Executes a splunk offline --enforce-counts command. This is useful when decommissioning one or more indexers from an indexer cluster. @@ -141,6 +142,7 @@ Note: Any task with an **adhoc** prefix means that it can be used independently - **configure_idxc_manager.yml** - Configures a Splunk host to act as a manager node using `splunk_idxc_rf`, `splunk_idxc_sf`, `splunk_idxc_key`, and `splunk_idxc_label`. - **configure_idxc_member.yml** - Configures a Splunk host as an indexer cluster member using `splunk_uri_cm`, `splunk_idxc_rep_port`, and `splunk_idxc_key`. - **configure_idxc_sh.yml** - Configures a search head to join an existing indexer cluster using `splunk_uri_cm` and `splunk_idxc_key`. 
+- **configure_kvstore.yml** - Disables KVStore when disabled by `splunk_enable_kvstore` and sets vars related to KVStore in `server.conf` configured in the defaults, like `splunk_kvstore_storage` and `splunk_oplog_size` - **configure_license.yml** - Configure the license group to the `splunk_license_group` variable defined. Default is `Trial`. Available values are "Trial, Free, Enterprise, Forwarder, Manager or Peer. If set to `Peer`, the `splunk_uri_lm` must be defined. Note: This could also be accomplished using configure_apps.yml with a git repository. - **configure_os.yml** - Increases ulimits for the splunk user and disables Transparent Huge Pages (THP) per Splunk implementation best practices. - **configure_serverclass.yml** - Generates a new serverclass.conf file from the serverclass.conf.j2 template and installs it to $SPLUNK_HOME/etc/system/local/serverclass.conf. @@ -160,6 +162,7 @@ Note: Any task with an **adhoc** prefix means that it can be used independently - **install_splunk.yml** - *Do not call install_splunk.yml directly! Use check_splunk.yml* - Called by check_splunk.yml to install/upgrade Splunk and Splunk Universal Forwarders, as well as perform any initial configurations. This task is called by check_splunk.yml when the check determines that Splunk is not currently installed. This task will create the splunk user and splunk group, configure the bash profile for the splunk user (by calling configure_bash.yml), configure THP and ulimits (by calling configure_os.ym), download and install the appropriate Splunk package (by calling download_and_unarchive.yml), configure a common splunk.secret (by calling configure_splunk_secret.yml, if configure_secret is defined), create a deploymentclient.conf file with the splunk_ds_uri and clientName (by calling configure_deploymentclient.yml, if clientName is defined), install a user-seed.conf with a prehashed admin password (if used_seed is defined), and will then call the post_install.yml task. 
See post_install.yml entry for details on post-installation tasks. - **install_utilities.yml** - Installs Linux packages that are useful for troubleshooting Splunk-related issues when `install_utilities: true` and `linux_packages` is defined with a list of packages to install. - **configure_dmesg.yml** - Some distros restrict access to read `dmesg` for non-root users. This allows the `splunk` user to run the `dmesg` command. Defaults to `false`. +- **kvstore_upgrade.yml** - Upgrades a KVStore storage backend and/or server version on either a single or distributed instance. - **main.yml** - This is the main task that will always be called when executing this role. This task sets the appropriate variables for full vs uf packages, sends a Slack notification about the play if the slack_token and slack_channel are defined, checks the current boot-start configuration to determine if it's in the expected state, and then includes the task from the role to execute against, as defined by the value of the deployment_task variable. The deployment_task variable should be defined in your playbook(s). Refer to the included example playbooks to see this in action. - **post_install.yml** - Executes post-installation tasks. Performs a touch on the .ui_login file which disables the first-time login prompt to change your password, ensures that `splunk_home` is owned by the correct user and group, and optionally configures three scripts to: cleanup crash logs and old diags (by calling add_crashlog_script.yml and add_diag_script.yml, respectively), and a pstack generation shell script for troubleshooting purposes (by calling add_pstack_script.yml). This task will install various Linux troubleshooting utilities (by calling install_utilities.yml) when `install_utilities: true`. - **set_maintenance_mode.yml** - Enables or disables maintenance mode on a cluster manager. Intended to be called by playbooks for indexer cluster upgrades/maintenance. Requires the `state` variable to be defined. 
Valid values: enabled, disabled diff --git a/roles/splunk/tasks/post_install.yml b/roles/splunk/tasks/post_install.yml index c16b8101..db3605e2 100644 --- a/roles/splunk/tasks/post_install.yml +++ b/roles/splunk/tasks/post_install.yml @@ -31,6 +31,5 @@ include_tasks: install_utilities.yml when: install_utilities -- name: Disable KVStore if necessary - include_tasks: kvstore_disable.yml - when: not splunk_enable_kvstore +- name: Configure KVStore vars + include_tasks: configure_kvstore.yml From ce2c80a4bbd06fedb016c01c403d2b927a4abdf6 Mon Sep 17 00:00:00 2001 From: arcsector <26469747+arcsector@users.noreply.github.com> Date: Thu, 23 Mar 2023 15:05:34 -0700 Subject: [PATCH 03/19] Fixes for auth and disable --- roles/splunk/tasks/adhoc_backup_kvstore.yml | 4 ++-- roles/splunk/tasks/configure_kvstore.yml | 2 +- roles/splunk/tasks/disable_kvstore.yml | 6 ++++-- roles/splunk/tasks/kvstore_disable.yml | 11 ----------- roles/splunk/tasks/kvstore_upgrade.yml | 16 ++++++++-------- 5 files changed, 15 insertions(+), 24 deletions(-) delete mode 100644 roles/splunk/tasks/kvstore_disable.yml diff --git a/roles/splunk/tasks/adhoc_backup_kvstore.yml b/roles/splunk/tasks/adhoc_backup_kvstore.yml index f350248d..5f977f6e 100644 --- a/roles/splunk/tasks/adhoc_backup_kvstore.yml +++ b/roles/splunk/tasks/adhoc_backup_kvstore.yml @@ -1,7 +1,7 @@ --- - name: Backup KVStore on desired host ansible.builtin.command: | - {{ splunk_home }}/bin/splunk backup kvstore {{ archive_name | default("") }} + {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} backup kvstore {{ archive_name | default("") }} become: true become_user: "{{ splunk_nix_user }}" register: splunk_kvstore_backup_out @@ -10,6 +10,6 @@ - name: Check that backup has finished ansible.builtin.command: | - {{ splunk_home }}/bin/splunk splunk show kvstore-status | grep backupRestoreStatus | sed -r 's/\s+backupRestoreStatus : //g' + {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show kvstore-status | grep backupRestoreStatus | 
sed -r 's/\s+backupRestoreStatus : //g' register: splunk_kvstore_status_out until: "{{ splunk_kvstore_status_out.stdout }} == 'Ready'" diff --git a/roles/splunk/tasks/configure_kvstore.yml b/roles/splunk/tasks/configure_kvstore.yml index d63f6c5a..a13c6b25 100644 --- a/roles/splunk/tasks/configure_kvstore.yml +++ b/roles/splunk/tasks/configure_kvstore.yml @@ -1,6 +1,6 @@ --- - name: Disable KVStore if specified - include_tasks: kvstore_disable.yml + include_tasks: disable_kvstore.yml when: not splunk_enable_kvstore - name: Configure initial KVStore storage engine in server.conf diff --git a/roles/splunk/tasks/disable_kvstore.yml b/roles/splunk/tasks/disable_kvstore.yml index 42b079a6..ff839101 100644 --- a/roles/splunk/tasks/disable_kvstore.yml +++ b/roles/splunk/tasks/disable_kvstore.yml @@ -1,10 +1,12 @@ --- - name: Disable KVStore - when: ansible_system == "Linux" + when: + - ansible_system == "Linux" + - not splunk_enable_kvstore ini_file: path: "{{ splunk_home }}/etc/system/local/server.conf" section: kvstore option: disabled value: "true" become: True - become_user: "{{ splunk_nix_user }}" \ No newline at end of file + become_user: "{{ splunk_nix_user }}" diff --git a/roles/splunk/tasks/kvstore_disable.yml b/roles/splunk/tasks/kvstore_disable.yml deleted file mode 100644 index 718853e7..00000000 --- a/roles/splunk/tasks/kvstore_disable.yml +++ /dev/null @@ -1,11 +0,0 @@ ---- -- name: Disable kvstore - community.general.ini_file: - path: "{{ splunk_home }}/etc/system/local/server.conf" - section: kvstore - option: disabled - value: "true" - become: true - become_user: "{{ splunk_nix_user }}" - when: not splunk_enable_kvstore - notify: restart splunk diff --git a/roles/splunk/tasks/kvstore_upgrade.yml b/roles/splunk/tasks/kvstore_upgrade.yml index 90a97dbc..7d055da9 100644 --- a/roles/splunk/tasks/kvstore_upgrade.yml +++ b/roles/splunk/tasks/kvstore_upgrade.yml @@ -5,7 +5,7 @@ - name: Check the current kvstore storage backend ansible.builtin.command: | - {{ 
splunk_home }}/bin/splunk show kvstore-status --verbose | grep storageEngine | sed -r 's/\s+storageEngine : //g' + {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show kvstore-status --verbose | grep storageEngine | sed -r 's/\s+storageEngine : //g' become: true become_user: "{{ splunk_nix_user }}" register: splunk_kvstore_backend_out @@ -14,7 +14,7 @@ - name: Check the current kvstore server version ansible.builtin.command: | - {{ splunk_home }}/bin/splunk show kvstore-status --verbose | grep serverVersion | sed -r 's/\s+serverVersion : //g' + {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show kvstore-status --verbose | grep serverVersion | sed -r 's/\s+serverVersion : //g' become: true become_user: "{{ splunk_nix_user }}" register: splunk_current_server_version_out @@ -78,7 +78,7 @@ block: - name: Perform SHC pre-migration Steps ansible.builtin.command: | - {{ splunk_home }}/bin/splunk start-shcluster-migration kvstore -storageEngine wiredTiger -isDryRun true + {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} start-shcluster-migration kvstore -storageEngine wiredTiger -isDryRun true register: splunk_shc_pre_steps_out changed_when: splunk_shc_pre_steps_out.rc == 0 failed_when: splunk_shc_pre_steps_out.rc != 0 @@ -87,7 +87,7 @@ - name: Start Backend migration ansible.builtin.command: | - {{ splunk_home }}/bin/splunk start-shcluster-migration kvstore -storageEngine wiredTiger -clusterPerc 50 + {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} start-shcluster-migration kvstore -storageEngine wiredTiger -clusterPerc 50 register: splunk_shc_kvstore_backend_migration_out changed_when: splunk_shc_kvstore_backend_migration_out.rc == 0 failed_when: splunk_shc_kvstore_backend_migration_out.rc != 0 @@ -96,7 +96,7 @@ - name: Make sure migration is successful ansible.builtin.command: | - {{ splunk_home }}/bin/splunk show shcluster-kvmigration-status | sed -r 's/\s+migrationStatus : //g' + {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show 
shcluster-kvmigration-status | sed -r 's/\s+migrationStatus : //g' register: splunk_kvstore_migration_status_out changed_when: splunk_kvstore_migration_status_out.rc == 0 failed_when: splunk_kvstore_migration_status_out.rc != 0 @@ -106,7 +106,7 @@ - name: Perform SHC pre-upgrade steps ansible.builtin.command: | - {{ splunk_home }}/bin/splunk start-shcluster-upgrade kvstore -version 4.2 -isDryRun true + {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} start-shcluster-upgrade kvstore -version 4.2 -isDryRun true register: splunk_kvstore_version_check_out changed_when: splunk_kvstore_version_check_out.rc == 0 failed_when: splunk_kvstore_version_check_out.rc != 0 @@ -115,7 +115,7 @@ - name: Start Version upgrade ansible.builtin.command: | - {{ splunk_home }}/bin/splunk start-shcluster-upgrade kvstore -version 4.2 + {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} start-shcluster-upgrade kvstore -version 4.2 register: splunk_kvstore_version_upgrade_out changed_when: splunk_kvstore_version_upgrade_out.rc == 0 failed_when: splunk_kvstore_version_upgrade_out.rc != 0 @@ -124,7 +124,7 @@ - name: Make sure upgrade is successful ansible.builtin.command: | - {{ splunk_home }}/bin/splunk show kvstore-status --verbose | sed -r 's/\s+serverVersion : //g' + {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show kvstore-status --verbose | sed -r 's/\s+serverVersion : //g' register: splunk_kvstore_version_status_out changed_when: splunk_kvstore_version_status_out.rc == 0 failed_when: splunk_kvstore_version_status_out.rc != 0 From e30dc3a2888b23cc89cef059cda3f2ba776ba907 Mon Sep 17 00:00:00 2001 From: arcsector <26469747+arcsector@users.noreply.github.com> Date: Thu, 23 Mar 2023 15:33:06 -0700 Subject: [PATCH 04/19] Additional KVstore helpers and tasks - clean - destructive resync - get kvstore captain - get shcluster captain --- README.md | 4 ++++ roles/splunk/tasks/adhoc_clean_kvstore.yml | 14 ++++++++++++ .../adhoc_destructive_resync_kvstore.yml | 22 +++++++++++++++++++ 
roles/splunk/tasks/get_kvstore_captain.yml | 12 ++++++++++ roles/splunk/tasks/get_shcluster_captain.yml | 12 ++++++++++ 5 files changed, 64 insertions(+) create mode 100644 roles/splunk/tasks/adhoc_clean_kvstore.yml create mode 100644 roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml create mode 100644 roles/splunk/tasks/get_kvstore_captain.yml create mode 100644 roles/splunk/tasks/get_shcluster_captain.yml diff --git a/README.md b/README.md index 188606e6..9ccc8baa 100644 --- a/README.md +++ b/README.md @@ -127,8 +127,10 @@ Note: Any task with an **adhoc** prefix means that it can be used independently - **adhoc_backup_kvstore.yml** - Backup your KVStore to a given point - use the var `archive_name` to specify a tar name other than the default. - **adhoc_clean_dispatch.yml** - This task is intended to be used for restoring service to search heads should the dispatch directory become full. You should not need to use this task in a healthy environment, but it is at your disposal should the need arise. The task will stop splunk, remove all files in the dispatch directory, and then start splunk. +- **adhoc_clean_kvstore.yml** - Cleans the KVStore from all data, allowing it to pull the latest data from the KVStore captain - usually done when a KVStore is down, but Splunkd is still running fine. - **adhoc_configure_hostname** - Configure a Splunk server's hostname using the value from inventory_hostname. It configures the system hostname, serverName in server.conf and host in inputs.conf. All Splunk configuration changes are made using the ini_file module, which will preserve any other existing configurations that may exist in server.conf and/or inputs.conf. - **adhoc_decom_indexer.yml** - Executes a splunk offline --enforce-counts command. This is useful when decommissioning one or more indexers from an indexer cluster. +- **adhoc_destructive_resync_kvstore.yml** - Removes an SH Member from the cluster, cleans it's KVStore, then puts it back into the cluster. 
Usually used when SH Bundle and KV Bundle are out of sync for longer than a few hours. - **adhoc_fix_mongo.yml** - Use when Splunk is in a stopped state to fix mongodb/kvstore issues. This task ensures that permissions are set correctly on mongo's splunk.key file and deletes mongod.lock if it exists. - **adhoc_fix_server_certificate.yml** - Use to delete an expired server.pem and generate a new one (default certs). Useful if your server.pem certificate has expired and you are using Splunk's default certificate for splunkd. Note that default certificates present a security risk and that their use should be avoided, if possible. - **adhoc_kill_splunkd.yml** - Some releases of Splunk have a "feature" that leaves zombie splunkd processes after a 'splunk stop'. Use this task after a 'splunk stop' to make sure that it's really stopped. Useful for upgrades on some of the 7.x releases, and automatically called by the upgrade_splunk.yml task. @@ -158,6 +160,8 @@ Note: Any task with an **adhoc** prefix means that it can be used independently You can set if the download/unarchive process uses the Ansible host or if each host downloads and unarchives the package individually by setting `splunk_download_local`. Default is `true` which will download the package to the Ansible host once and unarchive to each host from there. If set to `false` the package will be downloaded and unarchived to each host individually. Immediately after unarchive the package will be removed from the host. +- **get_kvstore_captain.yml** - Gets the current captain in the KVStore cluster. +- **get_shcluster_captain.yml** - Gets the current captain in the SHCluster. - **install_apps.yml** - *Do not call install_apps.yml directly! Use configure_apps.yml* - Called by configure_apps.yml to perform app installation on the Splunk host. - **install_splunk.yml** - *Do not call install_splunk.yml directly! 
Use check_splunk.yml* - Called by check_splunk.yml to install/upgrade Splunk and Splunk Universal Forwarders, as well as perform any initial configurations. This task is called by check_splunk.yml when the check determines that Splunk is not currently installed. This task will create the splunk user and splunk group, configure the bash profile for the splunk user (by calling configure_bash.yml), configure THP and ulimits (by calling configure_os.ym), download and install the appropriate Splunk package (by calling download_and_unarchive.yml), configure a common splunk.secret (by calling configure_splunk_secret.yml, if configure_secret is defined), create a deploymentclient.conf file with the splunk_ds_uri and clientName (by calling configure_deploymentclient.yml, if clientName is defined), install a user-seed.conf with a prehashed admin password (if used_seed is defined), and will then call the post_install.yml task. See post_install.yml entry for details on post-installation tasks. - **install_utilities.yml** - Installs Linux packages that are useful for troubleshooting Splunk-related issues when `install_utilities: true` and `linux_packages` is defined with a list of packages to install. 
diff --git a/roles/splunk/tasks/adhoc_clean_kvstore.yml b/roles/splunk/tasks/adhoc_clean_kvstore.yml new file mode 100644 index 00000000..2b115880 --- /dev/null +++ b/roles/splunk/tasks/adhoc_clean_kvstore.yml @@ -0,0 +1,14 @@ +--- +- name: Stop Splunkd service + include_tasks: splunk_stop.yml + +- name: Clean KVStore + ansible.builtin.command: "{{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} clean kvstore --local --answer-yes" + become: true + become_user: "{{ splunk_nix_user }}" + register: clean_result + changed_when: clean_result.rc == 0 + failed_when: clean_result.rc != 0 + notify: + - start splunk + no_log: true diff --git a/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml b/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml new file mode 100644 index 00000000..b683aafb --- /dev/null +++ b/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml @@ -0,0 +1,22 @@ +--- +# We have to do this first so that we store the captain before removing from the cluster +- name: Get SHCluster captain + include_tasks: get_shcluster_captain.yml + +- name: Remove SHCluster member + ansible.builtin.command: "{{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} remove shcluster-member" + register: splunk_remove_shcluster_member + changed_when: splunk_remove_shcluster_member.rc == 0 + failed_when: splunk_remove_shcluster_member.rc != 0 + no_log: true + +- name: Clean KVStore + include_tasks: adhoc_clean_kvstore.yml + +- name: Add SHCluster member from current member + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} add shcluster-member -current_member_uri {{ splunk_shc_captain }} + register: splunk_remove_shcluster_member + changed_when: splunk_remove_shcluster_member.rc == 0 + failed_when: splunk_remove_shcluster_member.rc != 0 + no_log: true diff --git a/roles/splunk/tasks/get_kvstore_captain.yml b/roles/splunk/tasks/get_kvstore_captain.yml new file mode 100644 index 00000000..84826250 --- /dev/null +++ 
b/roles/splunk/tasks/get_kvstore_captain.yml @@ -0,0 +1,12 @@ +--- +# Gets KVStore captain hostname - like splunk_captain.domain.com +- name: Get current KVStore captain + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show kvstore-status | grep -B10 "KV store captain" | grep "hostAndPort" | sed -r 's/\s+hostAndPort : //g' | sed -r 's/:[0-9]+//g' + register: splunk_get_kvcaptain + changed_when: splunk_get_kvcaptain.rc == 0 + failed_when: splunk_get_kvcaptain.rc != 0 + +- name: Register KVStore captain fact + ansible.builtin.set_fact: + splunk_kv_captain: "{{ splunk_get_kvcaptain.stdout }}" diff --git a/roles/splunk/tasks/get_shcluster_captain.yml b/roles/splunk/tasks/get_shcluster_captain.yml new file mode 100644 index 00000000..91e58fb1 --- /dev/null +++ b/roles/splunk/tasks/get_shcluster_captain.yml @@ -0,0 +1,12 @@ +--- +# Gets SHC captain management uri - like https://splunk_captain.example:8089 +- name: Get current SHCluster captain + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show shcluster-status | grep -A6 Captain | grep mgmt_uri | sed -r 's/\s+mgmt_uri : //g' + register: splunk_get_shcaptain + changed_when: splunk_get_shcaptain.rc == 0 + failed_when: splunk_get_shcaptain.rc != 0 + +- name: Register SHCluster captain fact + ansible.builtin.set_fact: + splunk_shc_captain: "{{ splunk_get_shcaptain.stdout }}" From 5b71f97e60ceb18501c95bba93fc669cf3622caa Mon Sep 17 00:00:00 2001 From: David Twersky Date: Wed, 29 Mar 2023 12:56:13 -0400 Subject: [PATCH 05/19] added a login task and included in kvstore related tasks --- README.md | 1 + roles/splunk/defaults/main.yml | 1 + roles/splunk/tasks/adhoc_backup_kvstore.yml | 8 +++++-- roles/splunk/tasks/adhoc_clean_kvstore.yml | 3 +-- .../adhoc_destructive_resync_kvstore.yml | 10 +++++---- roles/splunk/tasks/get_kvstore_captain.yml | 6 ++++- roles/splunk/tasks/get_shcluster_captain.yml | 6 ++++- roles/splunk/tasks/kvstore_upgrade.yml | 22 
+++++++++++-------- roles/splunk/tasks/splunk_login.yml | 11 ++++++++++ roles/splunk/tasks/splunk_restart.yml | 4 ++++ roles/splunk/tasks/splunk_start.yml | 5 +++++ roles/splunk/tasks/splunk_stop.yml | 4 ++++ 12 files changed, 62 insertions(+), 19 deletions(-) create mode 100644 roles/splunk/tasks/splunk_login.yml diff --git a/README.md b/README.md index 9ccc8baa..9ee3dd2a 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,7 @@ Note: Any task with an **adhoc** prefix means that it can be used independently - **post_install.yml** - Executes post-installation tasks. Performs a touch on the .ui_login file which disables the first-time login prompt to change your password, ensures that `splunk_home` is owned by the correct user and group, and optionally configures three scripts to: cleanup crash logs and old diags (by calling add_crashlog_script.yml and add_diag_script.yml, respectively), and a pstack generation shell script for troubleshooting purposes (by calling add_pstack_script.yml). This task will install various Linux troubleshooting utilities (by calling install_utilities.yml) when `install_utilities: true`. - **set_maintenance_mode.yml** - Enables or disables maintenance mode on a cluster manager. Intended to be called by playbooks for indexer cluster upgrades/maintenance. Requires the `state` variable to be defined. Valid values: enabled, disabled - **set_upgrade_state.yml** - Executes a splunk upgrade-{{ peer_state }} cluster-peers command on the cluster manager. This task can be used for upgrading indexer clusters with new minor and maintenance releases of Splunk (assuming you are at Splunk v7.1.0 or higher). Refer to https://docs.splunk.com/Documentation/Splunk/latest/Indexer/Searchablerollingupgrade for more information. +- **splunk_login.yml** - Authenticated to splunk. This will avoid having to pass `-auth` for every command that meeds authentication. This sets the `splunk_authenticated` variable to true. 
To include this in a task, you can set a conditional to only run it when `splunk_authenticated == false`. If included in a task that manually calls the `splunk_stop.yml`, `splunk_start.yml` or `splunk_restart.yml` task, the `splunk_authenticated will be reset to false. - **splunk_offline.yml** - Runs a splunk offline CLI command. Useful for bringing down indexers non-intrusively by allowing searches to complete before stopping splunk. - **splunk_restart.yml** - Restarts splunk via the service module. Used when waiting for a handler to run at the end of the play would be inappropriate. - **splunk_start.yml** - Starts splunk via the service module. Used when waiting for a handler to run at the end of the play would be inappropriate. diff --git a/roles/splunk/defaults/main.yml b/roles/splunk/defaults/main.yml index 00010dc3..44c4564b 100644 --- a/roles/splunk/defaults/main.yml +++ b/roles/splunk/defaults/main.yml @@ -29,6 +29,7 @@ splunk_general_key: undefined # Configures a pass4SymmKey in server.conf under t splunk_ds_key: undefined # Configures a pass4SymmKey in server.conf for authenticating against a deployment server splunk_admin_username: admin splunk_admin_password: undefined # Use ansible-vault encrypt_string, e.g. ansible-vault encrypt_string --ask-vault-pass 'var_value_to_encrypt' --name 'var_name' +splunk_authenticated: false # DO NOT CHANGE. This fact is set to true in the `splunk_login.yml` task, and reset to false if `splunk_restart.yml`, `splunk_stop.yml` or `splunk_start.yml` are manually called in another task. splunk_configure_secret: false # If set to true, you need to update files/splunk.secret splunk_secret_file: splunk.secret # Used to specify your splunk.secret filename(s), files should be placed in the "files" folder of the role # Although there are tasks for the following Splunk configurations in this role, they are not included in any tasks by default. 
You can add them to your install_splunk.yml if you would like to have Ansible manage any of these files diff --git a/roles/splunk/tasks/adhoc_backup_kvstore.yml b/roles/splunk/tasks/adhoc_backup_kvstore.yml index 5f977f6e..969aa305 100644 --- a/roles/splunk/tasks/adhoc_backup_kvstore.yml +++ b/roles/splunk/tasks/adhoc_backup_kvstore.yml @@ -1,7 +1,11 @@ --- +- name: Check if authenticated + include_tasks: splunk_login.yml + when: not splunk_authenticated + - name: Backup KVStore on desired host ansible.builtin.command: | - {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} backup kvstore {{ archive_name | default("") }} + {{ splunk_home }}/bin/splunk backup kvstore {{ archive_name | default("") }} become: true become_user: "{{ splunk_nix_user }}" register: splunk_kvstore_backup_out @@ -10,6 +14,6 @@ - name: Check that backup has finished ansible.builtin.command: | - {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show kvstore-status | grep backupRestoreStatus | sed -r 's/\s+backupRestoreStatus : //g' + {{ splunk_home }}/bin/splunk show kvstore-status | grep backupRestoreStatus | sed -r 's/\s+backupRestoreStatus : //g' register: splunk_kvstore_status_out until: "{{ splunk_kvstore_status_out.stdout }} == 'Ready'" diff --git a/roles/splunk/tasks/adhoc_clean_kvstore.yml b/roles/splunk/tasks/adhoc_clean_kvstore.yml index 2b115880..8f50206c 100644 --- a/roles/splunk/tasks/adhoc_clean_kvstore.yml +++ b/roles/splunk/tasks/adhoc_clean_kvstore.yml @@ -3,7 +3,7 @@ include_tasks: splunk_stop.yml - name: Clean KVStore - ansible.builtin.command: "{{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} clean kvstore --local --answer-yes" + ansible.builtin.command: "{{ splunk_home }}/bin/splunk clean kvstore --local --answer-yes" become: true become_user: "{{ splunk_nix_user }}" register: clean_result @@ -11,4 +11,3 @@ failed_when: clean_result.rc != 0 notify: - start splunk - no_log: true diff --git a/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml 
b/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml index b683aafb..78b04d1f 100644 --- a/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml +++ b/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml @@ -1,22 +1,24 @@ --- # We have to do this first so that we store the captain before removing from the cluster +- name: Check if authenticated + include_tasks: splunk_login.yml + when: not splunk_authenticated + - name: Get SHCluster captain include_tasks: get_shcluster_captain.yml - name: Remove SHCluster member - ansible.builtin.command: "{{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} remove shcluster-member" + ansible.builtin.command: "{{ splunk_home }}/bin/splunk remove shcluster-member" register: splunk_remove_shcluster_member changed_when: splunk_remove_shcluster_member.rc == 0 failed_when: splunk_remove_shcluster_member.rc != 0 - no_log: true - name: Clean KVStore include_tasks: adhoc_clean_kvstore.yml - name: Add SHCluster member from current member ansible.builtin.command: | - {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} add shcluster-member -current_member_uri {{ splunk_shc_captain }} + {{ splunk_home }}/bin/splunk add shcluster-member -current_member_uri {{ splunk_shc_captain }} register: splunk_remove_shcluster_member changed_when: splunk_remove_shcluster_member.rc == 0 failed_when: splunk_remove_shcluster_member.rc != 0 - no_log: true diff --git a/roles/splunk/tasks/get_kvstore_captain.yml b/roles/splunk/tasks/get_kvstore_captain.yml index 84826250..28ff9d19 100644 --- a/roles/splunk/tasks/get_kvstore_captain.yml +++ b/roles/splunk/tasks/get_kvstore_captain.yml @@ -1,8 +1,12 @@ --- +- name: Check if authenticated + include_tasks: splunk_login.yml + when: not splunk_authenticated + # Gets KVStore captain hostname - like splunk_captain.domain.com - name: Get current KVStore captain ansible.builtin.command: | - {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show kvstore-status | grep -B10 "KV store captain" | grep 
"hostAndPort" | sed -r 's/\s+hostAndPort : //g' | sed -r 's/:[0-9]+//g' + {{ splunk_home }}/bin/splunk show kvstore-status | grep -B10 "KV store captain" | grep "hostAndPort" | sed -r 's/\s+hostAndPort : //g' | sed -r 's/:[0-9]+//g' register: splunk_get_kvcaptain changed_when: splunk_get_kvcaptain.rc == 0 failed_when: splunk_get_kvcaptain.rc != 0 diff --git a/roles/splunk/tasks/get_shcluster_captain.yml b/roles/splunk/tasks/get_shcluster_captain.yml index 91e58fb1..2d298596 100644 --- a/roles/splunk/tasks/get_shcluster_captain.yml +++ b/roles/splunk/tasks/get_shcluster_captain.yml @@ -1,8 +1,12 @@ --- # Gets SHC captain management uri - like https://splunk_captain.example:8089 +- name: Check if authenticated + include_tasks: splunk_login.yml + when: not splunk_authenticated + - name: Get current SHCluster captain ansible.builtin.command: | - {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show shcluster-status | grep -A6 Captain | grep mgmt_uri | sed -r 's/\s+mgmt_uri : //g' + {{ splunk_home }}/bin/splunk show shcluster-status | grep -A6 Captain | grep mgmt_uri | sed -r 's/\s+mgmt_uri : //g' register: splunk_get_shcaptain changed_when: splunk_get_shcaptain.rc == 0 failed_when: splunk_get_shcaptain.rc != 0 diff --git a/roles/splunk/tasks/kvstore_upgrade.yml b/roles/splunk/tasks/kvstore_upgrade.yml index 7d055da9..e42bf6f3 100644 --- a/roles/splunk/tasks/kvstore_upgrade.yml +++ b/roles/splunk/tasks/kvstore_upgrade.yml @@ -1,11 +1,15 @@ --- +- name: Check if authenticated + include_tasks: splunk_login.yml + when: not splunk_authenticated + - name: Set fact for kvstore Upgrade ansible.builtin.set_fact: splunk_upgrade_server: false - name: Check the current kvstore storage backend ansible.builtin.command: | - {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show kvstore-status --verbose | grep storageEngine | sed -r 's/\s+storageEngine : //g' + {{ splunk_home }}/bin/splunk show kvstore-status | grep storageEngine | sed -r 's/\s+storageEngine : //g' become: true 
become_user: "{{ splunk_nix_user }}" register: splunk_kvstore_backend_out @@ -14,7 +18,7 @@ - name: Check the current kvstore server version ansible.builtin.command: | - {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show kvstore-status --verbose | grep serverVersion | sed -r 's/\s+serverVersion : //g' + {{ splunk_home }}/bin/splunk show kvstore-status | grep serverVersion | sed -r 's/\s+serverVersion : //g' become: true become_user: "{{ splunk_nix_user }}" register: splunk_current_server_version_out @@ -78,7 +82,7 @@ block: - name: Perform SHC pre-migration Steps ansible.builtin.command: | - {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} start-shcluster-migration kvstore -storageEngine wiredTiger -isDryRun true + {{ splunk_home }}/bin/splunk start-shcluster-migration kvstore -storageEngine wiredTiger -isDryRun true register: splunk_shc_pre_steps_out changed_when: splunk_shc_pre_steps_out.rc == 0 failed_when: splunk_shc_pre_steps_out.rc != 0 @@ -87,7 +91,7 @@ - name: Start Backend migration ansible.builtin.command: | - {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} start-shcluster-migration kvstore -storageEngine wiredTiger -clusterPerc 50 + {{ splunk_home }}/bin/splunk start-shcluster-migration kvstore -storageEngine wiredTiger -clusterPerc 50 register: splunk_shc_kvstore_backend_migration_out changed_when: splunk_shc_kvstore_backend_migration_out.rc == 0 failed_when: splunk_shc_kvstore_backend_migration_out.rc != 0 @@ -96,7 +100,7 @@ - name: Make sure migration is successful ansible.builtin.command: | - {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show shcluster-kvmigration-status | sed -r 's/\s+migrationStatus : //g' + {{ splunk_home }}/bin/splunk show shcluster-kvmigration-status | sed -r 's/\s+migrationStatus : //g' register: splunk_kvstore_migration_status_out changed_when: splunk_kvstore_migration_status_out.rc == 0 failed_when: splunk_kvstore_migration_status_out.rc != 0 @@ -106,7 +110,7 @@ - name: Perform SHC pre-upgrade steps 
ansible.builtin.command: | - {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} start-shcluster-upgrade kvstore -version 4.2 -isDryRun true + {{ splunk_home }}/bin/splunk start-shcluster-upgrade kvstore -version 4.2 -isDryRun true register: splunk_kvstore_version_check_out changed_when: splunk_kvstore_version_check_out.rc == 0 failed_when: splunk_kvstore_version_check_out.rc != 0 @@ -115,7 +119,7 @@ - name: Start Version upgrade ansible.builtin.command: | - {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} start-shcluster-upgrade kvstore -version 4.2 + {{ splunk_home }}/bin/splunk start-shcluster-upgrade kvstore -version 4.2 register: splunk_kvstore_version_upgrade_out changed_when: splunk_kvstore_version_upgrade_out.rc == 0 failed_when: splunk_kvstore_version_upgrade_out.rc != 0 @@ -124,7 +128,7 @@ - name: Make sure upgrade is successful ansible.builtin.command: | - {{ splunk_home }}/bin/splunk -auth {{ splunk_auth }} show kvstore-status --verbose | sed -r 's/\s+serverVersion : //g' + {{ splunk_home }}/bin/splunk show kvstore-status --verbose | sed -r 's/\s+serverVersion : //g' register: splunk_kvstore_version_status_out changed_when: splunk_kvstore_version_status_out.rc == 0 failed_when: splunk_kvstore_version_status_out.rc != 0 @@ -149,4 +153,4 @@ - splunk_kvstore_storage == "wiredTiger" - "'full' in group_names" - splunk_kvstore_backend_out.stdout != "wiredTiger" - - splunk_current_server_version_out.stdout is version(4.2 '<') \ No newline at end of file + - splunk_current_server_version_out.stdout is version(4.2 '<') diff --git a/roles/splunk/tasks/splunk_login.yml b/roles/splunk/tasks/splunk_login.yml new file mode 100644 index 00000000..8976239f --- /dev/null +++ b/roles/splunk/tasks/splunk_login.yml @@ -0,0 +1,11 @@ +--- +- name: Login to splunk + ansible.builtin.command: "{{ splunk_home }}/bin/splunk login -auth {{ splunk_auth }}" + register: splunk_authenticated + failed_when: splunk_authenticated.rc != 0 + changed_when: false + no_log: true + +- 
name: Set Authenticated variable + ansible.builtin.set_fact: + splunk_authenticated: true diff --git a/roles/splunk/tasks/splunk_restart.yml b/roles/splunk/tasks/splunk_restart.yml index ded76cf8..18ac0937 100644 --- a/roles/splunk/tasks/splunk_restart.yml +++ b/roles/splunk/tasks/splunk_restart.yml @@ -4,3 +4,7 @@ name: "{{ splunk_service }}" state: restarted become: true + +- name: Reset splunk_authenticated variable + ansible.builtin.set_fact: + splunk_authenticated: false diff --git a/roles/splunk/tasks/splunk_start.yml b/roles/splunk/tasks/splunk_start.yml index a78d41a9..1efa1242 100644 --- a/roles/splunk/tasks/splunk_start.yml +++ b/roles/splunk/tasks/splunk_start.yml @@ -4,3 +4,8 @@ name: "{{ splunk_service }}" state: started become: true + +- name: Reset splunk_authenticated variable + ansible.builtin.set_fact: + splunk_authenticated: false + diff --git a/roles/splunk/tasks/splunk_stop.yml b/roles/splunk/tasks/splunk_stop.yml index 06f07252..f9ea58cf 100644 --- a/roles/splunk/tasks/splunk_stop.yml +++ b/roles/splunk/tasks/splunk_stop.yml @@ -4,3 +4,7 @@ name: "{{ splunk_service }}" state: stopped become: true + +- name: Reset splunk_authenticated variable + ansible.builtin.set_fact: + splunk_authenticated: false From abd4739056e21bab0fc7f76991e638ece3a6a675 Mon Sep 17 00:00:00 2001 From: David Twersky Date: Wed, 29 Mar 2023 12:57:35 -0400 Subject: [PATCH 06/19] fixed missing tick in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9ee3dd2a..4158c520 100644 --- a/README.md +++ b/README.md @@ -171,7 +171,7 @@ Note: Any task with an **adhoc** prefix means that it can be used independently - **post_install.yml** - Executes post-installation tasks. 
Performs a touch on the .ui_login file which disables the first-time login prompt to change your password, ensures that `splunk_home` is owned by the correct user and group, and optionally configures three scripts to: cleanup crash logs and old diags (by calling add_crashlog_script.yml and add_diag_script.yml, respectively), and a pstack generation shell script for troubleshooting purposes (by calling add_pstack_script.yml). This task will install various Linux troubleshooting utilities (by calling install_utilities.yml) when `install_utilities: true`. - **set_maintenance_mode.yml** - Enables or disables maintenance mode on a cluster manager. Intended to be called by playbooks for indexer cluster upgrades/maintenance. Requires the `state` variable to be defined. Valid values: enabled, disabled - **set_upgrade_state.yml** - Executes a splunk upgrade-{{ peer_state }} cluster-peers command on the cluster manager. This task can be used for upgrading indexer clusters with new minor and maintenance releases of Splunk (assuming you are at Splunk v7.1.0 or higher). Refer to https://docs.splunk.com/Documentation/Splunk/latest/Indexer/Searchablerollingupgrade for more information. -- **splunk_login.yml** - Authenticated to splunk. This will avoid having to pass `-auth` for every command that meeds authentication. This sets the `splunk_authenticated` variable to true. To include this in a task, you can set a conditional to only run it when `splunk_authenticated == false`. If included in a task that manually calls the `splunk_stop.yml`, `splunk_start.yml` or `splunk_restart.yml` task, the `splunk_authenticated will be reset to false. +- **splunk_login.yml** - Authenticated to splunk. This will avoid having to pass `-auth` for every command that meeds authentication. This sets the `splunk_authenticated` variable to true. To include this in a task, you can set a conditional to only run it when `splunk_authenticated == false`. 
If included in a task that manually calls the `splunk_stop.yml`, `splunk_start.yml` or `splunk_restart.yml` task, the `splunk_authenticated` will be reset to false. - **splunk_offline.yml** - Runs a splunk offline CLI command. Useful for bringing down indexers non-intrusively by allowing searches to complete before stopping splunk. - **splunk_restart.yml** - Restarts splunk via the service module. Used when waiting for a handler to run at the end of the play would be inappropriate. - **splunk_start.yml** - Starts splunk via the service module. Used when waiting for a handler to run at the end of the play would be inappropriate. From 2fe810b0e90a18ef5c8b304b79793d394a31cb6e Mon Sep 17 00:00:00 2001 From: David Twersky Date: Wed, 29 Mar 2023 12:59:08 -0400 Subject: [PATCH 07/19] fixed another typo in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4158c520..d813a75a 100644 --- a/README.md +++ b/README.md @@ -171,7 +171,7 @@ Note: Any task with an **adhoc** prefix means that it can be used independently - **post_install.yml** - Executes post-installation tasks. Performs a touch on the .ui_login file which disables the first-time login prompt to change your password, ensures that `splunk_home` is owned by the correct user and group, and optionally configures three scripts to: cleanup crash logs and old diags (by calling add_crashlog_script.yml and add_diag_script.yml, respectively), and a pstack generation shell script for troubleshooting purposes (by calling add_pstack_script.yml). This task will install various Linux troubleshooting utilities (by calling install_utilities.yml) when `install_utilities: true`. - **set_maintenance_mode.yml** - Enables or disables maintenance mode on a cluster manager. Intended to be called by playbooks for indexer cluster upgrades/maintenance. Requires the `state` variable to be defined. 
Valid values: enabled, disabled - **set_upgrade_state.yml** - Executes a splunk upgrade-{{ peer_state }} cluster-peers command on the cluster manager. This task can be used for upgrading indexer clusters with new minor and maintenance releases of Splunk (assuming you are at Splunk v7.1.0 or higher). Refer to https://docs.splunk.com/Documentation/Splunk/latest/Indexer/Searchablerollingupgrade for more information. -- **splunk_login.yml** - Authenticated to splunk. This will avoid having to pass `-auth` for every command that meeds authentication. This sets the `splunk_authenticated` variable to true. To include this in a task, you can set a conditional to only run it when `splunk_authenticated == false`. If included in a task that manually calls the `splunk_stop.yml`, `splunk_start.yml` or `splunk_restart.yml` task, the `splunk_authenticated` will be reset to false. +- **splunk_login.yml** - Authenticate to splunk. This will avoid having to pass `-auth` for every command that needs authentication. This sets the `splunk_authenticated` variable to true. To include this in a task, you can set a conditional to only run it when `splunk_authenticated == false`. If included in a task that manually calls the `splunk_stop.yml`, `splunk_start.yml` or `splunk_restart.yml` task, the `splunk_authenticated` will be reset to false. - **splunk_offline.yml** - Runs a splunk offline CLI command. Useful for bringing down indexers non-intrusively by allowing searches to complete before stopping splunk. - **splunk_restart.yml** - Restarts splunk via the service module. Used when waiting for a handler to run at the end of the play would be inappropriate. - **splunk_start.yml** - Starts splunk via the service module. Used when waiting for a handler to run at the end of the play would be inappropriate.
From 1fbac54c1d261e3c5f40c5f368bfe78ddc9175ec Mon Sep 17 00:00:00 2001 From: David Twersky Date: Wed, 29 Mar 2023 19:24:37 -0400 Subject: [PATCH 08/19] become_user to splunk for login --- roles/splunk/tasks/splunk_login.yml | 2 ++ roles/splunk/tasks/splunk_start.yml | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/roles/splunk/tasks/splunk_login.yml b/roles/splunk/tasks/splunk_login.yml index 8976239f..9edb3c85 100644 --- a/roles/splunk/tasks/splunk_login.yml +++ b/roles/splunk/tasks/splunk_login.yml @@ -4,6 +4,8 @@ register: splunk_authenticated failed_when: splunk_authenticated.rc != 0 changed_when: false + become: true + become_user: "{{ splunk_nix_user }}" no_log: true - name: Set Authenticated variable diff --git a/roles/splunk/tasks/splunk_start.yml b/roles/splunk/tasks/splunk_start.yml index 1efa1242..eef59379 100644 --- a/roles/splunk/tasks/splunk_start.yml +++ b/roles/splunk/tasks/splunk_start.yml @@ -8,4 +8,3 @@ - name: Reset splunk_authenticated variable ansible.builtin.set_fact: splunk_authenticated: false - From 3263c35509de7ece33be496ed9f6b0fbead62f94 Mon Sep 17 00:00:00 2001 From: arcsector <26469747+arcsector@users.noreply.github.com> Date: Thu, 30 Mar 2023 17:38:40 -0700 Subject: [PATCH 09/19] kvstore tools fixes - removed unused var - check that we can backup before we do - checks are changed_when false --- roles/splunk/tasks/adhoc_backup_kvstore.yml | 8 +++++++- roles/splunk/tasks/kvstore_upgrade.yml | 12 ++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/roles/splunk/tasks/adhoc_backup_kvstore.yml b/roles/splunk/tasks/adhoc_backup_kvstore.yml index 969aa305..2a67c675 100644 --- a/roles/splunk/tasks/adhoc_backup_kvstore.yml +++ b/roles/splunk/tasks/adhoc_backup_kvstore.yml @@ -3,6 +3,12 @@ include_tasks: splunk_login.yml when: not splunk_authenticated +- name: Check if we're okay to backup + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk show kvstore-status | grep backupRestoreStatus | sed -r 
's/\s+backupRestoreStatus : //g' + register: splunk_kvstore_pre_backup_status_out + until: "{{ splunk_kvstore_pre_backup_status_out.stdout }} == 'Ready'" + - name: Backup KVStore on desired host ansible.builtin.command: | {{ splunk_home }}/bin/splunk backup kvstore {{ archive_name | default("") }} @@ -13,7 +19,7 @@ failed_when: splunk_kvstore_backup_out.rc != 0 - name: Check that backup has finished - ansible.builtin.command: | + ansible.builtin.shell: | {{ splunk_home }}/bin/splunk show kvstore-status | grep backupRestoreStatus | sed -r 's/\s+backupRestoreStatus : //g' register: splunk_kvstore_status_out until: "{{ splunk_kvstore_status_out.stdout }} == 'Ready'" diff --git a/roles/splunk/tasks/kvstore_upgrade.yml b/roles/splunk/tasks/kvstore_upgrade.yml index e42bf6f3..bd5cadc7 100644 --- a/roles/splunk/tasks/kvstore_upgrade.yml +++ b/roles/splunk/tasks/kvstore_upgrade.yml @@ -1,11 +1,7 @@ --- - name: Check if authenticated include_tasks: splunk_login.yml - when: not splunk_authenticated - -- name: Set fact for kvstore Upgrade - ansible.builtin.set_fact: - splunk_upgrade_server: false + when: not splunk_authenticatedl - name: Check the current kvstore storage backend ansible.builtin.command: | @@ -13,7 +9,7 @@ become: true become_user: "{{ splunk_nix_user }}" register: splunk_kvstore_backend_out - changed_when: splunk_kvstore_backend_out.rc == 0 + changed_when: false failed_when: splunk_kvstore_backend_out.rc != 0 - name: Check the current kvstore server version @@ -22,7 +18,7 @@ become: true become_user: "{{ splunk_nix_user }}" register: splunk_current_server_version_out - changed_when: splunk_current_server_version_out.rc == 0 + changed_when: false failed_when: splunk_current_server_version_out.rc != 0 - name: Debug print kvstore backend engine @@ -112,7 +108,7 @@ ansible.builtin.command: | {{ splunk_home }}/bin/splunk start-shcluster-upgrade kvstore -version 4.2 -isDryRun true register: splunk_kvstore_version_check_out - changed_when: 
splunk_kvstore_version_check_out.rc == 0 + changed_when: false failed_when: splunk_kvstore_version_check_out.rc != 0 become: true become_user: "{{ splunk_nix_user }}" From 099913b00286ae9b205e8f68b67973d9efa81462 Mon Sep 17 00:00:00 2001 From: David Twersky Date: Fri, 31 Mar 2023 14:12:09 -0400 Subject: [PATCH 10/19] become and changed_when:false for Get current SHCluster captain --- roles/splunk/tasks/get_shcluster_captain.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/roles/splunk/tasks/get_shcluster_captain.yml b/roles/splunk/tasks/get_shcluster_captain.yml index 2d298596..bb68444f 100644 --- a/roles/splunk/tasks/get_shcluster_captain.yml +++ b/roles/splunk/tasks/get_shcluster_captain.yml @@ -8,8 +8,10 @@ ansible.builtin.command: | {{ splunk_home }}/bin/splunk show shcluster-status | grep -A6 Captain | grep mgmt_uri | sed -r 's/\s+mgmt_uri : //g' register: splunk_get_shcaptain - changed_when: splunk_get_shcaptain.rc == 0 + changed_when: false failed_when: splunk_get_shcaptain.rc != 0 + become: true + become_user: "{{ splunk_nix_user }}" - name: Register SHCluster captain fact ansible.builtin.set_fact: From 0abf12afe36586a34d659ac268b6d7eb750ad964 Mon Sep 17 00:00:00 2001 From: David Twersky Date: Fri, 31 Mar 2023 14:16:12 -0400 Subject: [PATCH 11/19] become and checked_when:false for Get current KVStore captain --- roles/splunk/tasks/get_kvstore_captain.yml | 4 +++- roles/splunk/tasks/get_shcluster_captain.yml | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/roles/splunk/tasks/get_kvstore_captain.yml b/roles/splunk/tasks/get_kvstore_captain.yml index 28ff9d19..68b6202f 100644 --- a/roles/splunk/tasks/get_kvstore_captain.yml +++ b/roles/splunk/tasks/get_kvstore_captain.yml @@ -7,8 +7,10 @@ - name: Get current KVStore captain ansible.builtin.command: | {{ splunk_home }}/bin/splunk show kvstore-status | grep -B10 "KV store captain" | grep "hostAndPort" | sed -r 's/\s+hostAndPort : //g' | sed -r 's/:[0-9]+//g' + 
become: true + become_user: "{{ splunk_nix_user }}" register: splunk_get_kvcaptain - changed_when: splunk_get_kvcaptain.rc == 0 + changed_when: false failed_when: splunk_get_kvcaptain.rc != 0 - name: Register KVStore captain fact diff --git a/roles/splunk/tasks/get_shcluster_captain.yml b/roles/splunk/tasks/get_shcluster_captain.yml index bb68444f..17990309 100644 --- a/roles/splunk/tasks/get_shcluster_captain.yml +++ b/roles/splunk/tasks/get_shcluster_captain.yml @@ -7,11 +7,11 @@ - name: Get current SHCluster captain ansible.builtin.command: | {{ splunk_home }}/bin/splunk show shcluster-status | grep -A6 Captain | grep mgmt_uri | sed -r 's/\s+mgmt_uri : //g' + become: true + become_user: "{{ splunk_nix_user }}" register: splunk_get_shcaptain changed_when: false failed_when: splunk_get_shcaptain.rc != 0 - become: true - become_user: "{{ splunk_nix_user }}" - name: Register SHCluster captain fact ansible.builtin.set_fact: From 340a3b74776d7c9a0a05ad784e8f76a85c61e14e Mon Sep 17 00:00:00 2001 From: arcsector <26469747+arcsector@users.noreply.github.com> Date: Mon, 3 Apr 2023 00:04:41 -0700 Subject: [PATCH 12/19] Using version var & cleaning upgrade conditionals --- roles/splunk/tasks/kvstore_upgrade.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roles/splunk/tasks/kvstore_upgrade.yml b/roles/splunk/tasks/kvstore_upgrade.yml index bd5cadc7..47b0a6b1 100644 --- a/roles/splunk/tasks/kvstore_upgrade.yml +++ b/roles/splunk/tasks/kvstore_upgrade.yml @@ -147,6 +147,6 @@ when: - splunk_enable_kvstore - splunk_kvstore_storage == "wiredTiger" + - splunk_kvstore_version is version('4.2', '>=') - "'full' in group_names" - - splunk_kvstore_backend_out.stdout != "wiredTiger" - - splunk_current_server_version_out.stdout is version(4.2 '<') + - splunk_kvstore_backend_out.stdout != "wiredTiger" or splunk_current_server_version_out.stdout is version('4.2', '<') From 360e7e4fe2dbc3500f0dae88bf77b5fd9361cfeb Mon Sep 17 00:00:00 2001 From: David Twersky Date: Mon,
3 Apr 2023 09:37:34 -0400 Subject: [PATCH 13/19] created block for task. added become to whole block --- roles/splunk/tasks/adhoc_backup_kvstore.yml | 43 ++++++++++--------- .../adhoc_destructive_resync_kvstore.yml | 43 +++++++++++-------- 2 files changed, 47 insertions(+), 39 deletions(-) diff --git a/roles/splunk/tasks/adhoc_backup_kvstore.yml b/roles/splunk/tasks/adhoc_backup_kvstore.yml index 2a67c675..d28c2a62 100644 --- a/roles/splunk/tasks/adhoc_backup_kvstore.yml +++ b/roles/splunk/tasks/adhoc_backup_kvstore.yml @@ -1,25 +1,28 @@ --- -- name: Check if authenticated - include_tasks: splunk_login.yml - when: not splunk_authenticated +- name: Adhoc KVStore Backup + block: + - name: Check if authenticated + include_tasks: splunk_login.yml + when: not splunk_authenticated -- name: Check if we're okay to backup - ansible.builtin.command: | - {{ splunk_home }}/bin/splunk show kvstore-status | grep backupRestoreStatus | sed -r 's/\s+backupRestoreStatus : //g' - register: splunk_kvstore_pre_backup_status_out - until: "{{ splunk_kvstore_pre_backup_status_out.stdout }} == 'Ready'" + - name: Check if we're okay to backup + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk show kvstore-status | grep backupRestoreStatus | sed -r 's/\s+backupRestoreStatus : //g' + register: splunk_kvstore_pre_backup_status_out + until: splunk_kvstore_pre_backup_status_out.stdout == 'Ready' + + - name: Backup KVStore on desired host + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk backup kvstore {{ archive_name | default("") }} + register: splunk_kvstore_backup_out + changed_when: splunk_kvstore_backup_out.rc == 0 + failed_when: splunk_kvstore_backup_out.rc != 0 + + - name: Check that backup has finished + ansible.builtin.shell: | + {{ splunk_home }}/bin/splunk show kvstore-status | grep backupRestoreStatus | sed -r 's/\s+backupRestoreStatus : //g' + register: splunk_kvstore_status_out + until: splunk_kvstore_status_out.stdout == 'Ready' -- name: Backup
KVStore on desired host - ansible.builtin.command: | - {{ splunk_home }}/bin/splunk backup kvstore {{ archive_name | default("") }} become: true become_user: "{{ splunk_nix_user }}" - register: splunk_kvstore_backup_out - changed_when: splunk_kvstore_backup_out.rc == 0 - failed_when: splunk_kvstore_backup_out.rc != 0 - -- name: Check that backup has finished - ansible.builtin.shell: | - {{ splunk_home }}/bin/splunk show kvstore-status | grep backupRestoreStatus | sed -r 's/\s+backupRestoreStatus : //g' - register: splunk_kvstore_status_out - until: "{{ splunk_kvstore_status_out.stdout }} == 'Ready'" diff --git a/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml b/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml index 78b04d1f..39913417 100644 --- a/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml +++ b/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml @@ -1,24 +1,29 @@ --- -# We have to do this first so that we store the captain before removing from the cluster -- name: Check if authenticated - include_tasks: splunk_login.yml - when: not splunk_authenticated +- name: Destructive KVStore Resync + block: + - name: Check if authenticated + include_tasks: splunk_login.yml + when: not splunk_authenticated -- name: Get SHCluster captain - include_tasks: get_shcluster_captain.yml + # We have to do this first so that we store the captain before removing from the cluster + - name: Get SHCluster captain + include_tasks: get_shcluster_captain.yml -- name: Remove SHCluster member - ansible.builtin.command: "{{ splunk_home }}/bin/splunk remove shcluster-member" - register: splunk_remove_shcluster_member - changed_when: splunk_remove_shcluster_member.rc == 0 - failed_when: splunk_remove_shcluster_member.rc != 0 + - name: Remove SHCluster member + ansible.builtin.command: "{{ splunk_home }}/bin/splunk remove shcluster-member" + register: splunk_remove_shcluster_member + changed_when: splunk_remove_shcluster_member.rc == 0 + failed_when: 
splunk_remove_shcluster_member.rc != 0 -- name: Clean KVStore - include_tasks: adhoc_clean_kvstore.yml + - name: Clean KVStore + include_tasks: adhoc_clean_kvstore.yml -- name: Add SHCluster member from current member - ansible.builtin.command: | - {{ splunk_home }}/bin/splunk add shcluster-member -current_member_uri {{ splunk_shc_captain }} - register: splunk_remove_shcluster_member - changed_when: splunk_remove_shcluster_member.rc == 0 - failed_when: splunk_remove_shcluster_member.rc != 0 + - name: Add SHCluster member from current member + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk add shcluster-member -current_member_uri {{ splunk_shc_captain }} + register: splunk_remove_shcluster_member + changed_when: splunk_remove_shcluster_member.rc == 0 + failed_when: splunk_remove_shcluster_member.rc != 0 + + become: true + become_user: "{{ splunk_nix_user }}" From 38a5c77277e1fbb4bac47e6bfbedc63f01ae89ce Mon Sep 17 00:00:00 2001 From: David Twersky Date: Mon, 3 Apr 2023 12:29:40 -0400 Subject: [PATCH 14/19] fixed splunk_authenticated typo. 
replaced command with shell --- roles/splunk/tasks/kvstore_upgrade.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/roles/splunk/tasks/kvstore_upgrade.yml b/roles/splunk/tasks/kvstore_upgrade.yml index 47b0a6b1..fd3e23ae 100644 --- a/roles/splunk/tasks/kvstore_upgrade.yml +++ b/roles/splunk/tasks/kvstore_upgrade.yml @@ -1,10 +1,10 @@ --- - name: Check if authenticated include_tasks: splunk_login.yml - when: not splunk_authenticatedl + when: not splunk_authenticated - name: Check the current kvstore storage backend - ansible.builtin.command: | + ansible.builtin.shell: | {{ splunk_home }}/bin/splunk show kvstore-status | grep storageEngine | sed -r 's/\s+storageEngine : //g' become: true become_user: "{{ splunk_nix_user }}" @@ -13,7 +13,7 @@ failed_when: splunk_kvstore_backend_out.rc != 0 - name: Check the current kvstore server version - ansible.builtin.command: | + ansible.builtin.shell: | {{ splunk_home }}/bin/splunk show kvstore-status | grep serverVersion | sed -r 's/\s+serverVersion : //g' become: true become_user: "{{ splunk_nix_user }}" @@ -52,7 +52,7 @@ become_user: "{{ splunk_nix_user }}" - name: Start storage engine migration on single instance - ansible.builtin.command: | + ansible.builtin.shell: | {{ splunk_home }}/bin/splunk migrate kvstore-storage-engine --target-engine wiredTiger --enable-compression register: splunk_migration_single_early_out changed_when: splunk_migration_single_early_out.rc == 0 @@ -62,7 +62,7 @@ when: splunk_package_version is version(9.0, '<') - name: Perform >= 9.0 upgrade if necessary - ansible.builtin.command: | + ansible.builtin.shell: | {{ splunk_home }}/bin/splunk migrate migrate-kvstore register: splunk_migration_single_early_out changed_when: splunk_migration_single_early_out.rc == 0 @@ -77,7 +77,7 @@ - name: Perform SHC KVStore upgrade block: - name: Perform SHC pre-migration Steps - ansible.builtin.command: | + ansible.builtin.shell: | {{ splunk_home }}/bin/splunk 
start-shcluster-migration kvstore -storageEngine wiredTiger -isDryRun true register: splunk_shc_pre_steps_out changed_when: splunk_shc_pre_steps_out.rc == 0 @@ -86,7 +86,7 @@ become_user: "{{ splunk_nix_user }}" - name: Start Backend migration - ansible.builtin.command: | + ansible.builtin.shell: | {{ splunk_home }}/bin/splunk start-shcluster-migration kvstore -storageEngine wiredTiger -clusterPerc 50 register: splunk_shc_kvstore_backend_migration_out changed_when: splunk_shc_kvstore_backend_migration_out.rc == 0 @@ -95,7 +95,7 @@ become_user: "{{ splunk_nix_user }}" - name: Make sure migration is successful - ansible.builtin.command: | + ansible.builtin.shell: | {{ splunk_home }}/bin/splunk show shcluster-kvmigration-status | sed -r 's/\s+migrationStatus : //g' register: splunk_kvstore_migration_status_out changed_when: splunk_kvstore_migration_status_out.rc == 0 @@ -105,7 +105,7 @@ until: "{{ splunk_kvstore_migration_status_out.stdout }} == 'notStarted'" - name: Perform SHC pre-upgrade steps - ansible.builtin.command: | + ansible.builtin.shell: | {{ splunk_home }}/bin/splunk start-shcluster-upgrade kvstore -version 4.2 -isDryRun true register: splunk_kvstore_version_check_out changed_when: false @@ -114,7 +114,7 @@ become_user: "{{ splunk_nix_user }}" - name: Start Version upgrade - ansible.builtin.command: | + ansible.builtin.shell: | {{ splunk_home }}/bin/splunk start-shcluster-upgrade kvstore -version 4.2 register: splunk_kvstore_version_upgrade_out changed_when: splunk_kvstore_version_upgrade_out.rc == 0 @@ -123,7 +123,7 @@ become_user: "{{ splunk_nix_user }}" - name: Make sure upgrade is successful - ansible.builtin.command: | + ansible.builtin.shell: | {{ splunk_home }}/bin/splunk show kvstore-status --verbose | sed -r 's/\s+serverVersion : //g' register: splunk_kvstore_version_status_out changed_when: splunk_kvstore_version_status_out.rc == 0 From ffe30c4bc98f569071d1d52810d6a85292ef7653 Mon Sep 17 00:00:00 2001 From: arcsector 
<26469747+arcsector@users.noreply.github.com> Date: Fri, 28 Apr 2023 15:54:18 -0700 Subject: [PATCH 15/19] default values for when kvstore-status doesn't return serverVersion deleting destructive resync task --- .../adhoc_destructive_resync_kvstore.yml | 29 ------------------- roles/splunk/tasks/kvstore_upgrade.yml | 2 +- 2 files changed, 1 insertion(+), 30 deletions(-) delete mode 100644 roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml diff --git a/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml b/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml deleted file mode 100644 index 39913417..00000000 --- a/roles/splunk/tasks/adhoc_destructive_resync_kvstore.yml +++ /dev/null @@ -1,29 +0,0 @@ ---- -- name: Destructive KVStore Resync - block: - - name: Check if authenticated - include_tasks: splunk_login.yml - when: not splunk_authenticated - - # We have to do this first so that we store the captain before removing from the cluster - - name: Get SHCluster captain - include_tasks: get_shcluster_captain.yml - - - name: Remove SHCluster member - ansible.builtin.command: "{{ splunk_home }}/bin/splunk remove shcluster-member" - register: splunk_remove_shcluster_member - changed_when: splunk_remove_shcluster_member.rc == 0 - failed_when: splunk_remove_shcluster_member.rc != 0 - - - name: Clean KVStore - include_tasks: adhoc_clean_kvstore.yml - - - name: Add SHCluster member from current member - ansible.builtin.command: | - {{ splunk_home }}/bin/splunk add shcluster-member -current_member_uri {{ splunk_shc_captain }} - register: splunk_remove_shcluster_member - changed_when: splunk_remove_shcluster_member.rc == 0 - failed_when: splunk_remove_shcluster_member.rc != 0 - - become: true - become_user: "{{ splunk_nix_user }}" diff --git a/roles/splunk/tasks/kvstore_upgrade.yml b/roles/splunk/tasks/kvstore_upgrade.yml index fd3e23ae..b32f4ad3 100644 --- a/roles/splunk/tasks/kvstore_upgrade.yml +++ b/roles/splunk/tasks/kvstore_upgrade.yml @@ -149,4 +149,4 @@ - 
splunk_kvstore_storage == "wiredTiger" - splunk_kvstore_version is version(4.2, '>=') - "'full' in group_names" - - splunk_kvstore_backend_out.stdout != "wiredTiger" or splunk_current_server_version_out.stdout is version(4.2, '<') + - splunk_kvstore_backend_out.stdout != "wiredTiger" or splunk_current_server_version_out.stdout | default(3.6) is version(4.2, '<') From dcaaa7f2c9fbb50331dabeff1de35d338f4b4705 Mon Sep 17 00:00:00 2001 From: arcsector <26469747+arcsector@users.noreply.github.com> Date: Fri, 15 Mar 2024 13:55:00 -0700 Subject: [PATCH 16/19] Change Oplog size based on support recommendations Get SHCluster and KVstore status as JSON blobs --- roles/splunk/tasks/adhoc_change_oplog_shc.yml | 98 +++++++++++++++++++ .../tasks/adhoc_increase_oplog_helper.yml | 33 +++++++ roles/splunk/tasks/get_kvstore_status.yml | 20 ++++ roles/splunk/tasks/get_shcluster_captain.yml | 4 +- roles/splunk/tasks/get_shcluster_status.yml | 20 ++++ 5 files changed, 173 insertions(+), 2 deletions(-) create mode 100644 roles/splunk/tasks/adhoc_change_oplog_shc.yml create mode 100644 roles/splunk/tasks/adhoc_increase_oplog_helper.yml create mode 100644 roles/splunk/tasks/get_kvstore_status.yml create mode 100644 roles/splunk/tasks/get_shcluster_status.yml diff --git a/roles/splunk/tasks/adhoc_change_oplog_shc.yml b/roles/splunk/tasks/adhoc_change_oplog_shc.yml new file mode 100644 index 00000000..e703c878 --- /dev/null +++ b/roles/splunk/tasks/adhoc_change_oplog_shc.yml @@ -0,0 +1,98 @@ +--- +# oplog size should not be changed unless the oplog window is too small causing members to become stale - or is gradually shrinking. +# Do NOT use this on a standalone instance - oplog size does not matter for a standalone KV Store. +# Deployments should monitor the oplog window and react in time. If the window is already too small - KV Store may have to be re-crated with increased oplog size. 
+ +- name: Check if authenticated + include_tasks: splunk_login.yml + when: not splunk_authenticated + +- name: Make sure we're in an SHC + ansible.builtin.fail: + msg: "SHC not found in group_names - detected group names are \"{{ group_names }}\". This play will only run on an SHC" + when: splunk_shc_target_group not in group_names + +# sets fact splunk_shc_captain +- name: Find SHC Captain + include_tasks: get_shcluster_captain.yml + +# GUID from SPLUNK_HOME/etc/instance.cfg, not just hostname +- name: Find KVStore Captain + block: + - name: Check if authenticated + include_tasks: splunk_login.yml + when: not splunk_authenticated + + # Gets KVStore captain hostname - like splunk_captain.domain.com + # guid for GUID, hostAndPort for host with port - like `| sed -r 's/\s+hostAndPort : //g' | sed -r 's/:[0-9]+//g'` + - name: Get current KVStore captain + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk show kvstore-status | grep -B10 "KV store captain" | grep "guid" | sed -r 's/\s+guid : //g' + become: true + become_user: "{{ splunk_nix_user }}" + register: splunk_get_kvcaptain + changed_when: false + failed_when: splunk_get_kvcaptain.rc != 0 + + - name: Register KVStore captain fact + ansible.builtin.set_fact: + splunk_kv_captain_guid: "{{ splunk_get_kvcaptain.stdout }}" + +- name: Make KVCaptain SHC captain + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk transfer shcluster-captain -mgmt_uri "https://{{ splunk_shc_captain }}:{{ splunkd_port }}" + register: transfer_captain_out + changed_when: transfer_captain_out.rc == 0 + failed_when: transfer_captain_out.rc != 0 + +- name: Get current KVCaptain + include_tasks: get_kvstore_captain.yml + +- name: Ensure SHC Captain and KV Captain are the same + include_tasks: get_shcluster_captain.yml + until: "'{{ splunk_shc_captain }}' == '{{ splunk_kv_captain }}'" + delay: 10 + retries: 30 + +- name: Make a backup of the whole kvstore located in $SPLUNK_DB/kvstore only on one member + include_tasks: 
adhoc_backup_kvstore.yml + run_once: true # this works here to run the entire task only once, whereas import_tasks would run this on all hosts + vars: + archive_name: "{{ inventory_hostname }}-preoplog-backup" +#- name: Make a backup of the whole KVStore directory ($SPLUNK_DB/kvstore) on only one member +# ansible.builtin.shell: | +# echo "{{ splunk_home }}/{{ inventory_hostname }}-preoplog-backup.tar.gz"; tar -czf {{ splunk_home }}/{{ inventory_hostname }}-preoplog-backup.tar.gz {{ splunk_db_path }}/kvstore +# register: kvstore_backup_out +# changed_when: kvstore_backup_out.rc == 0 +# failed_when: kvstore_backup_out.rc != 1 +# become: true +# run_once: true + +# Note - make this a separate task so that we can repeat it later for the final member +- name: For each of the other SHC cluster members - increase the oplog + include_tasks: adhoc_increase_oplog_helper.yml + when: splunk_shc_captain != inventory_hostname + +- name: Select new node to be SHC captain + ansible.builtin.set_fact: + splunk_new_shc_captain: "{% for h in groups[splunk_shc_target_group] %}https://{{ hostvars[h].ansible_fqdn }}:{{ splunkd_port }}{% if not loop.last and hostvars[h].ansible_fqdn != splunk_shc_captain %},{% endif %}{% endfor %}" # If you manage multiple SHCs, configure the var value in group_vars + + +- name: Transfer SHC captain to a different node + ansible.builtin.command: | + {{ splunk_home }}/bin/splunk transfer shcluster-captain -mgmt_uri "{{ splunk_new_shc_captain }}" + register: transfer_captain_out + changed_when: transfer_captain_out.rc == 0 + failed_when: transfer_captain_out.rc != 0 + +- name: Increase oplog on final member + include_tasks: adhoc_increase_oplog_helper.yml + when: splunk_shc_captain == inventory_hostname + +- name: Try to check data + ansible.builtin.debug: + msg: + - Check if the data is available - if something went wrong during the process + - use backup to restore the data. 
Backup is on this searchhead with this name={{ inventory_hostname }}-preoplog-backup + - If members are out of sync, resync the KVStore from the SHCluster captain {{ splunk_new_shc_captain }} + - with `splunk resync kvstore -source {{ splunk_kv_captain_guid }}` diff --git a/roles/splunk/tasks/adhoc_increase_oplog_helper.yml b/roles/splunk/tasks/adhoc_increase_oplog_helper.yml new file mode 100644 index 00000000..0a5e1ae3 --- /dev/null +++ b/roles/splunk/tasks/adhoc_increase_oplog_helper.yml @@ -0,0 +1,33 @@ +--- +- name: Stop Splunk + include_tasks: splunk_stop.yml + +- name: Clean KVStore + ansible.builtin.command: "{{ splunk_home }}/bin/splunk clean kvstore --local --answer-yes" + become: true + become_user: "{{ splunk_nix_user }}" + register: clean_result + changed_when: clean_result.rc == 0 + failed_when: clean_result.rc != 0 + +- name: Edit server.conf to increase the oplogSize setting + community.general.ini_file: + path: "{{ splunk_home }}/etc/system/local/server.conf" + section: kvstore + option: oplogSize + value: "{{ splunk_oplog_size }}" + owner: "{{ splunk_nix_user }}" + group: "{{ splunk_nix_group }}" + mode: "0644" + become: true + become_user: "{{ splunk_nix_user }}" + +- name: Start Splunk to trigger synchronisation + include_tasks: splunk_start.yml + +# sets fact splunk_kvstore_status_json +- name: Verify synchronisation with show kvstore-status + include_tasks: get_kvstore_status.yml + until: "'{{ splunk_kvstore_status_json.status }}' == 'ready'" + delay: 10 + retries: 30 diff --git a/roles/splunk/tasks/get_kvstore_status.yml b/roles/splunk/tasks/get_kvstore_status.yml new file mode 100644 index 00000000..510c0ed4 --- /dev/null +++ b/roles/splunk/tasks/get_kvstore_status.yml @@ -0,0 +1,20 @@ +--- +# This file gets our KVStore status from our current member as JSON +- name: Get KVStore status + ansible.builtin.shell: | + set -o pipefail + {{ splunk_home }}/bin/splunk show kvstore-status \ + | grep -A13 "This member:" \ + | tail -n +2 | sed -Er 's/^\s+//g' \ + 
| awk -F ' * : *' '{ printf "\"%s\":\"%s\",", $1, $2 }' \ + | sed 's/,$/}/' | sed 's/^/{/' + register: get_splunk_kvstore_status_out + failed_when: get_splunk_kvstore_status_out.rc != 0 + become: true + become_user: "{{ splunk_nix_user }}" + +- name: Convert KVStore status to JSON + ansible.builtin.set_fact: + splunk_kvstore_status_json: "{{ get_splunk_kvstore_status_out.stdout_lines[0] | from_json }}" + +# output: {"date":"Tue Jul 21 16:42:24 2016","dateSec":"1466541744.143000","disabled":"0","guid":"6244DF36-D883-4D59-AHD3-1354FCB4BL91","oplogEndTimestamp":"Tue Jul 21 16:41:12 2016","oplogEndTimestampSec":"1466541672.000000","oplogStartTimestamp":"Tue Jul 21 16:34:55 2016","oplogStartTimestampSec":"1466541295.000000","port":"8191","replicaSet":"splunkrs","replicationStatus":"KV store captain","standalone":"0","status":"ready"} diff --git a/roles/splunk/tasks/get_shcluster_captain.yml b/roles/splunk/tasks/get_shcluster_captain.yml index 17990309..f6ba2594 100644 --- a/roles/splunk/tasks/get_shcluster_captain.yml +++ b/roles/splunk/tasks/get_shcluster_captain.yml @@ -1,12 +1,12 @@ --- -# Gets SHC captain management uri - like https://splunk_captain.example:8089 +# Gets SHC captain management domain - like splunk_captain.example - name: Check if authenticated include_tasks: splunk_login.yml when: not splunk_authenticated - name: Get current SHCluster captain ansible.builtin.command: | - {{ splunk_home }}/bin/splunk show shcluster-status | grep -A6 Captain | grep mgmt_uri | sed -r 's/\s+mgmt_uri : //g' + {{ splunk_home }}/bin/splunk show shcluster-status | grep -A6 Captain | grep mgmt_uri | sed -r 's/\s+mgmt_uri : //g' | sed -Er 's/http(s)?:\/\///' | sed -Er 's/:[0-9]+//' become: true become_user: "{{ splunk_nix_user }}" register: splunk_get_shcaptain diff --git a/roles/splunk/tasks/get_shcluster_status.yml b/roles/splunk/tasks/get_shcluster_status.yml new file mode 100644 index 00000000..efcdbff5 --- /dev/null +++ b/roles/splunk/tasks/get_shcluster_status.yml @@ 
-0,0 +1,20 @@ +--- +# This file gets our shcluster status from our current member as JSON +- name: Get shcluster status + ansible.builtin.shell: | + set -o pipefail + {{ splunk_home }}/bin/splunk show shcluster-status \ + | grep -A5 " {{ inventory_hostname }}" \ + | tail -n +2 | sed -Er 's/^\s+//g' \ + | awk -F ' * : *' '{ printf "\"%s\":\"%s\",", $1, $2 }' \ + | sed 's/,$/}/' | sed 's/^/{/' + register: get_splunk_shcluster_status_out + failed_when: get_splunk_shcluster_status_out.rc != 0 + become: true + become_user: "{{ splunk_nix_user }}" + +- name: Convert shcluster status to JSON + ansible.builtin.set_fact: + splunk_shcluster_status_json: "{{ get_splunk_shcluster_status_out.stdout_lines[0] | from_json }}" + +# output: {"label":"splunk-search.example.com","last_conf_replication":"Fri Mar 14 11:12:17 2024","mgmt_uri":"https://splunk-search.example.com:8089","mgmt_uri_alias":"https://10.1.1.5:8089","status":"Up"} From 50611983db619772245fa668f11f2b2b0c2247f6 Mon Sep 17 00:00:00 2001 From: arcsector <26469747+arcsector@users.noreply.github.com> Date: Tue, 19 Mar 2024 13:05:44 -0700 Subject: [PATCH 17/19] Check current oplog size against requested oplog size --- roles/splunk/tasks/adhoc_change_oplog_shc.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/roles/splunk/tasks/adhoc_change_oplog_shc.yml b/roles/splunk/tasks/adhoc_change_oplog_shc.yml index e703c878..8bcb1529 100644 --- a/roles/splunk/tasks/adhoc_change_oplog_shc.yml +++ b/roles/splunk/tasks/adhoc_change_oplog_shc.yml @@ -12,6 +12,24 @@ msg: "SHC not found in group_names - detected group names are \"{{ group_names }}\". 
This play will only run on an SHC" + when: splunk_shc_target_group not in group_names + +- name: Get current oplog size + ansible.builtin.shell: | + {{ splunk_home }}/bin/splunk btool server list kvstore | grep oplogSize | sed 's/[^0-9]*//g' + register: current_oplog_size_out + failed_when: current_oplog_size_out.rc != 0 + run_once: true + become: true + become_user: "{{ splunk_nix_user }}" + +- name: Debug current OpLog Size in MB + ansible.builtin.debug: + var: current_oplog_size_out.stdout + verbosity: 1 + +- name: Make sure the oplog size var differs from our current value, if they're the same, exit play + ansible.builtin.meta: end_play + when: current_oplog_size_out.stdout | int == splunk_oplog_size | int + # sets fact splunk_shc_captain - name: Find SHC Captain include_tasks: get_shcluster_captain.yml From 2fa341d8de55f66fdf8600b5df689c1f948e620e Mon Sep 17 00:00:00 2001 From: arcsector <26469747+arcsector@users.noreply.github.com> Date: Tue, 19 Mar 2024 17:56:49 -0700 Subject: [PATCH 18/19] auth for statuses --- roles/splunk/tasks/get_kvstore_status.yml | 2 +- roles/splunk/tasks/get_shcluster_status.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/roles/splunk/tasks/get_kvstore_status.yml b/roles/splunk/tasks/get_kvstore_status.yml index 510c0ed4..a43e23d0 100644 --- a/roles/splunk/tasks/get_kvstore_status.yml +++ b/roles/splunk/tasks/get_kvstore_status.yml @@ -3,7 +3,7 @@ - name: Get KVStore status ansible.builtin.shell: | set -o pipefail - {{ splunk_home }}/bin/splunk show kvstore-status \ + {{ splunk_home }}/bin/splunk show kvstore-status -auth {{ splunk_auth }} \ | grep -A13 "This member:" \ | tail -n +2 | sed -Er 's/^\s+//g' \ | awk -F ' * : *' '{ printf "\"%s\":\"%s\",", $1, $2 }' \ | sed 's/,$/}/' | sed 's/^/{/' diff --git a/roles/splunk/tasks/get_shcluster_status.yml b/roles/splunk/tasks/get_shcluster_status.yml index efcdbff5..4a85871b 100644 --- a/roles/splunk/tasks/get_shcluster_status.yml +++ b/roles/splunk/tasks/get_shcluster_status.yml @@ -3,7 +3,7 @@ - name: Get shcluster 
status ansible.builtin.shell: | set -o pipefail - {{ splunk_home }}/bin/splunk show shcluster-status \ + {{ splunk_home }}/bin/splunk show shcluster-status -auth {{ splunk_auth }} \ | grep -A5 " {{ inventory_hostname }}" \ | tail -n +2 | sed -Er 's/^\s+//g' \ | awk -F ' * : *' '{ printf "\"%s\":\"%s\",", $1, $2 }' \ From 3f8d4d2a276f46e46115d0ef72c703fd4461ad09 Mon Sep 17 00:00:00 2001 From: arcsector <26469747+arcsector@users.noreply.github.com> Date: Thu, 23 Jan 2025 12:38:28 -0800 Subject: [PATCH 19/19] documenting oplog kv task --- README.md | 407 +++++++++++++++++++++++++++--------------------------- 1 file changed, 204 insertions(+), 203 deletions(-) diff --git a/README.md b/README.md index d813a75a..3942069c 100644 --- a/README.md +++ b/README.md @@ -1,203 +1,204 @@ -# ansible-role-for-splunk: An Ansible role for Splunk admins - -[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)  -[![GitHub release](https://img.shields.io/github/v/tag/splunk/ansible-role-for-splunk?sort=semver&label=Version)](https://github.com/splunk/ansible-role-for-splunk/releases) - -This repository contains Splunk's official Ansible role for performing Splunk administration of remote hosts over SSH. This role can manage Splunk Enterprise and Universal Forwarders that are on Linux-based platforms (CentOS/Redhat/Ubuntu/Amazon Linux/OpenSUSE), as well as deploy configurations from Git repositories. Example playbooks and inventory files are also provided to help new Ansible users make the most out of this project. - -ansible-role-for-splunk is used by the Splunk@Splunk team to manage Splunk's corporate deployment of Splunk. - ----- - -## Table of Contents - -1. [Purpose](#purpose) -1. [Getting Started](#getting-started) -1. [Extended Documentation](#extended-documentation) -1. [Frequently Asked Questions](#frequently-asked-questions) -1. [Support](#support) -1. 
[License](#license) - ----- - -## Purpose - -#### What is ansible-role-for-splunk? -ansible-role-for-splunk is a single Ansible role for deploying and administering production Splunk deployments. It supports all Splunk deployment roles (Universal Forwarder, Heavy Forwarder, Indexer, Search Head, Deployment Server, Cluster Master, SHC Deployer, DMC, License Master) as well as management of all apps and configurations (via git repositories). - -This codebase is used by the Splunk@Splunk team internally to manage our deployment, so it has been thoroughly vetted since it was first developed in late 2018. For more information about Ansible best practices, checkout [our related .conf20 session](https://conf.splunk.com/learn/session-catalog.html?search=TRU1537C) for this project. - -#### Design Philosophy -A few different design philosophies have been applied in the development of this project. - -First, ansible-role-for-splunk was designed under the "Don't Repeat Yourself (DRY)" philosophy. This means that the project contains minimal code redundancy. If you want to fork this project and change any functionality, you only need to update the code in one place. - -Second, ansible-role-for-splunk was designed to be idempotent. This means that if the system is already in the desired state that Ansible expects, it will not make any changes. This even applies to our app management code, which can update apps on search heads without modifying existing local/ files that may have been created through actions in Splunk Web. For example, if you want to upgrade an app on a search head, and your repository does not contain a local/ folder, Ansible will not touch the existing local/ folder on the search head. This is accomplished using the synchronize module. For more information on that, refer to the `configure_apps.yml` task description. - -Third, ansible-role-for-splunk was designed to manage all Splunk configurations as code. What do I mean by that? 
You're not going to find tasks for installing web certificates, templating indexes.conf, or managing every Splunk configuration possible. Instead, you will find that we have a generic configure_apps.yml task which can deploy any version of any git repository to any path under $SPLUNK_HOME on the hosts in your inventory. We believe that having all configurations in git repositories is the best way to perform version control and configuration management for Splunk deployments. That said, we've made a handful of exceptions: -1. Creation of the local splunk admin user. We are able to do this securely using ansible-vault to encrypt `splunk_admin_password` so that we can create a `user-seed.conf` during the initial installation. Please note that if you do not configure the `splunk_admin_password` variable with a new value, an admin account will not be created when deploying a new Splunk installation via `check_splunk.yml`. -1. Configuring deploymentclient.conf for Deployment Server (DS) clients. We realize that some environments may have hundreds of clientNames configured and that creating a git repository for each variation would be pretty inefficient. Therefore, we support configuring deploymentclient.conf for your Ansible-managed forwarders using variables. The current version is based on a single template that supports only the clientName and targetUri keys. However, this can be easily extended with additional variables (or static content) of your choosing. -1. Deployment of a new search head cluster. In order to initialize a new search head cluster, we cannot rely solely on creating backend files. Therefore, the role supports deploy a new search head cluster using provided variable values that are stored in your Ansible configurations (preferably via group_vars, although host_vars or inventory variables will also work). - -## Getting Started -Getting started with this role will requires you to: -1. 
Install Ansible (version >=v2.7 is supported and should work through v2.10) -1. Setup your inventory correctly -1. Configure the appropriate variables to describe the desired state of your environment -1. Create a playbook or leverage one of the included example playbooks that specifies the deployment_task you'd like to run - -#### Ansible Setup -Ansible only needs to be installed on the host that you want to use to manage your Splunk deployments. We recommend having a dedicated server that is used only for Ansible orchestration, but technically you can run Ansible from any host, including your laptop, as long as you have the network connectivity and credentials required to SSH into hosts that are in your Ansible inventory. -* [Ansible Installation Guide](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html) -* [Ansible User Guide](https://docs.ansible.com/ansible/latest/user_guide/index.html) - -#### Inventory -The layout of your inventory is critical for the tasks included in ansible-role-for-splunk to run correctly. The "role" of your host is determined by it being a member of one or more inventory groups that define its Splunk role. Ansible expects each host to be a member of one of these groups and uses that membership to determine the package that should be used, the installation path, the default deployment path for app deployments, and several other things. The following group names are currently supported: -* full -* uf -* clustermanager -* deploymentserver -* indexer -* licensemaster -* search -* shdeployer -* dmc - -Note that in Ansible you may nest groups within groups, and groups within those groups, and so on. We depend on this heavily to differentiate a full Splunk installation vs a Universal Forwarder (UF) installation, and to map variables in group_vars to specific groups of hosts. You will see examples of this within the sample `inventory.yml` files that are included in the "environments" folder of this project. 
- -#### Variables -As proper usage of this role requires a thorough understanding of variables, familiarity with [Ansible variable precedence](https://docs.ansible.com/ansible/latest/user_guide/playbooks_variables.html#ansible-variable-precedence) is highly recommended. Almost all variables used in this role have been added to [roles/splunk/defaults/main.yml](https://github.com/splunk/ansible-role-for-splunk/blob/master/roles/splunk/defaults/main.yml) (lowest precendence) for reference. Default values of "unconfigured" are automatically ignored at the task level. - -Although a number of variables ship with this role, many of them automatically configure themselves when the play is executed. For example, during the upgrade check, the desired version of Splunk that you want to be at is based solely upon the value of `splunk_package_url_full` or `splunk_package_url_uf`. We extract the version and build numbers from the URL automagically, and then compare those values to the output of the "splunk version" command during the `check_splunk.yml` task to determine if an upgrade is required or not. - -There are a few variables that need to configure out of the box to use this role with your environment: - -``` -splunk_uri_lm - The URI for your license master (e.g. https://my_license_master:8089) -ansible_user - The username that you want Ansible to connect as for SSH access -ansible_ssh_private_key_file - The file path to the private key that the Ansible user should use for SSH access authentication -``` - -In addition, you may want to configure some of the optional variables that are mentioned in [roles/splunk/defaults/main.yml](https://github.com/splunk/ansible-role-for-splunk/blob/master/roles/splunk/defaults/main.yml) to manage things like splunk.secret, send Slack notifications, automatically install useful scripts or additional Linux packages, etc. 
For a full description of the configurable variables, refer to the comments in [roles/splunk/defaults/main.yml](https://github.com/splunk/ansible-role-for-splunk/blob/master/roles/splunk/defaults/main.yml) and be sure to read-up on the task descriptions in this README file. - -As of the v1.0.4 release for this role, an additional variable called `target_shc_group_name` must be defined in the host_vars for each SHC Deployer host. This variable tells Ansible which group of hosts in the inventory contain the SHC members that the SHC Deployer host is managing. This change improves the app deployment process for SHCs by performing a REST call to the first SH in the list from the inventory group whose name matches the value of `target_shc_group_name`. If the SHC is not in a ready state, then the play will halt and no changes will be made. It will also automatically grab the captain URI and use the captain as the deploy target for the `apply shcluster-bundle` handler. An example of how `target_shc_group_name` should be used has been included in the sample inventory at [environments/production/inventory.yml](https://github.com/splunk/ansible-role-for-splunk/blob/master/environments/production/inventory.yml). - -In order to use the app management functionality, you will need to configure the following additional variables: -``` -git_server: ssh://git@git.mydomain.com -git_key: ~/.ssh/mygit.key -git_project: FOO -git_version: bar -git_apps: - - name: my_app - version: master -``` -You will find additional examples in the included sample [group_vars](https://github.com/splunk/ansible-role-for-splunk/blob/master/environments/production/group_vars/deploymentserver.yml) and [host_vars](https://github.com/splunk/ansible-role-for-splunk/blob/master/environments/production/host_vars/my-shc-deployer.yml) files. Note that you may also specify `git_server`, `git_key`, `git_project`, and `git_version` within `git_apps` down to the repository (`name`) level. 
-You may also override the auto-configured `splunk_app_deploy_path` at the repository level as well. For example, to deploy apps to $SPLUNK_HOME/etc/apps on a deployment server rather than the default of $SPLUNK_HOME/etc/deployment-apps. If not set, configure_apps.yml will determine the app deployment path based on the host's group membership within the inventory. -**Tip:** If you only use one git server, you may want to define the `git_server` and related values in an all.yml group_var file. - -**Configure local splunk admin password at install** -``` -splunk_admin_username: youradminusername (optional, defaults to admin) -splunk_admin_password: yourpassword (required, but see note below about encryption) -``` - -**Note:** If you do not configure these 2 variables, new Splunk installations will be installed without an admin account present. This has no impact on upgrades to existing installations. - -**Configure splunk admin password for existing installations** -We recommend that the `splunk_admin_username` (if not using "admin) and `splunk_admin_password` variables be configured in either group_vars or host_vars. If you use the same username and/or password across your deployment, then an `all.yml` group_vars file is a great location. If you have different passwords for different hosts, then place these variables in a corresponding group_vars or host_vars file. You can then encrypt the password to use in-line with other unencrypted variables by using the following command: `ansible-vault encrypt_string --ask-vault-pass 'var_value_to_encrypt' --name 'splunk_admin_password'`. Once that is done, use either the `--ask-vault-pass` or `--vault-password-file` argument when running the playbook to have Ansible automatically decrypt the value for the play to use. 
- -#### Playbooks -The following example playbooks have been included in this project for your reference: -- **splunk_app_install.yml** - Install or upgrade apps on Splunk hosts using the configure_apps.yml task in the splunk role. Note that the apps you want to deploy should be defined in either host_vars or group_vars, along with a splunk_app_deploy_path. Refer to the documentation for app deployment for details. -- **splunk_install_or_upgrade.yml** - Install or upgrade Splunk (or Splunk UFs) on hosts using the check_splunk.yml task in the splunk role. -- **splunk_shc_deploy.yml** - Installs Splunk and initializes search head clustering on a shdeployer and group of hosts that will serve as a new search head cluster. -- **splunk_upgrade_full_stack.yml** - Example playbook that demonstrates how to upgrade an entire Splunk deployment with a single-site indexer cluster and a search head cluster using the splunk role. Note: This playbook does not upgrade forwarders, although you could easily add an extra play to do that. - -## Extended Documentation -This section contains additional reference documentation. ----- -#### Task File Descriptions - -- **add_crashlog_script.yml** - Installs a bash script and cron job that will automatically clean-up splunkd crash log files. By default, every night at midnight, it will find any crash logs that are more than 7 days old and will delete them. You may change how many days of crash logs are retained by editing the cleanup_crashlogs.sh.j2 template. -- **add_diag_script.yml** - Installs a bash script and cron job that will automatically clean-up splunk diag files. By default, every night at midnight, it will find any diags that are more than 30 days old and will delete them. You may change how many days of splunk diags are retained by editing the cleanup_diags.sh.j2 template. -- **add_pstack_script.yml** - Copies the genpstacks.sh script to $SPLUNK_HOME/genpstacks.sh. 
This file is useful to have on all of your Splunk servers for when Splunk Support asks you to capture pstacks. - -Note: Any task with an **adhoc** prefix means that it can be used independently as a `deployment_task` in a playbook. You can use the tasks to resolve various Splunk problems or perform one-time activities, such as decommissioning an indexer from an indexer cluster. - -- **adhoc_backup_kvstore.yml** - Backup your KVStore to a given point - use the var `archive_name` to specify a tar name other than the default. -- **adhoc_clean_dispatch.yml** - This task is intended to be used for restoring service to search heads should the dispatch directory become full. You should not need to use this task in a healthy environment, but it is at your disposal should the need arise. The task will stop splunk, remove all files in the dispatch directory, and then start splunk. -- **adhoc_clean_kvstore.yml** - Cleans the KVStore from all data, allowing it to pull the latest data from the KVStore captain - usually done when a KVStore is down, but Splunkd is still running fine. -- **adhoc_configure_hostname** - Configure a Splunk server's hostname using the value from inventory_hostname. It configures the system hostname, serverName in server.conf and host in inputs.conf. All Splunk configuration changes are made using the ini_file module, which will preserve any other existing configurations that may exist in server.conf and/or inputs.conf. -- **adhoc_decom_indexer.yml** - Executes a splunk offline --enforce-counts command. This is useful when decommissioning one or more indexers from an indexer cluster. -- **adhoc_destructive_resync_kvstore.yml** - Removes an SH Member from the cluster, cleans it's KVStore, then puts it back into the cluster. Usually used when SH Bundle and KV Bundle are out of sync for longer than a few hours. -- **adhoc_fix_mongo.yml** - Use when Splunk is in a stopped state to fix mongodb/kvstore issues. 
This task ensures that permissions are set correctly on mongo's splunk.key file and deletes mongod.lock if it exists. -- **adhoc_fix_server_certificate.yml** - Use to delete an expired server.pem and generate a new one (default certs). Useful if your server.pem certificate has expired and you are using Splunk's default certificate for splunkd. Note that default certificates present a security risk and that their use should be avoided, if possible. -- **adhoc_kill_splunkd.yml** - Some releases of Splunk have a "feature" that leaves zombie splunkd processes after a 'splunk stop'. Use this task after a 'splunk stop' to make sure that it's really stopped. Useful for upgrades on some of the 7.x releases, and automatically called by the upgrade_splunk.yml task. -- **check_splunk.yml** - Check if Splunk is installed. If Splunk is not installed, it will be installed on the host. If Splunk is already installed, the task will execute a "splunk version" command on the host, and then compare the version and build number of Splunk to the version and build number of the expected version of Splunk. Note that the expected version of Splunk does not need to be statically defined; The expected Splunk version and build are automatically extracted from the value of splunk_package_url_full or splunk_package_url_uf using Jinja regex filters. This task will work for both the Universal Forwarder and full Splunk Enterprise packages. You define which host uses what package by organizing it under the appropriate group ('full' or 'uf') in your Ansible inventory. -- **configure_apps.yml** - This task should be called directly from a playbook in order to deploy apps or configurations (from git repositories) to Splunk hosts. Tip: Add a this task to a playbook after the check_splunk.yml play. Doing so will perform a "install (or upgrade) and deploy apps" run, all in one playbook. 
-- **configure_authentication.yml** - Uses the template identified by the `splunk_authenticationconf` variable to install an authentication.conf file to $SPLUNK_HOME/etc/system/local/authentication.conf. We are including this task here since Ansible is able to securely deploy an authentication.conf configuration by using ansible-vault to encrypt sensitive values such as the value of the `ad_bind_password` variable. Note: If you are using a common splunk.secret file, you can omit this task and instead use configure_apps.yml to deploy an authentication.conf file from a Git repository containing an authentication.conf app with pre-hashed credentials. -- **configure_bash.yml** - Configures bashrc and bash_profile files for the splunk user. Please note that the templates included with this role will overwrite any existing files for the splunk user (if they exist). The templates will define a custom PS1 at the bash prompt, configure the $SPLUNK_HOME environment variable so that you can issue "splunk " without specifying the full path to the Splunk binary, and will enable auto-completion of Splunk CLI commands in bash. -- **configure_deploymentclient.yml** - Generates a new deploymentclient.conf file from the deploymentclient.conf.j2 template and installs it to $SPLUNK_HOME/etc/system/local/deploymentclient.conf. This task is included automatically during new installations when values have been configured for the `clientName` and `splunk_uri_ds` variables. -- **configure_dmc.yml** - Configures the DMC as an Indexer Peer in SH mode, adds hosts to the host as search peers, and configures the host MC in auto mode -- **configure_facl.yml** - Configure file system access control lists (FACLs) to allow the splunk user to read /var/log files and add the splunk user's group to /etc/audit/auditd.conf to read /var/log/audit/ directory. This allows the splunk user to read privileged files from a non-privileged system account. 
Note: This task is performed automatically during new installations when splunk is installed as a non-root user. -- **configure_idxc_manager.yml** - Configures a Splunk host to act as a manager node using `splunk_idxc_rf`, `splunk_idxc_sf`, `splunk_idxc_key`, and `splunk_idxc_label`. -- **configure_idxc_member.yml** - Configures a Splunk host as an indexer cluster member using `splunk_uri_cm`, `splunk_idxc_rep_port`, and `splunk_idxc_key`. -- **configure_idxc_sh.yml** - Configures a search head to join an existing indexer cluster using `splunk_uri_cm` and `splunk_idxc_key`. -- **configure_kvstore.yml** - Disables KVStore when disabled by `splunk_enable_kvstore` and sets vars related to KVStore in `server.conf` configured in the defaults, like `splunk_kvstore_storage` and `splunk_oplog_size` -- **configure_license.yml** - Configure the license group to the `splunk_license_group` variable defined. Default is `Trial`. Available values are "Trial, Free, Enterprise, Forwarder, Manager or Peer. If set to `Peer`, the `splunk_uri_lm` must be defined. Note: This could also be accomplished using configure_apps.yml with a git repository. -- **configure_os.yml** - Increases ulimits for the splunk user and disables Transparent Huge Pages (THP) per Splunk implementation best practices. -- **configure_serverclass.yml** - Generates a new serverclass.conf file from the serverclass.conf.j2 template and installs it to $SPLUNK_HOME/etc/system/local/serverclass.conf. -- **configure_shc_captain.yml** - Perform a `bootstrap shcluster-captain` using the server list provided in `splunk_shc_uri_list`. -- **configure_shc_deployer.yml** - Configures a Splunk host to act as a search head deployer by configuring the pass4SymmKey contained in `splunk_shc_key` and the shcluster_label contained in `splunk_shc_label`. -- **configure_shc_members.yml** - Initializes search head clustering on Splunk hosts that will be participating in a new search head cluster. 
Relies on the values of: `splunk_shc_key`, `splunk_shc_label`, `splunk_shc_deployer`, `splunk_shc_rf`, `splunk_shc_rep_port`, `splunkd_port`, `splunk_admin_username`, and `splunk_admin_password`. Be sure to review the default values for the role for these and configure them appropriately in your group_vars. -- **configure_splunk_forwarder_meta.yml** - Configures a new indexed field called splunk_forwarder and sets its default value to the value of `ansible_hostname`. Note that you will need to install a fields.conf on your search head(s) if you wish to use this custom indexed field. -- **configure_splunk_boot.yml** - Used during installation to automatically configure splunk boot-start to the desired state. This task can also be used to enable boot-start on an existing host that does not have it enabled, or to switch from init.d to systemd, or vice-versa. The desired boot-start method is determined using the boolean value of `splunk_use_initd` (true=initd, false=systemd). In addition it is also possible for splunk to create a polkit rule, if using systemd, that allows the `splunk_nix_user` to managed the splunk service without authentication. You may also set the `systemd_unit_full` or the `systemd_unit_uf` variables to customize the service name systemd will use. -- **configure_splunk_secret.yml** - Configures a common splunk.secret file from the files/authentication/splunk.secret so that pre-hashed passwords can be securely deployed. Note that changing splunk.secret will require re-encryption of any passwords that were encrypted using the previous splunk.secret since Splunk will no longer be able to decrypt them successfully. -- **configure_systemd.yml** - Updates Splunk's systemd file using best practices and tips from the community. Also allows Splunk to start successfully using systemd after an upgrade without the need to run `splunk ftr --accept-license`. 
-- **configure_thp.yml** - Installs a new systemd service (disable-thp) that disables THP for RedHat|CentOS systems 6.0+. This task is automatically called by the configure_os.yml task. Optionally, you can set `use_tuned_thp` to configure THP via `tuned` instead of a service. Default is `false`. Mote: Make sure your host does not require a specific `tuned` profile before applying this one. -- **download_and_unarchive.yml** - Downloads the appropriate Splunk package using `splunk_package_url` (derived automatically from the values of `splunk_package_url_full` or `splunk_package_url_uf` variables). The package is then installed to `splunk_install_path` (derived automatically in main.yml using the `splunk_install_path` and the host's membership of either a `uf` or `full` group in the inventory). - You can set if the download/unarchive process uses the Ansible host or if each host downloads and unarchives the package individually by setting `splunk_download_local`. - Default is `true` which will download the package to the Ansible host once and unarchive to each host from there. - If set to `false` the package will be downloaded and unarchived to each host individually. Immediately after unarchive the package will be removed from the host. -- **get_kvstore_captain.yml** - Gets the current captain in the KVStore cluster. -- **get_shcluster_captain.yml** - Gets the current captain in the SHCluster. -- **install_apps.yml** - *Do not call install_apps.yml directly! Use configure_apps.yml* - Called by configure_apps.yml to perform app installation on the Splunk host. -- **install_splunk.yml** - *Do not call install_splunk.yml directly! Use check_splunk.yml* - Called by check_splunk.yml to install/upgrade Splunk and Splunk Universal Forwarders, as well as perform any initial configurations. This task is called by check_splunk.yml when the check determines that Splunk is not currently installed. 
This task will create the splunk user and splunk group, configure the bash profile for the splunk user (by calling configure_bash.yml), configure THP and ulimits (by calling configure_os.ym), download and install the appropriate Splunk package (by calling download_and_unarchive.yml), configure a common splunk.secret (by calling configure_splunk_secret.yml, if configure_secret is defined), create a deploymentclient.conf file with the splunk_ds_uri and clientName (by calling configure_deploymentclient.yml, if clientName is defined), install a user-seed.conf with a prehashed admin password (if used_seed is defined), and will then call the post_install.yml task. See post_install.yml entry for details on post-installation tasks. -- **install_utilities.yml** - Installs Linux packages that are useful for troubleshooting Splunk-related issues when `install_utilities: true` and `linux_packages` is defined with a list of packages to install. -- **configure_dmesg.yml** - Some distros restrict access to read `dmesg` for non-root users. This allows the `splunk` user to run the `dmesg` command. Defaults to `false`. -- **kvstore_upgrade.yml** - Upgrades a KVStore storage backend and/or server version on either a single or distributed instance. -- **main.yml** - This is the main task that will always be called when executing this role. This task sets the appropriate variables for full vs uf packages, sends a Slack notification about the play if the slack_token and slack_channel are defined, checks the current boot-start configuration to determine if it's in the expected state, and then includes the task from the role to execute against, as defined by the value of the deployment_task variable. The deployment_task variable should be defined in your playbook(s). Refer to the included example playbooks to see this in action. -- **post_install.yml** - Executes post-installation tasks. 
Performs a touch on the .ui_login file which disables the first-time login prompt to change your password, ensures that `splunk_home` is owned by the correct user and group, and optionally configures three scripts to: cleanup crash logs and old diags (by calling add_crashlog_script.yml and add_diag_script.yml, respectively), and a pstack generation shell script for troubleshooting purposes (by calling add_pstack_script.yml). This task will install various Linux troubleshooting utilities (by calling install_utilities.yml) when `install_utilities: true`. -- **set_maintenance_mode.yml** - Enables or disables maintenance mode on a cluster manager. Intended to be called by playbooks for indexer cluster upgrades/maintenance. Requires the `state` variable to be defined. Valid values: enabled, disabled -- **set_upgrade_state.yml** - Executes a splunk upgrade-{{ peer_state }} cluster-peers command on the cluster manager. This task can be used for upgrading indexer clusters with new minor and maintenance releases of Splunk (assuming you are at Splunk v7.1.0 or higher). Refer to https://docs.splunk.com/Documentation/Splunk/latest/Indexer/Searchablerollingupgrade for more information. -- **splunk_login.yml** - Authenticate to splunk. This will avoid having to pass `-auth` for every command that meeds authentication. This sets the `splunk_authenticated` variable to true. To include this in a task, you can set a conditional to only run it when `splunk_authenticated == false`. If included in a task that manually calls the `splunk_stop.yml`, `splunk_start.yml` or `splunk_restart.yml` task, the `splunk_authenticated` will be reset to false. -- **splunk_offline.yml** - Runs a splunk offline CLI command. Useful for bringing down indexers non-intrusively by allowing searches to complete before stopping splunk. -- **splunk_restart.yml** - Restarts splunk via the service module. Used when waiting for a handler to run at the end of the play would be inappropriate. 
-- **splunk_start.yml** - Starts splunk via the service module. Used when waiting for a handler to run at the end of the play would be inappropriate. -- **splunk_stop.yml** - Stops splunk via the service module. Used when waiting for a handler to run at the end of the play would be inappropriate. -- **upgrade_splunk.yml** - *Do not call upgrade_splunk.yml directly! Use check_splunk.yml* - Called by check_splunk.yml. Performs an upgrade of an existing splunk installation. Configures .bash_profile and .bashrc for splunk user (by calling configure_bash.yml), disables THP and increases ulimits (by calling configure_os.yml), kills any stale splunkd processes present when `splunk_force_kill` is set to `True` (by calling adhoc_kill_splunkd.yml). Note: You should NOT run the upgrade_splunk.yml task directly from a playbook. check_splunk.yml will call upgrade_splunk.yml if it determines that an upgrade is needed; It will then download and unarchive the new version of Splunk (by calling download_and_unarchive.yml), ensure that mongod is in a good stopped state (by calling adhoc_fix_mongo.yml), and will then perform post-installation tasks using the post_install.yml task. - -## Frequently Asked Questions -**Q:** What is the difference between this and splunk-ansible? - -**A:** The splunk-ansible project was built for the docker-splunk project, which is a completely different use case. The way that docker-splunk works is by spinning-up an image that already has splunk-ansible inside of it, and then any arguments provided to Docker are passed into splunk-ansible so that it can run locally inside of the container to install and configure Splunk there. While it's a cool use case, we didn't feel that splunk-ansible met our needs as Splunk administrators to manage production Splunk deployments, so we wrote our own. -## - -**Q:** When using configure_apps.yml, the play fails on the synchronize module. What gives? 
- -**A:** This is due to a [known Ansible bug](https://github.com/ansible/ansible/issues/56629) related to password-based authentication. To workaround this issue, use a key pair for SSH authentication instead by setting the `ansible_user` and `ansible_ssh_private_key_file` variables. -## - -## Support -If you have questions or need support, you can: - -* Use the [GitHub issue tracker](https://github.com/splunk/splunk-ansible/issues) to submit bugs or request features. -* Post a question to [Splunk Answers](http://answers.splunk.com). -* Join the #ansible channel on [Splunk-Usergroups Slack](https://docs.splunk.com/Documentation/Community/1.0/community/Chat#Join_us_on_Slack). -* Please do not file cases in the Splunk support portal related to this project, as they will not be able to help you. - -## License -Copyright 2018-2021 Splunk. - -Distributed under the terms of the Apache 2.0 license, ansible-role-for-splunk is free and open-source software. +# ansible-role-for-splunk: An Ansible role for Splunk admins + +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)  +[![GitHub release](https://img.shields.io/github/v/tag/splunk/ansible-role-for-splunk?sort=semver&label=Version)](https://github.com/splunk/ansible-role-for-splunk/releases) + +This repository contains Splunk's official Ansible role for performing Splunk administration of remote hosts over SSH. This role can manage Splunk Enterprise and Universal Forwarders that are on Linux-based platforms (CentOS/Redhat/Ubuntu/Amazon Linux/OpenSUSE), as well as deploy configurations from Git repositories. Example playbooks and inventory files are also provided to help new Ansible users make the most out of this project. + +ansible-role-for-splunk is used by the Splunk@Splunk team to manage Splunk's corporate deployment of Splunk. + +---- + +## Table of Contents + +1. [Purpose](#purpose) +1. [Getting Started](#getting-started) +1. 
[Extended Documentation](#extended-documentation) +1. [Frequently Asked Questions](#frequently-asked-questions) +1. [Support](#support) +1. [License](#license) + +---- + +## Purpose + +#### What is ansible-role-for-splunk? +ansible-role-for-splunk is a single Ansible role for deploying and administering production Splunk deployments. It supports all Splunk deployment roles (Universal Forwarder, Heavy Forwarder, Indexer, Search Head, Deployment Server, Cluster Master, SHC Deployer, DMC, License Master) as well as management of all apps and configurations (via git repositories). + +This codebase is used by the Splunk@Splunk team internally to manage our deployment, so it has been thoroughly vetted since it was first developed in late 2018. For more information about Ansible best practices, check out [our related .conf20 session](https://conf.splunk.com/learn/session-catalog.html?search=TRU1537C) for this project. + +#### Design Philosophy +A few different design philosophies have been applied in the development of this project. + +First, ansible-role-for-splunk was designed under the "Don't Repeat Yourself (DRY)" philosophy. This means that the project contains minimal code redundancy. If you want to fork this project and change any functionality, you only need to update the code in one place. + +Second, ansible-role-for-splunk was designed to be idempotent. This means that if the system is already in the desired state that Ansible expects, it will not make any changes. This even applies to our app management code, which can update apps on search heads without modifying existing local/ files that may have been created through actions in Splunk Web. For example, if you want to upgrade an app on a search head, and your repository does not contain a local/ folder, Ansible will not touch the existing local/ folder on the search head. This is accomplished using the synchronize module. For more information on that, refer to the `configure_apps.yml` task description. 
+ +Third, ansible-role-for-splunk was designed to manage all Splunk configurations as code. What do I mean by that? You're not going to find tasks for installing web certificates, templating indexes.conf, or managing every Splunk configuration possible. Instead, you will find that we have a generic configure_apps.yml task which can deploy any version of any git repository to any path under $SPLUNK_HOME on the hosts in your inventory. We believe that having all configurations in git repositories is the best way to perform version control and configuration management for Splunk deployments. That said, we've made a handful of exceptions: +1. Creation of the local splunk admin user. We are able to do this securely using ansible-vault to encrypt `splunk_admin_password` so that we can create a `user-seed.conf` during the initial installation. Please note that if you do not configure the `splunk_admin_password` variable with a new value, an admin account will not be created when deploying a new Splunk installation via `check_splunk.yml`. +1. Configuring deploymentclient.conf for Deployment Server (DS) clients. We realize that some environments may have hundreds of clientNames configured and that creating a git repository for each variation would be pretty inefficient. Therefore, we support configuring deploymentclient.conf for your Ansible-managed forwarders using variables. The current version is based on a single template that supports only the clientName and targetUri keys. However, this can be easily extended with additional variables (or static content) of your choosing. +1. Deployment of a new search head cluster. In order to initialize a new search head cluster, we cannot rely solely on creating backend files. Therefore, the role supports deploying a new search head cluster using provided variable values that are stored in your Ansible configurations (preferably via group_vars, although host_vars or inventory variables will also work). 
+ +## Getting Started +Getting started with this role will require you to: +1. Install Ansible (version >=v2.7 is supported and should work through v2.10) +1. Set up your inventory correctly +1. Configure the appropriate variables to describe the desired state of your environment +1. Create a playbook or leverage one of the included example playbooks that specifies the deployment_task you'd like to run + +#### Ansible Setup +Ansible only needs to be installed on the host that you want to use to manage your Splunk deployments. We recommend having a dedicated server that is used only for Ansible orchestration, but technically you can run Ansible from any host, including your laptop, as long as you have the network connectivity and credentials required to SSH into hosts that are in your Ansible inventory. +* [Ansible Installation Guide](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html) +* [Ansible User Guide](https://docs.ansible.com/ansible/latest/user_guide/index.html) + +#### Inventory +The layout of your inventory is critical for the tasks included in ansible-role-for-splunk to run correctly. The "role" of your host is determined by it being a member of one or more inventory groups that define its Splunk role. Ansible expects each host to be a member of one of these groups and uses that membership to determine the package that should be used, the installation path, the default deployment path for app deployments, and several other things. The following group names are currently supported: +* full +* uf +* clustermanager +* deploymentserver +* indexer +* licensemaster +* search +* shdeployer +* dmc + +Note that in Ansible you may nest groups within groups, and groups within those groups, and so on. We depend on this heavily to differentiate a full Splunk installation vs a Universal Forwarder (UF) installation, and to map variables in group_vars to specific groups of hosts. 
You will see examples of this within the sample `inventory.yml` files that are included in the "environments" folder of this project. + +#### Variables +As proper usage of this role requires a thorough understanding of variables, familiarity with [Ansible variable precedence](https://docs.ansible.com/ansible/latest/user_guide/playbooks_variables.html#ansible-variable-precedence) is highly recommended. Almost all variables used in this role have been added to [roles/splunk/defaults/main.yml](https://github.com/splunk/ansible-role-for-splunk/blob/master/roles/splunk/defaults/main.yml) (lowest precedence) for reference. Default values of "unconfigured" are automatically ignored at the task level. + +Although a number of variables ship with this role, many of them automatically configure themselves when the play is executed. For example, during the upgrade check, the desired version of Splunk that you want to be at is based solely upon the value of `splunk_package_url_full` or `splunk_package_url_uf`. We extract the version and build numbers from the URL automagically, and then compare those values to the output of the "splunk version" command during the `check_splunk.yml` task to determine if an upgrade is required or not. + +There are a few variables that you need to configure out of the box to use this role with your environment: + +``` +splunk_uri_lm - The URI for your license master (e.g. 
https://my_license_master:8089) +ansible_user - The username that you want Ansible to connect as for SSH access +ansible_ssh_private_key_file - The file path to the private key that the Ansible user should use for SSH access authentication +``` + +In addition, you may want to configure some of the optional variables that are mentioned in [roles/splunk/defaults/main.yml](https://github.com/splunk/ansible-role-for-splunk/blob/master/roles/splunk/defaults/main.yml) to manage things like splunk.secret, send Slack notifications, automatically install useful scripts or additional Linux packages, etc. For a full description of the configurable variables, refer to the comments in [roles/splunk/defaults/main.yml](https://github.com/splunk/ansible-role-for-splunk/blob/master/roles/splunk/defaults/main.yml) and be sure to read-up on the task descriptions in this README file. + +As of the v1.0.4 release for this role, an additional variable called `target_shc_group_name` must be defined in the host_vars for each SHC Deployer host. This variable tells Ansible which group of hosts in the inventory contain the SHC members that the SHC Deployer host is managing. This change improves the app deployment process for SHCs by performing a REST call to the first SH in the list from the inventory group whose name matches the value of `target_shc_group_name`. If the SHC is not in a ready state, then the play will halt and no changes will be made. It will also automatically grab the captain URI and use the captain as the deploy target for the `apply shcluster-bundle` handler. An example of how `target_shc_group_name` should be used has been included in the sample inventory at [environments/production/inventory.yml](https://github.com/splunk/ansible-role-for-splunk/blob/master/environments/production/inventory.yml). 
+
+In order to use the app management functionality, you will need to configure the following additional variables:
+```
+git_server: ssh://git@git.mydomain.com
+git_key: ~/.ssh/mygit.key
+git_project: FOO
+git_version: bar
+git_apps:
+  - name: my_app
+    version: master
+```
+You will find additional examples in the included sample [group_vars](https://github.com/splunk/ansible-role-for-splunk/blob/master/environments/production/group_vars/deploymentserver.yml) and [host_vars](https://github.com/splunk/ansible-role-for-splunk/blob/master/environments/production/host_vars/my-shc-deployer.yml) files. Note that you may also specify `git_server`, `git_key`, `git_project`, and `git_version` within `git_apps` down to the repository (`name`) level.
+You may also override the auto-configured `splunk_app_deploy_path` at the repository level as well. For example, to deploy apps to $SPLUNK_HOME/etc/apps on a deployment server rather than the default of $SPLUNK_HOME/etc/deployment-apps. If not set, configure_apps.yml will determine the app deployment path based on the host's group membership within the inventory.
+**Tip:** If you only use one git server, you may want to define the `git_server` and related values in an all.yml group_var file.
+
+**Configure local splunk admin password at install**
+```
+splunk_admin_username: youradminusername (optional, defaults to admin)
+splunk_admin_password: yourpassword (required, but see note below about encryption)
+```
+
+**Note:** If you do not configure these 2 variables, new Splunk installations will be installed without an admin account present. This has no impact on upgrades to existing installations.
+
+**Configure splunk admin password for existing installations**
+We recommend that the `splunk_admin_username` (if not using "admin") and `splunk_admin_password` variables be configured in either group_vars or host_vars. 
If you use the same username and/or password across your deployment, then an `all.yml` group_vars file is a great location. If you have different passwords for different hosts, then place these variables in a corresponding group_vars or host_vars file. You can then encrypt the password to use in-line with other unencrypted variables by using the following command: `ansible-vault encrypt_string --ask-vault-pass 'var_value_to_encrypt' --name 'splunk_admin_password'`. Once that is done, use either the `--ask-vault-pass` or `--vault-password-file` argument when running the playbook to have Ansible automatically decrypt the value for the play to use. + +#### Playbooks +The following example playbooks have been included in this project for your reference: +- **splunk_app_install.yml** - Install or upgrade apps on Splunk hosts using the configure_apps.yml task in the splunk role. Note that the apps you want to deploy should be defined in either host_vars or group_vars, along with a splunk_app_deploy_path. Refer to the documentation for app deployment for details. +- **splunk_install_or_upgrade.yml** - Install or upgrade Splunk (or Splunk UFs) on hosts using the check_splunk.yml task in the splunk role. +- **splunk_shc_deploy.yml** - Installs Splunk and initializes search head clustering on a shdeployer and group of hosts that will serve as a new search head cluster. +- **splunk_upgrade_full_stack.yml** - Example playbook that demonstrates how to upgrade an entire Splunk deployment with a single-site indexer cluster and a search head cluster using the splunk role. Note: This playbook does not upgrade forwarders, although you could easily add an extra play to do that. + +## Extended Documentation +This section contains additional reference documentation. +---- +#### Task File Descriptions + +- **add_crashlog_script.yml** - Installs a bash script and cron job that will automatically clean-up splunkd crash log files. 
By default, every night at midnight, it will find any crash logs that are more than 7 days old and will delete them. You may change how many days of crash logs are retained by editing the cleanup_crashlogs.sh.j2 template. +- **add_diag_script.yml** - Installs a bash script and cron job that will automatically clean-up splunk diag files. By default, every night at midnight, it will find any diags that are more than 30 days old and will delete them. You may change how many days of splunk diags are retained by editing the cleanup_diags.sh.j2 template. +- **add_pstack_script.yml** - Copies the genpstacks.sh script to $SPLUNK_HOME/genpstacks.sh. This file is useful to have on all of your Splunk servers for when Splunk Support asks you to capture pstacks. + +Note: Any task with an **adhoc** prefix means that it can be used independently as a `deployment_task` in a playbook. You can use the tasks to resolve various Splunk problems or perform one-time activities, such as decommissioning an indexer from an indexer cluster. + +- **adhoc_backup_kvstore.yml** - Backup your KVStore to a given point - use the var `archive_name` to specify a tar name other than the default. +- **adhoc_change_oplog_shc.yml** - Changes the oplog for your SHC to a specific number. Uses the defaults var `splunk_oplog_size` to determine whether to modify the value or not. +- **adhoc_clean_dispatch.yml** - This task is intended to be used for restoring service to search heads should the dispatch directory become full. You should not need to use this task in a healthy environment, but it is at your disposal should the need arise. The task will stop splunk, remove all files in the dispatch directory, and then start splunk. +- **adhoc_clean_kvstore.yml** - Cleans the KVStore from all data, allowing it to pull the latest data from the KVStore captain - usually done when a KVStore is down, but Splunkd is still running fine. 
+
+- **adhoc_configure_hostname** - Configure a Splunk server's hostname using the value from inventory_hostname. It configures the system hostname, serverName in server.conf and host in inputs.conf. All Splunk configuration changes are made using the ini_file module, which will preserve any other existing configurations that may exist in server.conf and/or inputs.conf.
+- **adhoc_decom_indexer.yml** - Executes a splunk offline --enforce-counts command. This is useful when decommissioning one or more indexers from an indexer cluster.
+- **adhoc_destructive_resync_kvstore.yml** - Removes an SH Member from the cluster, cleans its KVStore, then puts it back into the cluster. Usually used when SH Bundle and KV Bundle are out of sync for longer than a few hours.
+- **adhoc_fix_mongo.yml** - Use when Splunk is in a stopped state to fix mongodb/kvstore issues. This task ensures that permissions are set correctly on mongo's splunk.key file and deletes mongod.lock if it exists.
+- **adhoc_fix_server_certificate.yml** - Use to delete an expired server.pem and generate a new one (default certs). Useful if your server.pem certificate has expired and you are using Splunk's default certificate for splunkd. Note that default certificates present a security risk and that their use should be avoided, if possible.
+- **adhoc_kill_splunkd.yml** - Some releases of Splunk have a "feature" that leaves zombie splunkd processes after a 'splunk stop'. Use this task after a 'splunk stop' to make sure that it's really stopped. Useful for upgrades on some of the 7.x releases, and automatically called by the upgrade_splunk.yml task.
+- **check_splunk.yml** - Check if Splunk is installed. If Splunk is not installed, it will be installed on the host. If Splunk is already installed, the task will execute a "splunk version" command on the host, and then compare the version and build number of Splunk to the version and build number of the expected version of Splunk. 
Note that the expected version of Splunk does not need to be statically defined; The expected Splunk version and build are automatically extracted from the value of splunk_package_url_full or splunk_package_url_uf using Jinja regex filters. This task will work for both the Universal Forwarder and full Splunk Enterprise packages. You define which host uses what package by organizing it under the appropriate group ('full' or 'uf') in your Ansible inventory.
+- **configure_apps.yml** - This task should be called directly from a playbook in order to deploy apps or configurations (from git repositories) to Splunk hosts. Tip: Add this task to a playbook after the check_splunk.yml play. Doing so will perform an "install (or upgrade) and deploy apps" run, all in one playbook.
+- **configure_authentication.yml** - Uses the template identified by the `splunk_authenticationconf` variable to install an authentication.conf file to $SPLUNK_HOME/etc/system/local/authentication.conf. We are including this task here since Ansible is able to securely deploy an authentication.conf configuration by using ansible-vault to encrypt sensitive values such as the value of the `ad_bind_password` variable. Note: If you are using a common splunk.secret file, you can omit this task and instead use configure_apps.yml to deploy an authentication.conf file from a Git repository containing an authentication.conf app with pre-hashed credentials.
+- **configure_bash.yml** - Configures bashrc and bash_profile files for the splunk user. Please note that the templates included with this role will overwrite any existing files for the splunk user (if they exist). The templates will define a custom PS1 at the bash prompt, configure the $SPLUNK_HOME environment variable so that you can issue "splunk " without specifying the full path to the Splunk binary, and will enable auto-completion of Splunk CLI commands in bash. 
+
+- **configure_deploymentclient.yml** - Generates a new deploymentclient.conf file from the deploymentclient.conf.j2 template and installs it to $SPLUNK_HOME/etc/system/local/deploymentclient.conf. This task is included automatically during new installations when values have been configured for the `clientName` and `splunk_uri_ds` variables.
+- **configure_dmc.yml** - Configures the DMC as an Indexer Peer in SH mode, adds hosts to the host as search peers, and configures the host MC in auto mode
+- **configure_facl.yml** - Configure file system access control lists (FACLs) to allow the splunk user to read /var/log files and add the splunk user's group to /etc/audit/auditd.conf to read /var/log/audit/ directory. This allows the splunk user to read privileged files from a non-privileged system account. Note: This task is performed automatically during new installations when splunk is installed as a non-root user.
+- **configure_idxc_manager.yml** - Configures a Splunk host to act as a manager node using `splunk_idxc_rf`, `splunk_idxc_sf`, `splunk_idxc_key`, and `splunk_idxc_label`.
+- **configure_idxc_member.yml** - Configures a Splunk host as an indexer cluster member using `splunk_uri_cm`, `splunk_idxc_rep_port`, and `splunk_idxc_key`.
+- **configure_idxc_sh.yml** - Configures a search head to join an existing indexer cluster using `splunk_uri_cm` and `splunk_idxc_key`.
+- **configure_kvstore.yml** - Disables KVStore when disabled by `splunk_enable_kvstore` and sets vars related to KVStore in `server.conf` configured in the defaults, like `splunk_kvstore_storage` and `splunk_oplog_size`
+- **configure_license.yml** - Configure the license group to the `splunk_license_group` variable defined. Default is `Trial`. Available values are "Trial", "Free", "Enterprise", "Forwarder", "Manager" or "Peer". If set to `Peer`, the `splunk_uri_lm` must be defined. Note: This could also be accomplished using configure_apps.yml with a git repository. 
+
+- **configure_os.yml** - Increases ulimits for the splunk user and disables Transparent Huge Pages (THP) per Splunk implementation best practices.
+- **configure_serverclass.yml** - Generates a new serverclass.conf file from the serverclass.conf.j2 template and installs it to $SPLUNK_HOME/etc/system/local/serverclass.conf.
+- **configure_shc_captain.yml** - Perform a `bootstrap shcluster-captain` using the server list provided in `splunk_shc_uri_list`.
+- **configure_shc_deployer.yml** - Configures a Splunk host to act as a search head deployer by configuring the pass4SymmKey contained in `splunk_shc_key` and the shcluster_label contained in `splunk_shc_label`.
+- **configure_shc_members.yml** - Initializes search head clustering on Splunk hosts that will be participating in a new search head cluster. Relies on the values of: `splunk_shc_key`, `splunk_shc_label`, `splunk_shc_deployer`, `splunk_shc_rf`, `splunk_shc_rep_port`, `splunkd_port`, `splunk_admin_username`, and `splunk_admin_password`. Be sure to review the default values for the role for these and configure them appropriately in your group_vars.
+- **configure_splunk_forwarder_meta.yml** - Configures a new indexed field called splunk_forwarder and sets its default value to the value of `ansible_hostname`. Note that you will need to install a fields.conf on your search head(s) if you wish to use this custom indexed field.
+- **configure_splunk_boot.yml** - Used during installation to automatically configure splunk boot-start to the desired state. This task can also be used to enable boot-start on an existing host that does not have it enabled, or to switch from init.d to systemd, or vice-versa. The desired boot-start method is determined using the boolean value of `splunk_use_initd` (true=initd, false=systemd). In addition it is also possible for splunk to create a polkit rule, if using systemd, that allows the `splunk_nix_user` to manage the splunk service without authentication. 
You may also set the `systemd_unit_full` or the `systemd_unit_uf` variables to customize the service name systemd will use.
+- **configure_splunk_secret.yml** - Configures a common splunk.secret file from the files/authentication/splunk.secret so that pre-hashed passwords can be securely deployed. Note that changing splunk.secret will require re-encryption of any passwords that were encrypted using the previous splunk.secret since Splunk will no longer be able to decrypt them successfully.
+- **configure_systemd.yml** - Updates Splunk's systemd file using best practices and tips from the community. Also allows Splunk to start successfully using systemd after an upgrade without the need to run `splunk ftr --accept-license`.
+- **configure_thp.yml** - Installs a new systemd service (disable-thp) that disables THP for RedHat|CentOS systems 6.0+. This task is automatically called by the configure_os.yml task. Optionally, you can set `use_tuned_thp` to configure THP via `tuned` instead of a service. Default is `false`. Note: Make sure your host does not require a specific `tuned` profile before applying this one.
+- **download_and_unarchive.yml** - Downloads the appropriate Splunk package using `splunk_package_url` (derived automatically from the values of `splunk_package_url_full` or `splunk_package_url_uf` variables). The package is then installed to `splunk_install_path` (derived automatically in main.yml using the `splunk_install_path` and the host's membership of either a `uf` or `full` group in the inventory).
+  You can set if the download/unarchive process uses the Ansible host or if each host downloads and unarchives the package individually by setting `splunk_download_local`.
+  Default is `true` which will download the package to the Ansible host once and unarchive to each host from there.
+  If set to `false` the package will be downloaded and unarchived to each host individually. Immediately after unarchive the package will be removed from the host. 
+
+- **get_kvstore_captain.yml** - Gets the current captain in the KVStore cluster.
+- **get_shcluster_captain.yml** - Gets the current captain in the SHCluster.
+- **install_apps.yml** - *Do not call install_apps.yml directly! Use configure_apps.yml* - Called by configure_apps.yml to perform app installation on the Splunk host.
+- **install_splunk.yml** - *Do not call install_splunk.yml directly! Use check_splunk.yml* - Called by check_splunk.yml to install/upgrade Splunk and Splunk Universal Forwarders, as well as perform any initial configurations. This task is called by check_splunk.yml when the check determines that Splunk is not currently installed. This task will create the splunk user and splunk group, configure the bash profile for the splunk user (by calling configure_bash.yml), configure THP and ulimits (by calling configure_os.yml), download and install the appropriate Splunk package (by calling download_and_unarchive.yml), configure a common splunk.secret (by calling configure_splunk_secret.yml, if configure_secret is defined), create a deploymentclient.conf file with the splunk_ds_uri and clientName (by calling configure_deploymentclient.yml, if clientName is defined), install a user-seed.conf with a prehashed admin password (if used_seed is defined), and will then call the post_install.yml task. See post_install.yml entry for details on post-installation tasks.
+- **install_utilities.yml** - Installs Linux packages that are useful for troubleshooting Splunk-related issues when `install_utilities: true` and `linux_packages` is defined with a list of packages to install.
+- **configure_dmesg.yml** - Some distros restrict access to read `dmesg` for non-root users. This allows the `splunk` user to run the `dmesg` command. Defaults to `false`.
+- **kvstore_upgrade.yml** - Upgrades a KVStore storage backend and/or server version on either a single or distributed instance. 
+
+- **main.yml** - This is the main task that will always be called when executing this role. This task sets the appropriate variables for full vs uf packages, sends a Slack notification about the play if the slack_token and slack_channel are defined, checks the current boot-start configuration to determine if it's in the expected state, and then includes the task from the role to execute against, as defined by the value of the deployment_task variable. The deployment_task variable should be defined in your playbook(s). Refer to the included example playbooks to see this in action.
+- **post_install.yml** - Executes post-installation tasks. Performs a touch on the .ui_login file which disables the first-time login prompt to change your password, ensures that `splunk_home` is owned by the correct user and group, and optionally configures three scripts to: cleanup crash logs and old diags (by calling add_crashlog_script.yml and add_diag_script.yml, respectively), and a pstack generation shell script for troubleshooting purposes (by calling add_pstack_script.yml). This task will install various Linux troubleshooting utilities (by calling install_utilities.yml) when `install_utilities: true`.
+- **set_maintenance_mode.yml** - Enables or disables maintenance mode on a cluster manager. Intended to be called by playbooks for indexer cluster upgrades/maintenance. Requires the `state` variable to be defined. Valid values: enabled, disabled
+- **set_upgrade_state.yml** - Executes a splunk upgrade-{{ peer_state }} cluster-peers command on the cluster manager. This task can be used for upgrading indexer clusters with new minor and maintenance releases of Splunk (assuming you are at Splunk v7.1.0 or higher). Refer to https://docs.splunk.com/Documentation/Splunk/latest/Indexer/Searchablerollingupgrade for more information.
+- **splunk_login.yml** - Authenticate to splunk. This will avoid having to pass `-auth` for every command that needs authentication. 
This sets the `splunk_authenticated` variable to true. To include this in a task, you can set a conditional to only run it when `splunk_authenticated == false`. If included in a task that manually calls the `splunk_stop.yml`, `splunk_start.yml` or `splunk_restart.yml` task, the `splunk_authenticated` will be reset to false. +- **splunk_offline.yml** - Runs a splunk offline CLI command. Useful for bringing down indexers non-intrusively by allowing searches to complete before stopping splunk. +- **splunk_restart.yml** - Restarts splunk via the service module. Used when waiting for a handler to run at the end of the play would be inappropriate. +- **splunk_start.yml** - Starts splunk via the service module. Used when waiting for a handler to run at the end of the play would be inappropriate. +- **splunk_stop.yml** - Stops splunk via the service module. Used when waiting for a handler to run at the end of the play would be inappropriate. +- **upgrade_splunk.yml** - *Do not call upgrade_splunk.yml directly! Use check_splunk.yml* - Called by check_splunk.yml. Performs an upgrade of an existing splunk installation. Configures .bash_profile and .bashrc for splunk user (by calling configure_bash.yml), disables THP and increases ulimits (by calling configure_os.yml), kills any stale splunkd processes present when `splunk_force_kill` is set to `True` (by calling adhoc_kill_splunkd.yml). Note: You should NOT run the upgrade_splunk.yml task directly from a playbook. check_splunk.yml will call upgrade_splunk.yml if it determines that an upgrade is needed; It will then download and unarchive the new version of Splunk (by calling download_and_unarchive.yml), ensure that mongod is in a good stopped state (by calling adhoc_fix_mongo.yml), and will then perform post-installation tasks using the post_install.yml task. + +## Frequently Asked Questions +**Q:** What is the difference between this and splunk-ansible? 
+
+**A:** The splunk-ansible project was built for the docker-splunk project, which is a completely different use case. The way that docker-splunk works is by spinning-up an image that already has splunk-ansible inside of it, and then any arguments provided to Docker are passed into splunk-ansible so that it can run locally inside of the container to install and configure Splunk there. While it's a cool use case, we didn't feel that splunk-ansible met our needs as Splunk administrators to manage production Splunk deployments, so we wrote our own.
+##
+
+**Q:** When using configure_apps.yml, the play fails on the synchronize module. What gives?
+
+**A:** This is due to a [known Ansible bug](https://github.com/ansible/ansible/issues/56629) related to password-based authentication. To work around this issue, use a key pair for SSH authentication instead by setting the `ansible_user` and `ansible_ssh_private_key_file` variables.
+##
+
+## Support
+If you have questions or need support, you can:
+
+* Use the [GitHub issue tracker](https://github.com/splunk/ansible-role-for-splunk/issues) to submit bugs or request features.
+* Post a question to [Splunk Answers](http://answers.splunk.com).
+* Join the #ansible channel on [Splunk-Usergroups Slack](https://docs.splunk.com/Documentation/Community/1.0/community/Chat#Join_us_on_Slack).
+* Please do not file cases in the Splunk support portal related to this project, as they will not be able to help you.
+
+## License
+Copyright 2018-2021 Splunk.
+
+Distributed under the terms of the Apache 2.0 license, ansible-role-for-splunk is free and open-source software.