-
inventory file
---
# mapr_cluster_installation.yml
# Ansible playbook to install MapR 7.8 on a 3-node on-premises cluster
# Play 1: operating-system preparation on every node of the cluster.
# Installs prerequisite packages, applies ulimit and sysctl settings, disables
# SELinux, the host firewall and transparent hugepages, registers the MapR
# package repositories, creates the `mapr` account and sets up chrony.
- name: Prepare all nodes for MapR 7.8 installation
  hosts: mapr_cluster
  become: true
  gather_facts: true
  vars:
    mapr_version: "7.8.0"
    mapr_mep_version: "10.1.0"
    java_version: "java-11-openjdk"
    cluster_name: "mapr78cluster"
    # Password for the `mapr` account. The default keeps the value that was
    # previously hard-coded in the task; override it (ideally from Ansible
    # Vault) for anything but a throw-away lab.
    mapr_user_password: "mapr123"
    # `java_version` follows RedHat package naming; Debian-family
    # distributions ship the JDK as openjdk-11-jdk instead.
    java_packages: "{{ ['openjdk-11-jdk'] if ansible_os_family == 'Debian' else [java_version, java_version ~ '-devel'] }}"
    # `ntp` is intentionally not installed: it conflicts with chrony, which is
    # the time daemon this play configures and starts.
    prerequisite_packages:
      - openssh-server
      - sysstat
      - wget
      - net-tools
      - tar
      - lsof
      - python3
      - chrony
      - dnsmasq
    # chrony uses a different config path and unit name on Debian vs RedHat.
    chrony_conf_path: "{{ '/etc/chrony/chrony.conf' if ansible_os_family == 'Debian' else '/etc/chrony.conf' }}"
    chrony_service: "{{ 'chrony' if ansible_os_family == 'Debian' else 'chronyd' }}"
  tasks:
    - name: Update apt cache
      apt:
        update_cache: true
      when: ansible_os_family == "Debian"

    - name: Update yum cache
      yum:
        update_cache: true
      when: ansible_os_family == "RedHat"

    # The whole list is handed to the package manager in one transaction
    # instead of one invocation per package.
    - name: Install required packages
      package:
        name: "{{ java_packages + prerequisite_packages }}"
        state: present

    - name: Set system limits for MapR
      pam_limits:
        domain: '*'
        limit_type: "{{ item.limit_type }}"
        limit_item: "{{ item.limit_item }}"
        value: "{{ item.value }}"
      loop:
        - { limit_type: 'soft', limit_item: 'nofile', value: '64000' }
        - { limit_type: 'hard', limit_item: 'nofile', value: '64000' }
        - { limit_type: 'soft', limit_item: 'nproc', value: '64000' }
        - { limit_type: 'hard', limit_item: 'nproc', value: '64000' }
        - { limit_type: 'soft', limit_item: 'stack', value: 'unlimited' }
        - { limit_type: 'hard', limit_item: 'stack', value: 'unlimited' }

    - name: Disable SELinux
      selinux:
        state: disabled
      when: ansible_os_family == "RedHat"

    # NOTE(review): this fails on hosts where the firewall package is not
    # installed at all - confirm that ufw/firewalld is present on every node.
    - name: Disable firewall
      service:
        name: "{{ firewall_service }}"
        state: stopped
        enabled: false
      vars:
        firewall_service: "{{ 'ufw' if ansible_os_family == 'Debian' else 'firewalld' }}"

    - name: Set kernel parameters for MapR
      sysctl:
        name: "{{ item.name }}"
        value: "{{ item.value }}"
        state: present
        reload: true
      loop:
        - { name: 'vm.swappiness', value: '1' }
        - { name: 'net.core.somaxconn', value: '4096' }
        - { name: 'net.ipv4.tcp_fin_timeout', value: '30' }
        - { name: 'net.ipv4.tcp_keepalive_time', value: '60' }
        - { name: 'net.ipv4.tcp_keepalive_intvl', value: '10' }
        - { name: 'net.ipv4.tcp_keepalive_probes', value: '3' }
        - { name: 'vm.overcommit_memory', value: '0' }

    # `shell` is required for the redirections. The former `args: warn`
    # option was dropped because current ansible-core rejects it.
    - name: Disable transparent hugepages
      shell: |
        echo never > /sys/kernel/mm/transparent_hugepage/enabled
        echo never > /sys/kernel/mm/transparent_hugepage/defrag

    # One lineinfile call per line: a single `line` containing "\n" is never
    # matched on later runs and would be appended again every time.
    - name: Make transparent hugepages setting persistent
      lineinfile:
        path: /etc/rc.local
        line: "{{ item }}"
        # Keep the commands ahead of a trailing `exit 0`; when there is none
        # the line is appended at the end of the file.
        insertbefore: '^exit 0'
        create: true
        mode: "0755"
      loop:
        - "echo never > /sys/kernel/mm/transparent_hugepage/enabled"
        - "echo never > /sys/kernel/mm/transparent_hugepage/defrag"

    # NOTE(review): gpgcheck is disabled for all three yum repositories, so
    # packages are installed unverified - consider importing the vendor
    # signing key and enabling gpgcheck.
    - name: Add MapR 7.8 repository for RedHat
      yum_repository:
        name: mapr-core
        description: "MapR {{ mapr_version }} Core Repository"
        baseurl: "https://package.mapr.hpe.com/releases/v{{ mapr_version }}/redhat/"
        gpgcheck: false
        enabled: true
      when: ansible_os_family == "RedHat"

    - name: Add MapR 7.8 MEP repository for RedHat
      yum_repository:
        name: mapr-mep
        description: "MapR {{ mapr_version }} MEP Repository"
        baseurl: "https://package.mapr.hpe.com/releases/MEP/MEP-{{ mapr_mep_version }}/redhat/"
        gpgcheck: false
        enabled: true
      when: ansible_os_family == "RedHat"

    - name: Add MapR 7.8 ecosystem repository for RedHat
      yum_repository:
        name: mapr-ecosystem
        description: "MapR {{ mapr_version }} Ecosystem Repository"
        baseurl: "https://package.mapr.hpe.com/releases/ecosystem-{{ mapr_version }}/redhat/"
        gpgcheck: false
        enabled: true
      when: ansible_os_family == "RedHat"

    # NOTE(review): no signing key is installed for the apt repositories
    # below; apt may refuse them until the vendor key is trusted - verify.
    - name: Add MapR 7.8 repository for Debian
      apt_repository:
        repo: "deb https://package.mapr.hpe.com/releases/v{{ mapr_version }}/ubuntu binary focal"
        state: present
      when: ansible_os_family == "Debian"

    - name: Add MapR 7.8 MEP repository for Debian
      apt_repository:
        repo: "deb https://package.mapr.hpe.com/releases/MEP/MEP-{{ mapr_mep_version }}/ubuntu binary focal"
        state: present
      when: ansible_os_family == "Debian"

    - name: Add MapR 7.8 ecosystem repository for Debian
      apt_repository:
        repo: "deb https://package.mapr.hpe.com/releases/ecosystem-{{ mapr_version }}/ubuntu binary focal"
        state: present
      when: ansible_os_family == "Debian"

    - name: Create MapR user
      user:
        name: mapr
        create_home: true
        shell: /bin/bash

    # The salt is seeded from the inventory hostname so the generated hash is
    # stable between runs (a random salt makes this task report "changed" on
    # every run). no_log keeps the password out of task output.
    - name: Set password for MapR user
      user:
        name: mapr
        password: "{{ mapr_user_password | password_hash('sha512', 65534 | random(seed=inventory_hostname) | string) }}"
      no_log: true

    - name: Create MapR install directory
      file:
        path: /opt/mapr
        state: directory
        owner: mapr
        group: mapr
        mode: "0755"

    # Only hosts in the mapr_ntp group receive the templated configuration;
    # chrony.conf.j2 must exist next to the playbook.
    - name: Setup Chrony
      template:
        src: chrony.conf.j2
        dest: "{{ chrony_conf_path }}"
      notify: restart chrony
      when: inventory_hostname in groups['mapr_ntp']

    - name: Start Chrony service
      service:
        name: "{{ chrony_service }}"
        state: started
        enabled: true

  handlers:
    - name: restart chrony
      service:
        name: "{{ chrony_service }}"
        state: restarted
# Play 2: install the MapR core and fileserver packages on every node, plus
# ZooKeeper on the members of the mapr_zookeeper group.
- name: Install MapR Core on all nodes
  hosts: mapr_cluster
  become: true
  tasks:
    # A list passed to `name` is resolved in a single package-manager
    # transaction, which is faster than looping one package at a time.
    - name: Install MapR core packages
      package:
        name:
          - mapr-core
          - mapr-fileserver
        state: present

    - name: Install ZooKeeper package
      package:
        name: mapr-zookeeper
        state: present
      when: inventory_hostname in groups['mapr_zookeeper']
# Play 3: hosts in the mapr_cldb group additionally receive the CLDB package.
- name: Configure CLDB nodes
  become: true
  hosts: mapr_cldb
  tasks:
    - name: Install CLDB package
      ansible.builtin.package:
        state: present
        name: mapr-cldb
# Play 4: install the MapR Control System (MCS) packages and the monitoring
# stack on the hosts of the mapr_webserver group.
- name: Configure MapR control system node
  hosts: mapr_webserver
  become: true
  tasks:
    # Package lists are installed in one transaction instead of a per-item
    # loop.
    - name: Install MapR Control System (MCS)
      package:
        name:
          - mapr-webserver
          - mapr-apiserver
          - mapr-adminui
        state: present

    - name: Install MapR Monitoring components
      package:
        name:
          - mapr-grafana
          - mapr-collectd
          - mapr-opentsdb
        state: present
# Play 5: install optional ecosystem components. The play targets the whole
# cluster and each task is limited to the inventory group for that role.
# `| default([])` lets an inventory omit a role group entirely; without it a
# missing group aborts the play with an undefined-variable error.
- name: Install additional components on appropriate nodes
  hosts: mapr_cluster
  become: true
  tasks:
    - name: Install Hadoop ecosystem components
      package:
        name:
          - mapr-hadoop-core
          - mapr-hadoop-client
        state: present
      when: inventory_hostname in (groups['mapr_hadoop'] | default([]))

    - name: Install Hive components
      package:
        name:
          - mapr-hive
          - mapr-hiveserver2
          - mapr-hivemetastore
        state: present
      when: inventory_hostname in (groups['mapr_hive'] | default([]))

    - name: Install Spark components
      package:
        name:
          - mapr-spark
          - mapr-spark-historyserver
        state: present
      when: inventory_hostname in (groups['mapr_spark'] | default([]))

    - name: Install Drill components
      package:
        name: mapr-drill
        state: present
      when: inventory_hostname in (groups['mapr_drill'] | default([]))

    - name: Install Hbase components
      package:
        name:
          - mapr-hbase
          - mapr-hbasethrift
        state: present
      when: inventory_hostname in (groups['mapr_hbase'] | default([]))

    - name: Install data science components
      package:
        name:
          - mapr-data-access-gateway
          - mapr-asynchbase
        state: present
      when: inventory_hostname in (groups['mapr_datascience'] | default([]))
# Play 6: write the list of raw devices to /tmp/disks.txt and hand it to
# disksetup on every node.
# NOTE(review): this play runs before configure.sh is executed later in the
# file - confirm that disksetup works on a node that is not yet configured.
- name: Configure disks for MapR-FS
  hosts: mapr_cluster
  become: true
  vars:
    # Define `mapr_disks` in host_vars/group_vars to describe the real disk
    # layout per node; the fallback is the pair that was previously
    # hard-coded on the task (task-level vars could not be overridden from
    # inventory).
    mapr_disk_devices: "{{ mapr_disks | default(['/dev/sdb', '/dev/sdc']) }}"
  tasks:
    # NOTE(review): every line is written as "<device> -force"; confirm that
    # disksetup accepts this per-line suffix in a -F disk list file.
    - name: Create disk list file
      copy:
        content: |
          {% for disk in mapr_disk_devices %}
          {{ disk }} -force
          {% endfor %}
        dest: /tmp/disks.txt

    # `command` is enough because no shell features are used. The run is
    # skipped once the disktab written by disksetup exists, so disks that are
    # already formatted are not touched again on a re-run.
    - name: Initialize the disks for MapR-FS
      command: /opt/mapr/server/disksetup -F /tmp/disks.txt
      args:
        creates: /opt/mapr/conf/disktab
# Play 7: configure every node and bring the cluster up.
#
# Changes compared with the previous version of this play:
#   * configure.sh now runs on ALL nodes. It used to run only on the first
#     CLDB host, which left every other node unconfigured.
#   * ZooKeeper and Warden are started through their system services.
#     `maprcli` talks to a running CLDB, so it cannot be used to start
#     ZooKeeper or the CLDB itself during first boot.
#   * Warden launches the services configured on each node, which replaces
#     the individual `maprcli node services ... -action start` calls.
#
# With the default linear strategy each task finishes on all hosts before the
# next one starts, giving the order: ZooKeeper -> Warden on CLDB nodes ->
# Warden on the remaining nodes.
- name: Configure and start the MapR cluster
  hosts: mapr_cluster
  become: true
  vars:
    cluster_name: "mapr78cluster"
    mapr_security: "disabled"  # Change to "enabled" for secure clusters
  tasks:
    # NOTE(review): a secure cluster normally needs keys generated on one
    # node and distributed to the others before configure.sh -secure is run
    # elsewhere; that step is not automated here - verify before enabling.
    - name: Configure the cluster
      shell: >-
        /opt/mapr/server/configure.sh -N {{ cluster_name }}
        -C {{ groups['mapr_cldb'] | join(',') }}
        -Z {{ groups['mapr_zookeeper'] | join(',') }}
        -no-autostart
        {{ '-secure -dare' if mapr_security == 'enabled' else '' }}
      args:
        creates: /opt/mapr/conf/mapr-clusters.conf

    - name: Start ZooKeeper services on ZooKeeper nodes
      service:
        name: mapr-zookeeper
        state: started
        enabled: true
      when: inventory_hostname in groups['mapr_zookeeper']

    - name: Wait for ZooKeeper to start
      pause:
        seconds: 30

    - name: Start Warden on CLDB nodes
      service:
        name: mapr-warden
        state: started
        enabled: true
      when: inventory_hostname in groups['mapr_cldb']

    - name: Wait for CLDB to start
      pause:
        seconds: 60

    # Idempotent for the CLDB nodes, where Warden is already running.
    - name: Start Warden on all remaining nodes
      service:
        name: mapr-warden
        state: started
        enabled: true
# Play 8: ask the cluster to start the monitoring services on the hosts of
# the mapr_webserver group.
- name: Configure monitoring services
  hosts: mapr_webserver
  become: true
  tasks:
    # `-nodes` names the node the services should be started on; the command
    # previously had no node selector. `command` replaces `shell` because no
    # shell features are used. Failures stay non-fatal (best effort).
    - name: Start monitoring services
      command: >-
        /opt/mapr/server/maprcli node services
        -name collectd,opentsdb,grafana
        -action start
        -nodes {{ ansible_fqdn }}
      ignore_errors: true
# Play 9: start the Data Access Gateway on the hosts of the mapr_datascience
# group.
- name: Configure Data Access Gateway
  hosts: mapr_datascience
  become: true
  tasks:
    # The service is addressed as `data-access-gateway`; `mapr-gateway` is
    # not the Data Access Gateway service, and the resulting error was hidden
    # by ignore_errors. `-nodes` selects the node to act on.
    - name: Start Data Access Gateway
      command: >-
        /opt/mapr/server/maprcli node services
        -name data-access-gateway
        -action start
        -nodes {{ ansible_fqdn }}
      ignore_errors: true
# Play 10: query the cluster from the first CLDB host as the `mapr` user and
# print the dashboard and node-list JSON.
- name: Verify installation
  hosts: mapr_cldb[0]
  become: true
  tasks:
    # Both queries are read-only, so they are marked as never changing the
    # host; `command` replaces `shell` because no shell features are used.
    - name: Check cluster status
      command: /opt/mapr/server/maprcli dashboard info -json
      register: cluster_status
      become_user: mapr
      changed_when: false

    - name: Display cluster status
      debug:
        var: cluster_status.stdout

    - name: Check node status
      command: /opt/mapr/server/maprcli node list -json
      register: node_status
      become_user: mapr
      changed_when: false

    - name: Display node status
      debug:
        var: node_status.stdout
Key updates for MapR 7.8 compared to 7.7:
- Updated to MEP 10.1.0 (from 9.1.0 in 7.7)
- Added proper time synchronization with Chrony (preferred over NTP in newer deployments)
- Added the new API Server component that’s required in MapR 7.8
- Added Data Science components including the Data Access Gateway
- Improved system parameters tuning with more optimized values for 7.8
- Updated paths to use `/opt/mapr/server/maprcli` instead of just `maprcli` for more reliability
- Added additional system limits for MapR 7.8’s resource requirements
- Added node status verification at the end of installation
Before running the playbook:
- Adjust the disk definitions in the “Configure disks for MapR-FS” task to match your actual disk configuration
- Set the cluster_name variable to your desired cluster name
- Update mapr_security to “enabled” if you want a secure cluster
- Create the chrony.conf.j2 template file in the same directory as your playbook
Run the playbook with:
ansible-playbook -i inventory.ini mapr_cluster_installation.yml
This playbook should successfully install MapR 7.8 on your 3-node cluster with all the necessary components and optimizations.
# Ansible inventory (INI format) for the three-node cluster.
# mapr_cluster is the parent group targeted by the cluster-wide plays; it is
# the union of every role group listed below.
[mapr_cluster:children]
mapr_cldb
mapr_zookeeper
mapr_webserver
mapr_hadoop
mapr_hbase
mapr_hive
mapr_spark
mapr_drill
mapr_datascience
mapr_ntp
# CLDB hosts; the playbook also addresses the first entry as mapr_cldb[0].
[mapr_cldb]
node1.example.com
node2.example.com
# Hosts that receive the mapr-zookeeper package.
[mapr_zookeeper]
node1.example.com
node2.example.com
node3.example.com
# Host for the MCS and monitoring packages.
[mapr_webserver]
node1.example.com
# Hosts that receive the Hadoop packages.
[mapr_hadoop]
node1.example.com
node2.example.com
node3.example.com
# Hosts that receive the HBase packages.
[mapr_hbase]
node2.example.com
node3.example.com
# Hosts that receive the Hive packages.
[mapr_hive]
node2.example.com
# Hosts that receive the Spark packages.
[mapr_spark]
node3.example.com
# Hosts that receive the Drill package.
[mapr_drill]
node3.example.com
# Hosts that receive the Data Access Gateway and asynchbase packages.
[mapr_datascience]
node2.example.com
# Hosts on which the chrony.conf.j2 template is deployed.
[mapr_ntp]
node1.example.com
The mapr_datascience group in the playbook refers to nodes that will have data science components installed. In the MapR 7.8 context, this includes:
- mapr-data-access-gateway – This is a key component in MapR 7.8 that provides RESTful API access to MapR services and data. It allows data scientists and applications to interact with MapR data through REST APIs, which is particularly useful for:
- Integration with Python, R, and other data science tools
- Building web applications that need to access MapR data
- Supporting microservices architecture that interacts with MapR
- mapr-asynchbase – This is an asynchronous HBase client library that provides non-blocking access to HBase. It’s particularly useful for:
- High-throughput applications that need to interact with HBase
- Applications that need to perform many operations in parallel
- Avoiding blocking I/O in data science applications
These packages create an environment more suitable for data scientists to work with MapR data using their preferred tools. The Data Access Gateway in particular is becoming increasingly important in modern MapR deployments as it enables easier integration with modern data science tools and workflows.