About mail server and imap ha active-active cluster

I have set up a mail server for testing.
My goal is to have an HA mail server with IMAPS: when a client connects to a virtual IP, it is redirected to one of two real servers, and if one real server crashes, the other real server “takes over” the connection.
I have set up a cluster with two keepalived/HAProxy load balancers (LBs) and two real servers running Postfix and Dovecot. The two LBs run Debian; the mail servers run Fedora 31.
This is my configuration on the two LBs (load balancers):

Keepalived.conf

global_defs {
}
vrrp_instance VI_1 {
       interface nm-team
       state MASTER
       virtual_router_id 51
       priority 101                    # 101 on master, 100 on backup
       advert_int 1
       smtp_alert
authentication {
auth_type PASS
auth_pass mypass

}
}

       virtual_ipaddress {
           10.2.0.4/24 brd 10.2.0.255 dev nm-team
}

 virtual_server 10.2.0.4 25 {
   delay_loop 30
   lb_algo rr
   lb_kind DR
   protocol TCP
   persistence_timeout 360

   real_server 10.2.0.5 25 {
   weight 1
       TCP_CHECK {
               connect_timeout 10
       connect_port 25
       delay_before_retry 3
       }
   }
   real_server 10.2.0.6 25 {
       weight 1
       TCP_CHECK {
               connect_timeout 10
       connect_port 25
       delay_before_retry 3
       }
   }
}

virtual_server 10.2.0.4 993 {
delay_loop 30
lb_algo rr
lb_kind DR
protocol TCP
persistence_timeout 360

real_server 10.2.0.5 993 {
weight 1
    TCP_CHECK {
            connect_timeout 10
    connect_port 993
    nb_get_retry 3
    delay_before_retry 3
    }
}
real_server 10.2.0.6 993 {
    weight 1
    TCP_CHECK {
            connect_timeout 10
    connect_port 993
    nb_get_retry 3
    delay_before_retry 3
    }
}
}

haproxy.cfg

global
    log /dev/log    local0
    log /dev/log    local1 notice
    chroot /var/lib/haproxy
    stats socket /run/haproxy/admin.sock mode 660 level admin expose-fd listeners
    stats timeout 30s
    user haproxy
    group haproxy
    daemon

    # Default SSL material locations
    ca-base /etc/ssl/certs
    crt-base /etc/ssl/private

    # Default ciphers to use on SSL-enabled listening sockets.
    # For more information, see ciphers(1SSL). This list is from:
    #  https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
    # An alternative list with additional directives can be obtained from
    #  https://mozilla.github.io/server-side-tls/ssl-config-generator/?server=haproxy
    ssl-default-bind-ciphers ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:RSA+AESGCM:RSA+AES:!aNULL:!MD5:!DSS
    ssl-default-bind-options no-sslv3

defaults
    log global
    mode    tcp

#postfix
listen smtp
bind mail.mydomain.priv:25
balance roundrobin
timeout client 30s
timeout connect 10s
timeout server 1m
no option http-server-close
mode tcp
option smtpchk
option tcplog
server mail1 mail1.mydomain.priv:25 send-proxy
server mail2 mail2.mydomain.priv:25 send-proxy

#dovecot
listen imap
bind mail.mydomain.priv:993
timeout client 30s
timeout connect 10s
timeout server 1m
no option http-server-close
balance leastconn
stick store-request src
stick-table type ip size 200k expire 30m
mode tcp
option tcplog
server mail1 mail1.mydomain.priv:993 send-proxy
server mail2 mail2.mydomain.priv:993 send-proxy

As you can see, mail.mydomain.priv is the “virtual” server
bound to the virtual IP 10.2.0.4 (created by keepalived); the real
servers are 10.2.0.5 and 10.2.0.6.
The virtual IP 10.2.0.4 is an alias on the lo interface. I created it
with this line on the LBs:

ip addr add 10.2.0.4/32 dev lo label lo:0

and with these lines on the real servers:

echo 1 >/proc/sys/net/ipv4/conf/all/arp_ignore
echo 2 >/proc/sys/net/ipv4/conf/all/arp_announce
ip addr add 10.2.0.4/32 dev lo label lo:0

I am not posting the Dovecot/Postfix configuration because it is
too long, but I have tested it and it works fine, both as a single
server and through the 10.2.0.4 virtual IP.
Of course, the real servers have /var/vmail/mydomain shared
using GlusterFS (I know it is slow, but this is only for testing).
I have connected a client, and I can fetch email with Dovecot
and send email with Postfix, using IMAPS and SMTP with STARTTLS,
without any problem.
So, what is the problem?
I tested the cluster by shutting down one of the real servers
while a client (Thunderbird) was open, and the client “froze”, as if
the cluster did not exist, and could not read email.
If I kill the client, or restart it, it reconnects without problems
to the 10.2.0.4 virtual IP (mail.mydomain.priv).
What is wrong?
Is it possible to create an active/active HA cluster using keepalived
and HAProxy?

Answer

Solution found, thanks to help from a Unix forum: I removed the virtual IP from lo:0 and created an nm-team:0 alias only on the HAProxy/keepalived servers.

Then I edited haproxy.cfg:

global
    log /dev/log    local0
    log /dev/log    local1 notice
    chroot /var/lib/haproxy
    stats socket /run/haproxy/admin.sock mode 660 level admin expose-fd listeners
    stats timeout 30s
    user haproxy
    group haproxy
    daemon

    # Default SSL material locations
    ca-base /etc/ssl/certs
    crt-base /etc/ssl/private

    # Default ciphers to use on SSL-enabled listening sockets.
    # For more information, see ciphers(1SSL). This list is from:
    #  https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
    # An alternative list with additional directives can be obtained from
    #  https://mozilla.github.io/server-side-tls/ssl-config-generator/?server=haproxy
    ssl-default-bind-ciphers ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:RSA+AESGCM:RSA+AES:!aNULL:!MD5:!DSS
    ssl-default-bind-options no-sslv3

defaults
    log         global
    mode            tcp
        option                  dontlognull
        option                  redispatch
        retries                 3
        timeout http-request    10s
        timeout queue           1m
        timeout connect         10s
        timeout client          1m
        timeout server          1m
        timeout http-keep-alive 10s
        timeout check           10s
        maxconn                 3000

frontend mail-in
    bind mail.mydomain.priv:25
    mode tcp
    option tcplog
    default_backend             mail-in-back

backend mail-in-back
    balance     roundrobin
    server      mail1.mydomain.priv mail1.mydomain.priv:25 check
    server      mail2.mydomain.priv mail2.mydomain.priv:25 check


frontend imaps-in
    bind mail.mydomain.priv:993
    mode tcp
    option tcplog
    default_backend             imaps-in-back

backend imaps-in-back
    balance     roundrobin
    server      mail1.mydomain.priv mail1.mydomain.priv:993 check
    server      mail2.mydomain.priv mail2.mydomain.priv:993 check

Then I edited keepalived.conf:

vrrp_script chk_haproxy {
  script "killall -0 haproxy"           # check the haproxy process
  interval 2                            # every 2 seconds
  weight 2                              # add 2 points if OK
}

vrrp_instance VI_1 {
  interface nm-team                     # interface to monitor
  state MASTER                          # MASTER on haproxy1, BACKUP on haproxy2
  virtual_router_id 51
  priority 100                          # 100 on haproxy1, 101 on haproxy2
  advert_int 1
  smtp_alert
  authentication {
  auth_type PASS
  auth_pass yourpass
}

  virtual_ipaddress {
       10.2.0.4                        # virtual ip address
  }
  track_script {
       chk_haproxy
  }
}

Then I copied keepalived.conf to haproxy2 and adjusted some values (MASTER becomes BACKUP and priority 100 becomes 101).
On the HAProxy servers I kept this sysctl configuration:

net.ipv4.tcp_syncookies=1
net.ipv4.ip_forward=1
net.ipv4.conf.all.send_redirects = 0
net.ipv4.conf.default.send_redirects = 0
net.ipv4.conf.team0.send_redirects = 0
net.ipv4.conf.nm-team.send_redirects = 0

After restarting keepalived and HAProxy, everything works fine.
I tested a client connection while shutting down one mail server,
and after 5-10 seconds of inactivity the connection came back alive
without restarting the MUA.

Attribution
Source : Link , Question Author : elbarna , Answer Author : elbarna

Leave a Comment