From 1e356f9cdfa885c78791d5d6e5d2baef22f01853 Mon Sep 17 00:00:00 2001 From: "Rumen G. Bogdanovski" Date: Wed, 7 Nov 2007 02:35:54 -0800 Subject: [IPVS]: Bind connections on stanby if the destination exists This patch fixes the problem with node overload on director fail-over. Given the scenario: 2 nodes each accepting 3 connections at a time and 2 directors, director failover occurs when the nodes are fully loaded (6 connections to the cluster) in this case the new director will assign another 6 connections to the cluster, If the same real servers exist there. The problem turned to be in not binding the inherited connections to the real servers (destinations) on the backup director. Therefore: "ipvsadm -l" reports 0 connections: root@test2:~# ipvsadm -l IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP test2.local:5999 wlc -> node473.local:5999 Route 1000 0 0 -> node484.local:5999 Route 1000 0 0 while "ipvs -lnc" is right root@test2:~# ipvsadm -lnc IPVS connection entries pro expire state source virtual destination TCP 14:56 ESTABLISHED 192.168.0.10:39164 192.168.0.222:5999 192.168.0.51:5999 TCP 14:59 ESTABLISHED 192.168.0.10:39165 192.168.0.222:5999 192.168.0.52:5999 So the patch I am sending fixes the problem by binding the received connections to the appropriate service on the backup director, if it exists, else the connection will be handled the old way. So if the master and the backup directors are synchronized in terms of real services there will be no problem with server over-committing since new connections will not be created on the nonexistent real services on the backup. However if the service is created later on the backup, the binding will be performed when the next connection update is received. With this patch the inherited connections will show as inactive on the backup: root@test2:~# ipvsadm -l IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP test2.local:5999 wlc -> node473.local:5999 Route 1000 0 1 -> node484.local:5999 Route 1000 0 1 rumen@test2:~$ cat /proc/net/ip_vs IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP C0A800DE:176F wlc -> C0A80033:176F Route 1000 0 1 -> C0A80032:176F Route 1000 0 1 Regards, Rumen Bogdanovski Acked-by: Julian Anastasov Signed-off-by: Rumen G. Bogdanovski Signed-off-by: Simon Horman --- net/ipv4/ipvs/ip_vs_sync.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'net/ipv4/ipvs/ip_vs_sync.c') diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 0d4d9721cbd4..b1694d67abb9 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -284,6 +284,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) struct ip_vs_sync_conn_options *opt; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; + struct ip_vs_dest *dest; char *p; int i; @@ -317,20 +318,35 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) s->caddr, s->cport, s->vaddr, s->vport); if (!cp) { + /* + * Find the appropriate destination for the connection. + * If it is not found the connection will remain unbound + * but still handled. + */ + dest = ip_vs_find_dest(s->daddr, s->dport, + s->vaddr, s->vport, + s->protocol); cp = ip_vs_conn_new(s->protocol, s->caddr, s->cport, s->vaddr, s->vport, s->daddr, s->dport, - flags, NULL); + flags, dest); + if (dest) + atomic_dec(&dest->refcnt); if (!cp) { IP_VS_ERR("ip_vs_conn_new failed\n"); return; } cp->state = ntohs(s->state); } else if (!cp->dest) { - /* it is an entry created by the synchronization */ - cp->state = ntohs(s->state); - cp->flags = flags | IP_VS_CONN_F_HASHED; + dest = ip_vs_try_bind_dest(cp); + if (!dest) { + /* it is an unbound entry created by + * synchronization */ + cp->state = ntohs(s->state); + cp->flags = flags | IP_VS_CONN_F_HASHED; + } else + atomic_dec(&dest->refcnt); } /* Note that we don't touch its state and flags if it is a normal entry. */ -- cgit v1.2.3-59-g8ed1b