From a56dbfa5b9d15db39662086c3b8753c96d5ad5af Mon Sep 17 00:00:00 2001 From: John Brooks Date: Sun, 6 Nov 2016 19:35:15 -0700 Subject: [PATCH] core: Improve OnionConnector backoff and retry behavior OnionConnector will now reset the backoff timer and try again immediately when network connection status changes. Connection attempts now scale up to 15 minutes, instead of waiting only 10 seconds each time. --- core/network.go | 8 ++- core/onionconnector.go | 122 ++++++++++++++++++++++++++++++----------- 2 files changed, 97 insertions(+), 33 deletions(-) diff --git a/core/network.go b/core/network.go index b1a962b..56f96d8 100644 --- a/core/network.go +++ b/core/network.go @@ -264,12 +264,16 @@ func (n *Network) GetProxyDialer(forward proxy.Dialer) (proxy.Dialer, error) { func (n *Network) WaitForProxyDialer(forward proxy.Dialer, c context.Context) (proxy.Dialer, error) { var monitor <-chan interface{} for { - // Check if there's a proxy address available + // Check if there's a proxy address available and connection status is Ready n.controlMutex.Lock() socks := n.socksAddress + var connectionStatus ricochet.TorConnectionStatus + if n.status.Connection != nil { + connectionStatus = *n.status.Connection + } n.controlMutex.Unlock() - if socks.IsValid() { + if connectionStatus.Status == ricochet.TorConnectionStatus_READY && socks.IsValid() { return proxy.SOCKS5(socks.Network, socks.Address, nil, forward) } diff --git a/core/onionconnector.go b/core/onionconnector.go index 55ca903..95ea2ac 100644 --- a/core/onionconnector.go +++ b/core/onionconnector.go @@ -2,16 +2,14 @@ package core import ( "errors" + "github.com/ricochet-im/ricochet-go/rpc" "golang.org/x/net/context" "log" + "math/rand" "net" "time" ) -// XXX on failures, decide how long to wait before trying again -// XXX limit and queue global attempts? -// XXX circ/stream/fetch monitoring - type OnionConnector struct { Network *Network NeverGiveUp bool @@ -24,13 +22,14 @@ type OnionConnector struct { // If NeverGiveUp is set, failed connections will be retried automatically, // with appropriate backoff periods, and an error is only returned in fatal // situations. The backoff is defined by AttemptCount on the OnionConnector -// instance -- it is not reset after Connect returns. +// instance. The backoff counter is __not__ reset after Connect returns. // -// If the Network is not ready, this function will wait and monitor the -// Network's status. +// If the Network is not ready, this function will wait until the network +// becomes ready. The backoff attempt counter is automatically reset when +// the network status changes. // // Context can be used to set a timeout, deadline, or cancel function for -// the connection attempt. +// the overall connection attempts. func (oc *OnionConnector) Connect(address string, c context.Context) (net.Conn, error) { if oc.Network == nil { return nil, errors.New("No network configured for connector") @@ -43,45 +42,106 @@ func (oc *OnionConnector) Connect(address string, c context.Context) (net.Conn, options := &net.Dialer{ Cancel: c.Done(), - Timeout: 0, // XXX should use timeout + Timeout: 60 * time.Second, // Maximum time for a single connect attempt } - for { - // XXX This waits to know SOCKS info, but does not wait for connection - // ready state; should it? - // XXX Also, we're supposed to change backoff when tor connectivity state - // changes, but this won't. - proxy, err := oc.Network.WaitForProxyDialer(options, c) - if err != nil { - return nil, err - } + // Internal context used by blocking functions, assigned in the loop + var waitCtx context.Context + cancelWaitFunc := func() {} + // Cancel at return, extra closure required to call the current value + defer func() { cancelWaitFunc() }() - conn, err := proxy.Dial("tcp", address) + // Monitor for network connection status changes. On any change, reset backoff + // and call cancelWaitFunc, which is set with waitCtx in the loop below, to cancel + // the current wait and try again. + networkMonitor := oc.Network.EventMonitor().Subscribe(20) + defer oc.Network.EventMonitor().Unsubscribe(networkMonitor) + go func() { + var prevConnStatus ricochet.TorConnectionStatus_Status + + for { + select { + case <-c.Done(): + return + case v, ok := <-networkMonitor: + if !ok { + // monitor channel is closed when Connect() returns + return + } + + event := v.(ricochet.NetworkStatus) + + var connStatus ricochet.TorConnectionStatus_Status + if event.Connection != nil { + connStatus = event.Connection.Status + } + if connStatus != prevConnStatus { + prevConnStatus = connStatus + oc.ResetBackoff() + cancelWaitFunc() + } + } + } + }() + + for { + waitCtx, cancelWaitFunc = context.WithCancel(c) + + proxy, err := oc.Network.WaitForProxyDialer(options, waitCtx) if err != nil { if c.Err() != nil { return nil, c.Err() - } - if !oc.NeverGiveUp { + } else if waitCtx.Err() != nil { + continue + } else { return nil, err } - - log.Printf("Connection attempt to %s failed: %s", address, err) - if err := oc.Backoff(c); err != nil { - return nil, err - } - - continue } - return conn, nil + conn, err := proxy.Dial("tcp", address) + if err == nil { + // Success! + return conn, nil + } else if c.Err() != nil { + return nil, c.Err() + } else if !oc.NeverGiveUp { + return nil, err + } + + log.Printf("Connection attempt %d to %s failed: %s", oc.AttemptCount, address, err) + + if err := oc.Backoff(waitCtx); err != nil { + if c.Err() != nil { + return nil, c.Err() + } else if waitCtx.Err() != nil { + continue + } else { + return nil, err + } + } } } +var backoffDelay [7]int = [7]int{0, 30, 60, 120, 300, 600, 900} + func (oc *OnionConnector) Backoff(c context.Context) error { oc.AttemptCount++ - // XXX This should actually do backoff and be less dumb - waitCtx, finish := context.WithTimeout(c, 10*time.Second) + + var delay int + if oc.AttemptCount < len(backoffDelay) { + delay = backoffDelay[oc.AttemptCount] + } else { + delay = backoffDelay[len(backoffDelay)-1] + } + // Jitter by +/-20% + delay += int(float32(delay) * (rand.Float32()*0.4 - 0.2)) + + waitCtx, finish := context.WithTimeout(c, time.Duration(delay)*time.Second) defer finish() <-waitCtx.Done() return c.Err() } + +func (oc *OnionConnector) ResetBackoff() { + oc.AttemptCount = 0 +}