core: Improve OnionConnector backoff and retry behavior

OnionConnector will now reset the backoff timer and try again
immediately when network connection status changes. Connection attempts
now scale up to 15 minutes, instead of waiting only 10 seconds each
time.
This commit is contained in:
John Brooks 2016-11-06 19:35:15 -07:00
parent fe1107b117
commit a56dbfa5b9
2 changed files with 97 additions and 33 deletions

View File

@ -264,12 +264,16 @@ func (n *Network) GetProxyDialer(forward proxy.Dialer) (proxy.Dialer, error) {
func (n *Network) WaitForProxyDialer(forward proxy.Dialer, c context.Context) (proxy.Dialer, error) { func (n *Network) WaitForProxyDialer(forward proxy.Dialer, c context.Context) (proxy.Dialer, error) {
var monitor <-chan interface{} var monitor <-chan interface{}
for { for {
// Check if there's a proxy address available // Check if there's a proxy address available and connection status is Ready
n.controlMutex.Lock() n.controlMutex.Lock()
socks := n.socksAddress socks := n.socksAddress
var connectionStatus ricochet.TorConnectionStatus
if n.status.Connection != nil {
connectionStatus = *n.status.Connection
}
n.controlMutex.Unlock() n.controlMutex.Unlock()
if socks.IsValid() { if connectionStatus.Status == ricochet.TorConnectionStatus_READY && socks.IsValid() {
return proxy.SOCKS5(socks.Network, socks.Address, nil, forward) return proxy.SOCKS5(socks.Network, socks.Address, nil, forward)
} }

View File

@ -2,16 +2,14 @@ package core
import ( import (
"errors" "errors"
"github.com/ricochet-im/ricochet-go/rpc"
"golang.org/x/net/context" "golang.org/x/net/context"
"log" "log"
"math/rand"
"net" "net"
"time" "time"
) )
// XXX on failures, decide how long to wait before trying again
// XXX limit and queue global attempts?
// XXX circ/stream/fetch monitoring
type OnionConnector struct { type OnionConnector struct {
Network *Network Network *Network
NeverGiveUp bool NeverGiveUp bool
@ -24,13 +22,14 @@ type OnionConnector struct {
// If NeverGiveUp is set, failed connections will be retried automatically, // If NeverGiveUp is set, failed connections will be retried automatically,
// with appropriate backoff periods, and an error is only returned in fatal // with appropriate backoff periods, and an error is only returned in fatal
// situations. The backoff is defined by AttemptCount on the OnionConnector // situations. The backoff is defined by AttemptCount on the OnionConnector
// instance -- it is not reset after Connect returns. // instance. The backoff counter is __not__ reset after Connect returns.
// //
// If the Network is not ready, this function will wait and monitor the // If the Network is not ready, this function will wait until the network
// Network's status. // becomes ready. The backoff attempt counter is automatically reset when
// the network status changes.
// //
// Context can be used to set a timeout, deadline, or cancel function for // Context can be used to set a timeout, deadline, or cancel function for
// the connection attempt. // the overall connection attempts.
func (oc *OnionConnector) Connect(address string, c context.Context) (net.Conn, error) { func (oc *OnionConnector) Connect(address string, c context.Context) (net.Conn, error) {
if oc.Network == nil { if oc.Network == nil {
return nil, errors.New("No network configured for connector") return nil, errors.New("No network configured for connector")
@ -43,45 +42,106 @@ func (oc *OnionConnector) Connect(address string, c context.Context) (net.Conn,
options := &net.Dialer{ options := &net.Dialer{
Cancel: c.Done(), Cancel: c.Done(),
Timeout: 0, // XXX should use timeout Timeout: 60 * time.Second, // Maximum time for a single connect attempt
} }
for { // Internal context used by blocking functions, assigned in the loop
// XXX This waits to know SOCKS info, but does not wait for connection var waitCtx context.Context
// ready state; should it? cancelWaitFunc := func() {}
// XXX Also, we're supposed to change backoff when tor connectivity state // Cancel at return, extra closure required to call the current value
// changes, but this won't. defer func() { cancelWaitFunc() }()
proxy, err := oc.Network.WaitForProxyDialer(options, c)
if err != nil {
return nil, err
}
conn, err := proxy.Dial("tcp", address) // Monitor for network connection status changes. On any change, reset backoff
// and call cancelWaitFunc, which is set with waitCtx in the loop below, to cancel
// the current wait and try again.
networkMonitor := oc.Network.EventMonitor().Subscribe(20)
defer oc.Network.EventMonitor().Unsubscribe(networkMonitor)
go func() {
var prevConnStatus ricochet.TorConnectionStatus_Status
for {
select {
case <-c.Done():
return
case v, ok := <-networkMonitor:
if !ok {
// monitor channel is closed when Connect() returns
return
}
event := v.(ricochet.NetworkStatus)
var connStatus ricochet.TorConnectionStatus_Status
if event.Connection != nil {
connStatus = event.Connection.Status
}
if connStatus != prevConnStatus {
prevConnStatus = connStatus
oc.ResetBackoff()
cancelWaitFunc()
}
}
}
}()
for {
waitCtx, cancelWaitFunc = context.WithCancel(c)
proxy, err := oc.Network.WaitForProxyDialer(options, waitCtx)
if err != nil { if err != nil {
if c.Err() != nil { if c.Err() != nil {
return nil, c.Err() return nil, c.Err()
} } else if waitCtx.Err() != nil {
if !oc.NeverGiveUp { continue
} else {
return nil, err return nil, err
} }
log.Printf("Connection attempt to %s failed: %s", address, err)
if err := oc.Backoff(c); err != nil {
return nil, err
}
continue
} }
return conn, nil conn, err := proxy.Dial("tcp", address)
if err == nil {
// Success!
return conn, nil
} else if c.Err() != nil {
return nil, c.Err()
} else if !oc.NeverGiveUp {
return nil, err
}
log.Printf("Connection attempt %d to %s failed: %s", oc.AttemptCount, address, err)
if err := oc.Backoff(waitCtx); err != nil {
if c.Err() != nil {
return nil, c.Err()
} else if waitCtx.Err() != nil {
continue
} else {
return nil, err
}
}
} }
} }
var backoffDelay [7]int = [7]int{0, 30, 60, 120, 300, 600, 900}
func (oc *OnionConnector) Backoff(c context.Context) error { func (oc *OnionConnector) Backoff(c context.Context) error {
oc.AttemptCount++ oc.AttemptCount++
// XXX This should actually do backoff and be less dumb
waitCtx, finish := context.WithTimeout(c, 10*time.Second) var delay int
if oc.AttemptCount < len(backoffDelay) {
delay = backoffDelay[oc.AttemptCount]
} else {
delay = backoffDelay[len(backoffDelay)-1]
}
// Jitter by +/-20%
delay += int(float32(delay) * (rand.Float32()*0.4 - 0.2))
waitCtx, finish := context.WithTimeout(c, time.Duration(delay)*time.Second)
defer finish() defer finish()
<-waitCtx.Done() <-waitCtx.Done()
return c.Err() return c.Err()
} }
func (oc *OnionConnector) ResetBackoff() {
oc.AttemptCount = 0
}