mirror of
https://github.com/cloudflare/pingora.git
synced 2024-09-20 02:31:35 +02:00
Add support for binding to local port ranges and retrying on EADDRNOTAVAIL
This commit is contained in:
parent
acffb8aaf2
commit
d1d7a87b76
5 changed files with 285 additions and 37 deletions
2
.bleep
2
.bleep
|
@ -1 +1 @@
|
|||
28f94f2a402bbf66341bdac8fa670caf5b7311e9
|
||||
90c70086397a4708a4dadfed6e6915ce6dc33481
|
|
@ -33,8 +33,59 @@ pub trait Connect: std::fmt::Debug {
|
|||
async fn connect(&self, addr: &SocketAddr) -> Result<Stream>;
|
||||
}
|
||||
|
||||
/// Settings for binding on connect
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct BindTo {
|
||||
// local ip address
|
||||
pub addr: Option<InetSocketAddr>,
|
||||
// port range
|
||||
port_range: Option<(u16, u16)>,
|
||||
// whether we fallback and try again on bind errors when a port range is set
|
||||
fallback: bool,
|
||||
}
|
||||
|
||||
impl BindTo {
|
||||
/// Sets the port range we will bind to where the first item in the tuple is the lower bound
|
||||
/// and the second item is the upper bound.
|
||||
///
|
||||
/// Note this bind option is only supported on Linux since 6.3, this is a no-op on other systems.
|
||||
/// To reset the range, pass a `None` or `Some((0,0))`, more information can be found [here](https://man7.org/linux/man-pages/man7/ip.7.html)
|
||||
pub fn set_port_range(&mut self, range: Option<(u16, u16)>) -> Result<()> {
|
||||
if range.is_none() && self.port_range.is_none() {
|
||||
// nothing to do
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
match range {
|
||||
// 0,0 is valid for resets
|
||||
None | Some((0, 0)) => self.port_range = Some((0, 0)),
|
||||
// set the port range if valid
|
||||
Some((low, high)) if low > 0 && low < high => {
|
||||
self.port_range = Some((low, high));
|
||||
}
|
||||
_ => return Error::e_explain(SocketError, "invalid port range: {range}"),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Set whether we fallback on no address available if a port range is set
|
||||
pub fn set_fallback(&mut self, fallback: bool) {
|
||||
self.fallback = fallback
|
||||
}
|
||||
|
||||
/// Configured bind port range
|
||||
pub fn port_range(&self) -> Option<(u16, u16)> {
|
||||
self.port_range
|
||||
}
|
||||
|
||||
/// Whether we attempt to fallback on no address available
|
||||
pub fn will_fallback(&self) -> bool {
|
||||
self.fallback && self.port_range.is_some()
|
||||
}
|
||||
}
|
||||
|
||||
/// Establish a connection (l4) to the given peer using its settings and an optional bind address.
|
||||
pub(crate) async fn connect<P>(peer: &P, bind_to: Option<InetSocketAddr>) -> Result<Stream>
|
||||
pub(crate) async fn connect<P>(peer: &P, bind_to: Option<BindTo>) -> Result<Stream>
|
||||
where
|
||||
P: Peer + Send + Sync,
|
||||
{
|
||||
|
@ -142,12 +193,8 @@ pub(crate) fn bind_to_random<P: Peer>(
|
|||
peer: &P,
|
||||
v4_list: &[InetSocketAddr],
|
||||
v6_list: &[InetSocketAddr],
|
||||
) -> Option<InetSocketAddr> {
|
||||
let selected = peer.get_peer_options().and_then(|o| o.bind_to);
|
||||
if selected.is_some() {
|
||||
return selected;
|
||||
}
|
||||
|
||||
) -> Option<BindTo> {
|
||||
// helper function for randomly picking address
|
||||
fn bind_to_ips(ips: &[InetSocketAddr]) -> Option<InetSocketAddr> {
|
||||
match ips.len() {
|
||||
0 => None,
|
||||
|
@ -159,13 +206,31 @@ pub(crate) fn bind_to_random<P: Peer>(
|
|||
}
|
||||
}
|
||||
|
||||
match peer.address() {
|
||||
let mut bind_to = peer.get_peer_options().and_then(|o| o.bind_to.clone());
|
||||
if bind_to.as_ref().map(|b| b.addr).is_some() {
|
||||
// already have a bind address selected
|
||||
return bind_to;
|
||||
}
|
||||
|
||||
let addr = match peer.address() {
|
||||
SocketAddr::Inet(sockaddr) => match sockaddr {
|
||||
InetSocketAddr::V4(_) => bind_to_ips(v4_list),
|
||||
InetSocketAddr::V6(_) => bind_to_ips(v6_list),
|
||||
},
|
||||
SocketAddr::Unix(_) => None,
|
||||
};
|
||||
|
||||
if addr.is_some() {
|
||||
if let Some(bind_to) = bind_to.as_mut() {
|
||||
bind_to.addr = addr;
|
||||
} else {
|
||||
bind_to = Some(BindTo {
|
||||
addr,
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
}
|
||||
bind_to
|
||||
}
|
||||
|
||||
use crate::protocols::raw_connect;
|
||||
|
@ -238,16 +303,25 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_conn_error_addr_not_avail() {
|
||||
let peer = HttpPeer::new("127.0.0.1:121".to_string(), false, "".to_string());
|
||||
let new_session = connect(&peer, Some("192.0.2.2:0".parse().unwrap())).await;
|
||||
let addr = "192.0.2.2:0".parse().ok();
|
||||
let bind_to = BindTo {
|
||||
addr,
|
||||
..Default::default()
|
||||
};
|
||||
let new_session = connect(&peer, Some(bind_to)).await;
|
||||
assert_eq!(new_session.unwrap_err().etype(), &InternalError)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_conn_error_other() {
|
||||
let peer = HttpPeer::new("240.0.0.1:80".to_string(), false, "".to_string()); // non localhost
|
||||
|
||||
let addr = "127.0.0.1:0".parse().ok();
|
||||
// create an error: cannot send from src addr: localhost to dst addr: a public IP
|
||||
let new_session = connect(&peer, Some("127.0.0.1:0".parse().unwrap())).await;
|
||||
let bind_to = BindTo {
|
||||
addr,
|
||||
..Default::default()
|
||||
};
|
||||
let new_session = connect(&peer, Some(bind_to)).await;
|
||||
let error = new_session.unwrap_err();
|
||||
// XXX: some system will allow the socket to bind and connect without error, only to timeout
|
||||
assert!(error.etype() == &ConnectError || error.etype() == &ConnectTimedout)
|
||||
|
@ -371,4 +445,114 @@ mod tests {
|
|||
assert_eq!(err.etype(), &ConnectionClosed);
|
||||
assert!(!err.retry());
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_bind_to_port_range_on_connect() {
|
||||
fn get_ip_local_port_range() -> (u16, u16) {
|
||||
let path = "/proc/sys/net/ipv4/ip_local_port_range";
|
||||
let file = std::fs::read_to_string(path).unwrap();
|
||||
let mut parts = file.split_whitespace();
|
||||
(
|
||||
parts.next().unwrap().parse().unwrap(),
|
||||
parts.next().unwrap().parse().unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
// one-off mock server
|
||||
async fn mock_inet_connect_server() {
|
||||
use tokio::net::TcpListener;
|
||||
let listener = TcpListener::bind("127.0.0.1:10020").await.unwrap();
|
||||
if let Ok((mut stream, _addr)) = listener.accept().await {
|
||||
stream.write_all(b"HTTP/1.1 200 OK\r\n\r\n").await.unwrap();
|
||||
// wait a bit so that the client can read
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
|
||||
fn in_port_range(session: Stream, lower: u16, upper: u16) -> bool {
|
||||
let digest = session.get_socket_digest();
|
||||
let local_addr = digest
|
||||
.as_ref()
|
||||
.and_then(|s| s.local_addr())
|
||||
.unwrap()
|
||||
.as_inet()
|
||||
.unwrap();
|
||||
|
||||
// assert range
|
||||
local_addr.port() >= lower && local_addr.port() <= upper
|
||||
}
|
||||
|
||||
tokio::spawn(async {
|
||||
mock_inet_connect_server().await;
|
||||
});
|
||||
// wait for the server to start
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
|
||||
// need to read /proc/sys/net/ipv4/ip_local_port_range for this test to work
|
||||
// IP_LOCAL_PORT_RANGE clamp only works on ports in /proc/sys/net/ipv4/ip_local_port_range
|
||||
let (low, _) = get_ip_local_port_range();
|
||||
let high = low + 1;
|
||||
|
||||
let peer = HttpPeer::new("127.0.0.1:10020".to_string(), false, "".to_string());
|
||||
let mut bind_to = BindTo {
|
||||
addr: "127.0.0.1:0".parse().ok(),
|
||||
..Default::default()
|
||||
};
|
||||
bind_to.set_port_range(Some((low, high))).unwrap();
|
||||
|
||||
let session1 = connect(&peer, Some(bind_to.clone())).await.unwrap();
|
||||
assert!(in_port_range(session1, low, high));
|
||||
|
||||
// execute more connect()
|
||||
let session2 = connect(&peer, Some(bind_to.clone())).await.unwrap();
|
||||
assert!(in_port_range(session2, low, high));
|
||||
let session3 = connect(&peer, Some(bind_to.clone())).await.unwrap();
|
||||
assert!(in_port_range(session3, low, high));
|
||||
|
||||
// disabled fallback, should be AddrNotAvailable error
|
||||
let err = connect(&peer, Some(bind_to.clone())).await.unwrap_err();
|
||||
assert_eq!(err.etype(), &InternalError);
|
||||
|
||||
// enable fallback, assert not in port range but successful
|
||||
bind_to.set_fallback(true);
|
||||
let session4 = connect(&peer, Some(bind_to.clone())).await.unwrap();
|
||||
assert!(!in_port_range(session4, low, high));
|
||||
|
||||
// works without bind IP, shift up to use new ports
|
||||
let low = low + 2;
|
||||
let high = low + 1;
|
||||
let mut bind_to = BindTo::default();
|
||||
bind_to.set_port_range(Some((low, high))).unwrap();
|
||||
let session5 = connect(&peer, Some(bind_to.clone())).await.unwrap();
|
||||
assert!(in_port_range(session5, low, high));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bind_to_port_ranges() {
|
||||
let addr = "127.0.0.1:0".parse().ok();
|
||||
let mut bind_to = BindTo {
|
||||
addr,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// None because the previous value was None
|
||||
bind_to.set_port_range(None).unwrap();
|
||||
assert!(bind_to.port_range.is_none());
|
||||
|
||||
// zeroes are handled
|
||||
bind_to.set_port_range(Some((0, 0))).unwrap();
|
||||
assert_eq!(bind_to.port_range, Some((0, 0)));
|
||||
|
||||
// zeroes because the previous value was Some
|
||||
bind_to.set_port_range(None).unwrap();
|
||||
assert_eq!(bind_to.port_range, Some((0, 0)));
|
||||
|
||||
// low > high is error
|
||||
assert!(bind_to.set_port_range(Some((2000, 1000))).is_err());
|
||||
|
||||
// low < high success
|
||||
bind_to.set_port_range(Some((1000, 2000))).unwrap();
|
||||
assert_eq!(bind_to.port_range, Some((1000, 2000)));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,8 +24,8 @@ use crate::server::configuration::ServerConf;
|
|||
use crate::tls::ssl::SslConnector;
|
||||
use crate::upstreams::peer::{Peer, ALPN};
|
||||
|
||||
use l4::connect as l4_connect;
|
||||
pub use l4::Connect as L4Connect;
|
||||
use l4::{connect as l4_connect, BindTo};
|
||||
use log::{debug, error, warn};
|
||||
use offload::OffloadRuntime;
|
||||
use parking_lot::RwLock;
|
||||
|
@ -273,7 +273,7 @@ impl TransportConnector {
|
|||
// connection timeout if there is one
|
||||
async fn do_connect<P: Peer + Send + Sync>(
|
||||
peer: &P,
|
||||
bind_to: Option<SocketAddr>,
|
||||
bind_to: Option<BindTo>,
|
||||
alpn_override: Option<ALPN>,
|
||||
tls_ctx: &SslConnector,
|
||||
) -> Result<Stream> {
|
||||
|
@ -296,7 +296,7 @@ async fn do_connect<P: Peer + Send + Sync>(
|
|||
// Perform the actual L4 and tls connection steps with no timeout
|
||||
async fn do_connect_inner<P: Peer + Send + Sync>(
|
||||
peer: &P,
|
||||
bind_to: Option<SocketAddr>,
|
||||
bind_to: Option<BindTo>,
|
||||
alpn_override: Option<ALPN>,
|
||||
tls_ctx: &SslConnector,
|
||||
) -> Result<Stream> {
|
||||
|
|
|
@ -27,6 +27,8 @@ use std::os::unix::io::{AsRawFd, RawFd};
|
|||
use std::time::Duration;
|
||||
use tokio::net::{TcpSocket, TcpStream, UnixStream};
|
||||
|
||||
use crate::connectors::l4::BindTo;
|
||||
|
||||
/// The (copy of) the kernel struct tcp_info returns
|
||||
#[repr(C)]
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
|
@ -160,9 +162,12 @@ fn cvt_linux_error(t: i32) -> io::Result<i32> {
|
|||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn ip_bind_addr_no_port(fd: RawFd, val: bool) -> io::Result<()> {
|
||||
const IP_BIND_ADDRESS_NO_PORT: i32 = 24;
|
||||
|
||||
set_opt(fd, libc::IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT, val as c_int)
|
||||
set_opt(
|
||||
fd,
|
||||
libc::IPPROTO_IP,
|
||||
libc::IP_BIND_ADDRESS_NO_PORT,
|
||||
val as c_int,
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
|
@ -170,6 +175,26 @@ fn ip_bind_addr_no_port(_fd: RawFd, _val: bool) -> io::Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// IP_LOCAL_PORT_RANGE is only supported on Linux 6.3 and higher,
|
||||
/// ip_local_port_range() is a no-op on unsupported versions.
|
||||
/// See the [man page](https://man7.org/linux/man-pages/man7/ip.7.html) for more details.
|
||||
#[cfg(target_os = "linux")]
|
||||
fn ip_local_port_range(fd: RawFd, low: u16, high: u16) -> io::Result<()> {
|
||||
const IP_LOCAL_PORT_RANGE: i32 = 51;
|
||||
let range: u32 = (low as u32) | ((high as u32) << 16);
|
||||
|
||||
let result = set_opt(fd, libc::IPPROTO_IP, IP_LOCAL_PORT_RANGE, range as c_int);
|
||||
match result {
|
||||
Err(e) if e.raw_os_error() != Some(libc::ENOPROTOOPT) => Err(e),
|
||||
_ => Ok(()), // no error or ENOPROTOOPT
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
fn ip_local_port_range(_fd: RawFd, _low: u16, _high: u16) -> io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn set_so_keepalive(fd: RawFd, val: bool) -> io::Result<()> {
|
||||
set_opt(fd, libc::SOL_SOCKET, libc::SO_KEEPALIVE, val as c_int)
|
||||
|
@ -310,14 +335,42 @@ pub fn get_socket_cookie(_fd: RawFd) -> io::Result<u64> {
|
|||
Ok(0) // SO_COOKIE is a Linux concept
|
||||
}
|
||||
|
||||
/// connect() to the given address while optionally binding to the specific source address.
|
||||
/// connect() to the given address while optionally binding to the specific source address and port range.
|
||||
///
|
||||
/// The `set_socket` callback can be used to tune the socket before `connect()` is called.
|
||||
///
|
||||
/// If a [`BindTo`] is set with a port range and fallback setting enabled this function will retry
|
||||
/// on EADDRNOTAVAIL ignoring the port range.
|
||||
///
|
||||
/// `IP_BIND_ADDRESS_NO_PORT` is used.
|
||||
pub(crate) async fn connect_with<F: FnOnce(&TcpSocket) -> Result<()>>(
|
||||
/// `IP_LOCAL_PORT_RANGE` is used if a port range is set on [`BindTo`].
|
||||
pub(crate) async fn connect_with<F: FnOnce(&TcpSocket) -> Result<()> + Clone>(
|
||||
addr: &SocketAddr,
|
||||
bind_to: Option<&SocketAddr>,
|
||||
bind_to: Option<&BindTo>,
|
||||
set_socket: F,
|
||||
) -> Result<TcpStream> {
|
||||
if bind_to.as_ref().map_or(false, |b| b.will_fallback()) {
|
||||
// if we see an EADDRNOTAVAIL error clear the port range and try again
|
||||
let connect_result = inner_connect_with(addr, bind_to, set_socket.clone()).await;
|
||||
if let Err(e) = connect_result.as_ref() {
|
||||
if matches!(e.etype(), BindError) {
|
||||
let mut new_bind_to = BindTo::default();
|
||||
new_bind_to.addr = bind_to.as_ref().and_then(|b| b.addr);
|
||||
// reset the port range
|
||||
new_bind_to.set_port_range(None).unwrap();
|
||||
return inner_connect_with(addr, Some(&new_bind_to), set_socket).await;
|
||||
}
|
||||
}
|
||||
connect_result
|
||||
} else {
|
||||
// not retryable
|
||||
inner_connect_with(addr, bind_to, set_socket).await
|
||||
}
|
||||
}
|
||||
|
||||
async fn inner_connect_with<F: FnOnce(&TcpSocket) -> Result<()>>(
|
||||
addr: &SocketAddr,
|
||||
bind_to: Option<&BindTo>,
|
||||
set_socket: F,
|
||||
) -> Result<TcpStream> {
|
||||
let socket = if addr.is_ipv4() {
|
||||
|
@ -328,14 +381,23 @@ pub(crate) async fn connect_with<F: FnOnce(&TcpSocket) -> Result<()>>(
|
|||
.or_err(SocketError, "failed to create socket")?;
|
||||
|
||||
if cfg!(target_os = "linux") {
|
||||
ip_bind_addr_no_port(socket.as_raw_fd(), true)
|
||||
.or_err(SocketError, "failed to set socket opts")?;
|
||||
ip_bind_addr_no_port(socket.as_raw_fd(), true).or_err(
|
||||
SocketError,
|
||||
"failed to set socket opts IP_BIND_ADDRESS_NO_PORT",
|
||||
)?;
|
||||
|
||||
if let Some(baddr) = bind_to {
|
||||
if let Some(bind_to) = bind_to {
|
||||
if let Some((low, high)) = bind_to.port_range() {
|
||||
ip_local_port_range(socket.as_raw_fd(), low, high)
|
||||
.or_err(SocketError, "failed to set socket opts IP_LOCAL_PORT_RANGE")?;
|
||||
}
|
||||
|
||||
if let Some(baddr) = bind_to.addr {
|
||||
socket
|
||||
.bind(*baddr)
|
||||
.or_err_with(BindError, || format!("failed to bind to socket {}", *baddr))?;
|
||||
};
|
||||
.bind(baddr)
|
||||
.or_err_with(BindError, || format!("failed to bind to socket {}", baddr))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: add support for bind on other platforms
|
||||
|
||||
|
@ -349,8 +411,9 @@ pub(crate) async fn connect_with<F: FnOnce(&TcpSocket) -> Result<()>>(
|
|||
|
||||
/// connect() to the given address while optionally binding to the specific source address.
|
||||
///
|
||||
/// `IP_BIND_ADDRESS_NO_PORT` is used.
|
||||
pub async fn connect(addr: &SocketAddr, bind_to: Option<&SocketAddr>) -> Result<TcpStream> {
|
||||
/// `IP_BIND_ADDRESS_NO_PORT` is used
|
||||
/// `IP_LOCAL_PORT_RANGE` is used if a port range is set on [`BindTo`].
|
||||
pub async fn connect(addr: &SocketAddr, bind_to: Option<&BindTo>) -> Result<TcpStream> {
|
||||
connect_with(addr, bind_to, |_| Ok(())).await
|
||||
}
|
||||
|
||||
|
@ -365,7 +428,8 @@ fn wrap_os_connect_error(e: std::io::Error, context: String) -> Box<Error> {
|
|||
match e.kind() {
|
||||
ErrorKind::ConnectionRefused => Error::because(ConnectRefused, context, e),
|
||||
ErrorKind::TimedOut => Error::because(ConnectTimedout, context, e),
|
||||
ErrorKind::PermissionDenied | ErrorKind::AddrInUse | ErrorKind::AddrNotAvailable => {
|
||||
ErrorKind::AddrNotAvailable => Error::because(BindError, context, e),
|
||||
ErrorKind::PermissionDenied | ErrorKind::AddrInUse => {
|
||||
Error::because(InternalError, context, e)
|
||||
}
|
||||
_ => match e.raw_os_error() {
|
||||
|
|
|
@ -29,7 +29,7 @@ use std::path::{Path, PathBuf};
|
|||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::connectors::L4Connect;
|
||||
use crate::connectors::{l4::BindTo, L4Connect};
|
||||
use crate::protocols::l4::socket::SocketAddr;
|
||||
use crate::protocols::ConnFdReusable;
|
||||
use crate::protocols::TcpKeepalive;
|
||||
|
@ -110,8 +110,8 @@ pub trait Peer: Display + Clone {
|
|||
None => None,
|
||||
}
|
||||
}
|
||||
/// Which local source address this connection should be bind to.
|
||||
fn bind_to(&self) -> Option<&InetSocketAddr> {
|
||||
/// Information about the local source address this connection should be bound to.
|
||||
fn bind_to(&self) -> Option<&BindTo> {
|
||||
match self.get_peer_options() {
|
||||
Some(opt) => opt.bind_to.as_ref(),
|
||||
None => None,
|
||||
|
@ -243,7 +243,7 @@ impl Peer for BasicPeer {
|
|||
!self.sni.is_empty()
|
||||
}
|
||||
|
||||
fn bind_to(&self) -> Option<&InetSocketAddr> {
|
||||
fn bind_to(&self) -> Option<&BindTo> {
|
||||
None
|
||||
}
|
||||
|
||||
|
@ -294,7 +294,7 @@ impl Scheme {
|
|||
/// See [`Peer`] for the meaning of the fields
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct PeerOptions {
|
||||
pub bind_to: Option<InetSocketAddr>,
|
||||
pub bind_to: Option<BindTo>,
|
||||
pub connection_timeout: Option<Duration>,
|
||||
pub total_connection_timeout: Option<Duration>,
|
||||
pub read_timeout: Option<Duration>,
|
||||
|
@ -365,7 +365,7 @@ impl PeerOptions {
|
|||
|
||||
impl Display for PeerOptions {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
|
||||
if let Some(b) = self.bind_to {
|
||||
if let Some(b) = self.bind_to.as_ref() {
|
||||
write!(f, "bind_to: {:?},", b)?;
|
||||
}
|
||||
if let Some(t) = self.connection_timeout {
|
||||
|
|
Loading…
Reference in a new issue