Add support for binding to local port ranges and retrying on EADDRNOTAVAIL

This commit is contained in:
Andrew Hauck 2024-08-20 10:44:20 -07:00 committed by Edward Wang
parent acffb8aaf2
commit d1d7a87b76
5 changed files with 285 additions and 37 deletions

2
.bleep
View file

@ -1 +1 @@
28f94f2a402bbf66341bdac8fa670caf5b7311e9
90c70086397a4708a4dadfed6e6915ce6dc33481

View file

@ -33,8 +33,59 @@ pub trait Connect: std::fmt::Debug {
async fn connect(&self, addr: &SocketAddr) -> Result<Stream>;
}
/// Settings for binding on connect
#[derive(Clone, Debug, Default)]
pub struct BindTo {
// local ip address
pub addr: Option<InetSocketAddr>,
// port range
port_range: Option<(u16, u16)>,
// whether we fallback and try again on bind errors when a port range is set
fallback: bool,
}
impl BindTo {
/// Sets the port range we will bind to where the first item in the tuple is the lower bound
/// and the second item is the upper bound.
///
/// Note this bind option is only supported on Linux since 6.3, this is a no-op on other systems.
/// To reset the range, pass a `None` or `Some((0,0))`, more information can be found [here](https://man7.org/linux/man-pages/man7/ip.7.html)
pub fn set_port_range(&mut self, range: Option<(u16, u16)>) -> Result<()> {
if range.is_none() && self.port_range.is_none() {
// nothing to do
return Ok(());
}
match range {
// 0,0 is valid for resets
None | Some((0, 0)) => self.port_range = Some((0, 0)),
// set the port range if valid
Some((low, high)) if low > 0 && low < high => {
self.port_range = Some((low, high));
}
_ => return Error::e_explain(SocketError, "invalid port range: {range}"),
}
Ok(())
}
/// Set whether we fallback on no address available if a port range is set
pub fn set_fallback(&mut self, fallback: bool) {
self.fallback = fallback
}
/// Configured bind port range
pub fn port_range(&self) -> Option<(u16, u16)> {
self.port_range
}
/// Whether we attempt to fallback on no address available
pub fn will_fallback(&self) -> bool {
self.fallback && self.port_range.is_some()
}
}
/// Establish a connection (l4) to the given peer using its settings and an optional bind address.
pub(crate) async fn connect<P>(peer: &P, bind_to: Option<InetSocketAddr>) -> Result<Stream>
pub(crate) async fn connect<P>(peer: &P, bind_to: Option<BindTo>) -> Result<Stream>
where
P: Peer + Send + Sync,
{
@ -142,12 +193,8 @@ pub(crate) fn bind_to_random<P: Peer>(
peer: &P,
v4_list: &[InetSocketAddr],
v6_list: &[InetSocketAddr],
) -> Option<InetSocketAddr> {
let selected = peer.get_peer_options().and_then(|o| o.bind_to);
if selected.is_some() {
return selected;
}
) -> Option<BindTo> {
// helper function for randomly picking address
fn bind_to_ips(ips: &[InetSocketAddr]) -> Option<InetSocketAddr> {
match ips.len() {
0 => None,
@ -159,13 +206,31 @@ pub(crate) fn bind_to_random<P: Peer>(
}
}
match peer.address() {
let mut bind_to = peer.get_peer_options().and_then(|o| o.bind_to.clone());
if bind_to.as_ref().map(|b| b.addr).is_some() {
// already have a bind address selected
return bind_to;
}
let addr = match peer.address() {
SocketAddr::Inet(sockaddr) => match sockaddr {
InetSocketAddr::V4(_) => bind_to_ips(v4_list),
InetSocketAddr::V6(_) => bind_to_ips(v6_list),
},
SocketAddr::Unix(_) => None,
};
if addr.is_some() {
if let Some(bind_to) = bind_to.as_mut() {
bind_to.addr = addr;
} else {
bind_to = Some(BindTo {
addr,
..Default::default()
});
}
}
bind_to
}
use crate::protocols::raw_connect;
@ -238,16 +303,25 @@ mod tests {
#[tokio::test]
async fn test_conn_error_addr_not_avail() {
let peer = HttpPeer::new("127.0.0.1:121".to_string(), false, "".to_string());
let new_session = connect(&peer, Some("192.0.2.2:0".parse().unwrap())).await;
let addr = "192.0.2.2:0".parse().ok();
let bind_to = BindTo {
addr,
..Default::default()
};
let new_session = connect(&peer, Some(bind_to)).await;
assert_eq!(new_session.unwrap_err().etype(), &InternalError)
}
#[tokio::test]
async fn test_conn_error_other() {
let peer = HttpPeer::new("240.0.0.1:80".to_string(), false, "".to_string()); // non localhost
let addr = "127.0.0.1:0".parse().ok();
// create an error: cannot send from src addr: localhost to dst addr: a public IP
let new_session = connect(&peer, Some("127.0.0.1:0".parse().unwrap())).await;
let bind_to = BindTo {
addr,
..Default::default()
};
let new_session = connect(&peer, Some(bind_to)).await;
let error = new_session.unwrap_err();
// XXX: some system will allow the socket to bind and connect without error, only to timeout
assert!(error.etype() == &ConnectError || error.etype() == &ConnectTimedout)
@ -371,4 +445,114 @@ mod tests {
assert_eq!(err.etype(), &ConnectionClosed);
assert!(!err.retry());
}
#[cfg(target_os = "linux")]
#[tokio::test(flavor = "multi_thread")]
async fn test_bind_to_port_range_on_connect() {
fn get_ip_local_port_range() -> (u16, u16) {
let path = "/proc/sys/net/ipv4/ip_local_port_range";
let file = std::fs::read_to_string(path).unwrap();
let mut parts = file.split_whitespace();
(
parts.next().unwrap().parse().unwrap(),
parts.next().unwrap().parse().unwrap(),
)
}
// one-off mock server
async fn mock_inet_connect_server() {
use tokio::net::TcpListener;
let listener = TcpListener::bind("127.0.0.1:10020").await.unwrap();
if let Ok((mut stream, _addr)) = listener.accept().await {
stream.write_all(b"HTTP/1.1 200 OK\r\n\r\n").await.unwrap();
// wait a bit so that the client can read
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
}
}
fn in_port_range(session: Stream, lower: u16, upper: u16) -> bool {
let digest = session.get_socket_digest();
let local_addr = digest
.as_ref()
.and_then(|s| s.local_addr())
.unwrap()
.as_inet()
.unwrap();
// assert range
local_addr.port() >= lower && local_addr.port() <= upper
}
tokio::spawn(async {
mock_inet_connect_server().await;
});
// wait for the server to start
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
// need to read /proc/sys/net/ipv4/ip_local_port_range for this test to work
// IP_LOCAL_PORT_RANGE clamp only works on ports in /proc/sys/net/ipv4/ip_local_port_range
let (low, _) = get_ip_local_port_range();
let high = low + 1;
let peer = HttpPeer::new("127.0.0.1:10020".to_string(), false, "".to_string());
let mut bind_to = BindTo {
addr: "127.0.0.1:0".parse().ok(),
..Default::default()
};
bind_to.set_port_range(Some((low, high))).unwrap();
let session1 = connect(&peer, Some(bind_to.clone())).await.unwrap();
assert!(in_port_range(session1, low, high));
// execute more connect()
let session2 = connect(&peer, Some(bind_to.clone())).await.unwrap();
assert!(in_port_range(session2, low, high));
let session3 = connect(&peer, Some(bind_to.clone())).await.unwrap();
assert!(in_port_range(session3, low, high));
// disabled fallback, should be AddrNotAvailable error
let err = connect(&peer, Some(bind_to.clone())).await.unwrap_err();
assert_eq!(err.etype(), &InternalError);
// enable fallback, assert not in port range but successful
bind_to.set_fallback(true);
let session4 = connect(&peer, Some(bind_to.clone())).await.unwrap();
assert!(!in_port_range(session4, low, high));
// works without bind IP, shift up to use new ports
let low = low + 2;
let high = low + 1;
let mut bind_to = BindTo::default();
bind_to.set_port_range(Some((low, high))).unwrap();
let session5 = connect(&peer, Some(bind_to.clone())).await.unwrap();
assert!(in_port_range(session5, low, high));
}
#[test]
fn test_bind_to_port_ranges() {
let addr = "127.0.0.1:0".parse().ok();
let mut bind_to = BindTo {
addr,
..Default::default()
};
// None because the previous value was None
bind_to.set_port_range(None).unwrap();
assert!(bind_to.port_range.is_none());
// zeroes are handled
bind_to.set_port_range(Some((0, 0))).unwrap();
assert_eq!(bind_to.port_range, Some((0, 0)));
// zeroes because the previous value was Some
bind_to.set_port_range(None).unwrap();
assert_eq!(bind_to.port_range, Some((0, 0)));
// low > high is error
assert!(bind_to.set_port_range(Some((2000, 1000))).is_err());
// low < high success
bind_to.set_port_range(Some((1000, 2000))).unwrap();
assert_eq!(bind_to.port_range, Some((1000, 2000)));
}
}

View file

@ -24,8 +24,8 @@ use crate::server::configuration::ServerConf;
use crate::tls::ssl::SslConnector;
use crate::upstreams::peer::{Peer, ALPN};
use l4::connect as l4_connect;
pub use l4::Connect as L4Connect;
use l4::{connect as l4_connect, BindTo};
use log::{debug, error, warn};
use offload::OffloadRuntime;
use parking_lot::RwLock;
@ -273,7 +273,7 @@ impl TransportConnector {
// connection timeout if there is one
async fn do_connect<P: Peer + Send + Sync>(
peer: &P,
bind_to: Option<SocketAddr>,
bind_to: Option<BindTo>,
alpn_override: Option<ALPN>,
tls_ctx: &SslConnector,
) -> Result<Stream> {
@ -296,7 +296,7 @@ async fn do_connect<P: Peer + Send + Sync>(
// Perform the actual L4 and tls connection steps with no timeout
async fn do_connect_inner<P: Peer + Send + Sync>(
peer: &P,
bind_to: Option<SocketAddr>,
bind_to: Option<BindTo>,
alpn_override: Option<ALPN>,
tls_ctx: &SslConnector,
) -> Result<Stream> {

View file

@ -27,6 +27,8 @@ use std::os::unix::io::{AsRawFd, RawFd};
use std::time::Duration;
use tokio::net::{TcpSocket, TcpStream, UnixStream};
use crate::connectors::l4::BindTo;
/// The (copy of) the kernel struct tcp_info returns
#[repr(C)]
#[derive(Copy, Clone, Debug)]
@ -160,9 +162,12 @@ fn cvt_linux_error(t: i32) -> io::Result<i32> {
#[cfg(target_os = "linux")]
fn ip_bind_addr_no_port(fd: RawFd, val: bool) -> io::Result<()> {
const IP_BIND_ADDRESS_NO_PORT: i32 = 24;
set_opt(fd, libc::IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT, val as c_int)
set_opt(
fd,
libc::IPPROTO_IP,
libc::IP_BIND_ADDRESS_NO_PORT,
val as c_int,
)
}
#[cfg(not(target_os = "linux"))]
@ -170,6 +175,26 @@ fn ip_bind_addr_no_port(_fd: RawFd, _val: bool) -> io::Result<()> {
Ok(())
}
/// IP_LOCAL_PORT_RANGE is only supported on Linux 6.3 and higher,
/// ip_local_port_range() is a no-op on unsupported versions.
/// See the [man page](https://man7.org/linux/man-pages/man7/ip.7.html) for more details.
#[cfg(target_os = "linux")]
fn ip_local_port_range(fd: RawFd, low: u16, high: u16) -> io::Result<()> {
const IP_LOCAL_PORT_RANGE: i32 = 51;
let range: u32 = (low as u32) | ((high as u32) << 16);
let result = set_opt(fd, libc::IPPROTO_IP, IP_LOCAL_PORT_RANGE, range as c_int);
match result {
Err(e) if e.raw_os_error() != Some(libc::ENOPROTOOPT) => Err(e),
_ => Ok(()), // no error or ENOPROTOOPT
}
}
#[cfg(not(target_os = "linux"))]
fn ip_local_port_range(_fd: RawFd, _low: u16, _high: u16) -> io::Result<()> {
Ok(())
}
#[cfg(target_os = "linux")]
fn set_so_keepalive(fd: RawFd, val: bool) -> io::Result<()> {
set_opt(fd, libc::SOL_SOCKET, libc::SO_KEEPALIVE, val as c_int)
@ -310,14 +335,42 @@ pub fn get_socket_cookie(_fd: RawFd) -> io::Result<u64> {
Ok(0) // SO_COOKIE is a Linux concept
}
/// connect() to the given address while optionally binding to the specific source address.
/// connect() to the given address while optionally binding to the specific source address and port range.
///
/// The `set_socket` callback can be used to tune the socket before `connect()` is called.
///
/// If a [`BindTo`] is set with a port range and fallback setting enabled this function will retry
/// on EADDRNOTAVAIL ignoring the port range.
///
/// `IP_BIND_ADDRESS_NO_PORT` is used.
pub(crate) async fn connect_with<F: FnOnce(&TcpSocket) -> Result<()>>(
/// `IP_LOCAL_PORT_RANGE` is used if a port range is set on [`BindTo`].
pub(crate) async fn connect_with<F: FnOnce(&TcpSocket) -> Result<()> + Clone>(
addr: &SocketAddr,
bind_to: Option<&SocketAddr>,
bind_to: Option<&BindTo>,
set_socket: F,
) -> Result<TcpStream> {
if bind_to.as_ref().map_or(false, |b| b.will_fallback()) {
// if we see an EADDRNOTAVAIL error clear the port range and try again
let connect_result = inner_connect_with(addr, bind_to, set_socket.clone()).await;
if let Err(e) = connect_result.as_ref() {
if matches!(e.etype(), BindError) {
let mut new_bind_to = BindTo::default();
new_bind_to.addr = bind_to.as_ref().and_then(|b| b.addr);
// reset the port range
new_bind_to.set_port_range(None).unwrap();
return inner_connect_with(addr, Some(&new_bind_to), set_socket).await;
}
}
connect_result
} else {
// not retryable
inner_connect_with(addr, bind_to, set_socket).await
}
}
async fn inner_connect_with<F: FnOnce(&TcpSocket) -> Result<()>>(
addr: &SocketAddr,
bind_to: Option<&BindTo>,
set_socket: F,
) -> Result<TcpStream> {
let socket = if addr.is_ipv4() {
@ -328,14 +381,23 @@ pub(crate) async fn connect_with<F: FnOnce(&TcpSocket) -> Result<()>>(
.or_err(SocketError, "failed to create socket")?;
if cfg!(target_os = "linux") {
ip_bind_addr_no_port(socket.as_raw_fd(), true)
.or_err(SocketError, "failed to set socket opts")?;
ip_bind_addr_no_port(socket.as_raw_fd(), true).or_err(
SocketError,
"failed to set socket opts IP_BIND_ADDRESS_NO_PORT",
)?;
if let Some(baddr) = bind_to {
socket
.bind(*baddr)
.or_err_with(BindError, || format!("failed to bind to socket {}", *baddr))?;
};
if let Some(bind_to) = bind_to {
if let Some((low, high)) = bind_to.port_range() {
ip_local_port_range(socket.as_raw_fd(), low, high)
.or_err(SocketError, "failed to set socket opts IP_LOCAL_PORT_RANGE")?;
}
if let Some(baddr) = bind_to.addr {
socket
.bind(baddr)
.or_err_with(BindError, || format!("failed to bind to socket {}", baddr))?;
}
}
}
// TODO: add support for bind on other platforms
@ -349,8 +411,9 @@ pub(crate) async fn connect_with<F: FnOnce(&TcpSocket) -> Result<()>>(
/// connect() to the given address while optionally binding to the specific source address.
///
/// `IP_BIND_ADDRESS_NO_PORT` is used.
pub async fn connect(addr: &SocketAddr, bind_to: Option<&SocketAddr>) -> Result<TcpStream> {
/// `IP_BIND_ADDRESS_NO_PORT` is used
/// `IP_LOCAL_PORT_RANGE` is used if a port range is set on [`BindTo`].
pub async fn connect(addr: &SocketAddr, bind_to: Option<&BindTo>) -> Result<TcpStream> {
connect_with(addr, bind_to, |_| Ok(())).await
}
@ -365,7 +428,8 @@ fn wrap_os_connect_error(e: std::io::Error, context: String) -> Box<Error> {
match e.kind() {
ErrorKind::ConnectionRefused => Error::because(ConnectRefused, context, e),
ErrorKind::TimedOut => Error::because(ConnectTimedout, context, e),
ErrorKind::PermissionDenied | ErrorKind::AddrInUse | ErrorKind::AddrNotAvailable => {
ErrorKind::AddrNotAvailable => Error::because(BindError, context, e),
ErrorKind::PermissionDenied | ErrorKind::AddrInUse => {
Error::because(InternalError, context, e)
}
_ => match e.raw_os_error() {

View file

@ -29,7 +29,7 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use crate::connectors::L4Connect;
use crate::connectors::{l4::BindTo, L4Connect};
use crate::protocols::l4::socket::SocketAddr;
use crate::protocols::ConnFdReusable;
use crate::protocols::TcpKeepalive;
@ -110,8 +110,8 @@ pub trait Peer: Display + Clone {
None => None,
}
}
/// Which local source address this connection should be bind to.
fn bind_to(&self) -> Option<&InetSocketAddr> {
/// Information about the local source address this connection should be bound to.
fn bind_to(&self) -> Option<&BindTo> {
match self.get_peer_options() {
Some(opt) => opt.bind_to.as_ref(),
None => None,
@ -243,7 +243,7 @@ impl Peer for BasicPeer {
!self.sni.is_empty()
}
fn bind_to(&self) -> Option<&InetSocketAddr> {
fn bind_to(&self) -> Option<&BindTo> {
None
}
@ -294,7 +294,7 @@ impl Scheme {
/// See [`Peer`] for the meaning of the fields
#[derive(Clone, Debug)]
pub struct PeerOptions {
pub bind_to: Option<InetSocketAddr>,
pub bind_to: Option<BindTo>,
pub connection_timeout: Option<Duration>,
pub total_connection_timeout: Option<Duration>,
pub read_timeout: Option<Duration>,
@ -365,7 +365,7 @@ impl PeerOptions {
impl Display for PeerOptions {
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
if let Some(b) = self.bind_to {
if let Some(b) = self.bind_to.as_ref() {
write!(f, "bind_to: {:?},", b)?;
}
if let Some(t) = self.connection_timeout {