mirror of
https://github.com/cloudflare/pingora.git
synced 2024-09-20 02:31:35 +02:00
Add cache_not_modified_filter, handle etag/last-modified
This filter allows customizing the `ETag`/`Last-Modified` check to see if the proxy cache can return 304 Not Modified instead of the full response. Also flesh out the `ETag`/`Last-Modified` handling in the base `not_modified_filter` with `If-None-Match` and `If-Modified-Since`.
This commit is contained in:
parent
acee67f870
commit
3f682ea37f
9 changed files with 511 additions and 107 deletions
2
.bleep
2
.bleep
|
@ -1 +1 @@
|
||||||
d37f942d73e5b2921f026cf6e2b1f93a216d895b
|
f20e1a069e9b58d1472763f6b818d7962bee3a3f
|
|
@ -65,6 +65,7 @@ brotli = "3"
|
||||||
openssl-probe = "0.1"
|
openssl-probe = "0.1"
|
||||||
tokio-test = "0.4"
|
tokio-test = "0.4"
|
||||||
zstd = "0"
|
zstd = "0"
|
||||||
|
httpdate = "1"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
matches = "0.1"
|
matches = "0.1"
|
||||||
|
|
322
pingora-core/src/protocols/http/conditional_filter.rs
Normal file
322
pingora-core/src/protocols/http/conditional_filter.rs
Normal file
|
@ -0,0 +1,322 @@
|
||||||
|
// Copyright 2024 Cloudflare, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
//! Conditional filter (not modified) utilities
|
||||||
|
|
||||||
|
use http::{header::*, StatusCode};
|
||||||
|
use httpdate::{parse_http_date, HttpDate};
|
||||||
|
use pingora_error::{ErrorType::InvalidHTTPHeader, OrErr, Result};
|
||||||
|
use pingora_http::{RequestHeader, ResponseHeader};
|
||||||
|
|
||||||
|
/// Evaluates conditional headers according to the [RFC](https://datatracker.ietf.org/doc/html/rfc9111#name-handling-a-received-validat).
|
||||||
|
///
|
||||||
|
/// Returns true if the request should receive 304 Not Modified.
|
||||||
|
pub fn not_modified_filter(req: &RequestHeader, resp: &ResponseHeader) -> bool {
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc9110#name-304-not-modified
|
||||||
|
// 304 can only validate 200
|
||||||
|
if resp.status != StatusCode::OK {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Evulation of conditional headers, based on RFC:
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc9111#name-handling-a-received-validat
|
||||||
|
|
||||||
|
// TODO: If-Match and If-Unmodified-Since, and returning 412 Precondition Failed
|
||||||
|
// Note that this function is currently used only for proxy cache,
|
||||||
|
// and the current RFCs have some conflicting opinions as to whether
|
||||||
|
// If-Match and If-Unmodified-Since can be used. https://github.com/httpwg/http-core/issues/1111
|
||||||
|
|
||||||
|
// Conditional request precedence:
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc9110#name-precedence-of-preconditions
|
||||||
|
// If-None-Match should be handled before If-Modified-Since.
|
||||||
|
// XXX: In nginx, IMS is actually checked first, which may cause compatibility issues
|
||||||
|
// for certain origins/clients.
|
||||||
|
|
||||||
|
if req.headers.contains_key(IF_NONE_MATCH) {
|
||||||
|
if let Some(etag) = resp.headers.get(ETAG) {
|
||||||
|
for inm in req.headers.get_all(IF_NONE_MATCH) {
|
||||||
|
if weak_validate_etag(inm.as_bytes(), etag.as_bytes()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc9110#field.if-modified-since
|
||||||
|
// "MUST ignore If-Modified-Since if the request contains an If-None-Match header"
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// GET/HEAD only https://datatracker.ietf.org/doc/html/rfc9110#field.if-modified-since
|
||||||
|
if matches!(req.method, http::Method::GET | http::Method::HEAD) {
|
||||||
|
if let Ok(Some(if_modified_since)) = req_header_as_http_date(req, &IF_MODIFIED_SINCE) {
|
||||||
|
if let Ok(Some(last_modified)) = resp_header_as_http_date(resp, &LAST_MODIFIED) {
|
||||||
|
if if_modified_since >= last_modified {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drops leading ASCII whitespace bytes and returns the rest of the slice.
// Behaves like the nightly slice API of the same name.
// TODO: use `trim_ascii_start` when it stabilizes https://doc.rust-lang.org/std/primitive.slice.html#method.trim_ascii_start
fn trim_ascii_start(bytes: &[u8]) -> &[u8] {
    // Index of the first non-whitespace byte; an all-whitespace (or empty)
    // input yields the empty tail.
    let start = bytes
        .iter()
        .position(|b| !b.is_ascii_whitespace())
        .unwrap_or(bytes.len());
    &bytes[start..]
}
|
||||||
|
|
||||||
|
/// Search for an ETag matching `target_etag` from the input header, using
|
||||||
|
/// [weak comparison](https://datatracker.ietf.org/doc/html/rfc9110#section-8.8.3.2).
|
||||||
|
/// Multiple ETags can exist in the header as a comma-separated list.
|
||||||
|
///
|
||||||
|
/// Returns true if a matching ETag exists.
|
||||||
|
pub fn weak_validate_etag(input_etag_header: &[u8], target_etag: &[u8]) -> bool {
|
||||||
|
// ETag comparison: https://datatracker.ietf.org/doc/html/rfc9110#section-8.8.3.2
|
||||||
|
fn strip_weak_prefix(etag: &[u8]) -> &[u8] {
|
||||||
|
// Weak ETags are prefaced with `W/`
|
||||||
|
etag.strip_prefix(b"W/").unwrap_or(etag)
|
||||||
|
}
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.2 unsafe method only
|
||||||
|
if input_etag_header == b"*" {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The RFC defines ETags here: https://datatracker.ietf.org/doc/html/rfc9110#section-8.8.3
|
||||||
|
// The RFC requires ETags to be wrapped in double quotes, though some legacy origins or clients
|
||||||
|
// don't adhere to this.
|
||||||
|
// Unfortunately by allowing non-quoted etags, parsing becomes a little more complicated.
|
||||||
|
//
|
||||||
|
// This implementation uses nginx's algorithm for parsing ETags, which can handle both quoted
|
||||||
|
// and non-quoted ETags. It essentially does a substring comparison at each comma divider,
|
||||||
|
// searching for an exact match of the ETag (optional double quotes included) followed by
|
||||||
|
// either EOF or another comma.
|
||||||
|
//
|
||||||
|
// Clients and upstreams should still ideally adhere to quoted ETags to disambiguate
|
||||||
|
// situations where commas are contained within the ETag (allowed by the RFC).
|
||||||
|
// XXX: This nginx algorithm will handle matching against ETags with commas correctly, but only
|
||||||
|
// if the target ETag is a quoted RFC-compliant ETag.
|
||||||
|
//
|
||||||
|
// For example, consider an if-none-match header: `"xyzzy,xyz,x,y", "xyzzy"`.
|
||||||
|
// If the target ETag is double quoted as mandated by the RFC like `"xyz,x"`, this algorithm
|
||||||
|
// will correctly report no matching ETag.
|
||||||
|
// But if the target ETag is not double quoted like `xyz,x`, it will "incorrectly" match
|
||||||
|
// against the substring after the first comma inside the first quoted ETag.
|
||||||
|
|
||||||
|
// Search for the target at each comma delimiter
|
||||||
|
let target_etag = strip_weak_prefix(target_etag);
|
||||||
|
let mut remaining = strip_weak_prefix(input_etag_header);
|
||||||
|
while let Some(search_slice) = remaining.get(0..target_etag.len()) {
|
||||||
|
if search_slice == target_etag {
|
||||||
|
remaining = &remaining[target_etag.len()..];
|
||||||
|
// check if there's any content after the matched substring
|
||||||
|
// skip any whitespace
|
||||||
|
remaining = trim_ascii_start(remaining);
|
||||||
|
if matches!(remaining.first(), None | Some(b',')) {
|
||||||
|
// we are either at the end of the header, or at a comma delimiter
|
||||||
|
// which means this is a match
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// find the next delimiter (ignore any remaining part of the non-matching etag)
|
||||||
|
let Some(next_delimiter_pos) = remaining.iter().position(|&b| b == b',') else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
remaining = &remaining[next_delimiter_pos..];
|
||||||
|
// find the next etag slice to compare
|
||||||
|
// ignore extraneous delimiters and whitespace
|
||||||
|
let Some(next_etag_pos) = remaining
|
||||||
|
.iter()
|
||||||
|
.position(|&b| !b.is_ascii_whitespace() && b != b',')
|
||||||
|
else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
remaining = &remaining[next_etag_pos..];
|
||||||
|
|
||||||
|
remaining = strip_weak_prefix(remaining);
|
||||||
|
}
|
||||||
|
// remaining length < target etag length
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Utility function to parse an HTTP request header as an [HTTP-date](https://datatracker.ietf.org/doc/html/rfc9110#name-date-time-formats).
|
||||||
|
pub fn req_header_as_http_date<H>(req: &RequestHeader, header_name: H) -> Result<Option<HttpDate>>
|
||||||
|
where
|
||||||
|
H: AsHeaderName,
|
||||||
|
{
|
||||||
|
let Some(header_value) = req.headers.get(header_name) else {
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
Ok(Some(parse_bytes_as_http_date(header_value.as_bytes())?))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Utility function to parse an HTTP response header as an [HTTP-date](https://datatracker.ietf.org/doc/html/rfc9110#name-date-time-formats).
|
||||||
|
pub fn resp_header_as_http_date<H>(
|
||||||
|
resp: &ResponseHeader,
|
||||||
|
header_name: H,
|
||||||
|
) -> Result<Option<HttpDate>>
|
||||||
|
where
|
||||||
|
H: AsHeaderName,
|
||||||
|
{
|
||||||
|
let Some(header_value) = resp.headers.get(header_name) else {
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
Ok(Some(parse_bytes_as_http_date(header_value.as_bytes())?))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_bytes_as_http_date(bytes: &[u8]) -> Result<HttpDate> {
|
||||||
|
let input_time = std::str::from_utf8(bytes).explain_err(InvalidHTTPHeader, |_| {
|
||||||
|
"HTTP date has unsupported characters (bytes outside of UTF-8)"
|
||||||
|
})?;
|
||||||
|
Ok(parse_http_date(input_time)
|
||||||
|
.or_err(InvalidHTTPHeader, "Invalid HTTP date")?
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Utility function to convert the input response header to a 304 Not Modified response.
|
||||||
|
pub fn to_304(resp: &mut ResponseHeader) {
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc9110#name-304-not-modified
|
||||||
|
// XXX: https://datatracker.ietf.org/doc/html/rfc9110#name-content-length
|
||||||
|
// "A server may send content-length in 304", but no common web server does it
|
||||||
|
// So we drop both content-length and content-type for consistency/less surprise
|
||||||
|
resp.set_status(StatusCode::NOT_MODIFIED).unwrap();
|
||||||
|
resp.remove_header(&CONTENT_LENGTH);
|
||||||
|
resp.remove_header(&CONTENT_TYPE);
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc9110#section-15.4.5-4
|
||||||
|
// "SHOULD NOT generate representation metadata other than the above listed fields
|
||||||
|
// unless said metadata exists for the purpose of guiding cache updates"
|
||||||
|
// Remove some more representation metadata headers
|
||||||
|
resp.remove_header(&TRANSFER_ENCODING);
|
||||||
|
// note that the following are also stripped by nginx
|
||||||
|
resp.remove_header(&CONTENT_ENCODING);
|
||||||
|
resp.remove_header(&ACCEPT_RANGES);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    // Asserts the outcome of a weak ETag search, reporting the inputs on failure.
    fn check_etag(header: &[u8], target: &[u8], expected: bool) {
        assert_eq!(
            weak_validate_etag(header, target),
            expected,
            "header: {:?}, target: {:?}",
            String::from_utf8_lossy(header),
            String::from_utf8_lossy(target),
        );
    }

    #[test]
    fn test_if_modified_since() {
        const LAST_MODIFIED_DATE: &[u8] = b"Fri, 26 Mar 2010 00:05:00 GMT";

        // Runs the filter for a GET carrying the given If-Modified-Since against
        // a 200 response whose Last-Modified is `LAST_MODIFIED_DATE`.
        fn is_not_modified(if_modified_since: &[u8]) -> bool {
            let mut req = RequestHeader::build("GET", b"/", None).unwrap();
            req.insert_header("If-Modified-Since", if_modified_since)
                .unwrap();
            let mut resp = ResponseHeader::build(200, None).unwrap();
            resp.insert_header("Last-Modified", LAST_MODIFIED_DATE)
                .unwrap();
            not_modified_filter(&req, &resp)
        }

        // same date
        assert!(is_not_modified(b"Fri, 26 Mar 2010 00:05:00 GMT"));
        // before
        assert!(!is_not_modified(b"Fri, 26 Mar 2010 00:03:00 GMT"));
        // after
        assert!(is_not_modified(b"Sun, 28 Mar 2010 01:07:00 GMT"));
    }

    #[test]
    fn test_weak_validate_etag() {
        let target_weak_etag: &[u8] = br#"W/"xyzzy""#;
        let target_etag: &[u8] = br#""xyzzy""#;
        let both_targets = [target_etag, target_weak_etag];

        // wildcard matches anything
        check_etag(b"*", target_weak_etag, true);
        check_etag(b"*", target_etag, true);

        // weak and strong forms of the same tag match in every combination
        for header in both_targets {
            for target in both_targets {
                check_etag(header, target, true);
            }
        }

        // a different tag never matches, weak or strong
        let mismatch_weak_etag: &[u8] = br#"W/"abc""#;
        let mismatch_etag: &[u8] = br#""abc""#;
        for header in [mismatch_etag, mismatch_weak_etag] {
            for target in both_targets {
                check_etag(header, target, false);
            }
        }

        let multiple_etags = br#"a, "xyzzy","r2d2xxxx", "c3piozzzz",zzzfoo"#;
        for target in both_targets {
            check_etag(multiple_etags, target, true);
        }

        let multiple_mismatch_etags = br#"foobar", "r2d2xxxx", "c3piozzzz",zzzfoo"#;
        for target in both_targets {
            check_etag(multiple_mismatch_etags, target, false);
        }

        let multiple_mismatch_etags =
            br#"foobar", "r2d2xxxxyzzy", "c3piozzzz",zzzfoo, "xyzzy,xyzzy""#;
        for target in both_targets {
            check_etag(multiple_mismatch_etags, target, false);
        }

        // quoted target made up of commas only
        let target_comma_etag = br#"",,,""#;
        let multiple_mismatch_etags = br#",", ",,,,", ,,,,,,,,",,",",,,,,,""#;
        check_etag(multiple_mismatch_etags, target_comma_etag, false);
        let multiple_etags = br#",", ",,,,", ,,,,,,,,",,,",",,,,,,""#;
        check_etag(multiple_etags, target_comma_etag, true);
    }

    #[test]
    fn test_weak_validate_etag_unquoted() {
        // legacy unquoted etag
        let target_unquoted = b"xyzzy";
        check_etag(b"*", target_unquoted, true);

        // quoted and unquoted forms are distinct tags
        let strong_etag = br#""xyzzy""#;
        check_etag(strong_etag, target_unquoted, false);
        check_etag(target_unquoted, strong_etag, false);

        let multiple_etags = br#"a, "r2d2xxxx", "c3piozzzz", xyzzy"#;
        check_etag(multiple_etags, target_unquoted, true);

        let multiple_mismatch_etags =
            br#"foobar", "r2d2xxxxyzzy", "c3piozzzz",zzzfoo, "xyzzy,xyzzy""#;
        check_etag(multiple_mismatch_etags, target_unquoted, false);

        // in certain edge cases where commas are used alongside quoted ETags,
        // the test can fail if target is unquoted (the last ETag is intended to be one ETag)
        let multiple_mismatch_etags =
            br#"foobar", "r2d2xxxxyzzy", "c3piozzzz",zzzfoo, "xyzzy,xyzzy,xy""#;
        check_etag(multiple_mismatch_etags, target_unquoted, true);
    }
}
|
|
@ -17,6 +17,7 @@
|
||||||
mod body_buffer;
|
mod body_buffer;
|
||||||
pub mod client;
|
pub mod client;
|
||||||
pub mod compression;
|
pub mod compression;
|
||||||
|
pub mod conditional_filter;
|
||||||
pub(crate) mod date;
|
pub(crate) mod date;
|
||||||
pub mod error_resp;
|
pub mod error_resp;
|
||||||
pub mod server;
|
pub mod server;
|
||||||
|
|
|
@ -18,6 +18,7 @@ use pingora_cache::key::CacheHashKey;
|
||||||
use pingora_cache::lock::LockStatus;
|
use pingora_cache::lock::LockStatus;
|
||||||
use pingora_cache::max_file_size::ERR_RESPONSE_TOO_LARGE;
|
use pingora_cache::max_file_size::ERR_RESPONSE_TOO_LARGE;
|
||||||
use pingora_cache::{HitStatus, RespCacheable::*};
|
use pingora_cache::{HitStatus, RespCacheable::*};
|
||||||
|
use pingora_core::protocols::http::conditional_filter::to_304;
|
||||||
use pingora_core::protocols::http::v1::common::header_value_content_length;
|
use pingora_core::protocols::http::v1::common::header_value_content_length;
|
||||||
use pingora_core::ErrorType;
|
use pingora_core::ErrorType;
|
||||||
|
|
||||||
|
@ -256,8 +257,22 @@ impl<SV> HttpProxy<SV> {
|
||||||
|
|
||||||
let req = session.req_header();
|
let req = session.req_header();
|
||||||
|
|
||||||
let header_only = conditional_filter::not_modified_filter(req, &mut header)
|
let not_modified = match self.inner.cache_not_modified_filter(session, &header, ctx) {
|
||||||
|| req.method == http::method::Method::HEAD;
|
Ok(not_modified) => not_modified,
|
||||||
|
Err(e) => {
|
||||||
|
// fail open if cache_not_modified_filter errors,
|
||||||
|
// just return the whole original response
|
||||||
|
warn!(
|
||||||
|
"Failed to run cache not modified filter: {e}, {}",
|
||||||
|
self.inner.request_summary(session, ctx)
|
||||||
|
);
|
||||||
|
false
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if not_modified {
|
||||||
|
to_304(&mut header);
|
||||||
|
}
|
||||||
|
let header_only = not_modified || req.method == http::method::Method::HEAD;
|
||||||
|
|
||||||
// process range header if the cache storage supports seek
|
// process range header if the cache storage supports seek
|
||||||
let range_type = if seekable && !session.ignore_downstream_range {
|
let range_type = if seekable && !session.ignore_downstream_range {
|
||||||
|
@ -332,6 +347,51 @@ impl<SV> HttpProxy<SV> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Downstream revalidation, only needed when cache is on because otherwise origin
|
||||||
|
* will handle it */
|
||||||
|
pub(crate) fn downstream_response_conditional_filter(
|
||||||
|
&self,
|
||||||
|
use_cache: &mut ServeFromCache,
|
||||||
|
session: &Session,
|
||||||
|
resp: &mut ResponseHeader,
|
||||||
|
ctx: &mut SV::CTX,
|
||||||
|
) where
|
||||||
|
SV: ProxyHttp,
|
||||||
|
{
|
||||||
|
// TODO: range
|
||||||
|
let req = session.req_header();
|
||||||
|
|
||||||
|
let not_modified = match self.inner.cache_not_modified_filter(session, resp, ctx) {
|
||||||
|
Ok(not_modified) => not_modified,
|
||||||
|
Err(e) => {
|
||||||
|
// fail open if cache_not_modified_filter errors,
|
||||||
|
// just return the whole original response
|
||||||
|
warn!(
|
||||||
|
"Failed to run cache not modified filter: {e}, {}",
|
||||||
|
self.inner.request_summary(session, ctx)
|
||||||
|
);
|
||||||
|
false
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if not_modified {
|
||||||
|
to_304(resp);
|
||||||
|
}
|
||||||
|
let header_only = not_modified || req.method == http::method::Method::HEAD;
|
||||||
|
if header_only {
|
||||||
|
if use_cache.is_on() {
|
||||||
|
// tell cache to stop after yielding header
|
||||||
|
use_cache.enable_header_only();
|
||||||
|
} else {
|
||||||
|
// headers only during cache miss, upstream should continue send
|
||||||
|
// body to cache, `session` will ignore body automatically because
|
||||||
|
// of the signature of `header` (304)
|
||||||
|
// TODO: we should drop body before/within this filter so that body
|
||||||
|
// filter only runs on data downstream sees
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: cache upstream header filter to add/remove headers
|
// TODO: cache upstream header filter to add/remove headers
|
||||||
|
|
||||||
pub(crate) async fn cache_http_task(
|
pub(crate) async fn cache_http_task(
|
||||||
|
@ -1022,72 +1082,6 @@ pub(crate) mod range_filter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://datatracker.ietf.org/doc/html/rfc7232
|
|
||||||
// Strictly speaking this module is also usable for web server, not just proxy
|
|
||||||
mod conditional_filter {
|
|
||||||
use super::*;
|
|
||||||
use http::header::*;
|
|
||||||
|
|
||||||
// return if 304 is applied to the response
|
|
||||||
pub fn not_modified_filter(req: &RequestHeader, resp: &mut ResponseHeader) -> bool {
|
|
||||||
// https://datatracker.ietf.org/doc/html/rfc7232#section-4.1
|
|
||||||
// 304 can only validate 200
|
|
||||||
if resp.status != StatusCode::OK {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: If-Match and if If-Unmodified-Since
|
|
||||||
|
|
||||||
// https://datatracker.ietf.org/doc/html/rfc7232#section-6
|
|
||||||
|
|
||||||
if let Some(inm) = req.headers.get(IF_NONE_MATCH) {
|
|
||||||
if let Some(etag) = resp.headers.get(ETAG) {
|
|
||||||
if validate_etag(inm.as_bytes(), etag.as_bytes()) {
|
|
||||||
to_304(resp);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// MUST ignore If-Modified-Since if the request contains an If-None-Match header
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: GET/HEAD only https://datatracker.ietf.org/doc/html/rfc7232#section-3.3
|
|
||||||
if let Some(since) = req.headers.get(IF_MODIFIED_SINCE) {
|
|
||||||
if let Some(last) = resp.headers.get(LAST_MODIFIED) {
|
|
||||||
if test_not_modified(since.as_bytes(), last.as_bytes()) {
|
|
||||||
to_304(resp);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
false
|
|
||||||
}
|
|
||||||
|
|
||||||
fn validate_etag(input_etag: &[u8], target_etag: &[u8]) -> bool {
|
|
||||||
// https://datatracker.ietf.org/doc/html/rfc7232#section-3.2 unsafe method only
|
|
||||||
if input_etag == b"*" {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
// TODO: etag validation: https://datatracker.ietf.org/doc/html/rfc7232#section-2.3.2
|
|
||||||
input_etag == target_etag
|
|
||||||
}
|
|
||||||
|
|
||||||
fn test_not_modified(input_time: &[u8], last_modified_time: &[u8]) -> bool {
|
|
||||||
// TODO: http-date comparison: https://datatracker.ietf.org/doc/html/rfc7232#section-2.2.2
|
|
||||||
input_time == last_modified_time
|
|
||||||
}
|
|
||||||
|
|
||||||
fn to_304(resp: &mut ResponseHeader) {
|
|
||||||
// https://datatracker.ietf.org/doc/html/rfc7232#section-4.1
|
|
||||||
// XXX: https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.2
|
|
||||||
// "A server may send content-length in 304", but no common web server does it
|
|
||||||
// So we drop both content-length and content-type for consistency/less surprise
|
|
||||||
resp.set_status(StatusCode::NOT_MODIFIED).unwrap();
|
|
||||||
resp.remove_header(&CONTENT_LENGTH);
|
|
||||||
resp.remove_header(&CONTENT_TYPE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// a state machine for proxy logic to tell when to use cache in the case of
|
// a state machine for proxy logic to tell when to use cache in the case of
|
||||||
// miss/revalidation/error.
|
// miss/revalidation/error.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -1190,27 +1184,3 @@ impl ServeFromCache {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Downstream revalidation, only needed when cache is on because otherwise origin
|
|
||||||
* will handle it */
|
|
||||||
pub(crate) fn downstream_response_conditional_filter(
|
|
||||||
use_cache: &mut ServeFromCache,
|
|
||||||
req: &RequestHeader,
|
|
||||||
resp: &mut ResponseHeader,
|
|
||||||
) {
|
|
||||||
// TODO: range
|
|
||||||
let header_only = conditional_filter::not_modified_filter(req, resp)
|
|
||||||
|| req.method == http::method::Method::HEAD;
|
|
||||||
if header_only {
|
|
||||||
if use_cache.is_on() {
|
|
||||||
// tell cache to stop after yielding header
|
|
||||||
use_cache.enable_header_only();
|
|
||||||
} else {
|
|
||||||
// headers only during cache miss, upstream should continue send
|
|
||||||
// body to cache, `session` will ignore body automatically because
|
|
||||||
// of the signature of `header` (304)
|
|
||||||
// TODO: we should drop body before/within this filter so that body
|
|
||||||
// filter only runs on data downstream sees
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -463,20 +463,21 @@ impl<SV> HttpProxy<SV> {
|
||||||
|
|
||||||
match task {
|
match task {
|
||||||
HttpTask::Header(mut header, end) => {
|
HttpTask::Header(mut header, end) => {
|
||||||
let req = session.req_header();
|
|
||||||
|
|
||||||
/* Downstream revalidation/range, only needed when cache is on because otherwise origin
|
/* Downstream revalidation/range, only needed when cache is on because otherwise origin
|
||||||
* will handle it */
|
* will handle it */
|
||||||
// TODO: if cache is disabled during response phase, we should still do the filter
|
// TODO: if cache is disabled during response phase, we should still do the filter
|
||||||
if session.cache.enabled() {
|
if session.cache.enabled() {
|
||||||
proxy_cache::downstream_response_conditional_filter(
|
self.downstream_response_conditional_filter(
|
||||||
serve_from_cache,
|
serve_from_cache,
|
||||||
req,
|
session,
|
||||||
&mut header,
|
&mut header,
|
||||||
|
ctx,
|
||||||
);
|
);
|
||||||
if !session.ignore_downstream_range {
|
if !session.ignore_downstream_range {
|
||||||
let range_type =
|
let range_type = proxy_cache::range_filter::range_header_filter(
|
||||||
proxy_cache::range_filter::range_header_filter(req, &mut header);
|
session.req_header(),
|
||||||
|
&mut header,
|
||||||
|
);
|
||||||
range_body_filter.set(range_type);
|
range_body_filter.set(range_type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -484,7 +485,7 @@ impl<SV> HttpProxy<SV> {
|
||||||
/* Convert HTTP 1.0 style response to chunked encoding so that we don't
|
/* Convert HTTP 1.0 style response to chunked encoding so that we don't
|
||||||
* have to close the downstream connection */
|
* have to close the downstream connection */
|
||||||
// these status codes / method cannot have body, so no need to add chunked encoding
|
// these status codes / method cannot have body, so no need to add chunked encoding
|
||||||
let no_body = req.method == http::method::Method::HEAD
|
let no_body = session.req_header().method == http::method::Method::HEAD
|
||||||
|| matches!(header.status.as_u16(), 204 | 304);
|
|| matches!(header.status.as_u16(), 204 | 304);
|
||||||
if !no_body
|
if !no_body
|
||||||
&& !header.status.is_informational()
|
&& !header.status.is_informational()
|
||||||
|
|
|
@ -418,10 +418,11 @@ impl<SV> HttpProxy<SV> {
|
||||||
* will handle it */
|
* will handle it */
|
||||||
// TODO: if cache is disabled during response phase, we should still do the filter
|
// TODO: if cache is disabled during response phase, we should still do the filter
|
||||||
if session.cache.enabled() {
|
if session.cache.enabled() {
|
||||||
proxy_cache::downstream_response_conditional_filter(
|
self.downstream_response_conditional_filter(
|
||||||
serve_from_cache,
|
serve_from_cache,
|
||||||
req,
|
session,
|
||||||
&mut header,
|
&mut header,
|
||||||
|
ctx,
|
||||||
);
|
);
|
||||||
if !session.ignore_downstream_range {
|
if !session.ignore_downstream_range {
|
||||||
let range_type =
|
let range_type =
|
||||||
|
|
|
@ -138,6 +138,29 @@ pub trait ProxyHttp {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Decide if the incoming request's condition _fails_ against the cached response.
|
||||||
|
///
|
||||||
|
/// Returning `Ok(true)` means that the response does _not_ match against the condition, and
|
||||||
|
/// that the proxy can return `304 Not Modified` downstream.
|
||||||
|
///
|
||||||
|
/// An example is a conditional GET request with `If-None-Match: "foobar"`. If the cached
|
||||||
|
/// response contains the `ETag: "foobar"`, then the condition fails, and `304 Not Modified`
|
||||||
|
/// should be returned. Else, the condition passes which means the full `200 OK` response must
|
||||||
|
/// be sent.
|
||||||
|
fn cache_not_modified_filter(
|
||||||
|
&self,
|
||||||
|
session: &Session,
|
||||||
|
resp: &ResponseHeader,
|
||||||
|
_ctx: &mut Self::CTX,
|
||||||
|
) -> Result<bool> {
|
||||||
|
Ok(
|
||||||
|
pingora_core::protocols::http::conditional_filter::not_modified_filter(
|
||||||
|
session.req_header(),
|
||||||
|
resp,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
/// Modify the request before it is sent to the upstream
|
/// Modify the request before it is sent to the upstream
|
||||||
///
|
///
|
||||||
/// Unlike [Self::request_filter()], this filter allows to change the request headers to send
|
/// Unlike [Self::request_filter()], this filter allows to change the request headers to send
|
||||||
|
|
|
@ -333,15 +333,16 @@ mod test_cache {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_cache_downstream_revalidation() {
|
async fn test_cache_downstream_revalidation_etag() {
|
||||||
init();
|
init();
|
||||||
let url = "http://127.0.0.1:6148/unique/test_downstream_revalidation/revalidate_now";
|
let url = "http://127.0.0.1:6148/unique/test_downstream_revalidation_etag/revalidate_now";
|
||||||
let client = reqwest::Client::new();
|
let client = reqwest::Client::new();
|
||||||
|
|
||||||
// MISS + 304
|
// MISS + 304
|
||||||
let res = client
|
let res = client
|
||||||
.get(url)
|
.get(url)
|
||||||
.header("If-None-Match", "\"abcd\"") // the fixed etag of this endpoint
|
.header("If-None-Match", "\"abcd\", \"foobar\"") // "abcd" is the fixed etag of this
|
||||||
|
// endpoint
|
||||||
.send()
|
.send()
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
@ -354,7 +355,7 @@ mod test_cache {
|
||||||
// HIT + 304
|
// HIT + 304
|
||||||
let res = client
|
let res = client
|
||||||
.get(url)
|
.get(url)
|
||||||
.header("If-None-Match", "\"abcd\"") // the fixed etag of this endpoint
|
.header("If-None-Match", "\"abcd\", \"foobar\"")
|
||||||
.send()
|
.send()
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
@ -366,12 +367,96 @@ mod test_cache {
|
||||||
|
|
||||||
assert_eq!(cache_miss_epoch, cache_hit_epoch);
|
assert_eq!(cache_miss_epoch, cache_hit_epoch);
|
||||||
|
|
||||||
|
// HIT + 200 (condition passed)
|
||||||
|
let res = client
|
||||||
|
.get(url)
|
||||||
|
.header("If-None-Match", "\"foobar\"")
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(res.status(), StatusCode::OK);
|
||||||
|
let headers = res.headers();
|
||||||
|
let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
|
||||||
|
assert_eq!(headers["x-cache-status"], "hit");
|
||||||
|
assert_eq!(res.text().await.unwrap(), "hello world");
|
||||||
|
|
||||||
|
assert_eq!(cache_miss_epoch, cache_hit_epoch);
|
||||||
|
|
||||||
sleep(Duration::from_millis(1100)).await; // ttl is 1
|
sleep(Duration::from_millis(1100)).await; // ttl is 1
|
||||||
|
|
||||||
// revalidated + 304
|
// revalidated + 304
|
||||||
let res = client
|
let res = client
|
||||||
.get(url)
|
.get(url)
|
||||||
.header("If-None-Match", "\"abcd\"") // the fixed etag of this endpoint
|
.header("If-None-Match", "\"abcd\", \"foobar\"")
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(res.status(), StatusCode::NOT_MODIFIED);
|
||||||
|
let headers = res.headers();
|
||||||
|
let cache_expired_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
|
||||||
|
assert_eq!(headers["x-cache-status"], "revalidated");
|
||||||
|
assert_eq!(res.text().await.unwrap(), ""); // 304 no body
|
||||||
|
|
||||||
|
// still the old object
|
||||||
|
assert_eq!(cache_expired_epoch, cache_hit_epoch);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_cache_downstream_revalidation_last_modified() {
|
||||||
|
init();
|
||||||
|
let url = "http://127.0.0.1:6148/unique/test_downstream_revalidation_last_modified/revalidate_now";
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
|
||||||
|
// MISS + 304
|
||||||
|
let res = client
|
||||||
|
.get(url)
|
||||||
|
.header("If-Modified-Since", "Tue, 03 May 2022 01:04:39 GMT") // fixed last-modified of
|
||||||
|
// the endpoint
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(res.status(), StatusCode::NOT_MODIFIED);
|
||||||
|
let headers = res.headers();
|
||||||
|
let cache_miss_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
|
||||||
|
assert_eq!(headers["x-cache-status"], "miss");
|
||||||
|
assert_eq!(res.text().await.unwrap(), ""); // 304 no body
|
||||||
|
|
||||||
|
// HIT + 304
|
||||||
|
let res = client
|
||||||
|
.get(url)
|
||||||
|
.header("If-Modified-Since", "Tue, 03 May 2022 01:11:39 GMT")
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(res.status(), StatusCode::NOT_MODIFIED);
|
||||||
|
let headers = res.headers();
|
||||||
|
let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
|
||||||
|
assert_eq!(headers["x-cache-status"], "hit");
|
||||||
|
assert_eq!(res.text().await.unwrap(), ""); // 304 no body
|
||||||
|
|
||||||
|
assert_eq!(cache_miss_epoch, cache_hit_epoch);
|
||||||
|
|
||||||
|
// HIT + 200 (condition passed)
|
||||||
|
let res = client
|
||||||
|
.get(url)
|
||||||
|
.header("If-Modified-Since", "Tue, 03 May 2022 00:11:39 GMT")
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(res.status(), StatusCode::OK);
|
||||||
|
let headers = res.headers();
|
||||||
|
let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
|
||||||
|
assert_eq!(headers["x-cache-status"], "hit");
|
||||||
|
assert_eq!(res.text().await.unwrap(), "hello world");
|
||||||
|
|
||||||
|
assert_eq!(cache_miss_epoch, cache_hit_epoch);
|
||||||
|
|
||||||
|
sleep(Duration::from_millis(1100)).await; // ttl is 1
|
||||||
|
|
||||||
|
// revalidated + 304
|
||||||
|
let res = client
|
||||||
|
.get(url)
|
||||||
|
.header("If-Modified-Since", "Tue, 03 May 2022 01:11:39 GMT")
|
||||||
.send()
|
.send()
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
Loading…
Reference in a new issue