Eliminate unnecessary decode operations in node-web-streams-helpers.ts (#63427)

This PR is strictly a performance improvement. It should not change
implementation behavior in any way.

This PR replaces `decoder.decode()` operations by operating with the
encoded `Uint8Array` instances directly. I added some utility functions
to make things a bit easier to understand.

Ideally, this change also maintains a fair amount of code readability. 

Will measure the estimated performance improvement shortly.

Closes NEXT-2848
This commit is contained in:
Ethan Arrowood 2024-03-18 17:23:35 -06:00 committed by GitHub
parent 1439503b3b
commit 229cb14834
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 143 additions and 54 deletions

View file

@ -0,0 +1,21 @@
/**
 * Converts an ASCII-only string into its byte representation, one byte per
 * character. Only used at module load to build the lookup table below.
 */
const asciiBytes = (text: string) =>
  Uint8Array.from(text, (char) => char.charCodeAt(0))

/**
 * Pre-encoded HTML tag fragments, so that streamed chunks can be searched and
 * compared as raw bytes.
 */
export const ENCODED_TAGS = {
  // Opening tags deliberately omit the trailing `>` because the real tag may
  // carry attributes, e.g. `<body className=''>`.
  OPENING: {
    HTML: asciiBytes('<html'),
    BODY: asciiBytes('<body'),
  },
  // Closing tags never carry attributes, so they are encoded in full.
  CLOSED: {
    HEAD: asciiBytes('</head>'),
    BODY: asciiBytes('</body>'),
    HTML: asciiBytes('</html>'),
    BODY_AND_HTML: asciiBytes('</body></html>'),
  },
} as const

View file

@ -2,6 +2,12 @@ import { getTracer } from '../lib/trace/tracer'
import { AppRenderSpan } from '../lib/trace/constants'
import { DetachedPromise } from '../../lib/detached-promise'
import { scheduleImmediate, atLeastOneTask } from '../../lib/scheduler'
import { ENCODED_TAGS } from './encodedTags'
import {
indexOfUint8Array,
isEquivalentUint8Arrays,
removeFromUint8Array,
} from './uint8array-helpers'
function voidCatch() {
// this catcher is designed to be used with pipeTo where we expect the underlying
@ -175,8 +181,6 @@ function createHeadInsertionTransformStream(
let inserted = false
let freezing = false
const decoder = new TextDecoder()
// We need to track if this transform saw any bytes because if it didn't
// we won't want to insert any server HTML at all
let hasBytes = false
@ -191,17 +195,25 @@ function createHeadInsertionTransformStream(
}
const insertion = await insert()
const encodedInsertion = encoder.encode(insertion)
if (inserted) {
controller.enqueue(encoder.encode(insertion))
controller.enqueue(encodedInsertion)
controller.enqueue(chunk)
freezing = true
} else {
const content = decoder.decode(chunk)
const index = content.indexOf('</head>')
// TODO (@Ethan-Arrowood): Replace the generic `indexOfUint8Array` method with something finely tuned for the subset of things actually being checked for.
const index = indexOfUint8Array(chunk, ENCODED_TAGS.CLOSED.HEAD)
if (index !== -1) {
const insertedHeadContent =
content.slice(0, index) + insertion + content.slice(index)
controller.enqueue(encoder.encode(insertedHeadContent))
const insertedHeadContent = new Uint8Array(
chunk.length + encodedInsertion.length
)
insertedHeadContent.set(chunk.slice(0, index))
insertedHeadContent.set(encodedInsertion, index)
insertedHeadContent.set(
chunk.slice(index),
index + encodedInsertion.length
)
controller.enqueue(insertedHeadContent)
freezing = true
inserted = true
}
@ -344,7 +356,7 @@ function createMoveSuffixStream(
): TransformStream<Uint8Array, Uint8Array> {
let foundSuffix = false
const decoder = new TextDecoder()
const encodedSuffix = encoder.encode(suffix)
return new TransformStream({
transform(chunk, controller) {
@ -352,29 +364,26 @@ function createMoveSuffixStream(
return controller.enqueue(chunk)
}
const buf = decoder.decode(chunk)
const index = buf.indexOf(suffix)
const index = indexOfUint8Array(chunk, encodedSuffix)
if (index > -1) {
foundSuffix = true
// If the whole chunk is the suffix, then don't write anything, it will
// be written in the flush.
if (buf.length === suffix.length) {
if (chunk.length === suffix.length) {
return
}
// Write out the part before the suffix.
const before = buf.slice(0, index)
chunk = encoder.encode(before)
controller.enqueue(chunk)
const before = chunk.slice(0, index)
controller.enqueue(before)
// In the case where the suffix is in the middle of the chunk, we need
// to split the chunk into two parts.
if (buf.length > suffix.length + index) {
if (chunk.length > suffix.length + index) {
// Write out the part after the suffix.
const after = buf.slice(index + suffix.length)
chunk = encoder.encode(after)
controller.enqueue(chunk)
const after = chunk.slice(index + suffix.length)
controller.enqueue(after)
}
} else {
controller.enqueue(chunk)
@ -383,7 +392,7 @@ function createMoveSuffixStream(
flush(controller) {
// Even if we didn't find the suffix, the HTML is not valid if we don't
// add it, so insert it at the end.
controller.enqueue(encoder.encode(suffix))
controller.enqueue(encodedSuffix)
},
})
}
@ -392,7 +401,6 @@ function createStripDocumentClosingTagsTransform(): TransformStream<
Uint8Array,
Uint8Array
> {
const decoder = new TextDecoder()
return new TransformStream({
transform(chunk, controller) {
// We rely on the assumption that chunks will never break across a code unit.
@ -400,26 +408,21 @@ function createStripDocumentClosingTagsTransform(): TransformStream<
// flush into one chunk before streaming it forward which means the chunk will represent
// a single coherent utf-8 string. This is not safe to use if we change our streaming to no
// longer do this large buffered chunk
let originalContent = decoder.decode(chunk)
let content = originalContent
if (
content === '</body></html>' ||
content === '</body>' ||
content === '</html>'
isEquivalentUint8Arrays(chunk, ENCODED_TAGS.CLOSED.BODY_AND_HTML) ||
isEquivalentUint8Arrays(chunk, ENCODED_TAGS.CLOSED.BODY) ||
isEquivalentUint8Arrays(chunk, ENCODED_TAGS.CLOSED.HTML)
) {
// the entire chunk is the closing tags.
// the entire chunk is the closing tags; return without enqueueing anything.
return
} else {
// We assume these tags will go at together at the end of the document and that
// they won't appear anywhere else in the document. This is not really a safe assumption
// but until we revamp our streaming infra this is a performant way to string the tags
content = content.replace('</body>', '').replace('</html>', '')
if (content.length !== originalContent.length) {
return controller.enqueue(encoder.encode(content))
}
}
// We assume these tags will go together at the end of the document and that
// they won't appear anywhere else in the document. This is not really a safe assumption
// but until we revamp our streaming infra this is a performant way to strip the tags
chunk = removeFromUint8Array(chunk, ENCODED_TAGS.CLOSED.BODY)
chunk = removeFromUint8Array(chunk, ENCODED_TAGS.CLOSED.HTML)
controller.enqueue(chunk)
},
})
@ -436,30 +439,36 @@ export function createRootLayoutValidatorStream(): TransformStream<
> {
let foundHtml = false
let foundBody = false
const decoder = new TextDecoder()
let content = ''
let chunks: Uint8Array[] = []
let size = 0
return new TransformStream({
async transform(chunk, controller) {
// Peek into the streamed chunk to see if the tags are present.
if (!foundHtml || !foundBody) {
content += decoder.decode(chunk, { stream: true })
if (!foundHtml && content.includes('<html')) {
foundHtml = true
}
if (!foundBody && content.includes('<body')) {
foundBody = true
}
}
chunks.push(chunk)
size += chunk.length
controller.enqueue(chunk)
},
flush(controller) {
// Flush the decoder.
const content = new Uint8Array(size)
let offset = 0
for (const chunk of chunks) {
content.set(chunk, offset)
offset += chunk.length
}
// Peek into the streamed chunk to see if the tags are present.
if (!foundHtml || !foundBody) {
content += decoder.decode()
if (!foundHtml && content.includes('<html')) foundHtml = true
if (!foundBody && content.includes('<body')) foundBody = true
if (
!foundHtml &&
indexOfUint8Array(content, ENCODED_TAGS.OPENING.HTML) > -1
) {
foundHtml = true
}
if (
!foundBody &&
indexOfUint8Array(content, ENCODED_TAGS.OPENING.BODY) > -1
) {
foundBody = true
}
}
const missingTags: typeof window.__next_root_layout_missing_tags = []

View file

@ -0,0 +1,59 @@
/**
 * Locate the first occurrence of the byte sequence `b` inside `a`.
 *
 * @param a - The bytes to search through.
 * @param b - The byte sequence to look for.
 * @returns The offset in `a` where `b` first starts, `0` when `b` is empty,
 * or `-1` when `b` does not occur in `a`.
 */
export function indexOfUint8Array(a: Uint8Array, b: Uint8Array) {
  const needleLength = b.length
  if (needleLength === 0) return 0

  // Last offset in `a` at which `b` could still fit. It is negative when `a`
  // is empty or shorter than `b`, in which case no match is possible.
  const lastStart = a.length - needleLength
  if (lastStart < 0) return -1

  candidates: for (let start = 0; start <= lastStart; start++) {
    for (let offset = 0; offset < needleLength; offset++) {
      // First mismatching byte rules out this start position; try the next one.
      if (a[start + offset] !== b[offset]) continue candidates
    }
    // Every byte of `b` matched at this position.
    return start
  }

  return -1
}
/**
 * Determine whether two Uint8Arrays hold exactly the same bytes.
 *
 * @param a - First byte sequence.
 * @param b - Second byte sequence.
 * @returns `true` when both have the same length and the same value at every
 * index, `false` otherwise.
 */
export function isEquivalentUint8Arrays(a: Uint8Array, b: Uint8Array) {
  // Different lengths can never be equivalent; this also keeps the
  // element-wise comparison below within the bounds of `b`.
  return a.length === b.length && a.every((byte, index) => byte === b[index])
}
/**
 * Remove the first occurrence of Uint8Array `b` from Uint8Array `a`.
 *
 * - If `b` is not in `a`, `a` itself is returned unchanged.
 * - If `a` starts with `b`, a view over the remainder of `a` is returned. The
 *   view shares `a`'s underlying buffer; no bytes are copied.
 * - Otherwise a new Uint8Array of size `a.length - b.length` is returned,
 *   holding the bytes before and after the removed occurrence.
 *
 * @param a - The bytes to remove from.
 * @param b - The byte sequence to remove.
 * @returns `a` without the first occurrence of `b`, as described above.
 */
export function removeFromUint8Array(a: Uint8Array, b: Uint8Array) {
  const tagIndex = indexOfUint8Array(a, b)
  if (tagIndex === -1) return a
  if (tagIndex === 0) return a.subarray(b.length)

  const removed = new Uint8Array(a.length - b.length)
  // `subarray` creates views rather than copies, so each surviving byte is
  // copied exactly once (by `set`) instead of being duplicated into a
  // temporary array first.
  removed.set(a.subarray(0, tagIndex))
  removed.set(a.subarray(tagIndex + b.length), tagIndex)
  return removed
}