Eliminate unnecessary decode operations in node-web-streams-helper.ts (#63427)

This PR is strictly a performance improvement. It should not change implementation behavior in any way. This PR replaces `decoder.decode()` operations by operating on the encoded `Uint8Array` instances directly. I added some utility functions to make things a bit easier to understand. Ideally, this change also maintains a fair amount of code readability. I will measure the estimated performance improvement shortly. Closes NEXT-2848
2024-03-18 17:23:35 -06:00 · 2024-03-18 17:23:35 -06:00 · 229cb14834
commit 229cb14834
parent 1439503b3b
3 changed files with 143 additions and 54 deletions
--- a/packages/next/src/server/stream-utils/encodedTags.ts
+++ b/packages/next/src/server/stream-utils/encodedTags.ts
@ -0,0 +1,21 @@
+export const ENCODED_TAGS = {
+  // Opening tags omit the closing `>` because the real tag may carry attributes, e.g. `<body className=''>`, so only the `<name` prefix is stored here.
+  OPENING: {
+    // ASCII bytes of `<html`
+    HTML: new Uint8Array([60, 104, 116, 109, 108]),
+    // ASCII bytes of `<body`
+    BODY: new Uint8Array([60, 98, 111, 100, 121]),
+  },
+  CLOSED: {
+    // ASCII bytes of `</head>`
+    HEAD: new Uint8Array([60, 47, 104, 101, 97, 100, 62]),
+    // ASCII bytes of `</body>`
+    BODY: new Uint8Array([60, 47, 98, 111, 100, 121, 62]),
+    // ASCII bytes of `</html>`
+    HTML: new Uint8Array([60, 47, 104, 116, 109, 108, 62]),
+    // ASCII bytes of `</body></html>` (both closing tags back to back)
+    BODY_AND_HTML: new Uint8Array([
+      60, 47, 98, 111, 100, 121, 62, 60, 47, 104, 116, 109, 108, 62,
+    ]),
+  },
+} as const
--- a/packages/next/src/server/stream-utils/node-web-streams-helper.ts
+++ b/packages/next/src/server/stream-utils/node-web-streams-helper.ts
@ -2,6 +2,12 @@ import { getTracer } from '../lib/trace/tracer'
 import { AppRenderSpan } from '../lib/trace/constants'
 import { DetachedPromise } from '../../lib/detached-promise'
 import { scheduleImmediate, atLeastOneTask } from '../../lib/scheduler'
+import { ENCODED_TAGS } from './encodedTags'
+import {
+  indexOfUint8Array,
+  isEquivalentUint8Arrays,
+  removeFromUint8Array,
+} from './uint8array-helpers'

 function voidCatch() {
  // this catcher is designed to be used with pipeTo where we expect the underlying
@ -175,8 +181,6 @@ function createHeadInsertionTransformStream(
  let inserted = false
  let freezing = false

-  const decoder = new TextDecoder()
-
  // We need to track if this transform saw any bytes because if it didn't
  // we won't want to insert any server HTML at all
  let hasBytes = false
@ -191,17 +195,25 @@ function createHeadInsertionTransformStream(
      }

      const insertion = await insert()
+      const encodedInsertion = encoder.encode(insertion)
      if (inserted) {
-        controller.enqueue(encoder.encode(insertion))
+        controller.enqueue(encodedInsertion)
        controller.enqueue(chunk)
        freezing = true
      } else {
-        const content = decoder.decode(chunk)
-        const index = content.indexOf('</head>')
+        // TODO (@Ethan-Arrowood): Replace the generic `indexOfUint8Array` method with something finely tuned for the subset of things actually being checked for.
+        const index = indexOfUint8Array(chunk, ENCODED_TAGS.CLOSED.HEAD)
        if (index !== -1) {
-          const insertedHeadContent =
-            content.slice(0, index) + insertion + content.slice(index)
-          controller.enqueue(encoder.encode(insertedHeadContent))
+          const insertedHeadContent = new Uint8Array(
+            chunk.length + encodedInsertion.length
+          )
+          insertedHeadContent.set(chunk.slice(0, index))
+          insertedHeadContent.set(encodedInsertion, index)
+          insertedHeadContent.set(
+            chunk.slice(index),
+            index + encodedInsertion.length
+          )
+          controller.enqueue(insertedHeadContent)
          freezing = true
          inserted = true
        }
@ -344,7 +356,7 @@ function createMoveSuffixStream(
 ): TransformStream<Uint8Array, Uint8Array> {
  let foundSuffix = false

-  const decoder = new TextDecoder()
+  const encodedSuffix = encoder.encode(suffix)

  return new TransformStream({
    transform(chunk, controller) {
@ -352,29 +364,26 @@ function createMoveSuffixStream(
        return controller.enqueue(chunk)
      }

-      const buf = decoder.decode(chunk)
-      const index = buf.indexOf(suffix)
+      const index = indexOfUint8Array(chunk, encodedSuffix)
      if (index > -1) {
        foundSuffix = true

        // If the whole chunk is the suffix, then don't write anything, it will
        // be written in the flush.
-        if (buf.length === suffix.length) {
+        if (chunk.length === suffix.length) {
          return
        }

        // Write out the part before the suffix.
-        const before = buf.slice(0, index)
-        chunk = encoder.encode(before)
-        controller.enqueue(chunk)
+        const before = chunk.slice(0, index)
+        controller.enqueue(before)

        // In the case where the suffix is in the middle of the chunk, we need
        // to split the chunk into two parts.
-        if (buf.length > suffix.length + index) {
+        if (chunk.length > suffix.length + index) {
          // Write out the part after the suffix.
-          const after = buf.slice(index + suffix.length)
-          chunk = encoder.encode(after)
-          controller.enqueue(chunk)
+          const after = chunk.slice(index + suffix.length)
+          controller.enqueue(after)
        }
      } else {
        controller.enqueue(chunk)
@ -383,7 +392,7 @@ function createMoveSuffixStream(
    flush(controller) {
      // Even if we didn't find the suffix, the HTML is not valid if we don't
      // add it, so insert it at the end.
-      controller.enqueue(encoder.encode(suffix))
+      controller.enqueue(encodedSuffix)
    },
  })
 }
@ -392,7 +401,6 @@ function createStripDocumentClosingTagsTransform(): TransformStream<
  Uint8Array,
  Uint8Array
 > {
-  const decoder = new TextDecoder()
  return new TransformStream({
    transform(chunk, controller) {
      // We rely on the assumption that chunks will never break across a code unit.
@ -400,26 +408,21 @@ function createStripDocumentClosingTagsTransform(): TransformStream<
      // flush into one chunk before streaming it forward which means the chunk will represent
      // a single coherent utf-8 string. This is not safe to use if we change our streaming to no
      // longer do this large buffered chunk
-      let originalContent = decoder.decode(chunk)
-      let content = originalContent
-
      if (
-        content === '</body></html>' ||
-        content === '</body>' ||
-        content === '</html>'
+        isEquivalentUint8Arrays(chunk, ENCODED_TAGS.CLOSED.BODY_AND_HTML) ||
+        isEquivalentUint8Arrays(chunk, ENCODED_TAGS.CLOSED.BODY) ||
+        isEquivalentUint8Arrays(chunk, ENCODED_TAGS.CLOSED.HTML)
      ) {
-        // the entire chunk is the closing tags.
+        // the entire chunk is the closing tags; return without enqueueing anything.
        return
-      } else {
-        // We assume these tags will go at together at the end of the document and that
-        // they won't appear anywhere else in the document. This is not really a safe assumption
-        // but until we revamp our streaming infra this is a performant way to string the tags
-        content = content.replace('</body>', '').replace('</html>', '')
-        if (content.length !== originalContent.length) {
-          return controller.enqueue(encoder.encode(content))
-        }
      }

+      // We assume these tags will go together at the end of the document and that
+      // they won't appear anywhere else in the document. This is not really a safe assumption
+      // but until we revamp our streaming infra this is a performant way to strip the tags
+      chunk = removeFromUint8Array(chunk, ENCODED_TAGS.CLOSED.BODY)
+      chunk = removeFromUint8Array(chunk, ENCODED_TAGS.CLOSED.HTML)
+
      controller.enqueue(chunk)
    },
  })
@ -436,30 +439,36 @@ export function createRootLayoutValidatorStream(): TransformStream<
 > {
  let foundHtml = false
  let foundBody = false
-
-  const decoder = new TextDecoder()
-
-  let content = ''
+  let chunks: Uint8Array[] = []
+  let size = 0
  return new TransformStream({
    async transform(chunk, controller) {
-      // Peek into the streamed chunk to see if the tags are present.
-      if (!foundHtml || !foundBody) {
-        content += decoder.decode(chunk, { stream: true })
-        if (!foundHtml && content.includes('<html')) {
-          foundHtml = true
-        }
-        if (!foundBody && content.includes('<body')) {
-          foundBody = true
-        }
-      }
+      chunks.push(chunk)
+      size += chunk.length
      controller.enqueue(chunk)
    },
    flush(controller) {
-      // Flush the decoder.
+      const content = new Uint8Array(size)
+      let offset = 0
+      for (const chunk of chunks) {
+        content.set(chunk, offset)
+        offset += chunk.length
+      }
+
+      // Peek into the streamed chunk to see if the tags are present.
      if (!foundHtml || !foundBody) {
-        content += decoder.decode()
-        if (!foundHtml && content.includes('<html')) foundHtml = true
-        if (!foundBody && content.includes('<body')) foundBody = true
+        if (
+          !foundHtml &&
+          indexOfUint8Array(content, ENCODED_TAGS.OPENING.HTML) > -1
+        ) {
+          foundHtml = true
+        }
+        if (
+          !foundBody &&
+          indexOfUint8Array(content, ENCODED_TAGS.OPENING.BODY) > -1
+        ) {
+          foundBody = true
+        }
      }

      const missingTags: typeof window.__next_root_layout_missing_tags = []
--- a/packages/next/src/server/stream-utils/uint8array-helpers.ts
+++ b/packages/next/src/server/stream-utils/uint8array-helpers.ts
@ -0,0 +1,59 @@
+/**
+ * Find the starting index of Uint8Array `b` within Uint8Array `a`.
+ *
+ * This is a plain byte-by-byte scan: every candidate start position in `a`
+ * is compared against `b` until a full match is found.
+ *
+ * @param a - the bytes to search in
+ * @param b - the byte sequence to look for
+ * @returns the index in `a` where the first occurrence of `b` begins, `0`
+ * when `b` is empty, or `-1` when `b` does not occur in `a` (including when
+ * `a` is empty or shorter than `b`).
+ */
+export function indexOfUint8Array(a: Uint8Array, b: Uint8Array) {
+  if (b.length === 0) return 0
+  if (a.length === 0 || b.length > a.length) return -1
+
+  // walk through `a`, stopping at the last start position where all of `b` still fits
+  for (let i = 0; i <= a.length - b.length; i++) {
+    let completeMatch = true
+    // from index `i`, compare `a` against `b` byte by byte and look for a mismatch
+    for (let j = 0; j < b.length; j++) {
+      // a differing byte means `b` does not start at `i`; stop comparing and move on to the next index of `a`.
+      if (a[i + j] !== b[j]) {
+        completeMatch = false
+        break
+      }
+    }
+
+    if (completeMatch) {
+      return i
+    }
+  }
+
+  return -1
+}
+
+/**
+ * Check if two Uint8Arrays are strictly equivalent, i.e. they have the same
+ * length and hold the same byte at every index.
+ *
+ * @param a - first array to compare
+ * @param b - second array to compare
+ * @returns `true` when both arrays have identical length and contents
+ * (two empty arrays are equivalent), otherwise `false`.
+ */
+export function isEquivalentUint8Arrays(a: Uint8Array, b: Uint8Array) {
+  if (a.length !== b.length) return false
+
+  for (let i = 0; i < a.length; i++) {
+    if (a[i] !== b[i]) return false
+  }
+
+  return true
+}
+
+/**
+ * Remove the first occurrence of Uint8Array `b` from Uint8Array `a`.
+ *
+ * Only the first match is removed; later occurrences of `b` are left in place.
+ *
+ * If `b` is not in `a`, `a` itself is returned unchanged.
+ *
+ * If `b` starts at index 0 of `a` (which is also the case when `b` is empty),
+ * the result is `a.subarray(b.length)`: a view over the same underlying
+ * buffer as `a`, not a copy.
+ *
+ * Otherwise, the function returns a new Uint8Array instance with size
+ * `a.length - b.length` holding the bytes before and after the match.
+ *
+ * @param a - the bytes to remove from
+ * @param b - the byte sequence to remove
+ * @returns `a` without the first occurrence of `b`; the result may share
+ * memory with `a` in the not-found and index-0 cases described above.
+ */
+export function removeFromUint8Array(a: Uint8Array, b: Uint8Array) {
+  const tagIndex = indexOfUint8Array(a, b)
+  if (tagIndex === 0) return a.subarray(b.length)
+  if (tagIndex > -1) {
+    const removed = new Uint8Array(a.length - b.length)
+    removed.set(a.slice(0, tagIndex))
+    removed.set(a.slice(tagIndex + b.length), tagIndex)
+    return removed
+  } else {
+    return a
+  }
+}