// Copyright 2009 Dolphin Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include "VideoBackends/Software/TextureSampler.h" #include #include #include "Common/CommonTypes.h" #include "Common/MsgHandler.h" #include "Core/HW/Memmap.h" #include "VideoCommon/BPMemory.h" #include "VideoCommon/TextureDecoder.h" #define ALLOW_MIPMAP 1 namespace TextureSampler { static inline void WrapCoord(int* coordp, WrapMode wrap_mode, int image_size) { int coord = *coordp; switch (wrap_mode) { case WrapMode::Clamp: coord = std::clamp(coord, 0, image_size - 1); break; case WrapMode::Repeat: // Per YAGCD's info on TX_SETMODE1_I0 (et al.), mirror "requires the texture size to be a power // of two. (wrapping is implemented by a logical AND (SIZE-1))". So though this doesn't wrap // nicely for non-power-of-2 sizes, that's how hardware does it. coord = coord & (image_size - 1); break; case WrapMode::Mirror: { // YAGCD doesn't mention this, but this seems to be the check used to implement mirroring. // With power-of-2 sizes, this correctly checks if it's an even-numbered repeat or an // odd-numbered one, and thus can decide whether to reflect. It fails in unusual ways // with non-power-of-2 sizes, but seems to match what happens on actual hardware. if ((coord & image_size) != 0) coord = ~coord; coord = coord & (image_size - 1); break; } default: // Hardware testing indicates that wrap_mode set to 3 behaves the same as clamp. PanicAlertFmt("Invalid wrap mode: {}", wrap_mode); coord = std::clamp(coord, 0, image_size - 1); break; } *coordp = coord; } static inline void SetTexel(const u8* inTexel, u32* outTexel, u32 fract) { outTexel[0] = inTexel[0] * fract; outTexel[1] = inTexel[1] * fract; outTexel[2] = inTexel[2] * fract; outTexel[3] = inTexel[3] * fract; } static inline void AddTexel(const u8* inTexel, u32* outTexel, u32 fract) { outTexel[0] += inTexel[0] * fract; outTexel[1] += inTexel[1] * fract; outTexel[2] += inTexel[2] * fract; outTexel[3] += inTexel[3] * fract; } void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8* sample) { int baseMip = 0; bool mipLinear = false; #if (ALLOW_MIPMAP) auto texUnit = bpmem.tex.GetUnit(texmap); const TexMode0& tm0 = texUnit.texMode0; const s32 lodFract = lod & 0xf; if (lod > 0 && tm0.mipmap_filter != MipMode::None) { // use mipmap baseMip = lod >> 4; mipLinear = (lodFract && tm0.mipmap_filter == MipMode::Linear); // if using nearest mip filter and lodFract >= 0.5 round up to next mip if (tm0.mipmap_filter == MipMode::Point && lodFract >= 8) baseMip++; } if (mipLinear) { u8 sampledTex[4]; u32 texel[4]; SampleMip(s, t, baseMip, linear, texmap, sampledTex); SetTexel(sampledTex, texel, (16 - lodFract)); SampleMip(s, t, baseMip + 1, linear, texmap, sampledTex); AddTexel(sampledTex, texel, lodFract); sample[0] = (u8)(texel[0] >> 4); sample[1] = (u8)(texel[1] >> 4); sample[2] = (u8)(texel[2] >> 4); sample[3] = (u8)(texel[3] >> 4); } else #endif { SampleMip(s, t, baseMip, linear, texmap, sample); } } void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample) { auto texUnit = bpmem.tex.GetUnit(texmap); const TexMode0& tm0 = texUnit.texMode0; const TexImage0& ti0 = texUnit.texImage0; const TexTLUT& texTlut = texUnit.texTlut; const TextureFormat texfmt = ti0.format; const TLUTFormat tlutfmt = texTlut.tlut_format; const u8* imageSrc; const u8* imageSrcOdd = nullptr; if (texUnit.texImage1.cache_manually_managed) { imageSrc = &texMem[texUnit.texImage1.tmem_even * TMEM_LINE_SIZE]; if (texfmt == TextureFormat::RGBA8) imageSrcOdd = &texMem[texUnit.texImage2.tmem_odd * TMEM_LINE_SIZE]; } else { const u32 imageBase = texUnit.texImage3.image_base << 5; imageSrc = Memory::GetPointer(imageBase); } int image_width_minus_1 = ti0.width; int image_height_minus_1 = ti0.height; const int tlutAddress = texTlut.tmem_offset << 9; const u8* tlut = &texMem[tlutAddress]; // reduce sample location and texture size to mip level // move texture pointer to mip location if (mip) { int mipWidth = image_width_minus_1 + 1; int mipHeight = image_height_minus_1 + 1; const int fmtWidth = TexDecoder_GetBlockWidthInTexels(texfmt); const int fmtHeight = TexDecoder_GetBlockHeightInTexels(texfmt); const int fmtDepth = TexDecoder_GetTexelSizeInNibbles(texfmt); image_width_minus_1 >>= mip; image_height_minus_1 >>= mip; s >>= mip; t >>= mip; while (mip) { mipWidth = std::max(mipWidth, fmtWidth); mipHeight = std::max(mipHeight, fmtHeight); const u32 size = (mipWidth * mipHeight * fmtDepth) >> 1; imageSrc += size; mipWidth >>= 1; mipHeight >>= 1; mip--; } } if (linear) { // offset linear sampling s -= 64; t -= 64; // integer part of sample location int imageS = s >> 7; int imageT = t >> 7; // linear sampling int imageSPlus1 = imageS + 1; const int fractS = s & 0x7f; int imageTPlus1 = imageT + 1; const int fractT = t & 0x7f; u8 sampledTex[4]; u32 texel[4]; WrapCoord(&imageS, tm0.wrap_s, image_width_minus_1 + 1); WrapCoord(&imageT, tm0.wrap_t, image_height_minus_1 + 1); WrapCoord(&imageSPlus1, tm0.wrap_s, image_width_minus_1 + 1); WrapCoord(&imageTPlus1, tm0.wrap_t, image_height_minus_1 + 1); if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1.cache_manually_managed)) { TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, image_width_minus_1, texfmt, tlut, tlutfmt); SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT)); TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, image_width_minus_1, texfmt, tlut, tlutfmt); AddTexel(sampledTex, texel, (fractS) * (128 - fractT)); TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, image_width_minus_1, texfmt, tlut, tlutfmt); AddTexel(sampledTex, texel, (128 - fractS) * (fractT)); TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, image_width_minus_1, texfmt, tlut, tlutfmt); AddTexel(sampledTex, texel, (fractS) * (fractT)); } else { TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageS, imageT, image_width_minus_1); SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT)); TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageSPlus1, imageT, image_width_minus_1); AddTexel(sampledTex, texel, (fractS) * (128 - fractT)); TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageS, imageTPlus1, image_width_minus_1); AddTexel(sampledTex, texel, (128 - fractS) * (fractT)); TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageSPlus1, imageTPlus1, image_width_minus_1); AddTexel(sampledTex, texel, (fractS) * (fractT)); } sample[0] = (u8)(texel[0] >> 14); sample[1] = (u8)(texel[1] >> 14); sample[2] = (u8)(texel[2] >> 14); sample[3] = (u8)(texel[3] >> 14); } else { // integer part of sample location int imageS = s >> 7; int imageT = t >> 7; // nearest neighbor sampling WrapCoord(&imageS, tm0.wrap_s, image_width_minus_1 + 1); WrapCoord(&imageT, tm0.wrap_t, image_height_minus_1 + 1); if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1.cache_manually_managed)) TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, image_width_minus_1, texfmt, tlut, tlutfmt); else TexDecoder_DecodeTexelRGBA8FromTmem(sample, imageSrc, imageSrcOdd, imageS, imageT, image_width_minus_1); } } } // namespace TextureSampler