From 5b7ec71fb7cc81ea0f98f019cb0dac0b6bcb2fa2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 15 Feb 2019 13:15:28 -0400 Subject: [PATCH 1/4] Correct CNTPCT to use Clock Cycles instead of Cpu Cycles. --- src/core/arm/dynarmic/arm_dynarmic.cpp | 5 +++-- src/core/core_timing_util.cpp | 7 +++++++ src/core/core_timing_util.h | 3 +++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 9b7ca40308..d36538257f 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -12,6 +12,7 @@ #include "core/core.h" #include "core/core_cpu.h" #include "core/core_timing.h" +#include "core/core_timing_util.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/svc.h" @@ -119,7 +120,7 @@ public: return std::max(parent.core_timing.GetDowncount(), 0); } u64 GetCNTPCT() override { - return parent.core_timing.GetTicks(); + return CpuCyclesToClockCycles(parent.core_timing.GetTicks()); } ARM_Dynarmic& parent; @@ -151,7 +152,7 @@ std::unique_ptr ARM_Dynarmic::MakeJit() const { config.tpidr_el0 = &cb->tpidr_el0; config.dczid_el0 = 4; config.ctr_el0 = 0x8444c004; - config.cntfrq_el0 = 19200000; // Value from fusee. + config.cntfrq_el0 = Timing::CNTFREQ; // Value from fusee. // Unpredictable instructions config.define_unpredictable_behaviour = true; diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp index 88ff70233a..8fc92560af 100644 --- a/src/core/core_timing_util.cpp +++ b/src/core/core_timing_util.cpp @@ -60,4 +60,11 @@ s64 nsToCycles(u64 ns) { return (BASE_CLOCK_RATE * static_cast(ns)) / 1000000000; } +u64 CpuCyclesToClockCycles(u64 ticks) { + u64 result = ticks; + result *= CNTFREQ; + result /= BASE_CLOCK_RATE; + return static_cast(result); +} + } // namespace Core::Timing diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h index 513cfac1b6..545d3a260b 100644 --- a/src/core/core_timing_util.h +++ b/src/core/core_timing_util.h @@ -11,6 +11,7 @@ namespace Core::Timing { // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz // The exact value used is of course unverified. constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked +constexpr u64 CNTFREQ = 19200000; // Value from fusee. inline s64 msToCycles(int ms) { // since ms is int there is no way to overflow @@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) { return cycles * 1000 / BASE_CLOCK_RATE; } +u64 CpuCyclesToClockCycles(u64 ticks); + } // namespace Core::Timing From 3ea48e8ebe25686f2342cd79b32409fcd1bccb28 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 15 Feb 2019 19:26:41 -0400 Subject: [PATCH 2/4] Implement 128 bits Unsigned Integer Multiplication and Division. --- src/common/CMakeLists.txt | 2 ++ src/common/uint128.cpp | 18 ++++++++++++++++++ src/common/uint128.h | 30 ++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+) create mode 100644 src/common/uint128.cpp create mode 100644 src/common/uint128.h diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index bdd885273c..b0174b445e 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -113,6 +113,8 @@ add_library(common STATIC threadsafe_queue.h timer.cpp timer.h + uint128.cpp + uint128.h vector_math.h web_result.h ) diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp new file mode 100644 index 0000000000..aea7f03e21 --- /dev/null +++ b/src/common/uint128.cpp @@ -0,0 +1,18 @@ + +namespace Common { + +std::pair udiv128(u128 dividend, u64 divisor) { + u64 remainder = dividend[0] % divisor; + u64 accum = dividend[0] / divisor; + if (dividend[1] == 0) + return {accum, remainder}; + // We ignore dividend[1] / divisor as that overflows + u64 first_segment = (dividend[1] % divisor) << 32; + accum += (first_segment / divisor) << 32; + u64 second_segment = (first_segment % divisor) << 32; + accum += (second_segment / divisor); + remainder += second_segment % divisor; + return {accum, remainder}; +} + +} // namespace Common diff --git a/src/common/uint128.h b/src/common/uint128.h new file mode 100644 index 0000000000..fda313bccc --- /dev/null +++ b/src/common/uint128.h @@ -0,0 +1,30 @@ +#include +#include +#include +#include +#include "common/common_types.h" + +namespace Common { + +#ifdef _MSC_VER +#include + +#pragma intrinsic(_umul128) +#endif + +inline u128 umul128(u64 a, u64 b) { +#ifdef _MSC_VER +u128 result; +result[0] = _umul128(a, b, &result[1]); +#else +unsigned __int128 tmp = a; +tmp *= b; +u128 result; +std::memcpy(&result, &tmp, sizeof(u128)); +#endif +return result; +} + +std::pair udiv128(u128 dividend, u64 divisor); + +} // namespace Common From ecccfe033777d6ae7d29bcf0cfc30412f7d3be24 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 15 Feb 2019 20:04:11 -0400 Subject: [PATCH 3/4] Use u128 on Clock Cycles calculation. --- src/common/uint128.cpp | 24 +++++++++++++++++++++++- src/common/uint128.h | 23 +++-------------------- src/core/arm/dynarmic/arm_dynarmic.cpp | 2 +- src/core/core_timing_util.cpp | 8 ++++---- src/core/core_timing_util.h | 2 +- 5 files changed, 32 insertions(+), 27 deletions(-) diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp index aea7f03e21..8548ba8085 100644 --- a/src/common/uint128.cpp +++ b/src/common/uint128.cpp @@ -1,7 +1,25 @@ +#ifdef _MSC_VER +#include + +#pragma intrinsic(_umul128) +#endif +#include "common/uint128.h" namespace Common { +u128 Multiply64Into128(u64 a, u64 b) { +#ifdef _MSC_VER + u128 result; + result[0] = _umul128(a, b, &result[1]); +#else + unsigned __int128 tmp = a; + tmp *= b; + u128 result; + std::memcpy(&result, &tmp, sizeof(u128)); +#endif + return result; +} -std::pair udiv128(u128 dividend, u64 divisor) { +std::pair Divide128On64(u128 dividend, u64 divisor) { u64 remainder = dividend[0] % divisor; u64 accum = dividend[0] / divisor; if (dividend[1] == 0) @@ -12,6 +30,10 @@ std::pair udiv128(u128 dividend, u64 divisor) { u64 second_segment = (first_segment % divisor) << 32; accum += (second_segment / divisor); remainder += second_segment % divisor; + if (remainder >= divisor) { + accum++; + remainder -= divisor; + } return {accum, remainder}; } diff --git a/src/common/uint128.h b/src/common/uint128.h index fda313bccc..45e384c337 100644 --- a/src/common/uint128.h +++ b/src/common/uint128.h @@ -1,30 +1,13 @@ #include #include -#include #include +#include #include "common/common_types.h" namespace Common { -#ifdef _MSC_VER -#include +u128 Multiply64Into128(u64 a, u64 b); -#pragma intrinsic(_umul128) -#endif - -inline u128 umul128(u64 a, u64 b) { -#ifdef _MSC_VER -u128 result; -result[0] = _umul128(a, b, &result[1]); -#else -unsigned __int128 tmp = a; -tmp *= b; -u128 result; -std::memcpy(&result, &tmp, sizeof(u128)); -#endif -return result; -} - -std::pair udiv128(u128 dividend, u64 divisor); +std::pair Divide128On64(u128 dividend, u64 divisor); } // namespace Common diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index d36538257f..25f76259bb 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -120,7 +120,7 @@ public: return std::max(parent.core_timing.GetDowncount(), 0); } u64 GetCNTPCT() override { - return CpuCyclesToClockCycles(parent.core_timing.GetTicks()); + return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks()); } ARM_Dynarmic& parent; diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp index 8fc92560af..aab4aa6978 100644 --- a/src/core/core_timing_util.cpp +++ b/src/core/core_timing_util.cpp @@ -7,6 +7,7 @@ #include #include #include "common/logging/log.h" +#include "common/uint128.h" namespace Core::Timing { @@ -61,10 +62,9 @@ s64 nsToCycles(u64 ns) { } u64 CpuCyclesToClockCycles(u64 ticks) { - u64 result = ticks; - result *= CNTFREQ; - result /= BASE_CLOCK_RATE; - return static_cast(result); + u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ); + std::pair result = Common::Divide128On64(temporal, BASE_CLOCK_RATE); + return result.first; } } // namespace Core::Timing diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h index 545d3a260b..679aa31231 100644 --- a/src/core/core_timing_util.h +++ b/src/core/core_timing_util.h @@ -11,7 +11,7 @@ namespace Core::Timing { // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz // The exact value used is of course unverified. constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked -constexpr u64 CNTFREQ = 19200000; // Value from fusee. +constexpr u64 CNTFREQ = 19200000; // Value from fusee. inline s64 msToCycles(int ms) { // since ms is int there is no way to overflow From a8d4927e29bb1acbf5f3267f368801847acd4222 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 16 Feb 2019 16:52:24 -0400 Subject: [PATCH 4/4] Corrections, documenting and fixes. --- src/common/uint128.cpp | 11 ++++++----- src/common/uint128.h | 9 +++++---- src/core/arm/dynarmic/arm_dynarmic.cpp | 2 +- src/core/core_timing_util.cpp | 5 ++--- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp index 8548ba8085..2238a52c53 100644 --- a/src/common/uint128.cpp +++ b/src/common/uint128.cpp @@ -3,31 +3,32 @@ #pragma intrinsic(_umul128) #endif +#include #include "common/uint128.h" namespace Common { + u128 Multiply64Into128(u64 a, u64 b) { -#ifdef _MSC_VER u128 result; +#ifdef _MSC_VER result[0] = _umul128(a, b, &result[1]); #else unsigned __int128 tmp = a; tmp *= b; - u128 result; std::memcpy(&result, &tmp, sizeof(u128)); #endif return result; } -std::pair Divide128On64(u128 dividend, u64 divisor) { +std::pair Divide128On32(u128 dividend, u32 divisor) { u64 remainder = dividend[0] % divisor; u64 accum = dividend[0] / divisor; if (dividend[1] == 0) return {accum, remainder}; // We ignore dividend[1] / divisor as that overflows - u64 first_segment = (dividend[1] % divisor) << 32; + const u64 first_segment = (dividend[1] % divisor) << 32; accum += (first_segment / divisor) << 32; - u64 second_segment = (first_segment % divisor) << 32; + const u64 second_segment = (first_segment % divisor) << 32; accum += (second_segment / divisor); remainder += second_segment % divisor; if (remainder >= divisor) { diff --git a/src/common/uint128.h b/src/common/uint128.h index 45e384c337..52e6b46eb7 100644 --- a/src/common/uint128.h +++ b/src/common/uint128.h @@ -1,13 +1,14 @@ -#include -#include -#include + #include #include "common/common_types.h" namespace Common { +// This function multiplies 2 u64 values and produces a u128 value; u128 Multiply64Into128(u64 a, u64 b); -std::pair Divide128On64(u128 dividend, u64 divisor); +// This function divides a u128 by a u32 value and produces two u64 values: +// the result of division and the remainder +std::pair Divide128On32(u128 dividend, u32 divisor); } // namespace Common diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 25f76259bb..4fdc12f111 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -152,7 +152,7 @@ std::unique_ptr ARM_Dynarmic::MakeJit() const { config.tpidr_el0 = &cb->tpidr_el0; config.dczid_el0 = 4; config.ctr_el0 = 0x8444c004; - config.cntfrq_el0 = Timing::CNTFREQ; // Value from fusee. + config.cntfrq_el0 = Timing::CNTFREQ; // Unpredictable instructions config.define_unpredictable_behaviour = true; diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp index aab4aa6978..7942f30d66 100644 --- a/src/core/core_timing_util.cpp +++ b/src/core/core_timing_util.cpp @@ -62,9 +62,8 @@ s64 nsToCycles(u64 ns) { } u64 CpuCyclesToClockCycles(u64 ticks) { - u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ); - std::pair result = Common::Divide128On64(temporal, BASE_CLOCK_RATE); - return result.first; + const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ); + return Common::Divide128On32(temporal, static_cast(BASE_CLOCK_RATE)).first; } } // namespace Core::Timing