diff --git a/highwayhash/arch_specific.cc b/highwayhash/arch_specific.cc index 2a05860..314c84f 100644 --- a/highwayhash/arch_specific.cc +++ b/highwayhash/arch_specific.cc @@ -16,7 +16,7 @@ #include -#if HH_ARCH_X64 && !HH_MSC_VERSION +#if HH_ARCH_X86_X64 && !HH_MSC_VERSION #include #endif @@ -53,7 +53,7 @@ const char* TargetName(const TargetBits target_bit) { } } -#if HH_ARCH_X64 +#if HH_ARCH_X86_X64 namespace { @@ -101,12 +101,12 @@ uint32_t ApicId() { return abcd[1] >> 24; // ebx } -#endif // HH_ARCH_X64 +#endif // HH_ARCH_X86_X64 namespace { double DetectNominalClockRate() { -#if HH_ARCH_X64 +#if HH_ARCH_X86_X64 const std::string& brand_string = BrandString(); // Brand strings include the maximum configured frequency. These prefixes are // defined by Intel CPUID documentation. diff --git a/highwayhash/arch_specific.h b/highwayhash/arch_specific.h index 0b8c384..1cc35b5 100644 --- a/highwayhash/arch_specific.h +++ b/highwayhash/arch_specific.h @@ -52,6 +52,18 @@ namespace highwayhash { #define HH_ARCH_X64 0 #endif +#if defined(__i386__) || defined(_M_IX86) +#define HH_ARCH_X86 1 +#else +#define HH_ARCH_X86 0 +#endif + +#if HH_ARCH_X86 || HH_ARCH_X64 +#define HH_ARCH_X86_X64 1 +#else +#define HH_ARCH_X86_X64 0 +#endif + #if defined(__aarch64__) || defined(__arm64__) #define HH_ARCH_AARCH64 1 #else @@ -162,7 +174,7 @@ double NominalClockRate(); // frequency on PPC and NominalClockRate on all other platforms. double InvariantTicksPerSecond(); -#if HH_ARCH_X64 +#if HH_ARCH_X86_X64 // Calls CPUID instruction with eax=level and ecx=count and returns the result // in abcd array where abcd = {eax, ebx, ecx, edx} (hence the name abcd). @@ -172,7 +184,7 @@ void Cpuid(const uint32_t level, const uint32_t count, // Returns the APIC ID of the CPU on which we're currently running. uint32_t ApicId(); -#endif // HH_ARCH_X64 +#endif // HH_ARCH_X86_X64 } // namespace highwayhash diff --git a/highwayhash/hh_avx2.h b/highwayhash/hh_avx2.h index db44f53..4d76adb 100644 --- a/highwayhash/hh_avx2.h +++ b/highwayhash/hh_avx2.h @@ -78,7 +78,7 @@ def x(a,b,c): // size/32. mod32 is sufficient because each Update behaves as if a // counter were injected, because the state is large and mixed thoroughly. const V8x32U size256( - _mm256_broadcastd_epi32(_mm_cvtsi64_si128(size_mod32))); + _mm256_broadcastd_epi32(_mm_cvtsi32_si128(static_cast(size_mod32)))); // Equivalent to storing size_mod32 in packet. v0 += V4x64U(size256); // Boosts the avalanche effect of mod32. @@ -105,12 +105,12 @@ def x(a,b,c): } else { // size_mod32 < 16 const V4x32U int_mask = IntMask<0>()(size); const V4x32U packetL = MaskedLoadInt(bytes, int_mask); - const uint64_t last3 = + const uint32_t last3 = Load3()(Load3::AllowUnordered(), remainder, size_mod4); // Rather than insert into packetL[3], it is faster to initialize // the otherwise empty packetH. - const V4x32U packetH(_mm_cvtsi64_si128(last3)); + const V4x32U packetH(_mm_cvtsi32_si128(last3)); Update(packetH, packetL); } } @@ -255,7 +255,7 @@ def x(a,b,c): static HH_INLINE V4x32U Load0To16(const char* from, const size_t size_mod32, const V4x32U& size) { const char* remainder = from + (size_mod32 & ~3); - const uint64_t last3 = Load3()(Load3Policy(), remainder, size_mod32 & 3); + const uint32_t last3 = Load3()(Load3Policy(), remainder, size_mod32 & 3); const V4x32U int_mask = IntMask()(size); const V4x32U int_lanes = MaskedLoadInt(from, int_mask); return Insert4AboveMask(last3, int_mask, int_lanes); diff --git a/highwayhash/hh_portable.h b/highwayhash/hh_portable.h index 3b1a394..628e1aa 100644 --- a/highwayhash/hh_portable.h +++ b/highwayhash/hh_portable.h @@ -233,7 +233,7 @@ class HHStatePortable { } } - static HH_INLINE void Rotate32By(uint32_t* halves, const uint64_t count) { + static HH_INLINE void Rotate32By(uint32_t* halves, const size_t count) { for (int i = 0; i < 2 * kNumLanes; ++i) { const uint32_t x = halves[i]; halves[i] = (x << count) | (x >> (32 - count)); diff --git a/highwayhash/hh_sse41.h b/highwayhash/hh_sse41.h index 6bbed22..7f71de8 100644 --- a/highwayhash/hh_sse41.h +++ b/highwayhash/hh_sse41.h @@ -97,12 +97,12 @@ class HHStateSSE41 { } else { // size_mod32 < 16 const V2x64U packetL = LoadMultipleOfFour(bytes, size_mod32); - const uint64_t last4 = + const uint32_t last4 = Load3()(Load3::AllowUnordered(), remainder, size_mod4); // Rather than insert into packetL[3], it is faster to initialize // the otherwise empty packetH. - const V2x64U packetH(_mm_cvtsi64_si128(last4)); + const V2x64U packetH(_mm_cvtsi32_si128(last4)); Update(packetH, packetL); } } @@ -192,11 +192,11 @@ class HHStateSSE41 { // Rotates 32-bit lanes by "count" bits. static HH_INLINE void Rotate32By(V2x64U* HH_RESTRICT vH, V2x64U* HH_RESTRICT vL, - const uint64_t count) { + const size_t count) { // WARNING: the shift count is 64 bits, so we can't reuse vsize_mod32, // which is broadcast into 32-bit lanes. - const __m128i count_left = _mm_cvtsi64_si128(count); - const __m128i count_right = _mm_cvtsi64_si128(32 - count); + const __m128i count_left = _mm_cvtsi32_si128(static_cast(count)); + const __m128i count_right = _mm_cvtsi32_si128(static_cast(32 - count)); const V2x64U shifted_leftL(_mm_sll_epi32(*vL, count_left)); const V2x64U shifted_leftH(_mm_sll_epi32(*vH, count_left)); const V2x64U shifted_rightL(_mm_srl_epi32(*vL, count_right)); @@ -250,7 +250,7 @@ class HHStateSSE41 { const uint32_t* words = reinterpret_cast(bytes); // Mask of 1-bits where the final 4 bytes should be inserted (replacement // for variable shift/insert using broadcast+blend). - V2x64U mask4(_mm_cvtsi64_si128(0xFFFFFFFFULL)); // 'insert' into lane 0 + V2x64U mask4(_mm_cvtsi32_si128(0xFFFFFFFFU)); // 'insert' into lane 0 V2x64U ret(0); if (size & 8) { ret = V2x64U(_mm_loadl_epi64(reinterpret_cast(words))); diff --git a/highwayhash/highwayhash.h b/highwayhash/highwayhash.h index 3655ce3..a9b18ab 100644 --- a/highwayhash/highwayhash.h +++ b/highwayhash/highwayhash.h @@ -31,7 +31,7 @@ #include "highwayhash/compiler_specific.h" #include "highwayhash/hh_types.h" -#if HH_ARCH_X64 +#if HH_ARCH_X86_X64 #include "highwayhash/iaca.h" #endif diff --git a/highwayhash/highwayhash_target.cc b/highwayhash/highwayhash_target.cc index 74022f6..34d4020 100644 --- a/highwayhash/highwayhash_target.cc +++ b/highwayhash/highwayhash_target.cc @@ -25,7 +25,7 @@ namespace highwayhash { extern "C" { uint64_t HH_ADD_TARGET_SUFFIX(HighwayHash64_)(const HHKey key, const char* bytes, - const uint64_t size) { + const size_t size) { HHStateT state(key); HHResult64 result; HighwayHashT(&state, bytes, size, &result); diff --git a/highwayhash/highwayhash_test.cc b/highwayhash/highwayhash_test.cc index aed9a9e..60e4bae 100644 --- a/highwayhash/highwayhash_test.cc +++ b/highwayhash/highwayhash_test.cc @@ -79,7 +79,7 @@ TargetBits VerifyImplementations(const Result (&known_good)[kMaxSize + 1]) { // For each test input: empty string, 00, 00 01, ... char in[kMaxSize + 1] = {0}; // Fast enough that we don't need a thread pool. - for (uint64_t size = 0; size <= kMaxSize; ++size) { + for (size_t size = 0; size <= kMaxSize; ++size) { in[size] = static_cast(size); #if PRINT_RESULTS Result actual; diff --git a/highwayhash/highwayhash_test_target.cc b/highwayhash/highwayhash_test_target.cc index e999d9f..96af42e 100644 --- a/highwayhash/highwayhash_test_target.cc +++ b/highwayhash/highwayhash_test_target.cc @@ -127,7 +127,7 @@ void HighwayHashTest::operator()(const HHKey& key, template void HighwayHashCatTest::operator()(const HHKey& key, const char* HH_RESTRICT bytes, - const uint64_t size, + const size_t size, const HHResult64* expected, const HHNotify notify) const { TestHighwayHashCat(key, bytes, size, expected, notify); @@ -136,7 +136,7 @@ void HighwayHashCatTest::operator()(const HHKey& key, template void HighwayHashCatTest::operator()(const HHKey& key, const char* HH_RESTRICT bytes, - const uint64_t size, + const size_t size, const HHResult128* expected, const HHNotify notify) const { TestHighwayHashCat(key, bytes, size, expected, notify); @@ -145,7 +145,7 @@ void HighwayHashCatTest::operator()(const HHKey& key, template void HighwayHashCatTest::operator()(const HHKey& key, const char* HH_RESTRICT bytes, - const uint64_t size, + const size_t size, const HHResult256* expected, const HHNotify notify) const { TestHighwayHashCat(key, bytes, size, expected, notify); diff --git a/highwayhash/highwayhash_test_target.h b/highwayhash/highwayhash_test_target.h index 56ae960..93be728 100644 --- a/highwayhash/highwayhash_test_target.h +++ b/highwayhash/highwayhash_test_target.h @@ -54,13 +54,13 @@ struct HighwayHashTest { template struct HighwayHashCatTest { void operator()(const HHKey& key, const char* HH_RESTRICT bytes, - const uint64_t size, const HHResult64* expected, + const size_t size, const HHResult64* expected, const HHNotify notify) const; void operator()(const HHKey& key, const char* HH_RESTRICT bytes, - const uint64_t size, const HHResult128* expected, + const size_t size, const HHResult128* expected, const HHNotify notify) const; void operator()(const HHKey& key, const char* HH_RESTRICT bytes, - const uint64_t size, const HHResult256* expected, + const size_t size, const HHResult256* expected, const HHNotify notify) const; }; diff --git a/highwayhash/instruction_sets.cc b/highwayhash/instruction_sets.cc index a02e1f8..0aae595 100644 --- a/highwayhash/instruction_sets.cc +++ b/highwayhash/instruction_sets.cc @@ -17,7 +17,7 @@ // Currently there are only specialized targets for X64; other architectures // only use HH_TARGET_Portable, in which case Supported() just returns that. -#if HH_ARCH_X64 +#if HH_ARCH_X86_X64 #include @@ -138,4 +138,4 @@ TargetBits InstructionSets::Supported() { } // namespace highwayhash -#endif // HH_ARCH_X64 +#endif // HH_ARCH_X86_X64 diff --git a/highwayhash/instruction_sets.h b/highwayhash/instruction_sets.h index aa7bd6b..756f561 100644 --- a/highwayhash/instruction_sets.h +++ b/highwayhash/instruction_sets.h @@ -34,7 +34,7 @@ class InstructionSets { public: // Returns bit array of HH_TARGET_* supported by the current CPU. // The HH_TARGET_Portable bit is guaranteed to be set. -#if HH_ARCH_X64 +#if HH_ARCH_X86_X64 static TargetBits Supported(); #elif HH_ARCH_PPC static HH_INLINE TargetBits Supported() { @@ -54,7 +54,7 @@ class InstructionSets { // this should only be called infrequently (e.g. hoisting it out of loops). template