Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
aba79ef
Add JIT code generator for PPC64
cyrozap Mar 28, 2026
65ba514
Correct comment for STORE_LE_VR
cyrozap Apr 5, 2026
05ff7dd
Optimize STORE_LE_VR on little-endian POWER
cyrozap Apr 5, 2026
dcc710a
Make it clear that f0-f31 are aliased by vs0-vs31
cyrozap Apr 5, 2026
279514b
Mark r23 as unused
cyrozap Apr 5, 2026
712da1d
Optimize CBRANCH
cyrozap Apr 5, 2026
09ddaf3
Add a comment on the importance of using dcbt to prefetch the next block
cyrozap Apr 6, 2026
c103a1b
Optimize scratchpad address calculation in program suffix
cyrozap Apr 6, 2026
2241c36
Optimize emitMovImm64 for rotated 32-bit immediates
cyrozap Apr 6, 2026
cf9ca50
Use cpu.hasAes() instead of getauxval on PPC64
cyrozap Apr 6, 2026
65950a8
Set default PPC64 CPU based on whether the system is BE or LE
cyrozap Apr 6, 2026
bb57fcd
Enable compatibility with PPC64 ELF ABI V1
cyrozap Apr 7, 2026
807bdf5
Fix BE PPC64 cache and dataset endianness
cyrozap Apr 7, 2026
205f4a5
Fix BE PPC64 scratchpad and register endianness
cyrozap Apr 8, 2026
1865a43
Fix interpreter v2 tests on big-endian PPC64
cyrozap Apr 8, 2026
b7a3154
Remove unnecessary immediate load on PPC64 BE with v1 ABI
cyrozap Apr 8, 2026
778a58f
Move PPC64 VM prologue generation into prefix generation function
cyrozap Apr 10, 2026
c2e4355
Factor out the common parts of the scratchpad store
cyrozap Apr 10, 2026
2bbb740
Flush the cache on PPC64 for real
cyrozap Apr 10, 2026
d8f508d
Optimize CFROUND for POWER9 (ISA v3.0B)
cyrozap Apr 11, 2026
31ff28d
Cache reciprocals in PPC64 JIT compiler
cyrozap Apr 11, 2026
8abf44d
Simplify scratchpad loading code
cyrozap Apr 11, 2026
7f815b9
Move the creation of the zero vector further from where it's used
cyrozap Apr 11, 2026
1d6d26c
Use AltiVec instructions instead of VSX instructions where possible
cyrozap Apr 13, 2026
2016660
Implement software AES for PPC64
cyrozap Apr 13, 2026
9a77acf
Avoid dependency on Linux kernel headers
cyrozap Apr 6, 2026
62e9457
Use round-robin temporary register allocator in PPC64 JIT compiler
cyrozap Apr 17, 2026
087edd1
Fix PPC64 build for musl libc
cyrozap Apr 17, 2026
23b22fb
Rename the PPC64 byte-reverse mask to better reflect its purpose
cyrozap Apr 24, 2026
2e4c986
Use .octa for vector byte-reverse mask to avoid confusion
cyrozap Apr 24, 2026
bb7633a
Correct the Group E AND mask
cyrozap Apr 24, 2026
8cd6435
Optimize Group E register conversion on PPC64
cyrozap Apr 24, 2026
694dd00
PPC64 JIT: Correct maximum RandomX instruction code size
cyrozap Apr 26, 2026
2bb1ab2
PPC64 JIT: Optimize emitAddImm32 by using `addis` for supported values
cyrozap Apr 27, 2026
f633ec1
PPC64 JIT: Add some notes on optimizing emitAddImm32
cyrozap Apr 27, 2026
bf0b5cd
PPC64 JIT: Make sure groups of four loads use different temporary regs
cyrozap Apr 27, 2026
578ada3
PPC64 JIT: Group loads four at a time
cyrozap Apr 27, 2026
279a960
PPC64 JIT: Reorder ld arguments to match the assembly instruction
cyrozap Apr 28, 2026
8f1c2f3
PPC64 JIT: Rename scratchpad store prologue/epilogue
cyrozap Apr 28, 2026
7144c2d
PPC64 JIT: Move the Group F scratchpad store into the code generator
cyrozap Apr 28, 2026
69f019f
PPC64 JIT: Optimize Group F scratchpad store on v3.0 and later
cyrozap Apr 28, 2026
7b48443
PPC64 JIT: Avoid moving register ma when we don't need to
cyrozap Apr 28, 2026
047320a
PPC64 JIT: Optimize Group F register scratchpad stores on pre-v3.0
cyrozap Apr 29, 2026
8b87ee8
PPC64 JIT: Optimize IXOR_R for 16-bit and shifted unsigned 16-bit values
cyrozap Apr 29, 2026
50b0658
PPC64 JIT: Rearrange the beq/bne instruction formatters
cyrozap Apr 29, 2026
78be8c8
PPC64 JIT: Add branch hint for CFROUND in V2 mode
cyrozap Apr 29, 2026
ef71a76
PPC64 JIT: Convert the BO field values to hexadecimal
cyrozap May 6, 2026
b4d3168
PPC64 JIT: Add branch hint for CBRANCH
cyrozap May 6, 2026
e9a2c8e
PPC64 JIT: Remove STORE_LE_VR macro
cyrozap May 7, 2026
7eacafe
PPC64 JIT: Interleave immediate loads with vector loads and stores
cyrozap May 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,22 @@ endif()

# PowerPC
if(ARCH_ID STREQUAL "ppc64" OR ARCH_ID STREQUAL "ppc64le")
if(ARCH STREQUAL "native")
add_flag("-mcpu=native")
list(APPEND randomx_sources
src/jit_compiler_ppc64_static.S
src/jit_compiler_ppc64.cpp)

set_property(SOURCE src/jit_compiler_ppc64_static.S PROPERTY LANGUAGE C)

if(ARCH STREQUAL "default")
if(ARCH_ID STREQUAL "ppc64le")
# Little-endian defaults to POWER8
add_flag("-mcpu=power8")
else()
# Big-endian defaults to POWER7
add_flag("-mcpu=power7")
endif()
else()
add_flag("-mcpu=${ARCH}")
endif()
# PowerPC AES requires ALTIVEC (POWER7+), so it cannot be enabled in the default build
endif()
Expand Down
5 changes: 5 additions & 0 deletions src/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@ namespace randomx {
#define RANDOMX_COMPILER_RV64
class JitCompilerRV64;
using JitCompiler = JitCompilerRV64;
#elif defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)
#define RANDOMX_HAVE_COMPILER 1
#define RANDOMX_COMPILER_PPC64
class JitCompilerPPC64;
using JitCompiler = JitCompilerPPC64;
#else
#define RANDOMX_HAVE_COMPILER 0
class JitCompilerFallback;
Expand Down
16 changes: 15 additions & 1 deletion src/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <asm/hwcap.h>
#endif

// 64-bit PowerPC only: pull in getauxval()/AT_HWCAP2 for runtime CPU feature
// detection.
#if defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)
#include <sys/auxv.h>
// From asm/cputable.h:
// The AT_HWCAP2 bit masks are defined here only if nothing has defined them
// already, so this file does not have to include the kernel header itself.
// Bit tested to decide whether AES support is reported (aes_).
#ifndef PPC_FEATURE2_VEC_CRYPTO
#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
#endif
// Bit tested to decide whether ISA v3.00 support is reported (v3p0_).
#ifndef PPC_FEATURE2_ARCH_3_00
#define PPC_FEATURE2_ARCH_3_00 0x00800000
#endif
#endif

#ifdef __riscv
#include <signal.h>
#include <setjmp.h>
Expand Down Expand Up @@ -120,8 +131,11 @@ namespace randomx {

sigaction(SIGILL, &old_action, nullptr);
}
#elif defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)
unsigned long hwcaps2 = getauxval(AT_HWCAP2);
aes_ = (hwcaps2 & PPC_FEATURE2_VEC_CRYPTO) != 0;
v3p0_ = (hwcaps2 & PPC_FEATURE2_ARCH_3_00) != 0;
#endif
//TODO POWER8 AES
}

const Cpu cpu;
Expand Down
6 changes: 6 additions & 0 deletions src/cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ namespace randomx {
inline bool hasRVV() const { return rvv_; }
inline int getRVV_Length() const { return rvv_length; }
#endif
#if defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)
// Reports the cached v3p0_ flag (PPC64 builds only).
inline bool hasV3P0() const {
	return v3p0_;
}
#endif

private:
bool aes_ = false;
Expand All @@ -49,6 +52,9 @@ namespace randomx {
#ifdef __riscv
bool rvv_ = false;
int rvv_length = 0;
#endif
#if defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)
bool v3p0_ = false;
#endif
};

Expand Down
8 changes: 8 additions & 0 deletions src/intrin_portable.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,11 +277,19 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
}

// Reinterprets an integer vector as a floating-point vector.
// Little-endian builds do a plain bit cast. Every other build first permutes
// the bytes so that the two 32-bit words inside each 64-bit half trade places.
// NOTE(review): presumably this keeps the lane layout identical to the
// little-endian representation -- confirm against the callers.
FORCE_INLINE rx_vec_f128 rx_cast_vec_i2f(rx_vec_i128 a) {
#if !defined(NATIVE_LITTLE_ENDIAN)
	// Byte selectors: bytes 4-7 move ahead of bytes 0-3, and 12-15 ahead of 8-11.
	const __m128i wordSwap = {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
	return (rx_vec_f128)vec_perm((__m128i)a, (__m128i)a, wordSwap);
#else
	return (rx_vec_f128)a;
#endif
}

// Reinterprets a floating-point vector as an integer vector.
// Little-endian builds do a plain bit cast. Every other build first permutes
// the bytes so that the two 32-bit words inside each 64-bit half trade places
// (the same byte permutation used by rx_cast_vec_i2f).
// NOTE(review): presumably this keeps the lane layout identical to the
// little-endian representation -- confirm against the callers.
FORCE_INLINE rx_vec_i128 rx_cast_vec_f2i(rx_vec_f128 a) {
#if !defined(NATIVE_LITTLE_ENDIAN)
	// Byte selectors: bytes 4-7 move ahead of bytes 0-3, and 12-15 ahead of 8-11.
	const __m128i wordSwap = {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
	return (rx_vec_i128)vec_perm((__m128i)a, (__m128i)a, wordSwap);
#else
	return (rx_vec_i128)a;
#endif
}

FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
Expand Down
2 changes: 2 additions & 0 deletions src/jit_compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ namespace randomx {
#include "jit_compiler_a64.hpp"
#elif defined(RANDOMX_COMPILER_RV64)
#include "jit_compiler_rv64.hpp"
#elif defined(RANDOMX_COMPILER_PPC64)
#include "jit_compiler_ppc64.hpp"
#else
#include "jit_compiler_fallback.hpp"
#endif
Expand Down
1,622 changes: 1,622 additions & 0 deletions src/jit_compiler_ppc64.cpp

Large diffs are not rendered by default.

120 changes: 120 additions & 0 deletions src/jit_compiler_ppc64.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
Copyright (c) 2023 tevador <tevador@gmail.com>
Copyright (c) 2026, Forest Crossman <cyrozap@gmail.com>

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#pragma once

#include <vector>

#include "common.hpp"
#include "jit_compiler.hpp"

#include "jit_compiler_ppc64_static.hpp"

// PPC_BIG_ENDIAN: 1 when the compiler reports a big-endian target through
// __BYTE_ORDER__, otherwise 0 (this includes the case where __BYTE_ORDER__ is
// not defined at all).
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define PPC_BIG_ENDIAN 1
#else
#define PPC_BIG_ENDIAN 0
#endif

// PPC_ABI_V2: 1 when the compiler states _CALL_ELF == 2, or when _CALL_ELF is
// not defined and the target is not big-endian; otherwise 0.
// The JIT compiler class below uses it to decide whether callers receive the
// raw code entry pointer or a descriptor array.
#if (defined(_CALL_ELF) && _CALL_ELF == 2) || (!defined(_CALL_ELF) && !PPC_BIG_ENDIAN)
#define PPC_ABI_V2 1
#else
#define PPC_ABI_V2 0
#endif

namespace randomx {

// Forward declarations: this header only uses these types by reference in
// function signatures, so their full definitions are not required here.
class Program;
struct ProgramConfiguration;
class SuperscalarProgram;
class Instruction;

/**
 * JIT compiler interface for 64-bit PowerPC.
 *
 * This header declares only the interface; code generation lives in the
 * corresponding implementation file. The accessors defined inline hand out
 * the generated entry points in the form required by the ABI selected via
 * PPC_ABI_V2.
 *
 * All scalar and array data members carry default member initializers, so a
 * freshly constructed object never exposes indeterminate values (for
 * example `flags` when setFlags() has not been called yet).
 */
class JitCompilerPPC64 {
public:
	JitCompilerPPC64();
	~JitCompilerPPC64();

	/// Generates code for a program using the given configuration.
	void generateProgram(Program&, ProgramConfiguration&);
	/// Light-mode counterpart of generateProgram().
	/// NOTE(review): the trailing uint32_t is unnamed here; presumably a
	/// dataset offset as in the other JIT back ends -- confirm in the .cpp.
	void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);

	/// Generates code for the superscalar hash programs.
	/// NOTE(review): the second argument is unnamed; presumably the
	/// reciprocal cache -- confirm in the .cpp.
	void generateSuperscalarHash(SuperscalarProgramList& programs, std::vector<uint64_t> &);

	/// Intentionally a no-op in this back end.
	void generateDatasetInitCode() {}

	/// Returns the callable for the generated program.
	/// With PPC_ABI_V2 this is the raw entry pointer; otherwise it is the
	/// address of the descriptor array (presumably an ELFv1 function
	/// descriptor filled in by the implementation -- confirm in the .cpp).
	ProgramFunc* getProgramFunc() {
#if PPC_ABI_V2
		return reinterpret_cast<ProgramFunc*>(entryProgram);
#else
		return reinterpret_cast<ProgramFunc*>(descriptorProgram);
#endif
	}
	/// Returns the callable for dataset initialization, selected the same
	/// way as in getProgramFunc().
	DatasetInitFunc* getDatasetInitFunc() {
#if PPC_ABI_V2
		return reinterpret_cast<DatasetInitFunc*>(entryDataInit);
#else
		return reinterpret_cast<DatasetInitFunc*>(descriptorDataInit);
#endif
	}
	/// Returns the code buffer pointer held in the compiler state.
	uint8_t* getCode() { return state.code; }
	size_t getCodeSize();

	// Memory-protection switches for the code buffer; implemented in the .cpp.
	void enableWriting();
	void enableExecution();
	void enableAll();

	/// Stores the flags for later use by the generator.
	void setFlags(randomx_flags f) { flags = f; }

	// Temporary register selection.
	// NOTE(review): presumably round-robin over a fixed pool, tracked by
	// tempGprIndex / tempVrIndex -- confirm in the .cpp.
	uint32_t getTempGpr();
	uint32_t getTempVr();

	// 256-entry byte table shared by all instances; defined in the .cpp.
	static uint8_t instMap[256];

private:
	void emitProgramPrefix(CompilerState& state, Program& prog, ProgramConfiguration& pcfg, randomx_flags flags);
	void emitProgramSuffix(CompilerState& state, ProgramConfiguration& pcfg, randomx_flags flags);

	CompilerState state;
	// Value-initialized so the generator never reads an indeterminate value
	// when setFlags() was not called.
	randomx_flags flags{};

	void* entryDataInit = nullptr;
	void* entryProgram = nullptr;
#if !PPC_ABI_V2
	// Three doublewords per callable, returned instead of the raw entry
	// pointers when PPC_ABI_V2 is 0. Zeroed until the implementation fills
	// them in.
	uint64_t descriptorProgram[3] = {};
	uint64_t descriptorDataInit[3] = {};
#endif

	// Positions recorded during code generation; zero until assigned.
	int32_t RandomXCodePos = 0;
	int32_t SshashSingleItemPos = 0;
	int32_t LoopBeginPos = 0;

	uint32_t tempGprIndex = 0;
	uint32_t tempVrIndex = 0;
};

}
Loading
Loading