diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp
index dacc8ce9c261c..eec979efbc0d1 100644
--- a/src/hotspot/share/opto/c2_globals.hpp
+++ b/src/hotspot/share/opto/c2_globals.hpp
@@ -800,6 +800,9 @@
   product(bool, IncrementalInlineForceCleanup, false, DIAGNOSTIC,           \
           "do cleanup after every iteration of incremental inlining")       \
                                                                             \
+  product(bool, IncrementalInlineVector, true, DIAGNOSTIC,                  \
+          "Inline fallback implementation of failed vector intrinsics")     \
+                                                                            \
   product(intx, LiveNodeCountInliningCutoff, 40000,                         \
           "max number of live nodes in a method")                           \
           range(0, max_juint / 8)                                           \
diff --git a/src/hotspot/share/opto/callGenerator.cpp b/src/hotspot/share/opto/callGenerator.cpp
index 49897ca3c1763..0e56eb67a1ad7 100644
--- a/src/hotspot/share/opto/callGenerator.cpp
+++ b/src/hotspot/share/opto/callGenerator.cpp
@@ -437,6 +437,36 @@ CallGenerator* CallGenerator::for_mh_late_inline(ciMethod* caller, ciMethod* cal
   return cg;
 }
 
+// Late-inline generator for a vector intrinsic call site that additionally
+// carries a parse-based generator for the method's fallback implementation.
+class LateInlineVectorCallGenerator : public LateInlineCallGenerator {
+ private:
+  CallGenerator* _fallback_cg; // generator for the fallback implementation (must be is_parse())
+
+ public:
+  LateInlineVectorCallGenerator(ciMethod* method, CallGenerator* intrinsic_cg, CallGenerator* fallback_cg) :
+    LateInlineCallGenerator(method, intrinsic_cg), _fallback_cg(fallback_cg) {
+    assert(_fallback_cg != nullptr && _fallback_cg->is_parse(), "fallback must be a parse-based call generator");
+  }
+
+  virtual bool is_vector_late_inline() const { return true; }
+
+  // Delegates to LateInlineCallGenerator::generate() and then queues a
+  // late-inline generator for the fallback implementation, created with
+  // with_call_node(call_node()), on the compilation's vector late-inline list.
+  virtual JVMState* generate(JVMState* jvms) {
+    JVMState* new_jvms = LateInlineCallGenerator::generate(jvms);
+    CallGenerator* fallback = CallGenerator::for_late_inline(method(), _fallback_cg)->with_call_node(call_node());
+    Compile::current()->add_vector_late_inline(fallback);
+    return new_jvms;
+  }
+};
+
+CallGenerator* CallGenerator::for_vector_late_inline(ciMethod* m, CallGenerator* intrinsic_cg, CallGenerator* fallback_cg) {
+  return new LateInlineVectorCallGenerator(m, intrinsic_cg, fallback_cg);
+}
+
+
 // Allow inlining decisions to be delayed
 class LateInlineVirtualCallGenerator : public VirtualCallGenerator {
  private:
diff --git a/src/hotspot/share/opto/callGenerator.hpp b/src/hotspot/share/opto/callGenerator.hpp
index 75ba6f709c07c..2dd102b9e0601 100644
--- a/src/hotspot/share/opto/callGenerator.hpp
+++ b/src/hotspot/share/opto/callGenerator.hpp
@@ -75,6 +75,7 @@ class CallGenerator : public ArenaObj {
   // same but for method handle calls
   virtual bool is_mh_late_inline() const { return false; }
   virtual bool is_string_late_inline() const { return false; }
+  virtual bool is_vector_late_inline() const { return false; }
   virtual bool is_boxing_late_inline() const { return false; }
   virtual bool is_vector_reboxing_late_inline() const { return false; }
   virtual bool is_virtual_late_inline() const { return false; }
@@ -142,6 +143,7 @@ class CallGenerator : public ArenaObj {
   static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg);
   static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const);
   static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg);
+  static CallGenerator* for_vector_late_inline(ciMethod* m, CallGenerator* intrinsic_cg, CallGenerator* fallback_cg);
   static CallGenerator* for_boxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
   static CallGenerator* for_vector_reboxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
   static CallGenerator* for_late_inline_virtual(ciMethod* m, int vtable_index, float expected_uses);
diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp
index 382c8f89a5fcb..4658332b3ab3b 100644
--- a/src/hotspot/share/opto/compile.cpp
+++ b/src/hotspot/share/opto/compile.cpp
@@ -416,6 +416,7 @@ void Compile::remove_useless_node(Node* dead) {
   remove_useless_late_inlines(                &_late_inlines, dead);
   remove_useless_late_inlines(         &_string_late_inlines, dead);
   remove_useless_late_inlines(         &_boxing_late_inlines, dead);
+  remove_useless_late_inlines(         &_vector_late_inlines, dead);
   remove_useless_late_inlines(&_vector_reboxing_late_inlines, dead);
 
   if (dead->is_CallStaticJava()) {
@@ -480,6 +481,7 @@ void Compile::disconnect_useless_nodes(Unique_Node_List& useful, Unique_Node_Lis
   remove_useless_late_inlines(                &_late_inlines, useful);
   remove_useless_late_inlines(         &_string_late_inlines, useful);
   remove_useless_late_inlines(         &_boxing_late_inlines, useful);
+  remove_useless_late_inlines(         &_vector_late_inlines, useful);
   remove_useless_late_inlines(&_vector_reboxing_late_inlines, useful);
   DEBUG_ONLY(verify_graph_edges(true /*check for no_dead_code*/, root_and_safepoints);)
 }
@@ -693,6 +695,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci,
                   _string_late_inlines(comp_arena(), 2, 0, nullptr),
                   _boxing_late_inlines(comp_arena(), 2, 0, nullptr),
                   _vector_reboxing_late_inlines(comp_arena(), 2, 0, nullptr),
+                  _vector_late_inlines(comp_arena(), 2, 0, nullptr),
                   _late_inlines_pos(0),
                   _has_mh_late_inlines(false),
                   _oom(false),
@@ -2158,6 +2161,27 @@ void Compile::shuffle_late_inlines() {
   shuffle_array(*C, _late_inlines);
 }
 
+// Hands every generator queued in _vector_late_inlines to add_late_inline()
+// and then empties the vector list.
+void Compile::process_vector_late_inlines() {
+  for (int i = 0; i < _vector_late_inlines.length(); i++) {
+    CallGenerator* cg = _vector_late_inlines.at(i);
+
+    // When a vector intrinsic fails, set_generator(cg) caches the
+    // LateInlineVectorCallGenerator on the call node to allow retries
+    // if IGVN optimizes the call node's inputs. If the call node is not
+    // on the IGVN worklist when cleanup runs, CallStaticJavaNode::Ideal
+    // does not fire and the cached generator persists. Once _late_inlines
+    // drains and we commit to the fallback here, clear the stale generator
+    // to prevent a subsequent IGVN pass from re-registering the intrinsic
+    // attempt into _late_inlines alongside the fallback, which would create
+    // duplicate call_node entries.
+    cg->call_node()->as_CallJava()->set_generator(nullptr);
+    add_late_inline(cg);
+  }
+  _vector_late_inlines.clear();
+}
+
 // Perform incremental inlining until bound on number of live nodes is reached
 void Compile::inline_incrementally(PhaseIterGVN& igvn) {
   TracePhase tp(_t_incrInline);
@@ -2215,6 +2239,10 @@ void Compile::inline_incrementally(PhaseIterGVN& igvn) {
     print_method(PHASE_INCREMENTAL_INLINE_STEP, 3);
 
     if (failing()) return;
+
+    if (_late_inlines.length() == 0) {
+      process_vector_late_inlines();
+    }
   }
 
   igvn_worklist()->ensure_empty(); // should be done with igvn
diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp
index ff0085d79deac..a3ee4ed6f8ef7 100644
--- a/src/hotspot/share/opto/compile.hpp
+++ b/src/hotspot/share/opto/compile.hpp
@@ -480,6 +480,7 @@ class Compile : public Phase {
   GrowableArray<CallGenerator*> _boxing_late_inlines; // same but for boxing operations
 
   GrowableArray<CallGenerator*> _vector_reboxing_late_inlines; // same but for vector reboxing operations
+  GrowableArray<CallGenerator*> _vector_late_inlines; // inline fallback implementation for failed intrinsics
 
   int _late_inlines_pos; // Where in the queue should the next late inlining candidate go (emulate depth first inlining)
   bool _has_mh_late_inlines; // Can there still be a method handle late inlining pending?
@@ -508,6 +509,12 @@ class Compile : public Phase {
   InlinePrinter _inline_printer;
 
  public:
+
+  void add_vector_late_inline(CallGenerator* cg) { // appends cg to _vector_late_inlines
+    _vector_late_inlines.push(cg);
+  }
+  void process_vector_late_inlines(); // hands the queued generators to add_late_inline() and clears the list (compile.cpp)
+
   void* barrier_set_state() const { return _barrier_set_state; }
 
   InlinePrinter* inline_printer() { return &_inline_printer; }
diff --git a/src/hotspot/share/opto/doCall.cpp b/src/hotspot/share/opto/doCall.cpp
index d6e75f17f5012..13c64ad8fce91 100644
--- a/src/hotspot/share/opto/doCall.cpp
+++ b/src/hotspot/share/opto/doCall.cpp
@@ -166,6 +166,17 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
           cg_intrinsic = cg;
           cg = nullptr;
         } else if (IncrementalInline && should_delay_vector_inlining(callee, jvms)) {
+          if (IncrementalInlineVector && allow_inline) {
+            // Try to late inline fallback implementation if intrinsification attempt fails.
+            CallGenerator* fallback_cg = call_generator(callee, vtable_index, call_does_dispatch, jvms,
+                                                        true /*allow_inline*/, prof_factor,
+                                                        speculative_receiver_type, false /*allow_intrinsics*/); // recursive query with intrinsics disabled
+            if (fallback_cg != nullptr && fallback_cg->is_parse()) { // only a parse-based generator is accepted as fallback
+              return CallGenerator::for_vector_late_inline(callee, cg, fallback_cg);
+            }
+            // Fallback not inlineable by regular heuristics; fall through.
+          }
+          // Don't try to inline fallback implementation.
           return CallGenerator::for_late_inline(callee, cg);
         } else {
           return cg;
diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java b/test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java
index c6329c70f6594..9b1a95dfb1440 100644
--- a/test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java
+++ b/test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java
@@ -40,7 +40,8 @@
  */
 public class TestVectorTest {
     public static void main(String[] args) {
-        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector",
+                                   "-XX:-IncrementalInlineVector"); // run with IncrementalInlineVector switched off
     }
 
     @DontInline
diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorCompareWithZeroTest.java b/test/hotspot/jtreg/compiler/vectorapi/VectorCompareWithZeroTest.java
index 26e159fb768d0..0b1bfd2c80209 100644
--- a/test/hotspot/jtreg/compiler/vectorapi/VectorCompareWithZeroTest.java
+++ b/test/hotspot/jtreg/compiler/vectorapi/VectorCompareWithZeroTest.java
@@ -253,7 +253,8 @@ public static void testLongVectorUnsignedCondition() {
     public static void main(String[] args) {
         TestFramework testFramework = new TestFramework();
         testFramework.setDefaultWarmup(10000)
-                     .addFlags("--add-modules=jdk.incubator.vector")
+                     .addFlags("--add-modules=jdk.incubator.vector",
+                               "-XX:-IncrementalInlineVector") // run with IncrementalInlineVector switched off
                      .addFlags("-XX:UseSVE=0")
                      .start();
     }
diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java
index 09185f63c6942..4aeb5ba36b08c 100644
--- a/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java
+++ b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java
@@ -1294,7 +1294,8 @@ public static void testCompareMaskNotDoubleNegative() {
     public static void main(String[] args) {
         TestFramework testFramework = new TestFramework();
         testFramework.setDefaultWarmup(5000)
-                     .addFlags("--add-modules=jdk.incubator.vector")
+                     .addFlags("--add-modules=jdk.incubator.vector",
+                               "-XX:-IncrementalInlineVector") // run with IncrementalInlineVector switched off
                      .start();
     }
 }