From c14a6089ba6e8fc8ccefce57138263e456ee75ff Mon Sep 17 00:00:00 2001 From: Remi NGUYEN VAN Date: Tue, 24 May 2022 12:51:03 +0900 Subject: [PATCH 1/2] Update compile.source / target to 1.8 The project would currently not build at all, as it already uses 1.7 features like the diamond operator. Update to 1.8 so the project can build and reasonably recent features can be used. --- build.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.xml b/build.xml index 9eb947a..35311f8 100644 --- a/build.xml +++ b/build.xml @@ -5,8 +5,8 @@ - - + + From 585e2f1575157570bcf1bd4d15fcda5c09ac8433 Mon Sep 17 00:00:00 2001 From: Remi NGUYEN VAN Date: Fri, 20 May 2022 11:34:32 +0900 Subject: [PATCH 2/2] Speed up jarjar with prefix matches A common use-case for jarjar is to have rule with the format my.package.name.**, or even my.package.name.MyClass with no wildcards. Jarjar will convert that pattern to a regular expression and do regular expression matching on every symbol it scans to determine if the rule matches. This is very inefficient as most symbols will not match, and a simple prefix match can rule out the match in most cases. When parsing rules, extract the plain-text prefix of each rule, which is everything before the first wildcard. Then index all rules by their prefix in a prefix trie. This allows: - Avoiding slow regular expression matches in most cases, as jarjar can immediately observe that there is no matching prefix in the map. - Scaling in O(log(n)) with the number of rules when they have distinct prefixes (instead of O(n), as a matching prefix can be found from the trie instead of looping through all rules. This was tested to make jarjar faster on the Android codebase (running all jarjar rules to build a device back-to-back goes from 295s to 270s), while keeping outputs byte-identical, and providing significant speedups when many jarjar rules are used. --- .../tonicsystems/jarjar/PackageRemapper.java | 6 +- .../tonicsystems/jarjar/PatternElement.java | 4 +- .../com/tonicsystems/jarjar/Wildcard.java | 21 ++++- .../com/tonicsystems/jarjar/WildcardTrie.java | 91 +++++++++++++++++++ 4 files changed, 117 insertions(+), 5 deletions(-) create mode 100644 src/main/com/tonicsystems/jarjar/WildcardTrie.java diff --git a/src/main/com/tonicsystems/jarjar/PackageRemapper.java b/src/main/com/tonicsystems/jarjar/PackageRemapper.java index 22f4dce..7533082 100644 --- a/src/main/com/tonicsystems/jarjar/PackageRemapper.java +++ b/src/main/com/tonicsystems/jarjar/PackageRemapper.java @@ -28,7 +28,7 @@ class PackageRemapper extends Remapper { private static final Pattern ARRAY_FOR_NAME_PATTERN = Pattern.compile("\\[L[\\p{javaJavaIdentifierPart}\\.]+?;"); - private final List wildcards; + private final WildcardTrie wildcards; private final Map typeCache = new HashMap<>(); private final Map pathCache = new HashMap<>(); private final Map valueCache = new HashMap<>(); @@ -36,7 +36,7 @@ class PackageRemapper extends Remapper { public PackageRemapper(List ruleList, boolean verbose) { this.verbose = verbose; - wildcards = PatternElement.createWildcards(ruleList); + wildcards = new WildcardTrie(PatternElement.createWildcards(ruleList)); } // also used by KeepProcessor @@ -128,7 +128,7 @@ public Object mapValue(Object value) { } private String replaceHelper(String value) { - for (Wildcard wildcard : wildcards) { + for (Wildcard wildcard : wildcards.getPossibleMatches(value)) { String test = wildcard.replace(value); if (test != null) { return test; diff --git a/src/main/com/tonicsystems/jarjar/PatternElement.java b/src/main/com/tonicsystems/jarjar/PatternElement.java index eeae88f..399928d 100644 --- a/src/main/com/tonicsystems/jarjar/PatternElement.java +++ b/src/main/com/tonicsystems/jarjar/PatternElement.java @@ -32,13 +32,15 @@ public String getPattern() { static List createWildcards(List patterns) { List wildcards = new ArrayList<>(); + int ruleIndex = 0; for (PatternElement pattern : patterns) { String result = (pattern instanceof Rule) ? ((Rule) pattern).getResult() : ""; String expr = pattern.getPattern(); if (expr.indexOf('/') >= 0) { throw new IllegalArgumentException("Patterns cannot contain slashes"); } - wildcards.add(new Wildcard(expr.replace('.', '/'), result)); + wildcards.add(new Wildcard(expr.replace('.', '/'), result, ruleIndex)); + ruleIndex++; } return wildcards; } diff --git a/src/main/com/tonicsystems/jarjar/Wildcard.java b/src/main/com/tonicsystems/jarjar/Wildcard.java index d32e2a4..d1f93d6 100644 --- a/src/main/com/tonicsystems/jarjar/Wildcard.java +++ b/src/main/com/tonicsystems/jarjar/Wildcard.java @@ -25,14 +25,18 @@ class Wildcard { private static final Pattern DSTAR = Pattern.compile("\\*\\*"); private static final Pattern STAR = Pattern.compile("\\*"); private static final Pattern ESTAR = Pattern.compile("\\+\\??\\)\\Z"); + // Apart from stars and dollar signs, wildcards are plain-text full matches + private static Pattern PLAIN_TEXT_PREFIX = Pattern.compile("^[^*$]*"); private final Pattern pattern; + private final String plainTextPrefix; + private final int ruleIndex; private final int count; private final ArrayList parts = new ArrayList<>(16); // kept for debugging private final String[] strings; private final int[] refs; - public Wildcard(String pattern, String result) { + public Wildcard(String pattern, String result, int ruleIndex) { if (pattern.equals("**")) { throw new IllegalArgumentException("'**' is not a valid pattern"); } @@ -47,6 +51,13 @@ public Wildcard(String pattern, String result) { regex = replaceAllLiteral(DSTAR, regex, "(.+?)"); regex = replaceAllLiteral(STAR, regex, "([^/]+)"); regex = replaceAllLiteral(ESTAR, regex, "*)"); + Matcher prefixMatcher = PLAIN_TEXT_PREFIX.matcher(pattern); + // prefixMatcher will always match, but may match an empty string + if (!prefixMatcher.find()) { + throw new IllegalArgumentException(PLAIN_TEXT_PREFIX + " not found in " + pattern); + } + this.plainTextPrefix = prefixMatcher.group(); + this.ruleIndex = ruleIndex; this.pattern = Pattern.compile("\\A" + regex + "\\Z"); this.count = this.pattern.matcher("foo").groupCount(); @@ -107,6 +118,14 @@ public Wildcard(String pattern, String result) { // System.err.println(this); } + public String getPlainTextPrefix() { + return plainTextPrefix; + } + + public int getRuleIndex() { + return ruleIndex; + } + public boolean matches(String value) { return getMatcher(value) != null; } diff --git a/src/main/com/tonicsystems/jarjar/WildcardTrie.java b/src/main/com/tonicsystems/jarjar/WildcardTrie.java new file mode 100644 index 0000000..e80dbc9 --- /dev/null +++ b/src/main/com/tonicsystems/jarjar/WildcardTrie.java @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.tonicsystems.jarjar; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.TreeMap; + +/** + * A prefix trie of {@link Wildcard}, where the prefix is obtained from + * {@link Wildcard#getPlainTextPrefix()}. + * + * This allows quick lookup of applicable wildcards in the common case where wildcards have a + * non-empty plain-text prefix. + */ +public class WildcardTrie { + private final TreeMap subTries = new TreeMap<>(); + private final List wildcards = new ArrayList<>(); + private final String prefix; + + public WildcardTrie(List wildcards) { + this(""); + final ArrayList lst = new ArrayList<>(wildcards); + // Sort values to ensure that wildcards that prefix others are added first + lst.sort(Comparator.comparing(Wildcard::getPlainTextPrefix)); + for (Wildcard w : lst) { + final String prefix = w.getPlainTextPrefix(); + final WildcardTrie prefixTrie = findSubTrieWhichPrefixes(prefix, this); + if (prefixTrie.prefix.equals(prefix)) { + prefixTrie.wildcards.add(w); + } else { + final WildcardTrie newTrie = new WildcardTrie(prefix); + newTrie.wildcards.add(w); + prefixTrie.subTries.put(prefix, newTrie); + } + } + } + + private WildcardTrie(String prefix) { + this.prefix = prefix; + } + + private static WildcardTrie findSubTrieWhichPrefixes(String value, WildcardTrie baseTrie) { + final String possiblePrefix = baseTrie.subTries.floorKey(value); + // Because each level of the trie does not contain keys that are prefixes of each other, + // there can be at most one prefix of the value at that level, and that prefix will be the + // highest key ordered before the value (any non-prefix key would have a character + // difference with the prefix and so be ordered before the prefix or after the value). + if (possiblePrefix != null && value.startsWith(possiblePrefix)) { + return findSubTrieWhichPrefixes(value, baseTrie.subTries.get(possiblePrefix)); + } + return baseTrie; + } + + public List getPossibleMatches(String value) { + WildcardTrie baseTrie = this; + List prefixMatches = wildcards.isEmpty() + // If there's no match, don't even allocate a list and use the singleton emptyList + ? Collections.emptyList() : new ArrayList<>(wildcards); + while (true) { + final String possiblePrefix = baseTrie.subTries.floorKey(value); + if (possiblePrefix != null && value.startsWith(possiblePrefix)) { + baseTrie = baseTrie.subTries.get(possiblePrefix); + if (prefixMatches.isEmpty()) { + prefixMatches = new ArrayList<>(baseTrie.wildcards); + } else { + prefixMatches.addAll(baseTrie.wildcards); + } + } else { + prefixMatches.sort(Comparator.comparing(Wildcard::getRuleIndex)); + return prefixMatches; + } + } + } +}