From db256157eacbff1f37834e523667db677d07a5d8 Mon Sep 17 00:00:00 2001 From: Janne Koschinski <janne@kuschku.de> Date: Mon, 10 Dec 2018 14:54:12 +0100 Subject: [PATCH] Fixes #145 --- .../quassel/syncables/IgnoreListManager.kt | 64 +- .../libquassel/util/ExpressionMatch.kt | 1112 +++++++++++++++++ .../libquassel/util/GlobTransformer.kt | 96 -- .../libquassel/util/ExpressionMatchTest.kt | 406 ++++++ 4 files changed, 1547 insertions(+), 131 deletions(-) create mode 100644 lib/src/main/java/de/kuschku/libquassel/util/ExpressionMatch.kt delete mode 100644 lib/src/main/java/de/kuschku/libquassel/util/GlobTransformer.kt create mode 100644 lib/src/test/java/de/kuschku/libquassel/util/ExpressionMatchTest.kt diff --git a/lib/src/main/java/de/kuschku/libquassel/quassel/syncables/IgnoreListManager.kt b/lib/src/main/java/de/kuschku/libquassel/quassel/syncables/IgnoreListManager.kt index 551dd4418..85c2202ae 100644 --- a/lib/src/main/java/de/kuschku/libquassel/quassel/syncables/IgnoreListManager.kt +++ b/lib/src/main/java/de/kuschku/libquassel/quassel/syncables/IgnoreListManager.kt @@ -24,7 +24,7 @@ import de.kuschku.libquassel.quassel.syncables.interfaces.IIgnoreListManager import de.kuschku.libquassel.session.ISession import de.kuschku.libquassel.session.Session import de.kuschku.libquassel.session.SignalProxy -import de.kuschku.libquassel.util.GlobTransformer +import de.kuschku.libquassel.util.ExpressionMatch import de.kuschku.libquassel.util.flag.and import io.reactivex.subjects.BehaviorSubject import java.io.Serializable @@ -169,8 +169,8 @@ class IgnoreListManager constructor( val scope: ScopeType, val scopeRule: String, val isActive: Boolean, - val regEx: Regex?, - val scopeRegEx: Set<Regex> + val regEx: ExpressionMatch, + val scopeRegEx: ExpressionMatch ) : Serializable { constructor(type: Int, ignoreRule: String, isRegEx: Boolean, strictness: Int, scope: Int, scopeRule: String, isActive: Boolean) : this( @@ -181,24 +181,18 @@ class IgnoreListManager constructor( 
constructor(type: IgnoreType, ignoreRule: String, isRegEx: Boolean, strictness: StrictnessType, scope: ScopeType, scopeRule: String, isActive: Boolean) : this( type, ignoreRule, isRegEx, strictness, scope, scopeRule, isActive, - try { - Regex(ignoreRule.let { - if (isRegEx) it else GlobTransformer.convertGlobToRegex(it) - }, RegexOption.IGNORE_CASE) - } catch (_: Throwable) { - null - }, - scopeRule.split(';') - .map(String::trim) - .map(GlobTransformer::convertGlobToRegex) - .mapNotNull { - try { - Regex(it, RegexOption.IGNORE_CASE) - } catch (e: Throwable) { - null - } - } - .toSet() + ExpressionMatch( + ignoreRule, + if (isRegEx) ExpressionMatch.MatchMode.MatchRegEx + else ExpressionMatch.MatchMode.MatchWildcard, + false + ), + ExpressionMatch( + scopeRule, + // isRegEx only describes ignoreRule; scope rules are always ";"-separated wildcards + ExpressionMatch.MatchMode.MatchMultiWildcard, + false + ) ) fun copy( @@ -217,14 +211,17 @@ class IgnoreListManager constructor( scope = scope, scopeRule = scopeRule, isActive = isActive, - regEx = if (ignoreRule == this.ignoreRule) this.regEx else Regex(ignoreRule.let { - if (isRegEx) it else GlobTransformer.convertGlobToRegex(it) - }, RegexOption.IGNORE_CASE), - scopeRegEx = if (scopeRule == this.scopeRule) this.scopeRegEx else scopeRule.split(';') - .map(String::trim) - .map(GlobTransformer::convertGlobToRegex) - .map { Regex(it, RegexOption.IGNORE_CASE) } - .toSet() + regEx = ExpressionMatch( + ignoreRule, + if (isRegEx) ExpressionMatch.MatchMode.MatchRegEx + else ExpressionMatch.MatchMode.MatchWildcard, + false + ), + scopeRegEx = ExpressionMatch( + scopeRule, + ExpressionMatch.MatchMode.MatchMultiWildcard, + false + ) ) override fun equals(other: Any?): Boolean { @@ -269,12 +266,11 @@ class IgnoreListManager constructor( it.isActive && it.type != IgnoreType.CtcpIgnore }.filter { it.scope == ScopeType.GlobalScope || - it.scope == ScopeType.NetworkScope && it.scopeRegEx.any { it matches network } || - it.scope == ScopeType.ChannelScope && 
it.scopeRegEx.any { it matches bufferName } + it.scope == ScopeType.NetworkScope && it.scopeRegEx.match(network) || + it.scope == ScopeType.ChannelScope && it.scopeRegEx.match(bufferName) }.filter { val content = if (it.type == IgnoreType.MessageIgnore) msgContents else msgSender - !it.isRegEx && it.regEx?.matches(content) == true || - it.isRegEx && it.regEx?.containsMatchIn(content) == true + it.regEx.match(content) }.map { it.strictness }.sortedByDescending { @@ -296,6 +292,4 @@ class IgnoreListManager constructor( override fun toString(): String { return "IgnoreListManager(_ignoreList=$_ignoreList)" } - - } diff --git a/lib/src/main/java/de/kuschku/libquassel/util/ExpressionMatch.kt b/lib/src/main/java/de/kuschku/libquassel/util/ExpressionMatch.kt new file mode 100644 index 000000000..6a9e3b8b7 --- /dev/null +++ b/lib/src/main/java/de/kuschku/libquassel/util/ExpressionMatch.kt @@ -0,0 +1,1112 @@ +package de.kuschku.libquassel.util + +import de.kuschku.libquassel.util.compatibility.LoggingHandler.Companion.log +import de.kuschku.libquassel.util.compatibility.LoggingHandler.LogLevel +import java.io.Serializable + +class ExpressionMatch : Serializable { + enum class MatchMode { + /** Match phrase as specified, no special handling */ + MatchPhrase, + /** Match phrase as specified, split on \n only */ + MatchMultiPhrase, + /** Match wildcards, "!" at start inverts, "\" escapes */ + MatchWildcard, + /** Match wildcards, split ; or \n, "!" at start inverts, "\" escapes */ + MatchMultiWildcard, + /** Match as regular expression, "!..." 
invert regex, "\" escapes */ + MatchRegEx + } + + /** + * Construct an Expression match with the given parameters + * + * @param expression A phrase, wildcard expression, or regular expression + * @param mode Expression matching mode @see ExpressionMatch.MatchMode + * @param caseSensitive If true, match case-sensitively, otherwise ignore case when matching + */ + constructor(expression: String, mode: MatchMode, caseSensitive: Boolean) { + // Store the original parameters for later reference + _sourceExpression = expression + _sourceMode = mode + _sourceCaseSensitive = caseSensitive + + // Calculate the internal regex + // + // Do this now instead of on-demand to provide immediate feedback on errors when editing + // highlight and ignore rules. + cacheRegEx() + } + + /** + * Check if the given string matches the stored expression + * + * @param string String to check + * @param matchEmpty If true, always match when the expression is empty, otherwise never match + * @return True if match found, otherwise false + */ + fun match(string: String, matchEmpty: Boolean = false): Boolean { + // Handle empty expression strings + if (_sourceExpressionEmpty) { + // Match found if matching empty is allowed, otherwise no match found + return matchEmpty + } + + if (!isValid()) { + // Can't match on an invalid rule + return false + } + + // We have "_matchRegEx", "_matchInvertRegEx", or both due to isValid() check above + + // If specified, first check inverted rules + val _matchInvertRegEx = _matchInvertRegEx + if (_matchInvertRegExActive && _matchInvertRegEx != null) { + // Check inverted match rule + if (_matchInvertRegEx.containsMatchIn(string)) { + // Inverted rule matched, the rest of the rule cannot match + return false + } + } + + val _matchRegEx = _matchRegEx + if (_matchRegExActive && _matchRegEx != null) { + // Check regular match rule + return _matchRegEx.containsMatchIn(string) + } else { + // If no valid regular rules exist, due to the isValid() check there must be 
valid inverted + // rules that did not match. Count this as properly matching (implicit wildcard). + return true + } + } + + /** + * Gets if the source expression is empty + * + * @return True if source expression is empty, otherwise false + */ + fun isEmpty() = _sourceExpressionEmpty + + /** + * Gets if the source expression and parameters resulted in a valid expression matcher + * + * @return True if given expression is valid, otherwise false + */ + fun isValid(): Boolean { + return _sourceExpressionEmpty || + ((!_matchRegExActive || _matchRegEx != null) && + (!_matchInvertRegExActive || _matchInvertRegEx != null)) + } + + var sourceExpression + /** + * Gets the original expression match string + * + * @return String of the source expression match string + */ + get() = _sourceExpression + /** + * Sets the expression match string + * + * @param expression A phrase, wildcard expression, or regular expression + */ + set(expression) { + if (_sourceExpression != expression) { + _sourceExpression = expression + cacheRegEx() + } + } + + var sourceMode + /** + * Gets the original expression match mode + * + * @return MatchMode of the source expression + */ + get() = _sourceMode + /** + * Sets the expression match mode + * + * @param mode Expression matching mode (see ExpressionMatch.MatchMode) + */ + set(mode) { + if (_sourceMode != mode) { + _sourceMode = mode + cacheRegEx() + } + } + + var sourceCaseSensitive + /** + * Gets the original expression case-sensitivity + * + * @return True if case-sensitive, otherwise false + */ + get() = _sourceCaseSensitive + /** + * Sets the expression match as case sensitive or not + * + * @param caseSensitive If true, match case-sensitively, otherwise ignore case when matching + */ + set(caseSensitive) { + if (_sourceCaseSensitive != caseSensitive) { + _sourceCaseSensitive = caseSensitive + cacheRegEx() + } + } + + override fun equals(other: Any?): Boolean { + return other is ExpressionMatch && + _sourceExpression == 
other._sourceExpression && + _sourceMode == other._sourceMode && + _sourceCaseSensitive == other._sourceCaseSensitive + } + + private fun cacheRegEx() { + _matchRegExActive = false + _matchInvertRegExActive = false + + _sourceExpressionEmpty = _sourceExpression.isEmpty() + if (_sourceExpressionEmpty) { + // No need to calculate anything for empty strings + return + } + + // Convert the given expression to a regular expression based on the mode + when (_sourceMode) { + MatchMode.MatchPhrase -> { + // Match entire phrase, noninverted + // Don't trim whitespace for phrase matching as someone might want to match on " word ", a + // more-specific request than "word". + _matchRegEx = regExFactory("(?:^|\\W)" + regExEscape(_sourceExpression) + "(?:\\W|$)", + _sourceCaseSensitive) + _matchRegExActive = true + } + MatchMode.MatchMultiPhrase -> { + // Match multiple entire phrases, noninverted + // Convert from multiple-phrase rules + _matchRegEx = regExFactory(convertFromMultiPhrase(_sourceExpression), _sourceCaseSensitive) + _matchRegExActive = true + } + MatchMode.MatchWildcard -> { + // Match as wildcard expression + // Convert from wildcard rules for a single wildcard + if (_sourceExpression.startsWith("!")) { + // Inverted rule: take the remainder of the string + // "^" + invertComponents.at(0) + "$" + _matchInvertRegEx = regExFactory("^" + wildcardToRegEx(_sourceExpression.substring(1)) + "$", + _sourceCaseSensitive) + _matchInvertRegExActive = true + } else { + // Normal rule: take the whole string + // Account for any escaped "!" (i.e. "\!") by skipping past the "\", but don't skip past + // escaped "\" (i.e. 
"\\!") + val expression = + if (_sourceExpression.startsWith("\\!")) _sourceExpression.substring(1) + else _sourceExpression + _matchRegEx = regExFactory("^" + wildcardToRegEx(expression) + "$", _sourceCaseSensitive) + _matchRegExActive = true + } + } + MatchMode.MatchMultiWildcard -> { + // Match as multiple wildcard expressions + // Convert from wildcard rules for multiple wildcards + // (The generator function handles setting matchRegEx/matchInvertRegEx) + generateFromMultiWildcard(_sourceExpression, _sourceCaseSensitive) + } + MatchMode.MatchRegEx -> { + // Match as regular expression + if (_sourceExpression.startsWith("!")) { + // Inverted rule: take the remainder of the string + _matchInvertRegEx = regExFactory(_sourceExpression.substring(1), _sourceCaseSensitive) + _matchInvertRegExActive = true + } else { + // Normal rule: take the whole string + // Account for any escaped "!" (i.e. "\!") by skipping past the "\", but don't skip past + // escaped "\" (i.e. "\\!") + val expression = + if (_sourceExpression.startsWith("\\!")) _sourceExpression.substring(1) + else _sourceExpression + _matchRegEx = regExFactory(expression, _sourceCaseSensitive) + _matchRegExActive = true + } + } + } + + if (!_sourceExpressionEmpty && !isValid()) { + // This can happen with invalid regex, so make it a bit more user-friendly. Set it to Info + // level as ideally someone's not just going to leave a broken match rule around. For + // MatchRegEx, they probably need to fix their regex rule. For the other modes, there's + // probably a bug in the parsing routines (which should also be fixed). + + log(LogLevel.INFO, + "ExpressionMatch", + "Could not parse expression match rule $_sourceExpression (match mode: $_sourceMode), this rule will be ignored") + } + } + + /** + * Internally converts a wildcard rule into regular expressions + * + * Splits wildcards on ";" and "\n", "!..." 
inverts section, "\" escapes + * + * @param originalRule MultiWildcard rule list, ";"-separated + * @param caseSensitive If true, match case-sensitively, otherwise ignore case when matching + */ + private fun generateFromMultiWildcard(originalRule: String, caseSensitive: Boolean) { + // Convert the wildcard rule into regular expression format + // First, reset the existing match expressions + _matchRegEx = null + _matchInvertRegEx = null + _matchRegExActive = false + _matchInvertRegExActive = false + + // This gets handled in three steps: + // + // 1. Break apart ";"-separated list into components + // 2. Convert components from wildcard format into regular expression format + // 3. Combine normal/invert components into normal/invert regular expressions + // + // Let's start by making the list... + + // Convert a ";"-separated list into an actual list, splitting on newlines and unescaping + // escaped characters + + // Escaped list rules (where "[\n]" represents newline): + // --------------- + // Token | Outcome + // -------|-------- + // ; | Split + // \; | Replace with ";" + // \\; | Split (keep as "\\") + // ! | At start: mark as inverted + // \! | At start: replace with "!" + // \\! | At start: keep as "\\!" (replaced with "\!" in wildcard conversion) + // ! | Elsewhere: keep as "!" + // \! | Elsewhere: keep as "\!" + // \\! | Elsewhere: keep as "\\!" + // \\\ | Keep as "\\" + "\", set consecutive slashes to 1 + // [\n] | Split + // \[\n] | Split (keep as "\") + // \\[\n] | Split (keep as "\\") + // ... | Keep as "..." + // \... | Keep as "\..." + // \\... | Keep as "\\..." + // + // Strings are forced to end with "\n", always applying "\..." and "\\..." rules + // "..." 
also includes another "\" character + // + // All whitespace is trimmed from each component + + // "\\" and "\" are not downconverted to allow for other escape codes to be detected in + // ExpressionMatch::wildcardToRegex + + // Example: + // + // > Wildcard rule + // norm;!invert; norm-space ; !invert-space ;;!;\!norm-escaped;\\!slash-invert;\\\\double; + // escape\;sep;slash-end-split\\;quad\\\\!noninvert;newline-split[\n]newline-split-slash\\[\n] + // slash-at-end\\ [line does not continue] + // + // (Newlines are encoded as "[\n]". Ignore linebreaks for the sake of comment wrapping.) + // + // + // > Normal components without wildcard conversion + // norm + // norm-space + // !norm-escaped + // \\!slash-invert + // \\\\double + // escape;sep + // slash-end-split\\ [line does not continue] + // quad\\\\!noninvert + // newline-split + // newline-split-slash\\ [line does not continue] + // slash-at-end\\ [line does not continue] + // + // > Inverted components without wildcard conversion + // invert + // invert-space + // + // + // > Normal components with wildcard conversion + // norm + // norm\-space + // \!norm\-escaped + // \\\!slash\-invert + // \\\\double + // escape\;sep + // slash\-end\-split\\ [line does not continue] + // quad\\\\\!noninvert + // newline\-split + // newline\-split\-slash\\ [line does not continue] + // slash\-at\-end\\ [line does not continue] + // + // > Inverted components with wildcard conversion + // invert + // invert\-space + // + // + // > Normal wildcard-converted regex + // ^(?:norm|norm\-space|\!norm\-escaped|\\\!slash\-invert|\\\\double|escape\;sep| + // slash\-end\-split\\|quad\\\\\!noninvert|newline\-split|newline\-split\-slash\\| + // slash\-at\-end\\)$ + // + // > Inverted wildcard-converted regex + // ^(?:invert|invert\-space)$ + + // Prepare to loop! 
+ + var rule = originalRule + + // Force a termination at the end of the string to trigger a split + // Don't check for ";" splits as they may be escaped + if (!rule.endsWith("\n")) { + rule += "\n" + } + + // Result, sorted into normal and inverted rules + val normalComponents = mutableSetOf<String>() + val invertComponents = mutableSetOf<String>() + + // Current string + var curString = "" + // Consecutive "\" characters + var consecutiveSlashes = 0 + // Whether or not this marks an inverted rule + var isInverted = false + // Whether or not we're at the beginning of the rule (for detecting "!" and "\!") + var isRuleStart = true + + for (curChar in rule) { + // Check if it's on the list of special list characters + when (curChar) { + ';' -> { + // Separator found + when (consecutiveSlashes) { + 0, 2 -> { + // ";" -> Split + // ...or... + // "\\;" -> Split (keep as "\\") + // Not escaped separator, split into a new item + + // Apply the additional "\\" if needed + if (consecutiveSlashes == 2) { + // "\\;" -> Split (keep as "\\") + curString += """\\""" + } + + // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2" + curString = curString.trim() + + // Skip empty items + if (curString.isNotEmpty()) { + // Add to inverted/normal list + if (isInverted) { + invertComponents.add(wildcardToRegEx(curString)) + } else { + normalComponents.add(wildcardToRegEx(curString)) + } + } + // Reset the current list item + curString = "" + isInverted = false + isRuleStart = true + } + 1 -> { + // "\;" -> Replace with ";" + curString += ";" + isRuleStart = false + } + else -> { + // This shouldn't ever happen (even with invalid wildcard rules), log a warning + log(LogLevel.WARN, + "ExpressionMatch", + "Wildcard rule $rule resulted in rule component $curString with unexpected count of consecutive '\\' ($consecutiveSlashes), ignoring $curChar character!") + isRuleStart = false + } + } + consecutiveSlashes = 0 + } + '!' 
-> { + // Rule inverter found + if (isRuleStart) { + // Apply the inverting logic + when (consecutiveSlashes) { + 0 -> { + // "!" -> At start: mark as inverted + isInverted = true + // Don't include the "!" character + } + 1 -> { + // "\!" -> At start: replace with "!" + curString += "!" + } + 2 -> { + // "\\!" -> At start: keep as "\\!" (replaced with "\!" in wildcard conversion) + curString += """\\!""" + } + else -> { + // This shouldn't ever happen (even with invalid wildcard rules), log a warning + log(LogLevel.WARN, + "ExpressionMatch", + "Wildcard rule $rule resulted in rule component $curString with unexpected count of consecutive '\\' ($consecutiveSlashes), ignoring $curChar character!") + } + } + } else { + // Preserve the characters as they are now + when (consecutiveSlashes) { + 0 -> { + // "!" -> Elsewhere: keep as "!" + curString += "!" + } + 1, 2 -> { + // "\!" -> Elsewhere: keep as "\!" + // "\\!" -> Elsewhere: keep as "\\!" + curString += """\""".repeat(consecutiveSlashes) + "!" + } + else -> { + // This shouldn't ever happen (even with invalid wildcard rules), log a warning + log(LogLevel.WARN, + "ExpressionMatch", + "Wildcard rule $rule resulted in rule component $curString with unexpected count of consecutive '\\' ($consecutiveSlashes), ignoring $curChar character!") + } + } + } + isRuleStart = false + consecutiveSlashes = 0 + } + '\\' -> { + // Split escape + // Increase consecutive slash count + consecutiveSlashes++ + // Check if we've reached "\\\"... 
+ if (consecutiveSlashes == 3) { + // "\\\" -> Keep as "\\" + "\" + curString += """\\""" + // No longer at the rule start + isRuleStart = false + // Set consecutive slashes to 1, recognizing the trailing "\" + consecutiveSlashes = 1 + } else if (consecutiveSlashes > 3) { + // This shouldn't ever happen (even with invalid wildcard rules), log a warning + log(LogLevel.WARN, + "ExpressionMatch", + "Wildcard rule $rule resulted in rule component $curString with unexpected count of consecutive '\\' ($consecutiveSlashes), ignoring $curChar character!") + } + // Don't set "isRuleStart" here as "\" is used in escape sequences + } + '\n' -> { + // Newline found + // Preserve the characters as they are now + + // "[\n]" -> Split + // "\[\n]" -> Split (keep as "\") + // "\\[\n]" -> Split (keep as "\\") + + when (consecutiveSlashes) { + 0 -> { + // Keep string as is + } + 1, 2 -> { + // Apply the additional "\" or "\\" + curString += """\""".repeat(consecutiveSlashes) + } + else -> { + // This shouldn't ever happen (even with invalid wildcard rules), log a warning + log(LogLevel.WARN, + "ExpressionMatch", + "Wildcard rule $rule resulted in rule component $curString with unexpected count of consecutive '\\' ($consecutiveSlashes), applying newline split anyways!") + } + } + + // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2" + curString = curString.trim() + + // Skip empty items + if (curString.isNotEmpty()) { + // Add to inverted/normal list + if (isInverted) { + invertComponents.add(wildcardToRegEx(curString)) + } else { + normalComponents.add(wildcardToRegEx(curString)) + } + } + // Reset the current list item + curString = "" + isInverted = false + isRuleStart = true + consecutiveSlashes = 0 + } + else -> { + // Preserve the characters as they are now + when (consecutiveSlashes) { + 0 -> { + // "..." -> Keep as "..." + curString += curChar + } + 1, 2 -> { + // "\..." -> Keep as "\..." + // "\\..." -> Keep as "\\..." 
+ curString += """\""".repeat(consecutiveSlashes) + curChar + } + else -> { + log(LogLevel.WARN, + "ExpressionMatch", + "Wildcard rule $rule resulted in rule component $curString with unexpected count of consecutive '\\' ($consecutiveSlashes), ignoring $curChar char escape!") + } + } + // Don't mark as past rule start for whitespace (whitespace gets trimmed) + if (!curChar.isWhitespace()) { + isRuleStart = false + } + consecutiveSlashes = 0 + } + } + } + + // Create full regular expressions by... + // > Anchoring to start and end of string to mimic QRegExp's .exactMatch() handling, "^...$" + // > Enclosing within a non-capturing group to avoid overhead of text extraction, "(?:...)" + // > Flattening normal and inverted rules using the regex OR character "...|..." + // + // Before: [foo, bar, baz] + // After: ^(?:foo|bar|baz)$ + // + // See https://doc.qt.io/qt-5/qregularexpression.html#porting-from-qregexp-exactmatch + // And https://regex101.com/ + + // Any empty/invalid regex are handled within ExpressionMatch::match() + if (!normalComponents.isEmpty()) { + // Create normal match regex + if (normalComponents.count() == 1) { + // Single item, skip the noncapturing group + _matchRegEx = regExFactory("^${normalComponents.toList().first()}$", caseSensitive) + } else { + val buffer = StringBuilder() + buffer.append("^(?:") + normalComponents.joinTo(buffer, "|") + buffer.append(")$") + _matchRegEx = regExFactory(buffer.toString(), caseSensitive) + } + _matchRegExActive = true + } + if (!invertComponents.isEmpty()) { + // Create invert match regex + if (invertComponents.count() == 1) { + // Single item, skip the noncapturing group + _matchInvertRegEx = regExFactory("^${invertComponents.toList().first()}$", caseSensitive) + } else { + val buffer = StringBuilder() + buffer.append("^(?:") + invertComponents.joinTo(buffer, "|") + buffer.append(")$") + _matchInvertRegEx = regExFactory(buffer.toString(), caseSensitive) + } + _matchInvertRegExActive = true + } + } + + // 
Original/source components + /** Expression match string given on creation */ + private var _sourceExpression: String = "" + /** Expression match mode given on creation */ + private var _sourceMode: MatchMode = MatchMode.MatchPhrase + /** Expression case sensitive on creation */ + private var _sourceCaseSensitive: Boolean = false + + // Derived components + /** Cached expression match string is empty */ + private var _sourceExpressionEmpty: Boolean = false + + /** Underlying regular expression matching instance for normal (noninverted) rules */ + private var _matchRegEx: Regex? = null + /** If true, use normal expression in matching */ + private var _matchRegExActive: Boolean = false + + /** Underlying regular expression matching instance for inverted rules */ + private var _matchInvertRegEx: Regex? = null + /** If true, use invert expression in matching */ + private var _matchInvertRegExActive: Boolean = false + + companion object { + + /** + * Creates a regular expression object of appropriate type and case-sensitivity + * + * @param regExString Regular expression string + * @param caseSensitive If true, match case-sensitively, otherwise ignore case when matching + * @return Configured Regex, or null if regExString is not a valid pattern (see isValid()) + */ + private fun regExFactory(regExString: String, caseSensitive: Boolean): Regex? = try { + if (caseSensitive) Regex(regExString) else Regex(regExString, RegexOption.IGNORE_CASE) + } catch (_: java.util.regex.PatternSyntaxException) { null } + + /** + * Escapes any regular expression characters in a string so they have no special meaning + * + * @param phrase String containing potential regular expression special characters + * @return QString with all regular expression characters escaped + */ + private fun regExEscape(phrase: String): String { + val size = phrase.length + val result = StringBuilder(size) + var i = 0 + while (i < size) { + val current = phrase[i] + when (current) { + 0.toChar() -> { + result.append('\\') + result.append('0') + } + '\\', '.', '[', ']', '{', '}', '(', ')', '<', '>', '*', '+', '-', '=', '?', '^', '$', 
'|' -> { + result.append('\\') + result.append(current) + } + else -> { + result.append(current) + } + } + i++ + } + return result.toString() + } + + /** + * Converts a multiple-phrase rule into a regular expression + * + * Unconditionally splits phrases on "\n", whitespace is preserved + * + * @param originalRule MultiPhrase rule list, "\n"-separated + * @return A regular expression matching the given phrases + */ + private fun convertFromMultiPhrase(originalRule: String): String { + // Convert the multi-phrase rule into regular expression format + // Split apart the original rule into components + val components = mutableListOf<String>() + // Split on "\n" + for (component in originalRule.splitToSequence('\n')) { + if (component.isNotEmpty()) { + components.add(regExEscape(component)) + } + } + + // Create full regular expression by... + // > Enclosing within a non-capturing group to avoid overhead of text extraction, "(?:...)" + // > Flattening normal and inverted rules using the regex OR character "...|..." + // + // Before: [foo, bar, baz] + // After: (?:^|\W)(?:foo|bar|baz)(?:\W|$) + + if (components.count() == 1) { + // Single item, skip the noncapturing group + return "(?:^|\\W)${components[0]}(?:\\W|$)" + } else { + val buffer = java.lang.StringBuilder() + buffer.append("(?:^|\\W)(?:") + components.joinTo(buffer, "|") + buffer.append(")(?:\\W|$)") + return buffer.toString() + } + } + + /** + * Converts a wildcard expression into a regular expression + * + * NOTE: Does not handle Quassel's extended scope matching and splitting. + * + * @see ExpressionMatch::convertFromWildcard() + * @return QString with all regular expression characters escaped + */ + fun wildcardToRegEx(expression: String): String { + // Convert the wildcard expression into regular expression format + + // We're taking a little bit different of a route... 
+ // + // Original QRegExp::Wildcard rules: + // -------------------------- + // Wildcard | Regex | Outcome + // ---------|-------|-------- + // * | .* | zero or more of any character + // ? | . | any single character + // + // NOTE 1: This is QRegExp::Wildcard, not QRegExp::WildcardUnix + // + // NOTE 2: We are ignoring the "[...]" character-class matching functionality of + // QRegExp::Wildcard as that feature's a bit more complex and can be handled with full-featured + // regexes. + // + // See https://doc.qt.io/qt-5/qregexp.html#wildcard-matching + // + // Quassel originally did not use QRegExp::WildcardUnix, which prevented escaping "*" and "?" in + // messages. Unfortunately, spam messages might decide to use both, so offering a way to escape + // makes sense. + // + // On the flip-side, that means to match "\" requires escaping as "\\", breaking backwards + // compatibility. + // + // Quassel's Wildcard rules + // ------------------------------------------ + // Wildcard | Regex escaped | Regex | Outcome + // ---------|---------------|-------|-------- + // * | \* | .* | zero or more of any character + // ? | \? | . | any single character + // \* | \\\* | \* | literal "*" + // \? | \\\? | \? | literal "?" + // \[...] | \\[...] | [...] | invalid escape, ignore it + // \\ | \\\\ | \\ | literal "\" + // + // In essence, "*" and "?" need changed only if not escaped, "\\" collapses into "\", "\" gets + // ignored; other characters escape normally. + // + // Example: + // + // > Wildcard rule + // never?gonna*give\*you\?up\\test|y\yeah\\1\\\\2\\\1inval + // + // ("\\\\" represents "\\", "\\" represents "\", and "\\\" is valid+invalid, "\") + // + // > Regex escaped wildcard rule + // never\?gonna\*give\\\*you\\\?up\\\\test\|y\\yeah\\\\1\\\\\\\\2\\\\\\1inval + // + // > Expected correct regex + // never.gonna.*give\*you\?up\\test\|yyeah\\1\\\\2\\1inval + // + // > Undoing regex escaping of "\" as "\\" (i.e. 
simple replace, with special escapes intact) + // never.gonna.*give\*you\?up\test\|yyeah\1\\2\1inval + + // Escape string according to regex + val regExEscaped = regExEscape(expression) + + // Fix up the result + // + // NOTE: In theory, regular expression lookbehind could solve this. Unfortunately, QRegExp does + // not support lookbehind, and it's theoretically inefficient, anyways. Just use an approach + // similar to that taken by QRegExp's official wildcard mode. + // + // Lookbehind example (that we can't use): + // (?<!abc)test Negative lookbehind - don't match if "test" is proceeded by "abc" // + // See https://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/tools/qregexp.cpp + // + // NOTE: We don't copy QRegExp's mode as QRegularExpression has more special characters. We + // can't use the same escaping code, hence calling the appropriate QReg[...]::escape() above. + + // Prepare to loop! + + // Result + val result = StringBuilder() + // Consecutive "\" characters + var consecutiveSlashes = 0 + + for (curChar in regExEscaped) { + // Check if it's on the list of special wildcard characters + when (curChar) { + '?' -> { + // Wildcard "?" + when (consecutiveSlashes) { + 1 -> { + // "?" -> "\?" -> "." + // Convert from regex escaped "?" to regular expression + result.append(".") + } + 3 -> { + // "\?" -> "\\\?" -> "\?" + // Convert from regex escaped "\?" 
to literal string + result.append("""\?""") + } + else -> { + // This shouldn't ever happen (even with invalid wildcard rules), log a warning + log(LogLevel.WARN, + "ExpressionMatch", + "Wildcard rule $expression resulted in escaped regular expression string $regExEscaped with unexpected count of consecutive '\\' ($consecutiveSlashes), ignoring $curChar character!") + } + } + consecutiveSlashes = 0 + } + '*' -> { + // Wildcard "*" + when (consecutiveSlashes) { + 1 -> { + // "*" -> "\*" -> ".*" + // Convert from regex escaped "*" to regular expression + result.append(".*") + } + 3 -> { + // "\*" -> "\\\*" -> "\*" + // Convert from regex escaped "\*" to literal string + result.append("""\*""") + } + else -> { + // This shouldn't ever happen (even with invalid wildcard rules), log a warning + log(LogLevel.WARN, + "ExpressionMatch", + "Wildcard rule $expression resulted in escaped regular expression string $regExEscaped with unexpected count of consecutive '\\' ($consecutiveSlashes), ignoring $curChar character!") + } + } + consecutiveSlashes = 0 + } + '\\' -> { + // Wildcard escape + // Increase consecutive slash count + consecutiveSlashes++ + // Check if we've hit an escape sequence + if (consecutiveSlashes == 4) { + // "\\" -> "\\\\" -> "\\" + // Convert from regex escaped "\\" to literal string + result.append("""\\""") + // Reset slash count + consecutiveSlashes = 0 + } + } + else -> { + // Any other character + when (consecutiveSlashes) { + 0, 2 -> { + // "[...]" -> "[...]" -> "[...]" + // ...or... + // "\[...]" -> "\\[...]" -> "[...]" + // Either just print the character itself, or convert from regex-escaped invalid + // wildcard escape sequence to the character itself + // + // Both mean doing nothing, the actual character [...] 
gets appended below + } + 1 -> { + // "[...]" -> "\[...]" -> "\" + // Keep regex-escaped special character "[...]" as literal string + // (Where "[...]" represents any non-wildcard regex special character) + result.append("""\""") + // The actual character [...] gets appended below + } + else -> { + // This shouldn't ever happen (even with invalid wildcard rules), log a warning + log(LogLevel.WARN, + "ExpressionMatch", + "Wildcard rule $expression resulted in escaped regular expression string $regExEscaped with unexpected count of consecutive '\\' ($consecutiveSlashes), ignoring $curChar char escape!") + } + } + consecutiveSlashes = 0 + // Add the character itself + result.append(curChar) + } + } + } + + // Anchoring to simulate QRegExp::exactMatch() is handled in + // ExpressionMatch::convertFromWildcard() + return result.toString() + } + + /** + * Trim extraneous whitespace from individual rules within a given MultiWildcard expression + * + * This respects the ";" escaping rules with "\". It is safe to call this multiple times; a + * trimmed string should remain unchanged. + * + * @see ExpressionMatch.MatchMode.MatchMultiWildcard + * + * @param originalRule MultiWildcard rule list, ";"-separated + * @return Trimmed MultiWildcard rule list + */ + fun trimMultiWildcardWhitespace(originalRule: String): String { + // This gets handled in two steps: + // + // 1. Break apart ";"-separated list into components + // 2. Combine whitespace-trimmed components into wildcard expression + // + // Let's start by making the list... 
+ + // Convert a ";"-separated list into an actual list, splitting on newlines and unescaping + // escaped characters + + // Escaped list rules (where "[\n]" represents newline): + // --------------- + // Token | Outcome + // -------|-------- + // ; | Split + // \; | Keep as "\;" + // \\; | Split (keep as "\\") + // \\\ | Keep as "\\" + "\", set consecutive slashes to 1 + // [\n] | Split + // \[\n] | Split (keep as "\") + // \\[\n] | Split (keep as "\\") + // ... | Keep as "..." + // \... | Keep as "\..." + // \\... | Keep as "\\..." + // + // Strings are forced to end with "\n", always applying "\..." and "\\..." rules + // "..." also includes another "\" character + // + // All whitespace is trimmed from each component + + // "\\" and "\" are not downconverted to allow for other escape codes to be detected in + // ExpressionMatch::wildcardToRegex + + // Example: + // + // > Wildcard rule + // norm; norm-space ; newline-space [\n] ;escape \; sep ; slash-end-split\\; quad\\\\norm; + // newline-split-slash\\[\n] slash-at-end\\ [line does not continue] + // + // > Components + // norm + // norm-space + // newline-space + // escape \; sep + // slash-end-split\\ [line does not continue] + // quad\\\\norm + // newline-split-slash\\ [line does not continue] + // slash-at-end\\ [line does not continue] + // + // > Trimmed wildcard rule + // norm; norm-space; newline-space[\n]escape \; sep; slash-end-split\\; quad\\\\norm; + // newline-split-slash\\[\n]slash-at-end\\ [line does not continue] + // + // (Newlines are encoded as "[\n]". Ignore linebreaks for the sake of comment wrapping.) + + // Prepare to loop! 
+ + var rule: String = originalRule + + // Force a termination at the end of the string to trigger a split + // Don't check for ";" splits as they may be escaped + if (!rule.endsWith("\n")) { + rule += "\n" + } + + // Result + val result = StringBuilder() + // Current string + var curString = "" + // Max length + // Consecutive "\" characters + var consecutiveSlashes = 0 + + for (curChar in rule) { + // Check if it's on the list of special list characters + when (curChar) { + ';' -> { + // Separator found + when (consecutiveSlashes) { + 0, 2 -> { + // ";" -> Split + // ...or... + // "\\;" -> Split (keep as "\\") + // Not escaped separator, split into a new item + + // Apply the additional "\\" if needed + if (consecutiveSlashes == 2) { + // "\\;" -> Split (keep as "\\") + curString += """\\""" + } + + curString = curString.trim() + + // Skip empty items + if (curString.isNotEmpty()) { + // Add to list with the same separator used + result.append(curString) + result.append("; ") + } + + // Reset the current list item + curString = "" + } + 1 -> { + // "\;" -> Keep as "\;" + curString += """\;""" + } + else -> { + // This shouldn't ever happen (even with invalid wildcard rules), log a warning + log(LogLevel.WARN, + "ExpressionMatch", + "Wildcard rule $rule resulted in rule component $curString with unexpected count of consecutive '\\' ($consecutiveSlashes), ignoring $curChar character!") + } + } + consecutiveSlashes = 0 + } + '\\' -> { + // Split escape + // Increase consecutive slash count + consecutiveSlashes++ + // Check if we’ve reached "\\\"... 
+ if (consecutiveSlashes == 3) { + // "\\\" -> Keep as "\\" + "\" + curString += """\\""" + // Set consecutive slashes to 1, recognizing the trailing "\" + consecutiveSlashes = 1 + } else if (consecutiveSlashes > 3) { + // This shouldn't ever happen (even with invalid wildcard rules), log a warning + log(LogLevel.WARN, + "ExpressionMatch", + "Wildcard rule $rule resulted in rule component $curString with unexpected count of consecutive '\\' ($consecutiveSlashes), ignoring $curChar character!") + } + } + '\n' -> { + // Newline found + // Preserve the characters as they are now + + // "[\n]" -> Split + // "\[\n]" -> Split (keep as "\") + // "\\[\n]" -> Split (keep as "\\") + + when (consecutiveSlashes) { + 0 -> { + // Keep string as is + } + 1, 2 -> { + // Apply the additional "\" or "\\" + curString += """\""".repeat(consecutiveSlashes) + } + else -> { + log(LogLevel.WARN, + "ExpressionMatch", + "Wildcard rule $rule resulted in rule component $curString with unexpected count of consecutive '\\' ($consecutiveSlashes), applying newline split anyways!") + } + } + + // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2" + curString = curString.trim() + + // Skip empty items + if (curString.isNotEmpty()) { + // Add to list with the same separator used + result.append(curString + "\n") + } + + // Reset the current list item + curString = "" + consecutiveSlashes = 0 + } + else -> { + when (consecutiveSlashes) { + 0 -> { + // "..." -> Keep as "..." + curString += curChar + } + 1, 2 -> { + // "\..." -> Keep as "\..." + // "\\..." -> Keep as "\\..." 
+ curString += """\""".repeat(consecutiveSlashes) + curString += curChar + } + else -> { + log(LogLevel.WARN, + "ExpressionMatch", + "Wildcard rule $rule resulted in rule component $curString with unexpected count of consecutive '\\' ($consecutiveSlashes), ignoring $curChar char escape!") + } + } + consecutiveSlashes = 0 + } + } + } + + // Remove any trailing separators + if (result.endsWith("; ")) { + result.setLength(maxOf(result.length - 2, 0)) + } + + // Remove any trailing whitespace + return result.trim().toString() + } + } +} diff --git a/lib/src/main/java/de/kuschku/libquassel/util/GlobTransformer.kt b/lib/src/main/java/de/kuschku/libquassel/util/GlobTransformer.kt deleted file mode 100644 index e43ed2200..000000000 --- a/lib/src/main/java/de/kuschku/libquassel/util/GlobTransformer.kt +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Quasseldroid - Quassel client for Android - * - * Copyright (c) 2018 Janne Koschinski - * Copyright (c) 2018 The Quassel Project - * - * This program is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 3 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -package de.kuschku.libquassel.util - -object GlobTransformer { - /** - * Converts a standard POSIX Shell globbing pattern into a regular expression - * pattern. The result can be used with the standard {@link java.util.regex} API to - * recognize strings which match the glob pattern. 
- * <p/> - * See also, the POSIX Shell language: - * http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_13_01 - * - * @param pattern A glob pattern. - * @return A regex pattern to recognize the given glob pattern. - */ - fun convertGlobToRegex(pattern: String): String { - val sb = StringBuilder(pattern.length) - var inGroup = 0 - var inClass = 0 - var firstIndexInClass = -1 - val arr = pattern.toCharArray() - var i = 0 - while (i < arr.size) { - val ch = arr[i] - when (ch) { - '\\' -> - if (++i >= arr.size) { - sb.append('\\') - } else { - val next = arr[i] - when (next) { - ',' -> { - } - 'Q', 'E' -> { - // extra escape needed - sb.append('\\') - sb.append('\\') - } - else -> sb.append('\\') - }// escape not needed - sb.append(next) - } - '*' -> sb.append(if (inClass == 0) ".*" else '*') - '?' -> sb.append(if (inClass == 0) '.' else '?') - '[' -> { - inClass++ - firstIndexInClass = i + 1 - sb.append('[') - } - ']' -> { - inClass-- - sb.append(']') - } - '.', '(', ')', - '+', '|', '^', - '$', '@', '%' -> { - if (inClass == 0 || firstIndexInClass == i && ch == '^') - sb.append('\\') - sb.append(ch) - } - '!' 
-> - sb.append(if (firstIndexInClass == i) '^' else '!') - '{' -> { - inGroup++ - sb.append('(') - } - '}' -> { - inGroup-- - sb.append(')') - } - ',' -> sb.append(if (inGroup > 0) '|' else ',') - else -> sb.append(ch) - } - i++ - } - return sb.toString() - } -} diff --git a/lib/src/test/java/de/kuschku/libquassel/util/ExpressionMatchTest.kt b/lib/src/test/java/de/kuschku/libquassel/util/ExpressionMatchTest.kt new file mode 100644 index 000000000..d875f7801 --- /dev/null +++ b/lib/src/test/java/de/kuschku/libquassel/util/ExpressionMatchTest.kt @@ -0,0 +1,406 @@ +package de.kuschku.libquassel.util + +import org.junit.Assert.* +import org.junit.Test + +class ExpressionMatchTest { + @Test + fun testEmptyPattern() { + // Empty pattern + val emptyMatch = ExpressionMatch("", ExpressionMatch.MatchMode.MatchPhrase, false) + + // Assert empty is valid + assertTrue(emptyMatch.isValid()) + // Assert empty + assertTrue(emptyMatch.isEmpty()) + // Assert default match fails (same as setting match empty to false) + assertFalse(emptyMatch.match("something")) + // Assert match empty succeeds + assertTrue(emptyMatch.match("something", true)) + } + + @Test + fun testMatchPhrase() { + // Simple phrase, case-insensitive + val simpleMatch = ExpressionMatch("test", ExpressionMatch.MatchMode.MatchPhrase, false) + // Simple phrase, case-sensitive + val simpleMatchCS = ExpressionMatch("test", ExpressionMatch.MatchMode.MatchPhrase, true) + // Phrase with space, case-insensitive + val simpleMatchSpace = ExpressionMatch(" space ", ExpressionMatch.MatchMode.MatchPhrase, true) + // Complex phrase + val complexMatchFull = """^(?:norm|norm\-space|\!norm\-escaped|\\\!slash\-invert|\\\\double|escape\;sep|slash\-end\-split\\|quad\\\\\!noninvert|newline\-split|newline\-split\-slash\\|slash\-at\-end\\)$""" + val complexMatch = ExpressionMatch(complexMatchFull, + ExpressionMatch.MatchMode.MatchPhrase, + false) + + // Assert valid and not empty + assertTrue(simpleMatch.isValid()) + 
assertFalse(simpleMatch.isEmpty()) + assertTrue(simpleMatchCS.isValid()) + assertFalse(simpleMatchCS.isEmpty()) + assertTrue(simpleMatchSpace.isValid()) + assertFalse(simpleMatchSpace.isEmpty()) + assertTrue(complexMatch.isValid()) + assertFalse(complexMatch.isEmpty()) + + // Assert match succeeds + assertTrue(simpleMatch.match("test")) + assertTrue(simpleMatch.match("other test;")) + assertTrue(simpleMatchSpace.match(" space ")) + // Assert partial match fails + assertFalse(simpleMatch.match("testing")) + assertFalse(simpleMatchSpace.match("space")) + // Assert unrelated fails + assertFalse(simpleMatch.match("not above")) + + // Assert case sensitivity followed + assertFalse(simpleMatch.sourceCaseSensitive) + assertTrue(simpleMatch.match("TeSt")) + assertTrue(simpleMatchCS.sourceCaseSensitive) + assertFalse(simpleMatchCS.match("TeSt")) + + // Assert complex phrases are escaped properly + assertTrue(complexMatch.match(complexMatchFull)) + assertFalse(complexMatch.match("norm")) + } + + @Test + fun matchMultiPhrase() { + // Simple phrases, case-insensitive + val simpleMatch = ExpressionMatch("test\nOther ", + ExpressionMatch.MatchMode.MatchMultiPhrase, + false) + // Simple phrases, case-sensitive + val simpleMatchCS = ExpressionMatch("test\nOther ", + ExpressionMatch.MatchMode.MatchMultiPhrase, + true) + // Complex phrases + val complexMatchFullA = """^(?:norm|norm\-space|\!norm\-escaped|\\\!slash\-invert|\\\\double)|escape\;sep|slash\-end\-split\\|quad\\\\\!noninvert)|newline\-split|newline\-split\-slash\\|slash\-at\-end\\)$""" + val complexMatchFullB = """^(?:invert|invert\-space)$)$""" + val complexMatch = ExpressionMatch(complexMatchFullA + "\n" + complexMatchFullB, + ExpressionMatch.MatchMode.MatchMultiPhrase, + false) + + // Assert valid and not empty + assertTrue(simpleMatch.isValid()) + assertFalse(simpleMatch.isEmpty()) + assertTrue(simpleMatchCS.isValid()) + assertFalse(simpleMatchCS.isEmpty()) + assertTrue(complexMatch.isValid()) + 
assertFalse(complexMatch.isEmpty()) + + // Assert match succeeds + assertTrue(simpleMatch.match("test")) + assertTrue(simpleMatch.match("test[suffix]")) + assertTrue(simpleMatch.match("other test;")) + assertTrue(simpleMatch.match("Other ")) + assertTrue(simpleMatch.match(".Other !")) + // Assert partial match fails + assertFalse(simpleMatch.match("testing")) + assertFalse(simpleMatch.match("Other!")) + // Assert unrelated fails + assertFalse(simpleMatch.match("not above")) + + // Assert case sensitivity followed + assertFalse(simpleMatch.sourceCaseSensitive) + assertTrue(simpleMatch.match("TeSt")) + assertTrue(simpleMatchCS.sourceCaseSensitive) + assertFalse(simpleMatchCS.match("TeSt")) + + // Assert complex phrases are escaped properly + assertTrue(complexMatch.match(complexMatchFullA)) + assertTrue(complexMatch.match(complexMatchFullB)) + assertFalse(complexMatch.match("norm")) + assertFalse(complexMatch.match("invert")) + } + + @Test + fun matchWildcard() { + // Simple wildcard, case-insensitive + val simpleMatch = + ExpressionMatch("?test*", ExpressionMatch.MatchMode.MatchWildcard, false) + // Simple wildcard, case-sensitive + val simpleMatchCS = + ExpressionMatch("?test*", ExpressionMatch.MatchMode.MatchWildcard, true) + // Escaped wildcard, case-insensitive + val simpleMatchEscape = + ExpressionMatch("""\?test\*""", ExpressionMatch.MatchMode.MatchWildcard, false) + // Inverted wildcard, case-insensitive + val simpleMatchInvert = + ExpressionMatch("!test*", ExpressionMatch.MatchMode.MatchWildcard, false) + // Not inverted wildcard, case-insensitive + val simpleMatchNoInvert = + ExpressionMatch("""\!test*""", ExpressionMatch.MatchMode.MatchWildcard, false) + // Not inverted wildcard literal slash, case-insensitive + val simpleMatchNoInvertSlash = + ExpressionMatch("""\\!test*""", ExpressionMatch.MatchMode.MatchWildcard, false) + // Complex wildcard + val complexMatch = + ExpressionMatch("""never?gonna*give\*you\?up\\test|y\yeah\\1\\\\2\\\1inval""", + 
ExpressionMatch.MatchMode.MatchWildcard, false) + + // Assert valid and not empty + assertTrue(simpleMatch.isValid()) + assertFalse(simpleMatch.isEmpty()) + assertTrue(simpleMatchCS.isValid()) + assertFalse(simpleMatchCS.isEmpty()) + assertTrue(simpleMatchEscape.isValid()) + assertFalse(simpleMatchEscape.isEmpty()) + assertTrue(simpleMatchInvert.isValid()) + assertFalse(simpleMatchInvert.isEmpty()) + assertTrue(simpleMatchNoInvert.isValid()) + assertFalse(simpleMatchNoInvert.isEmpty()) + assertTrue(simpleMatchNoInvertSlash.isValid()) + assertFalse(simpleMatchNoInvertSlash.isEmpty()) + assertTrue(complexMatch.isValid()) + assertFalse(complexMatch.isEmpty()) + + // Assert match succeeds + assertTrue(simpleMatch.match("@test")) + assertTrue(simpleMatch.match("@testing")) + assertTrue(simpleMatch.match("!test")) + assertTrue(simpleMatchEscape.match("?test*")) + assertTrue(simpleMatchInvert.match("atest")) + assertTrue(simpleMatchNoInvert.match("!test")) + assertTrue(simpleMatchNoInvertSlash.match("""\!test)""")) + // Assert partial match fails + assertFalse(simpleMatch.match("test")) + // Assert unrelated fails + assertFalse(simpleMatch.match("not above")) + // Assert escaped wildcard fails + assertFalse(simpleMatchEscape.match("@testing")) + assertFalse(simpleMatchNoInvert.match("test")) + assertFalse(simpleMatchNoInvert.match("anything")) + assertFalse(simpleMatchNoInvertSlash.match("!test")) + assertFalse(simpleMatchNoInvertSlash.match("test")) + assertFalse(simpleMatchNoInvertSlash.match("anything")) + // Assert non-inverted fails + assertFalse(simpleMatchInvert.match("testing")) + + // Assert case sensitivity followed + assertFalse(simpleMatch.sourceCaseSensitive) + assertTrue(simpleMatch.match("@TeSt")) + assertTrue(simpleMatchCS.sourceCaseSensitive) + assertFalse(simpleMatchCS.match("@TeSt")) + + // Assert complex match + assertTrue(complexMatch.match("""neverAgonnaBBBgive*you?up\test|yyeah\1\\2\1inval""")) + // Assert complex not literal match + 
assertFalse(complexMatch.match("""never?gonna*give\*you\?up\\test|y\yeah\\1\\\\2\\\1inval""")) + // Assert complex unrelated not match + assertFalse(complexMatch.match("other")) + } + + @Test + fun matchMultiWildcard() { + /* + // Simple wildcards, case-insensitive + val simpleMatch = + ExpressionMatch("?test*;another?", + ExpressionMatch.MatchMode.MatchMultiWildcard, false) + // Simple wildcards, case-sensitive + val simpleMatchCS = + ExpressionMatch("?test*;another?", + ExpressionMatch.MatchMode.MatchMultiWildcard, true) + */ + // Escaped wildcards, case-insensitive + val simpleMatchEscape = + ExpressionMatch("""\?test\*\;*thing\*""", + ExpressionMatch.MatchMode.MatchMultiWildcard, false) + /* + // Inverted wildcards, case-insensitive + val simpleMatchInvert = + ExpressionMatch("""test*;!testing""", + ExpressionMatch.MatchMode.MatchMultiWildcard, false) + // Implicit wildcards, case-insensitive + val simpleMatchImplicit = + ExpressionMatch("""!testing*""", + ExpressionMatch.MatchMode.MatchMultiWildcard, false) + // Complex wildcard + val complexMatchFull = """norm;!invert; norm-space ; !invert-space ;;!;\!norm-escaped;\\!slash-invert;\\\\double; escape\;sep;slash-end-split\\;quad\\\\!noninvert;newline-split\nnewline-split-slash\\\nslash-at-end\\""" + // Match normal components + val complexMatchNormal = listOf( + """norm""", + """norm-space""", + """!norm-escaped""", + """\!slash-invert""", + """\\double""", + """escape;sep""", + """slash-end-split\""", + """quad\\!noninvert""", + """newline-split""", + """newline-split-slash\""", + """slash-at-end\""" + ) + // Match negating components + val complexMatchInvert = listOf( + """(invert)""", + """(invert-space)""" + ) + val complexMatch = + ExpressionMatch(complexMatchFull, ExpressionMatch.MatchMode.MatchMultiWildcard, + false) + + // Assert valid and not empty + assertTrue(simpleMatch.isValid()) + assertFalse(simpleMatch.isEmpty()) + assertTrue(simpleMatchCS.isValid()) + assertFalse(simpleMatchCS.isEmpty()) + 
assertTrue(simpleMatchEscape.isValid()) + assertFalse(simpleMatchEscape.isEmpty()) + assertTrue(simpleMatchInvert.isValid()) + assertFalse(simpleMatchInvert.isEmpty()) + assertTrue(simpleMatchImplicit.isValid()) + assertFalse(simpleMatchImplicit.isEmpty()) + assertTrue(complexMatch.isValid()) + assertFalse(complexMatch.isEmpty()) + + // Assert match succeeds + assertTrue(simpleMatch.match("@test")) + assertTrue(simpleMatch.match("@testing")) + assertTrue(simpleMatch.match("!test")) + assertTrue(simpleMatch.match("anotherA")) + */ + assertTrue(simpleMatchEscape.match("?test*;thing*")) + assertTrue(simpleMatchEscape.match("?test*;AAAAAthing*")) + /* + assertTrue(simpleMatchInvert.match("test")) + assertTrue(simpleMatchInvert.match("testing things")) + // Assert implicit wildcard succeeds + assertTrue(simpleMatchImplicit.match("AAAAAA")) + // Assert partial match fails + assertFalse(simpleMatch.match("test")) + assertFalse(simpleMatch.match("another")) + assertFalse(simpleMatch.match("anotherBB")) + // Assert unrelated fails + assertFalse(simpleMatch.match("not above")) + */ + // Assert escaped wildcard fails + assertFalse(simpleMatchEscape.match("@testing")) + /* + // Assert inverted match fails + assertFalse(simpleMatchInvert.match("testing")) + assertFalse(simpleMatchImplicit.match("testing")) + + // Assert case sensitivity followed + assertFalse(simpleMatch.sourceCaseSensitive) + assertTrue(simpleMatch.match("@TeSt")) + assertTrue(simpleMatchCS.sourceCaseSensitive) + assertFalse(simpleMatchCS.match("@TeSt")) + + // Assert complex match + for (normMatch in complexMatchNormal) { + // Each normal component should match + assertTrue(complexMatch.match(normMatch)) + } + + for (invertMatch in complexMatchInvert) { + // Each invert component should not match + assertFalse(complexMatch.match(invertMatch)) + } + + // Assert complex not literal match + assertFalse(complexMatch.match(complexMatchFull)) + // Assert complex unrelated not match + 
assertFalse(complexMatch.match("other")) + */ + } + + @Test + fun matchRegEx() { + // Simple regex, case-insensitive + val simpleMatch = + ExpressionMatch("""simple.\*escape-match.*""", + ExpressionMatch.MatchMode.MatchRegEx, false) + // Simple regex, case-sensitive + val simpleMatchCS = + ExpressionMatch("""simple.\*escape-match.*""", + ExpressionMatch.MatchMode.MatchRegEx, true) + // Inverted regex, case-insensitive + val simpleMatchInvert = + ExpressionMatch("""!invert.\*escape-match.*""", + ExpressionMatch.MatchMode.MatchRegEx, false) + // Non-inverted regex, case-insensitive + val simpleMatchNoInvert = + ExpressionMatch("""\!simple.\*escape-match.*""", + ExpressionMatch.MatchMode.MatchRegEx, false) + // Non-inverted regex literal slash, case-insensitive + val simpleMatchNoInvertSlash = + ExpressionMatch("""\\!simple.\*escape-match.*""", + ExpressionMatch.MatchMode.MatchRegEx, false) + + // Assert valid and not empty + assertTrue(simpleMatch.isValid()) + assertFalse(simpleMatch.isEmpty()) + assertTrue(simpleMatchCS.isValid()) + assertFalse(simpleMatchCS.isEmpty()) + assertTrue(simpleMatchInvert.isValid()) + assertFalse(simpleMatchInvert.isEmpty()) + assertTrue(simpleMatchNoInvert.isValid()) + assertFalse(simpleMatchNoInvert.isEmpty()) + assertTrue(simpleMatchNoInvertSlash.isValid()) + assertFalse(simpleMatchNoInvertSlash.isEmpty()) + + // Assert match succeeds + assertTrue(simpleMatch.match("simpleA*escape-match")) + assertTrue(simpleMatch.match("simpleA*escape-matchBBBB")) + assertTrue(simpleMatchInvert.match("not above")) + assertTrue(simpleMatchNoInvert.match("!simpleA*escape-matchBBBB")) + assertTrue(simpleMatchNoInvertSlash.match("""\!simpleA*escape-matchBBBB""")) + // Assert partial match fails + assertFalse(simpleMatch.match("simpleA*escape-mat")) + assertFalse(simpleMatch.match("simple*escape-match")) + // Assert unrelated fails + assertFalse(simpleMatch.match("not above")) + // Assert escaped wildcard fails + 
assertFalse(simpleMatch.match("simpleABBBBescape-matchBBBB")) + // Assert inverted fails + assertFalse(simpleMatchInvert.match("invertA*escape-match")) + assertFalse(simpleMatchInvert.match("invertA*escape-matchBBBB")) + assertFalse(simpleMatchNoInvert.match("simpleA*escape-matchBBBB")) + assertFalse(simpleMatchNoInvert.match("anything")) + assertFalse(simpleMatchNoInvertSlash.match("!simpleA*escape-matchBBBB")) + assertFalse(simpleMatchNoInvertSlash.match("anything")) + + // Assert case sensitivity followed + assertFalse(simpleMatch.sourceCaseSensitive) + assertTrue(simpleMatch.match("SiMpLEA*escape-MATCH")) + assertTrue(simpleMatchCS.sourceCaseSensitive) + assertFalse(simpleMatchCS.match("SiMpLEA*escape-MATCH")) + } + + @Test + fun trimMultiWildcardWhitespace() { + // Patterns + val patterns = listOf( + // Literal + Pair("literal", + "literal"), + // Simple semicolon cleanup + Pair("simple1 ;simple2; simple3 ", + "simple1; simple2; simple3"), + // Simple newline cleanup + Pair("simple1 \nsimple2\n simple3 ", + "simple1\nsimple2\nsimple3"), + // Complex cleanup + Pair( + """norm; norm-space ; newline-space """ + "\n" + + """ ;escape \; sep ; slash-end-split\\; quad\\\\norm; newline-split-slash\\""" + "\n" + + """slash-at-end\\""", + """norm; norm-space; newline-space""" + "\n" + + """escape \; sep; slash-end-split\\; quad\\\\norm; newline-split-slash\\""" + "\n" + + """slash-at-end\\""" + ) + ) + + // Check every source string... + for (patternPair in patterns) { + // Run transformation + val result = ExpressionMatch.trimMultiWildcardWhitespace(patternPair.first) + // Assert that source trims into expected pattern + assertEquals(patternPair.second, result) + // Assert that re-trimming expected pattern gives the same result + assertEquals(ExpressionMatch.trimMultiWildcardWhitespace(result), result) + } + } +} -- GitLab