diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 3084a4038866..d351c549319a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -223,6 +223,8 @@ Optimizations
 * GITHUB#12589: Disjunctions now sometimes run as conjunctions when the minimum
   competitive score requires multiple clauses to match. (Adrien Grand)
 
+* GITHUB#12710: Use Arrays#mismatch for Outputs#common operations. (Guo Feng)
+
 Changes in runtime behavior
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java
index 261b5e9bd430..dcfc552dd4b5 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.util.fst;
 
 import java.io.IOException;
+import java.util.Arrays;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.BytesRef;
@@ -44,28 +45,29 @@ public BytesRef common(BytesRef output1, BytesRef output2) {
     assert output1 != null;
     assert output2 != null;
 
-    int pos1 = output1.offset;
-    int pos2 = output2.offset;
-    int stopAt1 = pos1 + Math.min(output1.length, output2.length);
-    while (pos1 < stopAt1) {
-      if (output1.bytes[pos1] != output2.bytes[pos2]) {
-        break;
-      }
-      pos1++;
-      pos2++;
-    }
+    int mismatchPos =
+        Arrays.mismatch(
+            output1.bytes,
+            output1.offset,
+            output1.offset + output1.length,
+            output2.bytes,
+            output2.offset,
+            output2.offset + output2.length);
 
-    if (pos1 == output1.offset) {
+    if (mismatchPos == 0) {
       // no common prefix
       return NO_OUTPUT;
-    } else if (pos1 == output1.offset + output1.length) {
+    } else if (mismatchPos == -1) {
+      // exactly equals
+      return output1;
+    } else if (mismatchPos == output1.length) {
       // output1 is a prefix of output2
       return output1;
-    } else if (pos2 == output2.offset + output2.length) {
+    } else if (mismatchPos == output2.length) {
       // output2 is a prefix of output1
       return output2;
     } else {
-      return new BytesRef(output1.bytes, output1.offset, pos1 - output1.offset);
+      return new BytesRef(output1.bytes, output1.offset, mismatchPos);
     }
   }
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java
index 707b5f9973e3..f3655fdd2846 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.util.fst;
 
 import java.io.IOException;
+import java.util.Arrays;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.CharsRef;
@@ -43,28 +44,29 @@ public CharsRef common(CharsRef output1, CharsRef output2) {
     assert output1 != null;
     assert output2 != null;
 
-    int pos1 = output1.offset;
-    int pos2 = output2.offset;
-    int stopAt1 = pos1 + Math.min(output1.length, output2.length);
-    while (pos1 < stopAt1) {
-      if (output1.chars[pos1] != output2.chars[pos2]) {
-        break;
-      }
-      pos1++;
-      pos2++;
-    }
+    int mismatchPos =
+        Arrays.mismatch(
+            output1.chars,
+            output1.offset,
+            output1.offset + output1.length,
+            output2.chars,
+            output2.offset,
+            output2.offset + output2.length);
 
-    if (pos1 == output1.offset) {
+    if (mismatchPos == 0) {
       // no common prefix
       return NO_OUTPUT;
-    } else if (pos1 == output1.offset + output1.length) {
+    } else if (mismatchPos == -1) {
+      // exactly equals
+      return output1;
+    } else if (mismatchPos == output1.length) {
       // output1 is a prefix of output2
       return output1;
-    } else if (pos2 == output2.offset + output2.length) {
+    } else if (mismatchPos == output2.length) {
       // output2 is a prefix of output1
       return output2;
     } else {
-      return new CharsRef(output1.chars, output1.offset, pos1 - output1.offset);
+      return new CharsRef(output1.chars, output1.offset, mismatchPos);
     }
   }
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java b/lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java
index e81d13ee46e1..2dcf37bc8878 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.util.fst;
 
 import java.io.IOException;
+import java.util.Arrays;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.IntsRef;
@@ -43,28 +44,29 @@ public IntsRef common(IntsRef output1, IntsRef output2) {
     assert output1 != null;
     assert output2 != null;
 
-    int pos1 = output1.offset;
-    int pos2 = output2.offset;
-    int stopAt1 = pos1 + Math.min(output1.length, output2.length);
-    while (pos1 < stopAt1) {
-      if (output1.ints[pos1] != output2.ints[pos2]) {
-        break;
-      }
-      pos1++;
-      pos2++;
-    }
+    int mismatchPos =
+        Arrays.mismatch(
+            output1.ints,
+            output1.offset,
+            output1.offset + output1.length,
+            output2.ints,
+            output2.offset,
+            output2.offset + output2.length);
 
-    if (pos1 == output1.offset) {
+    if (mismatchPos == 0) {
       // no common prefix
       return NO_OUTPUT;
-    } else if (pos1 == output1.offset + output1.length) {
+    } else if (mismatchPos == -1) {
+      // exactly equals
+      return output1;
+    } else if (mismatchPos == output1.length) {
       // output1 is a prefix of output2
       return output1;
-    } else if (pos2 == output2.offset + output2.length) {
+    } else if (mismatchPos == output2.length) {
       // output2 is a prefix of output1
       return output2;
     } else {
-      return new IntsRef(output1.ints, output1.offset, pos1 - output1.offset);
+      return new IntsRef(output1.ints, output1.offset, mismatchPos);
     }
   }
 