Skip to content

Commit

Permalink
Avoid hopping through memory if possible
Browse files Browse the repository at this point in the history
Most cubes may well be smaller than 2^30 in size and, if so, we should
avoid the extra hop via the array-of-arrays. Also, since we know that
the arrays will never change under our feet, we don't need to be using
an AtomicReferenceArray to access the sub-arrays.

Avoiding the extra memory hop yields a 30% speed-up for some access
patterns.
  • Loading branch information
iamsrp-deshaw committed Sep 28, 2024
1 parent 022c197 commit 7cee6f0
Show file tree
Hide file tree
Showing 6 changed files with 315 additions and 235 deletions.
87 changes: 46 additions & 41 deletions java/src/main/java/com/deshaw/hypercube/BooleanBitSetHypercube.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
public class BooleanBitSetHypercube
extends AbstractBooleanHypercube
{
/**
* An empty BitSet.
*/
private static final BitSet EMPTY = new BitSet(0);

/**
* The shift for our max bitset size.
*/
Expand All @@ -38,7 +43,13 @@ public class BooleanBitSetHypercube
* underlying storage when you call set on them. So we force a full
* allocation which will help to guard against that.
*/
private final AtomicReferenceArray<BitSet> myElements;
private final BitSet[] myElements;

/**
* The first BitSet in myElements (or EMPTY if there are none). It is held in
* its own field so that accesses to smaller cubes can avoid an
* extra hop through memory for accesses to smaller cubes.
*/
private final BitSet myElements0;

/**
* Constructor.
Expand All @@ -55,15 +66,16 @@ public BooleanBitSetHypercube(final Dimension<?>[] dimensions)
}

// Force a full allocation by setting the final bit in each bitset
myElements = new AtomicReferenceArray<>(numBitsets);
myElements = new BitSet[numBitsets];
final int maxIdx = (int)(MAX_BITSET_SIZE-1);
for (int i=0; i < myElements.length(); i++) {
for (int i=0; i < myElements.length; i++) {
final BitSet elements = allocForIndex(i);
elements.clear();
elements.set(maxIdx, true );
elements.set(maxIdx, false);
myElements.set(i, elements);
myElements[i] = elements;
}
myElements0 = (myElements.length == 0) ? EMPTY : myElements[0];
}

/**
Expand All @@ -89,17 +101,21 @@ public BooleanBitSetHypercube(final Dimension<?>[] dimensions,
if (numBitsets * MAX_BITSET_SIZE < size) {
numBitsets++;
}
myElements = new AtomicReferenceArray<>(numBitsets);
myElements = new BitSet[numBitsets];
for (int i=0; i < numBitsets; i++) {
myElements.set(i, allocForIndex(i));
myElements[i] = allocForIndex(i);
}
myElements0 = (myElements.length == 0) ? EMPTY : myElements[0];

// There will never be more elements than MAX_BITSET_SIZE so all these
// will fit in the first one.
assert(elements.size() <= MAX_BITSET_SIZE);
for (int i=0; i < elements.size(); i++) {
final Boolean value = elements.get(i);
myElements.get(0).set(i, (value != null && value));
myElements[(int)(i >>> MAX_BITSET_SHIFT)].set(
(int)(i & MAX_BITSET_MASK),
(value != null && value)
);
}
}

Expand Down Expand Up @@ -129,9 +145,8 @@ public void toFlattenedObjs(final long srcPos,
preRead();
for (int i=0; i < length; i++) {
final long pos = srcPos + i;
final BitSet bitset = myElements.get((int)(pos >>> MAX_BITSET_SHIFT));
final boolean b =
(bitset != null && bitset.get((int)(pos & MAX_BITSET_MASK)));
final BitSet bitset = myElements[(int)(pos >>> MAX_BITSET_SHIFT)];
final boolean b = bitset.get((int)(pos & MAX_BITSET_MASK));
dst[dstPos + i] = b;
}
}
Expand Down Expand Up @@ -173,13 +188,7 @@ public void fromFlattenedObjs(final Boolean[] src,
for (int i=0; i < length; i++) {
final long pos = dstPos + i;
final int idx = (int)(pos >>> MAX_BITSET_SHIFT);
BitSet bitset = myElements.get(idx);
if (bitset == null) {
bitset = allocForIndex(idx);
if (!myElements.compareAndSet(idx, null, bitset)) {
bitset = myElements.get(idx);
}
}
final BitSet bitset = myElements[idx];
final Boolean value = src[srcPos + i];
bitset.set((int)(pos & MAX_BITSET_MASK),
(value != null && value));
Expand Down Expand Up @@ -215,9 +224,8 @@ public void toFlattened(final long srcPos,
preRead();
for (int i=0; i < length; i++) {
final long pos = srcPos + i;
final BitSet bitset = myElements.get((int)(pos >>> MAX_BITSET_SHIFT));
final boolean b =
(bitset != null && bitset.get((int)(pos & MAX_BITSET_MASK)));
final BitSet bitset = myElements[(int)(pos >>> MAX_BITSET_SHIFT)];
final boolean b = bitset.get((int)(pos & MAX_BITSET_MASK));
dst[dstPos + i] = b;
}
}
Expand Down Expand Up @@ -252,16 +260,9 @@ public void fromFlattened(final boolean[] src,
for (int i=0; i < length; i++) {
final long pos = dstPos + i;
final int idx = (int)(pos >>> MAX_BITSET_SHIFT);
BitSet bitset = myElements.get(idx);
if (bitset == null) {
bitset = allocForIndex(idx);
if (!myElements.compareAndSet(idx, null, bitset)) {
bitset = myElements.get(idx);
}
}
final Boolean value = src[srcPos + i];
bitset.set((int)(pos & MAX_BITSET_MASK),
(value != null && value));
final BitSet bitset = myElements[idx];
final boolean value = src[srcPos + i];
bitset.set((int)(pos & MAX_BITSET_MASK), value);
}
postWrite();
}
Expand Down Expand Up @@ -321,8 +322,13 @@ public boolean getAt(final long index)
}

preRead();
final BitSet bitset = myElements.get((int)(index >>> MAX_BITSET_SHIFT));
return (bitset != null && bitset.get((int)(index & MAX_BITSET_MASK)));
if (index < MAX_BITSET_SIZE) {
return myElements0.get((int)index);
}
else {
final BitSet bitset = myElements[(int)(index >>> MAX_BITSET_SHIFT)];
return bitset.get((int)(index & MAX_BITSET_MASK));
}
}

/**
Expand All @@ -338,15 +344,14 @@ public void setAt(final long index, final boolean value)
);
}

final int idx = (int)(index >>> MAX_BITSET_SHIFT);
BitSet bitset = myElements.get(idx);
if (bitset == null) {
bitset = allocForIndex(idx);
if (!myElements.compareAndSet(idx, null, bitset)) {
bitset = myElements.get(idx);
}
if (index < MAX_BITSET_SIZE) {
myElements0.set((int)index, value);
}
else {
final int idx = (int)(index >>> MAX_BITSET_SHIFT);
final BitSet bitset = myElements[idx];
bitset.set((int)(index & MAX_BITSET_MASK), value);
}
bitset.set((int)(index & MAX_BITSET_MASK), value);
postWrite();
}

Expand Down Expand Up @@ -375,7 +380,7 @@ private BitSet allocForIndex(final int index)
// length as the 'tail' value. We force a full allocation by setting the
// final bit in each bitset
final long tail = (size & MAX_BITSET_MASK);
final int sz = (tail == 0 || index+1 < myElements.length())
final int sz = (tail == 0 || index+1 < myElements.length)
? (int)MAX_BITSET_SIZE
: (int)tail;
final BitSet result = new BitSet(sz);
Expand Down
93 changes: 54 additions & 39 deletions java/src/main/java/com/deshaw/hypercube/DoubleArrayHypercube.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
public class DoubleArrayHypercube
extends AbstractDoubleHypercube
{
/**
* An empty array of doubles.
*/
private static final double[] EMPTY = new double[0];

/**
* The shift for the max array size.
*/
Expand All @@ -41,7 +46,13 @@ public class DoubleArrayHypercube
* since we might have a size which is larger than what can be represented
* by a single array. (I.e. more than 2^30 elements.)
*/
private final AtomicReferenceArray<double[]> myElements;
private final double[][] myElements;

/**
* The first array in myElements (or EMPTY if there are none). It is held in
* its own field so that accesses to smaller cubes can avoid an
* extra hop through memory for accesses to smaller cubes.
*/
private final double[] myElements0;

/**
* Constructor.
Expand All @@ -57,12 +68,13 @@ public DoubleArrayHypercube(final Dimension<?>[] dimensions)
numArrays++;
}

myElements = new AtomicReferenceArray<>(numArrays);
for (int i=0; i < myElements.length(); i++) {
myElements = new double[numArrays][];
for (int i=0; i < myElements.length; i++) {
final double[] elements = allocForIndex(i);
Arrays.fill(elements, Double.NaN);
myElements.set(i, elements);
myElements[i] = elements;
}
myElements0 = (myElements.length == 0) ? EMPTY : myElements[0];
}

/**
Expand All @@ -88,19 +100,19 @@ public DoubleArrayHypercube(final Dimension<?>[] dimensions,
if (numArrays * MAX_ARRAY_SIZE < size) {
numArrays++;
}
myElements = new AtomicReferenceArray<>(numArrays);
myElements = new double[numArrays][];
for (int i=0; i < numArrays; i++) {
myElements.set(i, allocForIndex(i));
myElements[i] = allocForIndex(i);
}
myElements0 = (myElements.length == 0) ? EMPTY : myElements[0];

// There will never be more elements than MAX_ARRAY_SIZE so all these
// will fit in the first one.
assert(elements.size() <= MAX_ARRAY_SIZE);
// Populate
for (int i=0; i < elements.size(); i++) {
final Double value = elements.get(i);
myElements.get(0)[i] = (value == null) ? Double.NaN
: value.doubleValue();
}
myElements[(int)(i >>> MAX_ARRAY_SHIFT)][(int)(i & MAX_ARRAY_MASK)] =
(value == null) ? Double.NaN
: value.doubleValue();
}
}

/**
Expand All @@ -109,8 +121,8 @@ public DoubleArrayHypercube(final Dimension<?>[] dimensions,
@Override
public void fill(final double v)
{
for (int i=0; i < myElements.length(); i++) {
Arrays.fill(myElements.get(i), v);
for (int i=0; i < myElements.length; i++) {
Arrays.fill(myElements[i], v);
}
}

Expand Down Expand Up @@ -140,7 +152,7 @@ public void toFlattenedObjs(final long srcPos,
preRead();
for (int i=0; i < length; i++) {
final long pos = srcPos + i;
final double[] array = myElements.get((int)(pos >>> MAX_ARRAY_SHIFT));
final double[] array = myElements[(int)(pos >>> MAX_ARRAY_SHIFT)];
final double d = array[(int)(pos & MAX_ARRAY_MASK)];
dst[dstPos + i] = Double.valueOf(d);
}
Expand Down Expand Up @@ -183,7 +195,7 @@ public void fromFlattenedObjs(final Double[] src,
for (int i=0; i < length; i++) {
final long pos = dstPos + i;
final int idx = (int)(pos >>> MAX_ARRAY_SHIFT);
double[] array = myElements.get(idx);
double[] array = myElements[idx];
final Double value = src[srcPos + i];
array[(int)(pos & MAX_ARRAY_MASK)] =
(value == null) ? Double.NaN : value.doubleValue();
Expand Down Expand Up @@ -222,7 +234,7 @@ public void toFlattened(final long srcPos,
if (startIdx == endIdx) {
// What to copy? Try to avoid the overhead of the system call. If we are
// striding through the cube then we may well have just the one.
final double[] array = myElements.get(startIdx);
final double[] array = myElements[startIdx];
switch (length) {
case 0:
// NOP
Expand All @@ -242,8 +254,8 @@ public void toFlattened(final long srcPos,
}
else {
// Split into two copies
final double[] startArray = myElements.get(startIdx);
final double[] endArray = myElements.get( endIdx);
final double[] startArray = myElements[startIdx];
final double[] endArray = myElements[ endIdx];
final int startPos = (int)(srcPos & MAX_ARRAY_MASK);
final int startLength = length - (startArray.length - startPos);
final int endLength = length - startLength;
Expand Down Expand Up @@ -290,7 +302,7 @@ public void fromFlattened(final double[] src,
// striding through the cube then we may well have just the one.
if (startIdx == endIdx) {
// Get the array, creating if needbe
double[] array = myElements.get(startIdx);
double[] array = myElements[startIdx];

// And handle it
switch (length) {
Expand Down Expand Up @@ -329,8 +341,8 @@ public void fromFlattened(final double[] src,
}
else {
// Split into two copies
double[] startArray = myElements.get(startIdx);
double[] endArray = myElements.get( endIdx);
double[] startArray = myElements[startIdx];
double[] endArray = myElements[ endIdx];

// And do the copy
final int startPos = (int)(dstPos & MAX_ARRAY_MASK);
Expand Down Expand Up @@ -362,17 +374,10 @@ public void copyFrom(final DoubleArrayHypercube that)
throw new IllegalArgumentException("Given cube is not compatible");
}

// We always expect this to be true but, just in case something really
// weird is going on, we fall back to the superclass's method. This
// override is really just an optimisation anyhow.
if (myElements.length() == that.myElements.length()) {
for (int i=0; i < myElements.length(); i++) {
final double[] els = that.myElements.get(i);
myElements.set(i, Arrays.copyOf(els, els.length));
}
}
else {
super.copyFrom((DoubleHypercube)that);
for (int i=0; i < myElements.length; i++) {
System.arraycopy(that.myElements[i], 0,
this.myElements[i], 0,
that.myElements[i].length);
}
}

Expand Down Expand Up @@ -427,8 +432,13 @@ public double getAt(final long index)
throws IndexOutOfBoundsException
{
preRead();
final double[] array = myElements.get((int)(index >>> MAX_ARRAY_SHIFT));
return array[(int)(index & MAX_ARRAY_MASK)];
if (index < MAX_ARRAY_SIZE) {
return myElements0[(int)index];
}
else {
final double[] array = myElements[(int)(index >>> MAX_ARRAY_SHIFT)];
return array[(int)(index & MAX_ARRAY_MASK)];
}
}

/**
Expand All @@ -438,8 +448,13 @@ public double getAt(final long index)
public void setAt(final long index, final double value)
throws IndexOutOfBoundsException
{
double[] array = myElements.get((int)(index >>> MAX_ARRAY_SHIFT));
array[(int)(index & MAX_ARRAY_MASK)] = value;
if (index < MAX_ARRAY_SIZE) {
myElements0[(int)index] = value;
}
else {
double[] array = myElements[(int)(index >>> MAX_ARRAY_SHIFT)];
array[(int)(index & MAX_ARRAY_MASK)] = value;
}
postWrite();
}

Expand Down Expand Up @@ -467,11 +482,11 @@ private double[] allocForIndex(final int index)
// of MAX_ARRAY_SIZE so we look to account for that. We compute its
// length as the 'tail' value.
final long tail = (size & MAX_ARRAY_MASK);
final int sz = (tail == 0 || index+1 < myElements.length())
final int sz = (tail == 0 || index+1 < myElements.length)
? (int)MAX_ARRAY_SIZE
: (int)tail;
return new double[sz];
}
}

// [[[end]]] (checksum: b88a57b3feb95aaffba5dd3124475ddf)
// [[[end]]] (checksum: 3230aaea35ca70ce4f6fa659f8e8e03f)
Loading

0 comments on commit 7cee6f0

Please sign in to comment.