Skip to content

Commit

Permalink
Merge pull request boskworks#37 from prdoyle/bson
Browse files Browse the repository at this point in the history
Add bsok-bson library
  • Loading branch information
prdoyle authored Jan 19, 2025
2 parents 32c5a6a + 45684ff commit 1fd1735
Show file tree
Hide file tree
Showing 29 changed files with 566 additions and 385 deletions.
7 changes: 7 additions & 0 deletions bosk-bson/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
## bosk-bson

This is the subproject for the published `bosk-bson` library,
containing facilities for manipulating MongoDB's binary JSON data structures.
Outside of MongoDB itself, we also use BSON as a handy utility
for divide up and re-combine large JSON documents,
which is useful even outside of MongoDB.
47 changes: 47 additions & 0 deletions bosk-bson/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@

plugins {
id 'bosk.development'
id 'bosk.maven-publish'
id 'info.solidsoft.pitest' version '1.15.0'
id 'com.github.spotbugs' version '5.1.5'
}

base {
archivesName = 'bosk-bson'
}

java {
toolchain {
languageVersion = JavaLanguageVersion.of(jdkVersion)
}
}

compileJava {
options.release = prodJavaVersion
}

compileTestJava {
options.release = null
}

dependencies {
api project(":bosk-core")
api 'org.mongodb:bson:5.1.2'
implementation 'com.github.spotbugs:spotbugs-annotations:4.8.6' // To stop warnings about When from MongoDB driver

testImplementation project(":bosk-logback")
testImplementation project(":bosk-testing")
testImplementation project(":lib-testing")
}

pitest {
pitestVersion = '1.15.0'
junit5PluginVersion = '1.2.0'
jvmArgs = ['-ea'] // Our unit tests check for assert statements
targetClasses = ['works.bosk.drivers.mongo.BsonSurgeon']
targetTests = ['works.bosk.drivers.mongo.BsonSurgeonTest']
threads = 4
outputFormats = ['XML', 'HTML']
timestampedReports = false
//verbose = true
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package works.bosk.drivers.mongo;
package works.bosk.bson;

class BsonFormatException extends IllegalStateException {
public BsonFormatException(String s) { super(s); }
Expand Down
332 changes: 332 additions & 0 deletions bosk-bson/src/main/java/works/bosk/bson/BsonFormatter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,332 @@
package works.bosk.bson;

import java.lang.reflect.Type;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.function.Function;
import java.util.function.UnaryOperator;
import java.util.regex.Pattern;
import org.bson.BsonDocument;
import org.bson.BsonDocumentWriter;
import org.bson.BsonReader;
import org.bson.BsonValue;
import org.bson.codecs.Codec;
import org.bson.codecs.DecoderContext;
import org.bson.codecs.DocumentCodecProvider;
import org.bson.codecs.EncoderContext;
import org.bson.codecs.ValueCodecProvider;
import org.bson.codecs.configuration.CodecRegistries;
import org.bson.codecs.configuration.CodecRegistry;
import works.bosk.BoskDiagnosticContext;
import works.bosk.BoskInfo;
import works.bosk.Listing;
import works.bosk.Path;
import works.bosk.Reference;
import works.bosk.SerializationPlugin;
import works.bosk.SideTable;
import works.bosk.exceptions.InvalidTypeException;

import static works.bosk.Path.validSegment;
import static works.bosk.ReferenceUtils.rawClass;

/**
* Utility class for encoding and decoding bosk state trees as BSON.
*/
public class BsonFormatter {
protected static final UnaryOperator<String> DECODER;
protected static final UnaryOperator<String> ENCODER;
protected final CodecRegistry simpleCodecs;
protected final Function<Type, Codec<?>> preferredBoskCodecs;
protected final Function<Reference<?>, SerializationPlugin.DeserializationScope> deserializationScopeFunction;

public BsonFormatter(BoskInfo<?> boskInfo, BsonPlugin bsonPlugin) {
this.simpleCodecs = CodecRegistries.fromProviders(
bsonPlugin.codecProviderFor(boskInfo),
new ValueCodecProvider(),
new DocumentCodecProvider());
this.preferredBoskCodecs = type -> bsonPlugin.getCodec(type, rawClass(type), simpleCodecs, boskInfo);
this.deserializationScopeFunction = bsonPlugin::newDeserializationScope;
}

public static String dottedFieldNameSegment(String segment) {
return ENCODER.apply(validSegment(segment));
}

public static String undottedFieldNameSegment(String dottedSegment) {
return DECODER.apply(dottedSegment);
}

/**
* @param refLength behave as though <code>ref</code> were truncated to this length without actually having to do it
*/
private static <T> void buildDottedFieldNameOf(Reference<T> ref, int startLength, int refLength, ArrayList<String> segments) {
if (ref.path().length() > startLength) {
Reference<?> enclosingReference = ref.enclosingReference(Object.class);
BsonFormatter.buildDottedFieldNameOf(enclosingReference, startLength, refLength, segments);
if (ref.path().length() <= refLength) {
if (Listing.class.isAssignableFrom(enclosingReference.targetClass())) {
segments.add("ids");
} else if (SideTable.class.isAssignableFrom(enclosingReference.targetClass())) {
segments.add("valuesById");
}
segments.add(BsonFormatter.dottedFieldNameSegment(ref.path().lastSegment()));
}
}
}

static {
DECODER = s->{
return URLDecoder.decode(s, StandardCharsets.UTF_8);
};

ENCODER = s->{
// Selective percent-encoding of characters MongoDB doesn't like.
// Standard percent-encoding doesn't handle the period character, which
// we want, so if we're already diverging from the standard, we might
// as well do something that suits our needs.
// Good to stay compatible with standard percent-DEcoding, though.
StringBuilder sb = new StringBuilder();
for (int i = 0; i < s.length(); ) {
int cp = s.codePointAt(i);
switch (cp) {
case '%': // For percent-encoding
case '+': case ' ': // These two are affected by URLDecoder
case '$': // MongoDB treats these specially
case '.': // MongoDB separator for dotted field names
case 0: // Can MongoDB handle nulls? Probably. Do we want to find out? Not really.
case '|': // (These are reserved for internal use)
case '!':
case '~':
case '[':
case ']':
appendPercentEncoded(sb, cp);
break;
default:
sb.appendCodePoint(cp);
break;
}
i += Character.charCount(cp);
}
return sb.toString();
};
}

private static void appendPercentEncoded(StringBuilder sb, int cp) {
assert 0 <= cp && cp <= 255;
sb
.append('%')
.append(hexCharForDigit(cp / 16))
.append(hexCharForDigit(cp % 16));
}

/**
* An uppercase version of {@link Character#forDigit} with a radix of 16.
*/
private static char hexCharForDigit(int value) {
if (value < 10) {
return (char)('0' + value);
} else {
return (char)('A' + value - 10);
}
}

/**
* @param ref the bosk node whose field name is required
* @param startingRef the bosk node corresponding to the MongoDB document in which {@code ref} is located
* @return MongoDB field name corresponding to {@code ref} within the document for {@code startingRef}, starting with {@link DocumentFields#state state}
* @see #referenceTo(String, Reference)
*/
public static <T> String dottedFieldNameOf(Reference<T> ref, Reference<?> startingRef) {
assert startingRef.encloses(ref);
return dottedFieldNameOf(ref, ref.path().length(), startingRef);
}

/**
* @param refLength behave as though <code>ref</code> were truncated to this length, without actually having to do it
* @return MongoDB field name corresponding to the given Reference
* @see #referenceTo(String, Reference)
*/
static <T> String dottedFieldNameOf(Reference<T> ref, int refLength, Reference<?> startingRef) {
// TODO: Is this required? It's currently only really called by tests
ArrayList<String> segments = dottedFieldNameSegments(ref, refLength, startingRef);
return String.join(".", segments.toArray(new String[0]));
}

/**
* @return Reference corresponding to the given field name within a document representing {@code startingReference}
* @see #dottedFieldNameOf
*/
@SuppressWarnings("unchecked")
public static <T> Reference<T> referenceTo(String dottedName, Reference<?> startingReference) throws InvalidTypeException {
Reference<?> ref = startingReference;
Iterator<String> iter = Arrays.asList(dottedName.split(Pattern.quote("."))).iterator();
BsonFormatter.skipField(ref, iter, DocumentFields.state.name()); // The entire Bosk state is in this field
while (iter.hasNext()) {
if (Listing.class.isAssignableFrom(ref.targetClass())) {
BsonFormatter.skipField(ref, iter, "ids");
} else if (SideTable.class.isAssignableFrom(ref.targetClass())) {
BsonFormatter.skipField(ref, iter, "valuesById");
}
if (iter.hasNext()) {
String segment = undottedFieldNameSegment(iter.next());
ref = ref.then(Object.class, segment);
}
}
return (Reference<T>) ref;
}

private static void skipField(Reference<?> ref, Iterator<String> iter, String expectedName) {
String actualName;
try {
actualName = iter.next();
} catch (NoSuchElementException e) {
throw new IllegalStateException("Expected '" + expectedName + "' for " + ref.targetClass().getSimpleName() + "; encountered end of dotted field name");
}
if (!expectedName.equals(actualName)) {
throw new IllegalStateException("Expected '" + expectedName + "' for " + ref.targetClass().getSimpleName() + "; was: " + actualName);
}
}

/**
* A <em>BSON path</em> is a pipe-delimited sequence of BSON fields.
*
* <p>
* Note that the BSON structure is such that the BSON path's segments may
* not exactly match those of {@code ref.path()}.
*
* <p>
* This computes the BSON path for {@code ref} within a document representing {@code startingRef}.
*
* @param ref reference to the node whose path is desired
* @param startingRef reference to the node corresponding to the document containing the {@code ref} node
* @return the BSON path leading to {@code ref} within a document representing {@code startingRef}
*/
public static String docBsonPath(Reference<?> ref, Reference<?> startingRef) {
return "|" + String.join("|", docSegments(ref, startingRef));
}

/**
* @return list of field names suitable for {@link #lookup} to find the document corresponding
* to <code>docRef</code> inside a document corresponding to <code>rootRef</code>
*/
private static List<String> docSegments(Reference<?> docRef, Reference<?> rootRef) {
ArrayList<String> allSegments = dottedFieldNameSegments(docRef, docRef.path().length(), rootRef);
return allSegments
.subList(1, allSegments.size()); // Skip the "state" field
}

protected Codec<?> codecFor(Type type) {
// BsonPlugin gives better codecs than CodecRegistry, because BsonPlugin is aware of generics,
// so we always try that first. The CodecSupplier protocol uses "null" to indicate that another
// CodecSupplier should be used, so we follow that protocol and fall back on the CodecRegistry.
// TODO: Should this logic be in BsonPlugin? It has nothing to do with MongoDriver really.
Codec<?> result = preferredBoskCodecs.apply(type);
if (result == null) {
return simpleCodecs.get(rawClass(type));
} else {
return result;
}
}

/**
* Converts a bosk state node to BSON.
*
* <p>
* A common way to call this is, for a given {@link Reference} {@code ref}, is:
* <pre>
* ref.value(), ref.targetType()
* </pre>
* @param object the bosk state node to convert
* @param type the type of {@code object}
* @see #bsonValue2object(BsonValue, Reference)
*/
@SuppressWarnings("unchecked")
public <T> BsonValue object2bsonValue(T object, Type type) {
rawClass(type).cast(object);
Codec<T> objectCodec = (Codec<T>) codecFor(type);
BsonDocument document = new BsonDocument();
try (BsonDocumentWriter writer = new BsonDocumentWriter(document)) {
// To support arbitrary values, not just whole documents, we put the result INSIDE a document.
writer.writeStartDocument();
writer.writeName("value");
objectCodec.encode(writer, object, EncoderContext.builder().build());
writer.writeEndDocument();
}
return document.get("value");
}

/**
* Converts a BSON value to a bosk state node.
* @param bson the value to convert
* @param target the bosk location of the resulting node
* @see #object2bsonValue(Object, Type)
*/
@SuppressWarnings("unchecked")
public <T> T bsonValue2object(BsonValue bson, Reference<T> target) {
Codec<T> objectCodec = (Codec<T>) codecFor(target.targetType());
BsonDocument document = new BsonDocument();
document.append("value", bson);
try (
@SuppressWarnings("unused") SerializationPlugin.DeserializationScope scope = deserializationScopeFunction.apply(target);
BsonReader reader = document.asBsonReader()
) {
reader.readStartDocument();
reader.readName("value");
return objectCodec.decode(reader, DecoderContext.builder().build());
}
}

/**
* The fields of the main MongoDB document. Case-sensitive.
*
* <p>
* No field name should be a prefix of any other.
*/
public enum DocumentFields {
/**
* The location of this document's state within the conceptual giant document.
* A pipe-separated list of BSON field names.
*/
path,

/**
* The BSON-encoded portion of the bosk state represented by this document.
*/
state,

/**
* An ever-increasing 64-bit long that is incremented every time the document changes.
*/
revision,

/**
* The contents of {@link BoskDiagnosticContext} corresponding to this
* document's last update.
*/
diagnostics,
}

static <T> ArrayList<String> dottedFieldNameSegments(Reference<T> ref, int refLength, Reference<?> startingRef) {
assert startingRef.path().matchesPrefixOf(ref.path()): "'" + ref + "' must be under '" + startingRef + "'";
ArrayList<String> segments = new ArrayList<>();
segments.add(DocumentFields.state.name());
buildDottedFieldNameOf(ref, startingRef.path().length(), refLength, segments);
return segments;
}

/**
* @param elementRefLength behave as though <code>elementRef</code> were truncated to this length, without actually having to do it
* @return MongoDB field name corresponding to the object that contains the given element
* @see #referenceTo(String, Reference)
*/
static <T> List<String> containerSegments(Reference<T> elementRef, int elementRefLength, Reference<?> startingRef) {
List<String> elementSegments = dottedFieldNameSegments(elementRef, elementRefLength, startingRef);
return elementSegments.subList(0, elementSegments.size()-1); // Trim off the element itself
}

}
Loading

0 comments on commit 1fd1735

Please sign in to comment.