Fix combinatorial explosion in DSDL compiler (#164)
* Adopt OpenCyphal/pydsdl#66
* Add complex DSDL type to catch regressions
* Do not unroll (de)serialization of fixed-length arrays
* Update Nunavut
pavel-kirienko authored Apr 10, 2021
1 parent 87c27a9 commit e985e3a
Showing 10 changed files with 49 additions and 30 deletions.
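
The common thread across the diffs below: pydsdl's BitLengthSet used to be queried by numerical expansion (list(bls), max(bls)), whose cost grows combinatorially for nested variable-length types; with OpenCyphal/pydsdl#66 the analytical min/max properties yield the same bounds cheaply. A minimal sketch of the difference, assuming pydsdl's public BitLengthSet API with the int construction and repeat_range semantics seen in the templates below:

```python
from pydsdl import BitLengthSet  # assumes a pydsdl release with OpenCyphal/pydsdl#66

element = BitLengthSet(8)            # attainable bit lengths of one uint8 element
array = element.repeat_range(1024)   # lengths of 0..1024 concatenated elements

# Analytical bounds -- computed without enumerating the set:
print(array.min, array.max)          # 0 8192

# Numerical expansion -- list(array) or max(array) enumerates every attainable
# length; on deeply nested composite types that enumeration is the
# combinatorial explosion this commit removes from the compiler hot path.
```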
2 changes: 1 addition & 1 deletion pyuavcan/VERSION
@@ -1 +1 @@
-1.2.3
+1.2.4
2 changes: 1 addition & 1 deletion pyuavcan/application/heartbeat_publisher.py
@@ -44,7 +44,7 @@ class Mode(enum.IntEnum):


VENDOR_SPECIFIC_STATUS_CODE_MASK = (
-    2 ** list(pyuavcan.dsdl.get_model(Heartbeat)["vendor_specific_status_code"].data_type.bit_length_set)[0] - 1
+    2 ** pyuavcan.dsdl.get_model(Heartbeat)["vendor_specific_status_code"].data_type.bit_length_set.max - 1
)


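The change is behavior-preserving here because vendor_specific_status_code has a fixed width, so the sole element of its bit-length set equals its maximum; .max yields the same value without expanding the set. A hedged illustration of the idiom (the 8-bit width follows from uint8 in Heartbeat.1.0; the same .max pattern drives _PSEUDO_UNIQUE_ID_MASK, _NODE_ID_MASK, and the bits-to-bytes _MTU_THRESHOLD in the next file):

```python
width = 8               # fixed bit width of vendor_specific_status_code (uint8)
mask = 2 ** width - 1   # same value from list(bls)[0] or bls.max when the width is fixed
assert mask == 0xFF

mtu_threshold = 8192 // 8   # bls.max // 8: worst-case size in bytes (illustrative value)
```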
6 changes: 3 additions & 3 deletions pyuavcan/application/plug_and_play.py
@@ -27,10 +27,10 @@


_PSEUDO_UNIQUE_ID_MASK = (
-    2 ** list(pyuavcan.dsdl.get_model(NodeIDAllocationData_1)["unique_id_hash"].data_type.bit_length_set)[0] - 1
+    2 ** pyuavcan.dsdl.get_model(NodeIDAllocationData_1)["unique_id_hash"].data_type.bit_length_set.max - 1
)

-_NODE_ID_MASK = 2 ** max(pyuavcan.dsdl.get_model(ID)["value"].data_type.bit_length_set) - 1
+_NODE_ID_MASK = 2 ** pyuavcan.dsdl.get_model(ID)["value"].data_type.bit_length_set.max - 1

_UNIQUE_ID_SIZE_BYTES = pyuavcan.application.NodeInfo().unique_id.size

@@ -63,7 +63,7 @@ class Allocatee:

DEFAULT_PRIORITY = pyuavcan.transport.Priority.SLOW

-_MTU_THRESHOLD = max(pyuavcan.dsdl.get_model(NodeIDAllocationData_2).bit_length_set) // 8
+_MTU_THRESHOLD = pyuavcan.dsdl.get_model(NodeIDAllocationData_2).bit_length_set.max // 8

def __init__(
self,
20 changes: 12 additions & 8 deletions pyuavcan/dsdl/_templates/deserialization.j2
@@ -10,7 +10,7 @@
{% set t = self.inner_type %}
{% if t is StructureType %}
{% set field_ref_map = {} %}
-{% for f, offset in t.iterate_fields_with_offsets(0|bit_length_set) %}
+{% for f, offset in t.iterate_fields_with_offsets() %}
{% if f is not padding %}
{% set field_ref = 'f'|to_template_unique_name %}
{% do field_ref_map.update({f: field_ref}) %}
@@ -33,7 +33,7 @@
{% elif t is UnionType %}
{% set tag_ref = 'tag'|to_template_unique_name %}
{{ _deserialize_integer(t.tag_field_type, tag_ref, 0|bit_length_set) }}
-{% for f, offset in t.iterate_fields_with_offsets(0|bit_length_set) %}
+{% for f, offset in t.iterate_fields_with_offsets() %}
{# We generate new temporary for each variant to prevent MyPy from complaining. #}
{% set field_ref = 'uni'|to_template_unique_name %}
{{ 'if' if loop.first else 'elif' }} {{ tag_ref }} == {{ loop.index0 }}:
@@ -45,7 +45,7 @@
{% else %}{% assert False %}{# Delimited type is not expected in this context. #}
{% endif %}
_des_.pad_to_alignment({{ self.alignment_requirement }})
-assert {{ t.bit_length_set|min }} <= (_des_.consumed_bit_length - _base_offset_) <= {{ t.bit_length_set|max }}, \
+assert {{ t.bit_length_set.min }} <= (_des_.consumed_bit_length - _base_offset_) <= {{ t.bit_length_set.max }}, \
'Bad deserialization of {{ self }}'
{%- endmacro %}

@@ -67,13 +67,17 @@
{{ ref }} = _des_.fetch_{{ offset|alignment_prefix -}}
_array_of_standard_bit_length_primitives({{ t.element_type|numpy_scalar_type }}, {{ t.capacity }})
{% else %}
+{# Element offset is the superposition of each individual element offsets plus the array's own offset.
+ # For example, an array like uint8[3] offset by 16 bits would have its element_offset = {16, 24, 32}.
+ # We can also unroll element deserialization for small arrays (e.g., below ~10 elements) to take advantage of
+ # spurious alignment of elements but the benefit of such optimization is believed to be negligible. #}
+{% set element_offset = offset + t.element_type.bit_length_set.repeat_range(t.capacity - 1) %}
{% set element_ref = 'e'|to_template_unique_name %}
-# Unrolled fixed-length array: {{ t }}; the temporary {{ element_ref }} is used for element storage.
+{% set index_ref = 'i'|to_template_unique_name %}
{{ ref }} = _np_.empty({{ t.capacity }}, {{ t.element_type|numpy_scalar_type }})
-{% for index, element_offset in t.enumerate_elements_with_offsets(offset) %}
-{{ _deserialize_any(t.element_type, element_ref, element_offset) }}
-{{ ref }}[{{ index }}] = {{ element_ref }}
-{% endfor %}
+for {{ index_ref }} in range({{ t.capacity }}):
+    {{ _deserialize_any(t.element_type, element_ref, element_offset)|indent }}
+    {{ ref }}[{{ index_ref }}] = {{ element_ref }}
{% endif %}
assert len({{ ref }}) == {{ t.capacity }}, '{{ t }}'
{% endmacro %}
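The net effect on the generated deserializer: a fixed-length array no longer expands into `capacity` repeated statements but into one loop, with every element sharing the same superposed offset set. A self-contained sketch of that shape (fetch_element stands in for the generated _des_ fetch calls; this is not the template's literal output):

```python
import numpy as np

def deserialize_fixed_array(fetch_element, capacity: int, dtype=np.uint8) -> np.ndarray:
    out = np.empty(capacity, dtype)
    for i in range(capacity):        # one loop regardless of capacity,
        out[i] = fetch_element()     # instead of `capacity` unrolled statements
    assert len(out) == capacity
    return out

# Usage with a dummy element source:
data = iter(b"\x01\x02\x03")
print(deserialize_fixed_array(lambda: next(data), 3))  # [1 2 3]
```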
27 changes: 16 additions & 11 deletions pyuavcan/dsdl/_templates/serialization.j2
@@ -9,11 +9,11 @@
_base_offset_ = _ser_.current_bit_length
{% set t = self.inner_type %}
{% if t is StructureType %}
-{% for f, offset in t.iterate_fields_with_offsets(0|bit_length_set) %}
+{% for f, offset in t.iterate_fields_with_offsets() %}
{{ _serialize_any(f.data_type, 'self.' + (f|id), offset) }}
{% endfor %}
{% elif t is UnionType %}
-{% for f, offset in t.iterate_fields_with_offsets(0|bit_length_set) %}
+{% for f, offset in t.iterate_fields_with_offsets() %}
{% set field_ref = 'self.' + (f|id) %}
{{ 'if' if loop.first else 'elif' }} {{ field_ref }} is not None: # Union tag {{ loop.index0 }}
{{ _serialize_integer(t.tag_field_type, loop.index0|string, 0|bit_length_set)|indent }}
@@ -24,7 +24,7 @@
{% else %}{% assert False %}{# Delimited type is not expected in this context. #}
{% endif %}
_ser_.pad_to_alignment({{ self.alignment_requirement }})
-assert {{ t.bit_length_set|min }} <= (_ser_.current_bit_length - _base_offset_) <= {{ t.bit_length_set|max }}, \
+assert {{ t.bit_length_set.min }} <= (_ser_.current_bit_length - _base_offset_) <= {{ t.bit_length_set.max }}, \
'Bad serialization of {{ self }}'
{%- endmacro %}

@@ -79,10 +79,15 @@
{% elif t.element_type is PrimitiveType and t.element_type.standard_bit_length %}
_ser_.add_{{ offset|alignment_prefix -}}_array_of_standard_bit_length_primitives({{ ref }})
{% else %}
-# Unrolled fixed-length array: {{ t }}
-{% for index, element_offset in t.enumerate_elements_with_offsets(offset) %}
-{{ _serialize_any(t.element_type, '%s[%d]'|format(ref, index), element_offset) }}
-{% endfor %}
+{# Element offset is the superposition of each individual element offsets plus the array's own offset.
+ # For example, an array like uint8[3] offset by 16 bits would have its element_offset = {16, 24, 32}.
+ # We can also unroll element serialization for small arrays (e.g., below ~10 elements) to take advantage of
+ # spurious alignment of elements but the benefit of such optimization is believed to be negligible. #}
+{% set element_offset = offset + t.element_type.bit_length_set.repeat_range(t.capacity - 1) %}
+{% set element_ref = 'elem'|to_template_unique_name %}
+# Element offset: {{ element_offset }}
+for {{ element_ref }} in {{ ref }}:
+    {{ _serialize_any(t.element_type, element_ref, element_offset)|indent }}
{% endif %}
{% endmacro %}

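The element-offset superposition described in the template comments can be sanity-checked directly; a small sketch assuming BitLengthSet accepts an int and supports + and repeat_range exactly as used above:

```python
from pydsdl import BitLengthSet

offset = BitLengthSet(16)                                       # array begins 16 bits in
element_offset = offset + BitLengthSet(8).repeat_range(3 - 1)   # uint8[3]
print(sorted(element_offset))                                   # expected: [16, 24, 32]
```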
@@ -120,7 +125,7 @@
{%- elif t is VariableLengthArrayType -%} {{ _serialize_variable_length_array(t, ref, offset) }}
{%- elif t is CompositeType -%}
{% if t is DelimitedType %}
-{% if (t.inner_type.bit_length_set | length) > 1 %}
+{% if not t.inner_type.bit_length_set.fixed_length %}
{# Instead of the outer extent, we use the inner extent, which equals the max bit length and is a
# tighter bound than the user-defined extent.
# This is safe because when serializing we always know the concrete type.
@@ -136,14 +141,14 @@
{{ ref }}._serialize_(_nested_)
_nested_length_ = _nested_.current_bit_length - {{ t.delimiter_header_type.bit_length }}
del _nested_
-assert {{ t.inner_type.bit_length_set|min }} <= _nested_length_ <= {{ t.inner_type.bit_length_set|max }}
+assert {{ t.inner_type.bit_length_set.min }} <= _nested_length_ <= {{ t.inner_type.bit_length_set.max }}
assert _nested_length_ % 8 == 0
_ser_.add_aligned_u32(_nested_length_ // 8) # Jump back and serialize the delimiter header.
_ser_.skip_bits(_nested_length_) # Return to the current offset.
{% else %}
{# Optional optimization: if the nested object is fixed-length, no need to fork the serializer. #}
-{% set length_bits = t.inner_type.bit_length_set | first %}
-{% assert [length_bits] == t.inner_type.bit_length_set | list %}
+{% set length_bits = t.inner_type.bit_length_set.max %}
+{% assert length_bits == t.inner_type.bit_length_set.min %}
{% assert length_bits % 8 == 0 %}
{% set length_bytes = length_bits // 8 %}
# Delimited serialization of {{ t }}, fixed bit length {{ length_bits }} ({{ length_bytes }} bytes)
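For context on the fast path above: when the nested object's bit length is fixed, the delimiter header is a compile-time constant and the serializer need not be forked and revisited. A worked byte-level example, assuming Cyphal's little-endian u32 byte-count header (consistent with the add_aligned_u32 call in the diff):

```python
import struct

nested = bytes(6)                         # stand-in for a fixed 48-bit nested object
header = struct.pack("<I", len(nested))   # u32 delimiter header: 6 bytes of payload
frame = header + nested
assert len(frame) == 4 + 6
```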
2 changes: 1 addition & 1 deletion setup.cfg
@@ -56,7 +56,7 @@ packages = find:
# Think thrice before adding anything here, please.
# The preferred long-term plan is to avoid adding any new required dependencies whatsoever for the project's lifetime.
install_requires =
-    nunavut ~= 1.0
+    nunavut ~= 1.1
numpy ~= 1.17, < 1.20
# TODO: allow NumPy v1.20 -- requires fixing type annotations and many meaningless false-positives. No runtime effects.

2 changes: 1 addition & 1 deletion tests/dsdl/_builtin_form.py
@@ -102,7 +102,7 @@ def _unittest_slow_builtin_form_manual(compiled: typing.List[pyuavcan.dsdl.GeneratedPackageInfo]
def _unittest_slow_builtin_form_automatic(compiled: typing.List[pyuavcan.dsdl.GeneratedPackageInfo]) -> None:
for info in compiled:
for model in _util.expand_service_types(info.models):
-if max(model.bit_length_set) / 8 > 1024 * 1024:
+if model.bit_length_set.max / 8 > 1024 * 1024:
_logger.info("Automatic test of %s skipped because the type is too large", model)
continue # Skip large objects because they take forever to convert and test

8 changes: 6 additions & 2 deletions tests/dsdl/_rand.py
@@ -162,8 +162,12 @@ def _serialize_deserialize(obj: pyuavcan.dsdl.CompositeObject) -> typing.Tuple[f


def _make_random_fragmented_serialized_representation(bls: pydsdl.BitLengthSet) -> typing.Sequence[memoryview]:
-    bit_length = random.choice(list(bls))
-    byte_length = (bit_length + 7) // 8
+    if bls.max < 8 * 1024:  # If the BLS appears small, perform numerical expansion and pick a random value.
+        bit_length = random.choice(list(bls))
+        byte_length = (bit_length + 7) // 8
+    else:  # Otherwise, just use the smallest value because expansion is slow.
+        bit_length = bls.min
+        byte_length = (bit_length + 7) // 8
return _fragment_randomly(numpy.random.randint(0, 256, size=byte_length, dtype=numpy.uint8).data)


@@ -0,0 +1,8 @@
+# This data type is crafted to trigger the combinatorial explosion problem: https://github.com/UAVCAN/pydsdl/issues/23
+# The problem is now fixed so we introduce this type to shield us against regressions.
+# If DSDL compilation takes over a few minutes, you have a combinatorial problem somewhere in the compiler.
+
+uavcan.primitive.String.1.0[<=1024] foo
+uavcan.primitive.String.1.0[256] bar
+
+@extent 100 * (1024 ** 2) * 8 # One hundred megabytes should be about right.
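
Why this type used to explode: naive expansion combines bit-length sets pairwise at every concatenation, so the work scales with the product of the operand set sizes even though deduplication keeps each intermediate result small. An illustrative measurement (numbers are back-of-the-envelope, not from the repository):

```python
# One String.1.0 has ~257 attainable serialized lengths (length prefix + 0..256 bytes).
a = set(range(0, 257 * 8, 8))
b = {x + y for x in a for y in a}   # two concatenated strings: 257**2 = 66049 pairs examined
print(len(a), len(b))               # 257 513 -- only 513 distinct sums survive,
                                    # yet `foo` above concatenates up to 1024 elements
```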
@@ -11,5 +11,3 @@ void8
uint8[PIXELS_PER_IMAGE * 3] pixels # Row major, top-left pixel first, color ordering RGB

@sealed
-@assert _offset_.count == 1 # Fixed size message
-@print _offset_.max / 8 / 1024 / 1024 # Print size in mebibytes
