From 02007c12df5a457dccad9d6546207276806a99c5 Mon Sep 17 00:00:00 2001 From: Daniel Burr Date: Sat, 18 Jan 2025 00:53:29 +0100 Subject: [PATCH 1/2] Fix kaitai python version for messages ending in a variable-length string --- kaitai/python/kaitai_sbp/parse_utils.py | 12 +++-- .../kaitai_sbp/tests/test_benchmark_lite.py | 3 +- .../python/kaitai_sbp/tests/test_parsers.py | 4 +- kaitai/python/kaitai_sbp/tests/utils.py | 46 ++++++++++++++++--- 4 files changed, 52 insertions(+), 13 deletions(-) diff --git a/kaitai/python/kaitai_sbp/parse_utils.py b/kaitai/python/kaitai_sbp/parse_utils.py index 75a9f401dd..10b5982687 100644 --- a/kaitai/python/kaitai_sbp/parse_utils.py +++ b/kaitai/python/kaitai_sbp/parse_utils.py @@ -13,16 +13,18 @@ # wrapper object which allows KaitaiStream to be used with a simple byte array class BufferKaitaiStream(KaitaiStream): - class IOBytes: + class IOBytes: # "inspired by" BytesIO def __init__(self, buf): self.buf = buf self.pos = 0 - def read(self, num): - if self.pos + num > len(self.buf): + def read(self, size=-1): + if size < 0: + size = len(self.buf) - self.pos + if self.pos + size > len(self.buf): raise EOFError - buf = self.buf[self.pos:self.pos + num] - self.pos += num + buf = self.buf[self.pos:self.pos + size] + self.pos += size return buf def seek(self, pos): diff --git a/kaitai/python/kaitai_sbp/tests/test_benchmark_lite.py b/kaitai/python/kaitai_sbp/tests/test_benchmark_lite.py index e18f547c2c..417d72286c 100644 --- a/kaitai/python/kaitai_sbp/tests/test_benchmark_lite.py +++ b/kaitai/python/kaitai_sbp/tests/test_benchmark_lite.py @@ -9,7 +9,7 @@ # WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. import timeit -from kaitai_sbp.tests.utils import count_messages, get_next_msg_construct, get_next_msg_kaitai, get_next_msg_hybrid1, get_next_msg_hybrid2, get_next_msg_external, PERL_CMD +from kaitai_sbp.tests.utils import count_messages, get_next_msg_construct, get_next_msg_kaitai, get_next_msg_hybrid1, get_next_msg_hybrid2, get_next_msg_hybrid3, get_next_msg_external, PERL_CMD TEST_DATA = "test_data/benchmark.sbp" @@ -21,4 +21,5 @@ def test_benchmarks(): print("kaitai: {}".format(timeit.timeit('count_messages(TEST_DATA, get_next_msg_kaitai)', number=COUNT, globals=globals()))) print("hybrid1: {}".format(timeit.timeit('count_messages(TEST_DATA, get_next_msg_hybrid1)', number=COUNT, globals=globals()))) print("hybrid2: {}".format(timeit.timeit('count_messages(TEST_DATA, get_next_msg_hybrid2)', number=COUNT, globals=globals()))) + print("hybrid3: {}".format(timeit.timeit('count_messages(TEST_DATA, get_next_msg_hybrid3)', number=COUNT, globals=globals()))) print("perl: {}".format(timeit.timeit('count_messages(TEST_DATA, get_next_msg_external, PERL_CMD)', number=COUNT, globals=globals()))) diff --git a/kaitai/python/kaitai_sbp/tests/test_parsers.py b/kaitai/python/kaitai_sbp/tests/test_parsers.py index e4ecba1370..ab664947e5 100644 --- a/kaitai/python/kaitai_sbp/tests/test_parsers.py +++ b/kaitai/python/kaitai_sbp/tests/test_parsers.py @@ -8,7 +8,7 @@ # EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. -from kaitai_sbp.tests.utils import compare_parser_outputs, count_messages, get_next_msg_construct, get_next_msg_kaitai, get_next_msg_hybrid1, get_next_msg_hybrid2, get_next_msg_external, PERL_CMD +from kaitai_sbp.tests.utils import compare_parser_outputs, count_messages, get_next_msg_construct, get_next_msg_kaitai, get_next_msg_hybrid1, get_next_msg_hybrid2, get_next_msg_hybrid3, get_next_msg_external, PERL_CMD import os import random import tempfile @@ -61,9 +61,11 @@ def test_corrupted_counts(): num_messages_kaitai = count_messages(filename_corrupted, get_next_msg_kaitai) num_messages_hybrid1 = count_messages(filename_corrupted, get_next_msg_hybrid1) num_messages_hybrid2 = count_messages(filename_corrupted, get_next_msg_hybrid2) + num_messages_hybrid3 = count_messages(filename_corrupted, get_next_msg_hybrid3) num_messages_perl = count_messages(filename_corrupted, get_next_msg_external, PERL_CMD) assert(num_messages_construct == num_messages_kaitai) assert(num_messages_construct == num_messages_hybrid1) assert(num_messages_construct == num_messages_hybrid2) + assert(num_messages_construct == num_messages_hybrid3) assert(num_messages_construct == num_messages_perl) diff --git a/kaitai/python/kaitai_sbp/tests/utils.py b/kaitai/python/kaitai_sbp/tests/utils.py index f8dd436c84..b8c011f2af 100644 --- a/kaitai/python/kaitai_sbp/tests/utils.py +++ b/kaitai/python/kaitai_sbp/tests/utils.py @@ -5,11 +5,13 @@ from generator.sbpg.targets.common import snake_case, snake_case_keys, decode_json from kaitaistruct import KaitaiStream, KaitaiStruct import sys +import base64 import sbp.msg as msg_construct import sbp.table as table_construct from sbp.sbp2json import iter_messages_buffered as parse_file_construct from subprocess import Popen, PIPE +SBP_PREAMBLE = 0x55 SBP_HEADER_LEN = 6 PERL_CMD = ['perl', 'kaitai/perl/bin/sbp2json.pl'] @@ -31,8 +33,8 @@ def dictify(obj, round_floats=False): # "original" version of sbp2json based entirely upon construct def get_next_msg_construct(fileobj): for msg_type, sender, payload_len, buf, crc_read in parse_file_construct(fileobj): - msg_buf = buf[SBP_HEADER_LEN:SBP_HEADER_LEN + payload_len] - msg = msg_construct.SBP(msg_type, sender, payload_len, msg_buf, crc_read) + payload = buf[SBP_HEADER_LEN:SBP_HEADER_LEN + payload_len] + msg = msg_construct.SBP(msg_type, sender, payload_len, payload, crc_read) if msg_type not in table_construct._SBP_TABLE: sys.stderr.write("Skipping unknown message type: {}\n".format(msg_type)) @@ -70,14 +72,40 @@ def get_next_msg_hybrid2(fileobj): sys.stderr.write("Skipping unknown message type: {}\n".format(msg_type)) continue - msg_buf = buf[SBP_HEADER_LEN:SBP_HEADER_LEN + payload_len] - msg = msg_construct.SBP(msg_type, sender, payload_len, msg_buf, crc_read) + payload = buf[SBP_HEADER_LEN:SBP_HEADER_LEN + payload_len] + msg = msg_construct.SBP(msg_type, sender, payload_len, payload, crc_read) stream.set_buffer(msg.to_binary()) obj = table_kaitai.SbpMessage(stream) yield get_flattened_msg(obj) +# hybrid version of sbp2json which uses original parser + kaitai struct to +# avoid calling table_construct.dispatch() and avoids usage of BytesIO +def get_next_msg_hybrid3(fileobj): + stream = BufferKaitaiStream() + for msg_type, sender, payload_len, buf, crc_read in parse_file_construct(fileobj): + + if msg_type not in table_kaitai.TABLE: + sys.stderr.write("Skipping unknown message type: {}\n".format(msg_type)) + continue + + # we can construct an object directly from the payload, but this + # means that we need to fill in the preamble/sender/crc/etc + payload = buf[SBP_HEADER_LEN:SBP_HEADER_LEN + payload_len] + cls = table_kaitai.TABLE[msg_type] + stream.set_buffer(bytes(payload)) + obj = cls(stream) + obj.preamble = SBP_PREAMBLE + obj.msg_type = msg_type + obj.sender = sender + obj.length = payload_len + obj.payload = base64.standard_b64encode(payload).decode('ascii') + obj.crc = crc_read + + yield obj + + def get_next_msg_external(cmd, filename): proc = Popen(cmd + [filename], stdout=PIPE) @@ -106,7 +134,10 @@ def count_messages(filename, fn, cmd=None): # (to avoid calling table_construct.dispatch()) # 4. hybrid2: use parsing code from construct version + msg_construct.SBP + # kaitai struct objects (to avoid calling table_construct.dispatch()) -# 5. perl: based completely upon the perl bindings generated by +# 5. hybrid2: use parsing code from construct version + msg_construct.SBP + +# kaitai_table.TABLE (to avoid calling table_construct.dispatch() and +# usage of BytesIO) +# 6. perl: based completely upon the perl bindings generated by # kaitai-struct-compiler def compare_parser_outputs(filename): num_messages = 0 @@ -115,8 +146,9 @@ def compare_parser_outputs(filename): file2 = open(filename, 'rb') file3 = open(filename, 'rb') file4 = open(filename, 'rb') + file5 = open(filename, 'rb') - for msg_construct, msg_kaitai, msg_hybrid1, msg_hybrid2, msg_perl in zip(get_next_msg_construct(file1), get_next_msg_kaitai(file2), get_next_msg_hybrid1(file3), get_next_msg_hybrid2(file4), get_next_msg_external(PERL_CMD, filename)): + for msg_construct, msg_kaitai, msg_hybrid1, msg_hybrid2, msg_hybrid3, msg_perl in zip(get_next_msg_construct(file1), get_next_msg_kaitai(file2), get_next_msg_hybrid1(file3), get_next_msg_hybrid2(file4), get_next_msg_hybrid3(file5), get_next_msg_external(PERL_CMD, filename)): msg_construct = snake_case_keys(msg_construct) msg_perl = decode_json(msg_perl) @@ -124,10 +156,12 @@ def compare_parser_outputs(filename): dict_kaitai = dictify(msg_kaitai) dict_hybrid1 = dictify(msg_hybrid1) dict_hybrid2 = dictify(msg_hybrid2) + dict_hybrid3 = dictify(msg_hybrid3) assert dict_construct == dict_kaitai, "Mismatch:\n{}\nvs\n{}".format(dict_construct, dict_kaitai) assert dict_construct == dict_hybrid1, "Mismatch:\n{}\nvs\n{}".format(dict_construct, dict_hybrid1) assert dict_construct == dict_hybrid2, "Mismatch:\n{}\nvs\n{}".format(dict_construct, dict_hybrid2) + assert dict_construct == dict_hybrid3, "Mismatch:\n{}\nvs\n{}".format(dict_construct, dict_hybrid3) # need to round floats due to difference in rounding approaches used # by perl and python JSON encoders From 6623ef1b3c36e491f72f2b02f1e61009bc4611bc Mon Sep 17 00:00:00 2001 From: Daniel Burr Date: Sat, 18 Jan 2025 02:29:17 +0100 Subject: [PATCH 2/2] Tweak comments --- kaitai/python/kaitai_sbp/parse_utils.py | 3 ++- kaitai/python/kaitai_sbp/tests/utils.py | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/kaitai/python/kaitai_sbp/parse_utils.py b/kaitai/python/kaitai_sbp/parse_utils.py index 10b5982687..6a94dbc634 100644 --- a/kaitai/python/kaitai_sbp/parse_utils.py +++ b/kaitai/python/kaitai_sbp/parse_utils.py @@ -13,7 +13,8 @@ # wrapper object which allows KaitaiStream to be used with a simple byte array class BufferKaitaiStream(KaitaiStream): - class IOBytes: # "inspired by" BytesIO + # simple emulation of io.BytesIO interface expected by KaitaiStream + class IOBytes: def __init__(self, buf): self.buf = buf self.pos = 0 diff --git a/kaitai/python/kaitai_sbp/tests/utils.py b/kaitai/python/kaitai_sbp/tests/utils.py index b8c011f2af..fdfe66c9ab 100644 --- a/kaitai/python/kaitai_sbp/tests/utils.py +++ b/kaitai/python/kaitai_sbp/tests/utils.py @@ -80,8 +80,8 @@ def get_next_msg_hybrid2(fileobj): yield get_flattened_msg(obj) -# hybrid version of sbp2json which uses original parser + kaitai struct to -# avoid calling table_construct.dispatch() and avoids usage of BytesIO +# hybrid version of sbp2json which uses original parser + kaitai struct to avoid +# calling table_construct.dispatch() as well as avoiding usage of io.BytesIO def get_next_msg_hybrid3(fileobj): stream = BufferKaitaiStream() for msg_type, sender, payload_len, buf, crc_read in parse_file_construct(fileobj): @@ -90,8 +90,8 @@ def get_next_msg_hybrid3(fileobj): sys.stderr.write("Skipping unknown message type: {}\n".format(msg_type)) continue - # we can construct an object directly from the payload, but this - # means that we need to fill in the preamble/sender/crc/etc + # we can construct a kaitai object directly from the payload, but this + # means that we need to manually fill in the preamble/sender/crc/etc payload = buf[SBP_HEADER_LEN:SBP_HEADER_LEN + payload_len] cls = table_kaitai.TABLE[msg_type] stream.set_buffer(bytes(payload)) @@ -134,7 +134,7 @@ def count_messages(filename, fn, cmd=None): # (to avoid calling table_construct.dispatch()) # 4. hybrid2: use parsing code from construct version + msg_construct.SBP + # kaitai struct objects (to avoid calling table_construct.dispatch()) -# 5. hybrid2: use parsing code from construct version + msg_construct.SBP + +# 5. hybrid3: use parsing code from construct version + msg_construct.SBP + # kaitai_table.TABLE (to avoid calling table_construct.dispatch() and # usage of BytesIO) # 6. perl: based completely upon the perl bindings generated by