From 960ae8a6c056806be1ac44152f8f90aa6865645c Mon Sep 17 00:00:00 2001 From: Roumen Roupski Date: Wed, 13 Sep 2023 09:10:29 -0700 Subject: [PATCH] Add config param to split multiline text --- gradle.properties | 2 +- .../io/ocsf/translator/TranslatorBuilder.java | 110 ++++++++---------- .../translator/TranslatorBuilderTest.java | 65 ++++------- .../src/main/java/io/ocsf/utils/Strings.java | 39 ++++++- .../test/java/io/ocsf/utils/StringsTest.java | 95 +++++++++++++++ 5 files changed, 201 insertions(+), 110 deletions(-) create mode 100644 ocsf-utils/src/test/java/io/ocsf/utils/StringsTest.java diff --git a/gradle.properties b/gradle.properties index 1de4a11..694dc59 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,3 +1,3 @@ # Global properties artifactGroup=io.ocsf -artifactVersion=1.2.2-dev +artifactVersion=1.3.0-dev diff --git a/ocsf-translator/src/main/java/io/ocsf/translator/TranslatorBuilder.java b/ocsf-translator/src/main/java/io/ocsf/translator/TranslatorBuilder.java index 7610846..beed60b 100644 --- a/ocsf-translator/src/main/java/io/ocsf/translator/TranslatorBuilder.java +++ b/ocsf-translator/src/main/java/io/ocsf/translator/TranslatorBuilder.java @@ -61,6 +61,7 @@ public final class TranslatorBuilder private static final String ValueType = "type"; private static final String Values = "values"; private static final String Separator = "separator"; + private static final String Splitter = "splitter"; private static final String Predicate = "when"; private static final String Parser = "parser"; @@ -157,8 +158,11 @@ public static Translator fromString(final String json) throws IOException public static Translator build( final Path home, final JsonReader reader, final Map map) throws IOException { - final Translator translator = createTranslator( - (String) map.get(Predicate), readParsers(home, reader, map), readRules(home, reader, map)); + final Translator translator = + createTranslator( + (String) map.get(Predicate), + readParsers(home, reader, map), + readRules(home, reader, map)); final Collection> ruleset = Maps.typecast(map.get(RuleSet)); if (ruleset == null || ruleset.isEmpty()) @@ -170,9 +174,11 @@ public static Translator build( final Collection list = new ArrayList<>(ruleset.size()); for (final Map rule : ruleset) { - list.add(createSubTranslator( - (String) rule.get(Predicate), - readParsers(home, reader, rule), readRules(home, reader, rule))); + list.add( + createSubTranslator( + (String) rule.get(Predicate), + readParsers(home, reader, rule), + readRules(home, reader, rule))); } return new Translator() @@ -315,12 +321,18 @@ private static DataTranslator createParser(final Map map) { final Map dstMap = Maps.typecast(Maps.getIn(data, dstKey)); if (dstMap == null) + { Maps.putIn(data, dstKey, parsed); + } else + { dstMap.putAll(parsed); + } } else + { data.putAll(parsed); + } } } catch (final Exception ex) @@ -398,8 +410,7 @@ public Map apply(final Map data) @Override public Map apply( - final Map data, final Map translated) + final Map data, final Map translated) { return TranslatorBuilder.apply(compiled, translator.parse(data), translated); } @@ -419,8 +430,7 @@ public Map apply(final Map data) @Override public Map apply( - final Map data, - final Map translated) + final Map data, final Map translated) { return p.test(data) ? TranslatorBuilder.apply(compiled, translator.parse(data), translated) : @@ -438,11 +448,8 @@ private static Collection> readRules( } private static Collection> readRules( - final Path home, - final JsonReader reader, - final Collection> list, - final Collection> rules) - throws IOException + final Path home, final JsonReader reader, final Collection> list, + final Collection> rules) throws IOException { for (final Map rule : list) { @@ -474,11 +481,8 @@ else if (include instanceof List) } private static void includeRule( - final Path home, - final String filename, - final JsonReader reader, - final Collection> rules) - throws IOException + final Path home, final String filename, final JsonReader reader, + final Collection> rules) throws IOException { final Object included = reader.read(home.resolve(filename)); if (included instanceof Map) @@ -499,15 +503,14 @@ else if (included instanceof Collection) } static Map apply( - final List> rules, final Map data) + final List> rules, final Map data) { return apply(rules, data, new HashMap<>()); } static Map apply( - final List> rules, final Map data, final Map translated) + final List> rules, final Map data, + final Map translated) { rules.forEach(rule -> rule.value.apply(data, translated)); Maps.cleanup(data); @@ -540,12 +543,10 @@ private static Tuple createRule(final Map rule) th final String name = r.getKey().intern(); final Object obj = r.getValue(); - if (obj instanceof Map) - return newRule(name, Maps.typecast(obj)); + if (obj instanceof Map) return newRule(name, Maps.typecast(obj)); // handle embedded objects - if (obj instanceof Collection) - return embedded(name, Maps.typecast(obj)); + if (obj instanceof Collection) return embedded(name, Maps.typecast(obj)); } throw new IllegalArgumentException("Illegal rule"); @@ -559,8 +560,7 @@ private static Tuple newRule(final String name, final Map) - return merge(name, Maps.typecast(value)); + if (value instanceof Map) return merge(name, Maps.typecast(value)); return merge(name, map); } @@ -599,8 +599,7 @@ private static Tuple newRule(final String name, final Map embedded( - final String name, final Collection> ruleData) + final String name, final Collection> ruleData) { final List> rules = compile(ruleData); @@ -649,8 +648,7 @@ private static Tuple value(final String name, final Object ruleDat } return new Tuple<>(name, (data, translated) -> { - if (predicate == null || predicate.test(data)) - Maps.putIn(translated, name, value, overwrite); + if (predicate == null || predicate.test(data)) Maps.putIn(translated, name, value, overwrite); }); } @@ -678,7 +676,9 @@ private static Tuple clone(final String name, final Object ruleDat return new Tuple<>(name, (data, translated) -> { if (predicate == null || predicate.test(data)) + { Maps.putIn(translated, dest, Maps.getIn(translated, name), overwrite); + } }); } @@ -698,8 +698,7 @@ private static Tuple remove(final String name, final Object ruleDa } return new Tuple<>(name, (data, translated) -> { - if (predicate == null || predicate.test(data)) - Maps.removeIn(data, name); + if (predicate == null || predicate.test(data)) Maps.removeIn(data, name); }); } @@ -719,6 +718,7 @@ private static Tuple rename( final String key; final String type; final String separator; + final String splitter; final Object defValue; final boolean overwrite; final boolean is_array; @@ -732,6 +732,7 @@ private static Tuple rename( key = Maps.get(map, NameField, name).intern(); type = Maps.get(map, ValueType); separator = (String) map.get(Separator); + splitter = (String) map.getOrDefault(Splitter, Strings.LineSplitter); defValue = map.get(DefaultValue); overwrite = Maps.get(map, Overwrite, Boolean.FALSE); @@ -745,6 +746,7 @@ else if (ruleData instanceof String) key = ((String) ruleData).intern(); type = null; separator = null; + splitter = Strings.LineSplitter; defValue = null; overwrite = false; is_array = false; @@ -778,7 +780,6 @@ public Object get(final Map map, final String _name) { if (separator != null && sb.length() > 0) sb.append(separator); - sb.append(v); } } @@ -792,8 +793,7 @@ public Object get(final Map map, final String _name) src = source; } - return new Tuple<>(name, (data, translated) -> - { + return new Tuple<>(name, (data, translated) -> { if (predicate == null || predicate.test(data)) { final Object value = src.get(data, name); @@ -807,7 +807,7 @@ public Object get(final Map map, final String _name) if (is_array) { final List list = new ArrayList<>(); - for (final Object o : toArray(value)) + for (final Object o : Strings.toArray(value, splitter)) { final Object parsed = o != null ? typecast(o, type) : null; list.add(parsed); @@ -825,9 +825,13 @@ public Object get(final Map map, final String _name) else { if (is_array) - Maps.putIn(translated, key, toArray(value), overwrite); + { + Maps.putIn(translated, key, Strings.toArray(value, splitter), overwrite); + } else + { Maps.putIn(translated, key, value, overwrite); + } } } else if (defValue != null) @@ -853,8 +857,7 @@ private static Tuple lookup( final Map values = Maps.downcase(Maps.get(rule, Values)); - return new Tuple<>(name, (data, translated) -> - { + return new Tuple<>(name, (data, translated) -> { if (predicate == null || predicate.test(data)) { final Object value = source.get(data, name); @@ -986,23 +989,6 @@ private static Object url(final Object o) } } - static List toArray(final Object value) - { - if (value instanceof String) - { - // Split the string and convert to list - final String[] split = ((String) value).trim().split("\\s+"); - return Maps.typecast(Arrays.asList(split)); - } - else if (value instanceof List) - { - return Maps.typecast(value); - } - - // Everything else, including null, gets wrapped in a single-item list - return Collections.singletonList(value); - } - private static Object toDouble(final Object o) { if (o instanceof Number) return ((Number) o).doubleValue(); @@ -1043,8 +1029,7 @@ private static Object toLong(final Object o) { final String n = ((String) o).trim(); - if (!n.isEmpty()) - return Long.decode(n); + if (!n.isEmpty()) return Long.decode(n); } catch (final NumberFormatException ignore) { @@ -1066,8 +1051,7 @@ private static Object toInt(final Object o) { final String n = ((String) o).trim(); - if (!n.isEmpty()) - return Integer.decode(n); + if (!n.isEmpty()) return Integer.decode(n); } catch (final NumberFormatException ignore) { diff --git a/ocsf-translator/src/test/java/io/ocsf/translator/TranslatorBuilderTest.java b/ocsf-translator/src/test/java/io/ocsf/translator/TranslatorBuilderTest.java index 6e1e703..6c48490 100644 --- a/ocsf-translator/src/test/java/io/ocsf/translator/TranslatorBuilderTest.java +++ b/ocsf-translator/src/test/java/io/ocsf/translator/TranslatorBuilderTest.java @@ -25,15 +25,13 @@ import org.junit.Assert; import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; public final class TranslatorBuilderTest extends TestCase { - private static final String JsonData = "{name: 'foo', port:42, rule: 'test data'}"; + private static final String JsonData = "{name: 'foo', port:42, rule: 'test data'}"; private Map data; @Override @@ -561,48 +559,6 @@ public void testEmptyUrlText() throws IOException Assert.assertEquals("", url.get(Dictionary.Hostname)); } - public void testToArray() - { - // single value - Assert.assertEquals(Collections.singletonList(null), TranslatorBuilder.toArray(null)); - - Assert.assertEquals(Collections.singletonList(1), TranslatorBuilder.toArray(1)); - - Assert.assertEquals(Collections.singletonList("a"), TranslatorBuilder.toArray("a")); - - // array value - all should just pass through - Assert.assertEquals(Collections.EMPTY_LIST, TranslatorBuilder.toArray(Collections.EMPTY_LIST)); - - Assert.assertEquals( - Collections.singletonList(1), TranslatorBuilder.toArray(Collections.singletonList(1))); - - Assert.assertEquals( - Collections.singletonList(null), TranslatorBuilder.toArray(Collections.singletonList(null))); - - Assert.assertEquals( - Arrays.asList("a", "b"), TranslatorBuilder.toArray(Arrays.asList("a", "b"))); - - // Single string value, split into list - Assert.assertEquals(Arrays.asList("a", "b"), TranslatorBuilder.toArray("a b")); - - Assert.assertEquals(Arrays.asList("a", "b"), TranslatorBuilder.toArray("a\tb")); - Assert.assertEquals(Arrays.asList("a", "b"), TranslatorBuilder.toArray("a\nb")); - Assert.assertEquals(Arrays.asList("a", "b"), TranslatorBuilder.toArray("a\n\tb")); - Assert.assertEquals(Arrays.asList("a", "b"), TranslatorBuilder.toArray("a\n\tb\n")); - Assert.assertEquals(Arrays.asList("a", "b"), TranslatorBuilder.toArray("a b ")); - Assert.assertEquals(Arrays.asList("a", "b"), TranslatorBuilder.toArray(" a b")); - - // Edge cases - Assert.assertEquals( - "Empty string results in list of one element with an empty string", - Collections.singletonList(""), TranslatorBuilder.toArray("")); - - Assert.assertEquals( - "Split of only whitespace results in list of one element with an empty string", - Collections.singletonList(""), TranslatorBuilder.toArray("\n")); - - } - public void testCopyArrayValue2() throws IOException { final Map translated = TranslatorBuilder @@ -636,6 +592,25 @@ public void testCopyAndTypecastArrayValuesAsInt() throws IOException final Map data = Json5Parser.to("{name: 'foo', port:42, rule: 'test data', ports: '22 42 69'}"); + final Map translated = TranslatorBuilder + .fromString("{rules: [{ports: {@copy: {name: 'ports', is_array: true, type: 'integer', splitter: '\\\\s+'}}}]}") + .apply(data); + + Assert.assertEquals(1, translated.size()); + Assert.assertTrue(translated.get("ports") instanceof List); + + final List list = (List) translated.get("ports"); + Assert.assertEquals(3, list.size()); + Assert.assertEquals(22, list.get(0)); + Assert.assertEquals(42, list.get(1)); + Assert.assertEquals(69, list.get(2)); + } + + public void testCopyAndTypecastArrayValuesAsInt2() throws IOException + { + final Map data = + Json5Parser.to("{name: 'foo', port:42, rule: 'test data', ports: '22 \n 42 \n69'}"); + final Map translated = TranslatorBuilder .fromString("{rules: [{ports: {@copy: {name: 'ports', is_array: true, type: 'integer'}}}]}") .apply(data); diff --git a/ocsf-utils/src/main/java/io/ocsf/utils/Strings.java b/ocsf-utils/src/main/java/io/ocsf/utils/Strings.java index 84aff73..7173b39 100644 --- a/ocsf-utils/src/main/java/io/ocsf/utils/Strings.java +++ b/ocsf-utils/src/main/java/io/ocsf/utils/Strings.java @@ -20,12 +20,18 @@ import java.text.CollationElementIterator; import java.text.Collator; import java.text.RuleBasedCollator; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; /** * Language-sensitive text searching utility class. */ public final class Strings { + public static final String LineSplitter = "\\R+"; + public static final String WhiteSpaceSplitter = "\\s+"; + public static final String EMPTY = ""; private static final int mask = 0xFFFF0000; // Collator.PRIMARY @@ -71,6 +77,37 @@ public static int search(final String text, final String sub) return search(text, Strings.getCollationElementIterator(sub)); } + public static List toArray(final Object value) + { + return toArray(value, LineSplitter); + } + + public static List toArray(final Object value, final String splitter) + { + if (value instanceof String) + { + // Split the string and convert to list + final String[] split = ((String) value).split(splitter); + final List list = new ArrayList<>(split.length); + for (final String s : split) + { + final String s1 = s.trim(); + if (!s1.isEmpty()) + { + list.add(s1); + } + } + return list; + } + else if (value instanceof List) + { + return Maps.typecast(value); + } + + // Everything else, including null, gets wrapped in a single-item list + return Collections.singletonList(value); + } + private static int search(final String text, final CollationElementIterator patIter) { final CollationElementIterator it = collator.getCollationElementIterator(text); @@ -96,7 +133,7 @@ private static boolean match( { do { - final int i = pattern.next() & Strings.mask; + final int i = pattern.next() & Strings.mask; final int target = text.next() & Strings.mask; if (i == Strings.mask) diff --git a/ocsf-utils/src/test/java/io/ocsf/utils/StringsTest.java b/ocsf-utils/src/test/java/io/ocsf/utils/StringsTest.java new file mode 100644 index 0000000..2f25922 --- /dev/null +++ b/ocsf-utils/src/test/java/io/ocsf/utils/StringsTest.java @@ -0,0 +1,95 @@ +package io.ocsf.utils; + +import junit.framework.TestCase; +import org.junit.Assert; + +import java.util.Arrays; +import java.util.Collections; + +public class StringsTest extends TestCase +{ + + // Test the default line splitter + public void testToArray() + { + // single value + Assert.assertEquals(Collections.singletonList(null), Strings.toArray(null)); + + Assert.assertEquals(Collections.singletonList(1), Strings.toArray(1)); + + Assert.assertEquals(Collections.singletonList("a"), Strings.toArray("a")); + + // array value - all should just pass through + Assert.assertEquals(Collections.emptyList(), Strings.toArray(Collections.EMPTY_LIST)); + + Assert.assertEquals( + Collections.singletonList(1), Strings.toArray(Collections.singletonList(1))); + + Assert.assertEquals( + Collections.singletonList(null), Strings.toArray(Collections.singletonList(null))); + + Assert.assertEquals( + Arrays.asList("a", "b"), Strings.toArray(Arrays.asList("a", "b"))); + + // Single string value, split into list + Assert.assertEquals(Arrays.asList("a", "b"), Strings.toArray("a \nb")); + + Assert.assertEquals(Collections.singletonList("a\tb"), Strings.toArray("a\tb")); + Assert.assertEquals(Arrays.asList("a", "b"), Strings.toArray("a\nb")); + Assert.assertEquals(Arrays.asList("a", "b"), Strings.toArray("a\n\tb")); + Assert.assertEquals(Arrays.asList("a", "b"), Strings.toArray("a\n\tb\n")); + Assert.assertEquals(Arrays.asList("a", "b"), Strings.toArray("a\n\n\tb\n\n")); + Assert.assertEquals(Collections.singletonList("a b"), Strings.toArray("a b ")); + Assert.assertEquals(Collections.singletonList("a b"), Strings.toArray(" a b")); + + // Edge cases + Assert.assertEquals( + "Empty string results in an empty list", + Collections.emptyList(), Strings.toArray("")); + + Assert.assertEquals( + "Split of only whitespace results in an empty list", + Collections.emptyList(), Strings.toArray("\n")); + } + + public void testTestToArray() + { + // single value + Assert.assertEquals(Collections.singletonList(null), Strings.toArray(null, Strings.WhiteSpaceSplitter)); + + Assert.assertEquals(Collections.singletonList(1), Strings.toArray(1, Strings.WhiteSpaceSplitter)); + + Assert.assertEquals(Collections.singletonList("a"), Strings.toArray("a", Strings.WhiteSpaceSplitter)); + + // array value - all should just pass through + Assert.assertEquals(Collections.emptyList(), Strings.toArray(Collections.EMPTY_LIST, Strings.WhiteSpaceSplitter)); + + Assert.assertEquals( + Collections.singletonList(1), Strings.toArray(Collections.singletonList(1), Strings.WhiteSpaceSplitter)); + + Assert.assertEquals( + Collections.singletonList(null), Strings.toArray(Collections.singletonList(null), Strings.WhiteSpaceSplitter)); + + Assert.assertEquals( + Arrays.asList("a", "b"), Strings.toArray(Arrays.asList("a", "b"), Strings.WhiteSpaceSplitter)); + + // Single string value, split into list + Assert.assertEquals(Arrays.asList("a", "b"), Strings.toArray("a \nb", Strings.WhiteSpaceSplitter)); + + Assert.assertEquals(Arrays.asList("a", "b"), Strings.toArray("a\tb", Strings.WhiteSpaceSplitter)); + Assert.assertEquals(Arrays.asList("a", "b"), Strings.toArray("a\nb", Strings.WhiteSpaceSplitter)); + Assert.assertEquals(Arrays.asList("a", "b"), Strings.toArray("a\n\tb", Strings.WhiteSpaceSplitter)); + Assert.assertEquals(Arrays.asList("a", "b"), Strings.toArray("a\n\tb\n", Strings.WhiteSpaceSplitter)); + Assert.assertEquals(Arrays.asList("a", "b"), Strings.toArray("a b ", Strings.WhiteSpaceSplitter)); + Assert.assertEquals(Arrays.asList("a", "b"), Strings.toArray(" a b", Strings.WhiteSpaceSplitter)); + + // Edge cases + Assert.assertEquals( + "Empty string results in an empty list", + Collections.emptyList(), Strings.toArray("", Strings.WhiteSpaceSplitter)); + + Assert.assertEquals( + "Split of only whitespace results in an empty list", + Collections.emptyList(), Strings.toArray("\n", Strings.WhiteSpaceSplitter)); + } +} \ No newline at end of file