# Patch summary (comment preamble only; the patch text that follows is unchanged).
#
# Target: logstash/pipelines/zeek/1029_zeek_intel.conf, one hunk (@@ -42,14 +42,29 @@) inside `filter {`.
#
# What the hunk changes:
#   * mutate "mutate_split_zeek_intel_commas": the `"[zeek_cols][sources]" => "|"` entry is removed
#     from `split`; the "," splits of [zeek_cols][matched] and [zeek_cols][cif_tags] remain.
#   * A new ruby filter "ruby_intel_sources_split" takes over the handling of [zeek_cols][sources]:
#       - nothing happens when event.get returns nil/false for the field;
#       - when the value is an Array, every element is converted with to_s, split on '|', and the
#         nested result is flattened in place (map! / flatten! mutate `sources`; the return value
#         of flatten! is not used);
#       - otherwise the value is converted with to_s and split on '|';
#       - event.set writes the result back unless it is nil or has length 0, so an empty result
#         is never written to the field.
#   * The explanatory comment about "sources" moves from the mutate filter to the ruby filter and
#     gains the rationale for the change: in JSON "sources" is already an array, in TSV it is a string.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed into a single line.
# Original indentation and blank lines cannot be recovered from this view, so the text is left
# as-is; re-export the patch from version control before trying to apply it.
diff --git a/logstash/pipelines/zeek/1029_zeek_intel.conf b/logstash/pipelines/zeek/1029_zeek_intel.conf index da376c60e..0d3591b3c 100644 --- a/logstash/pipelines/zeek/1029_zeek_intel.conf +++ b/logstash/pipelines/zeek/1029_zeek_intel.conf @@ -42,14 +42,29 @@ filter { } } - # "sources" is handled differently because of some weirdness between creating the meta.source field in the - # intel file (which seems to be a string) and the sources field in intel.log (which is a set of string) - # so we're doing our own | pseudo-delimiter to work around it. mutate { id => "mutate_split_zeek_intel_commas" - split => { "[zeek_cols][sources]" => "|" - "[zeek_cols][matched]" => "," + split => { "[zeek_cols][matched]" => "," "[zeek_cols][cif_tags]" => "," } } + # "sources" is handled differently because of some weirdness between creating the meta.source field in the + # intel file (which seems to be a string) and the sources field in intel.log (which is a set of string) + # so we're doing our own | pseudo-delimiter to work around it. Further complicating things, in JSON + # "sources" is already an array, and in TSV it's a string. + ruby { + id => "ruby_intel_sources_split" + code => " + if (sources = event.get('[zeek_cols][sources]')) then + if sources.is_a?(Array) + # sources is an array already (like from JSON), expand out the elements that may be pipe-separated + sources.map! { |source| source.to_s.split('|') }.flatten! + else + # sources is not an array, split on pipe + sources = sources.to_s.split('|') + end + event.set('[zeek_cols][sources]', sources) unless sources.nil? or (sources.length == 0) + end" + } + } } # end Filter