From 03e47c9f0822ee96c4f354813a3f2b575db9d96b Mon Sep 17 00:00:00 2001 From: Koray Erkan Date: Fri, 31 Oct 2025 16:51:28 +0300 Subject: [PATCH 1/2] Add comprehensive documentation for Usage and Limits, Users management, and Local Pipeline tutorial; update release notes for versions 1.3.0, 1.4.0, 1.5.0, and 1.5.1; and create sidebar structure for version 1.5.0. --- blog/2025-10-31-release-notes-1.5.1.mdx | 11 + docs/release-notes/v1.5.1.mdx | 6 + package-lock.json | 4 +- package.json | 2 +- src/includes/release-1.5.1.mdx | 31 + .../version-1.5.0/about/applications.mdx | 58 ++ .../version-1.5.0/about/architecture.mdx | 163 +++++ .../version-1.5.0/about/key-features.mdx | 345 +++++++++++ .../version-1.5.0/about/licensing.mdx | 96 +++ .../version-1.5.0/about/product.mdx | 86 +++ .../version-1.5.0/about/siem-optimization.mdx | 531 ++++++++++++++++ .../appendix/configuration-bnf.mdx | 63 ++ .../appendix/field-formats/asim.mdx | 21 + .../appendix/field-formats/cef.mdx | 13 + .../appendix/field-formats/cim.mdx | 36 ++ .../appendix/field-formats/csl.mdx | 37 ++ .../appendix/field-formats/ecs.mdx | 21 + .../appendix/field-formats/leef.mdx | 15 + .../appendix/field-formats/ocsf.mdx | 132 ++++ .../appendix/file-formats/avro.mdx | 49 ++ .../appendix/file-formats/parquet.mdx | 60 ++ .../appendix/file-formats/pem.mdx | 52 ++ .../version-1.5.0/appendix/includes-index.mdx | 17 + .../appendix/protocols/estreamer.mdx | 28 + .../appendix/protocols/ipfix.mdx | 29 + .../appendix/protocols/kafka.mdx | 47 ++ .../version-1.5.0/appendix/protocols/nats.mdx | 29 + .../appendix/protocols/netflow.mdx | 28 + .../appendix/protocols/rabbitmq.mdx | 27 + .../appendix/protocols/redis.mdx | 26 + .../appendix/protocols/sflow.mdx | 28 + .../version-1.5.0/appendix/protocols/smtp.mdx | 27 + .../appendix/protocols/syslog.mdx | 57 ++ .../version-1.5.0/appendix/protocols/tftp.mdx | 30 + .../appendix/windows-event-ids.mdx | 43 ++ .../configuration/devices/_app-protocols.mdx | 49 ++ .../devices/azure-blob-storage.mdx | 313 ++++++++++ .../configuration/devices/azure-monitor.mdx | 153 +++++ .../configuration/devices/estreamer.mdx | 243 ++++++++ .../configuration/devices/event-hubs.mdx | 305 +++++++++ .../configuration/devices/http.mdx | 273 ++++++++ .../configuration/devices/ipfix.mdx | 168 +++++ .../configuration/devices/kafka.mdx | 239 +++++++ .../configuration/devices/linux.mdx | 106 ++++ .../devices/microsoft-sentinel.mdx | 226 +++++++ .../configuration/devices/nats.mdx | 242 ++++++++ .../configuration/devices/netflow.mdx | 168 +++++ .../configuration/devices/overview.mdx | 162 +++++ .../configuration/devices/rabbitmq.mdx | 288 +++++++++ .../configuration/devices/redis.mdx | 234 +++++++ .../configuration/devices/sflow.mdx | 168 +++++ .../configuration/devices/smtp.mdx | 248 ++++++++ .../configuration/devices/snmp-trap.mdx | 243 ++++++++ .../configuration/devices/syslog.mdx | 311 ++++++++++ .../configuration/devices/tcp.mdx | 210 +++++++ .../configuration/devices/tftp.mdx | 224 +++++++ .../configuration/devices/udp.mdx | 155 +++++ .../configuration/devices/windows.mdx | 106 ++++ .../configuration/directors/cli/agent.mdx | 575 +++++++++++++++++ .../configuration/directors/cli/director.mdx | 367 +++++++++++ .../configuration/directors/configuration.mdx | 315 ++++++++++ .../configuration/directors/deployment.mdx | 292 +++++++++ .../configuration/directors/introduction.mdx | 214 +++++++ .../directors/troubleshooting.mdx | 394 ++++++++++++ .../pipelines/conditional-running.mdx | 130 ++++ .../pipelines/handling-failures.mdx | 78 +++ 
.../pipelines/handling-success.mdx | 91 +++ .../configuration/pipelines/normalization.mdx | 129 ++++ .../configuration/pipelines/overview.mdx | 253 ++++++++ .../pipelines/processors/aad-errcode.mdx | 280 +++++++++ .../pipelines/processors/abs.mdx | 187 ++++++ .../pipelines/processors/acl-decode.mdx | 258 ++++++++ .../pipelines/processors/add.mdx | 229 +++++++ .../pipelines/processors/alienvault.mdx | 585 ++++++++++++++++++ .../pipelines/processors/anthropic.mdx | 219 +++++++ .../pipelines/processors/append.mdx | 211 +++++++ .../pipelines/processors/attachment.mdx | 227 +++++++ .../pipelines/processors/azure-openai.mdx | 227 +++++++ .../pipelines/processors/bag-pack.mdx | 187 ++++++ .../pipelines/processors/binary-decode.mdx | 236 +++++++ .../pipelines/processors/bytes.mdx | 221 +++++++ .../pipelines/processors/camel-case.mdx | 226 +++++++ .../pipelines/processors/capitalize.mdx | 307 +++++++++ .../pipelines/processors/case.mdx | 230 +++++++ .../pipelines/processors/cef.mdx | 208 +++++++ .../pipelines/processors/ceil.mdx | 218 +++++++ .../pipelines/processors/checksum.mdx | 257 ++++++++ .../pipelines/processors/circle.mdx | 243 ++++++++ .../pipelines/processors/clean.mdx | 300 +++++++++ .../pipelines/processors/cloudflare-intel.mdx | 341 ++++++++++ .../pipelines/processors/coalesce.mdx | 312 ++++++++++ .../pipelines/processors/color-decode.mdx | 237 +++++++ .../pipelines/processors/comment.mdx | 110 ++++ .../pipelines/processors/community-id.mdx | 124 ++++ .../pipelines/processors/compact.mdx | 202 ++++++ .../pipelines/processors/concat.mdx | 360 +++++++++++ .../pipelines/processors/confidence.mdx | 289 +++++++++ .../pipelines/processors/contains.mdx | 152 +++++ .../pipelines/processors/continue.mdx | 212 +++++++ .../pipelines/processors/convert.mdx | 250 ++++++++ .../pipelines/processors/cpid.mdx | 189 ++++++ .../pipelines/processors/csv.mdx | 169 +++++ .../pipelines/processors/data-size.mdx | 195 ++++++ .../pipelines/processors/date-index.mdx | 174 ++++++ .../pipelines/processors/date.mdx | 285 +++++++++ .../pipelines/processors/debug.mdx | 153 +++++ .../pipelines/processors/decrypt.mdx | 178 ++++++ .../pipelines/processors/dissect.mdx | 212 +++++++ .../pipelines/processors/divide.mdx | 213 +++++++ .../pipelines/processors/dns-lookup.mdx | 309 +++++++++ .../pipelines/processors/dns-query-type.mdx | 340 ++++++++++ .../processors/dns-response-code.mdx | 341 ++++++++++ .../pipelines/processors/dot-case.mdx | 191 ++++++ .../pipelines/processors/dot-expander.mdx | 199 ++++++ .../pipelines/processors/dot-nester.mdx | 318 ++++++++++ .../pipelines/processors/drop.mdx | 181 ++++++ .../pipelines/processors/duration.mdx | 219 +++++++ .../pipelines/processors/dynamic-sample.mdx | 182 ++++++ .../pipelines/processors/encrypt.mdx | 190 ++++++ .../pipelines/processors/enforce-schema.mdx | 446 +++++++++++++ .../pipelines/processors/enrich.mdx | 239 +++++++ .../pipelines/processors/error-code.mdx | 319 ++++++++++ .../pipelines/processors/expand-range.mdx | 338 ++++++++++ .../pipelines/processors/fail.mdx | 226 +++++++ .../pipelines/processors/final.mdx | 174 ++++++ .../pipelines/processors/fingerprint.mdx | 255 ++++++++ .../pipelines/processors/floor.mdx | 364 +++++++++++ .../pipelines/processors/foreach.mdx | 247 ++++++++ .../pipelines/processors/fqdn.mdx | 392 ++++++++++++ .../pipelines/processors/geo-grid.mdx | 276 +++++++++ .../pipelines/processors/geo-ip.mdx | 268 ++++++++ .../pipelines/processors/go-to.mdx | 249 ++++++++ .../pipelines/processors/grok.mdx | 259 ++++++++ 
.../pipelines/processors/group.mdx | 486 +++++++++++++++ .../pipelines/processors/gsub.mdx | 229 +++++++ .../pipelines/processors/hex-decode.mdx | 193 ++++++ .../pipelines/processors/html-strip.mdx | 220 +++++++ .../pipelines/processors/http-status.mdx | 373 +++++++++++ .../pipelines/processors/humanize.mdx | 233 +++++++ .../pipelines/processors/icmp-type.mdx | 327 ++++++++++ .../pipelines/processors/iff.mdx | 281 +++++++++ .../pipelines/processors/ip-quality-score.mdx | 426 +++++++++++++ .../pipelines/processors/ip-type.mdx | 411 ++++++++++++ .../pipelines/processors/join-kv.mdx | 244 ++++++++ .../pipelines/processors/join.mdx | 193 ++++++ .../pipelines/processors/json.mdx | 247 ++++++++ .../pipelines/processors/jwt-decode.mdx | 229 +++++++ .../pipelines/processors/kebab-case.mdx | 191 ++++++ .../pipelines/processors/keep-first.mdx | 234 +++++++ .../pipelines/processors/keep-last.mdx | 232 +++++++ .../pipelines/processors/keep.mdx | 232 +++++++ .../pipelines/processors/kerberos-decode.mdx | 228 +++++++ .../configuration/pipelines/processors/kv.mdx | 256 ++++++++ .../pipelines/processors/leef.mdx | 175 ++++++ .../pipelines/processors/level.mdx | 185 ++++++ .../pipelines/processors/lookup.mdx | 314 ++++++++++ .../pipelines/processors/lowercase.mdx | 219 +++++++ .../pipelines/processors/mask.mdx | 282 +++++++++ .../pipelines/processors/math.mdx | 338 ++++++++++ .../pipelines/processors/max.mdx | 223 +++++++ .../pipelines/processors/min.mdx | 235 +++++++ .../pipelines/processors/minify.mdx | 268 ++++++++ .../pipelines/processors/modulo.mdx | 228 +++++++ .../pipelines/processors/move.mdx | 274 ++++++++ .../pipelines/processors/multiply.mdx | 277 +++++++++ .../processors/network-direction.mdx | 257 ++++++++ .../pipelines/processors/network-protocol.mdx | 284 +++++++++ .../pipelines/processors/normalize.mdx | 234 +++++++ .../pipelines/processors/openai.mdx | 217 +++++++ .../pipelines/processors/ordinal.mdx | 307 +++++++++ .../pipelines/processors/overview.mdx | 86 +++ .../pipelines/processors/pascal-case.mdx | 222 +++++++ .../pipelines/processors/pattern.mdx | 216 +++++++ .../pipelines/processors/pipeline.mdx | 148 +++++ .../pipelines/processors/power.mdx | 275 ++++++++ .../pipelines/processors/print.mdx | 320 ++++++++++ .../pipelines/processors/random-string.mdx | 235 +++++++ .../pipelines/processors/recover.mdx | 224 +++++++ .../pipelines/processors/redact.mdx | 196 ++++++ .../pipelines/processors/regex-extract.mdx | 281 +++++++++ .../pipelines/processors/regex-filter.mdx | 248 ++++++++ .../pipelines/processors/regex-replace.mdx | 271 ++++++++ .../processors/registered-domain.mdx | 200 ++++++ .../pipelines/processors/remove.mdx | 197 ++++++ .../pipelines/processors/rename.mdx | 211 +++++++ .../pipelines/processors/replace.mdx | 339 ++++++++++ .../pipelines/processors/reroute.mdx | 168 +++++ .../pipelines/processors/return.mdx | 264 ++++++++ .../pipelines/processors/round.mdx | 263 ++++++++ .../pipelines/processors/sample.mdx | 187 ++++++ .../pipelines/processors/score.mdx | 341 ++++++++++ .../pipelines/processors/script.mdx | 165 +++++ .../pipelines/processors/select.mdx | 304 +++++++++ .../pipelines/processors/serialize.mdx | 347 +++++++++++ .../pipelines/processors/set.mdx | 290 +++++++++ .../pipelines/processors/sid-decode.mdx | 239 +++++++ .../pipelines/processors/slice.mdx | 296 +++++++++ .../pipelines/processors/snake-case.mdx | 222 +++++++ .../pipelines/processors/snowflake.mdx | 224 +++++++ .../pipelines/processors/sort.mdx | 155 +++++ .../pipelines/processors/split.mdx | 186 ++++++ 
.../pipelines/processors/sqrt.mdx | 265 ++++++++ .../pipelines/processors/substring.mdx | 231 +++++++ .../pipelines/processors/subtract.mdx | 303 +++++++++ .../pipelines/processors/syslog.mdx | 237 +++++++ .../pipelines/processors/take.mdx | 279 +++++++++ .../pipelines/processors/text-wrap.mdx | 234 +++++++ .../pipelines/processors/time-shift.mdx | 342 ++++++++++ .../pipelines/processors/title-case.mdx | 237 +++++++ .../pipelines/processors/trim-first.mdx | 295 +++++++++ .../pipelines/processors/trim-last.mdx | 297 +++++++++ .../pipelines/processors/trim.mdx | 154 +++++ .../pipelines/processors/unix-permission.mdx | 281 +++++++++ .../pipelines/processors/uppercase.mdx | 154 +++++ .../pipelines/processors/uri-parts.mdx | 181 ++++++ .../pipelines/processors/url-decode.mdx | 158 +++++ .../pipelines/processors/user-agent.mdx | 172 +++++ .../pipelines/processors/username-type.mdx | 308 +++++++++ .../pipelines/processors/virustotal.mdx | 380 ++++++++++++ .../pipelines/processors/wait.mdx | 172 +++++ .../processors/windows-user-type.mdx | 307 +++++++++ .../pipelines/processors/xml.mdx | 247 ++++++++ .../configuration/quick-start.mdx | 44 ++ .../version-1.5.0/configuration/routes.mdx | 328 ++++++++++ .../configuration/scheduling/cron.mdx | 447 +++++++++++++ .../configuration/scheduling/interval.mdx | 497 +++++++++++++++ .../configuration/scheduling/overview.mdx | 529 ++++++++++++++++ .../targets/analytics/clickhouse.mdx | 181 ++++++ .../targets/analytics/elasticsearch.mdx | 407 ++++++++++++ .../targets/analytics/splunk-hec.mdx | 340 ++++++++++ .../targets/aws/amazon-cloudwatch.mdx | 372 +++++++++++ .../targets/aws/amazon-kinesis.mdx | 349 +++++++++++ .../configuration/targets/aws/amazon-msk.mdx | 435 +++++++++++++ .../targets/aws/amazon-opensearch.mdx | 536 ++++++++++++++++ .../targets/aws/amazon-redshift.mdx | 548 ++++++++++++++++ .../configuration/targets/aws/amazon-s3.mdx | 299 +++++++++ .../targets/aws/amazon-security-lake.mdx | 420 +++++++++++++ .../configuration/targets/aws/amazon-sns.mdx | 396 ++++++++++++ .../configuration/targets/aws/amazon-sqs.mdx | 361 +++++++++++ .../targets/azure/azure-blob-storage.mdx | 350 +++++++++++ .../targets/azure/azure-data-explorer.mdx | 275 ++++++++ .../targets/azure/azure-event-hubs.mdx | 279 +++++++++ .../targets/azure/azure-monitor.mdx | 432 +++++++++++++ .../targets/azure/azure-service-bus.mdx | 445 +++++++++++++ .../azure/microsoft-sentinel-data-lake.mdx | 246 ++++++++ .../targets/azure/microsoft-sentinel.mdx | 304 +++++++++ .../targets/cloud-storage/alibaba-oss.mdx | 355 +++++++++++ .../targets/cloud-storage/backblaze-b2.mdx | 345 +++++++++++ .../targets/cloud-storage/cloudflare-r2.mdx | 309 +++++++++ .../cloud-storage/digitalocean-spaces.mdx | 326 ++++++++++ .../targets/cloud-storage/ibm-cos.mdx | 322 ++++++++++ .../targets/cloud-storage/minio.mdx | 325 ++++++++++ .../targets/cloud-storage/oracle-cloud-os.mdx | 343 ++++++++++ .../targets/cloud-storage/scaleway-os.mdx | 347 +++++++++++ .../cloud-storage/wasabi-cloud-storage.mdx | 383 ++++++++++++ .../configuration/targets/gcs/bigquery.mdx | 285 +++++++++ .../targets/gcs/google-cloud-pubsub.mdx | 418 +++++++++++++ .../targets/gcs/google-cloud-storage.mdx | 361 +++++++++++ .../configuration/targets/mq/apache-kafka.mdx | 485 +++++++++++++++ .../targets/mq/confluent-cloud.mdx | 372 +++++++++++ .../configuration/targets/mq/redpanda.mdx | 431 +++++++++++++ .../configuration/targets/other/console.mdx | 95 +++ .../configuration/targets/other/discard.mdx | 56 ++ .../configuration/targets/other/file.mdx | 353 
+++++++++++ .../configuration/targets/other/syslog.mdx | 191 ++++++ .../configuration/targets/overview.mdx | 345 +++++++++++ .../version-1.5.0/content-hub/licensing.mdx | 202 ++++++ .../version-1.5.0/content-hub/overview.mdx | 236 +++++++ .../getting-started/add-first-device.mdx | 109 ++++ .../configure-first-target.mdx | 108 ++++ .../getting-started/connect-with-routes.mdx | 209 +++++++ .../getting-started/create-director.mdx | 98 +++ .../getting-started/create-your-account.mdx | 52 ++ .../getting-started/install-content-hub.mdx | 164 +++++ .../getting-started/monitor-and-verify.mdx | 238 +++++++ .../getting-started/next-steps.mdx | 411 ++++++++++++ versioned_docs/version-1.5.0/glossary.mdx | 348 +++++++++++ .../automation/arm-templates.mdx | 306 +++++++++ .../automation/bicep-templates.mdx | 271 ++++++++ .../microsoft-sentinel/integration.mdx | 418 +++++++++++++ .../microsoft-sentinel/overview.mdx | 322 ++++++++++ .../version-1.5.0/organization/audit.mdx | 88 +++ .../version-1.5.0/organization/overview.mdx | 46 ++ .../version-1.5.0/organization/roles.mdx | 247 ++++++++ .../organization/settings/company.mdx | 43 ++ .../organization/settings/single-sign-on.mdx | 72 +++ .../organization/settings/subscription.mdx | 65 ++ .../version-1.5.0/organization/tenants.mdx | 125 ++++ .../organization/usage-and-limits.mdx | 79 +++ .../version-1.5.0/organization/users.mdx | 193 ++++++ .../version-1.5.0/release-notes/v1.3.0.mdx | 6 + .../version-1.5.0/release-notes/v1.4.0.mdx | 6 + .../version-1.5.0/release-notes/v1.5.0.mdx | 6 + .../version-1.5.0/release-notes/v1.5.1.mdx | 6 + .../tutorials/a-local-pipeline.mdx | 92 +++ .../version-1.5.0-sidebars.json | 486 +++++++++++++++ versions.json | 1 + 297 files changed, 68508 insertions(+), 3 deletions(-) create mode 100644 blog/2025-10-31-release-notes-1.5.1.mdx create mode 100644 docs/release-notes/v1.5.1.mdx create mode 100644 src/includes/release-1.5.1.mdx create mode 100644 versioned_docs/version-1.5.0/about/applications.mdx create mode 100644 versioned_docs/version-1.5.0/about/architecture.mdx create mode 100644 versioned_docs/version-1.5.0/about/key-features.mdx create mode 100644 versioned_docs/version-1.5.0/about/licensing.mdx create mode 100644 versioned_docs/version-1.5.0/about/product.mdx create mode 100644 versioned_docs/version-1.5.0/about/siem-optimization.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/configuration-bnf.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/field-formats/asim.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/field-formats/cef.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/field-formats/cim.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/field-formats/csl.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/field-formats/ecs.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/field-formats/leef.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/field-formats/ocsf.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/file-formats/avro.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/file-formats/parquet.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/file-formats/pem.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/includes-index.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/protocols/estreamer.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/protocols/ipfix.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/protocols/kafka.mdx create mode 100644 
versioned_docs/version-1.5.0/appendix/protocols/nats.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/protocols/netflow.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/protocols/rabbitmq.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/protocols/redis.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/protocols/sflow.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/protocols/smtp.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/protocols/syslog.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/protocols/tftp.mdx create mode 100644 versioned_docs/version-1.5.0/appendix/windows-event-ids.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/_app-protocols.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/azure-blob-storage.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/azure-monitor.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/estreamer.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/event-hubs.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/http.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/ipfix.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/kafka.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/linux.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/microsoft-sentinel.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/nats.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/netflow.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/overview.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/rabbitmq.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/redis.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/sflow.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/smtp.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/snmp-trap.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/syslog.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/tcp.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/tftp.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/udp.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/devices/windows.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/directors/cli/agent.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/directors/cli/director.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/directors/configuration.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/directors/deployment.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/directors/introduction.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/directors/troubleshooting.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/conditional-running.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/handling-failures.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/handling-success.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/normalization.mdx create mode 100644 
versioned_docs/version-1.5.0/configuration/pipelines/overview.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/aad-errcode.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/abs.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/acl-decode.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/add.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/alienvault.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/anthropic.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/append.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/attachment.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/azure-openai.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/bag-pack.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/binary-decode.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/bytes.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/camel-case.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/capitalize.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/case.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/cef.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/ceil.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/checksum.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/circle.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/clean.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/cloudflare-intel.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/coalesce.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/color-decode.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/comment.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/community-id.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/compact.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/concat.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/confidence.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/contains.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/continue.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/convert.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/cpid.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/csv.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/data-size.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/date-index.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/date.mdx create mode 100644 
versioned_docs/version-1.5.0/configuration/pipelines/processors/debug.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/decrypt.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/dissect.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/divide.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/dns-lookup.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/dns-query-type.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/dns-response-code.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/dot-case.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/dot-expander.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/dot-nester.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/drop.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/duration.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/dynamic-sample.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/encrypt.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/enforce-schema.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/enrich.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/error-code.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/expand-range.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/fail.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/final.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/fingerprint.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/floor.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/foreach.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/fqdn.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/geo-grid.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/geo-ip.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/go-to.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/grok.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/group.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/gsub.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/hex-decode.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/html-strip.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/http-status.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/humanize.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/icmp-type.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/iff.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/ip-quality-score.mdx create mode 
100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/ip-type.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/join-kv.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/join.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/json.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/jwt-decode.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/kebab-case.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/keep-first.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/keep-last.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/keep.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/kerberos-decode.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/kv.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/leef.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/level.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/lookup.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/lowercase.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/mask.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/math.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/max.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/min.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/minify.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/modulo.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/move.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/multiply.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/network-direction.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/network-protocol.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/normalize.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/openai.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/ordinal.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/overview.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/pascal-case.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/pattern.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/pipeline.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/power.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/print.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/random-string.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/recover.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/redact.mdx create mode 100644 
versioned_docs/version-1.5.0/configuration/pipelines/processors/regex-extract.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/regex-filter.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/regex-replace.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/registered-domain.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/remove.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/rename.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/replace.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/reroute.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/return.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/round.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/sample.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/score.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/script.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/select.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/serialize.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/set.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/sid-decode.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/slice.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/snake-case.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/snowflake.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/sort.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/split.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/sqrt.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/substring.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/subtract.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/syslog.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/take.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/text-wrap.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/time-shift.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/title-case.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/trim-first.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/trim-last.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/trim.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/unix-permission.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/uppercase.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/uri-parts.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/url-decode.mdx create mode 100644 
versioned_docs/version-1.5.0/configuration/pipelines/processors/user-agent.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/username-type.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/virustotal.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/wait.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/windows-user-type.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/pipelines/processors/xml.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/quick-start.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/routes.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/scheduling/cron.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/scheduling/interval.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/scheduling/overview.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/analytics/clickhouse.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/analytics/elasticsearch.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/analytics/splunk-hec.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/aws/amazon-cloudwatch.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/aws/amazon-kinesis.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/aws/amazon-msk.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/aws/amazon-opensearch.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/aws/amazon-redshift.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/aws/amazon-s3.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/aws/amazon-security-lake.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/aws/amazon-sns.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/aws/amazon-sqs.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/azure/azure-blob-storage.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/azure/azure-data-explorer.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/azure/azure-event-hubs.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/azure/azure-monitor.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/azure/azure-service-bus.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/azure/microsoft-sentinel-data-lake.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/azure/microsoft-sentinel.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/cloud-storage/alibaba-oss.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/cloud-storage/backblaze-b2.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/cloud-storage/cloudflare-r2.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/cloud-storage/digitalocean-spaces.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/cloud-storage/ibm-cos.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/cloud-storage/minio.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/cloud-storage/oracle-cloud-os.mdx create mode 100644 
versioned_docs/version-1.5.0/configuration/targets/cloud-storage/scaleway-os.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/cloud-storage/wasabi-cloud-storage.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/gcs/bigquery.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/gcs/google-cloud-pubsub.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/gcs/google-cloud-storage.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/mq/apache-kafka.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/mq/confluent-cloud.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/mq/redpanda.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/other/console.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/other/discard.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/other/file.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/other/syslog.mdx create mode 100644 versioned_docs/version-1.5.0/configuration/targets/overview.mdx create mode 100644 versioned_docs/version-1.5.0/content-hub/licensing.mdx create mode 100644 versioned_docs/version-1.5.0/content-hub/overview.mdx create mode 100644 versioned_docs/version-1.5.0/getting-started/add-first-device.mdx create mode 100644 versioned_docs/version-1.5.0/getting-started/configure-first-target.mdx create mode 100644 versioned_docs/version-1.5.0/getting-started/connect-with-routes.mdx create mode 100644 versioned_docs/version-1.5.0/getting-started/create-director.mdx create mode 100644 versioned_docs/version-1.5.0/getting-started/create-your-account.mdx create mode 100644 versioned_docs/version-1.5.0/getting-started/install-content-hub.mdx create mode 100644 versioned_docs/version-1.5.0/getting-started/monitor-and-verify.mdx create mode 100644 versioned_docs/version-1.5.0/getting-started/next-steps.mdx create mode 100644 versioned_docs/version-1.5.0/glossary.mdx create mode 100644 versioned_docs/version-1.5.0/microsoft-sentinel/automation/arm-templates.mdx create mode 100644 versioned_docs/version-1.5.0/microsoft-sentinel/automation/bicep-templates.mdx create mode 100644 versioned_docs/version-1.5.0/microsoft-sentinel/integration.mdx create mode 100644 versioned_docs/version-1.5.0/microsoft-sentinel/overview.mdx create mode 100644 versioned_docs/version-1.5.0/organization/audit.mdx create mode 100644 versioned_docs/version-1.5.0/organization/overview.mdx create mode 100644 versioned_docs/version-1.5.0/organization/roles.mdx create mode 100644 versioned_docs/version-1.5.0/organization/settings/company.mdx create mode 100644 versioned_docs/version-1.5.0/organization/settings/single-sign-on.mdx create mode 100644 versioned_docs/version-1.5.0/organization/settings/subscription.mdx create mode 100644 versioned_docs/version-1.5.0/organization/tenants.mdx create mode 100644 versioned_docs/version-1.5.0/organization/usage-and-limits.mdx create mode 100644 versioned_docs/version-1.5.0/organization/users.mdx create mode 100644 versioned_docs/version-1.5.0/release-notes/v1.3.0.mdx create mode 100644 versioned_docs/version-1.5.0/release-notes/v1.4.0.mdx create mode 100644 versioned_docs/version-1.5.0/release-notes/v1.5.0.mdx create mode 100644 versioned_docs/version-1.5.0/release-notes/v1.5.1.mdx create mode 100644 versioned_docs/version-1.5.0/tutorials/a-local-pipeline.mdx create mode 100644 
versioned_sidebars/version-1.5.0-sidebars.json diff --git a/blog/2025-10-31-release-notes-1.5.1.mdx b/blog/2025-10-31-release-notes-1.5.1.mdx new file mode 100644 index 00000000..70256960 --- /dev/null +++ b/blog/2025-10-31-release-notes-1.5.1.mdx @@ -0,0 +1,11 @@ +--- +authors: [release-team] +tags: [release] +title: Version 1.5.1 Released +--- + +This release focuses on usability improvements and important bug fixes. The **Content Hub** receives significant enhancements with improved content format display and expanded filtering options, while the **Pipeline search** functionality is now more flexible. Critical fixes address documentation links, device notifications, IP address display issues, and interface functionality across Quick Routes and language support features. + +{/* truncate */} + + diff --git a/docs/release-notes/v1.5.1.mdx b/docs/release-notes/v1.5.1.mdx new file mode 100644 index 00000000..1910beea --- /dev/null +++ b/docs/release-notes/v1.5.1.mdx @@ -0,0 +1,6 @@ +--- +sidebar_label: v1.5.1 +title: Version 1.5.1 +--- + + diff --git a/package-lock.json b/package-lock.json index b73eeed1..2b18831a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "virtualmetric-docs", - "version": "1.5.0", + "version": "1.5.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "virtualmetric-docs", - "version": "1.5.0", + "version": "1.5.1", "dependencies": { "@docusaurus/core": "^3.9.2", "@docusaurus/faster": "^3.9.2", diff --git a/package.json b/package.json index 90defbc3..abde2478 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "virtualmetric-docs", - "version": "1.5.0", + "version": "1.5.1", "private": true, "scripts": { "docusaurus": "docusaurus", diff --git a/src/includes/release-1.5.1.mdx b/src/includes/release-1.5.1.mdx new file mode 100644 index 00000000..4f709050 --- /dev/null +++ b/src/includes/release-1.5.1.mdx @@ -0,0 +1,31 @@ +## :wrench: Improvements + +### Content Hub Enhancements + +- **Content Format Conversion** - Content display converted from JSON to string format for significantly improved readability and usability. Comments within content are now visible, providing descriptive context and making information easier to understand. This enhancement offers better clarity when reviewing content, enabling more efficient data workflows and informed decision-making. + +- **Enhanced Filtering Options** - Content Hub filtering capabilities expanded with additional device types and vendors. Enhanced filtering makes it easier to locate and filter relevant content, improving navigation efficiency and helping users quickly find specific configurations for their devices and vendor products. + +### User Interface + +- **Pipeline Search Character Limit Removal** - Removed character limit restrictions in Pipeline search functionality that were preventing searches from being performed. Search feature now operates properly regardless of query length, enabling comprehensive pipeline discovery without limitations. + +## :bug: Bug Fixes + +### Documentation and Navigation + +- **Documentation Link Corrections** - Fixed incorrect documentation links throughout the platform. All documentation references now direct users to correct pages, improving accessibility and facilitating greater utilization of documentation resources for enhanced user support and guidance. + +### Device Management + +- **Windows and Linux Device Notifications** - Resolved issue with incorrect notifications on Windows and Linux devices. 
Notifications now display accurate information, providing reliable alerts and status updates for monitored systems and ensuring proper visibility into device health and events. + +- **Windows Device IP Address Display** - Fixed issue where IP addresses were appearing empty for some Windows devices. All Windows devices now properly display their IP addresses, providing complete visibility and easier identification of monitored systems across the infrastructure. + +### Interface Functionality + +- **Quick Routes Table Enhancements** - Corrected missing columns and filtering issues in Quick Routes device and target selection tables. All relevant columns now display correctly, and filtering functionality operates as expected, streamlining route selection and configuration processes. + +- **Language Support Corrections** - Resolved issues where language support was not functioning properly in certain platform areas. All language-related features now work as expected, ensuring consistent multilingual experience across the interface for international users. + +--- diff --git a/versioned_docs/version-1.5.0/about/applications.mdx b/versioned_docs/version-1.5.0/about/applications.mdx new file mode 100644 index 00000000..3c73c3c9 --- /dev/null +++ b/versioned_docs/version-1.5.0/about/applications.mdx @@ -0,0 +1,58 @@ +--- +sidebar_label: Applications +--- + +# Applications + +**VirtualMetric DataStream** is a telemetry pipeline solution that simplifies data collection, processing, and routing for multiple platforms including _Microsoft Sentinel_, _AWS Security Lake_, _Elasticsearch_, _Splunk_, and other security analytics platforms. At its core, **DataStream** uses pipelines to process, enrich, and direct data flows to their optimal destinations. It is composed of the following components: + +## VirtualMetric Director™ + +**VirtualMetric Director** is a comprehensive platform designed for listening on various data sources, extracting and transforming them, and routing the data to multiple destinations across different security platforms. This powerful component acts as the central nervous system of your pipeline, orchestrating the flow across your entire infrastructure with multi-schema support. + +**Director** provides a unified interface for managing multiple sources and destinations, enabling seamless data collection, transformation, and distribution across ASIM, OCSF, ECS, CIM, and UDM formats. Its architecture is built to handle enterprise-scale data volumes at high levels of performance and reliability. + +Key capabilities include: + +* **Source Management** - support for multiple protocols (TCP, UDP, HTTP), file system monitoring, database change tracking, API integration, custom source implementations +* **Multi-Schema Data Transformation** - real-time processing with ASIM, OCSF, ECS, and CIM schema support, format conversion, field extraction, data enrichment, custom transformation rules +* **Intelligent Routing** - dynamic destination selection across Microsoft Sentinel, AWS Security Lake, Elasticsearch, and Splunk, load balancing, failover handling, priority-based routing, conditional routing +* **Monitoring and Control** - real-time pipeline visibility, performance metrics, health monitoring, alert management, configuration validation + +**Director**'s flexible architecture allows it to adapt to changing requirements. 
Whether you're collecting logs from applications, monitoring system metrics, or gathering security events, **Director** provides the necessary tools to ensure efficient data handling and delivery across multiple security platforms. + +## VirtualMetric Agent™ + +**VirtualMetric Agent** is a lightweight, high-performance data collection component designed to gather telemetry data from various sources while maintaining minimal system impact. This versatile agent serves as the first point of contact in the telemetry pipeline, ensuring reliable data collection and initial processing. + +**Agent** is engineered with efficiency and reliability in mind, offering robust data collection without compromising system performance or stability. + +Key features include: + +* **Efficient Collection** - low resource utilization, minimal CPU and memory footprint, optimized disk I/O, configurable collection intervals, adaptive rate limiting +* **Reliable Processing** - local buffering, crash recovery, data persistence, automatic reconnection, error handling +* **Flexible Integration** - multiple source types support, custom collector plugins, format adaptation, protocol conversion, destination selection +* **Advanced Monitoring** - self-diagnostics, performance metrics and health status reporting, resource usage tracking, alert generation + +**Agent**'s architecture ensures seamless data collection and transmission while providing robust monitoring and management capabilities. Its modular design allows for easy extension and customization to meet specific organizational needs. It can operate both independently and as part of a larger telemetry infrastructure, making it suitable for various deployment scenarios, from single-server installations to large-scale distributed environments. + +## VirtualMetric Director Proxy™ + +**VirtualMetric Director Proxy** is a secure, lightweight forwarding component designed to operate within customer environments, whether on-premises or in their own cloud infrastructure. This strategic component enables secure data delivery to customer-owned destinations while maintaining complete isolation of customer credentials and infrastructure access. + +**Director Proxy** serves as the secure bridge between **VirtualMetric Director** and customer destinations, providing a perfect solution for Managed Security Service Providers (MSSPs) managing multiple customer environments without requiring access to customer credentials or infrastructure. 
+ +Key capabilities include: + +* **Secure Data Reception** - receives highly compressed data streams from **VirtualMetric Director**, token-based authentication (JWT-compatible), encrypted communication channels, automatic decompression and processing +* **Azure Managed Identity Integration** - native Azure Managed Identity support for secure destination access, eliminates credential management overhead, seamless integration with Azure services, automatic token refresh and management +* **Multi-Destination Support** - intelligent routing to Microsoft Sentinel, Microsoft Sentinel data lake, Azure Data Explorer and Azure Blob Storage based on Director instructions +* **MSSP-Optimized Architecture** - tenant isolation and security, token-based tenant authentication, centralized management for MSSPs, no credential sharing required + +### MSSP Deployment Model + +**Director Proxy** enables a streamlined MSSP workflow where each customer tenant installs the proxy within their environment and shares only the proxy endpoint address and authentication token with the MSSP. The MSSP operates **VirtualMetric Director** centrally, processing and routing data for multiple customers without ever accessing customer credentials or infrastructure. + +**Director** sends destination routing instructions and processed data to **Director Proxy** via secure HTTP requests. **Director Proxy** handles all final delivery using customer-owned Azure Managed Identity credentials, ensuring complete security isolation and compliance with customer data sovereignty requirements. + +This architecture provides enterprise-grade security, simplified credential management, scalable multi-tenant support, and complete customer control over data destinations while enabling efficient MSSP operations. \ No newline at end of file diff --git a/versioned_docs/version-1.5.0/about/architecture.mdx b/versioned_docs/version-1.5.0/about/architecture.mdx new file mode 100644 index 00000000..4b499e35 --- /dev/null +++ b/versioned_docs/version-1.5.0/about/architecture.mdx @@ -0,0 +1,163 @@ +--- +sidebar_label: Architecture +--- + +# Architecture + +**VirtualMetric DataStream** is architected with enterprise security and data sovereignty as core principles. Unlike traditional solutions that require sending sensitive data to third-party cloud platforms for processing, **DataStream** keeps all your critical data within your environment while providing centralized management and visibility through a secure cloud control plane. 
+ +VirtualMetric DataStream - Enterprise Architecture Overview + +## Security-First Architecture + +### Data Plane vs Control Plane Separation + +**DataStream** employs a strict separation between data plane and control plane operations, ensuring your sensitive security data never leaves your environment: + +**Control Plane (VirtualMetric Cloud)** +- Multi-tenant SaaS platform for centralized management +- Pipeline configuration and deployment +- Fleet management across all Directors and Agents +- Real-time statistics, monitoring, and alerting +- Role-Based Access Control (RBAC) for team collaboration +- Zero data processing or storage of customer logs + +**Data Plane (Customer Environment)** +- All data processing occurs within customer-controlled infrastructure +- Director processes, transforms, and routes data locally +- Agents collect data and communicate directly with local Director +- No customer data transmission to VirtualMetric Cloud +- Complete data sovereignty and compliance control + +### Enterprise Security Benefits + +This architecture addresses critical enterprise security concerns: + +**Data Sovereignty** - All sensitive log data remains within your infrastructure, ensuring compliance with data residency requirements, industry regulations (GDPR, HIPAA, SOX), and corporate data governance policies. + +**Minimal Attack Surface** - Only a single HTTPS outbound connection required from Director to VirtualMetric Cloud for management, no inbound connections needed, and agents communicate exclusively with local Director infrastructure. + +**Zero Third-Party Data Exposure** - Raw log data never transmitted to external vendors, eliminates risks of data breaches during transit, and maintains complete control over sensitive security information. + +**Network Security** - Reduced firewall complexity with minimal external connections, no need to open firewall access from cloud to local systems, and simplified network security management. + +**Bandwidth Optimization** - Processes data locally before routing to destinations, eliminates unnecessary raw data transmission, and provides intelligent compression and filtering capabilities. 
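As a rough illustration of the control-plane boundary described above, the sketch below shows the kind of outbound-only management call a Director-style component might make: health and throughput counters go out over a single HTTPS connection, while log payloads stay local. The endpoint URL, payload fields, and token handling are assumptions made for illustration only, not the actual DataStream API.

```python
import requests  # common third-party HTTP client


def report_health(api_token: str, director_id: str) -> None:
    """Send only management metadata (no log data) to the control plane.

    Hypothetical endpoint and payload shape; shown to illustrate that the
    single outbound HTTPS connection carries statistics and health status,
    never customer telemetry content.
    """
    payload = {
        "director_id": director_id,
        "status": "healthy",
        "events_per_second": 12500,  # aggregate counter, not event content
    }
    response = requests.post(
        "https://cloud.virtualmetric.example/api/v1/heartbeat",  # hypothetical URL
        json=payload,
        headers={"Authorization": f"Bearer {api_token}"},
        timeout=10,
    )
    response.raise_for_status()
```

The point of the sketch is the direction of traffic: the management call is initiated from inside the customer environment, so no inbound firewall rule is needed.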
+ +## Deployment Architecture + +### VirtualMetric Cloud (Multi-Tenant) + +The centralized management platform provides: +- **Management Interface** - Intuitive web-based console for pipeline configuration and monitoring +- **Statistics & Analytics** - Real-time performance metrics, data flow visualization, and operational insights +- **RBAC & Security** - Granular access controls, audit logging, and secure authentication +- **Fleet Management** - Centralized deployment and configuration management across distributed environments + +### Customer Environment Options + +**VirtualMetric Director** can be deployed in multiple configurations to meet diverse enterprise requirements: + +#### Clustered Director (High Availability) + +- **Load Balancing** - Distributes processing load across multiple Director instances +- **Automatic Failover** - Ensures continuous operation during maintenance or failures +- **Scalable Processing** - Handles enterprise-scale data volumes with horizontal scaling +- **Shared Configuration** - Synchronized pipeline configurations across cluster nodes + +#### Flexible Deployment Models + +**On-Premises Deployment** +- Physical servers or virtual machines within customer data centers +- Complete isolation from external networks if required +- Integration with existing infrastructure and security controls + +**Cloud Deployment** +- Customer-owned Azure, AWS, or other cloud environments +- Maintains data sovereignty within customer cloud tenants +- Leverages cloud-native services while preserving security isolation + +**Hybrid Deployment** +- Directors in both on-premises and cloud environments +- Unified management through single control plane +- Flexible data routing based on location and requirements + +### Container and Serverless Support + +**Docker Containerization** +- Lightweight, portable deployment across environments +- Simplified installation and maintenance +- Container orchestration support (Kubernetes, Docker Swarm) +- Consistent runtime environment across platforms + +**Azure Serverless Integration** +- **Director Proxy** - Azure Function-based secure data forwarding +- Automatic scaling based on data volume +- Pay-per-use cost optimization + +## Network Communication + +### Simplified Network Requirements + +**Outbound HTTPS (Director to VirtualMetric Cloud)** +- Single port 443 connection for management communications +- Control plane synchronization and configuration updates +- Statistics reporting and health monitoring +- Secure token-based authentication + +**Internal HTTPS (Agents to Director)** +- Direct communication between Agents and local Director +- No external connectivity required for Agents +- Secure data transmission within customer environment +- Simplified firewall configuration + +### Zero Inbound Connectivity + +**DataStream** requires no inbound connections from external networks, eliminating common security vulnerabilities: +- No firewall rules for external access to internal systems +- Reduced exposure to external threats +- Simplified compliance and security auditing +- Enhanced network security posture + +## Management Models + +### Managed (Default) + +- **Centralized Configuration** - Manage all pipelines through VirtualMetric portal +- **Automatic Updates** - Seamless deployment of configuration changes and updates +- **Real-Time Monitoring** - Comprehensive visibility across distributed infrastructure +- **Collaborative Management** - Team-based access with RBAC controls + +### Self-Managed (Air-Gapped) + +For environments 
requiring complete network isolation: + +- **Offline Configuration** - Manual pipeline configuration and deployment +- **Local Management** - Direct access to Director management interfaces +- **Manual Updates** - Administrator-controlled updates and maintenance +- **Complete Isolation** - Zero external connectivity requirements + +### Benefits for Enterprise Environments + +**Operational Efficiency** +- Centralized management of distributed telemetry infrastructure +- Reduced operational overhead with automated configuration deployment +- Comprehensive monitoring and alerting capabilities +- Streamlined troubleshooting and performance optimization + +**Security & Compliance** +- Data never leaves customer-controlled environment +- Simplified compliance with industry regulations +- Reduced risk of data breaches and unauthorized access +- Enhanced audit trail and governance capabilities + +**Scalability & Performance** +- High availability and clustering support for mission-critical environments +- Horizontal scaling to handle growing data volumes +- Intelligent load balancing and resource optimization +- Container and serverless deployment flexibility + +**Cost Optimization** +- Eliminates costs associated with third-party data processing +- Reduces bandwidth requirements through local processing +- Flexible deployment models to optimize infrastructure costs +- Pay-per-use serverless options for variable workloads \ No newline at end of file diff --git a/versioned_docs/version-1.5.0/about/key-features.mdx b/versioned_docs/version-1.5.0/about/key-features.mdx new file mode 100644 index 00000000..b986d5f4 --- /dev/null +++ b/versioned_docs/version-1.5.0/about/key-features.mdx @@ -0,0 +1,345 @@ +--- +sidebar_label: Key Features +--- + +# Key Features + +**VirtualMetric DataStream** offers a comprehensive range of features that make it a powerful and flexible telemetry pipeline solution for multiple security platforms including _Microsoft Sentinel_, _AWS Security Lake_, _Elasticsearch_, _Splunk Enterprise Security_, and other leading SIEM and analytics platforms. Enterprises can leverage these features to streamline their data collection, processing, and routing operations across diverse security ecosystems: + +## Agentless Data Collection + +**DataStream**'s agentless design enables data collection on many widely-used platforms without requiring third-party tools or complicated configurations. This significantly reduces operational overhead and eliminates common deployment challenges associated with traditional agent-based solutions. + +The system operates through secure read-only connections to target systems using standard protocols. On **Windows** systems, it leverages Windows Remote Management (WinRM). On Unix-based systems including **Linux**, **macOS**, **Solaris**, and **AIX**, it utilizes SSH and native logging facilities to collect data securely. + +The agentless approach also ensures that **DataStream** can begin collecting data immediately after configuration, without requiring system restarts or extensive installation management. This makes it particularly valuable for large enterprises where deploying and maintaining agents across thousands of systems is impractical. 
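To make this concrete, a source definition for agentless collection might be declared along the following lines in **DataStream**'s YAML configuration. The sketch only mirrors the general `devices` layout described in the Configuration BNF appendix; the individual field names (`type`, `address`, and so on) are illustrative assumptions rather than documented syntax.

```yaml
# Illustrative sketch only - field names are assumptions, not documented syntax.
devices:
  - id: 1
    name: windows_domain_controllers
    type: windows                      # agentless collection over WinRM, read-only access
    properties:
      address: dc01.example.local
      username: svc_datastream         # service account resolved from the credential store
  - id: 2
    name: linux_app_servers
    type: linux                        # agentless collection over SSH
    properties:
      address: app01.example.local
      port: 22
```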
+ +```mermaid +graph LR + DS([DataStream System]) + CS[Credential Store] + AD[Active Directory] + + subgraph Target Systems + W[Windows] + L[Linux] + M[macOS] + S[Solaris] + A[AIX] + end + + CS <-.-> |Secure Credentials| DS + AD <-.-> |Service Accounts| DS + + DS <-.-> |Read-only Access| W + DS <-.-> |Read-only Access| L + DS <-.-> |Read-only Access| M + DS <-.-> |Read-only Access| S + DS <-.-> |Read-only Access| A + + style DS fill:#BCC0E7 + style CS fill:#E5E2FB + style AD fill:#E5E2FB + style W fill:#E5E2FB + style L fill:#E5E2FB + style M fill:#E5E2FB + style S fill:#E5E2FB + style A fill:#E5E2FB +``` + +The system leverages read-only user rights for secure remote access. By integrating with _Credential Stores_ and **Active Directory Service Accounts**, it eliminates the need for user credentials, simplifying creation of secure connections, and thereby ensuring data integrity and compliance. + +Key benefits include: + +* **Zero deployment overhead** - no software installation required on target systems +* **Simplified maintenance** - no agent updates or patches to manage +* **Cross-platform compatibility** - works consistently across different operating systems +* **Minimal system footprint** - uses native protocols and interfaces +* **Reduced attack surface** - operates with read-only permissions +* **Enterprise-grade security** - leverages existing authentication infrastructure + +## Multi-Schema Processing Engine + +**DataStream**'s advanced processing engine natively supports multiple industry-standard security schemas, enabling seamless data transformation and routing across diverse security platforms. This comprehensive schema support eliminates the complexity of managing multiple data formats and ensures consistent data quality across your entire security ecosystem. + +The engine intelligently converts between schemas while preserving data integrity and semantic meaning. Field mappings are automatically applied based on schema specifications, ensuring that security context and relationships are maintained throughout the transformation process. + +```mermaid +graph TD + Input[Raw Log Data] -.-> Engine([Multi-Schema Engine]) + + subgraph Schemas[Supported Schemas] + ASIM(ASIM) + OCSF(OCSF) + ECS(ECS) + CIM(CIM) + UDM(UDM) + end + + Engine -.-> Schemas + Schemas -.-> |Intelligent Routing| Platforms([Security Platforms]) + + style Engine fill:#BCC0E7 + style Schemas fill:#E5E2FB +``` + +**DataStream** supports seamless conversion between **ASIM** (Microsoft Sentinel), **OCSF** (AWS Security Lake), **ECS** (Elasticsearch), **CIM** (Splunk), and **UDM** (Google SecOps). This native multi-schema support enables organizations to leverage multiple security platforms simultaneously while maintaining data consistency and quality. 
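As a rough illustration, a pipeline that normalizes vendor events into one of these schemas could be sketched as follows. The `normalize` processor name and its options are assumptions used only to show the shape of such a step, not the engine's documented syntax.

```yaml
# Hypothetical sketch - processor name and options are assumptions.
pipelines:
  - id: 1
    name: firewall_to_asim
    processors:
      - normalize:
          source_format: cef       # vendor events arriving as CEF
          target_schema: asim      # emit ASIM-shaped records for Microsoft Sentinel
```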
+ +Key capabilities include: + +* **Native Schema Support** - built-in support for ASIM, OCSF, ECS, CIM schemas +* **Intelligent Field Mapping** - automatic field transformation with semantic preservation +* **Schema Validation** - ensures data quality and compliance with target schema requirements +* **Bi-directional Conversion** - seamless transformation between any supported schemas +* **Rule Enforcement** - automatic application of schema-specific validation rules +* **Context Preservation** - maintains security relationships and metadata across transformations + +## Vectorized Processing Architecture + +**DataStream**'s pipeline engine employs a sophisticated vectorized processing architecture that maximizes system resources by utilizing all available CPU cores. This design enables efficient processing of large log volumes and facilitates parallel data ingestion with multiple target platforms simultaneously. + +The vectorized architecture breaks down incoming log streams into optimized chunks that can be processed independently across multiple cores. This parallelization ensures that system resources are used efficiently, preventing bottlenecks that commonly occur in single-threaded processing systems. + +Each processing core operates independently on its assigned data chunk, performing tasks such as parsing, filtering, schema transformation, and enrichment. This parallel processing approach significantly reduces the overall processing time and enables real-time data handling even under heavy loads. + +```mermaid +graph TD + subgraph Cores[Multiple CPU Cores] + C1(Core 1) + C2(Core 2) + C3(Core 3) + C4(Core N) + end + + Data[Log Data] -.-> Pipeline([Pipeline Engine]) + Pipeline -.-> Cores + + Cores -.-> |Parallel Ingestion| SIEM1[Microsoft Sentinel] + Cores -.-> |Parallel Ingestion| SIEM2[AWS Security Lake] + Cores -.-> |Parallel Ingestion| SIEM3[Elasticsearch] + Cores -.-> |Parallel Ingestion| SIEM4[Splunk] + Cores -.-> |Parallel Ingestion| SIEM5[Google SecOps] + + style Pipeline fill:#BCC0E7 + style SIEM1 fill:#E5E2FB + style SIEM2 fill:#E5E2FB + style SIEM3 fill:#E5E2FB + style SIEM4 fill:#E5E2FB + style SIEM5 fill:#E5E2FB +``` + +With over 10 times the ingestion speed of traditional solutions, **DataStream** reduces bandwidth usage down to the bare minimum, delivering significant cost savings. The high-performance architecture ensures that data is processed and delivered to target systems with minimal latency. + +Key advantages include: + +* **Maximum resource utilization** - efficiently uses all available CPU cores +* **Parallel data processing** - handles multiple data streams simultaneously +* **Multi-platform ingestion** - concurrent delivery to multiple SIEM platforms +* **Scalable performance** - processing capacity scales with available cores +* **Low latency** - minimizes delay between data collection and delivery +* **Resource optimization** - intelligent workload distribution across cores +* **Schema-aware processing** - parallel transformation to multiple target formats + +## Lossless Pipeline Engine + +Our _Write-Ahead Log_ (WAL) architecture provides a robust foundation for data integrity by securely storing all routing and pipeline states on disk. This architecture ensures that every piece of data is safely persisted before processing, creating a reliable recovery point in case of system failures or unexpected shutdowns. + +The WAL implementation in **DataStream** operates as a high-performance buffer between data collection and processing stages. 
When data arrives, it is immediately written to the WAL before any processing begins, creating a durable record of all incoming information. This approach guarantees that no data is lost during pipeline processing or system transitions. + +```mermaid +flowchart LR + Input[Input Data] --> WAL[Write-Ahead Log] + WAL --> Disk[(Disk Storage)] + WAL --> Pipeline([Pipeline Processing]) + Pipeline --> Output[Multi-Platform Output] + + subgraph Recovery + Disk --> |Crash Restore| Pipeline + end + + style WAL fill:#BCC0E7 + style Disk fill:#E5E2FB +``` + +Unlike solutions that require additional components like Kafka, **DataStream** caps log duplication at just one message. This ensures zero data loss, even in the event of a crash, while maintaining efficient storage utilization. The system achieves this through a sophisticated checkpoint mechanism that tracks the processing state of each message. + +The _WAL_ approach also minimizes the risk of system downtime, ensuring that your telemetry pipeline is always up and running, and consistent, even under heavy loads. When the system restarts after an interruption, it can seamlessly resume processing from the last recorded state without data loss or duplication. + +Key features include: + +* **Zero data loss guarantee** - all data is persisted before processing +* **Minimal message duplication** - maximum of one copy per message +* **Automatic crash recovery** - seamless resumption after system interruptions +* **State preservation** - maintains pipeline and routing states on disk +* **High throughput** - efficient write-ahead logging with minimal overhead +* **System consistency** - ensures data integrity across pipeline stages +* **Multi-platform reliability** - consistent data delivery across all target platforms + +## Dedicated Storage Format + +The _VirtualMetric File Format_ (VMF) was engineered specifically for high-performance pipeline engines. It represents a significant advancement in log data storage and processing, building upon the foundations of Apache Avro while addressing its limitations for telemetry data handling. + +VMF combines the efficiency of a row-based format with sophisticated data organization capabilities, enabling it to handle massive volumes of small data chunks efficiently. This hybrid approach provides optimal performance for both sequential processing and random access patterns, making it uniquely suited for telemetry data management. + +```mermaid +graph LR + VMF[VMF] + + VMF -.-> Compression([99% Compression]) + VMF -.-> Storage([Zero Trust Storage]) + VMF -.-> Filters([Bloom Filters]) + VMF -.-> Chain([Log Chaining]) + VMF -.-> TSA[(TSA Integration)] + + subgraph Benefits + Compression -.-> |Efficient| Transport[/Network Transport/] + Filters -.-> |Fast| Search[/Search Capabilities/] + Chain -.-> |Enhanced| Forensics[/Forensic Integrity/] + Storage -.-> |Secure| DataHandling[/Data Handling/] + end + + style VMF fill:#BCC0E7 +``` + +With its roots in Apache Avro, VMF overcomes the limitations of Avro through innovative features designed specifically for telemetry data. 
+ +Key features include: + +* **Advanced Compression** - achieves up to 99% compression ratio, optimized for both storage efficiency and quick access, intelligent compression selection based on data patterns, minimal CPU overhead during compression/decompression +* **Zero Trust Storage** - end-to-end encryption of stored data, cryptographic verification of data integrity, access control integrated at the storage level, secure key management and rotation +* **Bloom Filters** - rapid search capabilities across large datasets, efficient index management, minimized false positive rates, optimized memory usage for filter operations +* **Log Chaining** - cryptographic linking of sequential log entries, tamper-evident log storage, verifiable audit trails, guaranteed log sequence integrity +* **TSA Integration** - time-stamped authority integration, certified temporal proof of log existence, compliance with regulatory requirements, non-repudiation of log timestamps + +The format's design enables disk-level merging without consuming system resources, making it highly efficient for large-scale data operations. This capability is particularly valuable in high-throughput environments where traditional formats would create significant system overhead. + +## Advanced Data Routing + +We have simplified data routing with our advanced `reroute` processor, eliminating the need for manual filtering that is necessary in other solutions. This innovative approach transforms complex routing scenarios into manageable, automated workflows that significantly reduce operational overhead across multiple security platforms. + +The reroute processor operates at multiple levels of abstraction, allowing for both broad and granular control over data flow. At the pipeline level, it handles the overall flow of data between major system components, while at the content pack level, it manages specific data transformations and routing rules for different types of content and target schemas. + +```mermaid +graph TD + subgraph Sources[Data Sources] + S1[(Source 1)] + S2[(Source 2)] + S3[(Source 3)] + end + + subgraph Routing[Intelligent Reroute Processor] + F([Advanced Filters]) + P([Pipeline Level]) + C([Content Pack Level]) + S([Schema-Aware]) + end + + subgraph Destinations[Multi-Platform Targets] + MS[Microsoft Sentinel - ASIM] + ASL[AWS Security Lake - OCSF] + ES[Elasticsearch - ECS] + SP[Splunk - CIM] + UDM[Google SecOps - UDM] + end + + Sources -.-> Routing + Routing -.-> |Dataset Coalescing| Destinations + + style Routing fill:#BCC0E7 +``` + +This processor allows users to route data to destinations at the pipeline or content pack level with schema-aware transformations. Advanced filters can be applied for precise data routing based on content, metadata, and security context. The _Dataset_ concept further streamlines this by enabling multiple data sources to coalesce around optimal target platforms with appropriate schema formatting. 
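A minimal sketch of such a rule is shown below. The `reroute` processor itself is described above, but the parameter names and the condition syntax are assumptions modeled loosely on the Elastic-style pipelines the engine adopts.

```yaml
# Illustrative sketch - parameter names and condition syntax are assumptions.
pipelines:
  - id: 1
    name: firewall_routing
    processors:
      - reroute:
          if: "severity >= 3"          # hypothetical filter expression
          dataset: firewall_events     # coalesce matching sources into a shared dataset
```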
+ +Key capabilities include: + +* **Multi-level Routing Control** - pipeline-level traffic management, content pack-specific routing rules, conditional routing based on data attributes, schema-aware destination selection +* **Advanced Filtering** - complex condition evaluation, pattern matching and regular expressions, metadata-based filtering, security context-aware routing decisions +* **Schema-Aware Routing** - automatic format conversion based on destination requirements, intelligent field mapping preservation, multi-platform simultaneous delivery +* **Dataset Coalescing** - unified endpoint management across multiple platforms, intelligent data aggregation, optimized bandwidth utilization, reduced complexity + +## Extensive Processor Support + +Our pipeline engine adopts the widely recognized Elastic Ingest Pipeline format while extending it with comprehensive multi-schema support, allowing IT and Security Engineers to create and manage pipelines effortlessly. This adoption of a familiar standard significantly reduces the learning curve while providing powerful data processing capabilities through a comprehensive set of built-in processors. + +The pipeline architecture is designed with flexibility and ease of use in mind, offering both low-code and no-code options for pipeline configuration. This approach makes it accessible to team members with varying levels of technical expertise while maintaining the power and flexibility needed for complex data processing scenarios across multiple security platforms. + +```mermaid +graph TD + EIP([Elastic Ingest Pipeline]) + + subgraph Processors[150+ Processors] + P1(Parsing) + P2(Filtering) + P3(Enrichment) + P4(Schema Transform) + P5(Normalization) + end + + EIP -.-> |Low Code/No Code| Processors + Processors -.-> Routing([Multi-Platform Routing]) + Routing -.-> |Low Code/No Code| Engineers[(IT/Security Engineers)] + + style EIP fill:#E5E2FB + style Processors fill:#BCC0E7 +``` + +Offering **150+** processors, **DataStream** has the most comprehensive support in the industry for _low-code_/_no-code_ management, enabling tasks like parsing, filtering, enrichment, schema transformation, normalization, routing, and more. Engineers with **Elastic** experience can leverage this robust and flexible pipeline engine while benefiting from extended multi-platform capabilities. + +Key capabilities include: + +* **Data Parsing** - structured log parsing, JSON/XML processing, CSV handling, regular expressions for extraction, custom format support +* **Schema Transformation** - native ASIM, OCSF, ECS, CIM conversion, intelligent field mapping, semantic preservation +* **Filtering Operations** - content-based filtering, metadata filtering, conditional processing, pattern matching, security context filtering +* **Data Enrichment** - metadata addition, field transformation, lookup table integration, geolocation enrichment, threat intelligence correlation +* **Advanced Processing** - data aggregation, statistical analysis, machine learning integration, complex event processing, custom script execution + +## Multi-Platform Security Integration + +The pipeline engine was specifically crafted to integrate seamlessly with multiple leading security platforms, providing sophisticated integration layers that understand and optimize data flow into each platform's native schema and structure. This comprehensive integration eliminates the complexity typically associated with multi-platform data ingestion and normalization. 
+ +Our integration approach focuses on intelligent context inference, automatically analyzing incoming log messages to determine their security context and optimal destination platforms. This automated classification ensures that data is properly categorized and routed to the appropriate schemas and tables without manual intervention. + +```mermaid +graph TD + Log[Log Messages] + + Log --> Context([Context Inference]) + + Context --> Filter[Contextual Filters] + + Filter --> OptData[Optimized Data] + + subgraph Platforms[Security Platforms] + MS[Microsoft Sentinel - ASIM] + ASL[AWS Security Lake - OCSF] + ES[Elasticsearch - ECS] + SP[Splunk - CIM] + UDM[Google SecOps - UDM] + end + + OptData --> Platforms + + Platforms --> Engineers[(IT/Security Engineers)] + + style Context fill:#BCC0E7 + style Platforms fill:#E5E2FB +``` + +By inferring context from log messages, our solution automatically routes and transforms data to appropriate platforms and schemas, drastically reducing manual effort and accelerating multi-platform integration. This intelligent mapping ensures that security events are properly normalized and enriched before reaching each platform's analytics engine. + +Key features include: + +* **Intelligent Context Inference** - automatic event classification, smart field mapping, metadata enrichment, relationship detection, multi-platform optimization +* **Multi-Schema Processing** - native ASIM table mapping for Microsoft Sentinel, OCSF class mapping for AWS Security Lake, ECS field mapping for Elasticsearch, CIM compliance for Splunk +* **Cost & Performance Optimization** - smart data filtering across platforms, deduplication and aggregation, volume optimization per platform, ingestion rate control +* **Unified Management** - single pipeline configuration for multiple destinations, centralized monitoring across all platforms, consistent data quality assurance + +With contextual filters and intelligent routing, users can easily optimize data ingestion across multiple platforms simultaneously, ensuring only relevant information reaches each destination while maintaining comprehensive security coverage and reducing operational costs. + +--- + +
+Whatever your telemetry needs across multiple security platforms, **DataStream** has something to offer to make your operations much more robust, secure, streamlined, and efficient at drastically reduced costs. +
+ +--- \ No newline at end of file diff --git a/versioned_docs/version-1.5.0/about/licensing.mdx b/versioned_docs/version-1.5.0/about/licensing.mdx new file mode 100644 index 00000000..c22f75cc --- /dev/null +++ b/versioned_docs/version-1.5.0/about/licensing.mdx @@ -0,0 +1,96 @@ +--- +sidebar_label: Licensing +pagination_next: null +--- + +# Licensing + +**VirtualMetric DataStream** offers the following editions and subscription options. + +- Available editions: + + |Edition|Daily Ingestion|Audience| + |--:|:-:|:--| + |_Basic_|≤**500 GB**|Home lab enthusiasts, researchers, and data explorers.| + |_Standard_|≤**2 TB**|Businesses and consultants.| + |_Enterprise_|∞|Large enterprises and system integrators.| + +- Services and features available on a _subscription_ basis: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  |Service|Feature|_Basic_|_Standard_|_Enterprise_|
  |:--|:--|:-:|:-:|:-:|
  |Data Collection|Max. Directors|1|10|∞|
  ||Max. Agents|10|100|∞|
  ||Content Hub Access|✓|✓|✓|
  ||Management Console (UI)|✓|✓|✓|
  ||Persistent Queues|✓|✓|✓|
  ||State-aware Clustering|-|-|✓|
  ||Dynamic Workload Management|-|-|✓|
  ||Subtenants|-|-|✓|
  |Routing|Max. Destinations|∞|∞|∞|
  ||Max. Pipelines|∞|∞|∞|
  ||Analytics|Basic|Advanced|Advanced|
  |Operations|Audit Logs|-|-|✓|
  ||RBAC|-|-|✓|
  ||SSO|-|-|✓|
  |Support|Direct|Community|Standard (8 x 5)|Enterprise (24 x 7)|
  ||SLA - Cloud Services|99.9%|99.9%|99.9%|
  ||Onboarding|-|-|✓|
diff --git a/versioned_docs/version-1.5.0/about/product.mdx b/versioned_docs/version-1.5.0/about/product.mdx new file mode 100644 index 00000000..42b697e7 --- /dev/null +++ b/versioned_docs/version-1.5.0/about/product.mdx @@ -0,0 +1,86 @@ +--- +slug: / +sidebar_label: Product +--- + +# VirtualMetric DataStream + +**VirtualMetric DataStream** is a comprehensive automation engine that revolutionizes security data processing and routing across multiple platforms and destinations. Built for modern security operations, DataStream streamlines the collection, transformation, and distribution of security telemetry data from diverse sources to leading SIEM platforms, data lakes, and security analytics solutions. + +VirtualMetric DataStream - Universal Security Telemetry Pipeline +
+Introducing the next-generation telemetry pipeline solution for modern security operations +
+ +## Universal Security Data Processing + +DataStream serves as the central nervous system for your security infrastructure, automatically discovering, processing, and routing telemetry data with unprecedented flexibility and intelligence. Our platform transforms raw security logs into meaningful, standardized formats that integrate seamlessly with your existing security ecosystem across multiple platforms and schemas. + +### Multi-Schema Support + +DataStream natively supports industry-standard security schemas, enabling seamless integration across diverse security platforms: + +- **ASIM (Advanced Security Information Model)** - Microsoft Sentinel's unified data model for comprehensive threat detection +- **OCSF (Open Cybersecurity Schema Framework)** - AWS Security Lake and multi-vendor security environments +- **ECS (Elastic Common Schema)** - Elasticsearch, Elastic Security, and the complete Elastic Stack ecosystem +- **CIM (Common Information Model)** - Splunk Enterprise Security and SOAR platforms +- **UDM (Unified Data Model)** - Google SecOps and Chronicle SIEM + +## Telemetry Pipelines + +A telemetry pipeline is a comprehensive end-to-end system that manages the entire journey of log data from source to destination, handling collection, processing, and routing to various endpoints. It is responsible for ensuring that the right information reaches the right destination at the right time. + +VirtualMetric DataStream - Telemetry Pipeline Overview + +Users can design the pipeline so as to route the data based on data type, source, or other criteria. Each piece of information that enters the pipeline undergoes several crucial transformations. + +### Data Collection + +The pipeline first gathers raw data from various sources through **agentless log collection** with automated discovery: + +- **Application & Server Logs** - Comprehensive application monitoring and server-side event collection +- **Network Device Logs** - Routers, firewalls, switches, and network appliance telemetry +- **Endpoint Telemetry** - Workstation, server, and mobile device security events +- **Cloud Services & Infrastructure** - Multi-cloud security events and infrastructure monitoring + +### Data Processing + +The pipeline processes data through sophisticated automation stages including parsing and filtering, transforming and enriching, threat intelligence integration, and context enhancement. Our advanced processing engine delivers: + +- **Parsing & Normalization** - Intelligent field extraction and data standardization across multiple schemas +- **Filtering & Enrichment** - Context-aware event filtering with real-time threat intelligence integration +- **Schema Transformation** - Seamless conversion between ASIM, OCSF, ECS, and CIM formats +- **Validation & Quality Assurance** - Data integrity checks and completeness validation + +### Data Routing + +The processed data is intelligently directed to appropriate destinations based on security requirements, cost considerations, and functionality needs. 
DataStream supports flexible routing to: + +- **SIEM Platforms** - Microsoft Sentinel, Elasticsearch/Elastic Security, Splunk Enterprise Security with optimized ingestion +- **Data Lakes** - AWS Security Lake, Azure Data Lake with automated partitioning +- **Storage Solutions** - Cost-effective long-term retention in Azure Blob, AWS S3 +- **Analytics Platforms** - Splunk, custom APIs, and real-time streaming endpoints + +## Enterprise Challenges + +In enterprise environments, pipelines are essential due to the need to handle massive volumes of data from diverse security tools and platforms. Organizations face complex challenges including managing multiple data formats and schemas, ensuring consistent processing across different security platforms, maintaining real-time threat detection capabilities, and optimizing costs while preserving security coverage. + +The data must be directed to the appropriate destinations for security monitoring, threat detection, compliance, and analysis efficiently and accurately, enabling real-time flow management, with consistent processing and formatting, maintaining data integrity throughout the journey, and enabling sophisticated routing decisions across multiple platforms. + +VirtualMetric DataStream - Multi-Platform Security Automation + +**DataStream**'s intelligent routing capabilities enable organizations to direct different types of log data to the most appropriate platforms and **Azure** services based on their security, operational, and compliance requirements: + +### Meaningful Security Data + +Critical security events require real-time monitoring and immediate alerting across multiple platforms. **DataStream** intelligently routes these logs to **Microsoft Sentinel** for ASIM-based threat detection, **AWS Security Lake** for OCSF compliance, **Elasticsearch** for ECS-formatted security analytics, and **Splunk Enterprise Security** for CIM-compliant data ingestion. + +### Threat Hunting Logs + +Historical security data essential for threat hunting and incident investigation is efficiently routed to **Azure Data Explorer** and **Data Lakes** for long-term analysis, advanced querying, and cross-platform correlation. + +### Compliance Logs + +Regulatory compliance data requiring secure long-term retention is automatically routed to **Azure Blob Storage** and **AWS S3** for cost-effective storage, comprehensive audit trails, and automated compliance reporting. + +This unified approach provides several enterprise benefits including cost optimization through intelligent routing, improved query performance across multiple platforms, flexible retention policies, comprehensive multi-platform security coverage, and simplified management of complex security data workflows. \ No newline at end of file diff --git a/versioned_docs/version-1.5.0/about/siem-optimization.mdx b/versioned_docs/version-1.5.0/about/siem-optimization.mdx new file mode 100644 index 00000000..c6f78bfb --- /dev/null +++ b/versioned_docs/version-1.5.0/about/siem-optimization.mdx @@ -0,0 +1,531 @@ +--- +sidebar_label: SIEM Optimization +--- + +# SIEM Optimization + +**VirtualMetric DataStream** provides comprehensive data optimization capabilities that significantly reduce storage costs and improve query performance across multiple security platforms including _Microsoft Sentinel_, _AWS Security Lake_, _Elasticsearch_, _Splunk Enterprise Security_, and _Google SecOps_. 
Through intelligent field-level optimization and optional event filtering, organizations can achieve 55-60% data reduction while preserving all security-critical information required for detection and response operations. + +## Risk-Free Reduction Framework + +**DataStream**'s Risk-Free Reduction represents a fundamentally different approach to data optimization compared to traditional telemetry pipelines. While most solutions focus on dropping entire log lines, **DataStream** focuses on removing garbage from log content, eliminating unnecessary fields while preserving complete security context. This field-level approach achieves substantial data reduction without compromising detection capabilities. + +The framework is built on extensive analysis of Microsoft Sentinel content, including analytic queries, ASIM parsers, detection rules, and workbooks. For each supported vendor, VirtualMetric engineers analyze which fields are actively used by security operations and which fields contain only operational metadata or placeholder values. This analysis has been validated by external third-party security experts, confirming that only truly unnecessary data is removed. + +```mermaid +graph TD + Analysis[Microsoft Sentinel Content Analysis] --> Queries([Analytic Queries]) + Analysis --> Parsers([ASIM Parsers]) + Analysis --> Rules([Detection Rules]) + Analysis --> Workbooks([Workbooks]) + + Queries --> Mapping([Field Usage Mapping]) + Parsers --> Mapping + Rules --> Mapping + Workbooks --> Mapping + + Mapping --> Critical[Security-Critical Fields] + Mapping --> Unused[Unused Fields] + + Critical --> Preserve([Preserve in All Vendors]) + Unused --> Remove([Safe to Remove]) + + Remove --> Validation([3rd Party Validation]) + Validation --> Pack([Vendor Optimization Pack]) + + style Analysis fill:#BCC0E7 + style Mapping fill:#E5E2FB + style Pack fill:#E5E2FB +``` + +This methodology ensures zero security risk because optimization decisions are based on actual usage patterns in production security operations, not assumptions or heuristics. When Microsoft Sentinel parsers require a field for normalization or analytic rules reference a field for detection, that field is preserved regardless of its content. + +Key principles include: + +* **Field-level optimization** - removes unnecessary fields, not entire events +* **Content-based analysis** - decisions based on Microsoft Sentinel production usage +* **Third-party validation** - external experts verify security integrity +* **Vendor-specific intelligence** - unique optimization for each vendor's log format +* **Preservation guarantees** - all detection-relevant fields always retained +* **No AI/ML involvement** - deterministic, predictable optimization behavior + +## Why VirtualMetric's Approach is Superior + +**DataStream** deliberately avoids AI-based optimization techniques that other vendors promote, recognizing the fundamental incompatibility between AI unpredictability and enterprise security requirements. AI models can produce unexpected results, potentially dropping critical security events without warning. This unpredictability is unacceptable in security operations where a single missed alert could represent a major breach. + +AI-based approaches introduce multiple risks that VirtualMetric's deterministic framework eliminates. AI models require training on actual log data, creating privacy and compliance concerns as sensitive security information may be learned by the model. 
AI processing adds significant latency and computational cost, reducing throughput and increasing infrastructure requirements. Most critically, AI decisions cannot be audited or validated, making it impossible to verify that security-relevant data is preserved. + +|AI-Based Optimization (Risky)|VirtualMetric's Approach (Safe)| +|---|---| +|
  • Unpredictable Results
  • May Drop Critical Events
  • Privacy Concerns
  • Training on Sensitive Data
  • Processing Latency
  • Increased Costs
  • Non-Auditable Decisions
|
  • Deterministic Rules
  • Guaranteed Field Preservation
  • No Data Learning
  • High Performance
  • Cost-Efficient
  • Fully Auditable
  • Expert Validated
| + +**DataStream**'s expert-driven approach provides predictable, consistent results that security teams can trust. Every optimization decision is based on analysis of real-world security operations, validated by experts, and documented for audit purposes. Organizations can confidently deploy aggressive optimization knowing that detection capabilities remain intact. + +Advantages over AI-based optimization include: + +* **Predictable behavior** - same input always produces same output +* **Zero risk of dropping critical events** - preservation rules are absolute +* **No privacy concerns** - no learning from customer data +* **Maximum performance** - no AI processing overhead +* **Lower costs** - efficient rule-based processing +* **Complete auditability** - every decision can be traced and validated +* **Enterprise trust** - deterministic systems meet compliance requirements + +## Unified Optimization Strategy + +**DataStream** employs a smart, centralized optimization strategy that dramatically simplifies management across multiple SIEM platforms. Rather than maintaining separate optimization logic for each target platform, the system applies vendor-specific optimization based on Microsoft Sentinel content analysis, then transforms the optimized data to target schemas in post-processing pipelines. + +This approach means administrators configure optimization rules once per vendor, not once per vendor per SIEM platform. A single Fortinet optimization pack automatically reduces data volume for Sentinel, Splunk, Elasticsearch, and all other configured destinations. Changes to vendor-specific filtering rules immediately apply across the entire multi-platform deployment. + +```mermaid +graph LR + Vendor[Vendor Logs] --> Pack([Vendor Optimization Pack]) + + Pack --> Optimized[Optimized Data] + + Optimized --> Schema([Multi-Schema Transform]) + + Schema --> ASIM[ASIM - Microsoft Sentinel] + Schema --> OCSF[OCSF - AWS Security Lake] + Schema --> ECS[ECS - Elasticsearch] + Schema --> CIM[CIM - Splunk] + Schema --> UDM[UDM - Google SecOps] + + style Pack fill:#BCC0E7 + style Schema fill:#E5E2FB +``` + +This unified strategy provides significant operational advantages. Security teams maintain a single set of optimization rules regardless of how many SIEM platforms they use. Testing and validation happens once, not repeatedly for each destination. Knowledge gained from Microsoft Sentinel content analysis automatically benefits all target platforms. + +The approach works because security-relevant fields are consistent across platforms. A field that contains critical detection data for Microsoft Sentinel also contains critical data for Splunk or Elasticsearch. By optimizing based on Microsoft Sentinel's comprehensive parser and detection rule ecosystem, **DataStream** ensures security integrity across all platforms. 
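In configuration terms, this means a single route can apply one vendor pack and fan out to every platform. The sketch below only illustrates that shape, reusing the `routes` component from the Configuration BNF appendix; the field names and target identifiers are assumptions.

```yaml
# Illustrative sketch - names are assumptions; only the overall shape matters.
routes:
  - id: 1
    name: fortinet_all_platforms
    pipelines:
      - fortinet_optimization      # one vendor optimization pack, applied once
    targets:
      - sentinel                   # delivered as ASIM
      - security_lake              # delivered as OCSF
      - elasticsearch              # delivered as ECS
      - splunk                     # delivered as CIM
```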
+ +Benefits include: + +* **Single configuration point** - one vendor pack optimizes for all destinations +* **Simplified management** - no per-platform optimization rules needed +* **Consistent behavior** - same optimization across all SIEM platforms +* **Easier validation** - test once, deploy everywhere +* **Reduced complexity** - fewer configuration files to maintain +* **Faster deployment** - single change affects all platforms +* **Knowledge leverage** - Microsoft Sentinel analysis benefits all destinations + +## Vendor-Specific Optimization Packs + +**DataStream** includes pre-built optimization packs for major security vendors, each developed through detailed analysis of Microsoft Sentinel parsers, analytic queries, and detection rules. These packs understand the specific log formats and field structures for each vendor, applying precise field-level optimization while guaranteeing preservation of security-relevant data. + +Each vendor pack identifies which fields are actively used in security operations and which fields consistently contain placeholder values, operational metadata, or redundant information. The packs parse complex extension fields, remove unnecessary attributes, and reconstruct only the meaningful portions of each log entry. + +```mermaid +graph LR + Logs[Vendor Logs] + + subgraph Packs[Vendor Optimization Packs] + PEnt["`Fortinet
Palo Alto
Check Point
Cisco
Zscaler
Citrix
Forcepoint
F5 BigIP
SonicWall
Barracuda
Infoblox
WatchGuard
Nozomi
Akamai
ExtraHop
Darktrace
CyberArk
Vectra
CrowdStrike
Symantec
Sophos
Juniper
Aruba
SentinelOne`"] + end + + Logs --> Packs + Packs --> Optimized[Field-Optimized Data] + + style Packs fill:#BCC0E7 + style Optimized fill:#E5E2FB +``` + +The vendor pack library is continuously expanding and includes optimization for leading security solutions across firewalls, proxies, endpoint protection, network detection and response, privileged access management, and cloud security platforms. + +Supported vendor optimization packs include: + +* **Network Security** - Fortinet FortiGate, Palo Alto Networks, Check Point, Cisco ASA, SonicWall, Barracuda WAF, WatchGuard, Juniper SRX +* **Secure Web Gateway** - Zscaler, Citrix NetScaler, Forcepoint +* **Application Delivery** - F5 BigIP, Citrix ADC +* **DNS Security** - Infoblox +* **Network Detection & Response** - Nozomi Networks, ExtraHop RevealX, Darktrace, Vectra +* **Cloud Security** - Akamai Edge Platform +* **Privileged Access** - CyberArk +* **Endpoint Protection** - CrowdStrike Falcon, Symantec Endpoint Protection, Sophos XG, SentinelOne +* **Network Access Control** - Aruba ClearPass + +Each pack automatically activates when logs from the corresponding vendor are detected, requiring no manual configuration. + +## Intelligent Field Optimization + +The core of **DataStream**'s Risk-Free Reduction is intelligent field-level optimization that removes garbage from log content without eliminating security context. The **Compact Processor** automatically removes fields that provide no security value, including empty fields, null values, and common placeholder patterns found across different security vendors. + +The processor recognizes standard placeholder values including numeric zeros, string placeholders, undefined values, and various representations of "no data available." By analyzing Microsoft Sentinel parsers and detection rules, VirtualMetric engineers identified which fields are never referenced in security operations, allowing safe removal even when they contain data. + +```mermaid +graph TD + Data[Security Event] --> Analysis([Field Analysis]) + + Analysis --> Used{Used by Sentinel?} + + Used -->|Yes| Preserve[Preserve Field] + Used -->|No| Check{Has Value?} + + Check -->|Placeholder| Remove[Remove Field] + Check -->|Empty| Remove + Check -->|Null| Remove + Check -->|Real Value| Evaluate{Security Value?} + + Evaluate -->|None| Remove + Evaluate -->|Potential| Preserve + + Preserve --> Output[Optimized Event] + Remove --> Output + + style Analysis fill:#BCC0E7 + style Output fill:#E5E2FB +``` + +The processor supports configurable exclusion lists to preserve specific fields even when they contain placeholder values. This is essential for fields like severity levels or operation codes where a zero value carries semantic meaning and is referenced in detection logic. 
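As an illustration, a compact-style step that strips placeholder values while protecting fields where a zero is meaningful might be sketched as follows; the parameter names are assumptions rather than the processor's documented options.

```yaml
# Hypothetical sketch - parameter names are assumptions.
pipelines:
  - id: 1
    name: windows_field_optimization
    processors:
      - compact:
          values: ["", "0", "-", "N/A", "0x0", "undefined"]   # placeholder patterns to remove
          exclude:
            - EventData.Level      # zero carries meaning here, so the field is preserved
            - EventData.Opcode
```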
+ +Key capabilities include: + +* **Microsoft Sentinel usage analysis** - preserves fields used in parsers and queries +* **Automatic placeholder detection** - recognizes vendor-specific null patterns +* **Configurable value patterns** - "0", "undefined", "0x0", "-", "N/A" and custom patterns +* **Field exclusion support** - protects fields where placeholders have meaning +* **Extension field processing** - parses and optimizes CEF/LEEF additional extensions +* **XML optimization** - processes Windows Event Log EventData efficiently +* **Recursive cleanup** - handles nested objects and arrays + +## Optional Event-Level Filtering + +Beyond field-level optimization, **DataStream** provides optional event-level filtering that removes entire log entries based on industry best practices and expert knowledge. These filters are **disabled by default** to ensure conservative, risk-free operation, but can be enabled when organizations want more aggressive data reduction. + +Event filters are developed based on deep vendor knowledge and real-world security operations experience. VirtualMetric engineers identify specific log types, event IDs, and traffic patterns that generate high volumes but rarely contain security-relevant information. These patterns are documented and validated before inclusion in vendor packs. + +```mermaid +graph TD + Event[Security Event] --> FieldOpt([Field Optimization - Always On]) + + FieldOpt --> Optimized[Field-Optimized Event] + + Optimized --> EventFilter{Event Filters Enabled?} + + EventFilter -->|No - Default| Output[To SIEM] + EventFilter -->|Yes - Optional| Analysis([Pattern Analysis]) + + Analysis --> Type{Event Type} + + Type -->|Private-to-Private| Drop[Drop Event] + Type -->|IPv6 Local| Drop + Type -->|Reserved Country| Drop + Type -->|Security Relevant| Output + + style FieldOpt fill:#BCC0E7 + style Analysis fill:#E5E2FB + style Output fill:#E5E2FB +``` + +Common event filtering patterns include: + +* **Private network traffic** - communications between internal private IP addresses +* **IPv6 local traffic** - link-local (fe80::) and unique local (fc00::) addresses +* **Reserved geographic regions** - traffic from unassigned country codes +* **Accepted outbound connections** - permitted traffic from internal to external +* **Specific event IDs** - vendor-specific operational events with no security value + +Organizations enable event filtering after reviewing their specific environment and security requirements, understanding that aggressive filtering provides maximum cost savings while field-level optimization alone delivers substantial reduction with zero risk. + +## Statistical Sampling + +For organizations requiring even more aggressive data reduction, **DataStream** provides configurable statistical sampling that retains only a percentage of events matching specific criteria. Sampling is always selective, never applied to security-critical events, and users configure exactly which event types should be sampled at which rates. + +The sampling engine allows different rates for different event patterns. High-volume operational traffic might be sampled at 1-in-10 while verbose debug logs are sampled at 1-in-100. Security alerts, authentication failures, and other critical events are never sampled, ensuring complete visibility into actual security incidents. 
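The global switches for this behavior appear in the configuration example later on this page; a minimal sketch that enables selective sampling would simply turn them on, with the per-pattern rules described above layered on top (not shown here).

```yaml
optimization:
  status: true          # master switch for optimization features
  use_sampling: true    # statistical sampling is disabled by default
  sample_rate: 10       # retain roughly 1 in 10 of the sampled event types
```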
+ +```mermaid +graph TD + Stream[Event Stream] --> Classify([Event Classification]) + + Classify --> Priority{Event Category} + + Priority -->|Security Critical| Full[100% Retention] + Priority -->|Operational - High Volume| Sample1([1-in-10 Sampling]) + Priority -->|Operational - Very High Volume| Sample2([1-in-100 Sampling]) + + Sample1 --> Output[To SIEM] + Sample2 --> Output + Full --> Output + + style Classify fill:#BCC0E7 + style Output fill:#E5E2FB +``` + +Sampling capabilities include: + +* **Rule-based sampling** - different rates for different event patterns +* **Vendor-specific rules** - sampling patterns tuned per vendor +* **Configurable rates** - precise control over retention percentages +* **Security event protection** - critical events never sampled +* **Statistical validity** - maintains representative distributions +* **Deterministic behavior** - consistent, predictable sampling + +## Dynamic Sampling + +Beyond static sampling rates, **DataStream** supports dynamic sampling that adjusts retention rates based on current data volumes and system conditions. This advanced capability prevents data loss during unusual activity while maintaining aggressive reduction during normal operations. + +Dynamic sampling monitors incoming data rates and automatically reduces sampling when volumes drop or increase retention when volumes spike. This ensures that unusual patterns, which often indicate security events, receive higher retention while routine operational traffic is aggressively reduced. + +Key features include: + +* **Volume-based adjustment** - responds to traffic pattern changes +* **Anomaly detection** - increases retention during unusual activity +* **Automatic rate tuning** - optimizes sampling without manual intervention +* **Threshold configuration** - defines volume levels triggering adjustments +* **Real-time response** - immediate adaptation to changing conditions + +## Aggregation + +For use cases where real-time delivery is not required, **DataStream** provides aggregation capabilities that combine similar events into summarized records, achieving additional data reduction. Aggregation operates on configurable time intervals, such as 1 minute or 5 minutes, grouping events by key attributes and producing statistical summaries. + +Aggregation is particularly valuable for high-volume metrics, performance data, and operational telemetry where individual events provide less value than aggregate statistics. Organizations configure which event types to aggregate, which fields to group by, and what statistics to calculate. + +```mermaid +graph TD + Events[Event Stream] --> Window([Time Window - 1/5 min]) + + Window --> Group([Group by Attributes]) + + Group --> Calc([Calculate Statistics]) + + Calc --> Summary[Aggregated Summary] + + Summary --> Output[To SIEM] + + style Window fill:#BCC0E7 + style Summary fill:#E5E2FB + style Output fill:#E5E2FB +``` + +Aggregation capabilities include: + +* **Time-based windowing** - configurable aggregation intervals +* **Multi-field grouping** - combine events by multiple attributes +* **Statistical functions** - count, sum, average, min, max, percentiles +* **Selective aggregation** - only specified event types aggregated +* **Metadata preservation** - maintains security context in summaries + +Note that aggregation introduces latency equal to the aggregation window, making it unsuitable for real-time security monitoring. 
Organizations typically use aggregation for operational metrics and performance data while sending security events in real-time. + +## Correlation ID and Archive Integration + +**DataStream** provides a sophisticated correlation ID system that enables cost-effective long-term storage while maintaining the ability to retrieve complete original logs when needed. The system appends a unique correlation ID to each event before optimization, creating a permanent link between the optimized data in active SIEM platforms and complete raw data in archival storage. + +This architecture allows organizations to send full, unoptimized logs to low-cost storage tiers like Azure Blob Storage, AWS S3, Azure Data Explorer, Google BigQuery, or Microsoft Sentinel data lake, while sending optimized, field-reduced logs to expensive active SIEM platforms. Security analysts work with optimized data for day-to-day operations but can retrieve complete original logs for forensic investigations using the correlation ID. + +```mermaid +graph TD + Original[Original Log] --> ID([Append Correlation ID]) + + ID --> Split{Data Path} + + Split -->|Full Raw Data| Archive[Archival Storage] + Split -->|Optimized Data| SIEM[Active SIEM] + + Archive --> Blob[Azure Blob / AWS S3] + Archive --> Lake[Sentinel Data Lake] + Archive --> ADX[Azure Data Explorer] + Archive --> BQ[Google BigQuery] + + SIEM --> Sentinel[Microsoft Sentinel] + SIEM --> Splunk[Splunk] + SIEM --> Elastic[Elasticsearch] + + Sentinel -.->|KQL Join| Blob + Sentinel -.->|KQL Join| Lake + Sentinel -.->|KQL Join| ADX + + style ID fill:#BCC0E7 + style Archive fill:#E5E2FB + style SIEM fill:#E5E2FB +``` + +This approach is particularly powerful with Microsoft Sentinel, where KQL supports joining data across multiple sources including Sentinel workspaces, Azure Data Explorer, Sentinel data lake, and Azure Blob Storage. Analysts can query optimized data for fast, cost-effective operations, then seamlessly retrieve complete original logs when investigation requires full context. + +The correlation ID system enables: + +* **Dual-tier storage** - active SIEM for optimized data, archive for complete logs +* **Cost optimization** - expensive platforms store only reduced data +* **Complete forensics** - full original logs always available via correlation ID +* **Cross-platform joins** - KQL queries span multiple storage systems +* **Audit compliance** - complete logs preserved for regulatory requirements +* **Investigation flexibility** - analysts choose appropriate level of detail + +The correlation ID is implemented as a unique identifier appended to each event during initial processing. This ID remains consistent across all destinations, whether the event is sent to Sentinel, ADX, Blob Storage, or multiple platforms simultaneously. When analysts identify events of interest in optimized Sentinel data, they use the correlation ID to retrieve corresponding full records from archival storage. + +## Windows Event Log Optimization + +Windows Security Event logs represent one of the highest volume data sources in enterprise environments. **DataStream** provides specialized optimization for Windows events that can reduce their size by 60-70% through intelligent EventData field processing while maintaining complete security visibility. + +Windows events include a complex XML EventData field containing dozens of attributes, many of which contain placeholder values or operational metadata not used in security detection. 
VirtualMetric's analysis of Microsoft Sentinel Windows parsers and detection rules identified which EventData attributes are security-relevant and which can be safely removed. + +```mermaid +graph TD + WinLog[Windows Security Event] --> Parse([XML Parser]) + + Parse --> EventData[EventData Field] + + EventData --> Sentinel([Sentinel Parser Analysis]) + + Sentinel --> Used{Used in Detections?} + + Used -->|Yes| Keep[Preserve Attribute] + Used -->|No| Check{Has Value?} + + Check -->|Placeholder| Remove[Remove Attribute] + Check -->|Empty| Remove + Check -->|Meaningful| Keep + + Keep --> Rebuild([Reconstruct XML]) + Remove --> Rebuild + + Rebuild --> Optimized[Optimized Event] + + style Parse fill:#BCC0E7 + style Sentinel fill:#E5E2FB + style Optimized fill:#E5E2FB +``` + +The system parses the EventData XML, analyzes each attribute against Microsoft Sentinel usage patterns, removes unnecessary attributes and placeholders, and reconstructs a minimal XML structure containing only security-relevant data. This selective processing dramatically reduces storage requirements while preserving all information used by detection rules and ASIM parsers. + +Windows-specific optimizations include: + +* **EventData XML parsing** - efficient processing of complex event structures +* **Sentinel parser validation** - preserves fields used in ASIM normalization +* **Default GUID removal** - strips placeholder GUIDs like 00000000-0000-0000-0000-000000000000 +* **Empty attribute removal** - eliminates fields with no values +* **Placeholder filtering** - removes "0x0", "-", and vendor-specific patterns +* **Schema-aware preservation** - maintains detection-required fields +* **XML reconstruction** - creates minimal valid EventData structure +* **Level and opcode protection** - preserves operational fields where zeros matter + +## Configuration and Control + +All optimization features are fully configurable through the **DataStream** management interface or direct pipeline configuration. The system provides granular control over every optimization technique, from global enable/disable switches to field-level filtering rules. Default configuration emphasizes safety, with only field-level optimization enabled and event filtering disabled. + +Configuration options are organized hierarchically. Master switches control broad categories of optimization while detailed settings allow fine-tuned control. This structure enables quick deployment of conservative optimization settings while providing flexibility for aggressive reduction of high-volume, low-value data sources. 
+ +```yaml +optimization: + # Master switch for all optimization features + status: true + + # Statistical sampling (disabled by default) + use_sampling: false + sample_rate: 10 + + # Event-level filtering (disabled by default) + use_event_filters: false + + # ASIM-aware field optimization (enabled by default) + use_asim_filters: true + + # Correlation ID for archive integration + append_correlationid: true +``` + +Configuration capabilities include: + +* **Conservative defaults** - field optimization on, event filtering off +* **Per-vendor customization** - different rules for each vendor +* **Per-platform settings** - optimize differently for Sentinel vs Splunk +* **Sampling rate adjustment** - configurable retention percentages +* **Custom filter rules** - user-defined filtering logic +* **Field exclusion lists** - protect specific fields from optimization +* **Correlation ID control** - enable archive integration +* **Real-time updates** - changes applied without restarts + +## Performance and Cost Impact + +The optimization capabilities in **DataStream** deliver substantial cost savings across all supported security platforms. Real-world deployments consistently achieve 55-60% data reduction through field-level optimization alone, with aggressive configurations reaching 70-80% reduction when combining field optimization, event filtering, sampling, and aggregation. + +Beyond direct storage cost savings, optimization improves query performance by reducing the amount of data that analytics engines must process. Faster queries mean more responsive security operations, reduced infrastructure requirements, and better experience for security analysts. + +```mermaid +graph LR + Before[100% Raw Data] + Field[40-45% Field Optimized] + Event[30-40% Event Filtered] + Sample[20-30% Sampled] + + Before -->|Field Optimization| Field + Field -->|Event Filtering| Event + Event -->|Sampling| Sample + + subgraph Impact[Cost & Performance Impact] + Storage[55-80% Storage Savings] + Query[2-3x Query Performance] + Network[60-70% Network Reduction] + Cost[50-80% Cost Reduction] + end + + Sample --> Impact + + style Before fill:#E5E2FB + style Field fill:#BCC0E7 + style Event fill:#BCC0E7 + style Sample fill:#BCC0E7 + style Impact fill:#E5E2FB +``` + +Measured benefits include: + +* **Storage cost reduction** - 55-60% with field optimization, 70-80% with full optimization +* **Query performance improvement** - 2-3x faster analytics queries +* **Network bandwidth savings** - 60-70% reduction in data transmission +* **Infrastructure optimization** - reduced processing and indexing overhead +* **License optimization** - lower per-GB licensing costs +* **Operational efficiency** - faster incident investigation and response + +The correlation ID system provides additional cost benefits by enabling tiered storage strategies. Organizations can maintain expensive active SIEM platforms at 40-50% of original data volume while archiving complete logs to storage costing 90% less per GB. + +## Security and Compliance Considerations + +All optimization techniques in **DataStream** are designed with security and compliance requirements as primary considerations. The field-level optimization approach based on Microsoft Sentinel content analysis ensures that no security-relevant data is eliminated. External third-party validation confirms the integrity of optimization decisions. 
+ +For regulated environments, the correlation ID system enables compliance with data retention mandates while still achieving substantial cost savings. Complete original logs remain available in archival storage while optimized data serves day-to-day security operations. This satisfies regulatory requirements for log retention while optimizing costs for active analysis. + +```mermaid +graph TD + Optimization[Optimization Process] --> Field([Field Analysis]) + + Field --> Sentinel([Microsoft Sentinel Usage]) + Sentinel --> Detection([Detection Rules]) + Sentinel --> Parsers([ASIM Parsers]) + Sentinel --> Analytics([Analytics Queries]) + + Detection --> Validation([3rd Party Validation]) + Parsers --> Validation + Analytics --> Validation + + Validation --> Safe{Security Safe?} + + Safe -->|Yes| Deploy[Deploy Optimization] + Safe -->|No| Reject[Reject Changes] + + Deploy --> Audit([Audit Trail]) + Audit --> Compliance[Compliance Ready] + + style Optimization fill:#BCC0E7 + style Validation fill:#E5E2FB + style Compliance fill:#E5E2FB +``` + +Key security and compliance features include: + +* **Third-party validation** - external experts verify optimization safety +* **Deterministic behavior** - no AI unpredictability +* **Complete audit trail** - logging of all optimization decisions +* **Compliance mode** - pre-configured settings for regulatory requirements +* **Field protection** - guaranteed preservation of detection-relevant data +* **Archive integration** - complete logs preserved via correlation ID +* **Risk assessment reporting** - validation of optimization security impact +* **No sensitive data exposure** - no AI training on customer logs + +--- + +
+
+**DataStream**'s comprehensive optimization capabilities enable organizations to achieve substantial cost savings, typically 50-80%, across multiple security platforms while maintaining complete security visibility and compliance with regulatory requirements. The Risk-Free Reduction framework, based on Microsoft Sentinel content analysis, ensures that cost optimization never compromises security effectiveness, while the unified optimization strategy simplifies management across diverse SIEM deployments.
+
+ +--- \ No newline at end of file diff --git a/versioned_docs/version-1.5.0/appendix/configuration-bnf.mdx b/versioned_docs/version-1.5.0/appendix/configuration-bnf.mdx new file mode 100644 index 00000000..7137fec5 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/configuration-bnf.mdx @@ -0,0 +1,63 @@ +--- +pagination_prev: null +pagination_next: null +toc: false +--- + +# Configuration BNF + +All **DataStream** configuration files in YAML format conform to the following syntax: + + + `comp-decl` + `::= ":" ` + + `comp-type` + `::= "devices" | "targets" | "pipelines" | "routes"` + + `comp-def` + `::= ` + + `id-fld-def` + `::= "-" ":" ` + + `fld-defs` + `::= [ ]*` + + `fld-def` + `::= ":" ` + + `fld-vals` + `::= | | ` + + `inline-list` + `::= "[" ("," )* "]"` + + `block-list` + `::= ( "-" )+` + + `fld-val` + `::= txt-val | num-val` + + `txt-val` + `::= ( | )+` + + `num-val` + `::= ('-' | '+')? +` + + `txt-char` + `::= 'a' .. 'z' | 'A' .. 'Z' | '_'` + + `num-char` + `::= '0' .. '9'` + + `` + `::= '\n' | "\r\n" ` + + ``* + `::= '\t' | [' ']{2,} ` + + +:::note Indentation Rule +\* Users may choose tabs or multiple spaces for indentation. However, the pattern _must_ be consistent in all configuration files, and follow the layout given in the code samples found throughout this guide. +::: diff --git a/versioned_docs/version-1.5.0/appendix/field-formats/asim.mdx b/versioned_docs/version-1.5.0/appendix/field-formats/asim.mdx new file mode 100644 index 00000000..dd9a5034 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/field-formats/asim.mdx @@ -0,0 +1,21 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# ASIM + +The Advanced Security Information Model is a layer between the data and the user to configure what and how to ingest data from a source and to route it to a destination. ASIM provides standardization for security-focused log data. + +Available ASIM tables: + +- `ASimAuditEventLogs` +- `ASimAuthenticationEventLogs` +- `ASimDhcpEventLogs` +- `ASimDnsActivityLogs` +- `ASimFileEventLogs` +- `ASimNetworkSessionLogs` +- `ASimProcessEventLogs` +- `ASimRegistryEventLogs` +- `ASimUserManagementActivityLogs` +- `ASimWebSessionLogs` diff --git a/versioned_docs/version-1.5.0/appendix/field-formats/cef.mdx b/versioned_docs/version-1.5.0/appendix/field-formats/cef.mdx new file mode 100644 index 00000000..ec2fc6e4 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/field-formats/cef.mdx @@ -0,0 +1,13 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# CEF + +The Common Event Format is a standardized security event logging layout. Its creator is ArcSight, and it has been widely adopted by the industry. Features include: + +- Standard header with 7 required fields +- Extensible key-value pair extension format +- Header fields include: version, device vendor, device product, device version, signature ID, name, and severity +- Extension fields use a key=value format diff --git a/versioned_docs/version-1.5.0/appendix/field-formats/cim.mdx b/versioned_docs/version-1.5.0/appendix/field-formats/cim.mdx new file mode 100644 index 00000000..3ed74a9e --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/field-formats/cim.mdx @@ -0,0 +1,36 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# CIM + +The Common Information Model (CIM) is a standardized data model developed by Splunk. 
It provides: + +**Common Fields**: + +|Field Category|Fields|Description| +|:-:|:--|:--| +|Base Fields|`source`, `sourcetype`, `timestamp`, `host`, `index`|Core fields for event identification and source tracking| +|Identity Fields|`user`, `src_user`, `dest_user`|User identification and authentication tracking| +|Network Fields|`src_ip`, `dest_ip`, `src_port`, `dest_port`|Network communication endpoints| + +**Data Models**: + +|Model Type|Fields|Purpose| +|:-:|:--|:--| +|Authentication|`action`, `app`, `status`, `auth_method`|Track authentication events and access control| +|Network Traffic|`bytes`, `protocol`, `direction`, `tcp_flags`|Monitor network communications and traffic patterns| +|Vulnerability|`severity`, `signature`, `vulnerability_id`|Track security vulnerabilities and risks| +|Changes|-|Track system and configuration changes| +|Intrusion Detection|-|Monitor security threats and intrusions| + +**Event Categories**: + +|Category|Event Types|Description| +|--:|:--|:--| +|Authentication|`success`, `failure`, `logout`|Authentication-related events and outcomes| +|Network|`connection`, `alert`, `traffic`|Network activity and communications| +|System|`change`, `status`, `error`|System-level events and status changes| +|Security|-|Security-related events and alerts| + diff --git a/versioned_docs/version-1.5.0/appendix/field-formats/csl.mdx b/versioned_docs/version-1.5.0/appendix/field-formats/csl.mdx new file mode 100644 index 00000000..f95a9261 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/field-formats/csl.mdx @@ -0,0 +1,37 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# CSL + +The Common Security Log (CSL) is a standardized schema used in Microsoft Sentinel. It provides: + +**Common Fields**: + +|Field Category|Fields|Description| +|:-:|:--|:--| +|Base Fields|`TimeGenerated`, `Type`, `TenantId`, `SourceSystem`, `Computer`|Core fields for event identification and source tracking| +|Identity Fields|`AccountName`, `AccountDomain`, `UserPrincipalName`, `UserId`|User identification and authentication tracking| +|Network Fields|`SourceIP`, `DestinationIP`, `SourcePort`, `DestinationPort`|Network communication endpoints| +|Security Fields|`Activity`, `Status`, `ResultType`, `ResultDescription`|Security operation outcomes and status information| + +**Schema Categories**: + +|Category|Fields|Purpose| +|:-:|:--|:--| +|Authentication|`LogonType`, `AuthenticationMethod`, `LogonProcessName`, `ImpersonationLevel`|Track authentication events and access control| +|Network Session|`Protocol`, `Direction`, `BytesSent`, `BytesReceived`, `Duration`|Monitor network communications and traffic patterns| +|Process|`ProcessName`, `CommandLine`, `ProcessId`, `ParentProcessName`|Track process creation and execution| +|File|`FileName`, `FilePath`, `FileHash`, `FileOperation`|Monitor file access and modifications| +|Registry|`RegistryKey`, `RegistryValueName`, `RegistryValueData`|Track registry changes and access| + +**Event Types**: + +|Type|Event Classes|Description| +|--:|:--|:--| +|Authentication|`SignInLogs`, `AuditLogs`, `AADNonInteractiveUserSignInLogs`|Authentication-related events and outcomes| +|Security|`SecurityEvent`, `SecurityAlert`, `SecurityIncident`|Security-related events and alerts| +|Network|`AzureNetworkAnalytics`, `CommonSecurityLog`, `DnsEvents`|Network activity and communications| +|Identity|`IdentityInfo`, `IdentityDirectoryEvents`, `IdentityLogonEvents`|Identity and directory service events| +|Endpoint|`DeviceEvents`, `DeviceProcessEvents`, 
`DeviceFileEvents`|Endpoint detection and response events| \ No newline at end of file diff --git a/versioned_docs/version-1.5.0/appendix/field-formats/ecs.mdx b/versioned_docs/version-1.5.0/appendix/field-formats/ecs.mdx new file mode 100644 index 00000000..19b678ae --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/field-formats/ecs.mdx @@ -0,0 +1,21 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# ECS + +Elastic Common Schema (ECS) is a specification that defines a common set of fields for ingesting data into Elasticsearch. Field groups include: + +|Field Group|Core Fields|Description| +|:-:|:--|:--| +|Base Fields|`@timestamp`, `tags`, `labels`, `message`|Universal fields that appear in every event| +|Host|`host.name`, `host.ip`, `host.os.*`, `host.mac`|Information about the host machine| +|Network|`network.protocol`, `network.type`, `network.direction`, `network.bytes`|Network activity details| +|Source/Destination|`source.ip`, `source.port`, `dest.ip`, `dest.port`|Communication endpoint information| +|User|`user.id`, `user.name`, `user.domain`, `user.email`|User-related information| +|Event|`event.category`, `event.type`, `event.action`, `event.outcome`|Event classification details| +|File|`file.path`, `file.size`, `file.type`, `file.hash.*`|File-related information| +|Process|`process.pid`, `process.name`, `process.args`, `process.parent.*`|Process execution details| +|Error|`error.code`, `error.message`, `error.type`, `error.stack_trace`|Error-related information| +|Trace|`trace.id`, `span.id`, `transaction.id`|Distributed tracing data| diff --git a/versioned_docs/version-1.5.0/appendix/field-formats/leef.mdx b/versioned_docs/version-1.5.0/appendix/field-formats/leef.mdx new file mode 100644 index 00000000..3805b1ad --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/field-formats/leef.mdx @@ -0,0 +1,15 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# LEEF + +The Log Event Extended Format is an enterprise security event logging format created by IBM QRadar. + +Features: + +- Lightweight parsing requirements +- Fixed header fields: version, vendor, product, version, eventID +- Variable attributes section +- Optimized for SIEM processing diff --git a/versioned_docs/version-1.5.0/appendix/field-formats/ocsf.mdx b/versioned_docs/version-1.5.0/appendix/field-formats/ocsf.mdx new file mode 100644 index 00000000..d80e5069 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/field-formats/ocsf.mdx @@ -0,0 +1,132 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# OCSF + +The Open Cybersecurity Schema Framework (OCSF) is an open standard for security event data that provides a vendor-agnostic way to normalize security logs across different sources. OCSF provides standardization for security-focused log data, enabling seamless integration with AWS Security Lake and other security analytics platforms. + +OCSF organizes security events into classes, each representing a specific type of security activity. When using `field_format: "ocsf"`, VirtualMetric automatically transforms your security data into OCSF-compliant format based on the event type. 
+ +## Available OCSF Schema Classes + +### System Activity (1000-1999) + +- `OCSF1001` - File Activity +- `OCSF1002` - Kernel Extension Activity +- `OCSF1003` - Kernel Activity +- `OCSF1004` - Memory Activity +- `OCSF1005` - Module Activity +- `OCSF1006` - Scheduled Job Activity +- `OCSF1007` - Process Activity + +### Findings (2000-2999) + +- `OCSF2001` - Security Finding +- `OCSF2002` - Vulnerability Finding +- `OCSF2003` - Compliance Finding +- `OCSF2004` - Detection Finding + +### Identity & Access Management (3000-3999) + +- `OCSF3001` - Account Change +- `OCSF3002` - Authentication +- `OCSF3003` - Authorize Session +- `OCSF3004` - Entity Management +- `OCSF3005` - User Access Management +- `OCSF3006` - Group Management + +### Network Activity (4000-4999) + +- `OCSF4001` - Network Activity +- `OCSF4002` - HTTP Activity +- `OCSF4003` - DNS Activity +- `OCSF4004` - DHCP Activity +- `OCSF4005` - RDP Activity +- `OCSF4006` - SMB Activity +- `OCSF4007` - SSH Activity +- `OCSF4008` - FTP Activity +- `OCSF4009` - Email Activity +- `OCSF4010` - Network File Activity +- `OCSF4011` - Email File Activity +- `OCSF4012` - Email URL Activity +- `OCSF4013` - NTP Activity +- `OCSF4014` - Tunnel Activity + +### Discovery (5000-5999) + +- `OCSF5001` - Device Inventory Info +- `OCSF5002` - Device Config State +- `OCSF5003` - User Inventory Info +- `OCSF5004` - Operating System Patch State + +### Application Activity (6000-6999) + +- `OCSF6001` - Web Resources Activity +- `OCSF6002` - Application Lifecycle +- `OCSF6003` - API Activity +- `OCSF6004` - Web Resource Access Activity +- `OCSF6005` - Datastore Activity +- `OCSF6006` - File Hosting Activity + +## Usage + +To enable OCSF normalization, specify the field format in your target configuration: + +```yaml +targets: + - name: my_target + type: awssecuritylake + properties: + field_format: "ocsf" +``` + +When using the VirtualMetric AWS Security Lake Pack, OCSF normalization is handled automatically through the `aws_lake` pipeline. The pack intelligently routes events to the appropriate OCSF schema class based on the source data type and vendor. + +## Integration with AWS Security Lake + +OCSF is the native schema format for AWS Security Lake. When sending data to AWS Security Lake, you must: + +1. Enable OCSF field formatting (handled automatically by the `aws_lake` pipeline) +2. Specify the appropriate OCSF schema identifier for each bucket +3. Ensure data is in Parquet format (handled automatically by the `awssecuritylake` target) + +## VirtualMetric AWS Security Lake Pack + +The VirtualMetric AWS Security Lake Pack provides comprehensive OCSF normalization for diverse security data sources: + +- **Syslog messages** (native, CEF, LEEF formats) are automatically converted to OCSF +- **Windows Security Events** are transformed from ECS through ASIM to OCSF +- **Firewall logs** from major vendors (Fortinet, Palo Alto Networks, Check Point, Cisco ASA, SonicWall, WatchGuard, Cisco Meraki) are normalized to OCSF +- **Windows DNS logs** are converted to OCSF DNS Activity format + +The pack handles multi-stage transformations, preserving vendor-specific context while ensuring OCSF compliance for AWS Security Lake ingestion. 
+ +## Example Configuration + +```yaml +targets: + - name: security_lake + type: awssecuritylake + pipelines: + - aws_lake + properties: + key: "AKIAIOSFODNN7EXAMPLE" + secret: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + region: "us-east-1" + source: "virtualmetric" + account: "123456789012" + buckets: + - bucket: "aws-security-data-lake-network" + name: "network-{{.Timestamp}}.parquet" + schema: "OCSF4001" + - bucket: "aws-security-data-lake-auth" + name: "auth-{{.Timestamp}}.parquet" + schema: "OCSF3002" + - bucket: "aws-security-data-lake-dns" + name: "dns-{{.Timestamp}}.parquet" + schema: "OCSF4003" +``` + +In this example, the `aws_lake` pipeline automatically normalizes all events to OCSF format, and the target routes them to the appropriate buckets based on their schema class. \ No newline at end of file diff --git a/versioned_docs/version-1.5.0/appendix/file-formats/avro.mdx b/versioned_docs/version-1.5.0/appendix/file-formats/avro.mdx new file mode 100644 index 00000000..45291187 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/file-formats/avro.mdx @@ -0,0 +1,49 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# Avro + +**Apache Avro** is a data serialization system that provides rich data structures and a compact, fast, binary data format. Originally developed within the Apache Hadoop ecosystem, Avro is designed for schema evolution and language-neutral data exchange. + +## Binary Layout + +|Section|Internal Name|Description|Possible Values / Format| +|--:|:-:|:--|:--| +|**File Header**|`magic`|4-byte magic number identifying Avro files|ASCII: `Obj` followed by `1` byte (hex: `4F 62 6A 01`)| +||`meta`|Metadata map storing key-value pairs (e.g., schema, codec)|Map of string keys to byte values (e.g., `"avro.schema"` → JSON schema string)| +||`sync`|16-byte random sync marker used between blocks| 16 random bytes (unique per file)| +|**Data Block**|`blockCount`|Number of records in the block|Long (variable-length zigzag encoding)| +||`blockSize`|Size in bytes of the serialized records (after compression, if any)|Long| +||`blockData`| Serialized records (optionally compressed)|Binary-encoded data per schema| +||`sync`| Sync marker repeated after each block|Same 16-byte value as in header| + +## Schema Types (Stored in Metadata) + +|Type|Internal Name|Description|Example / Format| +|--:|:-:|:--|:--| +|Primitive|`null`, `boolean`, `int`, `long`, `float`, `double`, `bytes`, `string`|Basic types|`"type": "string"| +|Record|`record`|Named collection of fields|`{ "type": "record", "name": "Person", "fields": [...] 
}`| +|Enum|`enum`|Named set of symbols| `{ "type": "enum", "name": "Suit", "symbols": ["SPADES", "HEARTS"] }`| +|Array|`array`|Ordered list of items|`{ "type": "array", "items": "string" }`| +|Map|`map`|Key-value pairs with string keys|`{ "type": "map", "values": "int" }`| +|Union|JSON array|Multiple possible types|`[ "null", "string" ]`| +|Fixed|`fixed`|Fixed-size byte array|`{ "type": "fixed", "name": "md5", "size": 16 }`| + +## Metadata Keys (in `meta`) + +|Key|Description|Example Value| +|--:|:--|:--| +|`avro.schema`|JSON-encoded schema|JSON string defining the schema| +|`avro.codec`|Compression codec used (optional)|`"null"` (default), `"deflate"`, `"snappy"`, `"bzip2"`, `"xz"`| + +## Compression Codecs + +|Codec|Description|Best For| +|--:|:--|:--| +|`null`|No compression applied|Small files or testing| +|`deflate`|Standard ZIP compression|General-purpose compression| +|`snappy`|Fast compression/decompression|Real-time streaming applications| +|`bzip2`|High compression ratio|Storage-constrained environments| +|`xz`|Modern compression algorithm|Maximum compression efficiency| diff --git a/versioned_docs/version-1.5.0/appendix/file-formats/parquet.mdx b/versioned_docs/version-1.5.0/appendix/file-formats/parquet.mdx new file mode 100644 index 00000000..d234c97f --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/file-formats/parquet.mdx @@ -0,0 +1,60 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# Parquet + +**Apache Parquet** is a column-oriented binary storage format optimized for analytical workloads. Originally developed within the Apache Hadoop ecosystem, Parquet provides efficient compression and encoding schemes for large-scale data processing. + +## Binary Layout + +|Section|Internal Name|Description|Possible Values / Format| +|--:|:-:|:--|:--| +|**File Header**|`magic`|4-byte magic number identifying Parquet files|ASCII: `PAR1` (hex: `50 41 52 31`)| +|**Row Group**|`row_group_metadata`|Metadata for each row group|Contains column chunk metadata and statistics| +||`column_chunk`|Data for each column in the row group|Compressed and encoded column data| +|**File Footer**|`metadata`|File-level metadata including schema and row groups|Thrift-encoded metadata structure| +||`metadata_length`|Length of metadata section|4-byte little-endian integer| +||`magic`|Footer magic number|ASCII: `PAR1` (hex: `50 41 52 31`)| + +## Column Storage Example + +**Row-based storage** (traditional): + +```plaintext +id,name,last_name,age +1,John,Buck,35 +2,Jane,Doe,27 +3,Joe,Dane,42 +``` + +**Column-based Storage** (Parquet): + +```plaintext +id: [1, 2, 3] +name: [John, Jane, Joe] +last_name: [Buck, Doe, Dane] +age: [35, 27, 42] +``` + +## Encoding Types + +|Encoding|Internal Name|Description|Use Case| +|--:|:-:|:--|:--| +|Plain|`PLAIN`|No encoding applied|Small datasets or unsorted data| +|Dictionary|`PLAIN_DICTIONARY`|Values replaced with dictionary indices|Repeated string values| +|Run Length|`RLE`|Consecutive identical values compressed|Sparse or repetitive data| +|Bit Packing|`BIT_PACKED`|Pack values using minimum required bits|Boolean or small integer ranges| +|Delta|`DELTA_BINARY_PACKED`|Store differences between consecutive values|Sorted numerical data| + +## Compression Codecs + +|Codec|Description|Best For| +|--:|:--|:--| +|`UNCOMPRESSED`|No compression applied|Testing or very small files| +|`SNAPPY`|Fast compression/decompression|General-purpose, balanced performance| +|`GZIP`|Higher compression ratio|Storage-constrained environments| +|`LZO`|Fast decompression|Read-heavy 
workloads| +|`BROTLI`|Modern compression algorithm|High compression ratio needs| +|`LZ4`|Extremely fast compression|Low-latency applications| diff --git a/versioned_docs/version-1.5.0/appendix/file-formats/pem.mdx b/versioned_docs/version-1.5.0/appendix/file-formats/pem.mdx new file mode 100644 index 00000000..1d527aac --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/file-formats/pem.mdx @@ -0,0 +1,52 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# PEM + +**Privacy Enhanced Mail (PEM)** is a Base64-encoded format for storing cryptographic keys, certificates, and other security-related data. Despite its name, PEM is widely used beyond email applications for various cryptographic purposes. + +## Structure Format + +|Component|Description|Example| +|--:|:--|:--| +|**Begin Marker**|Header identifying content type|`-----BEGIN CERTIFICATE-----`| +|**Headers**|Optional key-value metadata pairs|`Proc-Type: 4,ENCRYPTED`| +|**Encoded Data**|Base64-encoded binary content|`MIIHzTCCBbWgAwIBAgIQaBYE3/M08XHYCnNVmcFBcjANBgkqhkiG9w0BAQsFADBy...`| +|**End Marker**|Footer matching the begin marker|`-----END CERTIFICATE-----`| + +## Common PEM Types + +|Type|Begin/End Label|Description|Use Case| +|--:|:-:|:--|:--| +|Certificate|`CERTIFICATE`|X.509 public key certificate|SSL/TLS, code signing| +|Private Key|`PRIVATE KEY`|PKCS#8 private key|General-purpose private key storage| +|RSA Private Key|`RSA PRIVATE KEY`|PKCS#1 RSA private key|RSA-specific private keys| +|Public Key|`PUBLIC KEY`|X.509 SubjectPublicKeyInfo|Public key distribution| +|Certificate Request|`CERTIFICATE REQUEST`|PKCS#10 certificate signing request|Certificate authority requests| +|DH Parameters|`DH PARAMETERS`|Diffie-Hellman parameters|Key exchange configuration| +|EC Private Key|`EC PRIVATE KEY`|Elliptic Curve private key|EC cryptography| + +## Encrypted PEM Format + +|Field|Description|Example| +|--:|:--|:--| +|**Proc-Type**|Processing type and encryption flag|`Proc-Type: 4,ENCRYPTED`| +|**DEK-Info**|Encryption algorithm and IV|`DEK-Info: AES-256-CBC,A1B2C3D4E5F6...`| +|**Encrypted Data**|Base64-encoded encrypted content|`Encrypted binary data...`| + +## Example Structure + +```encoding +-----BEGIN RSA PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: AES-256-CBC,A1B2C3D4E5F67890A1B2C3D4E5F67890 + +MIIEpAIBAAKCAQEA2Z3QX0KZVE9I+sLlmEUKkYgJiEQSvfNF6JUVNBQdHPvs +kNkRFWGLQQEjLXPOCjGhvQZZLvbPjVZGKlnTJ1yJQvzjhvnP0zJhExFmKWz8 +... +-----END RSA PRIVATE KEY----- +``` + +PEM files are text-based, human-readable, and can contain multiple objects separated by blank lines. They're commonly used in web servers, email systems, and various security applications. diff --git a/versioned_docs/version-1.5.0/appendix/includes-index.mdx b/versioned_docs/version-1.5.0/appendix/includes-index.mdx new file mode 100644 index 00000000..d91cd6e3 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/includes-index.mdx @@ -0,0 +1,17 @@ +--- +id: includes-index +sidebar_label: Includes Index +title: Includes Index +--- + +import includes from '@site/includes.json'; + +# Includes Index + +This page lists all available include IDs (from `includes.json`) that you can embed with the `` component. + +| ID | File | +|----|------| +{Object.entries(includes).map(([id, file]) => `| ${id} | ${file} |`).join('\n')} + +> Maintenance: update `includes.json` to add or remove include content. No code changes required for lazy loading. 
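+
+For reference, `includes.json` is a flat map from include ID to the file that provides its content. The entries below are hypothetical placeholders rather than IDs that exist in this repository:
+
+```json
+{
+  "example-note": "includes/example-note.mdx",
+  "example-changelog": "includes/example-changelog.mdx"
+}
+```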
diff --git a/versioned_docs/version-1.5.0/appendix/protocols/estreamer.mdx b/versioned_docs/version-1.5.0/appendix/protocols/estreamer.mdx new file mode 100644 index 00000000..a680ba1a --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/protocols/estreamer.mdx @@ -0,0 +1,28 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# eStreamer + +Cisco's event streaming protocol used by Firepower Management Center (FMC) to send events to export security event data, intrusion alerts, connection logs, and other network telemetry in real-time. It enables integration with external SIEMs and analytics platforms, providing deep visibility into network security events. + +|Field|Description| +|--:|:--| +|`eventType`|Type of event (e.g., intrusion, connection, malware)| +|`timestamp`|Time the event occurred| +|`sourceIP`|Source IP address| +|`destinationIP`|Destination IP address| +|`sourcePort`|Source port number| +|`destinationPort`|Destination port number| +|`protocol`|Transport protocol (TCP, UDP, etc.)| +|`userIdentity`|Associated user (if available)| +|`deviceUUID`|Unique identifier for the source device| +|`application`|Detected application (e.g., HTTP, SSH)| +|`threatScore`|Severity or risk rating of the event| +|`signatureID`|Identifier for the security rule triggered| +|`signatureName`|Description of the triggered security rule| +|`malwareSHA256`|Hash of detected malware (if applicable)| +|`fileName`|Name of the file involved in the event| + +eStreamer provides detailed security telemetry and integrates with SIEMs for real-time threat monitoring and forensic analysis. diff --git a/versioned_docs/version-1.5.0/appendix/protocols/ipfix.mdx b/versioned_docs/version-1.5.0/appendix/protocols/ipfix.mdx new file mode 100644 index 00000000..520c0095 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/protocols/ipfix.mdx @@ -0,0 +1,29 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# IPFIX + +The IP Flow Information Export is an IETF-standardized protocol for exporting flow-based traffic data from routers, switches, and other network devices. It is an evolution of NetFlow, offering greater flexibility by supporting custom fields and templates for diverse network monitoring, security, and analytics applications. IPFIX allows vendors to define and export additional data types beyond traditional NetFlow fields. + +|Field|Description| +|--:|:--| +|`sourceIPv4Address`|Source IP address (IPv4)| +|`destinationIPv4Address`|Destination IP address (IPv4)| +|`sourceIPv6Address`|Source IP address (IPv6)| +|`destinationIPv6Address`|Destination IP address (IPv6)| +|`sourceTransportPort`|Source port number| +|`destinationTransportPort`|Destination port number| +|`protocolIdentifier`|Transport protocol (TCP, UDP, etc.)| +|`packetTotalCount`|Number of packets in the flow| +|`octetTotalCount`|Total bytes transferred| +|`flowStartMilliseconds`|Start timestamp in milliseconds| +|`flowEndMilliseconds`|End timestamp in milliseconds| +|`tcpControlBits`|TCP control tcp_flags| +|`ipClassOfService`|Type of Service (QoS marking)| +|`bgpSourceAsNumber`|Source BGP Autonomous System (AS) number| +|`bgpDestinationAsNumber`|Destination BGP AS number| +|`flowEndReason`|Reason the flow ended (e.g. timeout, TCP FIN)| + +IPFIX extends NetFlow by supporting variable-length fields and user-defined templates, making it highly adaptable for modern network monitoring needs. 
diff --git a/versioned_docs/version-1.5.0/appendix/protocols/kafka.mdx b/versioned_docs/version-1.5.0/appendix/protocols/kafka.mdx new file mode 100644 index 00000000..b60b7cd8 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/protocols/kafka.mdx @@ -0,0 +1,47 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# Kafka + +## Binary Layout + +|Field|Internal Name|Description|Type / Format|Example / Values| +|--:|:--:|:--|:--|:--| +|**Size**|`length`|Total size of the request (excluding this field)|`int32`|e.g. `0x0000012C`| +|**API Key**|`api_key`|Identifies the type of request|`int16`|`0` = Produce, `1` = Fetch, etc.| +|**API Version**|`api_version`|Version of the API being used|`int16`| e.g. `7`| +|**Correlation ID**|`correlation_id`|Used to match requests to responses|`int32`|e.g. `12345`| +|**Client ID**|`client_id`|Optional identifier of the client|`string` (nullable)|e.g. `"my-client"`| +|**Request Body**|*(varies by API)*|The actual request payload|Structured binary|Depends on `api_key` and `api_version`| + +## Common API Keys + +|API Key|Name|Purpose| +|--:|:--|:--| +|`0`|Produce|Send messages to a topic| +|`1`|Fetch|Retrieve messages from a topic| +|`3`|Metadata|Get topic/partition info| +|`8`|Offset|Get earliest/latest offsets| +|`18`|ApiVersions|Discover supported API versions| +|`21`|SaslHandshake|SASL authentication handshake| +|`22`|SaslAuthenticate|SASL authentication| +|`42`|DescribeCluster|Get cluster metadata| + +## Primitive Types Used + +|Type|Description| +|:-:|---| +|`int8/16/32/64`|Signed integers (big-endian)| +|`string`|Length-prefixed UTF-8 string| +|`array`|Length-prefixed array of type `T`| +|`bytes`|Length-prefixed byte array| +|`varint`|Variable-length integer (zigzag encoding)| + +## Response Structure + +|Field|Description| +|--:|:--| +|`correlation_id`|Matches the request| +|`response_body`|Depends on the request type| diff --git a/versioned_docs/version-1.5.0/appendix/protocols/nats.mdx b/versioned_docs/version-1.5.0/appendix/protocols/nats.mdx new file mode 100644 index 00000000..97d97d61 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/protocols/nats.mdx @@ -0,0 +1,29 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# NATS + +NATS protocol for lightweight messaging: + +## Message Format + +|Field|Description|Example Value| +|--:|:--|:--| +|**Operation**|Command type|`PUB`, `SUB`, `MSG`, `PING`, `PONG`, `INFO`, `CONNECT`| +|**Subject**|Message topic/subject|`foo.bar`, `user.123`| +|**Reply-To**|Optional reply subject|`_INBOX.abc123`| +|**Payload Size**|Byte length of payload|`13`| +|**Payload**|Message data|`Hello, World!`| +|**Terminator**|CRLF sequence|`\r\n`| + +## Protocol Operations + +|Operation|Format|Purpose| +|:-:|:--|:--| +|`PUB`|`PUB [reply-to] \r\n\r\n`|Publish message| +|`SUB`|`SUB [queue] \r\n`|Subscribe to subject| +|`MSG`|`MSG [reply-to] \r\n\r\n`|Received message| +|`PING`|`PING\r\n`|Keep-alive request| +|`PONG`|`PONG\r\n`|Keep-alive response| diff --git a/versioned_docs/version-1.5.0/appendix/protocols/netflow.mdx b/versioned_docs/version-1.5.0/appendix/protocols/netflow.mdx new file mode 100644 index 00000000..8937c5cb --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/protocols/netflow.mdx @@ -0,0 +1,28 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# NetFlow + +A network protocol developed by Cisco for collecting, analyzing, and monitoring network traffic. 
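+
+## Example Exchange
+
+A minimal publish/subscribe exchange as it appears on the wire; the subject, subscription ID, and payload are illustrative:
+
+```plaintext
+Subscriber: SUB foo.bar 1\r\n                      (subscribe to foo.bar with sid 1)
+Publisher : PUB foo.bar 13\r\nHello, World!\r\n    (publish a 13-byte payload)
+Server    : MSG foo.bar 1 13\r\nHello, World!\r\n  (deliver to sid 1, payload size 13)
+```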
It captures metadata about IP traffic flows, providing insights into bandwidth usage, security threats, and network performance. NetFlow records include key details such as source and destination IPs, ports, protocol types, and timestamps. + +|Field|Description| +|--:|:--| +|`SrcAddr`|Source IP address| +|`DstAddr`|Destination IP address| +|`SrcPort`|Source port number| +|`DstPort`|Destination port number| +|`Protocol`|Transport protocol (TCP, UDP, etc.)| +|`Packets`|Number of packets in the flow| +|`Bytes`|Total bytes transferred| +|`StartTime`|Timestamp of the first packet in the flow| +|`EndTime`|Timestamp of the last packet in the flow| +|`SrcAS`|Source Autonomous System (AS) number| +|`DstAS`|Destination Autonomous System (AS) number| +|`TCPFlags`|TCP control flags for the flow| +|`ToS`|Type of Service (QoS marking)| +|`NextHop`|IP address of the next hop router| +|`FlowDuration`|Duration of the flow in milliseconds| + +This is a general overview; actual fields may vary depending on the versions and implementations. diff --git a/versioned_docs/version-1.5.0/appendix/protocols/rabbitmq.mdx b/versioned_docs/version-1.5.0/appendix/protocols/rabbitmq.mdx new file mode 100644 index 00000000..49243ffe --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/protocols/rabbitmq.mdx @@ -0,0 +1,27 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# RabbitMQ + +Advanced Message Queuing Protocol (AMQP) 0-9-1 frame structure: + +## Binary Layout + +|Field|Internal Name|Description|Type / Format|Example / Values| +|--:|:--:|:--|:--|:--| +|**Type**|`frame_type`|Type of frame|`uint8`|`1` = Method, `2` = Header, `3` = Body, `8` = Heartbeat| +|**Channel**|`channel_id`|Channel number|`uint16`|e.g. `1`, `0` for connection-level| +|**Size**|`frame_size`|Payload size in bytes|`uint32`|e.g. 
`0x00000014`| +|**Payload**|`payload`|Frame-specific data|Binary|Depends on `frame_type`| +|**End**|`frame_end`|Frame terminator|`uint8`|Always `0xCE`| + +## Frame Types + +|Type|Name|Purpose| +|--:|:--|:--| +|`1`|Method|AMQP method calls (open, close, publish, etc.)| +|`2`|Header|Content header with properties| +|`3`|Body|Message content data| +|`8`|Heartbeat|Keep-alive signal| diff --git a/versioned_docs/version-1.5.0/appendix/protocols/redis.mdx b/versioned_docs/version-1.5.0/appendix/protocols/redis.mdx new file mode 100644 index 00000000..1de705d1 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/protocols/redis.mdx @@ -0,0 +1,26 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# Redis + +Redis Serialization Protocol (RESP) for client-server communication: + +## Message Format + +|Field|Description|Example Value| +|--:|:--|:--| +|**Type**|First byte indicates data type|`+` (Simple String), `-` (Error), `:` (Integer), `$` (Bulk String), `*` (Array)| +|**Data**|Payload following type indicator|`OK\r\n`, `3\r\n`, `$5\r\nhello\r\n`| +|**Terminator**|CRLF sequence marking end of element|`\r\n`| + +## Data Types + +|Type|Indicator|Format|Example| +|:-:|:-:|:--|:--| +|Simple String|`+`|`+\r\n`|`+OK\r\n`| +|Error|`-`|`-\r\n`|`-ERR unknown command\r\n`| +|Integer|`:`|`:\r\n`|`:1000\r\n`| +|Bulk String|`$`|`$\r\n\r\n`|`$5\r\nhello\r\n`| +|Array|`*`|`*\r\n`|`*2\r\n$3\r\nfoo\r\n$3\r\nbar\r\n`| diff --git a/versioned_docs/version-1.5.0/appendix/protocols/sflow.mdx b/versioned_docs/version-1.5.0/appendix/protocols/sflow.mdx new file mode 100644 index 00000000..952890c2 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/protocols/sflow.mdx @@ -0,0 +1,28 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# sFlow + +sFlow (Sampled Flow) is a network monitoring protocol designed for high-speed networks. Unlike NetFlow and IPFIX, which capture complete flow records, sFlow uses packet sampling to provide scalable and efficient traffic analysis. It operates by embedding monitoring agents in network devices that randomly sample packets and send them to a central collector for analysis. + +|Field|Description| +|--:|:--| +|`sampleSequenceNumber`|Unique identifier for the sampled packet| +|`sourceIP`|Source IP address| +|`destinationIP`|Destination IP address| +|`sourcePort`|Source port number| +|`destinationPort`|Destination port number| +|`protocol`|Transport protocol (TCP, UDP, etc.)| +|`sampledPacketSize`|Size of the sampled packet in bytes| +|`inputInterface`|Interface where the packet was received| +|`outputInterface`|Interface where the packet was forwarded| +|`vlanID`|VLAN identifier of the packet| +|`tcpFlags`|TCP control flags| +|`flowSampleType`|Type of sampling (e.g., packet, counter)| +|`samplingRate`|Ratio of sampled packets to total packets| +|`agentAddress`|IP address of the device performing sampling| +|`collectorAddress`|IP address of the sFlow collector| + +sFlow's lightweight sampling approach makes it ideal for real-time traffic monitoring in large-scale, high-speed networks. 
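+
+Because only a fraction of packets is sampled, collectors extrapolate traffic totals by multiplying sampled counts by the sampling rate. A simple worked example with illustrative numbers:
+
+```plaintext
+samplingRate      = 2048           (1 out of every 2048 packets is sampled)
+sampled packets   = 100
+sampled bytes     = 120,000
+
+estimated packets = 100 x 2048     = 204,800
+estimated bytes   = 120,000 x 2048 = 245,760,000 (~245.8 MB)
+```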
diff --git a/versioned_docs/version-1.5.0/appendix/protocols/smtp.mdx b/versioned_docs/version-1.5.0/appendix/protocols/smtp.mdx new file mode 100644 index 00000000..aa61a5d5 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/protocols/smtp.mdx @@ -0,0 +1,27 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# SMTP + +Simple Mail Transfer Protocol for email transmission: + +## Message Format + +|Field|Description|Example Value| +|--:|:--|:--| +|**Command**|SMTP command|`MAIL`, `RCPT`, `DATA`, `HELO`, `EHLO`, `QUIT`| +|**Parameters**|Command arguments|`FROM:`, `TO:`| +|**Response Code**|3-digit status code|`250`, `354`, `550`| +|**Response Text**|Human-readable message|`OK`, `Start mail input`, `Mailbox unavailable`| +|**Terminator**|CRLF sequence|`\r\n`| + +## Response Codes + +|Code|Category|Description| +|:-:|:-:|:--| +|`2xx`|Success|Command completed successfully| +|`3xx`|Intermediate|Command accepted, more info needed| +|`4xx`|Transient Error|Temporary failure, retry possible| +|`5xx`|Permanent Error|Command failed, do not retry| diff --git a/versioned_docs/version-1.5.0/appendix/protocols/syslog.mdx b/versioned_docs/version-1.5.0/appendix/protocols/syslog.mdx new file mode 100644 index 00000000..18f236a6 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/protocols/syslog.mdx @@ -0,0 +1,57 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# Syslog + +Standard protocol for system logging: + +## Message Format + +[**RFC 3164**](https://www.rfc-editor.org/rfc/rfc3164.html): + +|Field| Description|Example Value| +|--:|:--|:--| +|`PRI`|Priority value = Facility * 8 + Severity, enclosed in angle brackets|`<34>`| +|`TIMESTAMP`|Date and time in "Mmm dd hh:mm:ss" format|Oct 22 12:34:56| +|`HOSTNAME`|Hostname or IP address of the sender|``| +|`TAG`|Application name and optional `PID`| `appname[1234]`| +|`MESSAGE`|Free-form message content|`This is a log message.`| + +[**RFC 5424**](https://www.rfc-editor.org/rfc/rfc3164): + +|Field|Description|Example Value| +|--:|:--|:--| +|`PRI`|Priority value = Facility * 8 + Severity, enclosed in angle brackets|`<34>`| +|`VERSION`|Syslog protocol version (always 1 for RFC 5424)|`1`| +|`TIMESTAMP`|ISO 8601 timestamp with optional timezone|`2025-01-03T14:07:15.003Z`| +|`HOSTNAME`|FQDN or IP address of the sender|`host.example.com`| +|`APP-NAME`|Application name|`appname`| +|`PROCID`|Process ID|`1234`| +|`MSGID`|Identifier for the type of message|`ID47`| +|`STRUCTURED-DATA`|Optional structured key-value pairs|`[exampleSDID@32473 iut="3"]`| +|`MESSAGE`|Free-form message content|This is a structured log message.| + +## Facility Values + +|Code|Facility| +|:-:|:--| +|`0`|kernel messages| +|`1`|user-level messages| +|`2`|mail system| +|...|...| +|`16`–`23`|`local0` to `local7`| + +## Severity Levels + +|Code|Level| +|:-:|:--| +|`0`|Emergency| +|`1`|Alert| +|`2`|Critical| +|`3`|Error| +|`4`|Warning| +|`5`|Notice| +|`6`|Informational| +|`7`|Debug| diff --git a/versioned_docs/version-1.5.0/appendix/protocols/tftp.mdx b/versioned_docs/version-1.5.0/appendix/protocols/tftp.mdx new file mode 100644 index 00000000..d51c23c2 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/protocols/tftp.mdx @@ -0,0 +1,30 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# TFTP + +Trivial File Transfer Protocol for simple file transfers: + +## Binary Layout + +|Field|Internal Name|Description|Type / Format|Example / Values| +|--:|:--:|:--|:--|:--| +|**Opcode**|`opcode`|Operation type|`uint16`|`1` = RRQ, `2` = WRQ, `3` = DATA, `4` = 
ACK, `5` = ERROR| +|**Filename**|`filename`|File path (RRQ/WRQ only)|Null-terminated string|`config.txt\0`| +|**Mode**|`mode`|Transfer mode (RRQ/WRQ only)|Null-terminated string|`octet\0`, `netascii\0`| +|**Block Number**|`block_num`|Data block sequence (DATA/ACK)|`uint16`|e.g. `1`, `2`, `3`| +|**Data**|`data`|File content (DATA only)|Binary|Up to 512 bytes| +|**Error Code**|`error_code`|Error type (ERROR only)|`uint16`|`0` = Not defined, `1` = File not found| +|**Error Message**|`error_msg`|Error description (ERROR only)|Null-terminated string|`File not found\0`| + +## Opcodes + +|Opcode|Name|Purpose| +|--:|:--|:--| +|`1`|RRQ|Read Request| +|`2`|WRQ|Write Request| +|`3`|DATA|Data packet| +|`4`|ACK|Acknowledgment| +|`5`|ERROR|Error packet| diff --git a/versioned_docs/version-1.5.0/appendix/windows-event-ids.mdx b/versioned_docs/version-1.5.0/appendix/windows-event-ids.mdx new file mode 100644 index 00000000..f0473b85 --- /dev/null +++ b/versioned_docs/version-1.5.0/appendix/windows-event-ids.mdx @@ -0,0 +1,43 @@ +--- +pagination_prev: null +pagination_next: null +--- + +# Windows Event IDs + +Predefined **Windows** log channel configurations: + +- `windows_event_log_collector` is the base definition name +- `windows_security_log_collector` is for security + +There are three different security definitions that can be used under the `\Director\package\definitions\module\windows\host\event` directory. + +1. `windows_security_log_collector_all`: This is to collect all "Security" related events, "Microsoft-Windows-AppLocker/EXE and DLL", and "Microsoft-Windows-AppLocker/MSI and Script". + +2. `windows_security_log_collector_common_example`: This is to collect specific events from "Security", "Microsoft-Windows-AppLocker/EXE and DLL", and "Microsoft-Windows-AppLocker/MSI and Script" channels. + +- Security related event ids: + + [`1`, `299`, `300`, `324`, `340`, `403`, `404`, `410`, `411`, `412`, `413`, `431`, `500`, `501`, `1100`, `1102`, `1107`, `1108`, `4608`, `4610`, `4611`, `4614`, `4622`, `4624`, `4625`, `4634`, `4647`, `4648`, `4649`, `4657`, `4661`, `4662`, `4663`, `4665`, `4666`, `4667`, `4670`, `4672`, `4673`, `4674`, `4675`, `4688`, `4689`, `4697`, `4700`, `4702`, `4704`, `4705`, `4716`, `4717`, `4718`, `4719`, `4720`, `4722`, `4723`, `4724`, `4725`, `4726`, `4727`, `4728`, `4729`, `4732`, `4733`, `4735`, `4737`, `4738`, `4739`, `4740`, `4742`, `4744`, `4745`, `4746`, `4750`, `4751`, `4752`, `4754`, `4755`, `4756`, `4757`, `4760`, `4761`, `4762`, `4764`, `4767`, `4768`, `4771`, `4774`, `4778`, `4779`, `4781`, `4793`, `4797`, `4798`, `4799`, `4800`, `4801`, `4802`, `4803`, `4825`, `4826`, `4870`, `4886`, `4887`, `4888`, `4893`, `4898`, `4902`, `4904`, `4905`, `4907`, `4931`, `4932`, `4933`, `4946`, `4948`, `4956`, `4985`, `5024`, `5033`, `5059`, `5136`, `5137`, `5140`, `5145`, `5632`, `6144`, `6145`, `6272`, `6273`, `6278`, `6416`, `6423`, `6424`, `26401`, `30004`] + +- Microsoft-Windows-AppLocker/EXE and DLL-related event ids: + + [`8001`, `8002`, `8003`, `8004`] + +- Microsoft-Windows-AppLocker/MSI and Script-related event ids: + + [`8005`, `8006`, `8007`, `8222`] + +3. 
`windows_security_log_collector_minimal_example` + + - Security related event ids: + + [`1102`, `4624`, `4625`, `4657`, `4663`, `4688`, `4700`, `4702`, `4719`, `4720`, `4722`, `4723`, `4724`, `4727`, `4728`, `4732`, `4735`, `4737`, `4739`, `4740`, `4754`, `4755`, `4756`, `4767`, `4799`, `4825`, `4946`, `4948`, `4956`, `5024`, `5033`] + +- Microsoft-Windows-AppLocker/EXE and DLL-related event ids: + + [`8001`, `8002`, `8003`, `8004`] + +- Microsoft-Windows-AppLocker/MSI and Script-related event ids: + + [`8005`, `8006`, `8007`, `8222`] diff --git a/versioned_docs/version-1.5.0/configuration/devices/_app-protocols.mdx b/versioned_docs/version-1.5.0/configuration/devices/_app-protocols.mdx new file mode 100644 index 00000000..ba6c5c80 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/_app-protocols.mdx @@ -0,0 +1,49 @@ +### Application Protocols + +The collector supports application-based identification, TCP port mapping, and UDP port mapping. The definition files for these respectively are placed in three locations under ``: + +- `/user/definitions/app-definitions-{device-id}.csv` (device-specific) +- `/user/definitions/app-definitions.csv` (user-defined) +- `/package/definitions/app-definitions.csv` (system defaults) +- `/user/definitions/tcp-definitions-{device-id}.csv` (device-specific) +- `/user/definitions/tcp-definitions.csv` (user-defined) +- `/package/definitions/tcp-definitions.csv` (system defaults) +- `/user/definitions/udp-definitions-{device-id}.csv` (device-specific) +- `/user/definitions/udp-definitions.csv` (user-defined) +- `/package/definitions/udp-definitions.csv` (system defaults):::warning + +Definition files must be in CSV format with exactly two columns per row. +::: + +The contents of these files are: + +`app-definitions.csv`: + +```csv +SSH,Secure Shell +RDP,Remote Desktop +HTTP,Web Browsing +HTTPS,Secure Web +``` + +`tcp-definitions.csv`: + +```csv +22,SSH +3389,RDP +80,HTTP +443,HTTPS +``` + +`udp-definitions.csv`: + +```csv +53,DNS +67,DHCP +123,NTP +161,SNMP +``` + +:::note +The collector will fall back on the system defaults if the custom definition files are not found. +::: diff --git a/versioned_docs/version-1.5.0/configuration/devices/azure-blob-storage.mdx b/versioned_docs/version-1.5.0/configuration/devices/azure-blob-storage.mdx new file mode 100644 index 00000000..faa39938 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/azure-blob-storage.mdx @@ -0,0 +1,313 @@ +--- +description: Azure Blob Storage device for reading and processing files from Azure storage containers +--- + +# Azure Blob Storage + +## Synopsis + +Azure Blob Storage device reads and processes files from Azure storage containers. This pull-type device connects to Azure Blob Storage containers to retrieve files in various formats (JSON, JSONL, Parquet) and processes them through DataStream pipelines. The device supports both connection string and service principal authentication methods. 
+ +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: azblob + tags: + pipelines: + status: + properties: + connection_string: + container_name: + tenant_id: + client_id: + client_secret: + account: + path_prefix: + file_format: + batch_size: + poll_interval: + delete_after_processing: + max_concurrent_files: +``` + +## Configuration + +|Field|Type|Required|Default|Description| +|---|---|---|---|---| +|`id`|numeric|Y|-|Unique numeric identifier| +|`name`|string|Y|-|Device name| +|`description`|string|N|-|Optional description of the device's purpose| +|`type`|string|Y|-|Device type identifier (must be `azblob`)| +|`tags`|string[]|N|-|Array of labels for categorization| +|`pipelines`|pipeline[]|N|-|Array of preprocessing pipeline references| +|`status`|boolean|N|true|Boolean flag to enable/disable the device| +|`connection_string`|string|Y*|-|Azure storage account connection string for authentication| +|`container_name`|string|Y|-|Name of the Azure Blob Storage container to read from| +|`tenant_id`|string|Y*|-|Azure tenant ID for service principal authentication| +|`client_id`|string|Y*|-|Azure client ID for service principal authentication| +|`client_secret`|string|Y*|-|Azure client secret for service principal authentication| +|`account`|string|Y*|-|Azure storage account name for service principal authentication| +|`path_prefix`|string|N|""|Path prefix filter to limit which files are processed| +|`file_format`|string|N|json|File format to expect: `json`, `jsonl`, or `parquet`| +|`batch_size`|number|N|1000|Number of records to process in each batch| +|`poll_interval`|number|N|60|Interval in seconds between container polling cycles| +|`delete_after_processing`|boolean|N|false|Whether to delete files after successful processing| +|`max_concurrent_files`|number|N|5|Maximum number of files to process concurrently| + +\* = Conditionally required (see authentication methods below) + +:::note Authentication Methods +Choose either connection string OR service principal authentication: +- **Connection String**: Requires `connection_string` and `container_name` +- **Service Principal**: Requires `tenant_id`, `client_id`, `client_secret`, `account`, and `container_name` +::: + +:::caution Secrets management +Avoid hardcoding `connection_string` and `client_secret` in plain text. Prefer referencing encrypted secrets (e.g., environment variables, vault integrations, or secret files) supported by DataStream. Rotate credentials regularly and restrict scope/permissions to least privilege. +::: + +## Details + +The Azure Blob Storage device operates as a pull-type data source that periodically scans Azure storage containers for new files. The device supports multiple file formats and provides flexible authentication options for enterprise environments. + +**File Format Processing**: The device automatically detects and processes files based on the configured format. JSON files are parsed as individual objects, JSONL files process each line as a separate record, and Parquet files are read using columnar processing for efficient large-data handling. + +**Polling Behavior**: The device maintains state to track processed files and only processes new or modified files during each polling cycle. The polling interval can be adjusted based on data arrival patterns and processing requirements. + +**Concurrent Processing**: Multiple files can be processed simultaneously to improve throughput. 
The concurrency level is configurable and should be tuned based on available system resources and storage account limits. + +**Error Handling**: Files that fail processing are marked and can be retried on subsequent polling cycles. The device provides detailed logging for troubleshooting connection and processing issues. + +## Examples + +### Basic Connection String Authentication + + + + Configuring Azure Blob Storage device with connection string authentication to process JSON files... + + + ```yaml + - id: 1 + name: blob-json-processor + type: azblob + properties: + connection_string: "DefaultEndpointsProtocol=https;AccountName=myaccount;AccountKey=key123;EndpointSuffix=core.windows.net" + container_name: "logs" + file_format: "json" + poll_interval: 300 + ``` + + + Device polls the 'logs' container every 5 minutes for JSON files and processes each file as individual records... + + + ```json + { + "timestamp": "2024-01-15T10:30:00Z", + "level": "INFO", + "message": "Application started", + "source_file": "app-logs-2024-01-15.json", + "container": "logs" + } + ``` + + + +### Service Principal Authentication + + + + Using service principal authentication for enterprise security compliance... + + + ```yaml + - id: 2 + name: enterprise-blob-reader + type: azblob + properties: + tenant_id: "12345678-1234-1234-1234-123456789abc" + client_id: "87654321-4321-4321-4321-cba987654321" + client_secret: "your-client-secret" + account: "enterprisestorage" + container_name: "security-logs" + file_format: "jsonl" + path_prefix: "prod/" + ``` + + + Service principal provides enterprise-grade authentication with path filtering for production logs only... + + + ```json + { + "event_type": "authentication", + "user_id": "user123", + "timestamp": "2024-01-15T10:30:00Z", + "source_file": "prod/auth-events-2024-01-15.jsonl" + } + ``` + + + +### High-Volume Parquet Processing + + + + Processing large Parquet files with optimized settings for high-volume data... + + + ```yaml + - id: 3 + name: parquet-bulk-processor + type: azblob + properties: + connection_string: "DefaultEndpointsProtocol=https;AccountName=datawarehouse;AccountKey=key456" + container_name: "analytics" + file_format: "parquet" + batch_size: 10000 + max_concurrent_files: 3 + poll_interval: 1800 + delete_after_processing: true + ``` + + + Optimized for processing large Parquet files with batching and automatic cleanup after successful processing... + + + ```json + { + "record_id": "rec_001", + "metric_value": 42.5, + "timestamp": "2024-01-15T10:30:00Z", + "batch_info": { + "file": "analytics/metrics-2024-01-15.parquet", + "batch_size": 10000 + } + } + ``` + + + +### Pipeline Processing + + + + Integrating blob storage device with preprocessing pipeline for data transformation... + + + ```yaml + - id: 4 + name: blob-with-pipeline + type: azblob + tags: + - "azure_storage" + - "raw_data" + pipelines: + - timestamp-normalization + - field-enrichment + properties: + connection_string: "DefaultEndpointsProtocol=https;AccountName=rawdata;AccountKey=key789" + container_name: "raw-logs" + file_format: "json" + ``` + + + Raw blob data is processed through pipelines for timestamp normalization and field enrichment before routing to targets... 
+ + + ```json + { + "timestamp": "2024-01-15T10:30:00.000Z", + "level": "INFO", + "message": "User login successful", + "enriched_data": { + "normalized_timestamp": "2024-01-15T10:30:00Z", + "severity_level": 6, + "source_container": "raw-logs" + } + } + ``` + + + +### Path-Based File Organization + + + + Using path prefixes to organize and process files from specific subdirectories... + + + ```yaml + - id: 5 + name: organized-blob-reader + type: azblob + properties: + connection_string: "DefaultEndpointsProtocol=https;AccountName=organized;AccountKey=keyABC" + container_name: "structured-data" + path_prefix: "2024/01/security/" + file_format: "jsonl" + poll_interval: 600 + ``` + + + Device only processes files from the specific path structure, enabling organized data ingestion patterns... + + + ```json + { + "security_event": "failed_login", + "user": "user456", + "timestamp": "2024-01-15T10:30:00Z", + "file_path": "2024/01/security/failed-logins-15.jsonl", + "path_metadata": { + "year": "2024", + "month": "01", + "category": "security" + } + } + ``` + + + +### Error Recovery Configuration + + + + Configuring robust error handling with retry logic and processing state management... + + + ```yaml + - id: 6 + name: resilient-blob-reader + type: azblob + properties: + connection_string: "DefaultEndpointsProtocol=https;AccountName=resilient;AccountKey=keyXYZ" + container_name: "critical-data" + file_format: "json" + poll_interval: 120 + max_concurrent_files: 2 + delete_after_processing: false + ``` + + + Conservative settings preserve files after processing and limit concurrency for stable processing of critical data... + + + ```json + { + "critical_event": "system_alert", + "severity": "high", + "timestamp": "2024-01-15T10:30:00Z", + "processing_info": { + "file_preserved": true, + "retry_count": 0, + "processing_status": "success" + } + } + ``` + + \ No newline at end of file diff --git a/versioned_docs/version-1.5.0/configuration/devices/azure-monitor.mdx b/versioned_docs/version-1.5.0/configuration/devices/azure-monitor.mdx new file mode 100644 index 00000000..01bce86c --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/azure-monitor.mdx @@ -0,0 +1,153 @@ +# Azure Monitor + +Microsoft AzurePull + +## Synopsis + +Creates an Azure Monitor client that collects logs from specified Log Analytics workspaces. Supports multiple log streams with configurable batch sizes and collection frequencies. + +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: azmon + tags: + pipelines: + status: + properties: + tenant_id: + client_id: + client_secret: + workspace_id: + stream: + batch_size: +``` + +## Configuration + +The following fields are used to define the device. 
+ +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `azmon`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Authentication + +|Field|Required|Default|Description| +|---|---|---|---| +|`tenant_id`|Y||Azure tenant ID| +|`client_id`|Y||Azure client ID| +|`client_secret`|Y||Azure client secret| +|`workspace_id`|Y||Log Analytics workspace ID| + +### Events + +|Field|Required|Default|Description| +|---|---|---|---| +|`stream`|Y||Array of Log Analytics queries to collect| +|`batch_size`|N|`1000`|Number of log entries to collect per batch| +|`event_frequency`|N|`300`|Collection frequency in seconds| + +## Examples + +The following are commonly used configuration types. + +### Basic + +The minimum required configuration: + + + + Creating a basic collector... + + + ```yaml + devices: + - id: 1 + name: basic_azmon + type: azmon + properties: + tenant_id: "00000000-0000-0000-0000-000000000000" + client_id: "11111111-1111-1111-1111-111111111111" + client_secret: "your-client-secret" + workspace_id: "22222222-2222-2222-2222-222222222222" + stream: + - "SecurityEvent" + ``` + + + +### Multiple Streams + +The collecter can consume multiple log types with pre-processing: + + + + Specifying multiple log streams... + + + ```yaml + devices: + - id: 2 + name: multi_stream_azmon + type: azmon + pipelines: + - security_events + properties: + tenant_id: "00000000-0000-0000-0000-000000000000" + client_id: "11111111-1111-1111-1111-111111111111" + client_secret: "your-client-secret" + workspace_id: "22222222-2222-2222-2222-222222222222" + stream: + - "SecurityEvent" + - "Syslog" + - "AzureActivity" + batch_size: 2000 + ``` + + + +:::note +The `security_events` pipeline can be used to process and enrich security-related log entries before ingestion. +::: + +### High Volumes + +Performance can be enhanced for high log volumes: + + + + Optimizing for high volumes... + + + ```yaml + devices: + - id: 3 + name: high_volume_azmon + type: azmon + properties: + tenant_id: "00000000-0000-0000-0000-000000000000" + client_id: "11111111-1111-1111-1111-111111111111" + client_secret: "your-client-secret" + workspace_id: "22222222-2222-2222-2222-222222222222" + stream: + - "SecurityEvent | where Level == 'Critical' or Level == 'Error'" + - "Syslog | where Facility == 'auth'" + batch_size: 5000 + ``` + + + +:::warning +Large batch sizes may impact **memory usage** and **processing time**. Monitor system resources and adjust accordingly. +::: diff --git a/versioned_docs/version-1.5.0/configuration/devices/estreamer.mdx b/versioned_docs/version-1.5.0/configuration/devices/estreamer.mdx new file mode 100644 index 00000000..c6f5a9d3 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/estreamer.mdx @@ -0,0 +1,243 @@ +# eStreamer + +Pull + +## Synopsis + +Creates an eStreamer client that connects to an eStreamer server to receive various security events. Supports TLS encryption, event filtering, and batch processing of events. + +For details, see Appendix. 
+ +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: estreamer + tags: + pipelines: + status: + properties: + address: + port: + tls: + status: + cert_name: + key_name: + non_secure: + batch_size: + flush_interval: + inputs: + - id: + status: +``` + +## Key Features + +- Real-time event streaming with TLS encryption +- Support for multiple event types and metadata +- Configurable batch processing and performance tuning +- Automatic reconnection handling +- Event filtering capabilities + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `estreamer`| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`address`|N|`"0.0.0.0"`|Server address to connect to| +|`port`|N|`8302`|Server port| + +### TLS + +|Field|Required|Default|Description| +|---|---|---|---| +|`tls.status`|Y|`true`|Enable TLS encryption (always required)| +|`tls.cert_name`|Y||Client certificate file path| +|`tls.key_name`|Y||Client private key file path| +|`tls.non_secure`|N|`false`|Allow less secure TLS versions| + +:::note +The client certificate and private key files must be placed in the service root directory. +::: + +## Advanced Configuration + +To enhance performance and achieve better event handling, the following settings are used. + +### Events + +The following settings are used for **event processing**: + +|Field|Required|Default|Description| +|---|---|---|---| +|`batch_size`|N|`1000`|Number of events to batch before processing| +|`flush_interval`|N|`60`|Event flush interval in seconds| + +The **event types** are specified with: + +|Field|Required|Default|Description| +|---|---|---|---| +|`inputs[].id`|N|-|Event type ID to process| +|`inputs[].status`|N|`true`|Enable/disable specific event type. Available options: `102` (Connection), `103` (File), `104` (Malware), `106` (Intrusion)| + +## Event Types + +eStreamer supports four main types of security events: + +1. **Connection Events (ID: 102)** + + - Network connection tracking + - Protocol information + - Source and destination details + - Connection statistics + - Available block types: 163, 160, 157, 155, 154, 152, 137 + +2. **File Events (ID: 103)** + + - File transfers detection + - File type identification + - File SHA hashes + - Available block types: 56, 46, 43, 38, 32 + +3. **Malware Events (ID: 104)** + + - Malware detection results + - File disposition + - Threat scores + - Available block types: 62, 47, 44, 35, 33, 24, 16 + +4. **Intrusion Events (ID: 106)** + + - IPS/IDS alerts + - Rule-based detections + - Threat classifications + - Available block types: 60, 45, 42, 41, 34, 25 + +## Examples + +The following are commonly used configuration types. + +### Basic + +For a basic client, enable TLS encryption and use default event types and settings. + + + + Creating a simple eStreamer client... + + + ```yaml + devices: + - id: 1 + name: basic_estreamer + type: estreamer + properties: + address: "192.168.1.100" + port: 8302 + tls: + cert_name: "client.crt" + key_name: "client.key" + ``` + + + +### High-Volume + +To enhance performance, make sure that the batch size is larger, the flush interval is smaller, and multiple workers are used. + + + + Optimizing for high event volumes... 
+ + + ```yaml + devices: + - id: 2 + name: performant_estreamer + type: estreamer + properties: + address: "192.168.1.100" + port: 8302 + tls: + cert_name: "client.crt" + key_name: "client.key" + batch_size: 5000 + flush_interval: 30 + reuse: true + workers: 4 + ``` + + + +### Events + +For selective event processing, enable connection and intrusion events, and disable file and malware events. + + + + Collecting specific event types... + + + ```yaml + devices: + - id: 3 + name: filtered_estreamer + type: estreamer + properties: + address: "192.168.1.100" + port: 8302 + tls: + cert_name: "client.crt" + key_name: "client.key" + inputs: + - id: 102 + status: true + - id: 106 + status: true + ``` + + + +### Legacy Systems + +Compatibility with legacy systems allows use of older TLS versions, standard event processing, and default security events. + + + + Connecting to older eStreamer servers... + + + ```yaml + devices: + - id: 4 + name: legacy_estreamer + type: estreamer + properties: + address: "192.168.1.100" + port: 8302 + tls: + cert_name: "client.crt" + key_name: "client.key" + non_secure: true + ``` + + + +:::warning +For improved security, unless you are connecting to legacy systems that require older TLS versions, set `tls.non_secure: false`. +::: diff --git a/versioned_docs/version-1.5.0/configuration/devices/event-hubs.mdx b/versioned_docs/version-1.5.0/configuration/devices/event-hubs.mdx new file mode 100644 index 00000000..81264f00 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/event-hubs.mdx @@ -0,0 +1,305 @@ +# Event Hubs + +Pull + +## Synopsis + +Creates a collector that connects to Azure Event Hubs and consumes messages from specified event hubs. Supports multiple authentication methods, TLS encryption, and multiple workers for high-throughput scenarios. 
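+
+As a quick orientation before the schema, the sketch below pairs connection-string authentication with blob checkpoint storage and multi-worker consumption. The connection strings are placeholders supplied via environment variables, and `json_parser` is a hypothetical pre-processor pipeline; the examples later on this page break these options out individually.
+
+```yaml
+devices:
+  - id: 10
+    name: sketch_eventhubs
+    type: eventhubs
+    pipelines:
+      - json_parser                # hypothetical pre-processor pipeline
+    properties:
+      client_connection_string: "${EVENTHUBS_CONNECTION_STRING}"   # placeholder
+      event_hub: "logs"
+      consumer_group: "$Default"
+      container_connection_string: "${STORAGE_CONNECTION_STRING}"  # placeholder
+      container_name: "checkpoints"
+      reuse: true
+      workers: 4
+```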
+ +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: eventhubs + tags: + pipelines: + status: + properties: + client_connection_string: + tenant_id: + client_id: + client_secret: + namespace: + event_hub: + consumer_group: + container_connection_string: + container_url: + container_name: + reuse: + workers: + tls: + status: + cert_name: + key_name: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `eventhubs`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +Event Hubs supports two authentication methods: + +**Method 1: Connection String Authentication** +|Field|Required|Default|Description| +|---|---|---|---| +|`client_connection_string`|Y*||Event Hubs connection string (required if not using method 2)| +|`event_hub`|Y||Event hub name to consume from| + +**Method 2: Service Principal Authentication** +|Field|Required|Default|Description| +|---|---|---|---| +|`tenant_id`|Y*||Azure tenant ID (required if not using connection string)| +|`client_id`|Y*||Azure service principal client ID| +|`client_secret`|Y*||Azure service principal client secret| +|`namespace`|Y*||Event Hubs namespace (required if not using connection string)| +|`event_hub`|Y||Event hub name to consume from| + +### Consumer Configuration + +|Field|Required|Default|Description| +|---|---|---|---| +|`consumer_group`|N|`"$Default"`|Consumer group name| + +### Storage Configuration + +EventHubs requires checkpoint storage. Choose one method: + +**Method 1: Storage Account Connection String** +|Field|Required|Default|Description| +|---|---|---|---| +|`container_connection_string`|Y*||Azure Storage connection string| +|`container_name`|Y*||Blob container name for checkpoints| + +\* = Conditionally required (see authentication methods above) + +**Method 2: Storage Account URL** +|Field|Required|Default|Description| +|---|---|---|---| +|`container_url`|Y*||Azure Storage container URL| + +### TLS + +|Field|Required|Default|Description| +|---|---|---|---| +|`tls.status`|N|`false`|Enable TLS encryption| +|`tls.cert_name`|N*||TLS certificate file path (required if TLS enabled)| +|`tls.key_name`|N*||TLS private key file path (required if TLS enabled)| + +\* = Conditionally required (only when `tls.status: true`) + +:::note +TLS certificate and key files must be placed in the service root directory. +::: + +## Advanced Configuration + +To enhance performance and achieve better message handling, the following settings are used. + +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`reuse`|N|`true`|Enable multi-worker mode| +|`workers`|N|`4`|Number of worker processes when reuse enabled| + +## Key Features + +### Multiple Workers + +When `reuse` is enabled, the collector uses multiple workers. Each worker maintains its own Event Hubs consumer and processes messages independently, automatically balancing message volumes. The worker count is capped at the number of available CPU cores. + +### Messages + +The collector supports automatic checkpoint management, consumer group load balancing, multiple Event Hub subscriptions, TLS-encrypted connections, both connection string and service principal authentication, and custom message-processing pipelines. 
+ +## Examples + +The following are commonly used configuration types. + +### Basic with Connection String + +The minimum required configuration using an Event Hubs connection string: + + + + Creating a simple EventHubs consumer with connection string... + + + ```yaml + devices: + - id: 1 + name: basic_eventhubs + type: eventhubs + properties: + client_connection_string: "Endpoint=sb://mynamespace.servicebus.windows.net/;SharedAccessKeyName=mykey;SharedAccessKey=myvalue" + event_hub: "logs" + container_connection_string: "DefaultEndpointsProtocol=https;AccountName=mystorage;AccountKey=mykey" + container_name: "checkpoints" + ``` + + + +### Service Principal Authentication + +Using Azure service principal for authentication: + + + + Connecting with service principal authentication... + + + ```yaml + devices: + - id: 2 + name: sp_eventhubs + type: eventhubs + properties: + tenant_id: "12345678-1234-1234-1234-123456789012" + client_id: "87654321-4321-4321-4321-210987654321" + client_secret: "${AZURE_CLIENT_SECRET}" + namespace: "mynamespace" + event_hub: "security-logs" + consumer_group: "datastream-group" + container_url: "https://mystorage.blob.core.windows.net/checkpoints" + ``` + + + +### High-Volume Processing + +Performance optimization for high message volumes: + + + + Optimizing for throughput with multiple workers... + + + ```yaml + devices: + - id: 3 + name: performant_eventhubs + type: eventhubs + properties: + client_connection_string: "${EVENTHUBS_CONNECTION_STRING}" + event_hub: "high-volume-logs" + consumer_group: "processing-group" + container_connection_string: "${STORAGE_CONNECTION_STRING}" + container_name: "checkpoints" + reuse: true + workers: 8 + ``` + + + +:::note +When `reuse` is enabled, the actual worker count will be capped at the number of available CPU cores. +::: + +### Secure Connection + +Using TLS encryption for enhanced security: + + + + Secure EventHubs connection with TLS... + + + ```yaml + devices: + - id: 4 + name: secure_eventhubs + type: eventhubs + properties: + tenant_id: "${AZURE_TENANT_ID}" + client_id: "${AZURE_CLIENT_ID}" + client_secret: "${AZURE_CLIENT_SECRET}" + namespace: "secure-namespace" + event_hub: "secure-logs" + consumer_group: "secure-group" + container_url: "${STORAGE_CONTAINER_URL}" + tls: + status: true + cert_name: "eventhubs.crt" + key_name: "eventhubs.key" + ``` + + + +### Pipeline Processing + +Messages can be pre-processed using pipelines: + + + + Applying custom processing to EventHubs messages... + + + ```yaml + devices: + - id: 5 + name: pipeline_eventhubs + type: eventhubs + pipelines: + - json_parser + - field_extractor + - normalize_timestamps + properties: + client_connection_string: "${EVENTHUBS_CONNECTION_STRING}" + event_hub: "application-logs" + consumer_group: "processing-group" + container_connection_string: "${STORAGE_CONNECTION_STRING}" + container_name: "checkpoints" + ``` + + + +:::note +Pipelines are processed sequentially and can modify or drop messages before ingestion. +::: + +### Multiple Consumer Groups + +For load distribution across multiple DataStream instances: + + + + Configuring consumer groups for load distribution... 
+ + + ```yaml + devices: + - id: 6 + name: distributed_eventhubs + type: eventhubs + properties: + client_connection_string: "${EVENTHUBS_CONNECTION_STRING}" + event_hub: "distributed-logs" + consumer_group: "instance-1" + container_connection_string: "${STORAGE_CONNECTION_STRING}" + container_name: "checkpoints-instance1" + reuse: true + workers: 4 + ``` + + + +:::warning +Each consumer group should use a separate checkpoint container to avoid conflicts. +::: + diff --git a/versioned_docs/version-1.5.0/configuration/devices/http.mdx b/versioned_docs/version-1.5.0/configuration/devices/http.mdx new file mode 100644 index 00000000..b734de64 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/http.mdx @@ -0,0 +1,273 @@ +# HTTP + +Push + +## Synopsis + +Creates an HTTP server that accepts messages via HTTP POST requests. Supports multiple authentication methods, TLS encryption, and customizable response handling. + +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: http + tags: + pipelines: + status: + properties: + address: + port: + url: + protocol: + content_type: + reuse: + workers: + response: + code: + body: + content_type: + tls: + status: + cert_name: + key_name: + authentication: + type: + username: + password: + header: + key: + value: + hmac: + type: + header: + key: + prefix: + queue: + interval: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `http`| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`protocol`|N|`"tcp"`|Transport protocol (must be tcp)| +|`address`|N|`"0.0.0.0"`|Listen address| +|`port`|Y||Listen port| +|`url`|N|`"/"`|URL path to listen on| +|`content_type`|N|`"application/json"`|Expected content type of incoming requests| + +### Response + +|Field|Required|Default|Description| +|---|---|---|---| +|`response.code`|N|`200`|HTTP response status code| +|`response.body`|N|`{"message":"success"}`|Response body content| +|`response.content_type`|N|`"application/json"`|Response content type| + +### Authentication + +|Field|Required|Default|Description| +|---|---|---|---| +|`authentication.type`|N|`"none"`|Authentication type (`basic`, `header`, or `hmac`)| +|`username`|Y||Username for basic auth (required if type is `basic`)| +|`password`|Y||Password for basic auth (required if type is `basic`)| +|`header.key`|Y||Header name for header auth (required if type is `header`)| +|`header.value`|Y||Header value for header auth (required if type is `header`)| +|`hmac.type`|Y||HMAC algorithm (`sha1`, `sha256`, or `sha512`)| +|`hmac.header`|Y||Header name for HMAC signature| +|`hmac.key`|Y||Secret key for HMAC calculation| +|`hmac.prefix`|N|-|Optional prefix to strip from HMAC header value| + +### TLS + +|Field|Required|Default|Description| +|---|---|---|---| +|`tls.status`|N|`false`|Enable TLS encryption| +|`tls.cert_name`|Y||TLS certificate file path (required if TLS enabled)| +|`tls.key_name`|Y||TLS private key file path (required if TLS enabled)| + +:::note +TLS certificate and key files must be placed in the service root directory. +::: + +## Advanced Configuration + +To enhance performance and achieve better message handling, the following settings are used. 
+ +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`reuse`|N|`true`|Enable socket address reuse| +|`workers`|N|``|Number of worker processes when reuse is enabled| + +### Messages + +|Field|Required|Default|Description| +|---|---|---|---| +|`queue.interval`|N|`1`|Queue processing interval in seconds| + +## Examples + +The following are commonly used configuration types. + +### Basic + +The minimum required settings for a basic server are POST endpoint at `/logs`, a JSON content type, and a simple success response + + + + Create a simple HTTP server... + + + ```yaml + devices: + - id: 1 + name: basic_http + type: http + properties: + port: 8080 + url: "/logs" + content_type: "application/json" + response: + code: 200 + body: '{"status":"ok"}' + ``` + + + +### Authentication + +For authentication, define a username/password, and an environment variable for the password. + + + + HTTP server with basic auth... + + + ```yaml + devices: + - id: 2 + name: auth_http + type: http + properties: + port: 8080 + url: "/api/logs" + authentication: + type: "basic" + username: "vmetric" + password: "P@ssw0rd" + ``` + + + +### API Keys + +To enable API key authentication, use a custom header-based authentication, a configurable header name and value, and an environment variable for secure key storage. + + + + HTTP server with API key header auth... + + + ```yaml + devices: + - id: 3 + name: apikey_http + type: http + properties: + port: 8080 + url: "/api/v1/logs" + authentication: + type: "header" + header: + key: "X-API-Key" + value: "${API_KEY}" + ``` + + + +### HMAC + +For a secure HMAC authentication, use SHA-256 signature verification, a custom signature header, and optional signature prefix. + + + + HTTP server with HMAC signature verification... + + + ```yaml + devices: + - id: 4 + name: hmac_http + type: http + properties: + port: 8080 + url: "/secure/logs" + authentication: + type: "hmac" + hmac: + type: "sha256" + header: "X-Signature" + key: "${HMAC_SECRET}" + prefix: "sha256=" + ``` + + + +:::warning +When using HMAC authentication, ensure that the client calculates the signature using the same algorithm and key. +::: + +### Secure + +For a secure server, use TLS encryption, basic authentication, a custom response code, and secure credentials handling. + + + + HTTPS server with TLS and authentication... + + + ```yaml + devices: + - id: 5 + name: secure_http + type: http + properties: + port: 8443 + url: "/api/ingest" + tls: + status: true + cert_name: "server.crt" + key_name: "server.key" + authentication: + type: "basic" + username: "ingest_user" + password: "${INGEST_PASSWORD}" + response: + code: 201 + body: '{"status":"created"}' + content_type: "application/json" + ``` + + + +:::warning +For production deployments, always use TLS encryption when authentication is enabled to protect credentials and tokens. +::: diff --git a/versioned_docs/version-1.5.0/configuration/devices/ipfix.mdx b/versioned_docs/version-1.5.0/configuration/devices/ipfix.mdx new file mode 100644 index 00000000..d5eabb66 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/ipfix.mdx @@ -0,0 +1,168 @@ +import AppProtocols from "./_app-protocols.mdx"; + +# IPFix + +Push + +## Synopsis + +Creates an IPFix collector that accepts flow data over UDP connections. Supports High-Volume collection with multiple worker processes and configurable buffer sizes. + +For details, see Appendix. 
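+
+For orientation, the minimal sketch below enables socket reuse, enlarges the read buffer, and attaches a pre-processor pipeline. The `flow_enrichment` pipeline name and the tag are placeholders; each property is documented in the sections that follow.
+
+```yaml
+devices:
+  - id: 10
+    name: sketch_ipfix
+    type: ipfix
+    tags:
+      - network                # placeholder tag
+    pipelines:
+      - flow_enrichment        # hypothetical pre-processor pipeline
+    properties:
+      address: "0.0.0.0"
+      port: 4739
+      reuse: true              # workers scale up to the available CPU cores
+      buffer_size: 16384
+```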
+ +## Schema + +```yaml {1,2,4} +id: +name: +description: +type: ipfix +tags: +pipelines: +status: +properties: + address: + port: + reuse: + buffer_size: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `ipfix`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`address`|N|`"0.0.0.0"`|Listen address| +|`port`|N|`4739`|Listen port| +|`reuse`|N|`true`|Enable socket address reuse| + +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`buffer_size`|N|`9000`|Network read buffer size in bytes| + +## Key Features + +The following are unique features that **Director** offers. + +### Multiple Workers + +When `reuse` is enabled, the collector automatically scales to use multiple workers based on available CPU cores. Each worker maintains its own UDP listener, processes flows independently, and writes to a dedicated queue file. + +### Flows + +The collector supports template management for NetFlow v9/IPFix, application identification, port-based protocol mapping, flow state tracking, and statistical aggregation. + +## Examples + +The following are commonly used configuration types. + +### Basic + +The minimum required configuration using defaults: + + + + Create a simple IPFix collector... + + + ```yaml + devices: + - id: 1 + name: basic_ipfix + type: ipfix + properties: + port: 4739 + ``` + + + +### High-Volume + +Performance can be enhanced for high volumes: + + + + Optimizing for high message volumes... + + + ```yaml + devices: + - id: 2 + name: performant_ipfix + type: ipfix + properties: + address: "0.0.0.0" + port: 4739 + reuse: true + buffer_size: 32768 + ``` + + + +:::note +When `reuse` is enabled, the collector automatically scales up to use all available CPU cores. +::: + +### Multiple Flows + +Multiple flow types can be collected through a single device: + + + + Collecting multiple flow types... + + + ```yaml + devices: + - id: 3 + name: multi_flow + type: ipfix + properties: + address: "0.0.0.0" + port: 4739 + reuse: true + buffer_size: 16384 + ``` + + + +:::warning +Each flow type requires its own port. Ensure no port conflicts exist between different collectors. +::: + + + + + + Using application identification... + + + ```yaml + devices: + - id: 4 + name: app_aware_ipfix + type: ipfix + properties: + port: 4739 + reuse: true + buffer_size: 16384 + ``` + + diff --git a/versioned_docs/version-1.5.0/configuration/devices/kafka.mdx b/versioned_docs/version-1.5.0/configuration/devices/kafka.mdx new file mode 100644 index 00000000..fa72cfda --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/kafka.mdx @@ -0,0 +1,239 @@ +# Kafka + +PullPush + +## Synopsis + +Creates a collector that connects to Kafka brokers and consumes messages from specified topics. Supports authentication, TLS encryption, and multiple workers. 
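+
+Purely as orientation, the sketch below shows how SASL credentials, TLS, a consumer group, and multiple workers fit together in one consumer. The broker address, credentials, and the `json_parser` pipeline are placeholders; the examples later on this page cover each option separately.
+
+```yaml
+devices:
+  - id: 10
+    name: sketch_kafka
+    type: kafka
+    pipelines:
+      - json_parser                   # hypothetical pre-processor pipeline
+    properties:
+      address: "kafka.example.com"    # placeholder broker
+      port: 9093
+      username: "consumer"
+      password: "${KAFKA_PASSWORD}"   # placeholder secret
+      topic: "logs"
+      group: "datastream-group"
+      tls:
+        status: true
+        cert_name: "kafka.crt"        # must reside in the service root directory
+        key_name: "kafka.key"
+      reuse: true
+      workers: 4
+```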
+ +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: kafka + tags: + pipelines: + status: + properties: + address: + port: + username: + password: + group: + topic: + balancer: + reuse: + workers: + buffer_size: + stats_frequency: + tls: + status: + cert_name: + key_name: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `kafka`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`address`|N|`"0.0.0.0"`|Kafka broker address| +|`port`|Y||Kafka broker port| +|`username`|N|-|SASL username| +|`password`|N|-|SASL password| +|`group`|N|`"vmetric"`|Consumer group ID| +|`topic`|Y||Topic to consume from| +|`balancer`|N|`"roundrobin"`|Partition balancing strategy| + +### TLS + +|Field|Required|Default|Description| +|---|---|---|---| +|`tls.status`|N|`false`|Enable TLS encryption| +|`tls.cert_name`|Y||TLS certificate file path (required if TLS enabled)| +|`tls.key_name`|Y||TLS private key file path (required if TLS enabled)| + +:::note +The TLS certificate and key files must be placed in the service root directory. +::: + +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`reuse`|N|`true`|Enable multi-worker mode| +|`workers`|N|`4`|Number of worker processes when reuse enabled| +|`buffer_size`|N|`9000`|Read buffer size in bytes| +|`stats_frequency`|N|`300`|Statistics collection interval in seconds| + +## Key Features + +### Multiple Workers + +When `reuse` is enabled, the collector uses multiple workers. Each worker maintains its own Kafka consumer, and processes its own messages independently, automatically balancing message volumes. The worker count is capped at the number of available CPU cores. + +### Messages + +The collector supports message offset tracking and commits, automatic consumer group rebalancing, multiple topic subscriptions, TLS-encrypted connections and SASL authentication, and custom message-processing pipelines. + +## Examples + +The following are commonly used configuration types. + +### Basic + +The minimum required configuration creates the consumer: + + + + Creating a simple Kafka consumer... + + + ```yaml + devices: + - id: 1 + name: basic_kafka + type: kafka + properties: + address: "kafka.example.com" + port: 9092 + topic: "logs" + ``` + + + +### Secure + +The consumer can connect to secure Kafka brokers: + + + + Connecting with authentication and encryption... + + + ```yaml + devices: + - id: 2 + name: secure_kafka + type: kafka + properties: + address: "kafka.example.com" + port: 9093 + username: "consumer" + password: "secret" + topic: "secure-logs" + tls: + status: true + cert_name: "kafka.crt" + key_name: "kafka.key" + ``` + + + +### High-Volume + +Performance can be enhanced for high message volumes: + + + + Optimizing for throughput... + + + ```yaml + devices: + - id: 3 + name: performant_kafka + type: kafka + properties: + address: "kafka.example.com" + port: 9092 + topic: "high-volume-logs" + group: "high-perf-group" + reuse: true + workers: 4 + buffer_size: 32768 + stats_frequency: 60 + ``` + + + +:::note +When `reuse` is enabled, the actual worker count will be capped at the number of available CPU cores. 
+::: + +### Consumer Groups + +Message consumption can be coordinated: + + + + Configuring consumer group behavior... + + + ```yaml + devices: + - id: 4 + name: group_kafka + type: kafka + properties: + address: "kafka.example.com" + port: 9092 + topic: "shared-logs" + group: "processing-group" + balancer: "roundrobin" + reuse: true + workers: 2 + ``` + + + +:::warning +The consumers in the same group must use compatible configuration settings. +::: + +### Messages + +Messages can be pre-processed: + + + + Applying custom processing to messages... + + + ```yaml + devices: + - id: 5 + name: pipeline_kafka + type: kafka + pipelines: + - json_parser + - field_extractor + properties: + address: "kafka.example.com" + port: 9092 + topic: "raw-logs" + group: "processing-group" + ``` + + + +:::note +Pipelines are processed sequentially, and can modify or drop messages before ingestion. +::: diff --git a/versioned_docs/version-1.5.0/configuration/devices/linux.mdx b/versioned_docs/version-1.5.0/configuration/devices/linux.mdx new file mode 100644 index 00000000..d75566d2 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/linux.mdx @@ -0,0 +1,106 @@ +# Linux + +Agentless + +## Synopsis + +Creates a collector that connects to Linux servers via SSH to gather system metrics and performance data. Supports both password and private key authentication methods, with configurable connection parameters and debug options. + +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: linux + tags: + pipelines: + status: + properties: + protocol: + address: + port: + username: + password: + private_key: + passphrase: + timeout: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `linux`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`protocol`|N|`"ssh"`|Connection protocol (only `ssh` supported)| +|`address`|Y||Target server address| +|`port`|N|`22`|SSH port number| +|`username`|N|``|Must be specified if `use_ssh_credentials` is `false`| +|`password`|N|``|Must be specified if `use_ssh_credentials` is `false`| +|`private_key`|N|-|Path to SSH private key file| +|`passphrase`|N|-|Private key passphrase (if private key is used)| + +## Advanced Configuration + +The following settings can be used to fine tune the collector's behavior. + +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`timeout`|N|`30`|SSH connection timeout in seconds| + +## Examples + +The following are commonly used configuration types. + +### Private Keys + +SSH keys can be used for authentication: + +```yaml +devices: + - id: 2 + name: secure_linux + type: linux + properties: + address: "10.0.0.50" + port: 2222 + username: "monitor" + private_key: "/path/to/private_key" + passphrase: "private_key_passphrase" +``` + +### Passwords + +If SSH credentials are not used, passwords can be specified: + +```yaml +devices: + - id: 1 + name: linux_server + type: linux + properties: + address: "192.168.1.100" + username: "admin" + password: "secret" +``` + +:::warning +The private key file must be readable by the service user, and have the appropriate permissions (600 or more restrictive). 
+::: diff --git a/versioned_docs/version-1.5.0/configuration/devices/microsoft-sentinel.mdx b/versioned_docs/version-1.5.0/configuration/devices/microsoft-sentinel.mdx new file mode 100644 index 00000000..6a1adc2b --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/microsoft-sentinel.mdx @@ -0,0 +1,226 @@ +# Microsoft Sentinel + +Microsoft AzurePull + +## Synopsis + +Creates a collector that fetches security incidents from Microsoft Sentinel workspaces. Supports authentication, batch processing, and automatic incident tracking with incremental updates. + +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: sentinel + tags: + pipelines: + status: + properties: + tenant_id: + client_id: + client_secret: + subscription_id: + resource_group: + workspace_name: + batch_size: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `sentinel`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Azure Authentication + +|Field|Required|Default|Description| +|---|---|---|---| +|`tenant_id`|Y||Azure tenant ID| +|`client_id`|Y||Azure client ID| +|`client_secret`|Y||Azure client secret| +|`subscription_id`|Y||Azure subscription ID| + +### Workspaces + +|Field|Required|Default|Description| +|---|---|---|---| +|`resource_group`|Y||Azure resource group name| +|`workspace_name`|Y||Log Analytics workspace name| + +### Collection + +|Field|Required|Default|Description| +|---|---|---|---| +|`batch_size`|N|`1000`|Number of incidents to fetch per batch| + +## Key Features + +### Incidents + +The collector captures comprehensive incident data such as basic incident details (ID, title, description), severity and status, classification and labels, owner information, temporal data (create, modify, activity dates and times), and information on resources. + +### Incremental Updates + +The collector tracks the last processed incident's timestamp, fetches only new incidents since the last checkpoint, orders incidents by creation time, and supports batch processing. + +### Normalization + +Incidents are automatically normalized with ECS field mapping in addition to using consistent timestamp formats, structured label handling, and owner information. (See Appendix for details of ECS.) + +## Examples + +The following are commonly used configuration types. + +### Basic + +A basic collector can be created as below: + + + + Creating a simple Sentinel collector... + + + ```yaml + devices: + - id: 1 + name: basic_sentinel + type: sentinel + properties: + tenant_id: "00000000-0000-0000-0000-000000000000" + client_id: "11111111-1111-1111-1111-111111111111" + client_secret: "your-client-secret" + subscription_id: "22222222-2222-2222-2222-222222222222" + resource_group: "your-resource-group" + workspace_name: "your-workspace" + ``` + + + +### High-Volume + +Large numbers of incidents can be collected: + + + + Optimizing for high incident volumes... 
+ + + ```yaml + devices: + - id: 2 + name: volume_sentinel + type: sentinel + properties: + tenant_id: "00000000-0000-0000-0000-000000000000" + client_id: "11111111-1111-1111-1111-111111111111" + client_secret: "your-client-secret" + subscription_id: "22222222-2222-2222-2222-222222222222" + resource_group: "your-resource-group" + workspace_name: "your-workspace" + batch_size: 5000 + ``` + + + +### Pipelines + +Incidents can be enriched and processed: + + + + Applying custom processing to incidents... + + + ```yaml + devices: + - id: 3 + name: pipeline_sentinel + type: sentinel + pipelines: + - incident_enricher + - severity_classifier + properties: + tenant_id: "00000000-0000-0000-0000-000000000000" + client_id: "11111111-1111-1111-1111-111111111111" + client_secret: "your-client-secret" + subscription_id: "22222222-2222-2222-2222-222222222222" + resource_group: "your-resource-group" + workspace_name: "your-workspace" + batch_size: 1000 + ``` + + + +### Multiple Workspaces + +Information can be collected from multiple workspaces: + + + + Configuring multiple workspace collectors... + + + ```yaml + devices: + - id: 4 + name: prod_sentinel + type: sentinel + properties: + tenant_id: "00000000-0000-0000-0000-000000000000" + client_id: "11111111-1111-1111-1111-111111111111" + client_secret: "your-client-secret" + subscription_id: "22222222-2222-2222-2222-222222222222" + resource_group: "prod-rg" + workspace_name: "prod-workspace" + - id: 5 + name: dev_sentinel + type: sentinel + properties: + tenant_id: "00000000-0000-0000-0000-000000000000" + client_id: "11111111-1111-1111-1111-111111111111" + client_secret: "your-client-secret" + subscription_id: "22222222-2222-2222-2222-222222222222" + resource_group: "dev-rg" + workspace_name: "dev-workspace" + ``` + + + +:::note +Each workspace collector maintains its own checkpoint, and can be configured independently. +::: + +### Incident Fields + +The collector maps incident fields to standardized ECS fields: + +|Sentinel Field|ECS Field|Description| +|---|---|---| +|`Title`|`event.name`|Incident title| +|`Description`|`event.description`|Incident description| +|`Severity`|`event.severity`|Incident severity level| +|`Status`|`event.outcome`|Current incident status| +|`Classification`|`event.classification`|Incident classification| +|`CreatedTimeUTC`|`event.created`|Incident creation time| +|`LastModifiedTimeUTC`|`event.last_modified`|Last update time| +|`FirstActivityTimeUTC`|`event.start`|First detected activity| +|`LastActivityTimeUTC`|`event.end`|Latest detected activity| +|`OwnerEmail`|`user.email`|Assigned owner's email| +|`OwnerAssignedTo`|`user.name`|Assigned owner's name| +|`ResourceId`|`cloud.resource_id`|Azure resource ID| +|`Labels`|`labels`|Incident labels| + +:::note +All timestamps are normalized at nanosecond level in UTC. +::: diff --git a/versioned_docs/version-1.5.0/configuration/devices/nats.mdx b/versioned_docs/version-1.5.0/configuration/devices/nats.mdx new file mode 100644 index 00000000..e9ed5f0f --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/nats.mdx @@ -0,0 +1,242 @@ +# NATS + +PushPull + +## Synopsis + +Creates a JetStream consumer that connects to NATS servers and processes messages from specified streams and subjects. Supports authentication, TLS encryption, and multiple workers with automatic message acknowledgment. 
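+
+As a brief sketch, the configuration below ties the JetStream stream, consumer, and subject settings together with authentication, TLS, and multiple workers. The server address, credentials, and pipeline name are placeholders; field-by-field details follow in the schema and tables below.
+
+```yaml
+devices:
+  - id: 10
+    name: sketch_nats
+    type: nats
+    pipelines:
+      - json_parser                 # hypothetical pre-processor pipeline
+    properties:
+      address: "nats.example.com"   # placeholder server
+      port: 4222
+      username: "consumer"
+      password: "${NATS_PASSWORD}"  # placeholder secret
+      stream: "LOGS"
+      consumer: "datastream"
+      subject: "logs.>"
+      tls:
+        status: true
+        cert_name: "nats.crt"       # must reside in the service root directory
+        key_name: "nats.key"
+      reuse: true
+      workers: 4
+```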
+ +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: nats + tags: + pipelines: + status: + properties: + address: + port: + username: + password: + stream: + consumer: + subject: + reuse: + workers: + buffer_size: + tls: + status: + cert_name: + key_name: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `nats`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`address`|N|`"0.0.0.0"`|NATS server address| +|`port`|Y||NATS server port| +|`username`|N|-|Authentication username| +|`password`|N|-|Authentication password| +|`stream`|Y|`"vmetric"`|JetStream stream name| +|`consumer`|Y|`"vmetric"`|JetStream consumer name| +|`subject`|Y||Subject pattern to subscribe to| + +### TLS + +|Field|Required|Default|Description| +|---|---|---|---| +|`tls.status`|N|`false`|Enable TLS encryption| +|`tls.cert_name`|Y||TLS certificate file path (required if TLS enabled)| +|`tls.key_name`|Y||TLS private key file path (required if TLS enabled)| + +:::note +The TLS certificate and key files must be placed in the service root directory. +::: + +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`reuse`|N|`true`|Enable multi-worker mode| +|`workers`|N|`4`|Number of worker processes when reuse enabled| +|`buffer_size`|N|`9000`|Read buffer size in bytes| + +## Key Features + +The following are unique features that **Director** offers. + +### Multiple Workers + +When `reuse` is enabled, the collector uses multiple workers each of which maintains its own NATS consumer. Workers process messages independently, and messages are automatically acknowledged. + +:::note +The worker count will be capped at the number of available CPU cores. +::: + +### Messages + +The collector supports a JetStream persistent message store, message acknowledgment and retry, multiple subject subscriptions, and custom message processing pipelines. It also provides security through TLS-encrypted connections and basic authentication. + +## Examples + +The following are commonly used configuration types. + +### Basic + +A basic consumer can be created easily. + + + + Creating a simple NATS consumer... + + + ```yaml + devices: + - id: 1 + name: basic_nats + type: nats + properties: + address: "nats.example.com" + port: 4222 + subject: "logs.>" + ``` + + + +### Secure + +The consumer can connect to secure NATS servers. + + + + Connecting with authentication and encryption... + + + ```yaml + devices: + - id: 2 + name: secure_nats + type: nats + properties: + address: "nats.example.com" + port: 4222 + username: "consumer" + password: "secret" + stream: "LOGS" + consumer: "processor" + subject: "logs.secure.>" + tls: + status: true + cert_name: "nats.crt" + key_name: "nats.key" + ``` + + + +### High-Volume + +Performance can be enhanced for high message volumes. + + + + Optimizing for high throughput... 
+ + + ```yaml + devices: + - id: 3 + name: performant_nats + type: nats + properties: + address: "nats.example.com" + port: 4222 + stream: "LOGS" + consumer: "high-perf" + subject: "logs.>" + reuse: true + workers: 4 + buffer_size: 32768 + ``` + + + +### Subject Filters + +Targeted message processing is possible: + + + + Configuring for subject-based filtering... + + + ```yaml + devices: + - id: 4 + name: filtered_nats + type: nats + properties: + address: "nats.example.com" + port: 4222 + stream: "LOGS" + consumer: "filtered" + subject: "logs.*.error" + reuse: true + workers: 2 + ``` + + + +:::tip +NATS subjects support `*` and `>` as wildcards for single and multiple tokens respectively. +::: + +### Messages + +Messages can be pre-processed based on custom criteria: + + + + Applying custom processing to messages... + + + ```yaml + devices: + - id: 5 + name: pipeline_nats + type: nats + pipelines: + - json_parser + - field_extractor + properties: + address: "nats.example.com" + port: 4222 + stream: "LOGS" + consumer: "processed" + subject: "logs.raw.>" + ``` + + + +:::note +Pipelines are processed sequentially, and can modify or drop messages before ingestion. +::: diff --git a/versioned_docs/version-1.5.0/configuration/devices/netflow.mdx b/versioned_docs/version-1.5.0/configuration/devices/netflow.mdx new file mode 100644 index 00000000..761825ea --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/netflow.mdx @@ -0,0 +1,168 @@ +import AppProtocols from "./_app-protocols.mdx" + +# NetFlow + +Push + +## Synopsis + +Creates a NetFlow v5 collector that accepts flow data over UDP connections. Supports High-Volume collection with multiple workers and configurable buffer sizes. + +:::warning +The collector only supports legacy NetFlow types, such as NetFlow v5. For **NetFlow v9**, use the `ipfix` collector type. +::: + +For details, see Appendix. + +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: netflow + tags: + pipelines: + status: + properties: + address: + port: + reuse: + buffer_size: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `netflow`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`address`|N|`"0.0.0.0"`|Listen address| +|`port`|N|`2055`|Listen port| +|`reuse`|N|`true`|Enable socket address reuse| + +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`buffer_size`|N|`9000`|Network read buffer size in bytes| + +## Key Features + +The following are unique features that **Director** offers. + +### Multiple Workers + +When `reuse` is enabled, the collector automatically spawns multiple workers which maintain their own UDP listeners, process flows independently, and write to dedicated queue files. + +:::note +The collector scales up to use all available CPU cores. +::: + +### Flows + +The collector supports fixed format NetFlow v5 records, application identification, port-based protocol mapping, flow state tracking, and statistical aggregation. + +## Examples + +The following are commonly used configuration types. + +### Basic + +A basic collector can be created easily: + + + + Creating a simple NetFlow collector... 
+ + + ```yaml + devices: + - id: 1 + name: basic_netflow + type: netflow + properties: + port: 2055 + ``` + + + +### High-Volume + +Performance can be enhanced for high flow volumes: + + + + Optimizing for high message volumes... + + + ```yaml + devices: + - id: 2 + name: performant_netflow + type: netflow + properties: + address: "0.0.0.0" + port: 2055 + reuse: true + buffer_size: 32768 + ``` + + + +### Legacy Networks + +Collecting flows from older network devices is possible: + + + + Collecting from legacy network devices... + + + ```yaml + devices: + - id: 3 + name: legacy_netflow + type: netflow + properties: + address: "0.0.0.0" + port: 2055 + reuse: true + buffer_size: 16384 + ``` + + + + + + + + Using application identification... + + + ```yaml + devices: + - id: 4 + name: app_aware_netflow + type: netflow + properties: + port: 2055 + reuse: true + buffer_size: 16384 + ``` + + diff --git a/versioned_docs/version-1.5.0/configuration/devices/overview.mdx b/versioned_docs/version-1.5.0/configuration/devices/overview.mdx new file mode 100644 index 00000000..6e086b63 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/overview.mdx @@ -0,0 +1,162 @@ +--- +pagination_prev: null +sidebar_label: Overview +--- + +# Devices: Overview + +Devices are configurable components that serve as data collection points in your environment. They represent various input mechanisms for receiving, processing, and forwarding log data and security events. Each device is defined using a standardized YAML configuration format that specifies its behavior, connection parameters, and processing options. + +**DataStream** uses devices as an abstraction layer to manage telemetry. As such, they decouple data sources from **DataStream**'s pipelines. + +:::note +Each device type provides specific configuration options detailed in their respective sections. +::: + +## Definitions + +Devices operate on the following principles: + +1. **Unified Configuration Structure**: All devices share a common configuration framework with device-specific properties. +2. **Data Collection**: Devices receive data through network connections, APIs, or direct system access. +3. **Pipeline Integration**: Devices can link to preprocessing pipelines for data transformation. +4. **Stateful Operation**: Devices maintain their operational state and can be enabled or disabled. + +:::note +Devices enable: + +> **Authentication**: Basic authentication, API keys, HMAC signing, and client certificates.\ +> **Encryption**: _TLS_/_SSL_, _SNMPv3_ privacy, and custom encryption. + +They also provide _access control_, and _audit logging_. +::: + +## Configuration + +All devices share the following base configuration fields: + +|Field|Required|Description| +|---|---|---| +|`id`|Y|Unique numeric identifier| +|`name`|Y|Device name| +|`description`|-|Optional description of the device's purpose| +|`type`|Y|Device type identifier (e.g., `http`, `syslog`, `tcp`)| +|`tags`|-|Array of labels for categorization| +|`pipelines`|-|Array of preprocessing pipeline references| +|`status`|-|Boolean flag to enable/disable the device (default: `true`)| + +:::tip +Each device type provides specific options detailed in its respective section. +::: + +Use the `id` of the device to refer to it in your configurations. 
+ +**Example**: + +```yaml +devices: + - id: 1 + name: http_logs + type: http + properties: + port: 8080 + content_type: "application/json" +``` + +This is an HTTP device listening on port `8080`, and it expects the incoming data to be in JSON format. + +## Deployment + +Devices enable handling and processing of data through the following means: + +**Pipelines**: These provide the capability to parse and enrich messages with custom rules. Data is transformed—through _field mapping_ and _type conversion_—and events are correlated and aggregated, custom fields are extracted, tagged, filtered, and routed based on message content. For certain conditions, alerts are generated. Additionally, there is _templating_ support for output formatting, and built-in and custom functions. + +**Queues**: These provide persistent message storage, configurable batch processing capability, automatic checkpoint recovery, and rotation and cleanup ability. + +Devices offer various deployment options for different purposes: + +* **Standalone** - Sources are connected directly to **Director**, bypassing any intermediate mechanism. + + > Sources → **Director** + +* **High Availability** - Sources are first connected to a load balancer—which distributes the work to multiple processes—after which they are streamed to either a **Director** cluster: + + > Sources → Load Balancer → **Director** + + -or- sources are connected to a cluster with _Variable Information Period_ (VIP): + + > Sources → **Director** Cluster + +## Device Types + +The system supports the following device types: + +* **Network-Based** - These devices listen for incoming network connections: + + * **HTTP**: Accepts JSON data via HTTP/HTTPS POST requests with authentication options + * **TCP**: Receives messages over TCP connections with framing and TLS support + * **UDP**: Collects datagram-based messages with high throughput capabilities + * **Syslog**: Specialized for syslog format messages with forwarding capabilities + +* **Cloud Integration** - These devices connect to cloud services: + * **Azure Monitor**: Collects logs from Azure Log Analytics workspaces + +* **Security-Specific** - These devices integrate with security products: + + * **eStreamer**: Connects to eStreamer servers for security event collection + +* **System Integration** - These devices interact with operating systems: + + * **Windows**: Collects Windows events, performance data, and metrics + +## Use Cases + +Devices can be used in the following scenarios: + +* **Infrastructure monitoring**: Provides system performance metrics, event logs, resource utilization, and service availability information. + +* **Security operations**: Enables security event monitoring, threat detection, compliance monitoring, and provides audit trails. + +* **Application telemetry**: Provides application logs and performance metrics, and enables error tracking and user activity monitoring. + +* **Network monitoring**: Provides network device logs and SNMP data, and enables traffic analysis and connection tracking + +* **Performance**: Improves telemetry performance through the multi-worker architecture, dynamic scaling ability, socket reuse optimization, buffer management, and resource monitoring capability. + +## Implementation Strategies + +### Listening + +Data is received from listeners using various network protocols with two types of collection: + +**Push-based**: Devices like _Syslog_ (via _UDP_/_TCP_), _SNMP Traps_, _HTTP/HTTPS_, _TCP_/_UDP_, and _eStreamer_ send event data. 
+ +**Pull-based**: Data is fetched from _Kafka_ topics, _Microsoft Sentinel_, _REST_ APIs, database queries, and other custom integration types. + +### Monitoring + +For monitoring operating systems, **Director** uses a unified agent-based approach with two types of deployment: + +**Managed** (Traditional): The agent is installed and managed by system administrators. This provides persistent installation on the target system. Local data is buffered in the emergence of network issues. **Director** supports _Windows_, _Linux_, _macOS_, _Solaris_, and _AIX_. + +**Auto-managed** (Agentless): +The agent is automatically deployed and managed, no manual installation is required. Auto-managed agents provide local data buffering, network resilience, and performance optimization. This deployment type is self-healing, since the agent is automatically redeployed if the process terminates. Also, it supports _remote credential management_. Deployment is done using **WinRM** for _Windows_, and **SSH** for _Linux_, _macOS_, _Solaris_, and _AIX_. + +Both approaches provide local data processing, store-and-forward capability against connectivity issues, real-time metrics and events, and native OS monitoring. The key difference is deployment and lifecycle management, not functionality. + +### Layered Collectors + +Configure multiple devices to handle different aspects of data collection: + +* External-facing HTTP endpoints for application logs +* Internal TCP/UDP listeners for network device logs +* Specialized connectors for cloud and security products + +### Pipeline Integration + +Enhance device functionality by attaching preprocessing pipelines: + +* Filtering unwanted events +* Normalizing data formats +* Enriching events with additional context +* Transforming raw data into structured formats diff --git a/versioned_docs/version-1.5.0/configuration/devices/rabbitmq.mdx b/versioned_docs/version-1.5.0/configuration/devices/rabbitmq.mdx new file mode 100644 index 00000000..61787a47 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/rabbitmq.mdx @@ -0,0 +1,288 @@ +# RabbitMQ + +PullPush + +## Synopsis + +Creates a consumer that connects to RabbitMQ servers and consumes messages from specified exchanges and queues. Supports multiple authentication methods, exchange types, TLS encryption, and multiple workers with automatic message acknowledgment. 
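+
+For orientation, the sketch below wires an exchange, a queue, and a routing key together with authentication, TLS, and multiple workers. The server address, credentials, and the `json_parser` pipeline are placeholders; the schema and examples below define each field.
+
+```yaml
+devices:
+  - id: 10
+    name: sketch_rabbitmq
+    type: rabbitmq
+    pipelines:
+      - json_parser                      # hypothetical pre-processor pipeline
+    properties:
+      address: "rabbitmq.example.com"    # placeholder server
+      port: 5671
+      username: "consumer"
+      password: "${RABBITMQ_PASSWORD}"   # placeholder secret
+      authentication: "plain"
+      exchange:
+        name: "logs"
+        type: "topic"
+      queue:
+        name: "app_logs"
+        key: "app.#"                     # topic routing pattern
+      tls:
+        status: true
+        cert_name: "rabbitmq.crt"        # must reside in the service root directory
+        key_name: "rabbitmq.key"
+      reuse: true
+      workers: 4
+```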
+ +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: rabbitmq + tags: + pipelines: + status: + properties: + address: + port: + username: + password: + authentication: + exchange: + name: + type: + queue: + name: + key: + reuse: + workers: + buffer_size: + tls: + status: + cert_name: + key_name: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `rabbitmq`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`address`|N|`"0.0.0.0"`|RabbitMQ server address| +|`port`|Y||RabbitMQ server port| +|`username`|N|-|Authentication username| +|`password`|N|-|Authentication password| +|`authentication`|Y||Auth type (`plain`, `amqplain`)| + +### Exchange + +|Field|Required|Default|Description| +|---|---|---|---| +|`exchange.name`|Y||Exchange name| +|`exchange.type`|Y||Exchange type (`direct`, `fanout`, `topic`, `x-custom`)| + +### Queue + +|Field|Required|Default|Description| +|---|---|---|---| +|`queue.name`|Y||Queue name| +|`queue.key`|Y||Routing key pattern| + +### TLS + +|Field|Required|Default|Description| +|---|---|---|---| +|`tls.status`|N|`false`|Enable TLS encryption| +|`tls.cert_name`|Y||TLS certificate file path (required if TLS enabled)| +|`tls.key_name`|Y||TLS private key file path (required if TLS enabled)| + +:::note +The TLS certificate and key files must be placed in the service root directory. +::: + +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`reuse`|N|`true`|Enable multi-worker mode| +|`workers`|N|`4`|Number of worker processes when reuse enabled| +|`buffer_size`|N|`9000`|Read buffer size in bytes| + +## Key Features + +The following are unique features that **Director** offers. + +### Exchange Types + +The collector supports various exchange types: + +|Exchange Type|Description| +|---|---| +|`direct`|Exact routing key match| +|`fanout`|Broadcast to all bound queues| +|`topic`|Pattern-based routing using wildcards| + +### Multiple Workers + +When `reuse` is enabled, the collector uses multiple workers which maintain their own RabbitMQ consumers and process messages independently. Messages are automatically acknowledged. + +:::note +The worker count will be capped at the number of available CPU cores. +::: + +### Messages + +The collector supports multiple exchange types, pattern-based routing, message acknowledgment and rejection, dead letter exchanges, and custom message processing pipelines. It also supports TLS-encrypted connections and multiple authentication methods. + +## Examples + +The following are commonly used configuration types. + +### Basic + +A basic consumer can be easily created. + + + + Creating a simple RabbitMQ consumer... + + + ```yaml + devices: + - id: 1 + name: basic_rabbitmq + type: rabbitmq + properties: + address: "rabbitmq.example.com" + port: 5672 + authentication: "plain" + exchange: + name: "logs" + type: "direct" + queue: + name: "app_logs" + key: "app.*" + ``` + + + +### Secure + +The collector can connect to secure servers: + + + + Connecting with authentication and encryption... 
+ + + ```yaml + devices: + - id: 2 + name: secure_rabbitmq + type: rabbitmq + properties: + address: "rabbitmq.example.com" + port: 5671 + username: "consumer" + password: "secret" + authentication: "plain" + exchange: + name: "secure_logs" + type: "topic" + queue: + name: "secure_app_logs" + key: "secure.app.#" + tls: + status: true + cert_name: "rabbitmq.crt" + key_name: "rabbitmq.key" + ``` + + + +### High-Volume + +Performance can be enhanced for high message volumes: + + + + Optimizing for high throughput... + + + ```yaml + devices: + - id: 3 + name: performant_rabbitmq + type: rabbitmq + properties: + address: "rabbitmq.example.com" + port: 5672 + authentication: "plain" + exchange: + name: "high_volume" + type: "direct" + queue: + name: "high_volume_logs" + key: "logs" + reuse: true + workers: 4 + buffer_size: 32768 + ``` + + + +### Topic Exchange + +Pattern-based message routing is possible: + + + + Configuring topic-based routing... + + + ```yaml + devices: + - id: 4 + name: topic_rabbitmq + type: rabbitmq + properties: + address: "rabbitmq.example.com" + port: 5672 + authentication: "plain" + exchange: + name: "logs" + type: "topic" + queue: + name: "filtered_logs" + key: "app.*.error" + ``` + + + +:::tip +Topic routing keys support `*` and `#` wildards for single and multiple words respectively. +::: + +### Pipelines + +Messages can be pre-processed: + + + + Applying custom processing to messages... + + + ```yaml + devices: + - id: 5 + name: pipeline_rabbitmq + type: rabbitmq + pipelines: + - json_parser + - field_extractor + properties: + address: "rabbitmq.example.com" + port: 5672 + authentication: "plain" + exchange: + name: "raw_logs" + type: "direct" + queue: + name: "processed_logs" + key: "logs" + ``` + + + +:::note +Pipelines are processed sequentially, and can modify or drop messages before ingestion. +::: diff --git a/versioned_docs/version-1.5.0/configuration/devices/redis.mdx b/versioned_docs/version-1.5.0/configuration/devices/redis.mdx new file mode 100644 index 00000000..aa91ca69 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/redis.mdx @@ -0,0 +1,234 @@ +# Redis + +PullPush + +## Synopsis + +Creates a Pub/Sub subscriber that connects to Redis servers and processes messages from specified channels. Supports authentication, TLS encryption, and multiple workers with automatic message handling. 
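+
+As a short sketch, the configuration below subscribes to a channel pattern over an authenticated, TLS-encrypted connection with multiple workers. The server address, credentials, and pipeline name are placeholders; every field is described in the sections that follow.
+
+```yaml
+devices:
+  - id: 10
+    name: sketch_redis
+    type: redis
+    pipelines:
+      - json_parser                  # hypothetical pre-processor pipeline
+    properties:
+      address: "redis.example.com"   # placeholder server
+      port: 6379
+      username: "subscriber"
+      password: "${REDIS_PASSWORD}"  # placeholder secret
+      channel: "logs.*"              # pattern subscription
+      tls:
+        status: true
+        cert_name: "redis.crt"       # must reside in the service root directory
+        key_name: "redis.key"
+      reuse: true
+      workers: 2
+```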
+ +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: redis + tags: + pipelines: + status: + properties: + address: + port: + username: + password: + channel: + reuse: + workers: + buffer_size: + tls: + status: + cert_name: + key_name: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `redis`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`address`|N|`"0.0.0.0"`|Redis server address| +|`port`|Y||Redis server port| +|`username`|N|-|Authentication username| +|`password`|N|-|Authentication password| +|`channel`|Y||Channel pattern to subscribe to| + +### TLS + +|Field|Required|Default|Description| +|---|---|---|---| +|`tls.status`|N|`false`|Enable TLS encryption| +|`tls.cert_name`|Y||TLS certificate file path (required if TLS enabled)| +|`tls.key_name`|Y||TLS private key file path (required if TLS enabled)| + +:::note +The TLS certificate and key files must be placed in the service root directory. +::: + +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`reuse`|N|`false`|Enable multi-worker mode| +|`workers`|N|`4`|Number of worker processes when reuse enabled| +|`buffer_size`|N|`9000`|Read buffer size in bytes| + +## Advanced Features + +The following are unique features that **Director** offers. + +### Channel Patterns + +The collector supports Redis Pub/Sub channel consuming with exact matching (e.g. `logs`) and single- or multi-pattern matching (e.g. `logs.*` or `logs.* app.*`). + +### Multiple Workers + +When `reuse` is enabled, the collector uses multiple workers which maintain their own Redis subscriptions and process messages independently thanks to which messages are automatically distributed. + +:::note +The worker count is capped at the number of available CPU cores. +::: + +### Messages + +The collector supports pattern-based subscriptions, multiple channel subscriptions, and custom message processing pipelines. It also supports TLS-encrypted connections and authentication methods. + +## Examples + +The following are commonly used configuration types. + +### Basic + +A basic collector can be easily created: + + + + Creating a simple Redis subscriber... + + + ```yaml + devices: + - id: 1 + name: basic_redis + type: redis + properties: + address: "redis.example.com" + port: 6379 + channel: "logs" + ``` + + + +### Secure + +The collector can connect to secure Redis servers: + + + + Connecting with authentication and encryption... + + + ```yaml + devices: + - id: 2 + name: secure_redis + type: redis + properties: + address: "redis.example.com" + port: 6379 + username: "subscriber" + password: "secret" + channel: "secure.logs" + tls: + status: true + cert_name: "redis.crt" + key_name: "redis.key" + ``` + + + +### High-Volume + +Performance can be enhanced for high message volumes: + + + + Optimizing for high throughput... + + + ```yaml + devices: + - id: 3 + name: performant_redis + type: redis + properties: + address: "redis.example.com" + port: 6379 + channel: "high-volume" + reuse: true + workers: 4 + buffer_size: 32768 + ``` + + + +### Pattern Subscription + +Messages can be filtered using pattern-based matching: + + + + Configuring pattern-based subscriptions... 
+ + + ```yaml + devices: + - id: 4 + name: pattern_redis + type: redis + properties: + address: "redis.example.com" + port: 6379 + channel: "logs.*" + reuse: true + workers: 2 + ``` + + + +:::tip +Redis channel patterns support the `*` wildcard character for matching multiple channels. +::: + +### Pipelines + +Messages can be pre-processed: + + + + Applying custom processing to messages... + + + ```yaml + devices: + - id: 5 + name: pipeline_redis + type: redis + pipelines: + - json_parser + - field_extractor + properties: + address: "redis.example.com" + port: 6379 + channel: "raw.logs" + ``` + + + +:::note +Pipelines are processed sequentially, and can modify or drop messages before ingestion. +::: diff --git a/versioned_docs/version-1.5.0/configuration/devices/sflow.mdx b/versioned_docs/version-1.5.0/configuration/devices/sflow.mdx new file mode 100644 index 00000000..2e419158 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/sflow.mdx @@ -0,0 +1,168 @@ +import AppProtocols from "./_app-protocols.mdx" + +# sFlow + +Push + +## Synopsis + +Creates an sFlow collector that accepts flow sampling data over UDP connections. Supports High-Volume collection with multiple workers and configurable buffer sizes. + +For details, see Appendix. + +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: sflow + tags: + pipelines: + status: + properties: + address: + port: + reuse: + buffer_size: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `sflow`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`address`|N|`"0.0.0.0"`|Listen address| +|`port`|N|`6343`|Listen port| +|`reuse`|N|`true`|Enable socket address reuse| + +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`buffer_size`|N|`9000`|Network read buffer size in bytes| + +## Key Features + +The following are unique features that **Director** offers. + +### Multiple Workers + +When `reuse` is enabled, the collector automatically scales to use multiple worker processes based on available CPU cores. Each worker maintains its own UDP listener, processes flows independently, and writes to a dedicated queue file. + +:::note +The collector automatically scales up to use all available CPU cores. +::: + +### Flows + +The collector supports interface-counter sampling, packet-flow sampling, application identification, port-based protocol mapping, flow-state tracking, and statistical aggregation. + +## Examples + +The following are commonly used configuration types. + +### Basic + +The minimum required configuration using defaults: + + + + Creating a simple sFlow collector... + + + ```yaml + devices: + - id: 1 + name: basic_sflow + type: sflow + properties: + port: 6343 + ``` + + + +### High-Volume + +Performance can be enhanced for high flow volumes: + + + + Optimizing for high message volumes... + + + ```yaml + devices: + - id: 2 + name: performant_sflow + type: sflow + properties: + address: "0.0.0.0" + port: 6343 + reuse: true + buffer_size: 32768 + ``` + + + +### Multiple Agents + +Multiple sFlow agents can be used for collection: + + + + Collecting from multiple network devices... 
+ + + ```yaml + devices: + - id: 3 + name: multi_agent_sflow + type: sflow + properties: + address: "0.0.0.0" + port: 6343 + reuse: true + buffer_size: 16384 + ``` + + + +:::warning +All sFlow agents must be configured to send to the same collector port. +::: + + + + + + Basic flow collection with application identification... + + + ```yaml + devices: + - id: 4 + name: app_aware_sflow + type: sflow + properties: + port: 6343 + reuse: true + buffer_size: 16384 + ``` + + diff --git a/versioned_docs/version-1.5.0/configuration/devices/smtp.mdx b/versioned_docs/version-1.5.0/configuration/devices/smtp.mdx new file mode 100644 index 00000000..8984c73f --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/smtp.mdx @@ -0,0 +1,248 @@ +# SMTP + +Push + +## Synopsis + +Creates an SMTP server that receives email messages. Supports authentication, TLS encryption, and multiple workers with automatic message handling, and JSON conversion. + +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: smtp + tags: + pipelines: + status: + properties: + address: + port: + username: + password: + timeout: + reuse: + workers: + buffer_size: + tls: + status: + cert_name: + key_name: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `smtp`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`address`|N|`"0.0.0.0"`|Listen address| +|`port`|Y||Listen port| +|`username`|N|-|Authentication username| +|`password`|N|-|Authentication password| +|`timeout`|N|`15`|Connection timeout in seconds| + +### TLS + +|Field|Required|Default|Description| +|---|---|---|---| +|`tls.status`|N|`false`|Enable TLS encryption| +|`tls.cert_name`|Y||TLS certificate file path (required if TLS enabled)| +|`tls.key_name`|Y||TLS private key file path (required if TLS enabled)| + +:::note +The TLS certificate and key files must be placed in the service root directory. +::: + +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`reuse`|N|`false`|Enable multi-worker mode| +|`workers`|N|`4`|Number of worker processes when reuse enabled| +|`buffer_size`|N|`9000`|Read buffer size in bytes| + +## Key Features + +The following are unique features that **Director** offers. + +### Emails + +The server captures and processes email headers, sender information, recipient information, message content, attachments, and remote client information. + +### JSON Conversion + +All email messages are automatically converted to JSON format with the following fields: + +|Field|Description| +|---|---| +|`from`|Sender address| +|`to`|Recipient addresses| +|`subject`|Email subject| +|`body`|Message body| +|`headers`|Email headers| +|`remoteAddr`|Client IP address| + +### Multiple Workers + +When `reuse` is enabled, the server uses multiple worker processes which maintain a separate SMTP listener and process messages independently. Messages are automatically converted to JSON. + +:::note +The worker count will be capped at the number of available CPU cores. +::: + +## Examples + +The following are commonly used configuration types. + +### Basic + +A basic server can be easily created: + + + + Creating a simple SMTP server... 
+ + + ```yaml + devices: + - id: 1 + name: basic_smtp + type: smtp + properties: + address: "0.0.0.0" + port: 25 + ``` + + + +### Secure + +E-mail can be received securely: + + + + Configuring TLS and authentication... + + + ```yaml + devices: + - id: 2 + name: secure_smtp + type: smtp + properties: + address: "0.0.0.0" + port: 587 + username: "mailuser" + password: "secret" + timeout: 30 + tls: + status: true + cert_name: "smtp.crt" + key_name: "smtp.key" + ``` + + + +:::note +Port 587 is commonly used for TLS-enabled SMTP (STARTTLS). +::: + +### High-Volume + +Performance can be enhanced for high email volumes: + + + + Optimizing for high message volumes... + + + ```yaml + devices: + - id: 3 + name: performant_smtp + type: smtp + properties: + address: "0.0.0.0" + port: 25 + timeout: 60 + reuse: true + workers: 4 + buffer_size: 32768 + ``` + + + +### Submissions + +Message submission can be dedicated: + + + + Configuring a message submission server... + + + ```yaml + devices: + - id: 4 + name: submission_smtp + type: smtp + properties: + address: "0.0.0.0" + port: 587 + username: "mailuser" + password: "secret" + timeout: 30 + tls: + status: true + cert_name: "smtp.crt" + key_name: "smtp.key" + reuse: true + workers: 2 + ``` + + + +### Pipelines + +Emails can be pre-processed: + + + + Applying custom processing to emails... + + + ```yaml + devices: + - id: 5 + name: pipeline_smtp + type: smtp + pipelines: + - email_parser + - spam_filter + properties: + address: "0.0.0.0" + port: 25 + timeout: 30 + ``` + + + +:::note +Pipelines are processed sequentially and can modify or drop messages before ingestion. +::: diff --git a/versioned_docs/version-1.5.0/configuration/devices/snmp-trap.mdx b/versioned_docs/version-1.5.0/configuration/devices/snmp-trap.mdx new file mode 100644 index 00000000..a4c9b5e1 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/snmp-trap.mdx @@ -0,0 +1,243 @@ +# SNMP Trap + +Push + +## Synopsis + +Creates a receiver that listens for SNMP trap messages. Supports SNMPv2c and SNMPv3 with various authentication and privacy protocols, MIB integration, and High-Volume message processing. + +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: snmptrap + status: + properties: + address: + port: + reuse: + workers: + community: + authentication: + status: + protocol: + password: + privacy: + protocol: + passphrase: + username: + batch_size: + queue: + interval: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `snmptrap`| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`address`|N|`"0.0.0.0"`|Listen address| +|`port`|Y|`162`|Listen port for SNMP traps| +|`community`|Y||Community string for SNMPv2c (required if not using v3)| + +### Authentication + +|Field|Required|Default|Description| +|---|---|---|---| +|`authentication.status`|N|`false`|Enable SNMPv3 authentication| +|`authentication.protocol`|N|`1`|Authentication protocol ID. 
See table below| +|`authentication.password`|Y||Authentication password (required if auth enabled)| +|`username`|Y||SNMPv3 username| + +#### Authentication Protocols + +|ID|Protocol|Description| +|---|---|---| +|`1`|`NoAuth`|No authentication| +|`2`|`MD5`|HMAC-MD5-96 authentication| +|`3`|`SHA`|HMAC-SHA-96 authentication| +|`4`|`SHA224`|HMAC-SHA-224 authentication| +|`5`|`SHA256`|HMAC-SHA-256 authentication| +|`6`|`SHA384`|HMAC-SHA-384 authentication| +|`7`|`SHA512`|HMAC-SHA-512 authentication| + +### Privacy + +|Field|Required|Default|Description| +|---|---|---|---| +|`privacy.protocol`|N|`1`|Privacy protocol ID. See table below| +|`privacy.passphrase`|Y||Privacy passphrase (required if protocol > 1)| + +#### Privacy Protocols + +|ID|Protocol|Description| +|---|---|---| +|`1`|`NoPriv`|No privacy| +|`2`|`DES`|DES privacy| +|`3`|`AES`|AES-128 privacy| +|`4`|`AES192`|AES-192 privacy| +|`5`|`AES256`|AES-256 privacy| +|`6`|`AES192C`|AES-192 with 3DES key padding| +|`7`|`AES256C`|AES-256 with 3DES key padding| + +## MIB Configuration + +The SNMP trap receiver automatically loads and integrates _Management Information Base_ (MIB) files for translating SNMP trap messages. MIB files are essential for proper interpretation of vendor-specific or custom SNMP traps. + +### MIB File Locations + +The system looks for MIB files in two locations under ``: + +**User directory**, i.e. `\user\mibs\` (Windows) or `/user/mibs/` (Unix): Primary location for custom MIB files. Supports nested directory structure. All user-specific MIB files should be placed here. + +**Package directory**, i.e. `\package\mibs\` (Windows) or `/package/mibs/` (Unix): Contains default/standard MIB files. Bundled with the installation. Should not be modified directly. + +:::note +Custom MIB files in the user directory take precedence over similarly named files in the package directory. +::: + +### MIB Loading Process + +The system will recursively scan both MIB directories, load all discovered MIB modules, automatically resolve MIB dependencies, and use loaded MIBs for trap translation. + +:::warning +If MIB loading fails, the system will still receive traps but may not translate vendor-specific OIDs correctly. +::: + +## Advanced Configuration + +To enhance performance and achieve better data handling, the following settings are used: + +|Field|Required|Default|Description| +|---|---|---|---| +|`reuse`|N|`false`|Enable socket address reuse| +|`workers`|N|``|Number of worker processes when reuse enabled| +|`batch_size`|N|`1000`|Number of messages to batch before processing| + +## Examples + +The following are commonly used configuration types. + +### Basic + +A basic trap requires a default listening address, a standard SNMP trap port, and a public community string. + + + + Creating a simple SNMPv2c trap receiver... + + + ```yaml + devices: + - id: 1 + name: basic_trap + type: snmptrap + properties: + port: 162 + community: "public" + ``` + + + +### MIBs + +This requires standard SNMP trap listening, a custom MIB file integration, and basic trap handling. + + + + Basic trap receiver with custom MIB support... + + + ```yaml + devices: + - id: 5 + name: mib_enabled_trap + type: snmptrap + description: "Trap receiver with custom MIB support" + properties: + port: 162 + community: "public" + ``` + + + +:::note +Place your custom MIB files in the `/user/mibs/` directory before starting the trap receiver. The system will automatically load and use them for trap translation. 
+::: + +### Secure + +For enhanced security, use SHA256 authentication, AES-256 privacy, and a secure credential store. + +:::caution +When using **SNMPv3** with privacy, ensure that both the authentication and privacy passwords meet the minimum length requirements for the selected protocols. +::: + + + + SNMPv3 with authentication and privacy... + + + ```yaml + devices: + - id: 2 + name: secure_trap + type: snmptrap + properties: + port: 162 + authentication: + status: true + protocol: 5 + password: "${SNMP_AUTH_PASS}" + privacy: + protocol: 5 + passphrase: "${SNMP_PRIV_PASS}" + username: "trapuser" + ``` + + + +### High-Volume + +Performance can be enhanced for high message volumes using multiple worker processes, a larger batch size, +optimized queues, and basic SHA authentication. + + + + Optimizing for high message volumes... + + + ```yaml + devices: + - id: 3 + name: performant_trap + type: snmptrap + properties: + port: 162 + reuse: true + workers: 4 + batch_size: 5000 + queue: + interval: 2 + authentication: + status: true + protocol: 3 + username: "trapuser" + ``` + + diff --git a/versioned_docs/version-1.5.0/configuration/devices/syslog.mdx b/versioned_docs/version-1.5.0/configuration/devices/syslog.mdx new file mode 100644 index 00000000..63a84531 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/syslog.mdx @@ -0,0 +1,311 @@ +# Syslog + +Pull + +## Synopsis + +Creates a Syslog server that accepts log messages over UDP or TCP connections. Supports both plain and TLS-encrypted connections, with configurable framing and buffering options. + +For details, see Appendix. + +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: syslog + tags: + pipelines: + status: + properties: + protocol: + address: + port: + framing: + line_delimiter: + max_connections: + timeout: + tls: + status: + cert_name: + key_name: + reuse: + workers: + buffer_size: + max_message_size: + flush_interval: + batch_size: + queue: + interval: + forwarding: + - address: + port: + type: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `syslog`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Protocol + +|Field|Required|Default|Description| +|---|---|---|---| +|`protocol`|N|`"udp"`|Transport protocol (`udp` or `tcp`)| +|`address`|N|`"0.0.0.0"`|Listen address| +|`port`|Y||Listen port| + +### TCP + +The following are only applicable when `protocol` is set to `tcp`. + +|Field|Required|Default|Description| +|---|---|---|---| +|`framing`|N|`"delimiter"`|Framing mode for TCP (`delimiter` or `octet`)| +|`line_delimiter`|N|`"\n"`|Line separator for TCP delimiter framing| + +### TLS + +The following are only applicable when `protocol` is set to `tcp`. + +|Field|Required|Default|Description| +|---|---|---|---| +|`tls.status`|N|`false`|Enable TLS encryption| +|`tls.cert_name`|Y||TLS certificate file path (required if TLS enabled)| +|`tls.key_name`|Y||TLS private key file path (required if TLS enabled)| + +:::note +The TLS certificate and key files must be placed in the service root directory. +::: + +## Advanced Configuration + +To enhance performance and achieve better data handling, the following settings are used. 
+
+### Performance
+
+|Field|Required|Default|Description|
+|---|---|---|---|
+|`reuse`|N|`true`|Enable socket address reuse|
+|`workers`|N|``|Number of worker processes when reuse enabled|
+|`max_connections`|N|`10000`|Maximum concurrent TCP connections|
+|`max_message_size`|N|`20971520`|Maximum message size in bytes (20MB)|
+|`timeout`|N|`300`|Connection timeout in seconds|
+|`buffer_size`|N|`9000`|Network read buffer size in bytes|
+
+### Messages
+
+|Field|Required|Default|Description|
+|---|---|---|---|
+|`flush_interval`|N|`1`|Message flush interval in seconds|
+|`batch_size`|N|`1000`|Number of messages to batch before flushing|
+|`queue.interval`|N|`1`|Queue processing interval in seconds|
+
+### Forwarding
+
+|Field|Required|Default|Description|
+|---|---|---|---|
+|`forwarding[].address`|Y||Forward destination address|
+|`forwarding[].port`|N|`514`|Forward destination port|
+|`forwarding[].type`|N|`"udp"`|Forward protocol (`udp` or `tcp`)|
+
+## Examples
+
+The following are commonly used configuration types.
+
+### Basic
+
+A basic configuration can be created easily using `"udp"` for `protocol` and `"0.0.0.0"` for `address`.
+
+
+  Creating a simple UDP syslog server...
+
+
+  ```yaml
+  devices:
+    - id: 1
+      name: basic_syslog
+      type: syslog
+      properties:
+        port: 514
+  ```
+
+
+#### Checkpoint
+
+The basic UDP server can be configured to use the `checkpoint` pre-processing pipeline, which extracts Checkpoint firewall logs from syslog messages:
+
+
+  Creating a simple UDP syslog server with checkpoint...
+
+
+  ```yaml
+  devices:
+    - id: 2
+      name: basic_syslog
+      type: syslog
+      tags:
+        - "network_device"
+      pipelines:
+        - checkpoint
+      properties:
+        address: "10.0.0.1"
+        protocol: "udp"
+        port: 1514
+  ```
+
+
+:::note
+If the device is a Checkpoint firewall, this pipeline will parse the logs and extract relevant fields for further processing. Otherwise, the pipeline will have no effect on the incoming messages.
+:::
+
+### High-Volume
+
+Performance of a UDP server can be enhanced for high message volumes using multiple workers, a larger buffer size, a larger batch size, and adjusted flush intervals.
+
+
+  Optimizing for high message volumes...
+
+
+  ```yaml
+  devices:
+    - id: 3
+      name: performant_syslog
+      type: syslog
+      properties:
+        protocol: udp
+        port: 514
+        reuse: true
+        workers: 4
+        buffer_size: 32768
+        batch_size: 5000
+        flush_interval: 5
+        queue:
+          interval: 2
+  ```
+
+
+:::note
+The worker count will be automatically capped at the maximum number of physical cores available on the system.
+:::
+
+### Framing
+
+For a TCP server with custom message framing, use a custom frame delimiter, connection limits, and an idle timeout:
+
+
+  TCP server with custom message framing...
+
+
+  ```yaml
+  devices:
+    - id: 4
+      name: tcp_syslog
+      type: syslog
+      properties:
+        protocol: tcp
+        port: 1514
+        framing: delimiter
+        line_delimiter: "\r\n"
+        max_connections: 5000
+        timeout: 60
+  ```
+
+
+:::warning
+When using TCP with delimiter framing, ensure the `line_delimiter` matches the delimiter used by the client.
+:::
+
+### Security
+
+Security can be enhanced using TLS encryption, multiple forward destinations, and mixed protocols:
+
+
+  Securing the server and forwarding the messages...
+ + + ```yaml + devices: + - id: 5 + name: secure_syslog + type: syslog + properties: + protocol: tcp + port: 6514 + tls: + status: true + cert_name: cert.pem + key_name: key.pem + forwarding: + - address: "10.0.0.1" + port: 514 + type: udp + - address: "10.0.0.2" + port: 6514 + type: tcp + ``` + + + +### Forwarding + +For message forwarding use network devices with single syslog output. This can also be implemented on legacy systems that need multiple destinations. The messages can be fanned out to different analysis tools. + +:::note +Forwarding acts as a message replicator, sending exact copies of incoming messages to all configured destinations unmodified. This is particularly useful for network devices that can only send syslog data to a single destination. +::: + +The messages are forwarded exactly as received to the UDP server on port `514`, and the TCP server on ports `1514` and `6514`. + + + + Forwarding incoming messages to multiple destinations... + + + ```yaml + devices: + - id: 6 + name: forwarder_syslog + type: syslog + properties: + protocol: udp + port: 514 + forwarding: + - address: "10.0.0.50" + port: 514 + type: udp + - address: "10.0.0.51" + port: 1514 + type: tcp + - address: "syslog.example.com" + port: 6514 + type: tcp + ``` + + + +:::warning +When using TCP forwarding, ensure the destination servers can handle the connection load as each connection is persistent. +::: diff --git a/versioned_docs/version-1.5.0/configuration/devices/tcp.mdx b/versioned_docs/version-1.5.0/configuration/devices/tcp.mdx new file mode 100644 index 00000000..4df08711 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/tcp.mdx @@ -0,0 +1,210 @@ +# TCP + +Push + +## Synopsis + +Creates a server that accepts network messages over TCP connections. Supports both plain and TLS-encrypted connections, with configurable framing modes, connection management, and buffering options. + +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: tcp + tags: + pipelines: + status: + properties: + protocol: + address: + port: + framing: + line_delimiter: + max_connections: + timeout: + max_message_size: + reuse: + workers: + buffer_size: + batch_size: + queue: + interval: + tls: + status: + cert_name: + key_name: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `tcp`| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`protocol`|N|`"tcp"`|Transport protocol (must be tcp)| +|`address`|N|`"0.0.0.0"`|Listen address| +|`port`|Y||Listen port| + +### TCP + +|Field|Required|Default|Description| +|---|---|---|---| +|`framing`|N|`"delimiter"`|Framing mode (`delimiter` or `octet`)| +|`line_delimiter`|N|`"\n"`|Line separator for delimiter framing| +|`max_connections`|N|`10000`|Maximum concurrent connections| +|`timeout`|N|`300`|Connection timeout in seconds| +|`max_message_size`|N|`20971520`|Maximum message size in bytes (20MB)| + +:::warning +When using delimiter framing, ensure that the `line_delimiter` matches the client's to prevent message parsing errors. 
+::: + +### TLS + +|Field|Required|Default|Description| +|---|---|---|---| +|`tls.status`|N|`false`|Enable TLS encryption| +|`tls.cert_name`|Y||TLS certificate file path (required if TLS enabled)| +|`tls.key_name`|Y||TLS private key file path (required if TLS enabled)| + +:::note +The TLS certificate and key files must be placed in the service root directory. +::: + +## Advanced Configuration + +To enhance performance and achieve better message handling, the following settings are used. + +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`reuse`|N|`true`|Enable socket address reuse| +|`workers`|N|``|Number of worker processes when reuse is enabled| +|`buffer_size`|N|`1048576`|Network read buffer size in bytes (1MB)| + +### Messages + +|Field|Required|Default|Description| +|---|---|---|---| +|`batch_size`|N|`1000`|Number of messages to batch before processing| +|`queue.interval`|N|`1`|Queue processing interval in seconds| + +## Examples + +The following are commonly used configuration types. + +### Basic + +A basic server can be easily created using `"tcp"` for `protocol`, `"0.0.0.0"` for `address`, and the default framing and timeout settings. + + + + Creating a simple TCP server... + + + ```yaml + devices: + - id: 1 + name: basic_tcp + type: tcp + properties: + port: 514 + ``` + + + +### High-Volume + +Performance can be enhanced using multiple workers, a larger buffer size (e.g. **4MB**), a higher connection limit, and optimized batches. + + + + Optimizing for high message volumes... + + + ```yaml + devices: + - id: 2 + name: performant_tcp + type: tcp + properties: + port: 514 + reuse: true + workers: 4 + buffer_size: 4194304 + max_connections: 20000 + batch_size: 5000 + queue: + interval: 2 + ``` + + + +:::note +The worker count is automatically capped at the number of physical cores available on the system. +::: + +### Framing + +A custom framing can be achieved using CRLF (i.e. `"\r\n"`) as the message delimiter, a **5MB** message size limit, and **1min** connection timeout. + + + + TCP server with custom message framing... + + + ```yaml + devices: + - id: 3 + name: framed_tcp + type: tcp + properties: + port: 1514 + framing: delimiter + line_delimiter: "\r\n" + max_message_size: 5242880 + timeout: 60 + ``` + + + +### Encryption + +Security can be enhanced using TLS encryption, a custom certificate and key, connection limits, and an extended timeout the TLS handshake. + + + + Securing TCP server with TLS encryption... + + + ```yaml + devices: + - id: 4 + name: secure_tcp + type: tcp + properties: + port: 6514 + tls: + status: true + cert_name: cert.pem + key_name: key.pem + max_connections: 5000 + timeout: 120 + ``` + + diff --git a/versioned_docs/version-1.5.0/configuration/devices/tftp.mdx b/versioned_docs/version-1.5.0/configuration/devices/tftp.mdx new file mode 100644 index 00000000..76738074 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/tftp.mdx @@ -0,0 +1,224 @@ +# TFTP + +Pull + +## Synopsis + +Creates a server that accepts file uploads using the TFTP protocol. Supports IP-based device mapping, multiple workers, and automatic file content processing. 
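+
+Since the listener speaks plain TFTP, any standard client can push a test file to it. The example below is a sketch only: it assumes a `curl` build compiled with TFTP support and a hypothetical collector host; adjust the host and file names to your environment.
+
+```bash
+# Upload (write request) a sample file to the collector over TFTP.
+curl -T sample.log tftp://tftp-collector.example.com/sample.log
+```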
+ +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: tftp + tags: + pipelines: + status: + properties: + address: + port: + reuse: + workers: + buffer_size: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `tftp`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`address`|N|`"0.0.0.0"`|Listen address| +|`port`|Y||Listen port (typically 69)| + +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`reuse`|N|`false`|Enable multi-worker mode| +|`workers`|N|`4`|Number of worker processes when reuse enabled| +|`buffer_size`|N|`9000`|Read buffer size in bytes| + +:::warning +TFTP protocol does **not** support authentication. Deploy only on _trusted_ networks. +::: + +## Key Features + +The following are unique features that **Director** offers. + +### IP-Based Device Mapping + +The server supports automatic device mapping based on client IP addresses: it maps incoming client IPs to device IDs, and automatically associates uploads with devices. It also supports dynamic device discovery and maintains an IP-to-device mapping cache. + +### Files + +The server processes uploaded files by reading the file contents into memory, recording upload metadata (filename, timestamp, etc.), capturing the client information, and converting the content to consumable format. + +### Multiple Workers + +When `reuse` is enabled, the server uses multiple workers which maintain their own TFTP listeners and process files independently so that the files are automatically distributed. + +:::note +The worker count will be capped at the number of available CPU cores. +::: + +### Upload + +The device upload operations observe the following procedure: + +* The client initiates a TFTP upload + +* The server accepts the connection + +* The client transfers the file + +* Finally, the server maps the client IP to the device, reads the file contents, applies the pipelines, and saves the processed data to a store. + +:::tip +Configure clients to use binary mode for file transfers to avoid data corruption. +::: + +## Examples + +The following are commonly used configuration types. + +### Basic + +A basic server can be created easily: + + + + Creating a simple TFTP server... + + + ```yaml + devices: + - id: 1 + name: basic_tftp + type: tftp + properties: + address: "0.0.0.0" + port: 69 + ``` + + + +### Device Mapping + +Devices can be automatically captured: + + + + Configuring IP-based device mapping... + + + ```yaml + devices: + - id: 2 + name: mapped_tftp + type: tftp + properties: + address: "0.0.0.0" + port: 69 + reuse: false + buffer_size: 16384 + ``` + + + +### Multiple Devices + +Uploads from multiple devices can be handled easily: + + + + Configuring server for multiple devices... + + + ```yaml + devices: + - id: 4 + name: multi_device_tftp + type: tftp + properties: + address: "0.0.0.0" + port: 69 + reuse: true + workers: 2 + ``` + + + +:::warning +Ensure all devices are configured to use the same TFTP server port. +::: + +### High-Volume + +Performance can be enhanced for high file upload volumes: + + + + Optimizing for high volumes... 
+
+
+  ```yaml
+  devices:
+    - id: 3
+      name: performant_tftp
+      type: tftp
+      properties:
+        address: "0.0.0.0"
+        port: 69
+        reuse: true
+        workers: 4
+        buffer_size: 32768
+  ```
+
+
+### Pipelines
+
+File contents can be handled according to requirements:
+
+
+  Applying custom processing to uploaded files...
+
+
+  ```yaml
+  devices:
+    - id: 5
+      name: pipeline_tftp
+      type: tftp
+      pipelines:
+        - config_parser
+        - field_extractor
+      properties:
+        address: "0.0.0.0"
+        port: 69
+        buffer_size: 16384
+  ```
+
+
+:::note
+Pipelines are processed sequentially, and can modify or drop content before ingestion.
+:::
diff --git a/versioned_docs/version-1.5.0/configuration/devices/udp.mdx b/versioned_docs/version-1.5.0/configuration/devices/udp.mdx
new file mode 100644
index 00000000..e21e8169
--- /dev/null
+++ b/versioned_docs/version-1.5.0/configuration/devices/udp.mdx
@@ -0,0 +1,155 @@
+# UDP
+
+Push
+
+## Synopsis
+
+Creates a server that accepts network messages over UDP connections. Supports High-Volume message ingestion with configurable workers and buffering options.
+
+## Schema
+
+```yaml {1,2,4}
+- id:
+  name:
+  description:
+  type: udp
+  tags:
+  pipelines:
+  status:
+  properties:
+    address:
+    port:
+    reuse:
+    workers:
+    buffer_size:
+    batch_size:
+    queue:
+      interval:
+```
+
+## Configuration
+
+The following fields are used to define the device:
+
+### Device
+
+|Field|Required|Default|Description|
+|---|---|---|---|
+|`id`|Y||Unique identifier|
+|`name`|Y||Device name|
+|`description`|N|-|Optional description|
+|`type`|Y||Must be `udp`|
+|`status`|N|`true`|Enable/disable the device|
+
+### Connection
+
+|Field|Required|Default|Description|
+|---|---|---|---|
+|`address`|N|`"0.0.0.0"`|Listen address|
+|`port`|Y||Listen port|
+
+## Advanced Configuration
+
+To enhance performance and achieve better event handling, the following settings are used.
+
+### Performance
+
+|Field|Required|Default|Description|
+|---|---|---|---|
+|`reuse`|N|`true`|Enable socket address reuse|
+|`workers`|N|``|Number of worker processes when reuse enabled|
+|`buffer_size`|N|`9000`|Network read buffer size in bytes|
+
+### Messages
+
+|Field|Required|Default|Description|
+|---|---|---|---|
+|`batch_size`|N|`1000`|Number of messages to batch before processing|
+|`queue.interval`|N|`1`|Queue processing interval in seconds|
+
+## Examples
+
+The following are commonly used configuration types.
+
+### Basic
+
+A basic server can be easily set up using the defaults for address, buffer size, and a single worker process.
+
+
+  Creating a simple UDP server...
+
+
+  ```yaml
+  devices:
+    - id: 1
+      name: basic_udp
+      type: udp
+      properties:
+        port: 514
+  ```
+
+
+### High-Volume
+
+Performance can be enhanced using multiple workers, a larger buffer size, a larger batch size, and a longer queue interval.
+
+
+  Optimizing for high message volumes...
+
+
+  ```yaml
+  devices:
+    - id: 2
+      name: performant_udp
+      type: udp
+      properties:
+        address: "0.0.0.0"
+        port: 514
+        reuse: true
+        workers: 4
+        buffer_size: 32768
+        batch_size: 5000
+        queue:
+          interval: 2
+  ```
+
+
+:::note
+The worker count will be automatically capped at the number of available cores.
+:::
+
+### Multiple Ports
+
+Multiple UDP servers can be used with different ports, separate workers, and independent message queues.
+
+
+  Listening on multiple ports using separate configurations...
+ + + ```yaml + devices: + - id: 3 + name: udp_server_1 + type: udp + properties: + port: 514 + workers: 2 + + - id: 4 + name: udp_server_2 + type: udp + properties: + port: 515 + workers: 2 + ``` + + + +:::note +When running multiple UDP servers, ensure that each configuration has a unique port number, and consider the total number of workers across all instances relative to the available system resources. +::: diff --git a/versioned_docs/version-1.5.0/configuration/devices/windows.mdx b/versioned_docs/version-1.5.0/configuration/devices/windows.mdx new file mode 100644 index 00000000..77652535 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/devices/windows.mdx @@ -0,0 +1,106 @@ +# Windows + +Agentless + +## Synopsis + +Creates a device collector that connects to Windows servers to gather system metrics, performance counters, and event logs. Supports both local and domain authentication methods, with configurable connection parameters and debug options. + +## Schema + +```yaml {1,2,4} +- id: + name: + description: + type: windows + tags: + pipelines: + status: + properties: + address: + port: + username: + password: + domain: + timeout: +``` + +## Configuration + +The following fields are used to define the device: + +### Device + +|Field|Required|Default|Description| +|---|---|---|---| +|`id`|Y||Unique identifier| +|`name`|Y||Device name| +|`description`|N|-|Optional description| +|`type`|Y||Must be `windows`| +|`tags`|N|-|Optional tags| +|`pipelines`|N|-|Optional pre-processor pipelines| +|`status`|N|`true`|Enable/disable the device| + +### Connection + +|Field|Required|Default|Description| +|---|---|---|---| +|`address`|Y||Target server address| +|`port`|N|`5985`|WinRM port number| +|`username`|N||_Active Directory_ username| +|`password`|N||_Active Directory_ password| +|`domain`|N|-|Domain name for domain authentication| + +## Advanced Configuration + +To enhance performance and achieve better message handling, the following settings are used. + +### Performance + +|Field|Required|Default|Description| +|---|---|---|---| +|`timeout`|N|`30`|Connection timeout in seconds| + +## Examples + +The following are commonly used configuration types. + +### Local + +Configuration using local _Windows_ authentication: + +```yaml +devices: + - id: 1 + name: windows_server + type: windows + properties: + address: "192.168.1.100" + username: "Administrator" + password: "secret" +``` + +### Domain-Level + +Configuration using domain-level authentication: + +:::warning +The user account must have appropriate permissions to collect performance counters and event logs. For domain authentication, the format should be either "DOMAIN\\username" or "username@domain.local". +::: + +```yaml +devices: + - id: 2 + name: domain_windows + type: windows + properties: + address: "server1.domain.local" + username: "monitor" + password: "P@ssw0rd" + domain: "DOMAIN" + timeout: 60 +``` + +:::note +When multiple collectors are deployed, the server adds a random delay (up to **20** seconds) before initiating the collection to help prevent overwhelming the target server. 
+::: diff --git a/versioned_docs/version-1.5.0/configuration/directors/cli/agent.mdx b/versioned_docs/version-1.5.0/configuration/directors/cli/agent.mdx new file mode 100644 index 00000000..471f3790 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/directors/cli/agent.mdx @@ -0,0 +1,575 @@ +--- +description: Command-line options +pagination_prev: null +pagination_next: null +sidebar_label: Agent +--- + +# CLI: Agent + +## Overview + +**VirtualMetric Agent™** is a lightweight data collection and synchronization component designed to work in distributed environments. It provides agent-based data collection capabilities, cluster synchronization, and management connectivity for enterprise deployments. The agent can operate independently or as part of a larger VirtualMetric infrastructure. + +## Synopsis + +```bash +vmetric-agent [OPTIONS] +``` + +## Options + +The following command line options are available. + +### General Options + +|Option|Default|Description| +|--:|:--|---| +|`-compare-version`|-|Compare with specified version| +|`-config`|-|Run configuration wizard| +|`-example`|-|Show usage examples| +|`-path`|-|Specify configuration file or directory path| +|`-read`|-|Read configuration (reserved for future use)| +|`-validate`|-|Validate configuration files| +|`-version`|-|Display version information| + +### Mode Options + +|Option|Default|Description| +|--:|:--|---| +|`-console`|-|Run in console mode| +|`-mode`|`agent`|Operating mode (`agent`, `supervisor`, `console`, `update`)| +|`-supervisor`|-|Run in supervisor mode| +|`-update`|-|Run update process| + +### Service Management + +|Option|Default|Description| +|--:|:--|---| +|`-agentless`|-|Run without installing as a service| +|`-background`|-|Run in background mode| +|`-info`|-|Display service information| +|`-service`|-|Service operation (`install`, `uninstall`, `start`, `stop`, `restart`)| +|`-test`|-|Run in test mode| +|`-vpc`|-|Display product code| + +### Console Options + +|Option|Default|Description| +|--:|:---|---| +|`-debug-level`|-|Set debug verbosity level (1-5)| +|`-filter`|-|Filter console output by keywords| + +### Synchronization Options + +|Option|Default|Description| +|--:|:--|---| +|`-cluster-sync`|-|Run cluster synchronization with specified interval| +|`-manager-sync`|-|Run manager synchronization with specified interval| + +### Control Options + +|Option|Default|Description| +|--:|:---|---| +|`-stop`|`false`|Stop running agent process| + +## Operating Modes + +### Agent Mode (Default) + +The primary operating mode for data collection and processing. + +* Basic Usage: + + ```bash + vmetric-agent + ``` + +* Run as Service: + + ```bash + vmetric-agent -service=install + vmetric-agent -service=start + ``` + +* Run in Background: + + ```bash + vmetric-agent -background + ``` + +* Agentless Mode: + + ```bash + vmetric-agent -agentless + ``` + +### Supervisor Mode + +Manages and monitors other VirtualMetric components. + +* Basic Usage: + + ```bash + vmetric-agent -mode=supervisor + ``` + +* Supervisor as Service: + + ```bash + vmetric-agent -mode=supervisor -service=install + vmetric-agent -mode=supervisor -service=start + ``` + +### Console Mode + +Interactive mode for monitoring and debugging. + +* Basic Console: + + ```bash + vmetric-agent -mode=console + ``` + +* Filtered Console Output: + + ```bash + vmetric-agent -mode=console -filter="error" + ``` + +* Debug Level Console: + + ```bash + vmetric-agent -mode=console -debug-level=3 + ``` + +### Update Mode + +Handles software updates and maintenance. 
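+
+In practice, the update is usually wrapped in a short maintenance window. The following sketch uses only switches documented on this page and assumes the agent is installed as a service: it stops the service, applies the update, and brings the service back up.
+
+```bash
+# Stop the running agent service before updating
+vmetric-agent -service=stop
+
+# Apply the update
+vmetric-agent -mode=update
+
+# Restart the agent service
+vmetric-agent -service=start
+```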
+ +* Run Update: + + ```bash + vmetric-agent -mode=update + ``` + +## Service Management + +### Installation and Setup + +* Install Agent Service: + + ```bash + vmetric-agent -service=install + ``` + +* Install with Custom Path: + + ```bash + vmetric-agent -path=/Agent/config -service=install + ``` + +* Uninstall Service: + + ```bash + vmetric-agent -service=uninstall + ``` + +### Service Control + +* Start Service: + + ```bash + vmetric-agent -service=start + ``` + +* Stop Service: + + ```bash + vmetric-agent -service=stop + ``` + +* Restart Service: + + ```bash + vmetric-agent -service=restart + ``` + +* Check Service Status: + + ```bash + vmetric-agent -info + ``` + +### Process Management + +* Stop Running Agent: + + ```bash + vmetric-agent -stop + ``` + +## Configuration Management + +### Configuration Wizard + +* Run Configuration Setup: + + ```bash + vmetric-agent -config=setup + ``` + +* Configure with Template: + + ```bash + vmetric-agent -config=template-name + ``` + +### Configuration Validation + +* Validate Configuration Files: + + ```bash + vmetric-agent -validate + ``` + +* Validate Specific Path: + + ```bash + vmetric-agent -path=/path/to/config -validate + ``` + +## Synchronization Features + +### Cluster Synchronization + +Synchronizes data and state across cluster nodes. + +* Run Cluster Sync (30-second intervals): + + ```bash + vmetric-agent -cluster-sync=30 + ``` + +* Run Cluster Sync (5-minute intervals): + + ```bash + vmetric-agent -cluster-sync=300 + ``` + +### Manager Synchronization + +Synchronizes with central management systems. + +* Run Manager Sync (1-minute intervals): + + ```bash + vmetric-agent -manager-sync=60 + ``` + +* Run Manager Sync (10-minute intervals): + + ```bash + vmetric-agent -manager-sync=600 + ``` + +## Monitoring and Debugging + +### Console Monitoring + +* Real-time Log Monitoring: + + ```bash + vmetric-agent -mode=console + ``` + +* Filter Specific Events: + + ```bash + vmetric-agent -mode=console -filter="agent" + ``` + +* High-Detail Debug Monitoring: + + ```bash + vmetric-agent -mode=console -debug-level=5 + ``` + +### Service Information + +* Display Service Details: + + ```bash + vmetric-agent -info + ``` + +* Get Product Code: + + ```bash + vmetric-agent -vpc + ``` + +## Common Usage Patterns + +### Basic Deployment + +* Standard Agent Installation: + + Install and start the agent service + + ```bash + vmetric-agent -service=install + vmetric-agent -service=start + ``` + + Verify installation + + ```bash + vmetric-agent -info + ``` + +### Development and Testing + +* Test Configuration: + + ```bash + vmetric-agent -validate -path=./test-config.yml + ``` + +* Run in Test Mode: + + ```bash + vmetric-agent -test -agentless + ``` + +* Debug with Console: + + ```bash + vmetric-agent -mode=console -debug-level=4 -filter="test" + ``` + +### Distributed Environment Setup + +* Primary Agent Node: + + ```bash + vmetric-agent -service=install + vmetric-agent -service=start + ``` + +* Secondary Agent with Cluster Sync: + + ```bash + vmetric-agent -agentless -cluster-sync=60 + ``` + +* Supervisor Node: + + ```bash + vmetric-agent -mode=supervisor -service=install + vmetric-agent -mode=supervisor -service=start + ``` + +### Maintenance Operations + +* Update Agent: + + ```bash + vmetric-agent -mode=update + ``` + +* Graceful Shutdown: + + ```bash + vmetric-agent -stop + ``` + +* Service Restart: + + ```bash + vmetric-agent -service=stop + vmetric-agent -service=start + ``` + +## Integration Scenarios + +### With VirtualMetric Director + +* 
**Agent** with **Director** Coordination: + + Start **Director** first + + ```bash + vmetric-director -service=start + ``` + + Start **Agent** in coordination mode + + ```bash + vmetric-agent -manager-sync=120 + ``` + +### Cluster Configuration + +* Master **Agent** Node: + + ```bash + vmetric-agent -service=install + vmetric-agent -service=start + ``` + +* Worker **Agent** Nodes: + + ```bash + vmetric-agent -agentless -cluster-sync=30 -manager-sync=300 + ``` + +### High Availability Setup + +* Primary **Agent**: + + ```bash + vmetric-agent -service=install + vmetric-agent -service=start + ``` + +* Backup **Agent**: + + ```bash + vmetric-agent -mode=supervisor -service=install + vmetric-agent -mode=supervisor -service=start + ``` + +## Troubleshooting + +### Service Issues + +* Check Service Status: + + ```bash + vmetric-agent -info + ``` + +* Debug Service Problems: + + ```bash + vmetric-agent -mode=console -debug-level=5 + ``` + +* Restart Service: + + ```bash + vmetric-agent -service=stop + vmetric-agent -service=start + ``` + +### Configuration Issues + +* Validate Configuration: + + ```bash + vmetric-agent -validate + ``` + +* Reconfigure **Agent**: + + ```bash + vmetric-agent -config=reset + ``` + +### Connectivity Issues + +* Test Cluster Communication: + + ```bash + vmetric-agent -cluster-sync=10 -agentless + ``` + +* Debug Manager Connection: + + ```bash + vmetric-agent -manager-sync=30 -mode=console -debug-level=3 + ``` + +### Performance Issues + +* Monitor **Agent** Performance: + + ```bash + vmetric-agent -mode=console -filter="performance" + ``` + +* Run Performance Test: + + ```bash + vmetric-agent -test -mode=console + ``` + +## Advanced Configuration + +### Custom Service Names + +When installing with custom configurations: + +```bash +vmetric-agent -path=/custom/config -service=install +``` + +### Background Processing + +* Silent Background Operation: + + ```bash + vmetric-agent -background + ``` + +### Multi-Instance Deployment + +* Instance 1 (Primary): + + ```bash + vmetric-agent -service=install + ``` + +* Instance 2 (Supervisor): + + ```bash + vmetric-agent -mode=supervisor -service=install + ``` + +## Security Considerations + +### Service Permissions + +Ensure the agent service runs with appropriate permissions for: + +- Network access +- File system access +- Registry access (Windows) +- Process management + +### Configuration Security + +- Secure configuration files with appropriate file permissions +- Use encrypted communication for cluster synchronization +- Implement access controls for management interfaces + +## Version Information + +* Check Current Version: + + ```bash + vmetric-agent -version + ``` + +* Compare Versions: + + ```bash + vmetric-agent -compare-version=2.1.0 + ``` + +* Show Usage Examples: + + ```bash + vmetric-agent -example + ``` diff --git a/versioned_docs/version-1.5.0/configuration/directors/cli/director.mdx b/versioned_docs/version-1.5.0/configuration/directors/cli/director.mdx new file mode 100644 index 00000000..289ff384 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/directors/cli/director.mdx @@ -0,0 +1,367 @@ +--- +description: Command-line options +pagination_prev: null +pagination_next: null +sidebar_label: Director +--- + +# CLI: Director + +## Overview + +**VirtualMetric Director™** is the core orchestration and data processing engine of the **DataStream** platform, designed to manage high-volume log ingestion, processing, and routing with enterprise-grade performance and reliability. 
It serves as the central command center that coordinates devices, pipelines, routes, and targets to create comprehensive data processing workflows. + +**Director** handles everything from simple syslog collection to complex multi-protocol data ingestion scenarios, providing real-time processing capabilities, advanced routing logic, and seamless integration with various storage and analysis systems. Its architecture supports both standalone deployments and distributed enterprise environments with built-in scalability and fault tolerance. + +## Synopsis + +```bash +vmetric-director [OPTIONS] +``` + +## Options + +The following command line options are available. + +### General Options + +|Option|Default|Description| +|--:|:--|---| +|`-compare-version`|-|Compare with specified version| +|`-config`|-|Run configuration wizard| +|`-example`|-|Show configuration examples| +|`-path`|-|Specify configuration file or directory path| +|`-validate`|-|Validate configuration files| +|`-version`|-|Display version information| + +### Operating Modes + +|Option|Default|Description| +|--:|:--|---| +|`-console`|-|Run in console monitoring mode| +|`-generator`|-|Generate test data| +|`-mode`|`director`|Operating mode (`director`, `supervisor`, `console`, `pipeline`, `generator`, `update`)| +|`-pipeline`|-|Execute pipeline processing mode| +|`-sentinel`|-|Run network discovery and monitoring| +|`-snmpwalk`|-|Perform SNMP device discovery| +|`-stats`|-|Display performance statistics| +|`-supervisor`|-|Run in supervisor mode| +|`-update`|-|Run update process| + +### Service Management + +|Option|Default|Description| +|--:|:--|---| +|`-agentless`|-|Run without installing as a service| +|`-background`|-|Run in background mode| +|`-info`|-|Display service information| +|`-service`|-|Service operation (`install`, `uninstall`, `start`, `stop`, `restart`)| +|`-service-name`|-|Custom service name for installation| +|`-vpc`|-|Display product code| + +### Pipeline Operations + +|Option|Default|Description| +|--:|:--|---| +|`-name`|-|Pipeline name for processing| +|`-definition-id`|-|Pipeline definition identifier| +|`-diff`|-|Show differences between actual and expected output| +|`-expected`|-|Expected output file for validation| +|`-export-json`|-|Export pipeline results as JSON| +|`-extended`|-|Show extended processing information| +|`-input`|-|Input data file or JSON string| +|`-output`|-|Output file for processed data| +|`-visualize`|-|Display pipeline processing visualization| + +### Console Monitoring + +|Option|Default|Description| +|--:|:--|---| +|`-debug-level`|-|Set debug verbosity level (1-5)| +|`-filter`|-|Filter console output by keywords| + +### Network Discovery + +|Option|Default|Description| +|--:|:--|---| +|`-autodiscovery`|-|Enable automatic network discovery| +|`-snmp-community`|-|SNMP community string| +|`-snmp-device-id`|-|SNMP device identifier| +|`-snmp-host`|-|Target SNMP host address| +|`-snmp-oid`|`.1.3`|Starting OID for SNMP walk| +|`-snmp-port`|`161`|SNMP port number| +|`-snmp-version`|`2c`|SNMP version (1, 2c, 3)| + +### Data Generation + +|Option|Default|Description| +|--:|:--|---| +|`-address`|`127.0.0.1:514`|Target address for generated data| +|`-count`|`1000`|Number of messages to generate| +|`-duration`|`300`|Total generation duration (seconds)| +|`-file-path`|-|File path for file-based operations| +|`-interval`|`1`|Interval between messages (seconds)| +|`-message`|`VirtualMetric Test Message`|Message content| +|`-now`|-|Generate messages immediately| +|`-protocol`|`syslog`|Protocol for 
data generation| +|`-severity`|`Error`|Syslog severity level| + +### Utility Options + +|Option|Default|Description| +|--:|:--|---| +|`-clear`|-|Clear statistics (used with -stats)| +|`-pfx2pem`|-|Convert PFX certificate to PEM format| +|`-stats`|-|Display performance statistics| + +## Common Uses + +The following are the valid switch combinations for the indicated tasks: + +### General Tasks + +- Display _version_ information: + + ``` + vmetric-director -version + ``` + +- Show _example_ configuration: + + ``` + vmetric-director -example + ``` + +- Run configuration tool: + + ``` + vmetric-director -config [config_file_path] + ``` + +- Compare _version_: + + ``` + vmetric-director -compare-version [version_string] + ``` + +- Convert _PFX_ certificate to _PEM_: + + ``` + vmetric-director -pfx2pem [certificate_path] + ``` + +- Validate configuration: + + ``` + vmetric-director -validate + ``` +### Service Management + +- Install service: + + ``` + vmetric-director -service install [-service-name custom_name] [-path installation_path] + ``` + +- Uninstall service: + + ``` + vmetric-director -service uninstall [-service-name custom_name] + ``` + +- Start service: + + ``` + vmetric-director -service start [-service-name custom_name] + ``` + +- Stop service: + + ``` + vmetric-director -service stop [-service-name custom_name] + ``` + +- Restart service: + + ``` + vmetric-director -service restart [-service-name custom_name] + ``` + +- Run in background (without service): + + ``` + vmetric-director -background [-mode director|supervisor] + ``` + +- Run in _agentless_ mode: + + ``` + vmetric-director -agentless [-mode director|supervisor] + ``` + +- Show service information: + + ``` + vmetric-director -info [-mode director|supervisor] + ``` + +- Show _product code_: + + ``` + vmetric-director -vpc [-mode director|supervisor] + ``` + +- Stop running process: + + ``` + vmetric-director -stop [-mode director|supervisor] + ``` + +### Operational Modes + +- Console mode: + + ``` + vmetric-director -mode console [-filter filter_string] [-debug-level level] + ``` + + -or- + + ``` + vmetric-director -console [-filter filter_string] [-debug-level level] + ``` + +- Pipeline mode: + + ``` + vmetric-director -mode pipeline -name [pipeline_name] [-path path] [-definition-id id] [-input input_file] [-output output_file] [-expected expected_file] [-filter filter] [-diff] [-validate] [-visualize] [-extended] [-export-json] + ``` + + -or- + + ``` + vmetric-director -pipeline -name [pipeline_name] [-path path] [-definition-id id] [-input input_file] [-output output_file] [-expected expected_file] [-filter filter] [-diff] [-validate] [-visualize] [-extended] [-export-json] + ``` + +- Generator mode: + ``` + vmetric-director -mode generator [-protocol syslog|tcp|http|netflow|vmf] [-address target_address] [-severity level] [-message message_text] [-count message_count] [-interval seconds] [-duration seconds] [-now] [-file-path file_path] + ``` + + -or- + + ``` + vmetric-director -generator [-protocol syslog|tcp|http|netflow|vmf] [-address target_address] [-severity level] [-message message_text] [-count message_count] [-interval seconds] [-duration seconds] [-now] [-file-path file_path] + ``` + +- Sentinel mode: + + ``` + vmetric-director -mode sentinel [-autodiscovery] + ``` + + -or- + + ``` + vmetric-director -sentinel [-autodiscovery] + ``` + +- SNMPWalk mode: + + ``` + vmetric-director -mode snmpwalk -snmp-device-id [device_id] -snmp-host [host] [-snmp-port port] [-snmp-version 1|2c|3] [-snmp-community 
community_string] [-snmp-oid oid] + ``` + + -or- + + ``` + vmetric-director -snmpwalk -snmp-device-id [device_id] -snmp-host [host] [-snmp-port port] [-snmp-version 1|2c|3] [-snmp-community community_string] [-snmp-oid oid] + ``` + +- Update mode: + + ``` + vmetric-director -mode update + ``` + + -or- + + ``` + vmetric-director -update + ``` + +- Supervisor mode: + + ``` + vmetric-director -mode supervisor + ``` + + -or- + + ``` + vmetric-director -supervisor + ``` + +- Statistics mode: + + ``` + vmetric-director -mode stats [-clear] + ``` + + -or- + + ``` + vmetric-director -stats [-clear] + ``` + +- **Director** mode (default): + + ``` + vmetric-director + ``` + + -or- + + ``` + vmetric-director -mode director + ``` + + -or- + + ``` + vmetric-director -director + ``` + +## Protocol-Specific Generator Tasks + +The following can be used to simulate various log streaming protocols: + +- _Syslog_ generator: + + ``` + vmetric-director -mode generator -protocol syslog -address 127.0.0.1:514 -severity Error -message "Test Message" -count 1000 -interval 1 -duration 300 + ``` + +- _TCP_ generator: + + ``` + vmetric-director -mode generator -protocol tcp -address 127.0.0.1:9000 -message "Test Message" -count 1000 -interval 1 -duration 300 + ``` + +- _HTTP_ generator: + + ``` + vmetric-director -mode generator -protocol http -address http://127.0.0.1:8080/logs -message "Test Message" -count 1000 -interval 1 -duration 300 + ``` + +- _Netflow_ generator: + + ``` + vmetric-director -mode generator -protocol netflow -address 127.0.0.1:2055 -count 1000 -interval 1 -duration 300 + ``` + +- _VMF_ generator: + + ``` + vmetric-director -mode generator -protocol vmf -file-path input.json -message "Test Message" -count 1000 -interval 1 -duration 300 + ``` diff --git a/versioned_docs/version-1.5.0/configuration/directors/configuration.mdx b/versioned_docs/version-1.5.0/configuration/directors/configuration.mdx new file mode 100644 index 00000000..bc9e0c4e --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/directors/configuration.mdx @@ -0,0 +1,315 @@ +# Configuration + +The Directors configuration interface provides comprehensive tools for creating, managing, and monitoring Director instances throughout their lifecycle. This centralized management system allows administrators to deploy new Directors, modify existing configurations, and maintain operational oversight across your entire Director fleet. + +## Accessing Director Configuration + +The Directors configuration interface is accessible through the Fleet Management section: + +**Navigation Path:** +- Go to **Home** page +- Find the **Fleet Management** pane +- Click the **Directors** link + +This takes you to the main Directors dashboard where you can view existing Directors and initiate new deployments. + +## Directors Management Dashboard + +The main interface provides a comprehensive view of your Director infrastructure with essential monitoring and management capabilities. 
+ +### Directors Table + +The central table displays all Director instances with key operational information: + +- **Name** - Unique identifier assigned during Director creation +- **Platform Type** - Management model (Self-managed by default) +- **Installation Type** - Architecture type (Standalone by default) +- **Mode** - Managed or Self-managed (Managed by default) +- **Status** - Current operational state (Enabled/Disabled) +- **Connection Status** - Real-time connectivity to cloud platform + - *Connected* (green) - Director successfully communicating with cloud platform + - *Not Connected* (red) - Director unable to establish or maintain connection + +### Interface Controls + +**Search and Filtering:** +- **Search directors** - Text field to quickly locate Directors by name +- **Status filter** - Dropdown to filter by Director status (All/Enabled/Disabled) +- **Pagination controls** - Navigate through multiple pages of Directors + +**Management Actions:** +- **Create director** - Primary button to initiate new Director deployment +- **Actions menu (⋮)** - Per-Director operations available from dropdown + +### Actions Menu Operations + +Each Director row includes a vertical ellipsis (⋮) providing access to management operations: + +- **Edit director** - Access Director configuration and settings +- **Download config** - Appears only on rows for _Self-managed_ directors. Download Director configuration +- **Enable/Disable director** - Toggle operational status +- **Delete director** - Remove Director from fleet (with dependency checks) + +## Creating New Directors + +The Director creation process guides you through deployment setup and installation verification. + +### Initial Setup + +1. **Access Creation Interface** + - Click the **Create director** button from the main Directors dashboard + - This opens the **Director Setup** configuration area + +2. **Configure Director Properties** + - **Name**: Enter a unique identifier for the Director + - Must be unique across your organization + - Cannot be changed after creation + - Validation prevents duplicate names + + - **Installation Type**: Select deployment architecture + - **Standalone** (default): Single Director instance + - Provides basic functionality without redundancy + - Suitable for most standard deployments + - **Self-managed** Local deployment. + - Suitable for users that do not want to send private data to the cloud. + - Configuration file must be downloaded + +### Installation Process + +3. **Generate Installation Scripts** + - After configuration, click **Create director** + - System creates Director record and generates unique API key + - Platform-specific installation scripts are provided: + - **PowerShell** button (Windows environments) + - **Bash** button (Linux/Unix environments) + +4. 
**Execute Installation** + - Copy the generated API key (displayed in dedicated box) + - Open terminal with administrator privileges on target system + - Copy and paste the installation script + - Execute script and wait for completion (typically several minutes) + +### Network Requirements + +**Critical Configuration:** +- Ensure port **443** is open for outbound HTTPS connections +- Director requires access to VirtualMetric service endpoints +- If operating in restricted network environment, allow documented domains/IP ranges + +**Firewall Configuration:** +- Only outbound connections required (no inbound firewall rules needed) +- Director initiates all communication with cloud platform +- Encrypted HTTPS for all cloud platform interactions + +### Connection Verification + +5. **Verify Deployment** + - Click **Verify connection** button to test Director connectivity + - System sends requests to confirm Director installation and operation + - Connection verification may take several attempts during initial setup + +6. **Handle Verification Results** + - **Successful Connection**: Continue to completion process + - **Connection Failure**: Review network configuration and troubleshooting steps + - **Deferred Verification**: Use **Connect Later** to skip verification + +7. **Complete Setup** + - Once connection is verified, **Complete setup** button becomes enabled + - Click to finalize Director creation and return to Directors dashboard + - New Director appears in table with *Enabled* and *Connected* status + - Success notification confirms Director creation and connectivity + +## Managing Existing Directors + +The Director management system provides comprehensive tools for ongoing configuration and monitoring. + +### Accessing Director Management + +**From Directors Table:** +- Click **Actions** menu (⋮) for specific Director +- Select **Edit director** from dropdown options + +This provides access to detailed Director configuration and monitoring interfaces. + +### Director Details Configuration + +The Director management interface includes multiple sections for comprehensive oversight: + +#### Basic Configuration Management + +**Editing Director Details:** +1. **Access Configuration** + - Navigate to **Director Details** view from Actions menu + - All management options available via **Actions** dropdown (top right) + +2. **Modify Director Settings** + - Click **Manage director details** to enable editing + - **Name field** becomes editable (primary configurable setting) + - **Cancel** and **Save changes** buttons appear for modification control + +3. **Apply Changes** + - Make necessary modifications to Director name + - Click **Save changes** to apply updates + - Changes take effect immediately across the platform + +#### Connection Details Management + +**Monitoring Connectivity:** +- **Connection Details** tab provides real-time connectivity information +- **Connection Status** indicator shows current state: + - *Not connected* (red) - Director unable to communicate with platform + - *Connected* (green) - Normal operational connectivity +- **IP Address** display shows Director's network location +- **Connection History** provides historical connectivity tracking + +**Reinstallation Procedures:** +When Director connectivity or configuration issues require reinstallation: + +1. 
**Initiate Reinstallation** + - Click **Re-install director** button in Connection Details + - System presents confirmation modal with security warning + - **Important**: This action revokes current API key and generates new one + +2. **Confirm Reinstallation** + - Review warning about API key revocation + - Click **Continue** to proceed with reinstallation process + - New API key and installation script are generated + +3. **Execute New Installation** + - Follow same installation procedures as initial Director setup + - Use new API key and installation script provided + - Verify successful connection and operation + +#### Activity Monitoring + +**Comprehensive Audit Trail:** +The **Activity logs** tab maintains detailed operational history: + +**Activity Information:** +- **User** - Email address of administrator who performed action +- **User IP** - Source IP address for the administrative action +- **Object Type** - Resource type being managed (typically "Director") +- **Object** - Specific Director identifier (using unique name) +- **Action** - Type of operation performed on Director +- **Action Description** - Detailed explanation of the activity +- **Date** - Timestamp when action was executed + +**Use Cases for Activity Monitoring:** +- Security auditing and compliance reporting +- Troubleshooting operational issues +- Change management and approval workflows +- Performance analysis and optimization planning + +## Director Status Management + +Directors can be enabled or disabled to control their operational state without deletion. + +### Enabling and Disabling Directors + +**Status Toggle Operations:** +1. **Access Status Controls** + - From Directors table: Click **Actions** menu (⋮) for target Director + - Select **Enable director** or **Disable director** from dropdown + +2. **Status Change Effects** + - **Disabling Director**: Halts all data ingestion and processing + - Stops data collection from all configured sources + - Prevents data routing to all target destinations + - Maintains Director configuration and connection to cloud platform + - **Enabling Director**: Resumes normal data processing operations + - Restores data ingestion from configured sources + - Re-establishes data routing to target destinations + - Returns Director to full operational status + +### Status Use Cases + +**Temporary Suspension:** +- Maintenance windows for source or destination systems +- Troubleshooting data quality or processing issues +- Resource conservation during low-activity periods + +**Operational Control:** +- Staged deployment and testing procedures +- Capacity management during peak processing periods +- Emergency response for security or compliance incidents + +## Director Deletion + +Director deletion provides permanent removal of Director instances with comprehensive dependency checking. + +### Deletion Process + +1. **Initiate Deletion** + - From Directors table: Click **Actions** menu (⋮) for target Director + - Select **Delete director** from dropdown options + +2. **Dependency Verification** + - System checks for attached devices and targets + - If dependencies exist, warning modal displays: + - Lists all components managed by the Director + - Prevents deletion until dependencies are removed + - Provides guidance on required cleanup actions + +3. 
**Confirm Deletion** + - For Directors without dependencies, deletion proceeds immediately + - Director is permanently removed from organization + - All associated configuration and history are deleted + - Action cannot be undone + +### Pre-Deletion Requirements + +**Mandatory Cleanup:** +Before Director deletion, you must remove all associated components: + +- **Data Sources**: Disconnect all devices sending data to Director +- **Target Destinations**: Remove all routing configurations to external systems +- **Processing Pipelines**: Clear all active data transformation configurations +- **Integration Points**: Update any systems referencing the Director + +**Verification Steps:** +1. Review warning modal for complete dependency list +2. Systematically remove or reassign each listed component +3. Return to deletion process once all dependencies are cleared +4. Confirm final deletion when system verifies no remaining dependencies + +## Best Practices for Director Configuration + +### Naming Conventions + +**Effective Director Naming:** +- Use descriptive names reflecting deployment purpose or location +- Include environment indicators (prod, dev, test) where applicable +- Consider organizational structure (department, region, function) +- Maintain consistency across Director fleet for easier management + +### Configuration Management + +**Change Control:** +- Document all configuration changes in activity logs +- Test configuration changes in development environment first +- Coordinate changes with dependent system administrators +- Maintain backup procedures for critical configuration data + +### Monitoring and Maintenance + +**Operational Oversight:** +- Regularly review Director status and connectivity +- Monitor activity logs for unusual or unauthorized changes +- Establish alerting for connection failures or status changes +- Plan regular maintenance windows for updates and optimization + +### Security Considerations + +**Access Control:** +- Limit Director management permissions to authorized administrators +- Regularly review and audit administrative access +- Implement approval processes for critical operations (deletion, reinstallation) +- Monitor activity logs for security and compliance requirements + +**API Key Management:** +- Treat API keys as sensitive credentials requiring secure handling +- Rotate API keys regularly or when security incidents occur +- Ensure secure storage and transmission of installation scripts +- Implement procedures for compromised credential response + +This comprehensive configuration management approach ensures reliable, secure, and well-governed Director operations across your security telemetry infrastructure. \ No newline at end of file diff --git a/versioned_docs/version-1.5.0/configuration/directors/deployment.mdx b/versioned_docs/version-1.5.0/configuration/directors/deployment.mdx new file mode 100644 index 00000000..97fb140c --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/directors/deployment.mdx @@ -0,0 +1,292 @@ +# Deployment + +VirtualMetric DataStream Directors support flexible deployment options to match your infrastructure requirements and operational preferences. Whether you're running on physical hardware, virtual machines, or containerized environments, Directors can be deployed to optimize performance while maintaining data sovereignty. 
+ +## Deployment Overview + +Directors are lightweight, containerized services that process security telemetry data locally while connecting to the DataStream cloud platform for configuration management. This architecture ensures your sensitive data remains within your controlled environment while providing centralized management capabilities. + +### Supported Deployment Models + +**Standalone Director** +- Single Director instance handling all data processing +- Recommended for most production deployments +- Simple configuration and management +- Suitable for small to medium-scale environments + +**Clustered Director** *(Coming Soon)* +- Multiple Director instances with load balancing and high availability +- Automatic failover and redundancy +- Horizontal scaling capabilities +- Ideal for mission-critical, high-volume environments + +## Deployment Options + +### Physical Server Deployment + +Deploy Directors directly on dedicated physical hardware for maximum performance and complete infrastructure control. + +**Advantages:** +- Maximum performance and resource allocation +- Complete control over hardware specifications +- No virtualization overhead +- Ideal for high-throughput environments + +**Considerations:** +- Higher infrastructure costs and maintenance overhead +- Limited flexibility for resource scaling +- Longer deployment and provisioning times + +### Virtual Machine Deployment + +Deploy Directors on virtual machines across various hypervisors and cloud platforms for balanced performance and flexibility. + +**Advantages:** +- Flexible resource allocation and scaling +- Cost-effective resource utilization +- Simplified backup and disaster recovery +- Platform agnostic (VMware, Hyper-V, KVM, AWS, Azure, GCP) + +**Considerations:** +- Slight performance overhead from virtualization +- Dependency on hypervisor platform stability + +**Recommended VM Specifications:** + +| Workload Size | CPU Cores | Memory | Storage | Notes | +|---------------|-----------|---------|---------|-------| +| Small | 2-4 cores | 8GB RAM | 50GB | Development/testing, < 10K EPS | +| Medium | 4-8 cores | 16GB RAM | 100GB | Standard production, 10K-50K EPS | +| Large | 8+ cores | 32GB RAM | 200GB+ | High-volume production, > 50K EPS | + +### Container Deployment + +Deploy Directors in containerized environments for modern infrastructure management and orchestration capabilities. 
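+
+Both container options below follow the same basic pattern: the Director runs as a single service that reaches out to the cloud platform over HTTPS and listens only on the collection ports you configure. For illustration only, such a deployment can be sketched as a Compose file; the image name, environment variable, ports, and volume paths are placeholders assumed for this sketch, not published product artifacts — use the installation scripts generated in the DataStream interface for actual deployments.
+
+```yaml
+# Hypothetical Compose sketch - names and paths are illustrative placeholders.
+services:
+  director:
+    image: virtualmetric/datastream-director:latest   # assumed image name
+    restart: unless-stopped
+    environment:
+      # API key generated during Director creation; variable name is assumed
+      DIRECTOR_API_KEY: "${DIRECTOR_API_KEY}"
+    ports:
+      - "514:514/udp"                 # example syslog ingestion port; expose only what your devices need
+    volumes:
+      - ./config:/director/config     # assumed configuration and storage paths
+      - ./storage:/director/storage
+# Outbound HTTPS (443) to the cloud platform requires no inbound port mapping.
+```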
+
+**Docker Deployment:**
+- Single-host container deployment
+- Simplified dependency management
+- Easy scaling and updates
+- Ideal for development and small production environments
+
+**Kubernetes Deployment:**
+- Multi-node orchestration with automatic scaling
+- Built-in service discovery and load balancing
+- Rolling updates with zero downtime
+- Enterprise-grade high availability and resilience
+
+**Container Advantages:**
+- Consistent deployment across environments
+- Rapid scaling and resource optimization
+- Integrated monitoring and logging capabilities
+- DevOps-friendly CI/CD integration
+
+## Platform-Specific Considerations
+
+### Linux Director Deployment
+
+**Performance Benefits:**
+- Network-based collectors (syslog, TCP, SNMP) operate more efficiently
+- Lower resource overhead for network processing
+- Superior performance for high-volume data ingestion
+- Native support for Unix/Linux system integration
+
+**Agent Connectivity:**
+- **Windows Agent**: Full support for Windows systems via VirtualMetric Agent
+- **Linux Agentless**: Complete support via SSH-based connections
+- **Windows Agentless**: Not supported (Microsoft deprecated WinRM support)
+
+**Recommended For:**
+- High-volume network data collection environments
+- Mixed Windows/Linux infrastructure with agent-based monitoring
+- Cost-sensitive deployments requiring maximum efficiency
+
+### Windows Director Deployment
+
+**Agentless Connectivity:**
+- **Windows Agentless**: Full support via WinRM protocols
+- **Linux Agentless**: Complete support via SSH connections
+- **Universal Agent**: Support for both Windows and Linux systems
+
+**Integration Benefits:**
+- Native Windows service integration
+- Active Directory authentication support
+- PowerShell-based management capabilities
+- Seamless Windows ecosystem integration
+
+**Recommended For:**
+- Windows-centric environments
+- Organizations requiring agentless Windows monitoring
+- Environments with existing Windows management infrastructure
+
+## Installation Process
+
+### Standalone Director Installation
+
+The standard installation process follows a guided setup through the DataStream web interface:
+
+1. **Access Director Creation**
+   - Navigate to Home > Fleet Management > Directors
+   - Click "Create director" to begin the setup process
+
+2. **Configure Director Properties**
+   - Assign unique Director name for identification
+   - Select "Standalone" installation type
+   - Choose appropriate platform
+
+   A self-managed director is indicated under the **Mode** column as _Self-managed_, with a warning icon to its right. Hovering over the icon displays a tooltip informing the user that the configuration has changed and that the current one has to be deployed.
+
+   :::info
+   The actions menu of a self-managed director contains a _Download config_ option. Clicking it downloads the `vmetric.vmf` file to the **Downloads** directory of **Windows**. This file should be placed under the `\Director\config` directory.
+   :::
+
+   The self-managed option removes the access verification step. The user can monitor errors through the CLI or the files under the `\Director\storage\logs` directory.
+
+3. **Generate Installation Scripts**
+   - System generates platform-specific installation scripts
+   - Unique API key created for secure cloud connectivity
+   - Scripts provided for both PowerShell (Windows) and Bash (Linux)
+
+4. 
**Execute Installation** + - Run provided script with administrative privileges on target system + - Installation downloads and configures Director service + - Automatic service registration and startup configuration + +5. **Verify Connectivity** + - Use built-in connection verification tool + - Confirm Director successfully connects to DataStream platform + - Complete setup process once connectivity is established + +### Network Requirements + +**Outbound Connectivity:** +- Port 443 (HTTPS) for DataStream cloud platform communication +- DNS resolution for *.virtualmetric.com domains +- Certificate validation requires accurate system time + +**Inbound Connectivity:** +- Configure based on data source requirements +- Common ports: 514 (Syslog), 1514 (Secure Syslog), 162 (SNMP) +- Custom ports as defined in device configurations + +### Firewall Configuration + +**Outbound Rules (Required):** +- Allow HTTPS (443) to *.virtualmetric.com +- Allow DNS queries for name resolution +- Allow NTP for time synchronization + +**Inbound Rules (As Needed):** +- Open ports for configured data collection protocols +- Allow management access (SSH for Linux, RDP for Windows) +- Configure source restrictions based on security policies + +## Deployment Best Practices + +### Security Considerations + +**Network Security:** +- Deploy Directors in appropriate network segments +- Implement network access controls and monitoring +- Use dedicated service accounts with minimal privileges +- Enable logging and audit trails for security monitoring + +**Data Protection:** +- All sensitive data processing occurs locally on Director +- Only configuration metadata transmitted to cloud platform +- Implement encryption for data at rest and in transit +- Regular security updates and patch management + +### Performance Optimization + +**Resource Allocation:** +- Monitor CPU and memory utilization patterns +- Allocate sufficient disk space for logging and buffering +- Configure appropriate network interface capacity +- Plan for peak load scenarios and growth + +**Data Processing Efficiency:** +- Optimize YAML pipeline configurations for performance +- Implement efficient parsing and transformation rules +- Use appropriate batch sizes for different data sources +- Monitor processing latency and throughput metrics + +### High Availability Planning + +**Backup and Recovery:** +- Regular configuration backups and version control +- Document recovery procedures and test regularly +- Implement monitoring and alerting for service health +- Plan for disaster recovery scenarios + +**Redundancy Options:** +- Deploy multiple Directors for critical environments +- Implement load balancing for high-availability scenarios +- Consider geographic distribution for disaster recovery +- Plan for seamless failover procedures + +## Troubleshooting Deployment Issues + +### Common Installation Problems + +**Script Execution Failures:** +- Verify administrative privileges for installation +- Check network connectivity to download servers +- Ensure required dependencies are installed +- Review firewall and proxy configurations + +**Service Startup Issues:** +- Confirm system meets minimum requirements +- Verify proper file permissions and ownership +- Check for port conflicts with existing services +- Review system logs for detailed error messages + +**Connectivity Problems:** +- Validate outbound HTTPS connectivity +- Confirm DNS resolution for required domains +- Check system time synchronization +- Verify API key accuracy and format + +For detailed 
troubleshooting procedures, refer to the Directors Troubleshooting documentation. + +## Advanced Deployment Scenarios + +### Multi-Site Deployments + +For organizations with multiple locations or data centers: + +- Deploy Directors at each site for local data processing +- Implement centralized configuration management +- Coordinate routing and aggregation strategies +- Plan for inter-site connectivity and failover + +### Compliance and Regulatory Requirements + +For regulated industries requiring specific compliance: + +- Implement appropriate data retention and disposal policies +- Configure audit logging and compliance reporting +- Ensure data sovereignty and jurisdictional requirements +- Plan for regulatory audit and inspection procedures + +### Scalability Planning + +As your environment grows: + +- Monitor resource utilization and performance trends +- Plan for vertical and horizontal scaling options +- Consider migration to clustered deployments +- Implement capacity planning and forecasting procedures + +## Next Steps + +Once you've selected your deployment approach: + +1. **Prepare Infrastructure** - Set up target systems with required specifications +2. **Configure Networking** - Implement firewall rules and connectivity requirements +3. **Install Director** - Follow the guided installation process in the DataStream interface +4. **Configure Data Sources** - Set up devices and data collection points +5. **Test and Validate** - Verify data flow and processing functionality +6. **Monitor Operations** - Implement ongoing monitoring and maintenance procedures + +For specific installation guidance, access the Director Configuration interface through Home > Fleet Management > Directors and follow the step-by-step setup wizard. \ No newline at end of file diff --git a/versioned_docs/version-1.5.0/configuration/directors/introduction.mdx b/versioned_docs/version-1.5.0/configuration/directors/introduction.mdx new file mode 100644 index 00000000..c4bae6cc --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/directors/introduction.mdx @@ -0,0 +1,214 @@ +--- +sidebar_label: Introduction +--- + +# Introduction + +Directors are the core data processing engines within the **DataStream** platform, responsible for collecting, processing, transforming, and routing security telemetry data from various sources to target destinations. They serve as the central orchestration layer that maintains data sovereignty by keeping sensitive information within your environment while providing centralized cloud-based management. + +## What is a Director? + +A **Director** is a lightweight, containerized service that acts as a secure data processing hub in your infrastructure. It connects securely to the DataStream cloud platform for configuration management while ensuring all sensitive security data remains within your controlled environment. 
+ +### Key Capabilities + +**Data Processing Pipeline:** +- Ingests security data from multiple sources (syslog, APIs, files, databases) +- Applies real-time transformation and normalization using YAML-defined pipelines +- Supports multiple security schemas (ASIM, OCSF, ECS, CIM, UDM) +- Routes processed data to various destinations (SIEM platforms, data lakes, security tools) + +**Security and Compliance:** +- Maintains data sovereignty by processing all data locally +- Establishes outbound-only HTTPS connections to cloud management services +- Provides comprehensive audit logging and activity tracking +- Supports enterprise security requirements and compliance frameworks + +**Scalability and Reliability:** +- Horizontal scaling through clustering capabilities +- High availability configurations for mission-critical environments +- Resource-efficient processing with minimal infrastructure requirements +- Automatic failover and load balancing in clustered deployments + +## Platform Management Options + +DataStream provides two distinct management approaches for Directors, each designed for different organizational needs and security requirements: + +### Self-Managed Directors + +**Self-Managed** Directors provide complete control over the deployment and management of your data processing infrastructure. This option is ideal for organizations with specific security requirements or existing infrastructure management processes. + +**Characteristics:** +- Full control over deployment environment and configuration +- Direct management of updates, patches, and maintenance +- Custom security controls and compliance configurations +- Integration with existing infrastructure monitoring and management tools +- Support for air-gapped or restricted network environments + +**Suitable For:** +- Organizations with strict data governance requirements +- Environments with existing container orchestration systems +- Companies requiring custom security configurations +- Regulated industries with specific compliance needs + +### Managed Directors (Enterprise Feature) + +**Managed** Directors offer a fully-managed service where VirtualMetric handles the infrastructure management, monitoring, and maintenance of your Directors while still maintaining data sovereignty. + +**Characteristics:** +- Automated deployment and configuration management +- Proactive monitoring and maintenance by VirtualMetric +- Automatic updates and security patches +- 24/7 support and incident response +- Performance optimization and capacity planning + +**Suitable For:** +- Organizations seeking reduced operational overhead +- Teams without dedicated infrastructure management resources +- Companies prioritizing time-to-value over operational control +- Environments requiring guaranteed SLA and support coverage + +## Installation Types + +Directors support different installation architectures to accommodate various operational requirements and scale needs: + +### Standalone Installation + +**Standalone** is the default installation type, designed for straightforward deployments where a single Director instance handles all data processing needs. 
+ +**Features:** +- Single Director instance per deployment +- Simplified configuration and management +- Resource-efficient for most use cases +- Quick deployment and setup process + +**Limitations:** +- No built-in high availability or load balancing +- Single point of failure for data processing +- Limited horizontal scaling capabilities +- Manual backup and disaster recovery procedures + +**Recommended For:** +- Small to medium-scale deployments +- Development and testing environments +- Organizations with basic availability requirements +- Initial proof-of-concept implementations + +### Clustered Installation (Enterprise Feature) + +**Clustered** installations provide high availability and horizontal scaling capabilities through multiple Director instances working together. + +**Features:** +- Multiple Director instances with automatic load balancing +- Built-in failover and redundancy mechanisms +- Horizontal scaling based on processing demands +- Distributed processing for improved performance +- Shared state management across cluster nodes + +**Benefits:** +- Elimination of single points of failure +- Improved processing capacity and throughput +- Automatic recovery from node failures +- Dynamic scaling based on data volume +- Enhanced monitoring and observability + +**Recommended For:** +- Mission-critical security data processing +- High-volume environments requiring guaranteed availability +- Organizations with strict SLA requirements +- Production deployments requiring enterprise-grade reliability + +## Directors Management Interface + +The Directors interface provides comprehensive tools for monitoring and managing your Director fleet across different environments and deployment types. + +### Directors Dashboard + +The main **Directors** interface offers centralized visibility and control over all Director instances: + +**Directors Table:** +- **Name** - Unique identifier assigned during Director creation +- **Platform Type** - Management model (Self-managed or Managed) +- **Installation Type** - Architecture type (Standalone or Clustered) +- **Status** - Current operational state (Enabled/Disabled) +- **Connection Status** - Real-time connectivity indicator (Connected/Not Connected) + +**Management Controls:** +- **Search directors** - Quick filtering by Director name +- **Status filter** - Filter by operational status +- **Create director** - Initiate new Director deployment +- **Actions menu (⋮)** - Per-Director operations (Edit, Enable/Disable, Delete) + +### Director Operations + +The platform provides comprehensive management capabilities for the complete Director lifecycle: + +**Creating Directors:** +- Guided setup process with name assignment and configuration selection +- Platform-specific installation scripts (PowerShell, Bash) +- Automatic API key generation for secure cloud connectivity +- Connection verification and status monitoring + +**Managing Directors:** +- Real-time status monitoring and health checks +- Configuration updates and deployment management +- Activity logging and audit trail maintenance +- Connection troubleshooting and diagnostic tools + +**Monitoring Directors:** +- Performance metrics and resource utilization tracking +- Data throughput and processing statistics +- Error logging and incident tracking +- Integration status with source systems and destinations + +## Director Architecture and Data Flow + +Directors operate as secure intermediaries between your security data sources and target destinations, implementing a data sovereignty model 
that keeps sensitive information within your controlled environment. + +### Data Processing Architecture + +**Input Layer:** +- Multiple simultaneous data source connections +- Protocol-agnostic ingestion (Syslog, REST APIs, file monitoring) +- Real-time streaming and batch processing capabilities +- Built-in buffering and queuing for reliability + +**Processing Layer:** +- YAML-defined transformation pipelines +- Multi-schema normalization and enrichment +- Real-time data validation and quality checks +- Custom logic implementation through processors + +**Output Layer:** +- Multi-destination routing and delivery +- Format adaptation for different target systems +- Delivery confirmation and retry mechanisms +- Performance optimization for various endpoint types + +### Security and Connectivity Model + +**Outbound-Only Communication:** +- Directors initiate all cloud platform connections +- No inbound firewall rules required +- Encrypted HTTPS communication for all cloud interactions +- Certificate-based authentication and authorization + +**Data Sovereignty:** +- All security data processing occurs locally +- No sensitive data transmitted to cloud services +- Configuration and metadata-only cloud synchronization +- Complete audit trail for compliance and governance + +## Getting Started with Directors + +To begin using Directors in your environment, follow this general workflow: + +1. **Access Director Management** - Navigate to Home > Fleet Management > Directors +2. **Create New Director** - Use the guided setup to define your Director configuration +3. **Deploy Director Service** - Run the provided installation script in your target environment +4. **Verify Connectivity** - Confirm successful connection to the DataStream cloud platform +5. **Configure Data Sources** - Set up connections to your security data sources +6. **Define Processing Rules** - Create YAML pipelines for data transformation and routing +7. **Monitor Operations** - Use the dashboard to track performance and troubleshoot issues + +The Directors interface provides comprehensive guidance and support throughout this process, with detailed documentation, troubleshooting resources, and expert assistance available for complex deployments. \ No newline at end of file diff --git a/versioned_docs/version-1.5.0/configuration/directors/troubleshooting.mdx b/versioned_docs/version-1.5.0/configuration/directors/troubleshooting.mdx new file mode 100644 index 00000000..941e6511 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/directors/troubleshooting.mdx @@ -0,0 +1,394 @@ +# Troubleshooting + +This guide provides solutions to common Director deployment and operational issues. Issues are organized by category with step-by-step resolution procedures. + +## Connection Issues + +### Director Cannot Connect to Cloud Platform + +**Symptoms:** +- Director status shows as "Not Connected" (red indicator) +- Connection verification fails during setup +- Timeout errors in Director logs + +**Resolution Steps:** + +1. **Validate Network Connectivity** + - Ensure port 443 is open for outbound HTTPS connections + - Verify DNS resolution is working properly + - Test connectivity: `curl -I https://portal.virtualmetric.com` + +2. **Check Firewall Configuration** + - Allow outbound connections to `*.virtualmetric.com` domains + - Ensure no SSL/TLS inspection is blocking certificate validation + - Verify proxy configurations if applicable + +3. 
**Validate API Key** + - Confirm the API key was copied correctly during installation + - Check for extra spaces or line breaks in the key + - Regenerate API key if corruption is suspected + +4. **Review System Time** + - Ensure system clock is synchronized (certificate validation requirement) + - Use NTP to maintain accurate time synchronization + - Check timezone configuration matches your location + +**Additional Diagnostics:** +```bash +# Test DNS resolution +nslookup portal.virtualmetric.com + +# Check outbound connectivity +telnet portal.virtualmetric.com 443 + +# Verify certificate chain +openssl s_client -connect portal.virtualmetric.com:443 +``` + +### Connection Drops Intermittently + +**Symptoms:** +- Director alternates between Connected and Not Connected states +- Periodic timeout errors in logs +- Data processing interruptions + +**Resolution Steps:** + +1. **Network Stability Check** + - Monitor network latency and packet loss + - Verify network equipment stability + - Check for bandwidth limitations or throttling + +2. **Resource Monitoring** + - Ensure adequate CPU and memory resources + - Monitor disk I/O performance + - Check for resource contention with other services + +3. **Proxy Configuration** + - Verify proxy server stability and configuration + - Check proxy authentication credentials + - Consider bypassing proxy for testing + +## Installation Issues + +### Director Fails to Start + +**Symptoms:** +- Installation script completes but service doesn't start +- Error messages during service startup +- Director not appearing in management console + +**Resolution Steps:** + +1. **Check System Requirements** + - Verify supported operating system version + - Ensure minimum hardware requirements are met + - Confirm administrator/root privileges for installation + +2. **Permission Validation** + - Ensure installation user has sufficient privileges + - Check file and directory permissions in installation path + - Verify service account permissions if using dedicated account + +3. **Port Availability** + - Confirm required ports are not in use by other services + - Check local firewall settings + - Use `netstat -an` to identify port conflicts + +4. **Review Installation Logs** + - Check installation script output for error messages + - Review system event logs for service startup failures + - Examine Director service logs for specific error details + +**Common Solutions:** +```bash +# Windows: Check service status +Get-Service VirtualMetricDirector + +# Linux: Check service status +systemctl status virtualmetric-director + +# Check port usage +netstat -tulpn | grep :443 +``` + +### Installation Script Fails + +**Symptoms:** +- Script execution terminates with error +- Permission denied messages +- Download failures during script execution + +**Resolution Steps:** + +1. **Execution Context** + - Run PowerShell as Administrator (Windows) + - Use sudo privileges for bash script (Linux) + - Ensure execution policy allows script running (Windows) + +2. **Network Access** + - Verify internet connectivity for downloading components + - Check corporate firewall for download restrictions + - Consider manual download if automated download fails + +3. 
**System Dependencies**
+   - Install required runtime dependencies (.NET Core, Docker)
+   - Update package managers (apt, yum, chocolatey)
+   - Resolve any missing system libraries
+
+**Windows PowerShell Execution Policy:**
+```powershell
+# Check current execution policy
+Get-ExecutionPolicy
+
+# Allow script execution (if required)
+Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
+```
+
+## Data Processing Issues
+
+### Director Not Receiving Data
+
+**Symptoms:**
+- No data appearing in target destinations
+- Zero throughput metrics in monitoring
+- Source systems showing successful transmission
+
+**Resolution Steps:**
+
+1. **Source Configuration Verification**
+   - Confirm correct Director IP address in source system configuration
+   - Verify port numbers match between source and Director
+   - Check protocol settings (TCP/UDP for syslog, HTTP/HTTPS for APIs)
+
+2. **Network Connectivity Testing**
+   - Test connection from source system to Director
+   - Verify routing and firewall rules allow traffic
+   - Use packet capture tools to confirm data transmission
+
+3. **Director Input Configuration**
+   - Review input configuration in Director settings
+   - Verify enabled protocols and listening ports
+   - Check for configuration syntax errors
+
+4. **Log Analysis**
+   - Examine Director logs for input processing errors
+   - Check for data parsing or validation failures
+   - Review error messages for specific issues
+
+**Diagnostic Commands:**
+```bash
+# Test syslog connectivity (replace the placeholders with the Director address and listening port)
+logger -n <director_ip> -P <port> "Test message"
+
+# Check listening ports
+netstat -tulpn | grep <port>
+
+# Monitor network traffic
+tcpdump -i any port <port>
+```
+
+### Data Processing Errors
+
+**Symptoms:**
+- Partial data processing with error messages
+- Data transformation failures
+- Inconsistent output formatting
+
+**Resolution Steps:**
+
+1. **Pipeline Configuration Review**
+   - Validate YAML syntax in processing pipelines
+   - Check field mappings and transformation rules
+   - Verify regular expressions and parsing patterns
+
+2. **Data Format Validation**
+   - Examine sample input data for format consistency
+   - Check for unexpected characters or encoding issues
+   - Verify timestamp formats match expected patterns
+
+3. **Resource Monitoring**
+   - Monitor CPU and memory usage during processing
+   - Check for disk space availability
+   - Ensure adequate processing capacity for data volume
+
+4. **Error Log Analysis**
+   - Review detailed error messages in Director logs
+   - Identify specific records causing processing failures
+   - Check for schema validation errors
+
+## Performance Issues
+
+### Slow Data Processing
+
+**Symptoms:**
+- High latency between data ingestion and output
+- Growing backlog of unprocessed data
+- Timeout errors in processing pipeline
+
+**Resolution Steps:**
+
+1. **Resource Optimization**
+   - Increase CPU and memory allocation to Director
+   - Monitor resource utilization patterns
+   - Consider vertical scaling for improved performance
+
+2. **Pipeline Efficiency**
+   - Review processing pipeline for optimization opportunities
+   - Simplify complex transformation rules where possible
+   - Optimize regular expressions and parsing logic
+
+3. **Output Destination Performance**
+   - Check target system capacity and response times
+   - Verify network connectivity to destination systems
+   - Consider batch processing for improved throughput
+
+4. 
**Clustering Consideration** + - Evaluate clustered deployment for horizontal scaling + - Distribute processing load across multiple Director instances + - Implement load balancing for improved performance + +### High Resource Usage + +**Symptoms:** +- Excessive CPU or memory consumption +- System performance degradation +- Out of memory errors + +**Resolution Steps:** + +1. **Configuration Tuning** + - Adjust buffer sizes and queue lengths + - Optimize processing batch sizes + - Configure appropriate timeout values + +2. **Pipeline Optimization** + - Identify resource-intensive transformation operations + - Optimize data parsing and enrichment logic + - Consider caching frequently accessed data + +3. **System Monitoring** + - Implement comprehensive resource monitoring + - Set up alerts for resource threshold breaches + - Plan capacity upgrades based on usage patterns + +## Migration and Maintenance + +### How to Migrate Director to New Server + +**Scenario:** Moving Director installation to new hardware or different server. + +**Migration Steps:** + +1. **Prepare New Environment** + - Install required dependencies on new server + - Ensure network connectivity and firewall configuration + - Verify system requirements and permissions + +2. **Export Configuration** + - Document current Director configuration and settings + - Export processing pipelines and custom rules + - Note integration points with source and destination systems + +3. **Install Director on New Server** + - Run installation script on new server + - Use existing organization credentials and API key + - Follow standard installation verification procedures + +4. **Update System Integrations** + - Update source systems to point to new Director IP address + - Verify destination system connectivity from new location + - Test data flow end-to-end + +5. **Decommission Old Director** + - Stop data processing on old Director + - Remove Director from management console + - Properly dispose of old server and credentials + +### Director Reinstallation Process + +**When to Reinstall:** +- API key compromise or security incident +- Significant configuration corruption +- Major system updates or changes + +**Reinstallation Steps:** + +1. **Initiate Reinstallation from Console** + - Navigate to Director details in management interface + - Click "Re-install director" button + - Acknowledge API key revocation warning + +2. **Execute New Installation Script** + - Download new installation script with updated API key + - Run installation script with administrative privileges + - Verify successful installation and connection + +3. 
**Restore Configuration** + - Reconfigure data sources and destinations + - Restore custom processing pipelines + - Verify all integrations are working properly + +**Important Notes:** +- Previous API key will be immediately revoked +- All existing connections will be terminated +- Configuration may need to be restored manually + +## Advanced Troubleshooting + +### Log Analysis and Diagnostics + +**Director Log Locations:** +- Windows: `C:\ProgramData\VirtualMetric\Director\Logs\` +- Linux: `/var/log/virtualmetric/director/` + +**Key Log Files:** +- `application.log` - General Director operation and errors +- `connection.log` - Cloud platform connectivity events +- `processing.log` - Data transformation and pipeline execution +- `performance.log` - Resource usage and performance metrics + +**Log Analysis Commands:** +```bash +# Monitor real-time logs +tail -f /var/log/virtualmetric/director/application.log + +# Search for specific errors +grep -i "error\|exception" /var/log/virtualmetric/director/*.log + +# Check recent connection events +grep "connection" /var/log/virtualmetric/director/connection.log | tail -50 +``` + +### Performance Monitoring + +**System Metrics to Monitor:** +- CPU utilization and load average +- Memory usage and available space +- Disk I/O performance and space utilization +- Network throughput and connection statistics + +**Director-Specific Metrics:** +- Data ingestion rate (events per second) +- Processing latency and queue depth +- Error rates and failure patterns +- Connection stability and uptime + +### Support and Escalation + +When standard troubleshooting procedures don't resolve issues: + +1. **Gather Diagnostic Information** + - Collect relevant log files and error messages + - Document system configuration and environment details + - Note specific symptoms and reproduction steps + +2. **Contact Support** + - Submit support ticket with comprehensive diagnostic data + - Include Director version and deployment information + - Provide timeline of when issues first occurred + +3. **Emergency Escalation** + - For critical production issues, use emergency contact procedures + - Follow your organization's incident management processes + - Consider temporary workarounds while permanent solutions are developed \ No newline at end of file diff --git a/versioned_docs/version-1.5.0/configuration/pipelines/conditional-running.mdx b/versioned_docs/version-1.5.0/configuration/pipelines/conditional-running.mdx new file mode 100644 index 00000000..6871f8c3 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/pipelines/conditional-running.mdx @@ -0,0 +1,130 @@ +# Conditional Running + +Processors support an optional `if` field that uses the expr expression language for conditional execution. + +## Basic Usage + +If provided, the processor only runs when the condition evaluates to true. The condition has access to the current log entry context as `service`. 
+ +```yaml +- drop: + if: "service.network.direction != 'local'" +``` + +## Condition Types + +### Null Checks + +Check for null/nil values safely using optional chaining with `?`: + +```yaml +- set: + if: "service._temp_?.timezone_offset != null && service._temp_?.timezone_offset != 'local'" + field: timezone + value: "{{service._temp_.timezone_offset}}" + +- drop: + if: "service.observer.egress.interface.name == nil && service.network.direction == 'outbound'" +``` + +### Numeric Comparisons + +Compare numeric values using standard operators: + +```yaml +- set: + if: "service.source?.packets > 5 && service.destination?.packets < 30" + field: traffic_status + value: "normal" +``` + +### Array Operations + +Check array contents using `contains` and `notcontains` (or `!contains`): + +```yaml +- drop: + if: "['Drop', 'Reject', 'Block', 'Prevent'].contains(service.checkpoint?.rule_action)" + +- set: + if: "['Reject', 'Block', 'Prevent'].notcontains(service.checkpoint?.rule_action)" + field: allow_status + value: true +``` + +### String Operations + +Work with strings using various methods: + +```yaml +# Case-insensitive contains +- set: + if: "service.checkpoint.product.toLowerCase().contains('example')" + field: is_example + value: true + +# Using lower() function +- set: + if: "lower(checkpoint.product) contains 'example'" + field: is_example + value: true +``` + +### Key Existence Checks + +Check if fields or keys exist: + +```yaml +- set: + if: "service.containsKey('network')" + field: has_network + value: true + +- set: + if: "service.network.containsKey('direction')" + field: has_direction + value: true +``` + +## Complex Conditions + +For better readability, complex conditions can be split across multiple lines using `>` in YML: + +```yaml +- set: + if: > + service?._temp_?.external_zones != null && + service?._temp_?.internal_zones != null && + service?.observer?.ingress?.zone != null && + service?.observer?.egress?.zone != null && + service._temp_.external_zones.contains(service.observer.egress.zone) && + service._temp_.external_zones.contains(service.observer.ingress.zone) + field: zone_status + value: "external" +``` + +:::caution +Keep conditions simple and focused for better performance and maintainability. +::: + +## Pipeline Branching + +The `if` condition can be used to conditionally execute different pipelines: + +```yaml +- pipeline: + name: network_traffic + if: "service?.source.packets != null && service.destination.packets != null" + +- pipeline: + name: threat_detection + if: "service.threat?.indicator?.confidence > 50" + +- fail: + if: "service.network?.direction == nil" + message: "Network direction must be specified" +``` + +:::tip +Use optional chaining (`?.`) to safely access nested fields without causing null pointer exceptions. +::: diff --git a/versioned_docs/version-1.5.0/configuration/pipelines/handling-failures.mdx b/versioned_docs/version-1.5.0/configuration/pipelines/handling-failures.mdx new file mode 100644 index 00000000..8fac59e0 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/pipelines/handling-failures.mdx @@ -0,0 +1,78 @@ +# Handling Failures + +The processors in a pipeline run sequentially, and when one of them fails or encounters an error, by default the process halts. + +The `on_failure` processor can be used to specify a list of processors to run immediately after a processor fails. However, **Director** keeps running the pipeline's remaining processors even if the `on_failure` field is blank. 
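+
+For example, in the minimal sketch below (the field names are only illustrative), the `rename` processor fails when `foo` is missing, so under the default behavior the `set` processor that follows it never runs. The sections below show how to change this.
+
+```yaml
+- rename:
+    field: foo
+    target_field: bar
+- set:
+    field: status
+    value: "reached only if the rename succeeds"
+```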
+ +## Recovery + +To ignore a failure and keep running the remaining processors in the pipeline, `ignore_failure` must be set to `true`. + +```yaml +- rename: + field: provider + target_field: cloud.provider + ignore_failure: true +``` + +## Continuation + +To configure a processor so that in the event of a failure another processor runs, the `on_failure` parameter can be used. + +```yaml +- rename: + field: foo + target_field: bar + on_failure: + - set: + field: error.message + value: "Field 'foo' could not be renamed as 'bar'" + override: false +``` + +## Nesting + +For cascading errors, multiple `on_failure` processors can be nested. + +```yaml +- rename: + field: foo + target_field: bar + on_failure: + - set: + field: error.message + value: "Field 'foo' could not be renamed as 'bar'" + override: false + on_failure: + - set: + field: error.message.cascade + value: "Multiple errors encountered" + override: true +``` + +## Pipelines + +The `on_failure` option can also be specified for the pipeline. When a processor fails, in the absence of a processor-level `on_failure` setting, **DataStream** uses the pipeline-level setting as a fallback. However, in that case it does not attempt to run the remaining processors. + +```yaml +pipelines: + - name: routine_process + on_failure: + - set: + field: _index + value: failed-{{{ _ingest.on_failure_processor_tag }}} + description: "Processor failed" +``` + +## Metadata + +More information may be available in the metadata fields `on_failure_message`, `on_failure_processor_type`, `on_failure_processor_tag`, and `on_failure_pipeline` which can only be accessed from within an `on_failure` block. + +```yaml +pipelines: + - name: routine_process + on_failure: + - set: + field: error_info + value: "{{ _ingest.on_failure_processor_type }} in {{ _ingest.on_failure_pipeline }} failed; error message: {{ _ingest.on_failure_message }}" +``` diff --git a/versioned_docs/version-1.5.0/configuration/pipelines/handling-success.mdx b/versioned_docs/version-1.5.0/configuration/pipelines/handling-success.mdx new file mode 100644 index 00000000..cdcd28ee --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/pipelines/handling-success.mdx @@ -0,0 +1,91 @@ +# Handling Success + +The processors in a pipeline run sequentially, and when one of them completes successfully, **Director** provides options to control what happens next through the `on_success` field. + +## Notification + +The simplest use of `on_success` is to log a message or set a field when a processor completes successfully. + +```yaml +- rename: + field: message + target_field: event.message + on_success: + - set: + field: status + value: "Field renamed successfully" +``` + +## Chaining + +To run additional processors after a successful execution, list them in the `on_success` field. + +```yaml +- grok: + field: message + patterns: ["%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}"] + on_success: + - date: + field: timestamp + target_field: "@timestamp" + formats: ["ISO8601"] + - remove: + field: timestamp +``` + +## Nesting + +For complex processing flows, multiple `on_success` processors can be nested. 
+ +```yaml +- grok: + field: message + patterns: ["%{IP:client.ip} %{WORD:http.method} %{URIPATHPARAM:http.url}"] + on_success: + - geoip: + field: client.ip + target_field: client.geo + on_success: + - set: + field: enrichment_status + value: "IP and Geo information extracted" +``` + +## Pipeline Branching + +The `on_success` handlers can trigger processors in other pipelines, enabling conditional processing flows. + +```yaml +pipelines: + - name: parse_apache + processors: + - grok: + field: message + patterns: ["%{COMBINEDAPACHELOG}"] + on_success: + - pipeline: enrich_apache + + - name: enrich_apache + processors: + - geoip: + field: clientip + target_field: geoip +``` + +## Metadata + +Success context information is available in metadata fields that can be accessed within an `on_success` block. + +```yaml +- grok: + field: message + patterns: ["%{DATA:event.type}"] + on_success: + - set: + field: processor_info + value: "Successfully processed by {{ _pipeline.name }} pipeline" +``` + +:::note +Unlike error handling, success handlers do not interrupt the normal pipeline flow. After executing the`on_success` processors, **Director** continues with the next in the pipeline. +::: diff --git a/versioned_docs/version-1.5.0/configuration/pipelines/normalization.mdx b/versioned_docs/version-1.5.0/configuration/pipelines/normalization.mdx new file mode 100644 index 00000000..fc131b35 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/pipelines/normalization.mdx @@ -0,0 +1,129 @@ +# Normalization + +Normalization is a critical stage connecting ingestion from sources and forwarding to targets used to coalesce log data from diverse sources into consistent formats, enabling unified handling across different logging systems. + +## Log Formats + +The processor supports several widely-used log formats: + +### Generic + +|Format|Notation|Key Identifier|Layout Characteristics|Example Fields| +|:-:|:-:|:-:|:-:|:-:| +|Elastic Common Schema (ECS)|Dot notation with lowercase|`@timestamp`|Hierarchical structure|`source.ip`, `network.direction`| +|Splunk Common Information Model (CIM)|Underscore with lowercase|`_time`|Flat structure|`src_ip`, `network_direction`| +|Advanced Security Information Model (ASIM)|PascalCase|`TimeGenerated`|Explicit names|`SourceIp`, `NetworkDirection`| + +### Security-specific + +|Format|Description|Key Identifier|Example Fields| +|:-:|:-:|:-:|:-:| +|Common Event Format (CEF)|ArcSight's standard format|`rt` (receiptTime)|`networkUser`, `sourceAddress`| +|Log Event Extended Format (LEEF)|IBM QRadar's format|`devTime`|`networkUser`, `srcAddr`| +|Common Security Log (CSL)|Microsoft Sentinel's format|`TimeGenerated`|`NetworkUser`, `SourceAddress`| + +## Format Detection + +Source formats can be automatically detected using certain characteristic fields, e.g. + +|Context|Field|Format| +|--:|:--|:--| +|Timestamp|`@timestamp`|ECS| +||`_time`|CIM| +||`TimeGenerated`|ASIM/CSL +|Security|`rt`|CEF| +||`devTime`|LEEF| +|CSL detection|`TimeGenerated` + `LogSeverity`|CSL| +||`TimeGenerated` only|ASIM| + +## Conversion + +### Casing and Delimiters + +Each format follows specific naming conventions: + + + **ECS** + `source.ip`, `event.severity` + + **CIM** + `src_ip`, `event_severity` + + **ASIM** + `SourceIp`, `EventSeverity` + + **CEF** + `sourceAddress`, `eventSeverity` + + **LEEF** + `srcAddr`, `evtSev` + + **CSL** + `SourceIP`, `EventSeverity` + + +:::caution +Complex format conversions may impact performance. 
+:::
+
+### Field Mapping
+
+There are identifiable common network fields based on context across various formats:
+
+|Format|Source IP|Destination IP|Direction|
+|:-:|:-:|:-:|:-:|
+|`ecs`|`source.ip`|`destination.ip`|`network.direction`|
+|`cim`|`src_ip`|`dest_ip`|`network_direction`|
+|`asim`|`SourceIp`|`DstIp`|`NetworkDirection`|
+|`cef`|`src`|`dst`|`networkDirection`|
+|`leef`|`srcAddr`|`dstAddr`|`netDir`|
+|`csl`|`SourceIp`|`DestinationIp`|`NetworkDirection`|
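+
+For instance, converting CEF data to ECS with the `normalize` options covered in the Configuration section below would be expected to rename the network fields according to this table. The snippet is an illustrative sketch rather than a complete configuration:
+
+```yaml
+normalize:
+  source_format: cef
+  target_format: ecs
+# Expected renames per the mapping table above:
+#   src              -> source.ip
+#   dst              -> destination.ip
+#   networkDirection -> network.direction
+```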
+ +## Configuration + +### Basic + +Convert from ECS to ASIM format: + +```yaml +normalize: + source_format: ecs + target_format: asim +``` + +### Field-specific + +Convert a specific network field: + +```yaml +normalize: + field: network_data + source_format: cef + target_format: ecs +``` + +### Auto-detection + +Let the processor detect the source format: + +```yaml +normalize: + target_format: cim +``` + +## Preprocessing + +Fields are standardized with `normalize` for conversion between the ECS, CIM, ASIM, CEF, LEEF and CSL formats (see the [Log Formats](#log-formats) and [Conversion](#conversion) sections above). Values are formatted for uniform casing with `uppercase` and `lowercase` processors when required by the target format's naming conventions. + +## Postprocessing + +Fields are optimized for storage and queries using format conversion with the `normalize` processor (see the [Conversion](#conversion) and [Field Mapping](#field-mapping) sections above). For Microsoft Sentinel integration, data is prepared by converting to the ASIM format with `normalize` (see [Log Formats](#log-formats) table). + +:::warning +Complex format conversions may impact processing performance and delivery latency. +::: + + + diff --git a/versioned_docs/version-1.5.0/configuration/pipelines/overview.mdx b/versioned_docs/version-1.5.0/configuration/pipelines/overview.mdx new file mode 100644 index 00000000..f6ccea77 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/pipelines/overview.mdx @@ -0,0 +1,253 @@ +--- +pagination_prev: null +sidebar_label: Overview +--- + +import useBaseUrl from '@docusaurus/useBaseUrl'; + +# Pipelines: Overview + +**VirtualMetric DataStream** pipelines were designed to automate large-volume data processing. They can be used to extract values from various sources, transform or convert these values, enrich them by correlating them with other available information, and to forward them to various destinations for consumption. + +In short, they are aimed at helping you design and implement your telemetry workflow. + +## Definitions + +A pipeline, in its simplest form, is a chain of processors that run sequentially, operating on the incoming data streamed from providers, and directing them to destinations for consumption. + +The sources can be devices, networks, or other pipelines, and the targets can be consoles, files, storage systems, or other pipelines. Sources and targets can be connected to each other in _one-to-one_, _one-to-many_, _many-to-one_, and _many-to-many_ configurations. + +Each incoming stream can be queried with criteria specific to the information contained therein, and each outgoing stream can be enriched with inferences made from correlations for use on the destination side. + +The following table summarizes the basic structure of a pipeline: + +> **Provider** (_raw data_) → **Pipeline** (_transformation_) → **Consumer** (_processed data_) + +where **Pipeline** (_transformation_) is + +> (processor1 ∘ processor2 ∘ … ∘ processorn)(_data_) + +{/* Schematically, a data stream leading to various pipelines are likely to be structured like this: + +Device To Pipeline + +and the processor chain will frequently resemble this: + +Processor Chain */} + +In real life scenarios, the level of complexity of the configurations will vary based on the requirements of the consumers and the targeted destinations. 
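+
+As a minimal sketch of this provider-to-pipeline relationship, the following wires a device to a pipeline that in turn hands part of its work to a second pipeline. It reuses the device syntax from the Pipeline Chaining section below and the `pipeline` processor used for branching; the names, device type, and port are illustrative placeholders:
+
+```yaml
+devices:
+  - name: edge_http
+    type: http
+    pipelines:
+      - intake
+    properties:
+      port: 8080
+
+pipelines:
+  - name: intake
+    processors:
+      - set:
+          field: event.module
+          value: "edge"
+      - pipeline:
+          name: enrichment
+
+  - name: enrichment
+    processors:
+      - geoip:
+          field: source.ip
+          target_field: source.geo
+```
+
+Here the `intake` pipeline acts as a consumer for the device and as a provider for the `enrichment` pipeline, which is what allows sources and targets to be chained in the arrangements described above.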
+ +## Configuration + +All pipelines share the following base configuration fields: + +|Field|Required|Default|Description| +|---|---|---|---| +|`name`|Y||Unique identifier for the pipeline| +|`description`|N||Optional explanation of the pipeline's purpose| +|`processors`|Y||Ordered list of processors to execute| +|`if`|N||Conditional expression to determine if pipeline should be applied| +|`on_failure`|N||Processors to execute if the pipeline fails| +|`ignore_failure`|N|`false`|Continue pipeline execution despite processor failures| + +Use the `name` of the pipeline to refer to it in your configurations. + +**Example**: + +```yaml +pipelines: + - name: checkpoint + processors: + - grok: + field: message + patterns: + - "%{COMMONAPACHELOG}" + - set: + field: event.provider + value: "checkpoint" +``` + +This pipeline uses a `grok` processor with a pattern that extracts _Apache_ log data, and a `set` processor to write "`checkpoint`" to the `event.provider` field. + +## Deployment + +There are multiple ways to deploy pipelines, and they can be used in various configurations: + +* A pipeline can consume data from multiple providers, and—once it is finished processing them—can direct some or all of the processed data to a specific consumer in a one-to-one scheme: + + > **Provider**1[A] → **Pipeline** → **Consumer**1(A) + +* It can also consume data from a single provider and deliver the processed results to multiple consumers: + + > **Provider**1[A] + **Provider**2[B] → **Pipeline** → **Consumer**1(A) + **Consumer**2(B) + +* Finally—and possibly in most real-life scenarios—there may be multiple providers and consumers connected to each other in much more complex arrangements: + + > **Provider**1[A, B] + **Provider**2[C] + **Provider**3[D] → **Pipeline** → **Consumer**1(A, C) + **Consumer**2(A, B, D) + +Effectively, each source may be the target of an upstream pipeline, and each target may serve as the source for a downstream one. The detail to keep in mind is that each provider side is delegating some processing to the next pipeline based on the established requirements of the consumer side. The pipeline is acting as the _middleman_ for the data interchange. + +Even if the pipeline routes some of the data without any processing, this will be due to the demands of the consumer side, so the pipeline is still performing a meaningful role by forwarding the data as per the consumer's policy. + +## Pipeline Types + +Pipelines can be categorized based on their role in the data processing workflow: + +* **Pre-processing** - These pipelines prepare raw data for further processing: Parse raw logs, normalize timestamps, remove unnecessary fields + +* **Routing** - These pipelines determine where data should be directed: Filter events by criteria, tag data for specific destinations + +* **Transformation** - These pipelines change data structure or format: Field mapping, data type conversion, schema alignment + +* **Enrichment** - These pipelines add context to existing data: Lookup tables, geolocation, threat intelligence integration + +* **Post-processing** - These pipelines prepare data for specific targets: Format adaptation, field mapping to destination schemas + +## Use Cases + +Pipelines can be put to good use to implement some common patterns for data processing. 
The following table summarizes a few typical layouts and their recommended processors: + +|Use Case|Recommended Processors| +|--:|:--| +|Log Parsing|`grok` → `date` → `rename` → `remove`| +|Security Events|`kv` → `geoip` → `set` → `script`| +|API Data|`json` → `convert` → `rename` → `set`| +|CSV Processing|`csv` → `convert` → `rename`| +|Field Standardization|`rename` → `lowercase` → `set`| + +And here are some examples: + +- **Log Parsing and Normalization**: A common use case is parsing and normalizing log messages from various sources. For example, a pipeline that processes syslog messages might look like this: + + ```yaml + pipelines: + - name: syslog_parser + description: "Parse and normalize syslog messages" + processors: + - grok: + field: message + patterns: + - "%{SYSLOGLINE}" + - date: + field: timestamp + target_field: "@timestamp" + formats: + - "MMM dd HH:mm:ss" + - "MMM d HH:mm:ss" + - set: + field: event.kind + value: event + - rename: + fields: + - from: program + to: process.name + - from: logsource + to: host.hostname + ``` + +- **Field Enrichment**: A pipeline can be used to enrich security events with additional context, such as geolocation data or threat intelligence: + + ```yaml + pipelines: + - name: security_enrichment + description: "Enrich security events with additional context" + processors: + - geoip: + field: source.ip + target_field: source.geo + - set: + field: event.category + value: security + - script: + lang: Go + source: | + if strings.HasPrefix(source.ip, '192.168.') { + source.type = 'internal'; + } else { + source.type = 'external'; + } + ``` + +- **Data Transformation**: A sequence of processors can be used to transform data formats, such as converting IP addresses or normalizing field names: + + ```yaml + pipelines: + - name: data_normalizer + description: "Standardize field names and values" + processors: + - rename: + fields: + - from: src + to: source.ip + - from: dst + to: destination.ip + - convert: + field: bytes + type: long + - remove: + field: + - bytes_in + - bytes_out + ``` + +## Implementation Strategies + +### Streamlined Streams + +The incoming data streams will have their own structure. As such, they will at best be only partially suitable for analysis and, later, decision making. In its raw form, data is frequently not—at least may not be—what it seems to be. It has to be sorted and sifted through to pick the relevant information from it. + +The process of selecting or discarding items based on specific criteria is called _curation_. This involves checking whether a field's value matches or contains the values or fragments of values we are looking for. + +After this, the remaining data may need to be converted into forms making them more suitable for analysis and use. This second phase is called _transformation_. + +Finally, the data may contain hints or fragments of information which, when correlated with other available information, may yield insights that may be prerequisite to analysis and use. Adding correlated information in order to render the data more relevant—or increase its relevance—is known as _enrichment_. + +It is through this type of seamless three-stage design that a pipeline truly shines and proves itself indispensible for telemetry. 
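+
+The following pipeline is a minimal sketch of that three-stage shape, combining processors shown elsewhere in this documentation; the pipeline name, field names, and values are illustrative placeholders:
+
+```yaml
+pipelines:
+  - name: firewall_curation
+    processors:
+      # Curation: discard events that are not relevant downstream
+      - drop:
+          if: "service.network?.direction == 'local'"
+      # Transformation: standardize field names and types
+      - rename:
+          fields:
+            - from: src
+              to: source.ip
+      - convert:
+          field: bytes
+          type: long
+      # Enrichment: correlate with geolocation context
+      - geoip:
+          field: source.ip
+          target_field: source.geo
+```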
+ +### Pipeline Composition + +Divide and conquer your data processing by composing targeted pipelines: + +* **Modular Design**: Create small, focused pipelines that handle specific tasks +* **Reusable Components**: Build common processing patterns as reusable pipeline segments +* **Conditional Processing**: Use conditional statements to selectively apply processors +* **Error Handling**: Implement robust error handling with `on_failure` and `ignore_failure` + +### Conditional Execution + +Pipelines and processors support conditional execution using the `if` parameter: + +```yaml +pipelines: + - name: web_logs + processors: + - set: + field: event.category + value: web + if: "_ingest.key.source == 'apache'|| _ingest.key.source == 'nginx'" + - geoip: + field: client.ip + target_field: client.geo + if: "_ingest._key.client?.ip != null" +``` + +### Pipeline Chaining + +Pipelines can be chained together using routes or referenced in device configurations: + +```yaml +devices: + - name: web_server_logs + type: http + pipelines: + - common_enrichment + - web_logs + properties: + port: 8080 +``` + +--- + +## Next Steps + +For an example of a locally validated pipeline, see our A Local Pipeline tutorial. diff --git a/versioned_docs/version-1.5.0/configuration/pipelines/processors/aad-errcode.mdx b/versioned_docs/version-1.5.0/configuration/pipelines/processors/aad-errcode.mdx new file mode 100644 index 00000000..404bfe10 --- /dev/null +++ b/versioned_docs/version-1.5.0/configuration/pipelines/processors/aad-errcode.mdx @@ -0,0 +1,280 @@ +--- +description: Converts Azure Active Directory error codes to human-readable descriptions +sidebar_custom_props: + customIcon: 🔑 + customCategory: Enrich +--- + +# AAD Error Code + +EnrichASIM Compatible + +## Synopsis + +Converts Azure Active Directory (AAD) error codes to human-readable error descriptions using ASIM lookup logic. + +## Schema + +```yaml {3} +- aad_errcode: + description: + field: + target_field: + if: