// Inbound message with a PDF attachment whose embedded link or QR code
// carries the recipient's email address, either in plain text or base64
// encoded.
type.inbound

// at least one PDF, and no more than four attachments in total
and any(attachments, .file_type == "pdf")
and length(attachments) <= 4

// exactly one "to" recipient (the address is embedded in the link, so there
// can be only one), and that recipient's email domain must be valid
and length(recipients.to) == 1
and all(recipients.to, .email.domain.valid)

and any(
  // candidate attachments: PDFs with a file name tailored to the recipient
  // or containing a financial reference
  filter(attachments,
         .file_type == "pdf"
         and (
           // file name contains the recipient's SLD or local part
           all(recipients.to,
               strings.icontains(..file_name, .email.domain.sld)
               or strings.icontains(..file_name, .email.local_part)
           )
           // file name contains a "financial" entity per the NLU classifier
           or any(ml.nlu_classifier(.file_name).entities,
                  .name == "financial"
                  and strings.contains(..file_name, .text)
           )
         )
  ),

  // signal 1: a URL inside the PDF (exploded entries at depth 0)
  any(file.explode(.),
      .depth == 0
      and any(
        // keep only the URLs worth inspecting
        filter(.scan.pdf.urls,
               not (
                 // mailto: / email: links
                 strings.istarts_with(.url, 'mailto:')
                 or strings.istarts_with(.url, 'email:')
                 // links whose domain shows up in the exiftool "producer" field
                 or (
                   ..scan.exiftool.producer is not null
                   and strings.icontains(..scan.exiftool.producer,
                                         .domain.domain
                   )
                 )
                 // links whose domain shows up in the exiftool "creator" field
                 or (
                   ..scan.exiftool.creator is not null
                   and strings.icontains(..scan.exiftool.creator,
                                         .domain.domain
                   )
                 )
               )
               // legitimate domains that exhibit this behavior
               and .domain.root_domain not in (
                 "univarsolutions.com",
                 "westpac.com.au",
                 "safeshiphub.com",
                 "sharepoint.com"
               )
        ),
        // the remaining URL contains the recipient's email address
        any(recipients.to,
            .email.domain.valid
            and (
              // in plain text
              strings.icontains(..url, .email.email)
              // or inside a base64 encoded segment of the URL
              or any(beta.scan_base64(..url,
                                      format="url",
                                      ignore_padding=true
                     ),
                     strings.icontains(., ..email.email)
              )
            )
        )
      )
  )

  // signal 2: a QR code (exploded entries at depth 1) that resolves to a URL
  // with a valid domain
  or any(file.explode(.),
         .depth == 1
         and .scan.qr.url.domain.valid
         and any(recipients.to,
                 // QR code URL contains the email in plain text
                 strings.icontains(..scan.qr.url.url, .email.email)
                 // or QR code URL contains the base64 encoded email
                 or any(beta.scan_base64(..scan.qr.url.url,
                                         format="url",
                                         ignore_padding=true
                        ),
                        strings.icontains(., ..email.email)
                 )
         )
  )
)
Playground
Test against your own EMLs or sample data.