type.inbound
// Overview: flag messages with a PDF whose file name looks targeted (recipient's
// SLD or a financial term) and whose content carries the recipient's email
// address, either inside its single link or inside a QR code URL.
// one or more PDF documents
and length(filter(attachments, .file_type == "pdf")) >= 1
and length(attachments) <= 4
// a single recipient (this is in the link so there can be only one)
and length(recipients.to) == 1
and all(recipients.to, .email.domain.valid)
// first argument narrows to PDFs with a targeted-looking file name;
// second argument inspects the content of each such PDF
and any(filter(attachments,
               .file_type == "pdf"
               and (
                 // contains the SLD of recipient
                 all(recipients.to,
                     strings.icontains(..file_name, .email.domain.sld)
                 )
                 // contains financial references
                 or any(ml.nlu_classifier(.file_name).entities,
                        .name == "financial"
                        and strings.contains(..file_name, .text)
                 )
               )
        ),
        (
          // the urls in the PDF
          // NOTE(review): depth == 0 is presumably the attachment itself rather
          // than objects extracted from it - confirm against file.explode docs
          any(filter(file.explode(.), .depth == 0),
              // a single URL in the PDF
              length(distinct(filter(.scan.pdf.urls,
                                     // remove mailto: links
                                     not strings.istarts_with(.url, 'mailto:')
                                     and not strings.istarts_with(.url,
                                                                  'email:'
                                     )
                                     // remove links whose domain appears in the exiftool producer field
                                     and not (
                                       ..scan.exiftool.producer is not null
                                       and strings.icontains(..scan.exiftool.producer,
                                                             .domain.domain
                                       )
                                     )
                                     // remove links whose domain appears in the exiftool creator field
                                     and not (
                                       ..scan.exiftool.creator is not null
                                       and strings.icontains(..scan.exiftool.creator,
                                                             .domain.domain
                                       )
                                     )
                                     // common observed invoice system that exhibits this behavior
                                     and not .domain.root_domain == "univarsolutions.com"
                              ),
                              .url
                     )
              ) == 1
              // it contains the email address of the recipient
              and any(recipients.to,
                      .email.domain.valid
                      and any(..scan.pdf.urls,
                              // skip the same non-web schemes that the URL count
                              // above ignores, so an ignored mailto:/email: link
                              // cannot be the one that satisfies this check
                              not strings.istarts_with(.url, 'mailto:')
                              and not strings.istarts_with(.url, 'email:')
                              and (
                                // here `..` is the recipient
                                strings.icontains(.url, ..email.email)
                                // or the base64 encoded email
                                // (inside the lambda `...` is the recipient)
                                or any(beta.scan_base64(.url,
                                                        format="url",
                                                        ignore_padding=true
                                       ),
                                       strings.icontains(., ...email.email)
                                )
                              )
                      )
              )
          )
          // or there is a QR code
          or (
            // NOTE(review): depth == 1 presumably selects objects extracted from
            // the PDF (where QR scan results appear) - confirm
            any(filter(file.explode(.), .depth == 1),
                .scan.qr.url.domain.valid
                and any(recipients.to,
                        // QR code contains the email
                        (
                          // here `..` is the exploded file, `.` is the recipient
                          strings.icontains(..scan.qr.url.url, .email.email)
                          // QR code contains the base64 encoded email
                          // (inside the lambda `..` is the recipient)
                          or any(beta.scan_base64(..scan.qr.url.url,
                                                  format="url",
                                                  ignore_padding=true
                                 ),
                                 strings.icontains(., ..email.email)
                          )
                        )
                )
            )
          )
        )
)
Playground
Test against your own EMLs or sample data.