// Inbound message that is either unsolicited, or comes from a sender whose
// profile shows malicious/spam messages and no benign ones.
type.inbound
and (
  not profile.by_sender().solicited
  or (
    profile.by_sender().any_messages_malicious_or_spam
    and not profile.by_sender().any_messages_benign
  )
)
// the message carries exactly one attachment
and length(attachments) == 1
// the sender's root domain is in the free email provider list
and sender.email.domain.root_domain in $free_email_providers
// that attachment is a PDF; each any(file.explode(.), ...) below is evaluated
// independently, so the three conditions may be met by different exploded nodes
and any(attachments,
        .file_extension == "pdf"
        // exiftool reports a page count of 1
        and any(file.explode(.), .scan.exiftool.page_count == 1)
        // OCR text is longer than 60 characters
        and any(file.explode(.), length(.scan.ocr.raw) > 60)
        // 4 of the following indicators match the OCR text of one exploded node
        and any(file.explode(.),
                4 of (
                  strings.icontains(.scan.ocr.raw, "fraudulent activity"),
                  strings.icontains(.scan.ocr.raw, "Social Security Number"),
                  strings.icontains(.scan.ocr.raw, "SSN"),
                  strings.icontains(.scan.ocr.raw, "stolen"),
                  strings.icontains(.scan.ocr.raw, "illicit activities"),
                  strings.icontains(.scan.ocr.raw, "Social Security Administration"),
                  strings.icontains(.scan.ocr.raw, "if you are innocent"),
                  strings.icontains(.scan.ocr.raw, "help line"),
                  strings.icontains(.scan.ocr.raw, "Department of Justice"),
                  strings.icontains(.scan.ocr.raw, "innocent"),
                  // single indicator: a dollar amount shaped like $ddd.dd together
                  // with a phone-number-like string (either pattern below)
                  // NOTE(review): in the first phone pattern the alternation sits at
                  // the top of the group, so a bare "+<digit>" satisfies it on its
                  // own - confirm that is intended
                  regex.icontains(.scan.ocr.raw, '\$\d{3}\.\d{2}\b')
                  and (
                    regex.contains(.scan.ocr.raw,
                                   '(\+\d|1.(\()?\d{3}(\))?\D\d{3}\D\d{4})'
                    )
                    or regex.contains(.scan.ocr.raw,
                                      '\+?(\d{1,2})?\s?\(?\d{3}\)?[\s\.\-⋅]{0,5}\d{3}[\s\.\-⋅]{0,5}\d{4}'
                    )
                  )
                )
        )
)
// Playground
// Test against your own EMLs or sample data.