type.inbound
// Rule outline: an inbound message with many links and an HR/payroll theme,
// where at least one visible link has odd-looking display text and its
// analyzed screenshot is classified as credential theft.
//
// Gate 1: the body carries more than 20 links.
and length(body.links) > 20
// Gate 2: at least one HR-themed indicator in the sender display name,
// the subject line, or the current thread text.
and (
  // -- sender display name --
  regex.icontains(sender.display_name, "\\bhr\\b")
  or strings.icontains(sender.display_name, "human resources")
  or strings.icontains(sender.display_name, "employee relation")
  // -- subject line --
  // the character classes accept look-alike substitutions (1, l, |)
  // for the vowels in "salaries"
  or regex.icontains(subject.subject, "sal[ai1l|]r[i1l|]es")
  or regex.icontains(subject.subject, "hr__.{0,30}")
  or regex.icontains(subject.subject, "work.{0,5}hours")
  or regex.icontains(subject.subject, "instant:.{0,20}(salaries|salary|changed|update)")
  // -- current thread text --
  or strings.icontains(body.current_thread.text, "vacation plan")
)
// Gate 3: one single link has to satisfy (a), (b) and (c) together.
and any(body.links,
        // (a) the link's display text shows a domain/path irregularity
        //     (for example "www.,company.com")
        (
          // "www" then a comma, with at most one character in between.
          // NOTE(review): the "." is unescaped, so it matches any single
          // character, not only a literal dot.
          regex.icontains(.display_text, "www.?,")
          // two or more dots in a row
          or regex.icontains(.display_text, "\\.{2,}")
          // a dot immediately followed by a comma
          or regex.icontains(.display_text, "\\.,")
          // a dot, one of the listed two-character endings, then a slash
          or regex.icontains(.display_text, "\\.(tu|cg|mv|tk|3v|ct|jh)/")
          // a dot, 1-3 alphanumerics, then at least two alphanumeric path
          // segments each closed by a slash
          or regex.icontains(.display_text, "\\.[a-z0-9]{1,3}/[a-z0-9]+/[a-z0-9]+/")
          // a whole path segment equal to a payroll or login style keyword
          or regex.icontains(.display_text, "/(sal[ai1l|]r[i1l|]es|login|auth|verify|portal|payment)/")
        )
        // (b) the link is marked visible
        and .visible == true
        // (c) the text OCR'd from the link-analysis screenshot is classified
        //     with a high-confidence "cred_theft" intent
        and any(ml.nlu_classifier(beta.ocr(ml.link_analysis(.).screenshot).text).intents,
                .name == "cred_theft" and .confidence == "high"
        )
)
// Exclusion 1: skip messages whose current thread text is classified, with
// medium or high confidence, under any of the topics listed below.
and not any(ml.nlu_classifier(body.current_thread.text).topics,
            .name in (
              "Security and Authentication",
              "Secure Message",
              "Newsletters and Digests",
              "Entertainment and Sports"
            )
            and .confidence in ("medium", "high")
)
// Exclusion 2: skip messages that carry more than 5 previous threads
// (i.e. a long run of prior correspondence).
and not length(body.previous_threads) > 5
Playground
Test against your own EMLs or sample data.