// Rule overview: an inbound, first-contact style message (no earlier threads,
// no PDF attachments) whose current thread is classified as file sharing, and
// which carries exactly one link to a self-service creation platform, a URL
// shortener, or dynamics.com, where that link is tied to the subject line.
type.inbound

// cap on the total number of links in the body
and length(body.links) < 20

// the NLU classifier must report the "File Sharing and Cloud Services" topic
// for the current thread with a confidence other than "low"
and any(ml.nlu_classifier(body.current_thread.text).topics,
        .name == "File Sharing and Cloud Services" and .confidence != "low"
)

// no quoted/previous threads are present in the message
and length(body.previous_threads) == 0

// none of the attachments is a PDF
and not any(attachments, .file_type == "pdf")

// exactly one body link points at dynamics.com, a self-service creation
// platform, or a URL shortener (matched on either full domain or root domain)
and length(filter(body.links,
                  .href_url.domain.root_domain == "dynamics.com"
                  or .href_url.domain.root_domain in $self_service_creation_platform_domains
                  or .href_url.domain.domain in $self_service_creation_platform_domains
                  or .href_url.domain.root_domain in $url_shorteners
                  or .href_url.domain.domain in $url_shorteners
           )
) == 1

// at most three distinct root domains across all body links
and length(distinct(body.links, .href_url.domain.root_domain)) <= 3

// at least one link satisfies all three of the following conditions
and any(body.links,
        // (1) the link goes to one of the monitored domain groups
        (
          .href_url.domain.root_domain == "dynamics.com"
          or .href_url.domain.root_domain in $self_service_creation_platform_domains
          or .href_url.domain.domain in $self_service_creation_platform_domains
          or .href_url.domain.root_domain in $url_shorteners
          or .href_url.domain.domain in $url_shorteners
        )
        // (2) the subject shows up either in the link's display text or,
        //     wrapped in double quotes, in the current thread text
        // NOTE(review): if subject.base is empty these substring checks may
        // match trivially - confirm strings.icontains behavior for empty needles
        and (
          strings.concat('"', subject.base, '"') != null
          and (
            strings.icontains(body.current_thread.text,
                              strings.concat('"', subject.base, '"')
            )
            or strings.icontains(.display_text, subject.base)
          )
          or strings.icontains(.display_text, subject.base)
        )
        // (3) exclusion: skip the link when an anchor element that wraps an
        //     <img src=...> and sits in/around a div styled with
        //     "border:1px solid" has the same display text as this link
        //     (`..` is the link, `.` is the HTML node) and its raw markup
        //     references the Google doclist icon path on gstatic.com
        and not any(html.xpath(body.html,
                               '//a[img[@src] or .//img[@src]][.//div[contains(@style, "border:1px solid")] or ancestor::div[contains(@style, "border:1px solid")]]'
                    ).nodes,
                    ..display_text == .display_text
                    and strings.icontains(.raw, 'gstatic.com/docs/doclist/images/')
        )
)
Playground
Test against your own EMLs or sample data.