// Only evaluate inbound messages.
type.inbound
// The topic classifier, run over the rendered body text, must report the
// "File Sharing and Cloud Services" topic with high confidence.
and any(beta.ml_topic(body.html.display_text).topics,
.name == "File Sharing and Cloud Services" and .confidence == "high"
)
// At least one link but fewer than ten.
and 0 < length(body.links) < 10
// Short body: display text under 2000 in length.
and length(body.html.display_text) < 2000
// At least one sharepoint.com link must have a tenant subdomain that does NOT
// resemble the sender's second-level domain (SLD), and must point at a
// personal-site share path.
and any(filter(body.links, .href_url.domain.root_domain == 'sharepoint.com'),
        // Similarity score = Levenshtein distance / mean length of the two strings.
        //   0 = identical; values above 0.7 are treated as "different".
        // Because the divisor is the MEAN (not the max) of the two lengths, the
        // score is not capped at 1 when the lengths differ a lot.
        // NOTE(review): earlier revisions added "(a - b) + (b - a)" to the divisor
        // as a stand-in for |a - b| (to emulate max()); that sum is always 0, so it
        // has been dropped here. The result is arithmetically unchanged, which
        // keeps the tuned 0.7 threshold valid.
        (
          (
            // Subdomain carries the common SharePoint '-my' suffix
            // (e.g. "<tenant>-my"): discount those 3 characters from both the
            // distance and the subdomain length before comparing.
            strings.iends_with(.href_url.domain.subdomain, '-my')
            and (
              (
                strings.ilevenshtein(.href_url.domain.subdomain,
                                     sender.email.domain.sld
                ) - 3
              ) / (
                (
                  (length(.href_url.domain.subdomain) - 3)
                  + length(sender.email.domain.sld)
                ) / 2.0 // divide by a float literal to keep the result a float
              )
            ) > 0.7 // customizable threshold
          )
          or (
            // No '-my' suffix: compare the subdomain to the SLD as-is.
            not strings.iends_with(.href_url.domain.subdomain, '-my')
            and (
              strings.ilevenshtein(.href_url.domain.subdomain,
                                   sender.email.domain.sld
              ) / (
                (
                  length(.href_url.domain.subdomain)
                  + length(sender.email.domain.sld)
                ) / 2.0 // divide by a float literal to keep the result a float
              )
            ) > 0.7 // customizable threshold
          )
        )
        // The link path must not contain the sender's mailbox name.
        and not strings.icontains(.href_url.path, sender.email.local_part)
        // The subdomain must not contain any of the organization's own SLDs.
        and not any($org_slds, strings.icontains(..href_url.domain.subdomain, .))
        // it is either a OneNote or PDF file, or unknown
        and regex.icontains(.href_url.path, '\/:[obu]:\/(?:p|g\/personal)')
)
// A way to negate long threads:
// the full thread must be less than 6 times the length of the current thread.
and length(body.html.inner_text) < 6 * length(body.current_thread.text)
// Skip messages whose sender domain is itself one of the SharePoint domains.
and sender.email.domain.root_domain not in (
"sharepoint.com",
"sharepointonline.com"
)
// Negate highly trusted sender domains unless they fail DMARC authentication:
// a sender in $high_trust_sender_root_domains only matches when DMARC did not
// pass; any sender outside that list always satisfies this clause.
and (
(
sender.email.domain.root_domain in $high_trust_sender_root_domains
and not headers.auth_summary.dmarc.pass
)
or sender.email.domain.root_domain not in $high_trust_sender_root_domains
)
// Playground
// Test against your own EMLs or sample data.