// Matches inbound messages that carry exactly one EML attachment in which a
// sharepoint.com link's subdomain does not resemble the sender's domain.
type.inbound
// exactly one attachment is an EML (by file extension or by MIME type)
and length(filter(attachments,
                  .file_extension == "eml" or .content_type == "message/rfc822"
           )
) == 1
and any(attachments,
        // at least one sharepoint.com link in the parsed EML body must satisfy every check below
        any(filter(file.parse_eml(.).body.links,
                   .href_url.domain.root_domain == 'sharepoint.com'
            ),
            // Ratio of the Levenshtein distance (link subdomain vs. sender SLD) to the
            // MEAN of the two string lengths: 0 = identical, above 0.7 = treated as different.
            // Because the divisor is the mean and not the max, the ratio can exceed 1.0
            // for very dissimilar strings of unequal length.
            (
              (
                strings.iends_with(.href_url.domain.subdomain,
                                   '-my'
                ) // common Sharepoint subdomain suffix
                and (
                  (
                    strings.ilevenshtein(.href_url.domain.subdomain,
                                         sender.email.domain.sld
                    ) - 3 // discount the 3-character '-my' suffix from the distance
                  ) / (
                    (
                      // suffix-less subdomain length plus sender SLD length
                      (length(.href_url.domain.subdomain) - 3)
                      + length(sender.email.domain.sld)
                    ) / 2.0 // halve as a float so the outer division is not integer division
                  )
                ) > 0.7 // customizable threshold
              )
              or (
                not strings.iends_with(.href_url.domain.subdomain,
                                       '-my'
                ) // no suffix, compare the full subdomain
                and (
                  strings.ilevenshtein(.href_url.domain.subdomain,
                                       sender.email.domain.sld
                  ) / (
                    (
                      length(.href_url.domain.subdomain)
                      + length(sender.email.domain.sld)
                    ) / 2.0 // halve as a float so the outer division is not integer division
                  )
                ) > 0.7 // customizable threshold
              )
            )
            // the link path must not contain the sender's mailbox name
            and not strings.icontains(.href_url.path, sender.email.local_part)
            // the link subdomain must not contain any of the organization's own SLDs
            and not any($org_slds,
                        strings.icontains(..href_url.domain.subdomain, .)
            )
            // it is either a OneNote or PDF file, or unknown
            and regex.icontains(.href_url.path, '\/:[obu]:\/(?:p|g\/personal)')
        )
        // a way to negate long threads
        // the full thread must be less than 6 times the length of the current thread
        and length(body.html.inner_text) < 6 * length(body.current_thread.text)
        // the sender itself must not be a SharePoint domain
        and sender.email.domain.root_domain not in (
          "sharepoint.com",
          "sharepointonline.com"
        )
)
// negate highly trusted sender domains unless they fail DMARC authentication
and (
  (
    sender.email.domain.root_domain in $high_trust_sender_root_domains
    and not headers.auth_summary.dmarc.pass
  )
  or sender.email.domain.root_domain not in $high_trust_sender_root_domains
)
// negate instances where proofpoint sends a review of a reported message via analyzer
and not (
  any(headers.domains, .root_domain == "pphosted.com")
  and headers.auth_summary.spf.pass
  and headers.auth_summary.dmarc.pass
)
Playground
Test against your own EMLs or sample data.