// Rule: inbound message carrying a macro-capable Office attachment that, once
// exploded, holds between 1 and 3 URLs (after discarding XML namespace/schema
// URLs) and at least one of those URLs is judged credential phishing by link
// analysis.
//
// NOTE: the URL noise filter appears twice (once for counting, once for link
// analysis). The two copies must be kept identical, otherwise the URLs that are
// counted and the URLs that are analyzed will diverge.
type.inbound
// Filter to Office documents that contain 1-3 non-schema URLs
and any(filter(attachments,
               // Only check Office documents that can contain macros/embedded content
               .file_extension in $file_extensions_macros
               // Count URLs after filtering out common XML namespace/schema URLs
               and 0 < sum(map(map(file.explode(.),
                                   // Filter out standard XML namespace URLs that appear in all Office docs
                                   filter(.scan.url.urls,
                                          // Exclude OpenXML format schemas (exact hostnames)
                                          .domain.domain not in (
                                            'schemas.openxmlformats.org',
                                            'schemas.microsoft.com',
                                            'www.w3.org'
                                          )
                                          // Additional Microsoft/WPS exclusion. Matched on the root
                                          // domain so that any subdomain (e.g. www.wps.cn, which is the
                                          // host used in WPS namespace URIs) is excluded as well; an
                                          // exact-hostname match on the bare root would never hit them.
                                          and not .domain.root_domain in (
                                            'microsoft.com',
                                            'wps.cn' // WPS is a China-based alternative to MS Office; its domain appears in namespaces of documents created by that product
                                          )
                                          // Exclude Dublin Core persistent URLs (metadata schemas)
                                          and not (
                                            .domain.domain == 'purl.org'
                                            and strings.starts_with(.path, '/dc/')
                                          )
                                          // Exclude Dublin Core XML schemas
                                          and not (
                                            .domain.domain == "dublincore.org"
                                            and strings.starts_with(.path, '/schemas/xmls/')
                                          )
                                   )
                               ),
                               // Count URLs in each exploded file component
                               length(.)
                           )
               ) <= 3 // Only process attachments with 3 or fewer non-schema URLs
        ),
        // For the filtered Office documents, check for malicious URLs
        any(file.explode(.),
            any(
              // Apply the same URL filtering to remove XML namespace noise
              filter(.scan.url.urls,
                     .domain.domain not in (
                       'schemas.openxmlformats.org',
                       'schemas.microsoft.com',
                       'www.w3.org'
                     )
                     // Root-domain match, kept in sync with the counting filter above
                     and not .domain.root_domain in (
                       'microsoft.com',
                       'wps.cn' // WPS is a China-based alternative to MS Office; its domain appears in namespaces of documents created by that product
                     )
                     and not (
                       .domain.domain == 'purl.org'
                       and strings.starts_with(.path, '/dc/')
                     )
                     and not (
                       .domain.domain == "dublincore.org"
                       and strings.starts_with(.path, '/schemas/xmls/')
                     )
              ),
              // Run link analysis on the filtered URLs to detect phishing
              ml.link_analysis(.).credphish.disposition == "phishing"
              // Confidence is only returned when a brand is detected. When it is
              // absent, coalesce falls back to true, so:
              //   - if a brand is present, the confidence must be "high"
              //   - if no confidence is returned, the URL can still match
              and coalesce(ml.link_analysis(.).credphish.confidence == "high",
                           true
              )
              // Skip GoDaddy parked-domain landing pages, identified by the
              // GoDaddy brand plus both parking-page phrases in the final DOM text
              and not (
                ml.link_analysis(.).credphish.brand.name is not null
                and ml.link_analysis(.).credphish.brand.name == "GoDaddy"
                and strings.icontains(ml.link_analysis(.).final_dom.inner_text,
                                      'is parked free, courtesy of GoDaddy.com.'
                )
                and strings.icontains(ml.link_analysis(.).final_dom.inner_text,
                                      'Get This Domain'
                )
              )
            )
        )
)
Playground
Test against your own EMLs or sample data.