// Inbound message in which a link whose display text ends in ".pdf" points at a
// free file host, self-service creation platform, URL shortener, or dynamics.com,
// and that display text is finance-themed or matches the subject.
type.inbound
// keep to messages carrying fewer than 20 links
and length(body.links) < 20
// the body holds no previous threads
and length(body.previous_threads) == 0
// none of the attachments is a PDF
and length(filter(attachments, .file_type == "pdf")) == 0
// exactly one link in the body lands on one of the watched domains
and length(filter(body.links,
                  .href_url.domain.root_domain == "dynamics.com"
                  or .href_url.domain.root_domain in $free_file_hosts
                  or .href_url.domain.domain in $free_file_hosts
                  or .href_url.domain.root_domain in $self_service_creation_platform_domains
                  or .href_url.domain.domain in $self_service_creation_platform_domains
                  or .href_url.domain.root_domain in $url_shorteners
                  or .href_url.domain.domain in $url_shorteners
           )
) == 1
// at most three distinct root domains are linked from the message
and length(distinct(body.links, .href_url.domain.root_domain)) <= 3
// some link is labelled like a PDF file while pointing at a watched domain
and any(body.links,
        // destination: same domain test as the single-link count above
        (
          .href_url.domain.root_domain == "dynamics.com"
          or .href_url.domain.root_domain in $free_file_hosts
          or .href_url.domain.domain in $free_file_hosts
          or .href_url.domain.root_domain in $self_service_creation_platform_domains
          or .href_url.domain.domain in $self_service_creation_platform_domains
          or .href_url.domain.root_domain in $url_shorteners
          or .href_url.domain.domain in $url_shorteners
        )
        // label: the display text ends in .pdf (case-insensitive)
        and strings.iends_with(.display_text, '.pdf')
        // label: finance-related wording (payment, remittance, invoice, ...)
        and (
          // whole-word style matches, anchored with a trailing word boundary
          regex.icontains(.display_text, 'pay\b', 'ACH\b', 'EFT\b')
          // plain substrings
          or strings.icontains(.display_text, 'payment')
          or strings.icontains(.display_text, 'payoff')
          or strings.icontains(.display_text, 'remit')
          or strings.icontains(.display_text, 'receipt')
          or strings.icontains(.display_text, 'Distribution')
          or strings.icontains(.display_text, 'Wire Instructions')
          // prefixes
          or strings.istarts_with(.display_text, 'INV')
          or strings.istarts_with(.display_text, 'View RFQ')
          or strings.istarts_with(.display_text, 'Contract')
          // or a non-empty display text that equals the subject
          or (length(.display_text) > 0 and .display_text =~ subject.base)
        )
        // exclusion: skip links rendered with a google icon inside a bordered box.
        // The xpath selects anchors that contain an image and sit in (or contain)
        // a div styled with "border:1px solid".
        and not any(filter(html.xpath(body.html,
                                      '//a[img[@src] or .//img[@src]][.//div[contains(@style, "border:1px solid")] or ancestor::div[contains(@style, "border:1px solid")]]'
                           ).nodes,
                           // keep only the node whose display text equals that of the link being inspected
                           .display_text == ..display_text
                    ),
                    // the node's raw markup references the google icon path
                    strings.icontains(.raw, 'gstatic.com/docs/doclist/images/')
        )
)
Playground
Test against your own EMLs or sample data.