// Rule: inbound message containing a forms.office.com link that looks like
// credential phishing. A link matches when ANY of the following holds:
//   A) its display text contains lure keywords AND the sender is unsolicited
//      and new/outlier,
//   B) the rendered page says the form was blocked,
//   C) a Forms API response carries an error message mentioning "phishing",
//   D) a Forms API response flags a question/section title or subtitle as
//      containing phishing keywords,
//   E) the form is structurally abnormal (no inputs, or padded with blank
//      lines) AND has 1-2 non-standard outbound links AND is not a
//      multi-page form with a progress bar.
type.inbound
and any(filter(body.links, .href_url.domain.domain == "forms.office.com"),
        // A) avoid doing Link Analysis if the display-text has strong indications of phishing
        (
          // replace confusables - observed ITW
          regex.icontains(strings.replace_confusables(.display_text),
                          'review|proposal|document|efax|restore|[o0]pen|secure|messaging|reset|account|verify|login|notification|alert|urgent|immediate|access|support|\bupdate\b|download|attachment|service|payment|remittance|invoice'
          )
          // "customer service" would otherwise hit the bare "service" keyword above
          and not regex.icontains(strings.replace_confusables(.display_text),
                                  'customer service'
          )
          // add confidence to these strings by using profile.by_sender()
          and (
            not profile.by_sender().solicited
            and profile.by_sender().prevalence in ('new', 'outlier')
          )
        )

        // B) look at the final_dom.raw
        // if the page has been taken down, match
        or strings.icontains(ml.link_analysis(., mode="aggressive").final_dom.raw,
                             'This form was blocked due to privacy or safety concerns.'
        )

        // C) this error has been shown before with the text "Phishing form from content scan. Inner Message: This form has been flagged for potential phishing."
        or any(ml.link_analysis(., mode="aggressive").additional_responses,
               strings.icontains(.json["error"]["message"], "phishing")
        )

        // D) or MS thinks there are phishing keywords
        or any(ml.link_analysis(., mode="aggressive").additional_responses,
               any(.json["form"]["questions"],
                   .["subtitleHasPhishingKeywords"] == true
               )
               or any(.json["form"]["questions"],
                      .["titleHasPhishingKeywords"] == true
               )
               or any(.json["form"]["descriptiveQuestions"],
                      .["titleHasPhishingKeywords"] == true
               )
               // Previously this arm repeated the titleHasPhishingKeywords check
               // verbatim; it now mirrors the subtitle check done on "questions".
               // NOTE(review): assumes descriptiveQuestions expose
               // subtitleHasPhishingKeywords like questions do - confirm against
               // a captured Forms response.
               or any(.json["form"]["descriptiveQuestions"],
                      .["subtitleHasPhishingKeywords"] == true
               )
        )

        // E) this logic checks for three abnormal cases
        //   1) no questions
        //   2) questions, but no inputs
        //   3) a bunch of new lines (used to push down the submit button of the form)
        // AND
        //   there are one or two links on the form that aren't "standard"
        or (
          (
            // 1) doesn't contain any sections or questions
            any(ml.link_analysis(., mode="aggressive").additional_responses,
                length(.json["form"]["descriptiveQuestions"]) == 0
                and length(.json["form"]["questions"]) == 0
            )

            // 2) Contains a form section header, but no actual inputs
            // possible question types are .Choice, .TextField, .Rating, .DateTime, .Ranking, .MatrixChoiceGroup, .MatrixChoice, and .NPS
            or any(ml.link_analysis(., mode="aggressive").additional_responses,
                   length(.json["form"]["descriptiveQuestions"]) > 0
                   and length(.json["form"]["questions"]) == 0
            )

            // 3) a bunch of new lines (used to push down the submit button of the form)
            // checked both in the rendered DOM and in each question's rich-text title
            or (
              strings.icount(ml.link_analysis(., mode="aggressive").final_dom.raw,
                             '<br><br>'
              ) > 20
              or strings.icount(ml.link_analysis(., mode="aggressive").final_dom.raw,
                                '\n\n'
              ) > 20
              or strings.icount(ml.link_analysis(., mode="aggressive").final_dom.raw,
                                '<span><span>'
              ) > 20
              or any(ml.link_analysis(., mode="aggressive").additional_responses,
                     any(.json["form"]["questions"],
                         strings.icount(.["formsProRTQuestionTitle"],
                                        '<br><br>'
                         ) > 20
                         or strings.icount(.["formsProRTQuestionTitle"], '\n\n') > 20
                         or strings.icount(.["formsProRTQuestionTitle"],
                                           '<span><span>'
                         ) > 20
                     )
              )
            )
          )

          // AND
          // there is one or two links to another page
          // Uses mode="aggressive" like every other link_analysis call in this
          // rule so the link count is taken from the same analysis result as
          // the structural checks it is ANDed with.
          and 0 < length(filter(ml.link_analysis(., mode="aggressive").final_dom.links,
                                not (
                                  // standard Microsoft footer links
                                  (
                                    (
                                      .display_text =~ "Privacy and cookies"
                                      or .display_text =~ "terms of use"
                                      or .display_text =~ "report abuse"
                                    )
                                    and .href_url.domain.root_domain =~ 'microsoft.com'
                                  )
                                  // links back to the sender's own domain
                                  or .href_url.domain.root_domain =~ sender.email.domain.root_domain
                                  or (
                                    .href_url.domain.tld == "ms"
                                    // Microsoft does not own the .ms TLD, this checks to ensure it is one of their domains
                                    and (
                                      network.whois(.href_url.domain).registrant_company == "Microsoft Corporation"
                                      or strings.ilike(network.whois(.href_url.domain
                                                       ).registrar_name,
                                                       "*MarkMonitor*",
                                                       "*CSC Corporate*",
                                                       "*com laude*"
                                      )
                                    )
                                  )
                                )
                         )
          ) <= 2

          // skip forms that render a page progress bar, unless the form
          // data itself says the progress bar is disabled
          and (
            not strings.contains(ml.link_analysis(., mode="aggressive").final_dom.raw,
                                 'role="progressbar" aria-label="Page 1 of '
            )
            or any(ml.link_analysis(., mode="aggressive").additional_responses,
                   .json["form"]["progressBarEnabled"] == false
            )
          )
        )
)
// Playground
// Test against your own EMLs or sample data.