type.inbound
and any(attachments,
        // Candidate files: HTML/SVG identified by file type, extension, or
        // content type, and whose .size is below 100000.
        (
          (
            .file_type in ("html", "svg")
            or .file_extension in ("html", "xhtml", "mhtml", "svg")
            or .content_type in ("text/html", "text/plain")
          )
          and .size < 100000
        )
        // The parsed text must show at least one of three padding patterns.
        and (
          (
            // Pattern 1: "//" comments holding a single capitalised sentence,
            // repeated many times to pad the file.
            // e.g. "// Echoes of the past linger in silence.",
            //      "// The wind whispered secrets unknown.",
            //      "// Shadows tell stories in the dark."
            // Ratio = total matching comments / DISTINCT matching comments.
            // A ratio of 2 or more means each distinct comment occurs twice on
            // average, i.e. at least half of the matches are repeats.
            // NOTE(review): if nothing matches, the divisor is 0 - confirm how
            // the engine evaluates that case.
            regex.count(file.parse_text(.).text, '// [A-Z][ a-z ]+\.') /
            length(distinct(regex.extract(file.parse_text(.).text,
                                          '// [A-Z][ a-z ]+\.'
                            ),
                            .full_match
                   )
            )
            >= 2
          )
          or (
            // Pattern 2: HTML comments wrapping a short saying, optionally
            // enclosed in a tag; more than two occurrences required.
            // e.g. "<!-- <span> No gain without pain. </span> -->",
            //      "<!-- <p> Beauty is only skin deep. </p> -->",
            //      "<!-- <span> Actions speak louder than words. </span> -->"
            regex.count(file.parse_text(.).text,
                        '<!-- +(<[a-z]+>)? [A-Z][ a-z ]+\. (</[a-z]+>)? +-->'
            ) > 2
          )
          or (
            // Pattern 3: sentence-like text placed inside elements styled
            // display:none; more than two occurrences required.
            // e.g. "<h1 style="display:none;"> Self-confidence inspires others to believe in you. </h1>"
            regex.count(file.parse_text(.).text,
                        '<[a-z0-9]+ style="display:none;">(<[a-z]+>)? [A-Z].*\. </[a-z0-9]+>'
            ) > 2
          )
        )
)
Playground
Test against your own EMLs or sample data.