type.inbound
// Rule overview: match an inbound message that has at least one attachment which
//   1. has an extension in $file_extensions_macros and a .size below 2000000, and
//   2. contains (in any exploded node) a document-sharing lure, and
//   3. contains (in any exploded node) copy/paste-into-browser or click-to-access
//      instructions, or at least 5 of the listed e-mail disclaimer phrases in OCR text.
and any(
  filter(
    attachments,
    .file_extension in $file_extensions_macros
    // keep the attachment small: bigger documents are more likely to be benign
    // automated reports that would trip this rule (false positives)
    and .size < 2000000
  ),
  // Detection Note: the same regex patterns are repeated for the strings and OCR
  // scans below; any change to one copy must be mirrored in the other
  //
  // --- condition A: document-sharing lure ---
  any(
    file.explode(.),
    // lure wording in the extracted strings (short content only)
    (
      length(.scan.strings.raw) < 1000
      and regex.icontains(
        .scan.strings.raw,
        '(?:sent|shared|forwarded|provided|invited|received)(?:\s+\w+){0,9}\s+(?:document|file|attachment)'
      )
    )
    // same lure wording in the OCR output (short content only)
    or (
      length(.scan.ocr.raw) < 1000
      and regex.icontains(
        .scan.ocr.raw,
        '(?:sent|shared|forwarded|provided|invited|received)(?:\s+\w+){0,9}\s+(?:document|file|attachment)'
      )
    )
    // very short OCR output that finishes with the exact text 'REVIEW DOCUMENTS'
    or (
      length(.scan.ocr.raw) < 500
      and strings.ends_with(.scan.ocr.raw, 'REVIEW DOCUMENTS')
    )
  )
  // --- condition B: copy/paste instructions or disclaimer boilerplate ---
  and (
    any(
      file.explode(.),
      // browser copy/paste or click-to-access wording in the extracted strings
      (
        length(.scan.strings.raw) < 1000
        and regex.icontains(
          .scan.strings.raw,
          '(?:copy (?:and paste)?|right.?click)(?:\s+\w+\s*){0,9}browser',
          'click.{0,50}(?:above|below) to access'
        )
      )
      // same wording in the OCR output
      or (
        length(.scan.ocr.raw) < 1000
        and regex.icontains(
          .scan.ocr.raw,
          '(?:copy (?:and paste)?|right.?click)(?:\s+\w+\s*){0,9}browser',
          'click.{0,50}(?:above|below) to access'
        )
      )
      // at least five distinct disclaimer phrases present in the OCR output
      // (no length limit is applied to this branch)
      or 5 of (
        strings.icontains(.scan.ocr.raw, 'confidential and intended solely'),
        strings.icontains(.scan.ocr.raw, 'intended solely for the use of'),
        strings.icontains(.scan.ocr.raw, 'intended recipient'),
        strings.icontains(.scan.ocr.raw, 'received this email in error'),
        strings.icontains(.scan.ocr.raw, 'notify the sender immediately'),
        strings.icontains(.scan.ocr.raw, 'delete it from your system'),
        strings.icontains(.scan.ocr.raw, 'delete the email from'),
        strings.icontains(.scan.ocr.raw, 'virus-free'),
        strings.icontains(.scan.ocr.raw, 'scan for viruses'),
        strings.icontains(.scan.ocr.raw, 'legally binding agreement'),
        strings.icontains(.scan.ocr.raw, 'informational purposes only'),
        strings.icontains(.scan.ocr.raw, 'any attachments are confidential'),
        strings.icontains(.scan.ocr.raw, 'loss or damage arising'),
        strings.icontains(.scan.ocr.raw, 'responsibility for any loss'),
        strings.icontains(.scan.ocr.raw, 'unauthorised and prohibited'),
        strings.icontains(.scan.ocr.raw, 'subject to legal privilege'),
        strings.icontains(.scan.ocr.raw, 'The information contained in or attached'),
        strings.icontains(.scan.ocr.raw, 'The information contained in this email'),
        strings.icontains(.scan.ocr.raw, 'people to whom it is addressed'),
        strings.icontains(.scan.ocr.raw, 'received this in error, please inform'),
        strings.icontains(.scan.ocr.raw, 'inform the sender and/or'),
        strings.icontains(.scan.ocr.raw, 'immediately and delete the material'),
        strings.icontains(.scan.ocr.raw, 'confidential and/or privileged material'),
        strings.icontains(.scan.ocr.raw, 'confidential and / or legally privileged'),
        strings.icontains(.scan.ocr.raw, 'The information transmitted is intended'),
        strings.icontains(.scan.ocr.raw, 'other than the intended recipient is prohibited'),
        strings.icontains(.scan.ocr.raw, 'intended for use by the addressee'),
        strings.icontains(.scan.ocr.raw, 'taking of any action in reliance upon'),
        strings.icontains(.scan.ocr.raw, 'distribution is strictly prohibited'),
        strings.icontains(.scan.ocr.raw, 'may be unlawful')
      )
    )
  )
)
// Playground
// Test against your own EMLs or sample data.