367 lines
11 KiB
CFEngine3
367 lines
11 KiB
CFEngine3
# Syntax:
|
|
# loadplugin <Plugin_Name> <Location>
|
|
# <Location> path where Plugin resides.
|
|
loadplugin FuzzyOcr FuzzyOcr.pm
|
|
|
|
body FUZZY_OCR eval:fuzzyocr_check()
|
|
body FUZZY_OCR_WRONG_CTYPE eval:dummy_check()
|
|
body FUZZY_OCR_CORRUPT_IMG eval:dummy_check()
|
|
body FUZZY_OCR_WRONG_EXTENSION eval:dummy_check()
|
|
body FUZZY_OCR_KNOWN_HASH eval:dummy_check()
|
|
|
|
describe FUZZY_OCR Mail contains an image with common spam text inside
|
|
describe FUZZY_OCR_WRONG_CTYPE Mail contains an image with wrong content-type set
|
|
describe FUZZY_OCR_WRONG_EXTENSION Mail contains an image with wrong file extension
|
|
describe FUZZY_OCR_CORRUPT_IMG Mail contains a corrupted image
|
|
describe FUZZY_OCR_KNOWN_HASH Mail contains an image with known hash
|
|
|
|
priority FUZZY_OCR 900
|
|
|
|
###
|
|
### Plugin Configuration
|
|
###
|
|
|
|
###
|
|
### Logging options
|
|
###
|
|
|
|
# Verbosity level (see manual)
|
|
# Level 0 - Errors only
|
|
# Level 1 - Errors and Warnings
|
|
# Level 2 - Errors, Warnings and Info Messages
|
|
# Level 3 - Full debug output
|
|
# Default value: 1
|
|
focr_verbose 3
|
|
|
|
# Log Message-Id, From, To
|
|
# Default: 1
|
|
#focr_log_pmsinfo 0
|
|
|
|
# Send logging output to stderr.
|
|
# Default value: 1
|
|
#focr_log_stderr 0
|
|
|
|
# Logfile (make sure it is writable by the plugin)
|
|
# Default value: none
|
|
#focr_logfile /tmp/FuzzyOcr.log
|
|
focr_logfile /var/spool/MailScanner/spamassassin/FuzzyOcr.log
|
|
|
|
|
|
###
|
|
### Wordlists
|
|
###
|
|
|
|
# Here we define the words to scan for
|
|
# Default value: /etc/mail/spamassassin/FuzzyOcr.words
|
|
#focr_global_wordlist /etc/mail/spamassassin/FuzzyOcr.words
|
|
#
|
|
# This is the path RELATIVE to the respective home directory
|
|
# for the personalized list. This list is merged with the global
|
|
# word list on execution.
|
|
# Default value: ~/.spamassassin/fuzzyocr.words
|
|
# If value begins with '/', it is treated as fixed path.
|
|
#focr_personal_wordlist fuzzyocr.words
|
|
#
|
|
# This option allows you to disable the whole personalization stuff,
|
|
# i.e. FuzzyOcr will not call functions in SA that require home
|
|
# directories for your users. This is only required if you are running
|
|
# an environment where the users don't have home directories at all.
|
|
# Default value: 0
|
|
#
|
|
#focr_no_homedirs 1
|
|
#
|
|
## Optionally, disable this option if you want to scan for numbers
|
|
## Setting this to 0 will cause FuzzyOcr not to strip numbers from
|
|
## both the wordlist and the OCR results
|
|
#
|
|
#focr_strip_numbers 1
|
|
|
|
|
|
###
|
|
### Helper Applications
|
|
###
|
|
|
|
# These parameters can be used to change other detection settings
|
|
# If you leave these commented out, the defaults will be used.
|
|
# Do not use " " around any parameters!
|
|
|
|
###
|
|
### Step 1:
|
|
### Inform the plugin which helper apps are required.
|
|
###
|
|
|
|
# The following are already included by default:
|
|
#
|
|
#focr_bin_helper gifsicle, giffix, giftext, gifinter, giftopnm
|
|
#focr_bin_helper jpegtopnm, pngtopnm, bmptopnm, tifftopnm, ppmhist
|
|
#focr_bin_helper gocr, ocrad
|
|
|
|
# Include additional scanner/preprocessor commands here:
|
|
#
|
|
#focr_bin_helper pnmnorm, pnminvert, pamthreshold, ppmtopgm, pamtopnm
|
|
#focr_bin_helper tesseract
|
|
focr_bin_helper pnmnorm, pnminvert, ppmtopgm
|
|
|
|
###
|
|
### Step 2:
|
|
### Inform the plugin of the search path to find all helper apps.
|
|
### Only the first match will be considered, so the order is important.
|
|
###
|
|
|
|
# Search path for locating helper applications
|
|
#focr_path_bin /usr/local/netpbm/bin:/usr/local/bin:/usr/bin
|
|
focr_path_bin /usr/local/netpbm/bin:/usr/local/bin:/usr/bin
|
|
|
|
###
|
|
### Step 3:
|
|
### You can optionally define a helper application location, bypassing
|
|
### the search path algorithm. Please note that if the helper app is not
|
|
### previously defined, it will generate an error:
|
|
|
|
#focr_bin_gifsicle /usr/bin/gifsicle
|
|
#focr_bin_giffix /usr/bin/giffix
|
|
#focr_bin_giftext /usr/bin/giftext
|
|
#focr_bin_gifinter /usr/bin/gifinter
|
|
#focr_bin_giftopnm /usr/bin/giftopnm
|
|
#focr_bin_jpegtopnm /usr/bin/jpegtopnm
|
|
#focr_bin_pngtopnm /usr/bin/pngtopnm
|
|
#focr_bin_bmptopnm /usr/bin/bmptopnm
|
|
#focr_bin_tifftopnm /usr/bin/tifftopnm
|
|
#focr_bin_ppmhist /usr/bin/ppmhist
|
|
#focr_bin_gocr /usr/bin/gocr
|
|
#focr_bin_ocrad /usr/bin/ocrad
|
|
|
|
#focr_bin_pnmnorm /usr/bin/pnmnorm
|
|
#focr_bin_pnminvert /usr/bin/pnminvert
|
|
#focr_bin_convert /usr/bin/convert
|
|
|
|
###
|
|
### Scansets
|
|
###
|
|
|
|
# Paths to the files containing Scansets and Preprocessors definitions
|
|
#
|
|
#focr_preprocessor_file /etc/mail/spamassassin/FuzzyOcr.preps
|
|
#focr_scanset_file /etc/mail/spamassassin/FuzzyOcr.scansets
|
|
|
|
# Setting this to 1 will cause FuzzyOcr to skip all other scansets,
|
|
# if a scanset has reached the amount of hits specified in
|
|
# focr_counts_required. (i.e. if the image is detected as spam).
|
|
# This saves resources, but lowers the scores because not the best,
|
|
# but the first best scanset is taken as result.
|
|
# Default value: 1
|
|
#focr_minimal_scanset 0
|
|
|
|
# This option is only used when focr_minimal_scanset is enabled.
|
|
# Basically, this counts the effectiveness of a scanset on the current
|
|
# mail traffic and resorts the scansets with the most effective first.
|
|
# This saves unnecessary scanner passes and saves resources.
|
|
# Default value: 1.
|
|
#focr_autosort_scanset 0
|
|
|
|
# This is a parameter for the focr_autosort_scanset function, and specifies
|
|
# the maximum value of the effectiveness counter used in each scanset. If you
|
|
# increase this, it will take longer until the autosort function adapts to new
|
|
# types of spam, setting it too low will lower the effectiveness of the
|
|
# function.
|
|
# Default value: 10
|
|
#focr_autosort_buffer 10
|
|
|
|
###
|
|
### Scan Settings
|
|
###
|
|
|
|
# Timeout for the plugin, in seconds. (Maximum runtime of the plugin)
|
|
# Default value: 10
|
|
#focr_timeout 15
|
|
|
|
# Use a global timeout value instead of per helper application.
|
|
# Default value: 0
|
|
#focr_global_timeout 1
|
|
|
|
# Minimum image size to scan. Images with dimensions smaller than the
|
|
# ones specified here will be skipped:
|
|
# Default: Height:4 Width:4
|
|
#
|
|
#focr_min_height 4
|
|
#focr_min_width 4
|
|
|
|
# Maximum image size to scan. Images with dimensions bigger than the
|
|
# ones specified here will be skipped:
|
|
# Default: Height:800 Width:800
|
|
#
|
|
#focr_max_height 800
|
|
#focr_max_width 800
|
|
|
|
|
|
# Maximum file size for different formats in bytes, bigger pictures
|
|
# will not be scanned
|
|
# Default values: Unlimited
|
|
#focr_max_size_gif 80000
|
|
#focr_max_size_jpeg 100000
|
|
#focr_max_size_png 80000
|
|
#focr_max_size_bmp 500000
|
|
#focr_max_size_tiff 500000
|
|
|
|
# Skip checking the following image types
|
|
# Default value: 0 (check image type)
|
|
#focr_skip_gif 1
|
|
#focr_skip_jpeg 1
|
|
#focr_skip_png 1
|
|
#focr_skip_bmp 1
|
|
#focr_skip_tiff 1
|
|
|
|
# Default detection threshold (see manual)
|
|
# Default value: 0.25 (Can be changed on a per word basis in the wordlist).
|
|
#focr_threshold 0.20
|
|
|
|
# Number of minimum matches before the rule scores (Default value: 2)
|
|
#focr_counts_required 3
|
|
|
|
# Setting this will cause every word to be matched only once per image (Default value: 0)
|
|
#focr_unique_matches 1
|
|
|
|
# This is the score for a hit after focr_counts_required matches
|
|
# Default value: 5
|
|
#focr_base_score 5
|
|
|
|
# This is the additional score for every additional match after
|
|
# focr_counts_required matches
|
|
# Default value: 1
|
|
#focr_add_score 0.375
|
|
|
|
# This option defines the factor, which is multiplied with the number
|
|
# of matches, that were made without stripping spaces. FuzzyOcr does two
|
|
# matching attempts on OCR results, one without space strippings and one with.
|
|
# To weight the first match type more, this factor is applied.
|
|
# Default value: 1.5
|
|
#focr_twopass_scoring_factor 1.5
|
|
|
|
# This is the score to give for a wrong content-type.
|
|
# e.g. JPEG image but content type says GIF
|
|
# Default value: 1.5
|
|
#focr_wrongctype_score 1.5
|
|
|
|
# This is the score to give for a wrong file extension.
|
|
# e.g. JPEG image but file extension says GIF
|
|
# Default value: 1.5
|
|
#focr_wrongext_score 1.5
|
|
|
|
# This is the score to give for a corrupted image.
|
|
# This currently affects only GIF images
|
|
# Default value: 2.5
|
|
#focr_corrupt_score 2.5
|
|
|
|
# This is the score to give for a corrupted unfixable image.
|
|
# This currently affects only GIF images.
|
|
# Default value: 5
|
|
#focr_corrupt_unfixable_score 5
|
|
|
|
# This is used to disable the OCR engine if the message has
|
|
# already more points than this value
|
|
# Default value: 10
|
|
#focr_autodisable_score 30
|
|
|
|
# This is used to disable the OCR engine if the message has
|
|
# already less points than this value
|
|
# Default value: -5
|
|
#focr_autodisable_negative_score -5
|
|
|
|
|
|
###
|
|
### Hashing Options (Optional)
|
|
###
|
|
|
|
# Select which type of image hashing to use:
|
|
# Default value: 0 (disabled)
|
|
# Allowed values:
|
|
# 1 ... use digest_hash only (deprecated)
|
|
# 2 ... use digest_db w/digest_hash import (see requirements, recommended)
|
|
# 3 ... use mysql database (see requirements, experimental)
|
|
#--
|
|
# The score is saved with the hash in the database, allowing the plugin to
|
|
# skip the scans when the image is found in the database, using the score
|
|
# from the previous scans.
|
|
#--
|
|
#focr_enable_image_hashing 3
|
|
focr_enable_image_hashing 2
|
|
|
|
# Set this to skip updating the hashing database at startup
|
|
# Default value: 0 (update at startup)
|
|
#focr_skip_updates 1
|
|
|
|
# Automatically add hashes of spam images recognized by OCR to the Image
|
|
# Hash database, to disable, set to 0
|
|
# Default value: 1 (learn)
|
|
#focr_hashing_learn_scanned 1
|
|
|
|
# Score images whose global word count is below focr_counts_required using
|
|
# the following formula: (focr_add_score * word count) as score.
|
|
# Default value: 0 (ignore images)
|
|
#focr_score_ham 1
|
|
|
|
# If the image hash database feature is enabled (Type 1 Hashing),
|
|
# specify the file to use as database
|
|
# Default value: /etc/mail/spamassassin/FuzzyOcr.hashdb
|
|
#focr_digest_db /etc/mail/spamassassin/FuzzyOcr.hashdb
|
|
|
|
# If the image hash db feature is enabled (Type 2 Hashing),
|
|
# specify the file to use as the SPAM database
|
|
# Default value: /etc/mail/spamassassin/FuzzyOcr.db
|
|
#focr_db_hash /etc/mail/spamassassin/FuzzyOcr.db
|
|
focr_db_hash /var/spool/MailScanner/spamassassin/FuzzyOcr.db
|
|
|
|
# If the image hash db feature is enabled (Type 2 Hashing),
|
|
# specify the file to use as the HAM database
|
|
# Default value: /etc/mail/spamassassin/FuzzyOcr.safe.db
|
|
#focr_db_safe /etc/mail/spamassassin/FuzzyOcr.safe.db
|
|
focr_db_safe /var/spool/MailScanner/spamassassin/FuzzyOcr.safe.db
|
|
|
|
# Auto-prune: Expire records from hashing databases after this many days
|
|
# Default value: 35
|
|
#focr_db_max_days 15
|
|
|
|
###
|
|
### MySQL options (Type 3 Hashing)
|
|
###
|
|
|
|
#focr_mysql_db FuzzyOcr
|
|
#focr_mysql_hash Hash
|
|
#focr_mysql_safe Safe
|
|
#focr_mysql_user fuzzyocr
|
|
#focr_mysql_pass fuzzyocr
|
|
#focr_mysql_host localhost
|
|
#focr_mysql_port 3306
|
|
#focr_mysql_socket /tmp/mysql.sock
|
|
|
|
# If set, the database table is updated with different data from one of
|
|
# the following:
|
|
# + filename,
|
|
# + image-params,
|
|
# + content-type,
|
|
# + file-type,
|
|
# + score,
|
|
# + word-info
|
|
# Default value: 0
|
|
#focr_mysql_update_hash 1
|
|
|
|
###
|
|
### Miscellaneous Options
|
|
###
|
|
|
|
# The plugin uses a temporary directory to store intermediate information.
|
|
# In order to keep these files for debugging purposes use any of these
|
|
# values:
|
|
# 0 = always cleanup (default value)
|
|
# 1 = keep only if error
|
|
# 2 = always keep
|
|
#--
|
|
# Keeping these intermediate files could fill your HDD _very_ fast!
|
|
# Make sure you periodically empty your temp dir (usually: /tmp) or
|
|
# suffer the consequences. You've been warned!!
|
|
#--
|
|
#focr_keep_bad_images 1
|
|
|
|
#################################################################
|
|
# DO NOT REMOVE THIS LINE, IT IS REQUIRED UNDER ALL CIRCUMSTANCES
|
|
focr_end_config
|