egscripts/egserverinstall/templates/FuzzyOcr.cf

368 lines
11 KiB
CFEngine3
Raw Normal View History

# Syntax:
# loadplugin <Plugin_Name> <Location>
# <Location> path where Plugin resides.
loadplugin FuzzyOcr FuzzyOcr.pm
body FUZZY_OCR eval:fuzzyocr_check()
body FUZZY_OCR_WRONG_CTYPE eval:dummy_check()
body FUZZY_OCR_CORRUPT_IMG eval:dummy_check()
body FUZZY_OCR_WRONG_EXTENSION eval:dummy_check()
body FUZZY_OCR_KNOWN_HASH eval:dummy_check()
describe FUZZY_OCR Mail contains an image with common spam text inside
describe FUZZY_OCR_WRONG_CTYPE Mail contains an image with wrong content-type set
describe FUZZY_OCR_WRONG_EXTENSION Mail contains an image with wrong file extension
describe FUZZY_OCR_CORRUPT_IMG Mail contains a corrupted image
describe FUZZY_OCR_KNOWN_HASH Mail contains an image with known hash
priority FUZZY_OCR 900
###
### Plugin Configuration
###
###
### Logging options
###
# Verbosity level (see manual)
# Level 0 - Errors only
# Level 1 - Errors and Warnings
# Level 2 - Errors, Warnings and Info Messages
# Level 3 - Full debug output
# Default value: 1
focr_verbose 3
# Log Message-Id, From, To
# Default: 1
#focr_log_pmsinfo 0
# Send logging output to stderr.
# Default value: 1
#focr_log_stderr 0
# Logfile (make sure it is writable by the plugin)
# Default value: none
#focr_logfile /tmp/FuzzyOcr.log
focr_logfile /var/spool/MailScanner/spamassassin/FuzzyOcr.log
###
### Wordlists
###
# Here we define the words to scan for
# Default value: /etc/mail/spamassassin/FuzzyOcr.words
#focr_global_wordlist /etc/mail/spamassassin/FuzzyOcr.words
#
# This is the path RELATIVE to the respective home directory
# for the personalized list. This list is merged with the global
# word list on execution.
# Default value: ~/.spamassassin/fuzzyocr.words
# If value begins with '/', it is treated as fixed path.
#focr_personal_wordlist fuzzyocr.words
#
# This option allows you to disable the whole personalization stuff,
# i.e. FuzzyOcr will not call functions in SA that require home
# directories for your users. This is only required if you are running
# an environment where the users don't have home directories at all.
# Default value: 0
#
#focr_no_homedirs 1
#
## Optionally, disable this option if you want to scan for numbers
## Setting this to 0 will cause FuzzyOcr not to strip numbers from
## both the wordlist and the OCR results
#
#focr_strip_numbers 1
###
### Helper Applications
###
# These parameters can be used to change other detection settings
# If you leave these commented out, the defaults will be used.
# Do not use " " around any parameters!
###
### Step 1:
### Inform the plugin which helper apps are required.
###
# The following are already included by default:
#
#focr_bin_helper gifsicle, giffix, giftext, gifinter, giftopnm
#focr_bin_helper jpegtopnm, pngtopnm, bmptopnm, tifftopnm, ppmhist
#focr_bin_helper gocr, ocrad
# Include additional scanner/preprocessor commands here:
#
#focr_bin_helper pnmnorm, pnminvert, pamthreshold, ppmtopgm, pamtopnm
#focr_bin_helper tesseract
focr_bin_helper pnmnorm, pnminvert, ppmtopgm
###
### Step 2:
### Inform the plugin of the search path to find all helper apps.
### Only the first match will be considered, so the order is important.
###
# Search path for locating helper applications
#focr_path_bin /usr/local/netpbm/bin:/usr/local/bin:/usr/bin
focr_path_bin /usr/local/netpbm/bin:/usr/local/bin:/usr/bin
###
### Step 3:
### You can optionally define a helper application location, bypassing
### the search path algorithm. Please note that if the helper app is not
### previously defined, it will generate an error:
#focr_bin_gifsicle /usr/bin/gifsicle
#focr_bin_giffix /usr/bin/giffix
#focr_bin_giftext /usr/bin/giftext
#focr_bin_gifinter /usr/bin/gifinter
#focr_bin_giftopnm /usr/bin/giftopnm
#focr_bin_jpegtopnm /usr/bin/jpegtopnm
#focr_bin_pngtopnm /usr/bin/pngtopnm
#focr_bin_bmptopnm /usr/bin/bmptopnm
#focr_bin_tifftopnm /usr/bin/tifftopnm
#focr_bin_ppmhist /usr/bin/ppmhist
#focr_bin_gocr /usr/bin/gocr
#focr_bin_ocrad /usr/bin/ocrad
#focr_bin_pnmnorm /usr/bin/pnmnorm
#focr_bin_pnminvert /usr/bin/pnminvert
#focr_bin_convert /usr/bin/convert
###
### Scansets
###
# Paths to the files containing Scansets and Preprocessors definitions
#
#focr_preprocessor_file /etc/mail/spamassassin/FuzzyOcr.preps
#focr_scanset_file /etc/mail/spamassassin/FuzzyOcr.scansets
# Setting this to 1 will cause FuzzyOcr to skip all other scansets,
# if a scanset has reached the amount of hits specified in
# focr_counts_required. (i.e. if the image is detected as spam).
# This saves resources, but can lower the scores, because the first
# scanset that reaches the limit is taken as the result, not the best one.
# Default value: 1
#focr_minimal_scanset 0
# This option is only used when focr_minimal_scanset is enabled.
# Basically, this counts the effectiveness of a scanset on the current
# mail traffic and resorts the scansets with the most effective first.
# This saves unnecessary scanner passes and saves resources.
# Default value: 1.
#focr_autosort_scanset 0
# This is a parameter for the focr_autosort_scanset function, and specifies
# the maximum value of the effectiveness counter used in each scanset. If you
# increase this, it will take longer until the autosort function adapts to new
# types of spam, setting it too low will lower the effectiveness of the
# function.
# Default value: 10
#focr_autosort_buffer 10
###
### Scan Settings
###
# Timeout for the plugin, in seconds. (Maximum runtime of the plugin)
# Default value: 10
#focr_timeout 15
# Use a global timeout value instead of per helper application.
# Default value: 0
#focr_global_timeout 1
# Minimum image size to scan. Images with dimensions smaller than the
# ones specified here will be skipped:
# Default: Height:4 Width:4
#
#focr_min_height 4
#focr_min_width 4
# Maximum image size to scan. Images with dimensions bigger than the
# ones specified here will be skipped:
# Default: Height:800 Width:800
#
#focr_max_height 800
#focr_max_width 800
# Maximum file size for different formats in bytes; bigger pictures
# will not be scanned
# Default values: unlimited
#focr_max_size_gif 80000
#focr_max_size_jpeg 100000
#focr_max_size_png 80000
#focr_max_size_bmp 500000
#focr_max_size_tiff 500000
# Skip checking the following image types
# Default value: 0 (check image type)
#focr_skip_gif 1
#focr_skip_jpeg 1
#focr_skip_png 1
#focr_skip_bmp 1
#focr_skip_tiff 1
# Default detection threshold (see manual)
# Default value: 0.25 (Can be changed on a per word basis in the wordlist).
#focr_threshold 0.20
# Number of minimum matches before the rule scores (Default value: 2)
#focr_counts_required 3
# Setting this will cause every word to be matched only once per image (Default value: 0)
#focr_unique_matches 1
# This is the score for a hit after focr_counts_required matches
# Default value: 5
#focr_base_score 5
# This is the additional score for every additional match after
# focr_counts_required matches
# Default value: 1
#focr_add_score 0.375
# This option defines the factor, which is multiplied with the number
# of matches, that were made without stripping spaces. FuzzyOcr does two
# matching attempts on OCR results, one without space strippings and one with.
# To weight the first match type more, this factor is applied.
# Default value: 1.5
#focr_twopass_scoring_factor 1.5
# This is the score to give for a wrong content-type.
# e.g. JPEG image but content type says GIF
# Default value: 1.5
#focr_wrongctype_score 1.5
# This is the score to give for a wrong file extension.
# e.g. JPEG image but file extension says GIF
# Default value: 1.5
#focr_wrongext_score 1.5
# This is the score to give for a corrupted image.
# This currently affects only GIF images
# Default value: 2.5
#focr_corrupt_score 2.5
# This is the score to give for a corrupted unfixable image.
# This currently affects only GIF images.
# Default value: 5
#focr_corrupt_unfixable_score 5
# This is used to disable the OCR engine if the message already
# has more points than this value
# Default value: 10
#focr_autodisable_score 30
# This is used to disable the OCR engine if the message already
# has fewer points than this value
# Default value: -5
#focr_autodisable_negative_score -5
###
### Hashing Options (Optional)
###
# Select which type of image hashing to use:
# Default value: 0 (disabled)
# Allowed values:
# 1 ... use digest_hash only (deprecated)
# 2 ... use digest_db w/digest_hash import (see requirements, recommended)
# 3 ... use mysql database (see requirements, experimental)
#--
# The score is saved with the hash in the database, allowing the plugin to
# skip the scans when the image is found in the database, using the score
# from the previous scans.
#--
#focr_enable_image_hashing 3
focr_enable_image_hashing 2
# Set this to skip updating the hashing database at startup
# Default value: 0 (update at startup)
#focr_skip_updates 1
# Automatically add hashes of spam images recognized by OCR to the Image
# Hash database, to disable, set to 0
# Default value: 1 (learn)
#focr_hashing_learn_scanned 1
# Score images whose global word count is below focr_counts_required using
# the following formula: (focr_add_score * word count) as score.
# Default value: 0 (ignore images)
#focr_score_ham 1
# If the image hash database feature is enabled (Type 1 Hashing),
# specify the file to use as database
# Default value: /etc/mail/spamassassin/FuzzyOcr.hashdb
#focr_digest_db /etc/mail/spamassassin/FuzzyOcr.hashdb
# If the image hash db feature is enabled (Type 2 Hashing),
# specify the file to use as the SPAM database
# Default value: /etc/mail/spamassassin/FuzzyOcr.db
#focr_db_hash /etc/mail/spamassassin/FuzzyOcr.db
focr_db_hash /var/spool/MailScanner/spamassassin/FuzzyOcr.db
# If the image hash db feature is enabled (Type 2 Hashing),
# specify the file to use as the HAM database
# Default value: /etc/mail/spamassassin/FuzzyOcr.safe.db
#focr_db_safe /etc/mail/spamassassin/FuzzyOcr.safe.db
focr_db_safe /var/spool/MailScanner/spamassassin/FuzzyOcr.safe.db
# Auto-prune: Expire records from hashing databases after this many days
# Default value: 35
#focr_db_max_days 15
###
### MySQL options (Type 3 Hashing)
###
#focr_mysql_db FuzzyOcr
#focr_mysql_hash Hash
#focr_mysql_safe Safe
#focr_mysql_user fuzzyocr
#focr_mysql_pass fuzzyocr
#focr_mysql_host localhost
#focr_mysql_port 3306
#focr_mysql_socket /tmp/mysql.sock
# If set, the database table is updated with different data from one of
# the following:
# + filename,
# + image-params,
# + content-type,
# + file-type,
# + score,
# + word-info
# Default value: 0
#focr_mysql_update_hash 1
###
### Miscellaneous Options
###
# The plugin uses a temporary directory to store intermediate information.
# In order to keep these files for debugging purposes, use any of these
# values:
# 0 = always cleanup (default value)
# 1 = keep only if error
# 2 = always keep
#--
# Keeping these intermediate files could fill your HDD _very_ fast!
# Make sure you periodically empty your temp dir (usually: /tmp) or
# suffer the consequences. You've been warned!!
#--
#focr_keep_bad_images 1
#################################################################
# DO NOT REMOVE THIS LINE, IT IS REQUIRED UNDER ALL CIRCUMSTANCES
focr_end_config