# SpamAssassin configuration for the FuzzyOcr plugin.
#
# Syntax:
#   loadplugin PluginModuleName [/path/where/the/plugin/resides.pm]
loadplugin FuzzyOcr FuzzyOcr.pm

body FUZZY_OCR eval:fuzzyocr_check()
body FUZZY_OCR_WRONG_CTYPE eval:dummy_check()
body FUZZY_OCR_CORRUPT_IMG eval:dummy_check()
body FUZZY_OCR_WRONG_EXTENSION eval:dummy_check()
body FUZZY_OCR_KNOWN_HASH eval:dummy_check()

describe FUZZY_OCR Mail contains an image with common spam text inside
describe FUZZY_OCR_WRONG_CTYPE Mail contains an image with wrong content-type set
describe FUZZY_OCR_WRONG_EXTENSION Mail contains an image with wrong file extension
describe FUZZY_OCR_CORRUPT_IMG Mail contains a corrupted image
describe FUZZY_OCR_KNOWN_HASH Mail contains an image with known hash

priority FUZZY_OCR 900

###
### Plugin Configuration
###

###
### Logging options
###

# Verbosity level (see manual)
# Level 0 - Errors only
# Level 1 - Errors and Warnings
# Level 2 - Errors, Warnings and Info Messages
# Level 3 - Full debug output
# Default value: 1
# NOTE(review): level 3 (full debug output) is active here instead of the
# default of 1 -- confirm this is intended outside of debugging sessions.
focr_verbose 3

# Log Message-Id, From, To
# Default: 1
#focr_log_pmsinfo 0

# Send logging output to stderr.
# Default value: 1
#focr_log_stderr 0

# Logfile (make sure it is writable by the plugin)
# Default value: none
#focr_logfile /tmp/FuzzyOcr.log
focr_logfile /var/spool/MailScanner/spamassassin/FuzzyOcr.log

###
### Wordlists
###

# Here we define the words to scan for
# Default value: /etc/mail/spamassassin/FuzzyOcr.words
#focr_global_wordlist /etc/mail/spamassassin/FuzzyOcr.words
#
# This is the path RELATIVE to the respective home directory
# for the personalized list. This list is merged with the global
# word list on execution.
# Default value: ~/.spamassassin/fuzzyocr.words
# If value begins with '/', it is treated as fixed path.
#focr_personal_wordlist fuzzyocr.words
#
# This option allows you to disable the whole personalization stuff,
# i.e. FuzzyOcr will not call functions in SA that require home
# directories for your users. This is only required if you are running
# an environment where the users don't have home directories at all.
# Default value: 0
#
#focr_no_homedirs 1
#
## Optionally, disable this option if you want to scan for numbers
## Setting this to 0 will cause FuzzyOcr not to strip numbers from
## both the wordlist and the OCR results
#
#focr_strip_numbers 1

###
### Helper Applications
###

# These parameters can be used to change other detection settings
# If you leave these commented out, the defaults will be used.
# Do not use " " around any parameters!

###
### Step 1:
### Inform the plugin which helper apps are required.
###

# The following are already included by default:
#
#focr_bin_helper gifsicle, giffix, giftext, gifinter, giftopnm
#focr_bin_helper jpegtopnm, pngtopnm, bmptopnm, tifftopnm, ppmhist
#focr_bin_helper gocr, ocrad

# Include additional scanner/preprocessor commands here:
#
#focr_bin_helper pnmnorm, pnminvert, pamthreshold, ppmtopgm, pamtopnm
#focr_bin_helper tesseract
focr_bin_helper pnmnorm, pnminvert, ppmtopgm

###
### Step 2:
### Inform the plugin of the search path to find all helper apps.
### Only the first match will be considered, so the order is important.
###

# Search path for locating helper applications
#focr_path_bin /usr/local/netpbm/bin:/usr/local/bin:/usr/bin
focr_path_bin /usr/local/netpbm/bin:/usr/local/bin:/usr/bin

###
### Step 3:
### You can optionally define a helper application location, bypassing
### the search path algorithm. Please note that if the helper app is not
### previously defined, it will generate an error:
#focr_bin_gifsicle /usr/bin/gifsicle
#focr_bin_giffix /usr/bin/giffix
#focr_bin_giftext /usr/bin/giftext
#focr_bin_gifinter /usr/bin/gifinter
#focr_bin_giftopnm /usr/bin/giftopnm
#focr_bin_jpegtopnm /usr/bin/jpegtopnm
#focr_bin_pngtopnm /usr/bin/pngtopnm
#focr_bin_bmptopnm /usr/bin/bmptopnm
#focr_bin_tifftopnm /usr/bin/tifftopnm
#focr_bin_ppmhist /usr/bin/ppmhist
#focr_bin_gocr /usr/bin/gocr
#focr_bin_ocrad /usr/bin/ocrad
#focr_bin_pnmnorm /usr/bin/pnmnorm
#focr_bin_pnminvert /usr/bin/pnminvert
#focr_bin_convert /usr/bin/convert

###
### Scansets
###

# Paths to the files containing Scansets and Preprocessors definitions
#
#focr_preprocessor_file /etc/mail/spamassassin/FuzzyOcr.preps
#focr_scanset_file /etc/mail/spamassassin/FuzzyOcr.scansets

# Setting this to 1 will cause FuzzyOcr to skip all other scansets,
# if a scanset has reached the amount of hits specified in
# focr_counts_required. (i.e. if the image is detected as spam).
# This saves resources, but lowers the scores because not the best,
# but the first best scanset is taken as result.
# Default value: 1
#focr_minimal_scanset 0

# This option is only used when focr_minimal_scanset is enabled.
# Basically, this counts the effectiveness of a scanset on the current
# mail traffic and re-sorts the scansets with the most effective first.
# This saves unnecessary scanner passes and saves resources.
# Default value: 1.
#focr_autosort_scanset 0

# This is a parameter for the focr_autosort_scanset function, and specifies
# the maximum value of the effectiveness counter used in each scanset. If you
# increase this, it will take longer until the autosort function adapts to new
# types of spam, setting it too low will lower the effectiveness of the
# function.
# Default value: 10
#focr_autosort_buffer 10

###
### Scan Settings
###

# Timeout for the plugin, in seconds. (Maximum runtime of the plugin)
# Default value: 10
#focr_timeout 15

# Use a global timeout value instead of per helper application.
# Default value: 0
#focr_global_timeout 1

# Minimum image size to scan. Images with dimensions smaller than the
# ones specified here will be skipped:
# Default: Height:4 Width:4
#
#focr_min_height 4
#focr_min_width 4

# Maximum image size to scan. Images with dimensions bigger than the
# ones specified here will be skipped:
# Default: Height:800 Width:800
#
#focr_max_height 800
#focr_max_width 800

# Maximum file size for different formats in bytes, bigger pictures
# will not be scanned
# Default values: unlimited
#focr_max_size_gif 80000
#focr_max_size_jpeg 100000
#focr_max_size_png 80000
#focr_max_size_bmp 500000
#focr_max_size_tiff 500000

# Skip checking the following image types
# Default value: 0 (check image type)
#focr_skip_gif 1
#focr_skip_jpeg 1
#focr_skip_png 1
#focr_skip_bmp 1
#focr_skip_tiff 1

# Default detection threshold (see manual)
# Default value: 0.25 (Can be changed on a per word basis in the wordlist).
#focr_threshold 0.20

# Number of minimum matches before the rule scores (Default value: 2)
#focr_counts_required 3

# Setting this will cause every word to be matched only once per image (Default value: 0)
#focr_unique_matches 1

# This is the score for a hit after focr_counts_required matches
# Default value: 5
#focr_base_score 5

# This is the additional score for every additional match after
# focr_counts_required matches
# Default value: 1
#focr_add_score 0.375

# This option defines the factor, which is multiplied with the number
# of matches, that were made without stripping spaces. FuzzyOcr does two
# matching attempts on OCR results, one without space stripping and one with.
# To weight the first match type more, this factor is applied.
# Default value: 1.5
#focr_twopass_scoring_factor 1.5

# This is the score to give for a wrong content-type.
# e.g. JPEG image but content type says GIF
# Default value: 1.5
#focr_wrongctype_score 1.5

# This is the score to give for a wrong file extension.
# e.g. JPEG image but file extension says GIF
# Default value: 1.5
#focr_wrongext_score 1.5

# This is the score to give for a corrupted image.
# This currently affects only GIF images
# Default value: 2.5
#focr_corrupt_score 2.5

# This is the score to give for a corrupted unfixable image.
# This currently affects only GIF images.
# Default value: 5
#focr_corrupt_unfixable_score 5

# This is used to disable the OCR engine if the message has
# already more points than this value
# Default value: 10
#focr_autodisable_score 30

# This is used to disable the OCR engine if the message has
# already less points than this value
# Default value: -5
#focr_autodisable_negative_score -5

###
### Hashing Options (Optional)
###

# Select which type of image hashing to use:
# Default value: 0 (disabled)
# Allowed values:
# 1 ... use digest_hash only (deprecated)
# 2 ... use digest_db w/digest_hash import (see requirements, recommended)
# 3 ... use mysql database (see requirements, experimental)
#--
# The score is saved with the hash in the database, allowing the plugin to
# skip the scans when the image is found in the database, using the score
# from the previous scans.
#--
#focr_enable_image_hashing 3
focr_enable_image_hashing 2

# Set this to skip updating the hashing database at startup
# Default value: 0 (update at startup)
#focr_skip_updates 1

# Automatically add hashes of spam images recognized by OCR to the Image
# Hash database, to disable, set to 0
# Default value: 1 (learn)
#focr_hashing_learn_scanned 1

# Score images whose global word count is below focr_counts_required using
# the following formula: (focr_add_score * word count) as score.
# Default value: 0 (ignore images)
#focr_score_ham 1

# If the image hash database feature is enabled (Type 1 Hashing),
# specify the file to use as database
# Default value: /etc/mail/spamassassin/FuzzyOcr.hashdb
#focr_digest_db /etc/mail/spamassassin/FuzzyOcr.hashdb

# If the image hash db feature is enabled (Type 2 Hashing),
# specify the file to use as the SPAM database
# Default value: /etc/mail/spamassassin/FuzzyOcr.db
#focr_db_hash /etc/mail/spamassassin/FuzzyOcr.db
focr_db_hash /var/spool/MailScanner/spamassassin/FuzzyOcr.db

# If the image hash db feature is enabled (Type 2 Hashing),
# specify the file to use as the HAM database
# Default value: /etc/mail/spamassassin/FuzzyOcr.safe.db
#focr_db_safe /etc/mail/spamassassin/FuzzyOcr.safe.db
focr_db_safe /var/spool/MailScanner/spamassassin/FuzzyOcr.safe.db

# Auto-prune: Expire records from hashing databases after these many days
# Default value: 35
#focr_db_max_days 15

###
### MySQL options (Type 3 Hashing)
###
#focr_mysql_db FuzzyOcr
#focr_mysql_hash Hash
#focr_mysql_safe Safe
#focr_mysql_user fuzzyocr
#focr_mysql_pass fuzzyocr
#focr_mysql_host localhost
#focr_mysql_port 3306
#focr_mysql_socket /tmp/mysql.sock

# If set, the database table is updated with different data from one of
# the following:
# + filename,
# + image-params,
# + content-type,
# + file-type,
# + score,
# + word-info
# Default value: 0
#focr_mysql_update_hash 1

###
### Miscellaneous Options
###

# The plugin uses a temporary directory to store intermediate information.
# In order to keep these files for debugging purposes use any of these
# values:
# 0 = always cleanup (default value)
# 1 = keep only if error
# 2 = always keep
#--
# Keeping these intermediate files could fill your HDD _very_ fast!
# Make sure you periodically empty your temp dir (usually: /tmp) or
# suffer the consequences. You've been warned!!
#--
#focr_keep_bad_images 1

#################################################################
# DO NOT REMOVE THIS LINE, IT IS REQUIRED UNDER ALL CIRCUMSTANCES
focr_end_config