#!/bin/bash
# Usage: scan_perpage <imagefile>
#   where imagefile is the data just scanned
# (specify this script to scanadf via -S)
#
# Post-processes a single scanned page:
#   1. optionally detects (almost) completely white pages and skips them,
#   2. optionally runs unpaper on the image,
#   3. converts the image to a PDF stored next to it (tesseract when a
#      searchable PDF is requested, pnmtops | ps2pdf otherwise),
#   4. removes the scanned image and any unpaper intermediate file.
#
# Exit status: 1 on usage errors; otherwise the exit status of the first
# processing step that failed, or 0 when every step succeeded or the page
# was skipped as empty.

# Print the command line synopsis and the environment variables that
# configure this script.
usage() {
  echo "Usage: $0 <imagefile>"
  echo "Set the following environment variables:"
  echo " UNPAPER"
  echo " SEARCHABLE"
  echo " LANGUAGE (required if SEARCHABLE=1)"
  echo " RESOLUTION"
  echo " PGWIDTHIN"
  echo " PGHEIGHTIN"
  echo " SKIP_EMPTY_PAGES"
  echo " PS2PDF_OPTS (optional)"
  echo " VERBOSE (optional)"
  echo " LOCKFILE (required if VERBOSE=1)"
}

# Print the message in $1, prefixed with the script name, when VERBOSE=1.
# VERBOSE is optional, so an unset value is treated as "not verbose".
log() {
  if [[ "${VERBOSE:-0}" == 1 ]]; then
    echo "scan_perpage: $1"
  fi
}

# Filter for tool output: pass stdin through to stdout when VERBOSE=1,
# discard it otherwise.
logstdout() {
  if [[ "${VERBOSE:-0}" == 1 ]]; then
    cat
  else
    cat > /dev/null
  fi
}

if [[ $# -lt 1 ]]; then
  usage
  exit 1
fi

if [[ -z "${UNPAPER:-}" || -z "${SEARCHABLE:-}" ||
      -z "${RESOLUTION:-}" || -z "${SKIP_EMPTY_PAGES:-}" ]]; then
  usage
  exit 1
fi

# tesseract is invoked with "-l $LANGUAGE", so a language is mandatory for
# searchable output.
if [ "$SEARCHABLE" -eq 1 ] 2>/dev/null && [[ -z "${LANGUAGE:-}" ]]; then
  usage
  exit 1
fi

# In verbose mode the page is processed while holding a lock on $LOCKFILE.
if [[ "${VERBOSE:-0}" == 1 && -z "${LOCKFILE:-}" ]]; then
  usage
  exit 1
fi

IMAGE_PATH=$1
IMAGE_DIR=$(dirname -- "$1")
IMAGE_FILE=$(basename -- "$1")

# Post-process the page in $IMAGE_PATH as described in the file header.
# Globals read: IMAGE_PATH, IMAGE_DIR, IMAGE_FILE, UNPAPER, SEARCHABLE,
#               LANGUAGE, RESOLUTION, PGWIDTHIN, PGHEIGHTIN,
#               SKIP_EMPTY_PAGES, PS2PDF_OPTS, VERBOSE
# Returns: 0 on success or when the page is skipped as empty, otherwise
#          the exit status of the first tool that failed.
process_page() {
  local status=0 rc
  local -a pipe_rc
  local percentage_white=0
  local is_blank=0
  local pp_prefix=""
  local number_re='^[0-9]+([.][0-9]+)?$'
  local -a unpaper_opts=() pnm_opts=() page_opts=()

  log ""
  log "-------------------------------------------------------------------------------"
  log "Post-processing scanned page ${IMAGE_FILE}, deskew=$UNPAPER, searchable=$SEARCHABLE..."

  if [[ "$SKIP_EMPTY_PAGES" -eq 1 ]]; then
    # Every pixel that is not pure white is turned black, so the mean is the
    # fraction of pure white pixels. A failing convert counts as 0 % white.
    percentage_white=$(convert "$IMAGE_PATH" -fuzz 0% -negate -threshold 0 \
      -negate -format "%[fx:100*mean]" info:) || percentage_white=0
  fi
  log "$IMAGE_PATH has $percentage_white % white"

  # Treat the page as empty only when the measurement is a plain decimal
  # number AND bc positively confirms the threshold. Anything else (missing
  # bc, unparsable convert output) falls through to normal processing, so a
  # tooling problem can never cause a page to be dropped.
  if [[ "$percentage_white" =~ $number_re ]] &&
     [[ "$(echo "$percentage_white >= 99.8" | bc -l 2>/dev/null)" == 1 ]]; then
    is_blank=1
  fi

  if (( is_blank )); then
    log "Skipping empty page $IMAGE_FILE with white percentage $percentage_white"
  else
    if [ "$UNPAPER" -eq 1 ]; then
      log "Applying unpaper post-processing to image data..."
      pp_prefix="unpaper-"
      if [[ "${VERBOSE:-0}" == 1 ]]; then
        unpaper_opts+=(-v)
      fi
      #unpaper $UNPAPERVERBOSE --no-mask-scan --overwrite --dpi $RESOLUTION --no-blackfilter $IMAGE_FILE $PP_PREFIX$IMAGE_FILE | logstdout
      unpaper "${unpaper_opts[@]}" --overwrite --dpi "$RESOLUTION" \
        "$IMAGE_PATH" "$IMAGE_DIR/$pp_prefix$IMAGE_FILE" | logstdout
      # $? would be logstdout's status; the tool's status is in PIPESTATUS.
      rc=${PIPESTATUS[0]}
      if (( status == 0 )); then
        status=$rc
      fi
    fi

    log ""
    log "-------------------------------------------------------------------------------"

    if [ "$SEARCHABLE" -eq 1 ]; then
      log "Converting image data to searchable pdf..."
      tesseract "$IMAGE_DIR/$pp_prefix$IMAGE_FILE" \
        "$IMAGE_DIR/${IMAGE_FILE%.*}" -l "$LANGUAGE" pdf | logstdout
      rc=${PIPESTATUS[0]}
      if (( status == 0 )); then
        status=$rc
      fi
    else
      log "Converting image data to pdf..."
      if [[ "${VERBOSE:-0}" == 1 ]]; then
        pnm_opts+=(-verbose)
      fi
      if [[ -z "${PGWIDTHIN:-}" || -z "${PGHEIGHTIN:-}" ]]; then
        page_opts=(-equalpixels "-dpi=$RESOLUTION" -noturn)
      else
        page_opts=(-imagewidth "$PGWIDTHIN" -imageheight "$PGHEIGHTIN")
      fi
      log "Using page options: ${page_opts[*]}"
      # PS2PDF_OPTS is deliberately left unquoted: it is a whitespace
      # separated option list supplied through the environment.
      # shellcheck disable=SC2086
      pnmtops "${pnm_opts[@]}" "${page_opts[@]}" \
        "$IMAGE_DIR/$pp_prefix$IMAGE_FILE" \
        | ps2pdf $PS2PDF_OPTS - > "$IMAGE_DIR/${IMAGE_FILE%.*}.pdf"
      pipe_rc=("${PIPESTATUS[@]}")
      for rc in "${pipe_rc[@]}"; do
        if (( status == 0 )); then
          status=$rc
        fi
      done
    fi
  fi

  # The scanned image and the unpaper intermediate are removed whether or
  # not the conversion succeeded.
  rm -- "$IMAGE_PATH"
  if [[ -f "$IMAGE_DIR/$pp_prefix$IMAGE_FILE" ]]; then
    rm -- "$IMAGE_DIR/$pp_prefix$IMAGE_FILE"
  fi

  log ""
  log "Scan processing done, status = $status"
  return "$status"
}

if [[ "${VERBOSE:-0}" == 1 ]]; then
  # Hold an exclusive lock on $LOCKFILE (fd 200) while processing;
  # presumably so that the log output of concurrently running instances
  # does not interleave -- TODO confirm against the calling script.
  # process_page is the last command of the subshell, so its return value
  # becomes the subshell's exit status.
  (
    flock 200
    process_page
  ) 200>"$LOCKFILE"
  status=$?
else
  process_page
  status=$?
fi

exit "$status"