#!/bin/bash
# Usage: scan_perpage <imagefile>
#   where imagefile is the data just scanned
#   (specify this script to scanadf via -S)
#
# Post-processes a single scanned page:
#   1. optionally runs unpaper on the image (UNPAPER=1),
#   2. converts the result to PDF, either with tesseract (SEARCHABLE=1)
#      or with pnmtops | ps2pdf,
#   3. removes the source image and the intermediate unpaper output.
#
# Environment:
#   UNPAPER      1 to run unpaper first                        (required)
#   SEARCHABLE   1 to produce a searchable PDF via tesseract   (required)
#   RESOLUTION   scan resolution in dpi                        (required)
#   PGWIDTHIN    page width passed to pnmtops -imagewidth      (optional)
#   PGHEIGHTIN   page height passed to pnmtops -imageheight    (optional)
#   PS2PDF_OPTS  extra options for ps2pdf                      (optional)
#   VERBOSE      1 to enable logging                           (optional)
#   LOCKFILE     lock file used with flock    (required if VERBOSE=1)
#
# Exit status: 1 on usage errors, otherwise the exit status of the
# image-to-PDF conversion.

# VERBOSE is optional; give it a value so the tests below never see an
# unset variable.
VERBOSE=${VERBOSE:-0}

# Print the usage text to stdout.
usage() {
  echo "Usage: $0 <imagefile>"
  echo "Set the following environment variables:"
  echo " UNPAPER"
  echo " SEARCHABLE"
  echo " RESOLUTION"
  echo " PGWIDTHIN"
  echo " PGHEIGHTIN"
  echo " PS2PDF_OPTS (optional)"
  echo " VERBOSE (optional)"
  echo " LOCKFILE (required if VERBOSE=1)"
}

# Print a prefixed message, but only in verbose mode.
#   $1 - message text
log() {
  if [[ "$VERBOSE" == 1 ]]; then
    echo "scan_perpage: $1"
  fi
}

# Filter for tool output: pass stdin through in verbose mode, discard it
# otherwise.
logstdout() {
  if [[ "$VERBOSE" == 1 ]]; then
    cat
  else
    cat > /dev/null
  fi
}

if [[ $# -lt 1 ]]; then
  usage
  exit 1
fi

if [[ -z "$UNPAPER" || -z "$SEARCHABLE" || -z "$RESOLUTION" ]]; then
  usage
  exit 1
fi

# The verbose path redirects fd 200 to $LOCKFILE; an empty value would make
# that redirection fail and the page would never be processed.
if [[ "$VERBOSE" == 1 && -z "$LOCKFILE" ]]; then
  usage
  exit 1
fi

IMAGE_FILE=$1

# Post-process $IMAGE_FILE and convert it to "${IMAGE_FILE%.*}.pdf".
# Globals:  IMAGE_FILE, UNPAPER, SEARCHABLE, RESOLUTION, PGWIDTHIN,
#           PGHEIGHTIN, PS2PDF_OPTS, VERBOSE (all read)
# Returns:  exit status of the conversion step (tesseract, or the first
#           failing stage of pnmtops | ps2pdf)
process_page() {
  local status=0
  local pp_prefix=""
  local page out_base code
  local -a unpaper_opts=() pnm_opts=() page_opts=() stage_status=()

  log ""
  log "-------------------------------------------------------------------------------"
  log "Post-processing scanned page ${IMAGE_FILE}, deskew=$UNPAPER, searchable=$SEARCHABLE..."

  # Plain [ ] keeps the numeric comparison without arithmetic-evaluating an
  # arbitrary environment string the way [[ -eq ]] would.
  if [ "$UNPAPER" -eq 1 ]; then
    log "Applying unpaper post-processing to image data..."
    pp_prefix="unpaper-"
    if [[ "$VERBOSE" == 1 ]]; then
      unpaper_opts+=(-v)
    fi
    #unpaper "${unpaper_opts[@]}" --no-mask-scan --overwrite --dpi "$RESOLUTION" --no-blackfilter "$IMAGE_FILE" "${pp_prefix}${IMAGE_FILE}" | logstdout
    unpaper "${unpaper_opts[@]}" --overwrite --dpi "$RESOLUTION" \
      "$IMAGE_FILE" "${pp_prefix}${IMAGE_FILE}" | logstdout
  fi

  # Image fed to the converter: the unpaper output if it ran, otherwise the
  # scanned image itself.
  page="${pp_prefix}${IMAGE_FILE}"
  out_base="${IMAGE_FILE%.*}"

  log ""
  log "-------------------------------------------------------------------------------"
  log "Converting image data to pdf..."
  if [ "$SEARCHABLE" -eq 1 ]; then
    log "Converting image data to searchable pdf..."
    tesseract "$page" "$out_base" -l eng pdf | logstdout
    # $? would be the status of logstdout; take tesseract's own status.
    status=${PIPESTATUS[0]}
  else
    log "Converting image data to pdf..."
    if [[ "$VERBOSE" == 1 ]]; then
      pnm_opts+=(-verbose)
    fi
    if [[ -z "$PGWIDTHIN" || -z "$PGHEIGHTIN" ]]; then
      page_opts=(-equalpixels "-dpi=$RESOLUTION" -noturn)
    else
      page_opts=(-imagewidth "$PGWIDTHIN" -imageheight "$PGHEIGHTIN")
    fi
    log "Using page options: ${page_opts[*]}"
    # PS2PDF_OPTS is deliberately unquoted: it may hold several options.
    # ps2pdf's stdout goes straight to the PDF file, so there is nothing
    # left to pipe into logstdout.
    # shellcheck disable=SC2086
    pnmtops "${pnm_opts[@]}" "${page_opts[@]}" "$page" \
      | ps2pdf $PS2PDF_OPTS - > "${out_base}.pdf"
    stage_status=("${PIPESTATUS[@]}")
    # Report the first stage that failed, if any.
    for code in "${stage_status[@]}"; do
      if [[ "$code" -ne 0 ]]; then
        status=$code
        break
      fi
    done
  fi

  rm -- "$IMAGE_FILE"
  if [[ -f "$page" ]]; then
    rm -- "$page"
  fi

  log ""
  log "Scan processing done, status = $status"
  return "$status"
}

if [[ "$VERBOSE" == 1 ]]; then
  # Hold an exclusive lock on fd 200 while processing -- presumably so that
  # log output from page scripts running at the same time does not
  # interleave (TODO confirm). Variables set inside the subshell are lost,
  # so the result is carried out through its exit status.
  (
    flock 200
    process_page
  ) 200> "$LOCKFILE"
  status=$?
else
  process_page
  status=$?
fi

exit "$status"