diff --git a/scan b/scan
new file mode 100755
index 0000000..b9bd88a
--- /dev/null
+++ b/scan
@@ -0,0 +1,201 @@
+#!/bin/bash
+#
+# scan - drive scanadf to scan a batch of pages and assemble them into one PDF.
+#
+# Each scanned page is passed to the scan_perpage helper (scanadf -S), which
+# turns it into scan-NNNN.pdf in the current directory. Afterwards the page
+# PDFs are optionally truncated and then merged/moved into the output file.
+# Run with -h for the option list.
+
+# bash is required: BASH_SOURCE and arrays are used below.
+DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+DEVICE=fujitsu
+OUTPUT=scan.pdf
+APPEND=0
+RESOLUTION=300
+MODE=Lineart
+SCRIPT="$DIR/scan_perpage"
+DUPLEX=0
+DESKEW=0
+SEARCHABLE=0
+MAXPAGE=
+TRUNCPAGE=0
+HELP=0
+SIZE=Letter
+PGHEIGHT=
+PGHEIGHTIN=11
+PGWIDTH=
+PGWIDTHIN=8.5
+VERBOSE=0
+
+# The unpaper processing reduces the file size considerably, so 300 dpi is selected.
+# When not using unpaper (no --deskew) it is recommended to scan at a lower resolution e.g. 200 dpi.
+
+# Abort unless option $1 is followed by a value; $2 is the number of
+# arguments still unparsed, counting the option itself.
+need_arg() {
+  if [ "$2" -lt 2 ]; then
+    echo >&2 "Option $1 requires an argument. Aborting."
+    exit 1
+  fi
+}
+
+# Parse command-line options
+while [ $# -gt 0 ]; do
+  case "$1" in
+    -v|--verbose) VERBOSE=1 ;;
+    -d|--duplex) DUPLEX=1 ;;
+    -m|--mode) need_arg "$1" $#; shift; MODE=$1 ;;
+    -r|--resolution) need_arg "$1" $#; shift; RESOLUTION=$1 ;;
+    -o|--output) need_arg "$1" $#; shift; OUTPUT=$1 ;;
+    -a|--append) APPEND=1 ;;
+    -e|--max) need_arg "$1" $#; shift; MAXPAGE=$1 ;;
+    -t|--truncate) need_arg "$1" $#; shift; TRUNCPAGE=$1 ;;
+    -h|--help) HELP=1 ;;
+    -s|--size) need_arg "$1" $#; shift; SIZE=$1 ;;
+    -ph|--page-height) need_arg "$1" $#; shift; PGHEIGHT=$1 ;;
+    -pw|--page-width) need_arg "$1" $#; shift; PGWIDTH=$1 ;;
+    --dskew|--deskew) DESKEW=1 ;;
+    --searchable|--ocr) SEARCHABLE=1 ;;
+    *) echo >&2 "Ignoring unknown option: $1" ;;
+  esac
+  shift # next option
+done
+
+if [ "$HELP" -eq 1 ]; then
+  echo "$(basename "$0") [-v|--verbose] [-d|--duplex] [-m|--mode] [-r|--resolution] [-o|--output] [-a|--append] [-e|--max] [-t|--truncate] [-s|--size | [-ph|--page-height] [-pw|--page-width]] [--deskew] [--searchable]"
+  echo "  -v            Verbose output (this will slow down the scan due to the need to prevent interleaved output)"
+  echo "  -d            Duplex scanning"
+  echo "  -m            Mode e.g. Lineart (default), Halftone, Gray, Color, etc."
+  echo "  -r            Resolution e.g 300 (default)"
+  echo "  -o            Output e.g. scan.pdf (default)"
+  echo "  -a            Append output to existing scan"
+  echo "  -e            Max number of pages e.g. 2 (default is all pages)"
+  echo "  -t            Truncate number of pages from end e.g. 1 (default is none)"
+  echo "  -s            Page Size as type e.g. Letter (default), Legal, A4"
+  echo "  -ph           Custom Page Height in mm"
+  echo "  -pw           Custom Page Width in mm"
+  echo "  --deskew      Deskew pages with unpaper (default is no deskew)"
+  echo "  --searchable  Create a searchable pdf with tesseract"
+  echo ""
+  exit 0
+fi
+
+# -t must be a plain non-negative integer; it is used in numeric tests below.
+case "$TRUNCPAGE" in
+  ''|*[!0-9]*)
+    echo >&2 "Invalid page count for -t: $TRUNCPAGE. Aborting."
+    exit 1
+    ;;
+esac
+
+if [ -f "$OUTPUT" ] && [ "$APPEND" -ne 1 ]; then
+  echo >&2 "Output file $OUTPUT already exists. Delete or specify -a. Aborting."
+  exit 1
+fi
+
+# Optional scanadf arguments are collected in arrays so that values with
+# spaces (e.g. "ADF Duplex") stay single words without resorting to eval.
+SOURCE_ARGS=()
+if [ "$DUPLEX" -eq 1 ]; then
+  SOURCE_ARGS=(--source "ADF Duplex")
+fi
+
+MAXPAGE_ARGS=()
+if [ -n "$MAXPAGE" ]; then
+  MAXPAGE_ARGS=(-e "$MAXPAGE")
+fi
+
+PS2PDF_OPTS=
+
+# Specify non-letter sizes in mm
+case "$SIZE" in
+  Letter) PGHEIGHTIN=11; PGWIDTHIN=8.5 ;;
+  Legal) PGHEIGHT=355.6; PGWIDTH=215.9 ;;
+  A4) PGHEIGHT=297; PGWIDTH=210 ;;
+esac
+
+HEIGHT_ARGS=()
+if [ -n "$PGHEIGHT" ]; then
+  PGHEIGHTIN=$(units --compact -1 "$PGHEIGHT mm" 'in')
+  HEIGHT_ARGS=(--page-height "$PGHEIGHT" -y "$PGHEIGHT")
+  PS2PDF_OPTS="-dEPSCrop"
+fi
+
+WIDTH_ARGS=()
+if [ -n "$PGWIDTH" ]; then
+  PGWIDTHIN=$(units --compact -1 "$PGWIDTH mm" 'in')
+  WIDTH_ARGS=(--page-width "$PGWIDTH" -x "$PGWIDTH")
+  PS2PDF_OPTS="-dEPSCrop"
+fi
+
+export VERBOSE
+export DESKEW
+export SEARCHABLE
+export RESOLUTION
+export PGWIDTHIN
+export PGHEIGHTIN
+export PS2PDF_OPTS
+
+# In verbose mode scan_perpage serialises its output through a lock file.
+if [ "$VERBOSE" -eq 1 ]; then
+  LOCKFILE=$(mktemp) || exit 1
+  trap 'rm -f -- "$LOCKFILE"' EXIT
+  export LOCKFILE
+fi
+
+echo >&2 "Scanning..."
+# For debugging, prefix the command with: strace -f -o /tmp/scan-trace.txt
+scanadf -d "$DEVICE" "${MAXPAGE_ARGS[@]}" "${HEIGHT_ARGS[@]}" "${WIDTH_ARGS[@]}" \
+  -S "$SCRIPT" --script-wait --resolution "$RESOLUTION" --mode "$MODE" \
+  "${SOURCE_ARGS[@]}" -o scan-%04d
+
+# Collect the page pdfs with a glob (sorted by name) instead of parsing ls.
+shopt -s nullglob
+pages=(scan-[0-9]*.pdf)
+shopt -u nullglob
+numscans=${#pages[@]}
+
+if [ "$numscans" -gt 0 ]; then
+  echo ""
+  echo "Processing $numscans pages"
+  if [ "$TRUNCPAGE" -gt 0 ]; then
+    # Remove up to TRUNCPAGE pages from the end of the batch.
+    removed=0
+    while [ "$removed" -lt "$TRUNCPAGE" ] && [ "${#pages[@]}" -gt 0 ]; do
+      last=$(( ${#pages[@]} - 1 ))
+      rm -- "${pages[$last]}"
+      unset "pages[$last]"
+      removed=$(( removed + 1 ))
+    done
+    echo "Truncated $removed pages"
+  fi
+  if [ "${#pages[@]}" -eq 0 ]; then
+    echo "No pages left after truncation."
+    exit 0
+  fi
+  if [ "${#pages[@]}" -gt 1 ] || [ "$APPEND" -eq 1 ]; then
+    echo "Concatenating pdfs..."
+    inputs=()
+    if [ -f "$OUTPUT" ]; then
+      # Keep the previous output as <output>.orig and merge it first.
+      mv -- "$OUTPUT" "${OUTPUT}.orig"
+      inputs=("${OUTPUT}.orig")
+    fi
+    inputs+=("${pages[@]}")
+    if ! pdfunite "${inputs[@]}" "$OUTPUT"; then
+      # Leave the page pdfs in place so nothing scanned is lost.
+      echo >&2 "pdfunite failed. Aborting."
+      exit 1
+    fi
+    rm -- "${pages[@]}"
+  else
+    mv -- "${pages[0]}" "$OUTPUT"
+  fi
+  chown raman:raman "$OUTPUT"
+  echo ""
+  echo "Done."
+else
+  echo "Found no scans."
+fi
diff --git a/scan_perpage b/scan_perpage
new file mode 100755
index 0000000..b5842bc
--- /dev/null
+++ b/scan_perpage
@@ -0,0 +1,135 @@
+#!/bin/bash
+# Usage: scan_perpage <imagefile>
+#   where imagefile is the data just scanned
+#   (specify this script to scanadf via -S)
+#
+# Converts the image to <imagefile without extension>.pdf, optionally running
+# unpaper (DESKEW=1) and/or tesseract OCR (SEARCHABLE=1) on it, removes the
+# image and exits with the status of the processing steps.
+
+# Print the calling convention, including the environment variables read.
+usage()
+{
+  echo "Usage: $0 <imagefile>"
+  echo "Set the following environment variables:"
+  echo "  DESKEW"
+  echo "  SEARCHABLE"
+  echo "  RESOLUTION"
+  echo "  PGWIDTHIN"
+  echo "  PGHEIGHTIN"
+  echo "  PS2PDF_OPTS (optional)"
+  echo "  VERBOSE (optional)"
+  echo "  LOCKFILE (required if VERBOSE=1)"
+}
+
+# Print message $1 when VERBOSE=1.
+log()
+{
+  if [ "$VERBOSE" = 1 ]; then
+    echo "scan_perpage: $1"
+  fi
+}
+
+# Pass stdin through when VERBOSE=1, otherwise discard it.
+logstdout()
+{
+  if [ "$VERBOSE" = 1 ]; then
+    cat
+  else
+    cat > /dev/null
+  fi
+}
+
+if [ $# -lt 1 ]; then
+  usage
+  exit 1
+fi
+
+if [ -z "$DESKEW" ] || [ -z "$SEARCHABLE" ] || [ -z "$RESOLUTION" ] ||
+   [ -z "$PGWIDTHIN" ] || [ -z "$PGHEIGHTIN" ]; then
+  usage
+  exit 1
+fi
+
+# VERBOSE is optional; LOCKFILE is only needed to serialise verbose output.
+VERBOSE=${VERBOSE:-0}
+if [ "$VERBOSE" = 1 ] && [ -z "$LOCKFILE" ]; then
+  usage
+  exit 1
+fi
+
+IMAGE_FILE=$1
+
+# Convert IMAGE_FILE to pdf and remove the intermediate image files.
+# Returns 0 when every processing step succeeded, otherwise the status of the
+# last step that failed.
+process_page() {
+  local pp_prefix="" rc=0 status=0
+  local -a unpaper_opts=() pnm_opts=() pipe_rc=()
+
+  log ""
+  log "-------------------------------------------------------------------------------"
+  log "Post-processing scanned page ${IMAGE_FILE}, deskew=$DESKEW, searchable=$SEARCHABLE..."
+
+  if [ "$DESKEW" -eq 1 ]; then
+    log "Applying unpaper post-processing to image data..."
+    pp_prefix="unpaper-"
+    if [ "$VERBOSE" = 1 ]; then
+      unpaper_opts=(-v)
+    fi
+    #unpaper -v --no-mask-scan --overwrite --dpi $RESOLUTION --no-blackfilter $IMAGE_FILE $PP_PREFIX$IMAGE_FILE | logstdout
+    unpaper "${unpaper_opts[@]}" --overwrite --dpi "$RESOLUTION" "$IMAGE_FILE" "$pp_prefix$IMAGE_FILE" | logstdout
+    rc=${PIPESTATUS[0]}
+    if [ "$rc" -ne 0 ]; then
+      status=$rc
+    fi
+  fi
+  log ""
+  log "-------------------------------------------------------------------------------"
+  log "Converting image data to pdf..."
+  if [ "$SEARCHABLE" -eq 1 ]; then
+    log "Converting image data to searchable pdf..."
+    tesseract "$pp_prefix$IMAGE_FILE" "${IMAGE_FILE%.*}" -l eng pdf | logstdout
+    rc=${PIPESTATUS[0]}
+  else
+    log "Converting image data to pdf..."
+    if [ "$VERBOSE" = 1 ]; then
+      pnm_opts=(-verbose)
+    fi
+    # PS2PDF_OPTS is deliberately unquoted: it is optional and may be empty.
+    pnmtops "${pnm_opts[@]}" -imagewidth "$PGWIDTHIN" -imageheight "$PGHEIGHTIN" "$pp_prefix$IMAGE_FILE" \
+      | ps2pdf $PS2PDF_OPTS - > "${IMAGE_FILE%.*}.pdf"
+    pipe_rc=("${PIPESTATUS[@]}")
+    rc=${pipe_rc[1]}
+    if [ "${pipe_rc[0]}" -ne 0 ]; then
+      rc=${pipe_rc[0]}
+    fi
+  fi
+  if [ "$rc" -ne 0 ]; then
+    status=$rc
+  fi
+
+  rm -- "$IMAGE_FILE"
+  if [ -f "$pp_prefix$IMAGE_FILE" ]; then
+    rm -- "$pp_prefix$IMAGE_FILE"
+  fi
+
+  log ""
+  log "Scan processing done, status = $status"
+  return "$status"
+}
+
+# A subshell cannot hand variables back, so the page status is taken from
+# the exit status of process_page in both branches.
+if [ "$VERBOSE" = 1 ]; then
+  (
+    flock 200
+    process_page
+  ) 200>"$LOCKFILE"
+  status=$?
+else
+  process_page
+  status=$?
+fi
+
+exit "$status"