mirror of
https://github.com/rocketraman/sane-scan-pdf.git
synced 2025-05-18 08:30:16 -07:00
Best practices from the sane mailing list indicate that deskew should be used in combination with crop for best results. Warn if not doing so.
396 lines
11 KiB
Bash
Executable File
396 lines
11 KiB
Bash
Executable File
#!/bin/bash
#
# Front-end that drives scanadf and assembles the scanned pages into PDF
# output. This first section only establishes the built-in option defaults;
# they may be overridden later by the user's defaults file and by the
# command line.

# Directory holding this script, used to locate the per-page helper below.
DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# --- Scanner selection and output targets ---
DEVICE=fujitsu
OUTPUT=scan.pdf
OUTPUTARR=()          # file names collected by -l/--outputlist
USEOUTPUT=0           # 1 once -o/--output was given
USEARRAY=0            # 1 once -l/--outputlist was given
APPEND=0

# --- Scan parameters ---
RESOLUTION=300
MODE=Lineart
MODE_CHANGED=0        # 1 once -m/--mode was given
MODE_HW_DEFAULT=0     # 1 to leave the mode to the hardware default
SCRIPT="$DIR/scan_perpage"   # handed to scanadf via -S
DUPLEX=0
UNPAPER=0
SEARCHABLE=0
LANGUAGE=eng
MAXPAGE=
TRUNCPAGE=0
HELP=0

# --- Page geometry ---
SIZE=
DEFAULTSIZE=1
PGHEIGHT=
PGHEIGHTIN=
PGWIDTH=
PGWIDTHIN=
CROP=0
DESKEW=0

# --- Miscellaneous ---
DRIVER_OPTION=
VERBOSE=0
SKIP_EMPTY_PAGES=0
WHITE_THRESHOLD=99.8
BRIGHTNESS_CONTRAST=
SOURCE=""
OPENSCAN=0

# Private scratch directory for the scanned pages. Abort right away if it
# cannot be created instead of continuing with an empty TMP_DIR.
if ! TMP_DIR=$(mktemp -d -p "" scan.XXXXXXXXXX) || [[ -z "$TMP_DIR" ]]; then
  echo >&2 "Unable to create temporary directory. Aborting."
  exit 1
fi

# Remove the scratch directory and everything in it.
# Globals: TMP_DIR (read). Installed as the EXIT trap right below.
cleanup() {
  # ":?" refuses to run rm with an empty path; "--" ends option parsing.
  rm -rf -- "${TMP_DIR:?}"
}
trap cleanup EXIT

# Optional per-user files, located under XDG_DATA_HOME (falling back to
# ~/.local/share): a defaults file sourced here, and a pre-scan hook whose
# path is only recorded at this point.
DEFAULTS="${XDG_DATA_HOME:-$HOME/.local/share}/sane-scan-pdf/defaults"
SCANPRE="${XDG_DATA_HOME:-$HOME/.local/share}/sane-scan-pdf/scan_pre"

# Sourced in the current shell so that assignments in the file replace the
# built-in defaults before the command line is parsed.
if [[ -e "$DEFAULTS" ]]; then
  . "$DEFAULTS"
fi

# Parse command-line options
|
|
while [[ $# > 0 ]]; do
|
|
case "$1" in
|
|
|
|
-v|--verbose) VERBOSE=1 ;;
|
|
|
|
-d|--duplex) DUPLEX=1 ;;
|
|
|
|
-m|--mode) shift; MODE=$1; MODE_CHANGED=1 ;;
|
|
|
|
--mode-hw-default) MODE_HW_DEFAULT=1 ;;
|
|
|
|
-r|--resolution) shift; RESOLUTION=$1 ;;
|
|
|
|
-a|--append) APPEND=1 ;;
|
|
|
|
-e|--max) shift; MAXPAGE=$1 ;;
|
|
|
|
-t|--truncate) shift; TRUNCPAGE=$1 ;;
|
|
|
|
-h|--help) HELP=1 ;;
|
|
|
|
-s|--size) shift; SIZE=$1 ;;
|
|
|
|
-ph|--page-height) shift; PGHEIGHT=$1 ;;
|
|
|
|
-pw|--page-width) shift; PGWIDTH=$1 ;;
|
|
|
|
--no-default-size) DEFAULTSIZE=0 ;;
|
|
|
|
--crop) CROP=1 ;;
|
|
|
|
--deskew) DESKEW=1 ;;
|
|
|
|
--unpaper) UNPAPER=1 ;;
|
|
|
|
--searchable|--ocr) SEARCHABLE=1 ;;
|
|
|
|
--language) shift; LANGUAGE=$1 ;;
|
|
|
|
--skip-empty-pages) SKIP_EMPTY_PAGES=1 ;;
|
|
|
|
--white-threshold) shift; WHITE_THRESHOLD=$1 ;;
|
|
|
|
-o|--output) shift; USEOUTPUT=1; OUTPUT="$1" ;;
|
|
|
|
-l|--outputlist) shift; USEARRAY=1; OUTPUTARR=(); OUTPUTARR+=("$1") ;;
|
|
|
|
-x|--device) shift; DEVICE="$1";;
|
|
|
|
-xo|--driver-options) shift; DRIVER_OPTION=$1;;
|
|
|
|
--brightness-contrast-sw) shift; BRIGHTNESS_CONTRAST=$1 ;;
|
|
|
|
--open) OPENSCAN=1 ;;
|
|
|
|
*) if [[ $USEARRAY == 1 ]]; then OUTPUTARR+=("$1"); else echo >&2 "Unknown argument: $1"; exit 1; fi ;;
|
|
|
|
esac
|
|
shift # next option
|
|
done
|
|
|
|
# Print the usage text to stdout.
# Globals (read): DEVICE (shown as the current device default), DEFAULTS and
# SCANPRE (paths shown; when a file exists its content is printed as well).
print_help() {
  # Unquoted here-doc delimiter: $(...) and $VARS below are expanded.
  cat <<EOF
$(basename "$0") [OPTIONS]... [OUTPUT]

OPTIONS
 -v, --verbose
 Verbose output (this will slow down the scan due to the need to prevent interleaved output)
 -d, --duplex
 Duplex scanning
 -m, --mode
 Mode e.g. Lineart (default), Halftone, Gray, Color, etc. Use --mode-hw-default to not set any mode
 --mode-hw-default
 Do not set the mode explicitly, use the hardware default — ignored if --mode is set
 -r, --resolution
 Resolution e.g 300 (default)
 -a, --append
 Append output to existing scan
 -e, --max <pages>
 Max number of pages e.g. 2 (default is all pages)
 -t, --truncate <pages>
 Truncate number of pages from end e.g. 1 (default is none) -- truncation happens after --skip-empty-pages
 -s, --size
 Page Size as type e.g. Letter (default), Legal, A4, no effect if --crop is specified
 -ph, --page-height
 Custom Page Height in mm
 -pw, --page-width
 Custom Page Width in mm
 -x, --device
 Override scanner device name, defaulting to "$DEVICE", pass an empty value for no device arg
 -xo, --driver-options
 Send additional options to the scanner driver e.g.
 -xo "--whatever bar --frobnitz baz"
 --no-default-size
 Disable default page size, useful if driver does not support page size/location arguments
 --crop
 Crop to contents (driver must support this)
 --deskew
 Run driver deskew (driver must support this)
 --unpaper
 Run post-processing deskew and black edge detection (requires unpaper)
 --ocr
 Run OCR to make the PDF searchable (requires tesseract)
 --language <lang>
 which language to use for OCR
 --skip-empty-pages
 remove empty pages from resulting PDF document (e.g. one sided doc in duplex mode)
 --white-threshold
 threshold to identify an empty page is a percentage value between 0 and 100. The default is 99.8
 --brightness-contrast-sw
 Alter brightness and contrast via post-processing - prefer specifying brightness and/or
 contrast via --driver-options if supported by your hardware.
 --open
 After scanning, open the scan via xdg-open

OUTPUT
 -o, --output <outputfile>
 Output to named file default=scan.pdf
 -l, --outputlist <outputfile-1...outputfile-n> Output to named files for each scanned page, can be used with append

CONFIGURATION
Local configuration can be written in the form of environment variables to:
 $DEFAULTS
EOF
  if [[ -e "$DEFAULTS" ]]; then
    echo ""
    echo "This file exists and currently contains:"
    echo "-----"
    cat "$DEFAULTS"
    echo "-----"
  fi
  echo ""
  echo "A pre-scan hook script can be written to:"
  echo " $SCANPRE"
  if [[ -e "$SCANPRE" ]]; then
    echo ""
    echo "This file exists and currently contains:"
    echo "-----"
    cat "$SCANPRE"
    echo "-----"
  fi
  echo ""
}

# -h/--help: show the usage text and stop before anything is scanned.
if [[ $HELP == 1 ]]; then
  print_help
  exit 0
fi

# Check the parsed options for consistency before anything is scanned.
# Globals read:    USEARRAY USEOUTPUT MODE_CHANGED OUTPUT OUTPUTARR APPEND
# Globals written: MODE_HW_DEFAULT (reset when a mode was given explicitly),
#                  OUTPUT (becomes the array of list names when -l is used)
# Exits with status 1 on contradictory options or when an output file already
# exists and -a/--append was not given.
validate_options() {
  local self o
  self=$(basename "$0")

  if [[ $USEARRAY == 1 && $USEOUTPUT == 1 ]]; then
    echo >&2 "Use one of -o or -l. Aborting."
    exit 1
  fi

  # An explicit --mode wins over --mode-hw-default.
  if [[ $MODE_CHANGED == 1 ]]; then
    MODE_HW_DEFAULT=0
  fi

  if [[ $MODE_CHANGED == 0 && $MODE_HW_DEFAULT == 0 ]]; then
    echo >&2 'Warning: neither the -m/--mode nor --mode-hw-default argument was specified. The current default is "Lineart", however -m/--mode is a SANE device-specific switch. Therefore, in a future version when --mode is not specified, `'"${self}'"' will defer to the device-specific default. If you wish to continue using "Lineart" as your scan mode, either verify that is the default for your scanner (scanadf --help -d <device>), or specify --mode Lineart explicitly on the command line.'
  fi

  if [[ $USEOUTPUT == 1 && "$OUTPUT" == "" ]]; then
    echo >&2 "Output file must be specified. Aborting."
    exit 1
  fi

  if [[ $USEOUTPUT == 1 && -f "$OUTPUT" && $APPEND != 1 ]]; then
    echo >&2 "Output file $OUTPUT already exists. Delete or specify -a. Aborting."
    exit 1
  fi

  if [[ $USEARRAY == 1 && ${#OUTPUTARR[@]} == 0 ]]; then
    echo >&2 "At least one file must be specified with -l. Aborting."
    exit 1
  fi

  if [[ $USEARRAY == 1 && $APPEND != 1 ]]; then
    for o in "${OUTPUTARR[@]}"; do
      if [[ -f "$o" ]]; then
        echo >&2 "Output file $o already exists. Delete or specify -a. Aborting."
        exit 1
      fi
    done
  fi

  # From here on OUTPUT holds the per-page names when -l was used.
  if [[ $USEARRAY == 1 ]]; then
    OUTPUT=("${OUTPUTARR[@]}")
  fi
}

validate_options

# Turn the parsed option values into the argument fragments that are later
# spliced into the scanadf command line. That command line is assembled with
# eval, which is why SOURCE and DEVICE embed their own quoting.
# Globals rewritten in place: SOURCE MAXPAGE SIZE PGHEIGHT PGWIDTH PGHEIGHTIN
#   PGWIDTHIN PS2PDF_OPTS DESKEW CROP SKIP_EMPTY_PAGES DEVICE
# Not idempotent: values are wrapped in their option syntax, so run it once.
prepare_scan_options() {
  if [[ $DUPLEX == 1 ]]; then
    SOURCE="--source \"ADF Duplex\""
  fi

  if [[ "$MAXPAGE" != "" ]]; then
    MAXPAGE="-e $MAXPAGE"
  fi

  PS2PDF_OPTS=

  if [[ $CROP != 1 && $SIZE == "" && $DEFAULTSIZE == 1 ]]; then
    # Default to Letter size, but only if crop is not specified and this feature is not disabled
    SIZE=Letter
  fi

  # Named sizes expand to height/width in millimetres.
  case "$SIZE" in
    Letter) PGHEIGHT=279.4; PGWIDTH=215.9 ;;
    Legal) PGHEIGHT=355.6; PGWIDTH=215.9 ;;
    A4) PGHEIGHT=297; PGWIDTH=210 ;;
  esac

  if [[ $DESKEW == 1 ]]; then
    if [[ $CROP != 1 ]]; then
      echo >&2 'Warning: `--deskew` specified, but `--crop` not specified. It is recommended to scan at max height/width and then crop to give deskew the best chance of working correctly.'
    fi
    DESKEW="--swdeskew=yes"
  elif [[ $DESKEW == 0 ]]; then
    # Flag not set: expand to nothing rather than a stray "0" argument.
    DESKEW=
  fi

  if [[ $CROP != 1 && "$PGHEIGHT" != "" ]]; then
    PGHEIGHTIN=$(units --compact -1 "$PGHEIGHT mm" 'in')
    PGHEIGHT="--page-height $PGHEIGHT -y $PGHEIGHT"
    PS2PDF_OPTS="-dEPSCrop"
  fi

  if [[ $CROP != 1 && "$PGWIDTH" != "" ]]; then
    PGWIDTHIN=$(units --compact -1 "$PGWIDTH mm" 'in')
    PGWIDTH="--page-width $PGWIDTH -x $PGWIDTH"
    PS2PDF_OPTS="-dEPSCrop"
  fi

  if [[ $CROP == 1 ]]; then
    CROP="--swcrop=yes --ald=yes"
    # In duplex mode, the driver's buffer for the back side image will be larger than necessary, oh well
    # http://sane.10972.n7.nabble.com/Fujitsu-backend-and-iX500-scanning-page-longer-than-14-Inches-td19303.html
    PGHEIGHT="--page-height 9999 -y 9999"
    PGWIDTH="--page-width 9999 -x 9999"
    PS2PDF_OPTS="-dEPSCrop"
  elif [[ $CROP == 0 ]]; then
    # Flag not set: expand to nothing rather than a stray "0" argument.
    # Done only here, after the "$CROP != 1" geometry checks above.
    CROP=
  fi

  if [[ $SKIP_EMPTY_PAGES == 1 && ! -x "$(command -v bc)" ]]; then
    echo >&2 'Warning: `--skip-empty-pages` specified, but `bc` not available. Disabling empty page detection.'
    SKIP_EMPTY_PAGES=0
  fi

  if [[ "$DEVICE" != "" ]]; then
    DEVICE="-d \"$DEVICE\""
  fi
}

prepare_scan_options

# Settings exported to the environment for child processes.
export VERBOSE UNPAPER SEARCHABLE LANGUAGE RESOLUTION
export PGWIDTHIN PGHEIGHTIN PS2PDF_OPTS
export SKIP_EMPTY_PAGES WHITE_THRESHOLD BRIGHTNESS_CONTRAST

# In verbose mode, create a lock file, export its path as LOCKFILE and extend
# the EXIT trap so that the file is removed together with the scratch
# directory handled by cleanup.
init_verbose_lock() {
  if [[ $VERBOSE == 1 ]]; then
    LOCKFILE=$(mktemp)
    # Single quotes defer expansion until the trap runs, so the path stays
    # one quoted word even if the temp directory contains whitespace.
    trap 'cleanup; rm -rf -- "$LOCKFILE"' EXIT
    export LOCKFILE
  fi
}

init_verbose_lock

# Invoke scanadf with the assembled option fragments.
# eval is needed because DEVICE, SOURCE and MODE carry embedded quotes that
# must be re-parsed by the shell. The helper script path and the output
# template are passed as '"$VAR"' so that eval expands them inside double
# quotes and paths containing whitespace stay a single argument.
run_scanadf() {
  eval scanadf $DEVICE $MAXPAGE $PGHEIGHT $PGWIDTH -S '"$SCRIPT"' --script-wait $DESKEW $CROP $DRIVER_OPTION $SOURCE --resolution $RESOLUTION $MODE -o '"$TMP_DIR/scan-%04d"'
}

echo >&2 "Scanning..."
#eval strace -f -o /tmp/scan-trace.txt scanadf $DEVICE $MAXPAGE $PGHEIGHT $PGWIDTH -S $SCRIPT --script-wait --resolution $RESOLUTION --mode $MODE $DESKEW $CROP $SOURCE -o scan-%04d
if [[ $MODE_HW_DEFAULT == 1 ]]; then
  # Leave the mode to the hardware: pass no --mode at all.
  MODE=
else
  MODE="--mode '$MODE'"
fi

# Optional user hook, sourced in the current shell right before scanning.
if [ -e "$SCANPRE" ]; then
  . "$SCANPRE"
fi

run_scanadf
# Simulate empty page scanner outputs for debugging
#convert xc:none -page Letter $TMP_DIR/scan-0001.pdf

shopt -s extglob nullglob
|
|
pdffiles=($TMP_DIR/scan-[0-9]*.pdf)
|
|
numscans=${#pdffiles[@]}
|
|
if (( numscans > 0 )); then
|
|
echo "Processing $numscans pages"
|
|
if (( TRUNCPAGE > 0 )); then
|
|
truncpage=$TRUNCPAGE
|
|
if (( numscans < TRUNCPAGE )); then
|
|
truncpage=$numscans
|
|
fi
|
|
for x in ${pdffiles[@]:$numscans-$truncpage:$truncpage}; do rm "$x"; done;
|
|
pdffiles=(${pdffiles[@]:0:$numscans-$truncpage})
|
|
echo "Truncated $truncpage pages"
|
|
let "numscans = numscans - truncpage"
|
|
fi
|
|
if (( numscans <= 0 )); then
|
|
echo "Found no scans."
|
|
exit 0
|
|
fi
|
|
if (( numscans > 1 && USEARRAY == 1 )); then
|
|
output_count=${#OUTPUT[@]}
|
|
echo "Naming $numscans pdfs based on output list of $output_count names..."
|
|
index=0
|
|
while (( index < output_count && numscans > index )); do
|
|
let "scanno = index + 1"
|
|
if [[ -f "${OUTPUT[$index]}" ]]; then
|
|
mv "${OUTPUT[$index]}" "${OUTPUT[$index]}.orig"
|
|
if [[ $APPEND == 1 ]]; then
|
|
pdffiles=()
|
|
if [[ -f "${OUTPUT[$index]}.orig" ]]; then
|
|
pdffiles+=("${OUTPUT[$index]}.orig")
|
|
fi
|
|
pdffiles+=($TMP_DIR/scan-*(0)$scanno.pdf)
|
|
pdfunite "${pdffiles[@]}" "${OUTPUT[$index]}" && rm $TMP_DIR/scan-*(0)$scanno.pdf
|
|
else
|
|
mv $TMP_DIR/scan-*(0)$scanno.pdf "${OUTPUT[$index]}"
|
|
fi
|
|
else
|
|
mv $TMP_DIR/scan-*(0)$scanno.pdf "${OUTPUT[$index]}"
|
|
fi
|
|
let "index = index + 1"
|
|
done
|
|
elif (( numscans > 1 || APPEND == 1 )); then
|
|
echo "Concatenating pdfs..."
|
|
if [[ -f "$OUTPUT" ]]; then
|
|
mv "$OUTPUT" "${OUTPUT}.orig"
|
|
fi
|
|
pdffiles=()
|
|
if [[ -f "${OUTPUT}.orig" ]]; then
|
|
pdffiles+=("${OUTPUT}.orig")
|
|
fi
|
|
pdffiles+=($TMP_DIR/scan-[0-9]*.pdf)
|
|
pdfunite "${pdffiles[@]}" "$OUTPUT" && rm $TMP_DIR/scan-[0-9]*.pdf
|
|
else
|
|
if [[ $USEARRAY == 1 ]]; then
|
|
mv $TMP_DIR/scan-0*.pdf "${OUTPUT[0]}"
|
|
else
|
|
mv $TMP_DIR/scan-0*.pdf "$OUTPUT"
|
|
fi
|
|
fi
|
|
echo ""
|
|
echo "Done."
|
|
if [[ $OPENSCAN == 1 && -x "$(command -v xdg-open)" ]]; then
|
|
for x in "${OUTPUT[@]}"; do xdg-open "$x"; done;
|
|
fi
|
|
else
|
|
echo "Found no scans."
|
|
fi
|