mirror of
https://github.com/rocketraman/sane-scan-pdf.git
synced 2025-05-15 23:20:18 -07:00
485 lines
14 KiB
Bash
Executable File
485 lines
14 KiB
Bash
Executable File
#!/bin/bash
# Built-in defaults. Everything below may be overridden later by the user's
# defaults file, by an option group, and finally by command-line options.

# Location of this script and of the per-page helper that sits next to it
DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
SCRIPT="$DIR/scan_perpage"

# Scanner selection
DEVICE='fujitsu'
SOURCE=''
SOURCE_DUPLEX='ADF Duplex'
DUPLEX=0
DRIVER_OPTION=''

# Scan mode and resolution
MODE='Lineart'
MODE_CHANGED=0
MODE_HW_DEFAULT=0
RESOLUTION=300

# Output target(s)
OUTPUT='scan.pdf'
OUTPUTARR=()
USEOUTPUT=0
USEARRAY=0
APPEND=0
OPENSCAN=0

# Page count limits
MAXPAGE=''
TRUNCPAGE=0

# Page geometry
SIZE=''
DEFAULTSIZE=1
PGHEIGHT=''
PGHEIGHTIN=''
PGWIDTH=''
PGWIDTHIN=''
PGCENTERING=1
CROP=0
DESKEW=0

# Post-processing
UNPAPER=0
SEARCHABLE=0
LANGUAGE='eng'
SKIP_EMPTY_PAGES=0
WHITE_THRESHOLD='99.8'
BRIGHTNESS_CONTRAST=''

# Miscellaneous
HELP=0
VERBOSE=0
OPTION_GROUP=''
# Scratch directory that receives the per-page scan output. Abort right away
# when it cannot be created: continuing with an empty TMP_DIR would make the
# scan output land in the filesystem root.
# (The -p "" argument is kept exactly as in the original invocation.)
TMP_DIR=$(mktemp -d -p "" scan.XXXXXXXXXX) || {
  echo >&2 "Unable to create a temporary directory. Aborting."
  exit 1
}

# Remove the scratch directory; installed as the EXIT handler below.
cleanup() {
  if [[ -n "$TMP_DIR" ]]; then
    rm -rf -- "$TMP_DIR"
  fi
}
trap cleanup EXIT
# Per-user files live below $XDG_DATA_HOME (falling back to ~/.local/share):
#   defaults - sourced here, may override any of the built-in defaults
#   scan_pre - hook script sourced right before the scan is started
DEFAULTS=${XDG_DATA_HOME:-$HOME/.local/share}/sane-scan-pdf/defaults
SCANPRE=${XDG_DATA_HOME:-$HOME/.local/share}/sane-scan-pdf/scan_pre

if [ -e "$DEFAULTS" ]; then
  . "$DEFAULTS"
fi
# Keep a copy of the command line
args=("$@")

# First pass over the arguments: only pick up the options that change how
# settings are read. Preset option groups are therefore applied first and can
# still be overwritten by explicit arguments in the second pass.
for (( argi = 0; argi < ${#args[@]}; argi++ )); do
  case "${args[argi]}" in
    -h|--help)
      HELP=1
      ;;
    -og|--option-group)
      # the group name is the next argument (empty when it is missing)
      (( argi += 1 ))
      OPTION_GROUP="${XDG_DATA_HOME:-$HOME/.local/share}/sane-scan-pdf/${args[argi]}"
      ;;
  esac
done
unset argi

# The group file is sourced with no positional parameters set
set --
if [[ -f "$OPTION_GROUP" ]]; then
  . "$OPTION_GROUP"
elif [[ -n "$OPTION_GROUP" && $HELP == 0 ]]; then
  echo >&2 "Warning: '--option-group' specified, but the corresponding file $OPTION_GROUP does not exist."
fi

# Hand the full command line to the second pass
set -- "${args[@]}"
# Second pass over the command line. Everything set here overwrites values
# coming from the built-in defaults, the defaults file or an option group.
#
# Arguments: the script's command-line arguments
# Globals:   writes the option variables (VERBOSE, DUPLEX, MODE, OUTPUT, ...);
#            reads USEARRAY to decide whether bare words are output file names
# Exits with status 1 on an unknown argument.
parse_options() {
  while (( $# > 0 )); do
    case "$1" in
      -v|--verbose) VERBOSE=1 ;;
      --no-verbose) VERBOSE=0 ;;

      -d|--duplex) DUPLEX=1 ;;
      --no-duplex) DUPLEX=0 ;;

      --source) shift; SOURCE="$1" ;;
      --source-duplex) shift; SOURCE_DUPLEX="$1" ;;

      -m|--mode) shift; MODE=$1; MODE_CHANGED=1 ;;
      --mode-hw-default) MODE_HW_DEFAULT=1 ;;
      --no-mode-hw-default) MODE_HW_DEFAULT=0 ;;

      -r|--resolution) shift; RESOLUTION=$1 ;;

      -a|--append) APPEND=1 ;;
      --no-append) APPEND=0 ;;

      -e|--max) shift; MAXPAGE=$1 ;;
      -t|--truncate) shift; TRUNCPAGE=$1 ;;

      # Already handled in the first pass. --help takes no value, so there is
      # nothing to skip here: an extra shift would swallow the next option.
      -h|--help) ;;

      -s|--size) shift; SIZE=$1 ;;
      -ph|--page-height) shift; PGHEIGHT=$1 ;;
      -pw|--page-width) shift; PGWIDTH=$1 ;;
      --no-page-centering) PGCENTERING=0 ;;
      --no-default-size) DEFAULTSIZE=0 ;;

      --crop) CROP=1 ;;
      --no-crop) CROP=0 ;;

      --deskew) DESKEW=1 ;;
      --no-deskew) DESKEW=0 ;;

      --unpaper) UNPAPER=1 ;;
      --no-unpaper) UNPAPER=0 ;;

      --searchable|--ocr) SEARCHABLE=1 ;;
      --no-searchable|--no-ocr) SEARCHABLE=0 ;;
      --language) shift; LANGUAGE=$1 ;;

      --skip-empty-pages) SKIP_EMPTY_PAGES=1 ;;
      --no-skip-empty-pages) SKIP_EMPTY_PAGES=0 ;;
      --white-threshold) shift; WHITE_THRESHOLD=$1 ;;

      -o|--output) shift; USEOUTPUT=1; OUTPUT="$1" ;;
      # -l starts a fresh list; the bare words that follow are appended by
      # the catch-all branch at the bottom
      -l|--outputlist) shift; USEARRAY=1; OUTPUTARR=("$1") ;;

      -x|--device) shift; DEVICE="$1" ;;
      -xo|--driver-options) shift; DRIVER_OPTION=$1 ;;

      --brightness-contrast-sw) shift; BRIGHTNESS_CONTRAST=$1 ;;

      --open) OPENSCAN=1 ;;
      --no-open) OPENSCAN=0 ;;

      # Already handled in the first pass; skip the group name
      -og|--option-group) shift ;;

      *)
        if [[ $USEARRAY == 1 ]]; then
          OUTPUTARR+=("$1")
        else
          echo >&2 "Unknown argument: $1"
          exit 1
        fi
        ;;
    esac
    shift # next option
  done
}

parse_options "$@"
# As before, no positional parameters are left once parsing is done
set --
# --help: print usage, the locations (and, if present, the contents) of the
# per-user configuration files, then exit successfully.
if [[ $HELP == 1 ]]; then
  # Print the "exists and currently contains" section for file $1, if it exists
  show_existing_file() {
    [[ -e "$1" ]] || return 0
    echo ""
    echo "This file exists and currently contains:"
    echo "-----"
    cat "$1"
    echo "-----"
  }

  # $0 is quoted so that a script path containing spaces yields the right name
  cat <<EOF
$(basename "$0") [OPTIONS]... [OUTPUT]

OPTIONS
 -v, --verbose
 Verbose output (this will slow down the scan due to the need to prevent interleaved output)
 -d, --duplex
 Duplex scanning
 --source
 Explicitly specify a source for the scan e.g. ADF, Flatbed, etc. If not specified, the driver default is used.
 --source-duplex
 Explicitly specify a source for duplex scans e.g. ADF Duplex, Flatbed, etc. Defaults to "ADF Duplex"
 -m, --mode
 Mode e.g. Lineart (default), Halftone, Gray, Color, etc. Use --mode-hw-default to not set any mode
 --mode-hw-default
 Do not set the mode explicitly, use the hardware default — ignored if --mode is set
 -r, --resolution
 Resolution e.g 300 (default)
 -a, --append
 Append output to existing scan
 -e, --max <pages>
 Max number of pages e.g. 2 (default is all pages)
 -t, --truncate <pages>
 Truncate number of pages from end e.g. 1 (default is none) -- truncation happens after --skip-empty-pages
 -s, --size
 Page Size as type e.g. Letter (default), Legal, A4, no effect if --crop is specified
 -ph, --page-height
 Custom Page Height in mm
 -pw, --page-width
 Custom Page Width in mm
 --no-page-centering
 By default, we use --page-height and --page-width parameters for centering -- if specified skip these
 -x, --device
 Override scanner device name, defaulting to "$DEVICE", pass an empty value for no device arg
 -xo, --driver-options
 Send additional options to the scanner driver e.g.
 -xo "--whatever bar --frobnitz baz"
 --no-default-size
 Disable default page size, useful if driver does not support page size/location arguments
 --crop
 Crop to contents (driver must support this)
 --deskew
 Run driver deskew (driver must support this)
 --unpaper
 Run post-processing deskew and black edge detection (requires unpaper)
 --ocr
 Run OCR to make the PDF searchable (requires tesseract)
 --language <lang>
 which language to use for OCR
 --skip-empty-pages
 remove empty pages from resulting PDF document (e.g. one sided doc in duplex mode)
 --white-threshold
 threshold to identify an empty page is a percentage value between 0 and 100. The default is 99.8
 --brightness-contrast-sw
 Alter brightness and contrast via post-processing - prefer specifying brightness and/or
 contrast via --driver-options if supported by your hardware.
 --open
 After scanning, open the scan via xdg-open
 -og, --option-group
 A named option group. Useful for saving collections of options under a name e.g. 'receipt' for easy reuse.
 Use this option in combination with '--help' to show the location and content of the file and edit it manually.

OUTPUT
 -o, --output <outputfile>
 Output to named file default=scan.pdf
 -l, --outputlist <outputfile-1...outputfile-n> Output to named files for each scanned page, can be used with append

CONFIGURATION
Local configuration can be written in the form of environment variables to:
 $DEFAULTS
EOF
  show_existing_file "$DEFAULTS"

  cat <<EOF

A named option group can be written based on the '--option-group' argument. Current:
 $OPTION_GROUP
EOF
  show_existing_file "$OPTION_GROUP"

  cat <<EOF

A pre-scan hook script can be written to:
 $SCANPRE
EOF
  show_existing_file "$SCANPRE"

  echo ""
  exit 0
fi
# Sanity checks on the parsed options. The order of the checks decides which
# message the user sees first, so it is kept as is.

# -o and -l are mutually exclusive
if [[ $USEOUTPUT == 1 ]] && [[ $USEARRAY == 1 ]]; then
  echo >&2 "Use one of -o or -l. Aborting."
  exit 1
fi

# An explicit --mode wins over --mode-hw-default; with neither given, warn
# that the built-in "Lineart" default is going to go away
if [[ $MODE_CHANGED == 1 ]]; then
  MODE_HW_DEFAULT=0
elif [[ $MODE_CHANGED == 0 && $MODE_HW_DEFAULT == 0 ]]; then
  echo >&2 'Warning: neither the -m/--mode nor --mode-hw-default argument was specified. The current default is "Lineart", however -m/--mode is a SANE device-specific switch. Therefore, in a future version when --mode is not specified, `'"$(basename "$0")'"' will defer to the device-specific default. If you wish to continue using "Lineart" as your scan mode, either verify that is the default for your scanner (scanadf --help -d <device>), or specify --mode Lineart explicitly on the command line.'
fi

# Single output file given with -o
if [[ $USEOUTPUT == 1 ]]; then
  if [[ -z "$OUTPUT" ]]; then
    echo >&2 "Output file must be specified. Aborting."
    exit 1
  fi
  if [[ -f "$OUTPUT" && $APPEND != 1 ]]; then
    echo >&2 "Output file $OUTPUT already exists. Delete or specify -a. Aborting."
    exit 1
  fi
fi

# List of output files given with -l
if [[ $USEARRAY == 1 ]]; then
  if (( ${#OUTPUTARR[@]} == 0 )); then
    echo >&2 "At least one file must be specified with -l. Aborting."
    exit 1
  fi
  if [[ $APPEND != 1 ]]; then
    for candidate in "${OUTPUTARR[@]}"; do
      if [[ -f "$candidate" ]]; then
        echo >&2 "Output file $candidate already exists. Delete or specify -a. Aborting."
        exit 1
      fi
    done
  fi
  # from here on OUTPUT is the array of output names
  OUTPUT=("${OUTPUTARR[@]}")
fi
# Turn SOURCE into the scanadf argument. An empty SOURCE means "use the
# driver's default source" and adds nothing, whereas a duplex scan always
# names its source explicitly. The embedded double quotes are resolved later
# when the command line goes through eval.
if [[ $DUPLEX == 1 ]]; then
  SOURCE='--source "'"$SOURCE_DUPLEX"'"'
elif [[ -n "$SOURCE" ]]; then
  SOURCE='--source "'"$SOURCE"'"'
fi

# Page limit: becomes "-e <n>" when set, stays empty otherwise
MAXPAGE=${MAXPAGE:+-e $MAXPAGE}
# Work out the page geometry arguments (PGHEIGHT/PGWIDTH), the matching sizes
# in inches (PGHEIGHTIN/PGWIDTHIN), the ps2pdf options and the driver's
# deskew/crop switches.
PS2PDF_OPTS=

# Default to Letter size, but only if crop is not specified and this feature is not disabled
if [[ $CROP != 1 && -z "$SIZE" && $DEFAULTSIZE == 1 ]]; then
  SIZE=Letter
fi

# Named page sizes, dimensions in mm
case "$SIZE" in
  Letter)
    PGHEIGHT=279.4
    PGWIDTH=215.9
    ;;
  Legal)
    PGHEIGHT=355.6
    PGWIDTH=215.9
    ;;
  A4)
    PGHEIGHT=297
    PGWIDTH=210
    ;;
esac

# This has to run before CROP is rewritten below: it still tests the 0/1 flag
if [[ $DESKEW == 1 ]]; then
  [[ $CROP == 1 ]] || echo >&2 'Warning: `--deskew` specified, but `--crop` not specified. It is recommended to scan at max height/width and then crop to give deskew the best chance of working correctly.'
  DESKEW="--swdeskew=yes"
fi

if [[ $CROP == 1 ]]; then
  CROP="--swcrop=yes --ald=yes"
  # In duplex mode, the driver's buffer for the back side image will be larger than necessary, oh well
  # http://sane.10972.n7.nabble.com/Fujitsu-backend-and-iX500-scanning-page-longer-than-14-Inches-td19303.html
  PGHEIGHT="-y 9999"
  PGWIDTH="-x 9999"
  PS2PDF_OPTS="-dEPSCrop"
else
  # Fixed page size: remember the size in inches, then replace the bare
  # number with the scanadf arguments
  if [[ -n "$PGHEIGHT" ]]; then
    PGHEIGHTIN=$(units --compact -1 "$PGHEIGHT mm" 'in')
    if [[ $PGCENTERING == 1 ]]; then
      PGHEIGHT="--page-height $PGHEIGHT -y $PGHEIGHT"
    else
      PGHEIGHT="-y $PGHEIGHT"
    fi
    PS2PDF_OPTS="-dEPSCrop"
  fi
  if [[ -n "$PGWIDTH" ]]; then
    PGWIDTHIN=$(units --compact -1 "$PGWIDTH mm" 'in')
    if [[ $PGCENTERING == 1 ]]; then
      PGWIDTH="--page-width $PGWIDTH -x $PGWIDTH"
    else
      PGWIDTH="-x $PGWIDTH"
    fi
    PS2PDF_OPTS="-dEPSCrop"
  fi
fi
# Empty page detection is switched off again when bc cannot be found
if [[ $SKIP_EMPTY_PAGES == 1 ]]; then
  if [[ ! -x "$(command -v bc)" ]]; then
    echo >&2 'Warning: `--skip-empty-pages` specified, but `bc` not available. Disabling empty page detection.'
    SKIP_EMPTY_PAGES=0
  fi
fi

# An empty DEVICE means that no device argument is passed at all
if [[ -n "$DEVICE" ]]; then
  DEVICE='-d "'"$DEVICE"'"'
fi

# Settings handed on through the environment
export VERBOSE UNPAPER SEARCHABLE LANGUAGE RESOLUTION
export PGWIDTHIN PGHEIGHTIN PS2PDF_OPTS
export SKIP_EMPTY_PAGES WHITE_THRESHOLD BRIGHTNESS_CONTRAST
# In verbose mode a lock file is created and exported as LOCKFILE; the EXIT
# handler is replaced so that the lock file is removed together with the
# scratch directory.
if [[ $VERBOSE == 1 ]]; then
  LOCKFILE=$(mktemp)
  # The path is expanded now, as before, but shell-quoted with %q so that a
  # temp location containing spaces is still removed as a single file.
  trap "cleanup; rm -rf $(printf '%q' "$LOCKFILE")" EXIT
  export LOCKFILE
fi
echo >&2 "Scanning..."
#eval strace -f -o /tmp/scan-trace.txt scanadf $DEVICE $MAXPAGE $PGHEIGHT $PGWIDTH -S $SCRIPT --script-wait --resolution $RESOLUTION --mode $MODE $DESKEW $CROP $SOURCE -o scan-%04d

# With --mode-hw-default no mode argument is passed at all
if [[ $MODE_HW_DEFAULT == 1 ]]; then
  MODE=
else
  MODE="--mode '$MODE'"
fi

# Optional user hook, sourced so that it can still adjust the variables below
[ -e "$SCANPRE" ] && . "$SCANPRE"

# DESKEW and CROP still hold their "disabled" value 0 unless the option was
# given; do not hand that literal 0 to scanadf as a stray argument.
scan_deskew=$DESKEW
if [[ $scan_deskew == 0 ]]; then
  scan_deskew=
fi
scan_crop=$CROP
if [[ $scan_crop == 0 ]]; then
  scan_crop=
fi

# eval is required because DEVICE, SOURCE, MODE and DRIVER_OPTION carry their
# own embedded quoting. SCRIPT and TMP_DIR are passed through single-quoted so
# that eval expands them inside double quotes: paths with spaces stay intact.
eval scanadf $DEVICE $MAXPAGE $PGHEIGHT $PGWIDTH -S '"$SCRIPT"' --script-wait $scan_deskew $scan_crop $DRIVER_OPTION $SOURCE --resolution $RESOLUTION $MODE -o '"$TMP_DIR/scan-%04d"'
# Simulate empty page scanner outputs for debugging
#convert xc:none -page Letter $TMP_DIR/scan-0001.pdf
shopt -s extglob nullglob
|
|
pdffiles=($TMP_DIR/scan-[0-9]*.pdf)
|
|
numscans=${#pdffiles[@]}
|
|
if (( numscans > 0 )); then
|
|
echo "Processing $numscans pages"
|
|
if (( TRUNCPAGE > 0 )); then
|
|
truncpage=$TRUNCPAGE
|
|
if (( numscans < TRUNCPAGE )); then
|
|
truncpage=$numscans
|
|
fi
|
|
for x in ${pdffiles[@]:$numscans-$truncpage:$truncpage}; do rm "$x"; done;
|
|
pdffiles=(${pdffiles[@]:0:$numscans-$truncpage})
|
|
echo "Truncated $truncpage pages"
|
|
let "numscans = numscans - truncpage"
|
|
fi
|
|
if (( numscans <= 0 )); then
|
|
echo "Found no scans."
|
|
exit 0
|
|
fi
|
|
if (( numscans > 1 && USEARRAY == 1 )); then
|
|
output_count=${#OUTPUT[@]}
|
|
echo "Naming $numscans pdfs based on output list of $output_count names..."
|
|
index=0
|
|
while (( index < output_count && numscans > index )); do
|
|
let "scanno = index + 1"
|
|
if [[ -f "${OUTPUT[$index]}" ]]; then
|
|
mv "${OUTPUT[$index]}" "${OUTPUT[$index]}.orig"
|
|
if [[ $APPEND == 1 ]]; then
|
|
pdffiles=()
|
|
if [[ -f "${OUTPUT[$index]}.orig" ]]; then
|
|
pdffiles+=("${OUTPUT[$index]}.orig")
|
|
fi
|
|
pdffiles+=($TMP_DIR/scan-*(0)$scanno.pdf)
|
|
pdfunite "${pdffiles[@]}" "${OUTPUT[$index]}" && rm $TMP_DIR/scan-*(0)$scanno.pdf
|
|
else
|
|
mv $TMP_DIR/scan-*(0)$scanno.pdf "${OUTPUT[$index]}"
|
|
fi
|
|
else
|
|
mv $TMP_DIR/scan-*(0)$scanno.pdf "${OUTPUT[$index]}"
|
|
fi
|
|
let "index = index + 1"
|
|
done
|
|
elif (( numscans > 1 || APPEND == 1 )); then
|
|
echo "Concatenating pdfs..."
|
|
if [[ -f "$OUTPUT" ]]; then
|
|
mv "$OUTPUT" "${OUTPUT}.orig"
|
|
fi
|
|
pdffiles=()
|
|
if [[ -f "${OUTPUT}.orig" ]]; then
|
|
pdffiles+=("${OUTPUT}.orig")
|
|
fi
|
|
pdffiles+=($TMP_DIR/scan-[0-9]*.pdf)
|
|
pdfunite "${pdffiles[@]}" "$OUTPUT" && rm $TMP_DIR/scan-[0-9]*.pdf
|
|
else
|
|
if [[ $USEARRAY == 1 ]]; then
|
|
mv $TMP_DIR/scan-0*.pdf "${OUTPUT[0]}"
|
|
else
|
|
mv $TMP_DIR/scan-0*.pdf "$OUTPUT"
|
|
fi
|
|
fi
|
|
echo ""
|
|
echo "Done."
|
|
if [[ $OPENSCAN == 1 && -x "$(command -v xdg-open)" ]]; then
|
|
for x in "${OUTPUT[@]}"; do xdg-open "$x"; done;
|
|
fi
|
|
else
|
|
echo "Found no scans."
|
|
fi
|