performance: pnmtops to temp file

For some reason, when the output of pnmtops is piped directly to ps2pdf,
conversion is very slow (especially for color scans).

When pnmtops instead writes to an intermediate temporary file that ps2pdf then reads, conversion is fast.

Before:

________________________________________________________
Executed in   19.20 secs   fish           external
  usr time    4.01 secs    0.32 millis    4.01 secs
  sys time   20.63 secs    5.30 millis   20.62 secs

After:

________________________________________________________
Executed in  368.06 millis    fish           external
   usr time  378.06 millis    0.00 millis  378.06 millis
   sys time  100.00 millis    2.79 millis   97.21 millis

That is a speedup of more than 50 times (19.20 secs down to 368.06 millis)!

This should resolve #19.
This commit is contained in:
Raman Gupta 2021-03-10 22:07:03 -05:00
parent dad716786e
commit d33c8caa71

View File

@ -59,6 +59,11 @@ IMAGE_PATH=$1
IMAGE_DIR=$(dirname $1) IMAGE_DIR=$(dirname $1)
IMAGE_FILE=$(basename $1) IMAGE_FILE=$(basename $1)
TIMEVERBOSE=
if [[ $VERBOSE == 1 ]]; then
TIMEVERBOSE=time
fi
process_page() { process_page() {
log "" log ""
log "-------------------------------------------------------------------------------" log "-------------------------------------------------------------------------------"
@ -80,14 +85,16 @@ process_page() {
if [[ $VERBOSE == 1 ]]; then if [[ $VERBOSE == 1 ]]; then
UNPAPERVERBOSE="-v" UNPAPERVERBOSE="-v"
fi fi
#runconstrained unpaper $UNPAPERVERBOSE --no-mask-scan --overwrite --dpi $RESOLUTION --no-blackfilter $IMAGE_FILE $PP_PREFIX$IMAGE_FILE | logstdout #runconstrained $TIMEVERBOSE unpaper $UNPAPERVERBOSE --no-mask-scan --overwrite --dpi $RESOLUTION --no-blackfilter $IMAGE_FILE $PP_PREFIX$IMAGE_FILE | logstdout
runconstrained unpaper $UNPAPERVERBOSE --overwrite --dpi $RESOLUTION $IMAGE_PATH $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE | logstdout runconstrained $TIMEVERBOSE unpaper $UNPAPERVERBOSE --overwrite --dpi $RESOLUTION $IMAGE_PATH $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE | logstdout
fi fi
if [[ $SEARCHABLE == 1 ]]; then if [[ $SEARCHABLE == 1 ]]; then
log "Converting image data to searchable pdf..." log "Converting image data to searchable pdf..."
# tesseract uses the input's DPI header, we need to convert to a format that supports this (like tiff) # tesseract uses the input's DPI header, we need to convert to a format that supports this (like tiff)
runconstrained convert -density ${RESOLUTION}x${RESOLUTION} -units PixelsPerInch $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff | logstdout log "...Running convert"
runconstrained tesseract $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff $IMAGE_DIR/${IMAGE_FILE%.*} -l $LANGUAGE pdf | logstdout runconstrained $TIMEVERBOSE convert -density ${RESOLUTION}x${RESOLUTION} -units PixelsPerInch $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff | logstdout
log "...Running tesseract"
runconstrained $TIMEVERBOSE tesseract $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff $IMAGE_DIR/${IMAGE_FILE%.*} -l $LANGUAGE pdf | logstdout
[[ -f $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff ]] && rm $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff [[ -f $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff ]] && rm $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff
else else
log "Converting image data to pdf..." log "Converting image data to pdf..."
@ -101,9 +108,11 @@ process_page() {
if [[ $VERBOSE = 1 && ! "$(pnmtops -verbose 2>&1 < /dev/null)" =~ "unrecognized option" ]]; then if [[ $VERBOSE = 1 && ! "$(pnmtops -verbose 2>&1 < /dev/null)" =~ "unrecognized option" ]]; then
PNMVERBOSE="-verbose" PNMVERBOSE="-verbose"
fi fi
log "Using page options: $PAGEOPTS" log "...Running pnmtops on $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE using page options: $PAGEOPTS"
runconstrained pnmtops $PNMVERBOSE $PAGEOPTS $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE | ps2pdf $PS2PDF_OPTS - > $IMAGE_DIR/${IMAGE_FILE%.*}.pdf | logstdout runconstrained $TIMEVERBOSE pnmtops $PNMVERBOSE $PAGEOPTS $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE > $IMAGE_DIR/${IMAGE_FILE}.ps | logstdout
[[ -f $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.ps ]] && rm $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.ps log "...Running ps2pdf on $IMAGE_DIR/${IMAGE_FILE}.ps"
runconstrained $TIMEVERBOSE ps2pdf $PS2PDF_OPTS $IMAGE_DIR/${IMAGE_FILE}.ps $IMAGE_DIR/${IMAGE_FILE}.pdf | logstdout
[[ -f $IMAGE_DIR/${IMAGE_FILE}.ps ]] && rm $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.ps
fi fi
else else
log "Skipping empty page $IMAGE_FILE with white percentage $PERCENTAGE_WHITE" log "Skipping empty page $IMAGE_FILE with white percentage $PERCENTAGE_WHITE"