Merge branch 'issue-12' into master

This commit is contained in:
Raman Gupta 2020-07-29 18:02:08 -04:00
commit aa93b62873
2 changed files with 5 additions and 3 deletions

View File

@@ -26,7 +26,7 @@ Tested and run regularly on Fedora, but should work on other distributions with
* pnmtops (netpbm-progs)
* ps2pdf (ghostscript)
* pdfunite
* ImageMagick (if --skip-empty-pages) is used
* ImageMagick (if --skip-empty-pages or --ocr is used)
### Optional
@@ -71,7 +71,7 @@ OPTIONS
--unpaper
Run post-processing deskew and black edge detection (requires unpaper)
--ocr
Run OCR to make the PDF searchable (requires tesseract)
Run OCR to make the PDF searchable (requires tesseract and ImageMagick)
--skip-empty-pages
Remove empty pages from the resulting PDF document (e.g. one-sided doc in duplex mode)

View File

@@ -87,7 +87,9 @@ process_page() {
log "-------------------------------------------------------------------------------"
if [ $SEARCHABLE -eq 1 ]; then
log "Converting image data to searchable pdf..."
runconstrained tesseract $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE $IMAGE_DIR/${IMAGE_FILE%.*} -l $LANGUAGE pdf | logstdout
# tesseract uses the input's DPI header, we need to convert to a format that supports this (like tiff)
convert -density ${RESOLUTION}x${RESOLUTION} -units PixelsPerInch $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff
runconstrained tesseract $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff $IMAGE_DIR/${IMAGE_FILE%.*} -l $LANGUAGE pdf | logstdout
else
log "Converting image data to pdf..."
if [ "$PGWIDTHIN" == "" -o "$PGHEIGHTIN" == "" ]; then