#!/bin/bash

#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
#
# Copyright 2013 Toke Eskildsen, State and University Library, Denmark
#

#
# Quack 1.2 beta - Quality assurance tool for text scanning projects.
# 
# Generates zoomable (OpenSeadragon) views of scanned text pages with overlays
# containing OCR-text from ALTO-files. The views are static HTML pages that
# can be viewed directly from the file system or through a webserver.
#
# Note that the images used for OpenSeadragon are PNG and not tiled, which 
# makes this script a very poor choice for generating pages for end-users.
# The focus is fully on QA, where pixel-perfect reproduction is required.
# The non-tile choice is to minimize storage space.
#
# The script supports iterative updates by re-using existing structures when
# source files are added and the script is executed again. The destination
# folder is fully self-contained and suitable for mounting under a webserver
# with no access to the original files.
#
# Requirements:
#   Some unix-flavor with bash (only tested under Ubuntu)
#   GraphicsMagick (JPEG2000 -> PNG conversion is twice as fast as ImageMagick)
#   ImageMagick (to create histograms)
#   openseadragon.min.js (download at http://openseadragon.github.io/#download)
#   a fairly current browser with JavaScript enabled
#

# Default settings. Instead of changing this file, it is recommended to
# create a new file "quack.settings" with the wanted setup as it will
# override the defaults below.

# The types of images to pull from source
IMAGE_GLOB="*.tiff *.tif *.jp2 *.jpeg2000 *.j2k *.jpg *.jpeg"
# The extension of the ALTO files corresponding to the image files.
# ALTO files are expected to be located next to the image files:
#   OurScanProject_batch_2013-09-18_page_007.tif
#   OurScanProject_batch_2013-09-18_page_007.alto.xml
ALTO_EXT=".alto.xml"

# Sometimes the image corresponding to the ALTO has been scaled after ALTO
# generation. All ALTO element positions and sizes are multiplied by this
# factor. If the image has been scaled to half width & half height, set this
# to 0.5.
ALTO_SCALE_FACTOR="1.0"

# The image format for the QA image. Possible values are png and jpg.
# png is recommended if QA should check image quality in detail.
export IMAGE_DISP_EXT="png"
# If jpg is chosen for IMAGE_DISP_EXT, this quality setting (1-100)
# will be used when generating the images.
# Note: This does (unfortunately) not set the quality when tiles and
# jpg have been chosen.
export IMAGE_DISP_QUALITY="95"

# The size of thumbnails in folder view.
export THUMB_IMAGE_SIZE="300x200"

# These elements will be grepped from the ALTO-files and shown on the image pages
ALTO_ELEMENTS="processingDateTime softwareName"

# Number of threads used for image processing. Note that histogram generation
# is very memory hungry (~2GB for a 30MP image). Adjust accordingly.
THREADS=4

# For production it is recommended that all FORCE_ options are set to "false" as
# it makes iterative updates fast. If quack settings are tweaked, the relevant
# FORCE_ options should be "true".

# If true, image-pages will be generated even if they already exist.
export FORCE_PAGES=false
# If true, the main QA-images will be generated even if they already exist.
export FORCE_QAIMAGE=false
# If true, thumbnails will be generated even if they already exist.
export FORCE_THUMBNAILS=false
# If true, blown high- and low-light overlays will be generated even if they already exist.
# Setting this to true will also set FORCE_BLOWN_THUMBS to true
export FORCE_BLOWN=false
# If true, blown high- and low-light overlays for thumbs will be generated even if they already exist.
export FORCE_BLOWN_THUMBS=false
# If true, presentation images will be generated even if they already exist.
export FORCE_PRESENTATION=false
# If true, histogram images will be generated even if they already exist.
export FORCE_HISTOGRAM=false
# If true, tile images will be generated even if they already exist.
# This is only relevant if TILE="true"
export FORCE_TILES=false

# If true, the script attempts to find all alternative versions of the current image
# in other folders under source. Suitable for easy switching between alternate scans
# of the same material.
RESOLVE_ALTERNATIVES=false

# If true, IDNEXT attributes starting with 'ART' are ignored.
# Used to avoid visually linking everything on the page
SKIP_NEXT_ART=false

# How much of the image to retain, cropping from center, when calculating
# histograms. Empty value = no crop. Valid values: 1-100
# This is usable for generating proper histograms for scans where the border
# is different from the rest of the image. Artifacts from rotations are an example.
# Suggested values are 85-95%.
CROP_PERCENT=""

# If true, tiles are generated for OpenSeadragon. This requires Robert Barta's
# deepzoom (see link in README.md) and will generate a lot of 260x260 pixel tiles.
# If false, a single image will be used with OpenSeadragon. This is a lot heavier
# on the browser but avoids the size and file-count overhead of the tiles.
TILE="false"

# If true, a secondary view of the scans will be inserted into the page.
# The view represents an end-user version of the scan. This will often be
# downscaled, levelled, sharpened and JPEG'ed.
export PRESENTATION="true"
# The image format for the presentation image. Possible values are png and jpg.
# jpg is recommended as this would normally be the choice for end-user presentation.
export PRESENTATION_IMAGE_DISP_EXT="jpg"

# Overlay colors (hex RGB without leading '#') for indicating burned out
# high- and low-lights
export OVERLAY_BLACK=3399FF
export OVERLAY_WHITE=FFFF00

# Limits for the overlays. Some scanners have absolute black as grey #02
# To get grey #02 and below marked as blown black, set BLOWN_BLACK_BT to 3,3,3
export BLOWN_WHITE_BT=255,255,255
export BLOWN_WHITE_WT=254,254,254
export BLOWN_BLACK_BT=1,1,1
export BLOWN_BLACK_WT=0,0,0

# Snippets are inserted verbatim at the top of the folder and the image pages.
# Use them for specifying things like delivery date or provider notes.
# Note that these snippets can be overridden on a per-folder and per-image basis
# by creating special files in the source tree (see SPECIFIC_FOLDER_SNIPPET and
# SPECIFIC_IMAGE_SNIPPET_EXTENSION below).
export SNIPPET_FOLDER=""
export SNIPPET_IMAGE=""


# End default settings. User-supplied overrides will be loaded from quack.settings

# If a file with this name is present in a source-folder, the content of the
# file will be inserted into the generated folder HTML file.
SPECIFIC_FOLDER_SNIPPET="folder.snippet"

# If a file with image basename + this extension is encountered, the content will
# be inserted into the generated image HTML file.
SPECIFIC_IMAGE_SNIPPET_EXTENSION=".snippet"

# If no OpenSeadragon is present, the script attempts to download this version.
OSD_ZIP="openseadragon-bin-1.0.0.zip"
OSD_DIRECT="http://github.com/openseadragon/openseadragon/releases/download/v1.0.0/$OSD_ZIP"

# Resolve the folder the script was started from (START_PATH) and the folder
# the script itself lives in (ROOT). The script path is quoted so that an
# installation path containing whitespace works.
START_PATH=$(pwd)
ROOT=$(cd -- "$(dirname -- "$0")" > /dev/null && pwd)

# General settings located next to the script. The files are sourced with an
# explicit path: 'source name' without a slash searches PATH before the
# current folder, which could pick up an unrelated file.
if [ -e "$ROOT/quack.settings" ]; then
    echo "Sourcing user settings from quack.settings in $ROOT"
    source "$ROOT/quack.settings"
fi
# functions for generating identify-files and extract greyscale statistics
source "$ROOT/analyze.sh"

# Local settings overrides general settings
if [ "$START_PATH" != "$ROOT" ]; then
    if [ -e "$START_PATH/quack.settings" ]; then
        echo "Sourcing user settings from quack.settings in $START_PATH"
        source "$START_PATH/quack.settings"
    fi
fi

if [ ".true" == ".$FORCE_BLOWN" ]; then
    # When we force regeneration of blown, we must also regenerate the blown thumbs.
    export FORCE_BLOWN_THUMBS=true
fi

# A presentation.sh in the start folder takes precedence over the bundled one
PRESENTATION_SCRIPT="$ROOT/presentation.sh"
if [ -f "$START_PATH/presentation.sh" ]; then
    echo "Using presentation.sh located in $START_PATH"
    PRESENTATION_SCRIPT="$START_PATH/presentation.sh"
fi
FOLDER_TEMPLATE="$ROOT/web/folder_template.html"
IMAGE_TEMPLATE="$ROOT/web/image_template.html"
DRAGON="openseadragon.min.js"

# Prints the command line help to stdout
function usage() {
    cat <<'END_OF_USAGE'
quack 1.2 beta - Quality Assurance oriented ALTO viewer

Usage: ./quack.sh source destination

source:      The top folder for images with ALTO files
destination: The wanted location of the presentation structure

See comments in script and README.md for details.
END_OF_USAGE
}

# Command line validation: both source and destination are mandatory and the
# source must be an existing folder (otherwise SOURCE_FULL would silently
# resolve to the current folder).
SOURCE=$1
if [ -z "$SOURCE" ]; then
    echo "Error: Missing source" >&2
    echo ""
    usage
    exit 2
fi
if [ ! -d "$SOURCE" ]; then
    echo "Error: The source folder '$SOURCE' does not exist" >&2
    echo ""
    usage
    exit 2
fi
pushd "$SOURCE" > /dev/null
SOURCE_FULL=$(pwd)
popd > /dev/null

DEST=$2
if [ -z "$DEST" ]; then
    echo "Error: Missing destination" >&2
    echo ""
    usage
    exit 2
fi

# Make sure OpenSeadragon is present in the web folder: copy it from the Quack
# root if it is there, else try to download and unpack it. All failure paths
# exit with a non-zero status.
if [ ! -f "$ROOT/web/$DRAGON" ]; then
    if [ -f "$ROOT/$DRAGON" ]; then
        echo "Copying $DRAGON from Quack root to the web folder"
        cp "$ROOT/$DRAGON" "$ROOT/web/"
    else
        echo "The file $ROOT/$DRAGON or $ROOT/web/$DRAGON does not exist" >&2
        if ! command -v wget > /dev/null || ! command -v unzip > /dev/null; then
            echo "Please download it at http://openseadragon.github.io/#download" >&2
            echo "Tested version is 1.0.0, which can be downloaded from" >&2
            echo "$OSD_DIRECT" >&2
            exit 2
        else
            echo "Attempting to download of OpenSeadragon from" >&2
            echo "$OSD_DIRECT"
            wget "$OSD_DIRECT" -O "$ROOT/web/$OSD_ZIP"
            pushd "$ROOT/web" > /dev/null
            # Only the minified script is needed from the archive
            unzip "$ROOT/web/$OSD_ZIP" "openseadragon-bin-1.0.0/openseadragon.min.js"
            mv "openseadragon-bin-1.0.0/openseadragon.min.js" "$DRAGON"
            rm -r "openseadragon-bin-1.0.0"
            popd > /dev/null
            rm "$ROOT/web/$OSD_ZIP"
            # The individual steps are not checked; the presence of the final
            # file decides whether the installation succeeded
            if [ ! -f "$ROOT/web/$DRAGON" ]; then
                echo "Automatic OpenSeadragon download and installation failed." >&2
                echo "Please download it at http://openseadragon.github.io/#download" >&2
                echo "Tested version is 1.0.0, which can be downloaded from" >&2
                echo "$OSD_DIRECT" >&2
                exit 2
            fi
            echo "Automatic download and installation of OpenSeadragon successful."
        fi
    fi
fi

# Copies all JavaScript (including OpenSeadragon) and css-files from the web
# folder under ROOT to the destination folder, creating the destination if
# needed.
# Globals: ROOT (read), DEST (read)
function copyFiles () {
    if [ ! -d "$DEST" ]; then
        echo "Creating folder $DEST"
        mkdir -p "$DEST"
    fi
    # Only the glob itself is left unquoted so that a ROOT containing
    # whitespace is not word-split
    cp "${ROOT}"/web/*.js "$DEST"
    cp "${ROOT}"/web/*.css "$DEST"
}

# Expands the shell variables in a template file and prints the result to
# stdout. The template is wrapped in an unquoted here-document and sourced, so
# every $VAR, ${VAR} and $(command) in the template is evaluated in the context
# of the caller. Templates must therefore be trusted, must end with a newline
# and must not contain a line consisting solely of END_OF_TEXT.
# http://stackoverflow.com/questions/14434549/how-to-expand-shell-variables-in-a-text-file
# Input: template-file
# Returns: 1 if the template cannot be read or no temporary file can be
#          created, else the status of the expansion
function ctemplate() {
    local template_file="$1"
    if [ ! -r "$template_file" ]; then
        echo "ctemplate: Unable to read template '$template_file'" >&2
        return 1
    fi

    # The file created by mktemp is used directly. Appending a suffix to the
    # name would leave the file created by mktemp behind on every call.
    local template_script
    template_script=$(mktemp) || return 1
    {
        echo 'cat <<END_OF_TEXT'
        cat "$template_file"
        echo 'END_OF_TEXT'
    } > "$template_script"
    . "$template_script"
    local status=$?
    rm -f "$template_script"
    return $status
}

# template pattern replacement
# Deprecated in favor of ctemplate due to better speed in ctemplate
#
# Replaces all occurrences of the literal text ${PATTERN} in the file TEMPLATE
# with REPLACEMENT. The file is edited in place with 'sed -i'.
# Input: template-file pattern replacement
# The replacement may span multiple lines: newlines are temporarily encoded as
# the marker '&br;' and decoded again by the final sed call. As a consequence,
# any '&br;' already present in the template is also turned into a newline.
function template () {
    local TEMPLATE="$1"
    local PATTERN="$2"
    local REPLACEMENT="$3"

    # T="foo\\/:bar\\&amp;"$'\n'"Nextline" ; T=`echo "$T" | sed ':a;N;$!ba;s/\\n/\\\\\&br;/g'` ; echo "zoom" | sed "s/o/$T/g" | sed 's/\&br;/\n/g'

    # We need to escape \, &, / and newline in replacement to avoid sed problems
    # http://stackoverflow.com/questions/407523/escape-a-string-for-sed-search-pattern
    # http://stackoverflow.com/questions/1251999/sed-how-can-i-replace-a-newline-n

    # The replacement is compared to a copy of itself with newlines turned into
    # '*': they are only equal if the replacement contains no newlines at all
    if [ "$REPLACEMENT" == "`echo -n \"$REPLACEMENT\" | tr '\\n' '*'`" ]; then
        # No newlines, especially no trailing ones!
        # The sed substitution command is assembled on stdin and executed with
        # 'sed -f -'.
        # NOTE(review): the marker is spelled '&bt;' here and '&br;' elsewhere.
        # As this branch is only taken without newlines, the newline
        # substitution should never match - confirm before changing it.
        ( echo -n "s/\${$PATTERN}/" ; echo -n "$REPLACEMENT" | sed -e 's/[\\/&]/\\&/g' | sed ':a;N;$!ba;s/\n/\\\&bt;/g' ; echo "/g" ) | sed -f - -i $TEMPLATE
    else
        # The awk-version always adds a trailing newline, even when the input has none
        ( echo -n "s/\${$PATTERN}/" ; echo -n "$REPLACEMENT" | sed -e 's/[\\/&]/\\&/g' | awk 1 ORS="\\\\&br;" ; echo "/g" ) | sed -f - -i $TEMPLATE
    fi
    # Insert into template, then unescape newlines
    sed 's/\&br;/\n/g' -i $TEMPLATE
}

# Derives the file names used for a single image and stores them in global
# variables, so that HTML generation can refer to the files produced by
# makeImages without doing any image processing itself.
# Input: srcFolder dstFolder image
# Output: SOURCE_IMAGE DEST_IMAGE HIST_IMAGE THUMB_IMAGE THUMB_LINK WHITE_IMAGE
#         BLACK_IMAGE PRESENTATION_IMAGE TILE_FOLDER PRESENTATION_TILE_FOLDER
#         ALTO_DEST
function makeImageParams() {
    local source_dir="$1"
    local target_dir="$2"
    local image_name="$3"

    # Destination path without extension: folder + image name sans path & extension
    local file_name=${image_name##*/}
    local stem="${target_dir}/${file_name%.*}"

    # The names below must be kept in sync with the ones in makeImages
    SOURCE_IMAGE="${source_dir}/${image_name}"
    DEST_IMAGE="${stem}.${IMAGE_DISP_EXT}"
    HIST_IMAGE="${stem}.histogram.png"
    THUMB_IMAGE="${stem}.thumb.jpg"
    THUMB_LINK=${THUMB_IMAGE##*/}
    WHITE_IMAGE="${stem}.white.png"
    BLACK_IMAGE="${stem}.black.png"
    PRESENTATION_IMAGE="${stem}.presentation.jpg"
    TILE_FOLDER="${stem}_files"
    PRESENTATION_TILE_FOLDER="${stem}.presentation_files"
    ALTO_DEST="${stem}.alto.xml"
}

# Decides whether a derived file (or folder) should be generated.
# If force is "true", an existing target is deleted first. Afterwards the
# result is true (0) if the target does not exist and false (1) if it does:
#   force=true,  target exists  -> target deleted, true
#   force=true,  target missing -> true
#   force=false, target exists  -> false
#   force=false, target missing -> true
# When generation is needed and a designation is given, a progress line with
# the target name and the designation is printed.
# Input: force image designation
# Output: true/false. Use with 'if shouldGenerate true dummy; then'
shouldGenerate() {
    local force_flag="$1"
    local target="$2"
    local label="$3"

    if [[ "$force_flag" == "true" && -e "$target" ]]; then
        rm -rf "$target"
    fi

    # Still present: either not forced or the deletion failed
    if [[ -e "$target" ]]; then
        return 1
    fi

    if [[ -n "$label" ]]; then
        echo " - ${target##*/} ($label)"
    fi
    return 0
}
export -f shouldGenerate

# Creates all derived images for a single source image: the QA image, the
# optional presentation image, optional tiles, blown high-/low-light overlays,
# the histogram, the thumbnail and the overlay thumbnails. Existing files are
# kept unless the corresponding FORCE_ option is "true" (see shouldGenerate).
# The function is exported and called through 'bash -c' from xargs, so it only
# relies on its arguments, exported settings and exported functions.
# Input: srcFolder dstFolder image thumbSize crop presentation_script tile
#        (thumbSize is accepted for call compatibility; the exported
#        THUMB_IMAGE_SIZE is what is actually used)
# Exits with status 1 if the source image does not exist.
function makeImages() {
    local SRC_FOLDER="$1"
    local DEST_FOLDER="$2"
    local IMAGE="$3"
    local CROP_PERCENT="$5"
    local PRESENTATION_SCRIPT="$6"
    local TILE="$7"

    local SANS_PATH=${IMAGE##*/}
    local BASE=${SANS_PATH%.*}

    # Must mirror the ones in makeImageParams
    # Do not cheat by calling makeImageParams as makeImages might
    # be called in parallel
    local SOURCE_IMAGE="${SRC_FOLDER}/${IMAGE}"
    local DEST_IMAGE="${DEST_FOLDER}/${BASE}.${IMAGE_DISP_EXT}"
    local HIST_IMAGE="${DEST_FOLDER}/${BASE}.histogram.png"
    local THUMB_IMAGE="${DEST_FOLDER}/${BASE}.thumb.jpg"
    local WHITE_IMAGE="${DEST_FOLDER}/${BASE}.white.png"
    local BLACK_IMAGE="${DEST_FOLDER}/${BASE}.black.png"
    local THUMB_OVERLAY_WHITE="${DEST_FOLDER}/${BASE}.white.thumb.png"
    local THUMB_OVERLAY_BLACK="${DEST_FOLDER}/${BASE}.black.thumb.png"
    local PRESENTATION_IMAGE="${DEST_FOLDER}/${BASE}.presentation.jpg"
    local TILE_FOLDER="${DEST_FOLDER}/${BASE}_files"
    local PRESENTATION_TILE_FOLDER="${DEST_FOLDER}/${BASE}.presentation_files"

    if [ ! -f "$SOURCE_IMAGE" ]; then
        echo "The source image $SOURCE_IMAGE does not exists" >&2
        exit 1
    fi

    # Even if TILE="true", we create the full main presentational image as it
    # might be requested for download
    if shouldGenerate "$FORCE_QAIMAGE" "$DEST_IMAGE" "QA"; then
        gm convert "$SOURCE_IMAGE" -quality "$IMAGE_DISP_QUALITY" "$DEST_IMAGE"
    fi

    local CONV
    if [ "png" == "${IMAGE_DISP_EXT}" ]; then
        # PNG is fairly fast to decode so use that as source
        CONV="$DEST_IMAGE"
    else
        CONV="$SOURCE_IMAGE"
    fi

    # The presentation image must exist before tiles can be created from it
    if [ ".true" == ".$PRESENTATION" ]; then
        if shouldGenerate "$FORCE_PRESENTATION" "$PRESENTATION_IMAGE" "presentation"; then
            "$PRESENTATION_SCRIPT" "$CONV" "$PRESENTATION_IMAGE"
        fi
    fi

    if [ ".true" == ".$TILE" ]; then
        if shouldGenerate "$FORCE_TILES" "$TILE_FOLDER" "tiles"; then
            # TODO: Specify JPEG quality
            deepzoom "$CONV" -format "$IMAGE_DISP_EXT" -path "${DEST_FOLDER}/"
        fi
        if [ ".true" == ".$PRESENTATION" ]; then
            if shouldGenerate "$FORCE_TILES" "$PRESENTATION_TILE_FOLDER" "tiles"; then
                # TODO: Specify JPEG quality
                deepzoom "$PRESENTATION_IMAGE" -format "$PRESENTATION_IMAGE_DISP_EXT" -path "${DEST_FOLDER}/"
            fi
        fi
    fi

    # Overlays marking blown highlights (white) and blown lowlights (black)
    if shouldGenerate "$FORCE_BLOWN" "$WHITE_IMAGE" "overlay"; then
        gm convert "$CONV" -black-threshold "$BLOWN_WHITE_BT" -white-threshold "$BLOWN_WHITE_WT" -negate -fill "#${OVERLAY_WHITE}" -opaque black -transparent white -colors 2 "$WHITE_IMAGE"
    fi

    if shouldGenerate "$FORCE_BLOWN" "$BLACK_IMAGE" "overlay"; then
        gm convert "$CONV" -black-threshold "$BLOWN_BLACK_BT" -white-threshold "$BLOWN_BLACK_WT" -fill "#${OVERLAY_BLACK}" -opaque black -transparent white -colors 2 "$BLACK_IMAGE"
    fi

    if shouldGenerate "$FORCE_HISTOGRAM" "$HIST_IMAGE" "histogram"; then
        # Remove "-separate -append" to generate a RGB histogram
        # http://www.imagemagick.org/Usage/files/#histogram
        if [ "." == ".$CROP_PERCENT" ]; then
            convert "$CONV" -separate -append -define histogram:unique-colors=false -write histogram:mpr:hgram +delete mpr:hgram -negate -strip "$HIST_IMAGE"
        else
            convert "$CONV" -gravity Center -crop "${CROP_PERCENT}%x+0+0" -separate -append -define histogram:unique-colors=false -write histogram:mpr:hgram +delete mpr:hgram -negate -strip "$HIST_IMAGE"
        fi
    fi

    if shouldGenerate "$FORCE_THUMBNAILS" "$THUMB_IMAGE" "thumbnail"; then
        gm convert "$CONV" -sharpen 3 -enhance -resize "$THUMB_IMAGE_SIZE" "$THUMB_IMAGE"
    fi

    if shouldGenerate "$FORCE_BLOWN_THUMBS" "$THUMB_OVERLAY_WHITE" "thumb overlay"; then
        echo " - ${THUMB_OVERLAY_WHITE##*/}"
        # Note: We use ImageMagick here as older versions of GraphicsMagic does not
        # handle resizing of alpha-channel PNGs followed by color reduction
        convert "$WHITE_IMAGE" -resize "$THUMB_IMAGE_SIZE" -colors 2 "$THUMB_OVERLAY_WHITE"
    fi
    if shouldGenerate "$FORCE_BLOWN_THUMBS" "$THUMB_OVERLAY_BLACK" "thumb overlay"; then
        echo " - ${THUMB_OVERLAY_BLACK##*/}"
        # Note: We use ImageMagick here as older versions of GraphicsMagic does not
        # handle resizing of alpha-channel PNGs followed by color reduction
        convert "$BLACK_IMAGE" -resize "$THUMB_IMAGE_SIZE" -colors 2 "$THUMB_OVERLAY_BLACK"
    fi
}
export -f makeImages

# Generates overlays for the stated block and updates idnext & idprev
# Input: altoxml (newlines removed) tag class
# Output (addition): IDNEXTS IDPREVS OVERLAYS OCR_CONTENT
# Reads PWIDTH, PHEIGHT, ALTO_SCALE_FACTOR and SKIP_NEXT_ART from the calling
# scope; none of them are set in this function.
# NOTE(review): ELEMENTS and BCONTENT are not declared local and are thus
# visible to the caller after the call - confirm that this is intended.
function processElements() {
    local ALTOFLAT=$1
    local TAG=$2
    local CLASS=$3

#    echo "processGenericOverlay <altoflat> $TAG $CLASS"
    # Insert newlines before each <$TAG and keep only the lines containing the
    # tag, giving one element per line
    ELEMENTS=`echo $ALTOFLAT | sed "s/<$TAG/\\n<$TAG/g" | grep "<$TAG"`
#    local ELEMENTS=`echo $ALTOFLAT | sed "s/<\/$TAG>/<\/$TAG>\\n/g"`
    local SAVEIFS=$IFS
    IFS=$(echo -en "\n\b")
    # http://mywiki.wooledge.org/BashFAQ/001
    while IFS= read -r B
    do
#        echo -n "."
#    for B in $ELEMENTS ; do
        # The opening tag with all its attributes
        local BTAG=`echo "$B" | grep -o "<$TAG[^>]\+>"`
        # Attribute extraction: sed outputs its input unchanged when the
        # attribute is missing, so an empty value is only seen for empty input
        local BID=`echo $BTAG | sed 's/.*ID=\"\([^"]\+\)".*/\\1/g'`
        if [ "." == ".$BID" ]; then
            continue
        fi
        local BIDNEXT=`echo $BTAG | sed 's/.*IDNEXT=\"\([^"]\+\)".*/\\1/g'`
        # BIDNEXT equal to the full tag means that there was no IDNEXT attribute
        if [ "." != ".$BIDNEXT" -a "$BTAG" != "$BIDNEXT" ]; then
            local PRE_ART=`echo "$BIDNEXT" | grep -o "^ART"`
            if [ ".true" == ".$SKIP_NEXT_ART" ]; then
                if [ ".ART" == ".$PRE_ART" ]; then
                    BIDNEXT=""
                fi
            fi
            # JavaScript assignments linking the blocks in both directions
            IDNEXTS="${IDNEXTS}nexts[\"${BID}\"] = \"$BIDNEXT\";"$'\n'
            IDPREVS="${IDPREVS}prevs[\"${BIDNEXT}\"] = \"$BID\";"$'\n'
        fi
        local BHEIGHT=`echo $BTAG | sed 's/.*HEIGHT=\"\([^"]\+\)".*/\\1/g'`
        local BWIDTH=`echo $BTAG | sed 's/.*WIDTH=\"\([^"]\+\)".*/\\1/g'`
        local BHPOS=`echo $BTAG | sed 's/.*HPOS=\"\([^"]\+\)".*/\\1/g'`
        local BVPOS=`echo $BTAG | sed 's/.*VPOS=\"\([^"]\+\)".*/\\1/g'`

        # Positions and sizes relative to the page dimensions, scaled by
        # ALTO_SCALE_FACTOR. bc omits the leading zero for values below 1,
        # which the trailing sed adds back.
        local SWIDTH=`echo "scale=6;$BWIDTH/$PWIDTH*$ALTO_SCALE_FACTOR" | bc | sed 's/^\./0./'`
        # TODO: Seems like there is some mismatch going on here with some deliveries
        local SHEIGHT=`echo "scale=6;$BHEIGHT/$PHEIGHT*$ALTO_SCALE_FACTOR" | bc | sed 's/^\./0./'`
#        SHEIGHT=`echo "scale=6;$BHEIGHT/$PWIDTH" | bc | sed 's/^\./0./'`
        local SHPOS=`echo "scale=6;$BHPOS/$PWIDTH*$ALTO_SCALE_FACTOR" | bc | sed 's/^\./0./'`
        local SVPOS=`echo "scale=6;$BVPOS/$PHEIGHT*$ALTO_SCALE_FACTOR" | bc | sed 's/^\./0./'`

        # Special handling of TextBlock: all CONTENT attributes in the block are
        # joined with spaces and backslashes are doubled, then the text is
        # stored as a JavaScript assignment
        if [ "TextBlock" == "$TAG" ]; then
            BCONTENT=`echo "$B" | grep -o 'CONTENT="[^"]\+"' | sed 's/CONTENT="\\([^"]\\+\\)"/\\1/g' | sed ':a;N;$!ba;s/\\n/ /g' | sed 's/\\\\/\\\\\\\\/g'`
            # TODO: Handle entity-escaped content as well as quotes and backslash
            OCR_CONTENT="${OCR_CONTENT}ocrs[\"${BID}\"] = \"$BCONTENT\";"$'\n'
#            echo "ocrs[\"${BID}\"] = \"$BCONTENT\";"$'\n'
        fi

        # One overlay entry per element
        OVERLAYS="${OVERLAYS}    {id: '$BID',"$'\n'
        OVERLAYS="${OVERLAYS}      x: $SHPOS, y: $SVPOS, width: $SWIDTH, height: $SHEIGHT,"$'\n'
        OVERLAYS="${OVERLAYS}      className: '$CLASS'"$'\n'
        OVERLAYS="${OVERLAYS}    },"$'\n'
    done <<< "$ELEMENTS"
    IFS=$SAVEIFS
}

# Starts the overlays JavaScript snippet with two full-image overlays, 'white'
# and 'black', used for showing absolute white and absolute black pixels.
# Input: src image_width image_height (src is not used)
# Output: OVERLAYS (not terminated with ']')
function blackWhite() {
    local image_w=$2
    local image_h=$3

    # The overlay height is stated relative to the image width as OpenSeadragon
    # scales with respect to width
    local rel
    rel=$(echo "scale=2;$image_h/$image_w" | bc)

    OVERLAYS="overlays: ["$'\n'
    local kind
    for kind in white black; do
        OVERLAYS+="{id: '${kind}',"$'\n'
        OVERLAYS+="  x: 0.0, y: 0.0, width: 1.0, height: ${rel},"$'\n'
        OVERLAYS+="  className: '${kind}overlay'"$'\n'
        OVERLAYS+="},"$'\n'
    done
}

# Generates the overlays and the ALTO-derived page data for a single image.
# The ALTO file is copied to the destination (ALTO_DEST as set by
# makeImageParams, else dest/altofile) and parsed with sed/grep.
# Input: src dest altofile width height
# Output: ELEMENTS_HTML OVERLAYS OCR_CONTENT IDNEXTS IDPREVS
#         FULL_RELATIVE_HEIGHT ACCURACY
# If the ALTO file does not exist, ELEMENTS_HTML holds a warning and OVERLAYS
# only holds the black/white overlays.
function processALTO() {
    local SRC="$1"
    local DEST="$2"
    local ALTO_FILE="$3"
    local IMAGE_WIDTH=$4
    local IMAGE_HEIGHT=$5

    # Used by caller
    OVERLAYS=""
    ELEMENTS_HTML=""
    OCR_CONTENT=""

    # The location is derived from the src argument so that the function
    # depends on neither the variables nor the current folder of the caller
    local ALTO="${SRC}/${ALTO_FILE}"
    blackWhite "$SRC" "$IMAGE_WIDTH" "$IMAGE_HEIGHT"
    # TODO: Extract relevant elements from the Alto for display
    if [ ! -f "$ALTO" ]; then
        # TODO: Better handling of non-existence
        ELEMENTS_HTML="<p class=\"warning\">No ALTO file at $ALTO</p>"$'\n'
        # Terminate the black/white overlay and return
        OVERLAYS="${OVERLAYS}]"
        return
    fi

    cp "$ALTO" "${ALTO_DEST:-${DEST}/${ALTO_FILE}}"
    # The ALTO with newlines replaced by spaces, read once and used for all
    # extractions below
    local ALTO_COMPACT=`cat "$ALTO" | sed ':a;N;$!ba;s/\\n/ /g'`
#    local PTAG=`echo "$ALTO_COMPACT" | grep -o "<PrintSpace[^>]\\+>"`
    local PTAG=`echo "$ALTO_COMPACT" | grep -o "<Page[^>]\\+>"`
    # PHEIGHT and PWIDTH are read by processElements
    local PHEIGHT=`echo $PTAG | sed 's/.*HEIGHT=\"\([^"]\+\)".*/\\1/g'`
    local PWIDTH=`echo $PTAG | sed 's/.*WIDTH=\"\([^"]\+\)".*/\\1/g'`
    # The PC attribute of the Page is converted to percent
    ACCURACY=`echo $PTAG | sed 's/.*PC=\"\([^"]\+\)".*/\\1/g'`
    ACCURACY=`echo "scale=2;$ACCURACY*100" | bc`

    FULL_RELATIVE_HEIGHT=`echo "scale=6;$PHEIGHT/$PWIDTH" | bc | sed 's/^\./0./'`
    # TODO: Ponder how relative positioning works and why this hack is necessary
    # Theory #1: OpenSeadragon messes up the vertical relative positioning
    PHEIGHT=$PWIDTH

    # A key/value table with all occurrences of the elements in ALTO_ELEMENTS
    ELEMENTS_HTML="<table class=\"altoelements\"><tr><th>Key</th> <th>Value</th></tr>"$'\n'
    local E V TV SAVEIFS
    for E in $ALTO_ELEMENTS; do
        SAVEIFS=$IFS
        # Split the grep output on newlines only, as values can contain spaces
        IFS=$(echo -en "\n\b")
        for V in `echo "$ALTO_COMPACT" | grep -o "<${E}>[^<]\\+</${E}>"`; do
            TV=`echo "$V" | sed 's/.*>\(.*\)<.*/\\1/g'`
            ELEMENTS_HTML="${ELEMENTS_HTML}<tr><td>$E</td> <td>$TV</td></tr>"$'\n'
        done
        IFS=$SAVEIFS
    done
    ELEMENTS_HTML="${ELEMENTS_HTML}</table>"$'\n'

    OCR_CONTENT=""
    IDNEXTS=""
    IDPREVS=""

    # The ALTO without newlines
    SANS="$ALTO_COMPACT"

    processElements "$SANS" "ComposedBlock" "composed"
    processElements "$SANS" "Illustration" "illustration"
    processElements "$SANS" "TextBlock" "highlight"

    OVERLAYS="${OVERLAYS}   ]"$'\n'
}

# Searches from the source root (SOURCE_FULL) for alternative versions of the
# given image and builds a HTML list with a link to the page for each of them.
# Very specific to Statsbiblioteket
# Input: src_folder image (src_folder is not used)
# Globals: SOURCE_FULL (read), UP (read, link prefix set by the caller)
# Output: ALTERNATIVES_HTML (empty if no lookup could be performed)
# NOTE(review): the search pattern "*<image name without extension>" only
# matches names ending with that ID, i.e. not files with a further extension -
# confirm that this is the intended match.
function resolveAlternatives() {
    local SRC_FOLDER="$1"
    local IMAGE="$2"
#    local ID=`echo "$IMAGE" | grep -o "[0-9][0-9][0-9][0-9]-.*"`
    local ID="${IMAGE%.*}"

    # Reset first so that an early return never leaves the result from a
    # previous image behind
    ALTERNATIVES_HTML=""

    if [ -z "$ID" ]; then
        echo "   Unable to extract ID for \"$IMAGE\". No alternatives lookup"
        return
    fi

    if ! pushd "$SOURCE_FULL" > /dev/null; then
        echo "   Unable to enter \"$SOURCE_FULL\". No alternatives lookup" >&2
        return
    fi
    ALTERNATIVES_HTML="<ul class=\"alternatives\">"$'\n'
    local A LINK D
    # Paths are read line by line so that whitespace in names is preserved
    while IFS= read -r A; do
        [ -n "$A" ] || continue
        # "../../.././Apex/B3/2012-01-05-01/Dagbladet-2012-01-05-01-0130B.jp2 -> Apex/B3
        LINK=$(echo "$A" | sed 's/[./]\+\([^\/]\+\/[^\/]\+\).*/\1/g')
        D="${A%.*}"
        ALTERNATIVES_HTML="${ALTERNATIVES_HTML}<li><a href=\"${UP}${D}.html\">${LINK}</a></li>"$'\n'
    done < <(find . -name "*${ID}" | sort)
    ALTERNATIVES_HTML="${ALTERNATIVES_HTML}</ul>"$'\n'
    popd > /dev/null
}

# Creates only the HTML page itself. The corresponding makeImages must
# be called before calling this function.
# The page is created by expanding IMAGE_TEMPLATE with ctemplate, which reads
# the variables set below (including the locals) by name. Variable names in
# this function are therefore part of the template contract.
# Input: up parent srcFolder dstFolder image prev_image next_image
# Output: PAGE_LINK BASE THUMB_LINK THUMB_WIDTH THUMB_HEIGHT
# The page is only (re)generated if it does not exist or FORCE_PAGES is true.
# Exits with status 1 if the QA image for the page has not been created.
function makePreviewPage() {
    local UP="$1"
    local PARENT="$2"
    local SRC_FOLDER="$3"
    local DEST_FOLDER="$4"
    local IMAGE="$5"
    local PREV_IMAGE="$6"
    local NEXT_IMAGE="$7"

    local SANS_PATH=${IMAGE##*/}
    BASE=${SANS_PATH%.*}
    P="${DEST_FOLDER}/${BASE}.html"

    # An image specific snippet file (looked up relative to the current
    # folder) takes precedence over the general image snippet. The folder
    # snippet is kept as fallback for setups that only define that one.
    local SSNIP="${BASE}${SPECIFIC_IMAGE_SNIPPET_EXTENSION}"

    if [ -f "$SSNIP" ]; then
        SNIPPET=$(cat "$SSNIP")
    else
        SNIPPET="${SNIPPET_IMAGE:-$SNIPPET_FOLDER}"
    fi

    # Used by function caller
    PAGE_LINK="${BASE}.html"

    makeImageParams "$SRC_FOLDER" "$DEST_FOLDER" "$IMAGE"

    if [ ! -e "$DEST_IMAGE" ]; then
        echo "The destination image '$DEST_IMAGE' for '$IMAGE' has not been created" >&2
        exit 1
    fi

    # Pixel dimensions are taken from the " <width>x<height> " part of the
    # identify output
    local IDENTIFY=$(identify "$DEST_IMAGE" | grep -o " [0-9]\+x[0-9]\+ ")
    IMAGE_WIDTH=$(echo $IDENTIFY | grep -o "[0-9]\+x" | grep -o "[0-9]\+")
    IMAGE_HEIGHT=$(echo $IDENTIFY | grep -o "x[0-9]\+" | grep -o "[0-9]\+")
    local TIDENTIFY=$(identify "$THUMB_IMAGE" | grep -o " [0-9]\+x[0-9]\+ ")
    THUMB_WIDTH=$(echo $TIDENTIFY | grep -o "[0-9]\+x" | grep -o "[0-9]\+")
    THUMB_HEIGHT=$(echo $TIDENTIFY | grep -o "x[0-9]\+" | grep -o "[0-9]\+")

    if [ ".true" == ".$PRESENTATION" ]; then
        local PIDENTIFY=$(identify "$PRESENTATION_IMAGE" | grep -o " [0-9]\+x[0-9]\+ ")
        PRESENTATION_WIDTH=$(echo $PIDENTIFY | grep -o "[0-9]\+x" | grep -o "[0-9]\+")
        PRESENTATION_HEIGHT=$(echo $PIDENTIFY | grep -o "x[0-9]\+" | grep -o "[0-9]\+")
    fi

    # The thumbnail dimensions above are needed by the caller even when the
    # page itself is left untouched
    if [ "true" != "$FORCE_PAGES" -a -e "$P" ]; then
        return
    fi

    echo " - ${P##*/}"

    local ALTO_FILE="${BASE}${ALTO_EXT}"
    processALTO "$SRC_FOLDER" "$DEST_FOLDER" "$ALTO_FILE" $IMAGE_WIDTH $IMAGE_HEIGHT

    local NAVIGATION=""
    if [ ! "." == ".$PREV_IMAGE" ]; then
        local PSANS_PATH=${PREV_IMAGE##*/}
        local PBASE=${PSANS_PATH%.*}
        NAVIGATION="<a href=\"${PBASE}.html\">previous</a> | "
    else
        # We write the text to keep the positions of the links constant
        NAVIGATION="previous | "
    fi
    NAVIGATION="${NAVIGATION}<a href=\"index.html\">up</a>"
    if [ ! "." == ".$NEXT_IMAGE" ]; then
        local NSANS_PATH=${NEXT_IMAGE##*/}
        local NBASE=${NSANS_PATH%.*}
        NAVIGATION="${NAVIGATION} | <a href=\"${NBASE}.html\">next</a>"
    else
        NAVIGATION="${NAVIGATION} | next"
    fi

    # PARENT, DATE, UP, NAVIGATION, BASE, SOURCE, FULL_RELATIVE_HEIGHT, EDEST, IMAGE_WIDTH, IMAGE_HEIGHT, TILE_SOURCES, THUMB, THUMB_WIDTH, THUMB_HEIGHT, PRESENTATION, PRESENTATION_WIDTH, PRESENTATION_HEIGHT, WHITE, BLACK, OVERLAYS, OCR_CONTENT, IDNEXTS, IDPREVS, ALTO_ELEMENTS_HTML, HISTOGRAM, ALTO, ALTERNATIVES
    SOURCE="$SOURCE_IMAGE"
    SOURCE_SHORT=${SOURCE##*/}
    SOURCE_SIZE=$(du -k "$SOURCE" | grep -o "^[0-9]\+")
    EDEST=${DEST_IMAGE##*/}
    # From here on IMAGE is the name of the QA image, not the source image
    IMAGE="$EDEST"

    # OpenSeadragon tile sources: Deep Zoom tiles or a single legacy image
    if [ "true" == "$TILE" ]; then
        TILE_SOURCES="      Image: {\
        xmlns:    \"http://schemas.microsoft.com/deepzoom/2008\",\
        Url:      \"${TILE_FOLDER##*/}/\",\
        Format:   \"$IMAGE_DISP_EXT\",\
        Overlap:  \"4\",\
        TileSize: \"256\",\
        Size: {\
          Width:  \"$IMAGE_WIDTH\",\
          Height: \"$IMAGE_HEIGHT\"\
        }\
      }"$'\n'
        if [ ".true" == ".$PRESENTATION" ]; then
            PRESENTATION_TILE_SOURCES="      Image: {\
        xmlns:    \"http://schemas.microsoft.com/deepzoom/2008\",\
        Url:      \"${PRESENTATION_TILE_FOLDER##*/}/\",\
        Format:   \"$PRESENTATION_IMAGE_DISP_EXT\",\
        Overlap:  \"4\",\
        TileSize: \"256\",\
        Size: {\
          Width:  \"$PRESENTATION_WIDTH\",\
          Height: \"$PRESENTATION_HEIGHT\"\
        }\
      }"$'\n'
        else
            PRESENTATION_TILE_SOURCES=""
        fi
    else
        TILE_SOURCES="      type: 'legacy-image-pyramid',\
      levels:[\
        {\
          url: '${EDEST}',\
          width:  ${IMAGE_WIDTH},\
          height: ${IMAGE_HEIGHT}\
        }\
      ]"$'\n'
        if [ ".true" == ".$PRESENTATION" ]; then
            PRESENTATION_TILE_SOURCES="      type: 'legacy-image-pyramid',\
      levels:[\
        {\
          url: '${PRESENTATION_IMAGE##*/}',\
          width:  ${PRESENTATION_WIDTH},\
          height: ${PRESENTATION_HEIGHT}\
        }\
      ]"$'\n'
        else
            PRESENTATION_TILE_SOURCES=""
        fi
    fi
    THUMB="$THUMB_LINK"
    WHITE_LINK=${WHITE_IMAGE##*/}
    WHITE="$WHITE_LINK"
    BLACK_LINK=${BLACK_IMAGE##*/}
    BLACK="$BLACK_LINK"

    ALTO_ELEMENTS_HTML="$ELEMENTS_HTML"
    EHIST=${HIST_IMAGE##*/}
    HISTOGRAM="$EHIST"
    ALTO="$ALTO_FILE"
    if [ "true" == "$RESOLVE_ALTERNATIVES" ]; then
        resolveAlternatives "$SRC_FOLDER" "$IMAGE"
    else
        local ALTERNATIVES_HTML=""
    fi
    ALTERNATIVES="$ALTERNATIVES_HTML"

    # image stats
    # TODO: Use destination if that is lossless and faster to open?
    local GREY=$(grey_stats "$SOURCE_IMAGE")

    # grey_stats is expected to deliver space separated fields in this order:
    # $PIXELS $UNIQUE $FIRST_COUNT $PERCENT_FIRST $FIRST_GREY $LAST_COUNT $PERCENT_LAST $LAST_GREY $COUNT_SPIKE $PERCENT_SPIKE $GREY_SPIKE
    # 1000095 512 82362 8.23 (0,0,0) 255 .02 (255,255,255)
    GREY_PIXELS=$(echo "$GREY" | cut -d' ' -f1)
    GREY_UNIQUE=$(echo "$GREY" | cut -d' ' -f2)
    GREY_COUNT_FIRST=$(echo "$GREY" | cut -d' ' -f3)
    GREY_PERCENT_FIRST=$(echo "$GREY" | cut -d' ' -f4)
    GREY_FIRST=$(echo "$GREY" | cut -d' ' -f5)
    GREY_COUNT_LAST=$(echo "$GREY" | cut -d' ' -f6)
    GREY_PERCENT_LAST=$(echo "$GREY" | cut -d' ' -f7)
    GREY_LAST=$(echo "$GREY" | cut -d' ' -f8)
    GREY_COUNT_SPIKE=$(echo "$GREY" | cut -d' ' -f9)
    GREY_PERCENT_SPIKE=$(echo "$GREY" | cut -d' ' -f10)
    GREY_SPIKE=$(echo "$GREY" | cut -d' ' -f11)
    # im_identify outputs the name of a file with identify output. The last
    # 256 lines of its Histogram section are used.
    local GREY_ALL_SOURCE=$(im_identify "$SOURCE_IMAGE")
    GREY_ALL=$(grep -A 256 Histogram "$GREY_ALL_SOURCE" | tail -n 256)

    ctemplate "$IMAGE_TEMPLATE" > "$P"
}

# Generates index.html for a source folder inside the destination folder and
# then recurses into every sub folder of the source folder.
#
# Input: up parent srcFolder dstFolder
#   up        - relative path prefix handed on to makePreviewPage; the
#               recursion appends "../" for each level
#   parent    - relative path of the folder as seen from the root; the
#               recursion appends "<subfolder>/" for each level
#   srcFolder - folder with the source images (must exist)
#   dstFolder - folder for the generated pages (created if missing)
#
# Globals read: IMAGE_GLOB, THREADS, THUMB_IMAGE_SIZE, CROP_PERCENT,
#   PRESENTATION_SCRIPT, TILE, SPECIFIC_FOLDER_SNIPPET, SNIPPET_FOLDER,
#   FOLDER_TEMPLATE. PAGE_LINK, BASE, THUMB_LINK, THUMB_WIDTH and THUMB_HEIGHT
#   are read after each makePreviewPage call (presumably set by it - the
#   definition is not shown here).
# Globals written: IMAGES_HTML, SUBFOLDERS_HTML, EDITION_HTML, SNIPPET. They
#   are left global as ctemplate is invoked right after they are assigned.
# Exits the whole script with status 1 if a folder cannot be entered.
#
function makeIndex() {
    local UP="$1"
    local PARENT="$2"
    local SRC_FOLDER="$3"
    local DEST_FOLDER="$4"
#    echo "Processing level '$PARENT' from $SRC_FOLDER"

    if [ ! -d "$SRC_FOLDER" ]; then
        echo "Unable to locate folder $SRC_FOLDER from $(pwd)" >&2
        exit 1
    fi
    # Resolve to absolute paths in a subshell so the directory stack of the
    # caller is never touched, even if the cd fails
    SRC_FOLDER=$(cd "$SRC_FOLDER" > /dev/null && pwd)
    if [ -z "$SRC_FOLDER" ]; then
        echo "Unable to enter source folder $3 from $(pwd)" >&2
        exit 1
    fi
    echo "Processing $SRC_FOLDER"

    if [ ! -d "$DEST_FOLDER" ]; then
#        echo "Creating folder $DEST_FOLDER"
        mkdir -p "$DEST_FOLDER"
    fi
    DEST_FOLDER=$(cd "$DEST_FOLDER" > /dev/null && pwd)
    if [ -z "$DEST_FOLDER" ]; then
        echo "Unable to enter destination folder $4 from $(pwd)" >&2
        exit 1
    fi

    pushd "$SRC_FOLDER" > /dev/null || exit 1
    local PP="${DEST_FOLDER}/index.html"

    # Images
    # IMAGE_GLOB is expanded unquoted on purpose: it is a glob (or a list of
    # globs) and ls delivers the matches as one sorted list, one per line.
    # shellcheck disable=SC2012,SC2086
    local IMAGES
    IMAGES=$(ls $IMAGE_GLOB 2> /dev/null)

    # One array entry per line, so that names with spaces stay intact
    local -a IMAGE_LIST=()
    local LINE
    while IFS= read -r LINE; do
        if [ -n "$LINE" ]; then
            IMAGE_LIST+=("$LINE")
        fi
    done <<< "$IMAGES"

    # Generate graphics
    # http://stackoverflow.com/questions/11003418/calling-functions-with-xargs-within-a-bash-script
    # -I already implies one invocation per input line, so the conflicting
    # "-n 1" is not given. makeImages must be exported (export -f) for the
    # child bash to see it.
    # NOTE(review): the trailing \; is handed to makeImages as an extra
    # argument; it is kept as the callee is not visible from here.
    if [ "${#IMAGE_LIST[@]}" -gt 0 ]; then
        printf '%s\n' "${IMAGE_LIST[@]}" | xargs -I'{}' -P "${THREADS:-1}" bash -c 'makeImages "$@"' _ "$SRC_FOLDER" "$DEST_FOLDER" "{}" "$THUMB_IMAGE_SIZE" "$CROP_PERCENT" "$PRESENTATION_SCRIPT" "$TILE" \;
    fi

    # Generate pages
    local THUMBS_HTML=""
    local PREV_IMAGE=""
    local NEXT_IMAGE=""
    local I
    local INDEX
    if [ "${#IMAGE_LIST[@]}" -eq 0 ]; then
        IMAGES_HTML="<p>No images</p>"$'\n'
    else
        IMAGES_HTML="<ul>"$'\n'
        for (( INDEX = 0; INDEX < ${#IMAGE_LIST[@]}; INDEX++ )); do
            I="${IMAGE_LIST[INDEX]}"
            # Positional lookup: the successor is empty for the last image.
            # (Pattern matching on the name picks the wrong neighbour when
            # one image name is contained in another.)
            NEXT_IMAGE="${IMAGE_LIST[INDEX + 1]:-}"
            makePreviewPage "$UP" "$PARENT" "$SRC_FOLDER" "$DEST_FOLDER" "$I" "$PREV_IMAGE" "$NEXT_IMAGE"
            IMAGES_HTML="${IMAGES_HTML}<li><a href=\"$PAGE_LINK\">$BASE</a></li>"$'\n'

            THUMBS_HTML="${THUMBS_HTML}<div class=\"thumb\"><a class=\"thumblink\" href=\"$PAGE_LINK\"><span class=\"thumboverlay\"></span><img class=\"thumbimg\" src=\"${THUMB_LINK}\" alt=\"$BASE\" title=\"$BASE\" width=\"$THUMB_WIDTH\" height=\"$THUMB_HEIGHT\"/></a></div>"$'\n'
            PREV_IMAGE=$I
        done
        IMAGES_HTML="${IMAGES_HTML}</ul>"$'\n'
    fi

    # Sub folders: collected by globbing with absolute paths, so names with
    # spaces work and the result does not depend on the current directory
    local -a SUBS=()
    local F
    for F in "$SRC_FOLDER"/*; do
        if [ -d "$F" ]; then
            SUBS+=("${F##*/}")
        fi
    done
    if [ "${#SUBS[@]}" -eq 0 ]; then
        SUBFOLDERS_HTML="<p>No subfolders</p>"$'\n'
    else
        SUBFOLDERS_HTML="<ul>"$'\n'
        for F in "${SUBS[@]}"; do
            SUBFOLDERS_HTML="${SUBFOLDERS_HTML}<li><a href=\"$F/index.html\">$F</a></li>"$'\n'
        done
        SUBFOLDERS_HTML="${SUBFOLDERS_HTML}</ul>"$'\n'
    fi

    # Editions: gather the matches first, as "[ -f glob ]" fails with
    # "too many arguments" when more than one file matches
    local -a EDITIONS=()
    local E
    for E in "$SRC_FOLDER"/*.Edition.xml; do
        if [ -f "$E" ]; then
            EDITIONS+=("$E")
        fi
    done
    if [ "${#EDITIONS[@]}" -eq 0 ]; then
        # TODO: Only warn if there are images
        EDITION_HTML="<p class=\"warning\">No edition</p>"
    else
        EDITION_HTML=""
        local ESCAPED
        for E in "${EDITIONS[@]}"; do
            EDITION_HTML="${EDITION_HTML}<p>${E##*/}</p>"$'\n'
            EDITION_HTML="${EDITION_HTML}<pre>"$'\n'
            # HTML-escape the XML and wrap element text in a span for
            # styling. Captured directly, so no temporary file is needed.
            ESCAPED=$(sed -e 's/&/&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' -e 's/"/\&quot;/g'  -e 's/\&gt;\([^\&]\+\)\&lt;/\&gt;<span class="xmlvalue">\1<\/span>\&lt;/g' < "$E")
            EDITION_HTML="${EDITION_HTML}${ESCAPED}"$'\n'
            EDITION_HTML="${EDITION_HTML}</pre>"$'\n'
        done
    fi

    # Folder specific snippet, resolved relative to the source folder.
    # The quotes matter: an empty setting must select the default snippet
    # instead of making cat read from stdin.
    pushd "$SRC_FOLDER" > /dev/null || exit 1
    if [ -f "$SPECIFIC_FOLDER_SNIPPET" ]; then
        SNIPPET=$(cat "$SPECIFIC_FOLDER_SNIPPET")
    else
        SNIPPET="$SNIPPET_FOLDER"
    fi
    popd > /dev/null

    # UP, PARENT, SRC_FOLDER, DEST_FOLDER, IMAGES_HTML, THUMBS_HTML, SUBFOLDERS_HTML, EDITION_HTML, SNIPPET
    ctemplate "$FOLDER_TEMPLATE" > "$PP"

    # Generate pages for sub folders
    # We do this at the end to avoid overriding of variables
    for F in "${SUBS[@]}"; do
        makeIndex "${UP}../" "${PARENT}${F}/" "${SRC_FOLDER}/${F}" "${DEST_FOLDER}/${F}"
    done

    popd > /dev/null
}

# Script entry point: copy the static resources, then build the index pages
# recursively from $SOURCE into $DEST, reporting start and end times.
printf '%s\n' "Quack starting at $(date)"
copyFiles
makeIndex "" "" "$SOURCE" "$DEST"
printf '%s\n' "All done at $(date)"
printf '%s\n' "Please open ${DEST}/index.html in a browser"