#!/bin/sh
# Validates the HTML in the argument files using the W3C "nu" validator vnu.
# Returns a zero exit code iff the files are valid HTML5, else non-zero.
# Should be applied to putative static HTML file just *before* making public.
# If the validator jar is not available this exits with code 3.
# This requires Java 8 and the W3C vnu.jar, e.g. as from:
#     https://github.com/validator/
#     https://github.com/validator/validator/releases/tag/17.3.0
#
# DHD20181230:
#     https://github.com/validator/validator/releases/tag/18.11.5
#
# NOTE: startup is very slow, so checking multiple files in a batch is useful.

# Bail out early (exit code 2) unless the first argument names a regular file.
[ -f "$1" ] || {
    echo "HTML5 file $1 missing..." >&2
    exit 2
}

# Location of the vnu validator jar, relative to the current directory.
# Previous version, kept for reference:
#VJAR=.work/vnu-validator/dist/vnu.20.3.16.jar
VJAR=.work/vnu-validator/dist/vnu.20.6.30.jar
# Give up (exit code 3) unless the jar exists and is non-empty.
[ -s "$VJAR" ] || {
    echo "Cannot find $VJAR" 1>&2
    exit 3
}

# JVM options: -client is requested so that startup is as quick as possible,
# while the memory settings start the JVM with enough room to avoid being
# dreadfully slow and failing with ...
#     Exception in thread "main" java.lang.OutOfMemoryError: GC overhead limit exceeded
JFLAGS='-client -Xss1m -Xms8m -Xmx256m'
# Validator options:
#   * --errors-only could be added to let warnings such as table border="1"
#     through, since the alternatives with CSS may require huge upheaval.
#   * --no-langdetect is used to attempt to speed up checking.
#   * --format json is more informative as to the reasons for a failure, but
#     --format gnu (the default) yields no output if all is well.
VFLAGS='--format gnu --no-langdetect'

# Wrap a mutex around the validator since it is so memory intensive...
# lockfile is asked to retry up to 17 times (-r 17) and to forcibly take over
# a lock that is more than 300 seconds old (-l 300).
# Exits with code 3 if the lock cannot be obtained.
LOCK=.work/vnu-validator/.lock
if ! lockfile -r 17 -l 300 "$LOCK"; then
    echo Cannot get lock $LOCK 1>&2
    exit 3
fi

# The lock is now ours: release it on EVERY exit path, including when the
# script is interrupted while the (slow) validator is running.  Without this
# an interrupted run would leave the lock behind and block later runs until
# the lock goes stale.
# The traps are only installed once the lock is held, so that a failed
# attempt to take the lock can never remove another process's lock.
trap 'rm -f "$LOCK"' EXIT
# Turn the usual termination signals into a normal exit so that the EXIT
# trap above runs; exit codes follow the 128+signal-number convention.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# JFLAGS and VFLAGS are deliberately left unquoted: each holds several
# space-separated options that must be split into separate words.
if java $JFLAGS -jar "$VJAR" $VFLAGS --html "$@"; then
    # Success!
    exit 0
fi

# Validation failed (or the validator could not be run): exit code 1.
echo Failed HTML5 validation for "$@"... 1>&2
exit 1
