#!/bin/sh
# Extract details of declared hero/document image from directives.
#   $1 must be the .*.HTML body source or some head portion of it.
#   $2 must be the name of the host .*.html body source for error messages.
#   $3 is an optional operation from the following list:
#       random  pick a random image if more than one available
#       all  output all listed LIMG and SQTN in order
#       sqtn1  return 1st/primary SQTN image (if any)
#       iLIMG  return 1st *very* small inline lightweight image (eg JXL art)
#
# Image path returned is relative to the top of the www site,
# and does not contain any of [ '"=] so can be unquoted as HTML attr values.
#
# Will return nothing if no suitable image(s) found.

# Usage check: the HTML input ($1) and the source name ($2) are both mandatory.
# Complain on stderr and stop with status 2 (usage error) if either is absent.
if [ "$#" -lt 2 ]; then
    # "$*" (not "$@") because the arguments are embedded in a single string;
    # printf rather than echo so backslashes in arguments are shown verbatim.
    printf '%s\n' "ERROR: missing hero/thumbnail HTML input and source name: $*" 1>&2
    exit 2
fi

# Positional parameters, captured under descriptive names:
#   INPUTHTML  file holding the HTML (directives) to be scanned,
#   ERRSRC     name of the source page, intended for error messages,
#   OP         optional operation; empty selects the default behaviour.
INPUTHTML=$1
ERRSRC=$2
OP=$3
# Command that candidate image paths are piped through before the first
# one is taken; plain "cat" passes them through unchanged.
FILTER=cat

# EXPERIMENTAL
# Print the first iLIMG (tiny inline hero image) path declared in file $1,
# but only if that path names a non-empty file relative to the current
# directory; otherwise print nothing.
# Only the first declaration is considered: there is no fall-back to later ones.
emit_first_ilimg() {
    # Paths containing any of [ =>'"] are deliberately not matched.
    iLIMG=$(sed -n -e 's/^<!-- *iLIMG *\(img\/[^ =>"'\'']*\) *--> *$/\1/p' < "$1" | head -n 1)
    if [ -n "$iLIMG" ] && [ -s "$iLIMG" ]; then
        printf '%s\n' "$iLIMG"
    fi
}

# Print, in document order, every LIMG and SQTN image path declared in file $1.
# This parse is stricter than the sed expressions used for the other
# operations (at least one space is required around the tag and the path),
# so it may return a subset of what they would accept.
# No check is made here that the image files exist.
# The exit status is that of awk.
list_all_images() {
    # The input is redirected, not passed as an operand, so that a file name
    # containing '=' cannot be mistaken by awk for a variable assignment.
    awk '/^<!--  *((LIMG)|(SQTN))  *img\/[^ =>"'\'']*  *--> *$/ { print $3 }' < "$1"
}

# Dispatch on the optional operation.
# iLIMG and all are handled completely here and terminate the script;
# random and sqtn1 only adjust what the code after this statement does.
case "$OP" in
iLIMG)
    # No $FILTER stage here: this branch is reached before FILTER can be
    # changed from its pass-through setting.
    emit_first_ilimg "$INPUTHTML"
    exit 0
    ;;
all)
    list_all_images "$INPUTHTML"
    exit $?
    ;;
random)
    # Shuffle the candidates so that the "first" one taken later is random.
    FILTER='sort -R'
    ;;
sqtn1) ;; # Handled later.
*)
    # An empty operation selects the default behaviour; anything else is bad.
    if [ -n "$OP" ]; then
        printf '%s\n' "ERROR: bad operation $OP" 1>&2
        exit 2
    fi
    ;;
esac

# By default returns on stdout the name of the primary (first) hero image,
# though a random candidate can be returned instead;
# if no suitable image is found nothing is output and the exit code is still 0.
# This first/primary image should be UNIQUE TO ITS PAGE if possible.
# The hero image must be under img/ and non-zero-sized.
# No upper bounds on hero image size are enforced here
# (though Twitter may not use anything over 4096x4096px or 5MB),
# as a huge hero image may still be good for external users.
# Can be declared with SQTN or LIMG tag as below; the latter wins if both.

# TODO: ALLOW ALL VALUES TO BE RETURNED MINUS THE DEFAULT/PRIMARY.

# <!-- SQTN ... --> tags relative to root of *static* site.
# Image should be square or nearly so (may be cropped otherwise).
# Image should be at least 50x50 and pref at least 144x144 for Twitter.
# <!-- LIMG ... --> (large/landscape image) of ~2:1 aspect ratio even better.
#
# Note: all images should be under img/ for immutability/cacheability.
#
# https://www.aira.net/open-graph-social-media-traffic/
#     Facebook recommends using images that are at least 1200 x 630 pixels
#     up to a maximum of 8MB in size, and at the very least you should use
#     images that are 600 x 315 pixels.
#
#     The absolute minimum size you’ll get away with is 200 x 200 pixels.
#     If your image is smaller than 600 x 315 px, it will still display,
#     just much smaller.
#
# https://developers.google.com/search/docs/data-types/article 2018/11/18:
#
#     Images should be at least 1200 pixels wide.
#
#     For best results, provide multiple high-resolution images
#     (minimum of 800,000 pixels when multiplying width and height)
#     with the following aspect ratios: 16x9, 4x3, and 1x1.
#
# https://developer.twitter.com/en/docs/tweets/optimize-with-cards/overview/summary-card-with-large-image.html
#     Images for this Card support an aspect ratio of 2:1 with minimum dimensions of 300x157 or maximum of 4096x4096 pixels. Images must be less than 5MB in size. JPG, PNG formats are supported. 

# Print the first image path declared in $INPUTHTML with directive $1
# (LIMG or SQTN), after the candidates have been passed through $FILTER.
# Prints nothing if there is no such declaration.
# Paths containing any of [ =>'"] are deliberately not matched.
# Whether the image file exists is NOT checked here.
first_declared_image() {
    # $FILTER is intentionally unquoted so that a value such as 'sort -R'
    # is split into a command and its option.
    # shellcheck disable=SC2086
    sed -n -e "s/^<!-- *$1 *\(img\/[^ =>\"']*\) *--> *\$/\1/p" < "$INPUTHTML" |
        ${FILTER:-cat} | head -n 1
}

# LIMG (large/landscape) is preferred unless only the SQTN was asked for.
if [ "sqtn1" != "$OP" ]; then
    LIMG=$(first_declared_image LIMG)
    if [ -n "$LIMG" ]; then
        # A declared LIMG settles the matter even if its file is missing or
        # empty: nothing is printed then, and SQTN is not tried as a fall-back.
        if [ -s "$LIMG" ]; then printf '%s\n' "$LIMG"; fi
        exit 0
    fi
fi

# Otherwise use the SQTN (square thumbnail), if declared.
SQTN=$(first_declared_image SQTN)
if [ -n "$SQTN" ]; then
    # Only report the image if it is present and non-empty.
    if [ -s "$SQTN" ]; then printf '%s\n' "$SQTN"; fi
    exit 0
fi

# No hero image found: nothing is written to stdout,
# and the script still finishes with a success (zero) exit status.
exit 0
