#!/bin/sh
# Compute target (Flesch–Kincaid) minimum readability score for raw HTML.
# This looks at the tags, if any, to adjust the minimum threshold.
#
# Usage:
#     script/get_minreadability .example.html
#
# Supply the raw HTML with <!-- TAGS ... --> markup in $1.  (NOT STDIN.)
# This will produce on stdout an integer minimum score.

# External helper this script depends on: script/get_tags(.sh).
GETTAGS='script/get_tags'

# Minimum Flesch–Kincaid readability scores, on a [0,100] scale.
# Target 40+ for technical pieces, and eventually 80--90 for consumer pieces.
# See https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests

# Higher acceptable minimum score for easy-read consumer pieces.
MINCONS='72'

# Lower acceptable minimum score for more technical pieces.
# History of this value:
#   originally 40;
#   DHD20190719: raised from 40 to 42 to try to improve least-readable content;
#   DHD20230114: lowered from 42 to 25 to reflect some tricky legal texts;
#   DHD20230119: lowered to 15 to allow a very dense PhD research page.
MINTECH='15'

# Default minimum score.
DEFAULTMINREADABILITY='55'

# Start from the default; the tag checks below may replace it.
MINSCORE=$DEFAULTMINREADABILITY

# Succeed iff $GETTAGS prints anything when fed the file named by $1 on
# stdin and asked whether it -contains any of the remaining arguments (tags).
# If the file cannot be opened the shell reports the error on stderr and the
# captured output is empty, which is treated as "no match".
has_any_tag() {
    _hat_file=$1
    shift
    [ -n "$("$GETTAGS" -contains "$@" < "$_hat_file")" ]
}

# Consumer trigger tags: require a higher score.
TAGSCONS="EASYREAD"
# 'site/tech/research' trigger tags: allow a lower score.
# Podcasts also get the lower bar as they may have other constraints.
TAGSTECH="PODCAST RESEARCH SITE TECH"

# Reasons to raise readability win over reasons to lower it, so the consumer
# tags are checked first and the tech tags only if no consumer tag matched.
# FIXME: INEFFICIENT DOUBLE SCAN OF ENTIRE FILE (when no consumer tag matches).
# The tag lists are deliberately unquoted so each tag is a separate argument.
# shellcheck disable=SC2086
if has_any_tag "$1" $TAGSCONS; then
    MINSCORE=$MINCONS
elif has_any_tag "$1" $TAGSTECH; then
    MINSCORE=$MINTECH
fi

# Emit the integer minimum score on stdout.
printf '%s\n' "$MINSCORE"
exit 0
