#!/bin/sh
# Generates an HTML5 table from (consolidated) energy stats, usually to stdout.
# In sub-page mode atomically updates/publishes as a page, if changed.
# Will highlight any row whose date exactly matches the temporal coverage.
# When temporal coverage is YYYY or YYYY-MM or YYYY-MM-DD
# this will highlight any row that is a prefix (superset).
# Marks values with low (<95%) or unknown coverage.


##########
# May be used / adapted / etc without any promise of fitness for purpose
# under the terms of the Apache License Version 2.0, January 2004
#     http://www.apache.org/licenses/LICENSE-2.0
##########


# Usage:
#     $0 [[-subpage wf] -pagestem pagestem] energy.csv [temporalcoverage]
#
#     Generate an HTML5 table from the supplied CSV file.
#
#     By default, this table is assumed to be inserted in-line
#     in the main energy series (energy-series-dataset.html) page,
#     with in-page links to the relevant energy events.
#     With the optional -pagestem pagestem leading option
#     (typically -pagestem energy-series-dataset)
#     this output is assumed to be in a sub-page, one level down,
#     of the page specified, so relative inter-page links are used.
#
#     If -subpage then this generates a sub-page and atomically
#     publishes it if changed or previously not present.
#     Generally -subpage and -pagestem will be used together.
#     The weight flag (wf) is one of d/m/o for desktop/mobile/offline.
#
#     Typical usage generating to stdout:
#         sh script/energyTableHTML.sh -pagestem energy-series-dataset data/consolidated/energy/std/net/Y/net-Y.csv
#
#     Typical usage generating a sub-page:
#         sh script/energyTableHTML.sh -subpage d -pagestem energy-series-dataset data/consolidated/energy/std/net/Y/net-Y.csv


# Run in full Web publishing mode, using recursion.
#
# Invoked as:  $0 -subpage wf -pagestem pagestem energy.csv
#   wf        weight flag: m puts the output under m/, o under .offline/,
#             anything else (eg d) directly under pagestem/.
#   pagestem  existing directory, relative to the current directory,
#             in which table-<CSV basename>.html lives.
# Does nothing if the destination is non-empty and newer than both the
# source CSV and the regeneration flag file.  Otherwise re-runs this script
# without -subpage to generate the table on stdout, and pipes that into the
# sub-page wrapper script (presumably the step that publishes the page).
# This mode always exits here: 1 for bad arguments, otherwise 0
# (the status of the generation pipeline itself is not propagated).
if [ "-subpage" = "$1" ]; then
    wf="$2"
    shift 2

    PAGESTEM=""
    if [ "-pagestem" = "$1" ]; then
        PAGESTEM="$2"
        shift 2
    fi
    if [ "" = "$PAGESTEM" ]; then
        echo "ERROR: $0 -subpage needs -pagestem." 1>&2
        exit 1
    fi
    if [ ! -d "$PAGESTEM" ]; then
        echo "ERROR: $0 -subpage output directory absent: $PAGESTEM" 1>&2
        exit 1
    fi

    SRCCSV="$1"
    if [ "" = "$SRCCSV" ]; then
        echo "ERROR: $0 input CSV filename needed as final parameter." 1>&2
        exit 1
    fi

    # Source and destination common element.
    basename="$(basename "$SRCCSV" .csv)"

    # Flag used to force all HTML to be regenerated (see makefile).
    HTMLREGENFLAG=.work/newsflash.html

    # Avoid even trying to rebuild if dest exists and is newer than src.
    # Like a makefile...
    DESTHTML="$PAGESTEM/table-$basename.html"
    if [ "m" = "$wf" ]; then DESTHTML="m/$DESTHTML";
    elif [ "o" = "$wf" ]; then DESTHTML=".offline/$DESTHTML";
    fi
    #echo "INFO: $0 $DESTHTML from $SRCCSV..." 1>&2
    if [ -s "$DESTHTML" ] && [ "$DESTHTML" -nt "$SRCCSV" ] && \
            [ "$DESTHTML" -nt "$HTMLREGENFLAG" ]; then
        : Nothing to do, exit silently.
        #echo "INFO: $0 $DESTHTML up to date" 1>&2
        exit 0
    fi

    # "$0" must be quoted: unquoted, a script path containing whitespace
    # is split into several words and the recursive run fails.
    sh "$0" -pagestem "$PAGESTEM" "$SRCCSV" | \
        sh .work/script/wrap_subpage_complete.sh "-$wf" ".$PAGESTEM.html" "table-$basename"

    exit 0
fi


# Optional leading "-pagestem pagestem": names the parent page that
# event links should point back to.
PAGESTEM=""
case "$1" in
    -pagestem)
        PAGESTEM="$2"
        shift 2
        ;;
esac

# Remaining positional parameters: source CSV, then optional temporal coverage.
SRCCSV="$1"
TEMPCOV="$2"

# The source CSV must be named and must be a non-empty file.
if [ -z "$SRCCSV" ] || ! [ -s "$SRCCSV" ]; then
    echo "ERROR: $0: missing source CSV." 1>&2
    exit 1
fi

# Top directory for the consolidate configuration and outputs.
CONSOLIDDIR=data/consolidated
# Standardised output data directory.
#CONSOLIDOUTDIR=data/consolidated/energy/std
# Configuration files.
#CONFIGDATASOURCES="$CONSOLIDDIR/config_data_sources.csv"
CONFIGGRANULARITY="$CONSOLIDDIR/config_granularity.csv"
CONFIGVARIABLES="$CONSOLIDDIR/config_variables.csv"
CONFIGSYNTH="$CONSOLIDDIR/config_synthetic.csv"

# Extract permitted source names.
#DATASOURCES="$(awk -F, < "$CONFIGDATASOURCES" '/^[^#]/ {print $1}')"
# Extract permitted granularities.
#GRANULARITIES="$(awk -F, < "$CONFIGGRANULARITY" '/^[^#]/ {print $1}')"
# Extract permitted variables (ie reported-on quantities).
#VARIABLES="$(awk -F, < "$CONFIGVARIABLES" '/^[^#]/ {print $1}')"

# Parse variable and granularity from first parameter:
# the CSV file stem is split on "-", eg net-Y.csv gives net and Y.
VARIABLE="$(basename "$SRCCSV" .csv | awk -F- '{print $1}')"
GRANULARITY="$(basename "$SRCCSV" .csv | awk -F- '{print $2}')"

# Get descriptions: second field, double quotes stripped, of each config row
# whose first field equals the key.
# The key is handed to awk with -v rather than spliced into the program text,
# so a quote in the file stem cannot break the awk program, and
# (key "") forces a string (not numeric) comparison.
VARNAME="$(awk -F, -v key="$VARIABLE" '$1 == (key "") {s=$2; gsub("\"","",s); print s}' <"$CONFIGVARIABLES")"
GRANAME="$(awk -F, -v key="$GRANULARITY" '$1 == (key "") {s=$2; gsub("\"","",s); print s}' <"$CONFIGGRANULARITY")"

# Input layout: a date column followed by one device,coverage,value triple
# per source; with more sources the triple simply repeats, eg:
##YYYY,device,coverage,imp,comment
#2010,meter,1,1220
#2011,meter,1,1081
#2012,meter,1,1015.3
# Only lines starting with "2" are treated as data rows.

# Number of devices/sources, taken from the first data row only.
n="$(awk -F, '/^2/ { print int((NF - 1) / 3); exit }' < "$SRCCSV")"
# Highest kWh value found in any value column (fields 4, 7, 10, ...).
maxkWh="$(awk -F, '
    /^2/ {
        for(col = 4; col <= NF; col += 3) {
            if($col > peak) { peak = $col }
        }
    }
    END { print 0 + peak }
    ' < "$SRCCSV")"
# Source names in column order, space-separated.
# For each slot the last non-empty name seen wins; the slot count is
# that of the last data row.
sources="$(awk -F, '
    /^2/ {
        count = int((NF - 1) / 3)
        for(f = 2; f + 2 <= NF; f += 3) {
            if($f != "") { name[int((f - 2) / 3)] = $f }
        }
    }
    END {
        for(k = 0; k < count; ++k) {
            printf("%s%s", name[k], (k == count - 1) ? "" : " ")
        }
    }
    ' < "$SRCCSV")"

#echo "INFO: $0: input $SRCCSV var $VARIABLE sources $sources" 1>&2
# If the variable has an entry in the synthetic-variables config,
# show the formula (second CSV field) used to derive it.
# A single awk pass does both the test and the extraction:
#   * the first field must equal $VARIABLE exactly (string comparison),
#     which no longer depends on how the invoking shell treats $'...'
#     and does not interpret the variable name as a regex;
#   * as before, the note is only emitted if some matching line has
#     at least three characters after its first comma.
if SYNTHFORMULA="$(awk -F, -v var="$VARIABLE" '
        $1 == (var "") {
            print $2
            if(length($0) - length($1) - 1 >= 3) { found = 1 }
        }
        END { exit(found ? 0 : 1) }
        ' < "$CONFIGSYNTH")"; then
    printf '%s\n' "<p>(Formula for synth: <code>$SYNTHFORMULA</code>)</p>"
fi

# Open the table; the caption summarises granularity, variable,
# sources and the maximum value.
printf '%s\n' '<table class=doptsml>'
echo "<caption>Table: $GRANAME $VARNAME; sources: $sources; max: $maxkWh</caption>"
printf '%s\n' '<thead>'
# Build both header rows in one pass over the sources so that they are
# wide enough for the number of devices/sources:
#   row 1: empty date cell, one two-column "Source:" heading per source,
#          then an empty events cell;
#   row 2: "Date", one kWh heading plus empty cell per source, then "Events".
# HS (the row opener) is also read later when the footer row is built.
HS="<tr>"
srcrow="${HS}<td>"
unitrow="${HS}<th>Date"
for i in $sources; do
    srcrow="${srcrow}<th colspan=2 class=doptsml>Source: $i"
    unitrow="${unitrow}<th class=doptsml style=text-align:right>kWh<td>"
done
echo "${srcrow}<td class=dopt>"
echo "${unitrow}<th class=dopt>Events"
printf '%s\n' '</thead>' '<tbody>'
echo '<tbody>'
# Emit one table row per data line of the source CSV
# (data lines are those starting with "2").
# Each row holds: the date, then per source a right-aligned value cell and
# a <meter> cell, then a cell with any events text for that date, obtained
# by running the events helper script once per row.
# Inputs passed in: VARIABLE, TEMPCOV (temporal coverage, may be empty),
# maxkWh (scale for the meters) and PAGESTEM (parent page, may be empty).
awk -F, \
        -v VARIABLE="$VARIABLE" \
        -v TEMPCOV="$TEMPCOV" \
        -v maxkWh="$maxkWh" \
        -v PAGESTEM="$PAGESTEM" \
        < "$SRCCSV" '
    BEGIN {
    # Coverage of the simple form YYYY, YYYY-MM or YYYY-MM-DD also
    # highlights rows whose date is a prefix of it.
    # NOTE(review): this pattern is not anchored at its end, so a longer
    # value that merely starts with such a date also qualifies;
    # confirm that this is intended.
    if(TEMPCOV ~ /^(20[0-9][0-9](-[01][0-9](-[0-3][0-9])?)?)/) {
        simpleTC = 1;
        }
    # Scale for the textual fallback bar; never divide by zero or less.
    norm=maxkWh;
    if(norm<=0) { norm=1; }
    # Number of display decimals; usually 0 for whole kWh.
    # Show extra precision to maintain significant figures.
    # (Depends only on maxkWh, so computed once here rather than per row.)
    DP=0
    if(maxkWh < 100) { DP=1; }
    }
    /^2/ {
    # Mark the row whose date exactly matches the temporal coverage,
    # or is a prefix of a simple-form coverage.
    trstyle="";
    rowdate=$1
    if(rowdate == TEMPCOV || (simpleTC && (rowdate==substr(TEMPCOV,1,length(rowdate))))) {
        trstyle=" aria-current=date";
        }
    printf("<tr%s><td>%s", trstyle, rowdate);

    # Add data from each source device to the table...
    # Step across input 3 fields at a time for each, outputting 2...
    # Field i is the device, i+1 the coverage and i+2 the value.
    for(i = 2; i+2 <= NF; i += 3) {
        # Fallback content for the meter: up to five plus signs.
        bar=substr("+++++", 1, int(5*$(i+2)/norm));
        # Note if coverage not good.
        covnote=""
        if((""==$(i+1))||($(i+1)<0.95)) { covnote="*"; } # Fairly arbitrary 95% threshold...
        if((""==$i)&&(""==$(i+1))&&(""==$(i+2))) {
            # Data not available for this source for this datetime.
            printf("<td colspan=2>");
        } else {
            printf("<td style=text-align:right>%s%."DP"f<td class=doptsml>", covnote, $(i+2));
            if($(i+2) < 0) {
                # Omit meter for negative values for now.
            } else if("" == covnote) {
                printf("<meter min=0 max=%."DP"f value=%."DP"f>%s</meter>", maxkWh, $(i+2), bar);
            } else {
                # Low-opacity meter for unknown/low coverage....
                covtext="unknown"
                if("" != $(i+1)) { covtext=(int($(i+1)*100))"%"; }
                printf("<meter min=0 max=%."DP"f value=%."DP"f style=opacity:.7 title=\"%s coverage\">%s</meter>", maxkWh, $(i+2), covtext, bar);
            }
        }
        }
    # Include references, if any.
    # Without a page stem the links are in-page; with one they point
    # up a level to the named parent page.
    printf("<td class=dopt>");
    if("" == PAGESTEM) {
        command="sh script/energyEventsTableHTML5.sh -varref "VARIABLE" -dateprefix "rowdate;
    } else {
        command="sh script/energyEventsTableHTML5.sh -varref-to "VARIABLE" ../"PAGESTEM".html -dateprefix "rowdate;
        }
    # Only the first line of the helper output is used.
    # Read it into a variable so that the current record is left intact.
    if((command | getline events) > 0) { print events; }
    # Each row runs a distinct command, so close the pipe at once:
    # otherwise one descriptor and one unreaped child leak per row and
    # a long table can exhaust the open-file limit.
    close(command);
    # Terminate row and line.
    print ""
    }'
# Close the body, then repeat the per-source headings as a footer row
# (empty date cell, one two-column heading per source, empty events cell).
printf '%s\n' '</tbody>' '<tfoot>'
H="${HS}<td>"
for i in $sources; do
    H="${H}<th colspan=2 class=doptsml>Source: $i"
done
echo "${H}<td class=dopt>"
# Close the table and explain the asterisk used on low/unknown coverage values.
printf '%s\n' '</tfoot>' '</table>' '<p>* Partial/unknown coverage.</p>'


exit 0
