#!/bin/sh
# Wrapper for parallel direct data extraction using given var-gran-src.
# Returns a non-zero status code and touches error flag in case of error.
# If output exists and is up to date then it is left as-is, exit 0.
# Atomically updates the given outputfile if no error and non-zero.
#
# usage:
#     $0 errorflag variable-granularity-src


# Argument 1: path of the error-flag file, touched if the extraction fails.
# Without it a failure could not be signalled, so reject it up front.
PARERRFLAG="$1"
if [ "" = "$PARERRFLAG" ]; then
    echo "ERROR: $0: invalid empty error-flag argument." 1>&2
    exit 1
fi

# Argument 2: work item name of the form variable-granularity-source.
basename="$2"
if [ "" = "$basename" ]; then
    echo "ERROR: $0: invalid empty basename argument." 1>&2
    exit 1
fi

# Split the name on '-' into exactly three parts using the shell's own
# pattern matching and parameter expansion (no external processes).
VAR=""
GRAN=""
SRC=""
case "$basename" in
    *-*-*-*) partsok=false ;;   # Three or more dashes: too many parts.
    *-*-*)   partsok=true ;;    # Exactly two dashes: three parts.
    *)       partsok=false ;;   # Fewer than two dashes: too few parts.
esac
if [ true != "$partsok" ]; then
    echo "ERROR: $0: 3 parts expected." 1>&2
    exit 1
fi
VAR="${basename%%-*}"       # Text before the first dash.
granandsrc="${basename#*-}" # Text after the first dash.
GRAN="${granandsrc%%-*}"    # Middle part.
SRC="${granandsrc#*-}"      # Text after the second dash.
if [ "" = "$VAR" ] || [ "" = "$GRAN" ] || [ "" = "$SRC" ]; then
    echo "ERROR: $0: invalid empty variable/granularity/source argument." 1>&2
    exit 1
fi
# Skip 'synth' pseudo-source.
if [ "synth" = "$SRC" ]; then exit 1; fi


# Top directory for the consolidate configuration and outputs.
#CONSOLIDDIR=data/consolidated
# Standardised output data directory.
CONSOLIDOUTDIR=data/consolidated/energy/std
# Configuration files.
#CONFIGDATASOURCES="$CONSOLIDDIR/config_data_sources.csv"
#CONFIGGRANULARITY="$CONSOLIDDIR/config_granularity.csv"
#CONFIGVARIABLES="$CONSOLIDDIR/config_variables.csv"
#CONFIGSYNTH="$CONSOLIDDIR/config_synthetic.csv"

# Location of the standardised generation scripts.
# These generate output on stdout (don't worry about file management).
# These have names in a standard generateable/parseable format.
CONSOLIDSTDSCRDIR=script/consolidate/energy/std


# Uncomment these and matching TSEND stuff below for task timings on stderr.
#PERFTIMESTAMPCMD="date +%s"
#TSSTART="$($PERFTIMESTAMPCMD)"


# Directory holding the generation script for this variable/granularity/source.
datasourcedir="$CONSOLIDSTDSCRDIR/$VAR/$GRAN/$SRC"
# Full path name of generation script.
scriptpath="$datasourcedir/$basename.sh"
# Directory and full path name of the generated output.
outputdir="$CONSOLIDOUTDIR/$VAR/$GRAN/$SRC"
OUTFILE="$outputdir/$basename.csv"


# Temp file, removed on exit, should be parallel safe without locking.
# Must be set only after OUTFILE is known so that it sits beside the
# final output; the process ID keeps concurrent runs apart.
PARWORKFILE="$OUTFILE.$$.tmp"
# On hangup/interrupt/terminate remove any partial temp file and fail.
# Single-quoted so the path is expanded (and safely quoted) at signal time.
trap '/bin/rm -f "$PARWORKFILE"; exit 1' HUP INT TERM

# Nothing can be extracted without a non-empty generation script.
[ -s "$scriptpath" ] || exit 1


# Make-style freshness test, only attempted when output already exists.
# The generation script may declare its inputs on a line of the form:
#INPUTGLOB="data/16WWHiRes/Enphase/adhoc/net_energy_20????.csv.gz"
# If it does, and no file matching that glob is newer than the output,
# then the output is current and there is nothing to recompute.
if [ -s "$OUTFILE" ]; then
    # First matching declaration in the script; the glob is field 2
    # when the line is split on double quotes.
    declaredglob="$(awk -F'"' '/^INPUTGLOB="[^"]*" *$/ {print $2;exit}' < "$scriptpath")"
    if [ -n "$declaredglob" ]; then
        # Path of the first input found to be newer, if any.
        newerinput=""
        # The glob is deliberately left unquoted so the shell expands it.
        for candidate in $(echo $declaredglob); do
            if [ "$candidate" -nt "$OUTFILE" ]; then
                newerinput="$candidate"
                break
            fi
        done
        if [ -z "$newerinput" ]; then
            #echo "INFO: $scriptpath does not need to be re-run" 1>&2
            exit 0
        fi
        echo "INFO: $scriptpath $newerinput is newer than $OUTFILE" 1>&2
    fi
fi


# Ensure temporary file not present.
rm -f "$PARWORKFILE"

# Ensure that output directory is present.  (Permissions may need fixing.)
if [ ! -d "$outputdir" ]; then mkdir -p "$outputdir"; fi

# Run the generation script, capturing its stdout in the temp file;
# its exit status decides between publishing and error handling.
if sh "$scriptpath" > "$PARWORKFILE"; then
    # Append note of generating script for traceability.
    echo '#script,"'"$scriptpath"'"' >> "$PARWORKFILE"
    # Check for solidity, ie no missing dates.
    # Any difference between the solidified and raw data is only warned about.
    # TODO: repair automatically.
    MDDIFF="$(sh script/consolidate/energy/solidify.sh < "$PARWORKFILE" | diff - "$PARWORKFILE")"
    if [ "" != "$MDDIFF" ]; then
       echo "WARNING: missing date(s) in generated $OUTFILE" 1>&2
       echo "$MDDIFF" 1>&2
    fi
    # Atomically move into place.
    sh script/replacePublishedFile.sh "$PARWORKFILE" "$OUTFILE"
    #echo "INFO: generated $OUTFILE" 1>&2
else
    # Failed: remove any broken/partial result.
    rm -f "$PARWORKFILE"
    # Make error visible to task runner.
    touch "$PARERRFLAG"
    echo "ERROR: failed: $scriptpath." 1>&2
    # Also report the failure through the exit status, as the usage
    # header promises, rather than falling through to the final exit 0.
    exit 1
fi


# Uncomment these and matching TSSTART stuff above for task timings on stderr.
#TSEND="$($PERFTIMESTAMPCMD)"
#TSDIFF="$(expr "$TSEND" - "$TSSTART")"
#if [ "$TSDIFF" -gt 0 ]; then echo "INFO: slow ${TSDIFF}s: $scriptpath" 1>&2;fi


# Done!
exit 0
