#!/bin/sh

# Script to generate CSV and graph of load profile based on Enphase/Eddi data.
# Can be compared with, eg, standard Elexon GB profile classes.
# Shows both gross consumption as if PV and battery not there, and net.
#
# Usage:
#
#   $0 [ -all | -enphase | -eddi | -hph4 | -eheat | -iheat | -batt ] [ data1 ... datan ]
#
#       -all build all options in turn
#
#       -enphase  (default) use consumption and net grid flows from Enphase
#       -eddi   use grid flows from Eddi
#       -hph4   use hph4 heat pump consumption data from Eddi
#       -eheat  use hph4 heat pump and boost/diversion data from Eddi
#       -iheat  use boost/diversion data from Eddi (immersion)
#       -batt   use battery charge and discharge flows from Enphase
#
# Trailing arguments are taken to be the core parts of gzipped data file names:
#     DATASOURCEGZ=$DATADIR/net_energy_${datasource}.csv.gz   (Enphase sources)
#     DATASOURCEGZ=$DATADIR/${datasource}.hourly.csv.gz       (Eddi sources)
#
# Eg:
# % sh script/storesim/load_profile.sh 202412-01to19
# INFO: using DATADIR = data/16WWHiRes/Enphase/adhoc
# INFO: using DATASOURCES = 202412-01to19
# INFO: using FILTERS = all weekday weekend
# all 
# Created bucketed CSV filtered all: /Users/dhd/tmp/load-profile/bucketed.enphase.202412-01to19.all.csv
# Created bucketed SVG filtered all: /Users/dhd/tmp/load-profile/bucketed.enphase.202412-01to19.all.svg
# weekday 
# Created bucketed CSV filtered weekday: /Users/dhd/tmp/load-profile/bucketed.enphase.202412-01to19.weekday.csv
# Created bucketed SVG filtered weekday: /Users/dhd/tmp/load-profile/bucketed.enphase.202412-01to19.weekday.svg
# weekend 
# Created bucketed CSV filtered weekend: /Users/dhd/tmp/load-profile/bucketed.enphase.202412-01to19.weekend.csv
# Created bucketed SVG filtered weekend: /Users/dhd/tmp/load-profile/bucketed.enphase.202412-01to19.weekend.svg
#
# DHD20250616: changed all times to be UTC (affects Enphase charts during BST).
# DHD20250910: changed output graph format to SVG for space efficiency.


# Enphase 'monthly net energy' data format is in 15-minute blocks such as:
#Date/Time,Energy Produced (Wh),Energy Consumed (Wh),Exported to Grid (Wh),Imported from Grid (Wh),Stored in AC Batteries (Wh),Discharged from AC Batteries (Wh)
#2020-12-01 00:00:00 +0000,0,14,0,15,1,0
#2020-12-01 00:15:00 +0000,0,25,0,26,1,0
#2020-12-01 00:30:00 +0000,0,16,0,17,1,0
#2020-12-01 00:45:00 +0000,0,15,0,16,1,0
#2020-12-01 01:00:00 +0000,0,29,0,30,1,0
#2020-12-01 01:15:00 +0000,0,28,0,29,1,0
#2020-12-01 01:30:00 +0000,0,59,0,60,1,0
#2020-12-01 01:45:00 +0000,0,51,0,52,1,0
#2020-12-01 02:00:00 +0000,0,27,0,28,1,0
#...
# AND
#Date/Time,Energy Produced (Wh),Energy Consumed (Wh),Exported to Grid (Wh),Imported from Grid (Wh),Stored in batteries (Wh),Discharged from batteries (Wh)
#2024-06-01 00:00:00 +0100,0,16,0,0,0,16
#2024-06-01 00:15:00 +0100,0,10,0,0,0,10
#2024-06-01 00:30:00 +0100,0,20,0,0,0,20
#2024-06-01 00:45:00 +0100,0,24,0,0,0,24
#2024-06-01 01:00:00 +0100,0,15,0,0,0,15
#2024-06-01 01:15:00 +0100,0,12,0,0,0,12
#2024-06-01 01:30:00 +0100,0,21,0,0,0,21
#2024-06-01 01:45:00 +0100,0,12,0,0,0,12
#2024-06-01 02:00:00 +0100,0,15,0,0,0,15
#...
# ALL TIMES LOCAL.

# Eddi data including hph4 looks like:
#Eddi hourly stats summary in kWh
#UTCISOdatetime,h,h1d,h1b,imp,exp,h2d,h2b,hph4
#2024-12-01T00:00Z,1,0,0,0.111,0,0,0,0.015
#2024-12-01T01:00Z,1,0,0,0.111,0,0,0,0.015
#2024-12-01T02:00Z,1,0,0,0.109,0,0,0,0.015
#2024-12-01T03:00Z,1,0,1.357,1.54,0,0,0,0.015
#2024-12-01T04:00Z,1,0,1.582,1.771,0,0,0,0.015
#2024-12-01T05:00Z,1,0,0,0.142,0,0,0,0.016
#2024-12-01T06:00Z,1,0,0,0.375,0,0,0,0.251
#2024-12-01T07:00Z,1,0,0,0.235,0,0,0,0.126
#...
# ALL TIMES UTC.


# Generate all outputs...
# Re-runs this script once per source option, handing on EVERY trailing
# data file argument (or none at all, so that the defaults apply).
# Stops with a non-zero status at the first run that fails.
if [ "-all" = "$1" ]; then
    # Drop the -all itself so that "$@" is just the data file arguments.
    shift
    for option in -enphase -eddi -hph4 -eheat -iheat -batt; do
        sh "$0" "$option" "$@" || exit 1
    done
    exit 0
fi


# Gnuplot script used to draw the chart for each bucketed CSV.
# Candidates, with the one in use assigned below:
#   graphing/storesim/load-profile-1.txt  original, lines based
#   graphing/storesim/load-profile-2.txt  bar-chart style
#   graphing/storesim/load-profile-3.txt  as -1 but SVG output
GPSCRIPT=graphing/storesim/load-profile-3.txt
# Give up straight away if the script is absent or empty.
[ -s "${GPSCRIPT}" ] || {
    echo "ERROR: missing gnuplot script ${GPSCRIPT}." 1>&2
    exit 1
}

# Pick the source and data to plot.
#
# SOURCE is the data family (enphase or eddi) and decides which data files
# are read.  FULLSOURCENAME adds the variant; it selects the columns that
# get bucketed and appears in the output file names.
#
# Leading options are consumed here; the first argument that does not start
# with '-' (and everything after it) is left in "$@" as data file names.
# If several source options are given the last one wins.
SOURCE=enphase
FULLSOURCENAME="$SOURCE"

while [ "$#" -gt 0 ]; do
    case "$1" in
        # Enphase (15m): gross consumption and net grid flows (the default).
        -enphase) SOURCE=enphase; FULLSOURCENAME="$SOURCE";;
        # Enphase (15m): battery discharge and charge flows.
        -batt)    SOURCE=enphase; FULLSOURCENAME="$SOURCE-batt";;
        # Eddi (1h): net grid flows only.
        -eddi)    SOURCE=eddi; FULLSOURCENAME="$SOURCE";;
        # Eddi (1h): heat-pump consumption rather than full grid flows.
        -hph4)    SOURCE=eddi; FULLSOURCENAME="$SOURCE-hph4";;
        # Eddi (1h): heat-pump plus immersion (boost/diversion) consumption.
        -eheat)   SOURCE=eddi; FULLSOURCENAME="$SOURCE-eheat";;
        # Eddi (1h): immersion (boost/diversion) consumption.
        -iheat)   SOURCE=eddi; FULLSOURCENAME="$SOURCE-iheat";;
        # Anything else that looks like an option is a mistake; say so
        # rather than letting it be taken for a data file name.
        -*)
            echo "ERROR: unknown option $1." 1>&2
            exit 1;;
        # First data file name: stop option processing.
        *)  break;;
    esac
    shift
done

# And series titles: only the battery view needs non-default ones.
case "$FULLSOURCENAME" in
    enphase-batt)
        ser1="battery discharge W"
        ser2="battery -charge W";;
    *)
        ser1="consumption gross W"
        ser2="grid net W";;
esac

# Where generated files are written.
# This is only a working area (results are meant to be moved somewhere
# under img afterwards) and it must already exist.
OUTDIR="$HOME/tmp/load-profile"
[ -d "${OUTDIR}" ] || {
    echo "ERROR: missing output directory ${OUTDIR}." 1>&2
    exit 1
}

# Location of data files: the Enphase area unless an Eddi source was chosen.
DATADIR=data/16WWHiRes/Enphase/adhoc
if [ "eddi" = "$SOURCE" ]; then
    # Eg: data/eddi/log/202412.hourly.csv.gz
    DATADIR="data/eddi/log"
fi
#DATADIR=$HOME/tmp/
# Default, gzipped, data sources:
#DATASOURCES="202006 202008 202009 202010 202011 202012-01to21"
#DATASOURCES="202006 202012-01to21"
DATASOURCES="202109-01to14 202109-16to29"

# Default set of filters to use from: all, weekday, weekend
FILTERS="all weekday weekend"

# Mop up all trailing arguments as data file identifiers.
# "$*" joins them with spaces into the one string that is split again when
# iterated over; "$@" is a list and is not meant for scalar assignment.
if [ "$#" -gt 0 ]; then
    DATASOURCES="$*"
fi

echo "INFO: using DATADIR = $DATADIR"
echo "INFO: using DATASOURCES = $DATASOURCES"
echo "INFO: using FILTERS = $FILTERS"

# Pass through only the CSV rows on stdin that match the named day filter.
#   $1: all      every row
#       weekday  rows dated Monday to Friday
#       weekend  rows dated Saturday or Sunday
# The day is worked out from the first YYYY-MM-DD found in the first field,
# exactly as written there (no time zone adjustment).
# Non-data rows (eg headers) get no special treatment here; the bucketing
# step discards anything whose first field does not start with 202.
lp_filter_rows() {
    case "$1" in
        all)
            cat;;
        weekday)
            perl -lF/,/ -MPOSIX -e 'my ($year, $month, $day) = $F[0] =~ m/(\d{4})-(\d{2})-(\d{2})/; if(POSIX::strftime("%u", 0, 0, 0, $day, $month - 1, $year - 1900) <= 5) { print }';;
        weekend)
            perl -lF/,/ -MPOSIX -e 'my ($year, $month, $day) = $F[0] =~ m/(\d{4})-(\d{2})-(\d{2})/; if(POSIX::strftime("%u", 0, 0, 0, $day, $month - 1, $year - 1900) > 5) { print }';;
    esac
}

# Bucket the CSV rows on stdin by time of day and print, unsorted:
#     time-of-day,count,mean-series-1-W,mean-series-2-W
# with the means rounded to the nearest watt.
#   $1: the FULLSOURCENAME variant, which selects the columns used:
#       eddi          series 1 empty (no gross available), series 2 imp-exp
#       eddi-hph4     series 1 hph4
#       eddi-eheat    series 1 hph4 + h1d+h1b + h2d+h2b
#       eddi-iheat    series 1 h1d+h1b + h2d+h2b
#       enphase       series 1 consumed, series 2 imported-exported
#       enphase-batt  series 1 discharged, series 2 minus stored
#
# Eddi data looks like (kWh per hour, all times UTC):
#UTCISOdatetime,h,h1d,h1b,imp,exp,h2d,h2b,hph4
#2024-12-01T00:00Z,1,0,0,0.111,0,0,0,0.015
#
# Enphase data looks like (Wh per 15 minutes, all times local):
#Date/Time,Energy Produced (Wh),Energy Consumed (Wh),Exported to Grid (Wh),Imported from Grid (Wh),Stored in AC Batteries (Wh),Discharged from AC Batteries (Wh)
#2020-12-01 00:00:00 +0000,0,14,0,15,1,0
#
# Rounding is to nearest with halves away from zero.  A bare int(x+0.5)
# truncates toward zero, so it turns -250 into -249 and -2.7 into -2, which
# matters for net export and for the (negative) battery charge series.
lp_bucket_rows() {
    case "$1" in
    eddi*)
        awk -F, -v mode="$1" '
            # Round to nearest integer; the + 0 avoids printing a negative zero.
            function rnd(x) { return ((x < 0) ? int(x - 0.5) : int(x + 0.5)) + 0 }
            $1 ~ /^202/ {
                hour = substr($1, 12, 5);
                ++count[hour];
                # Buckets are 1h, so W = kWh*1000.
                if (mode == "eddi-hph4") { gross[hour] += 1000 * $9; }
                else if (mode == "eddi-eheat") { gross[hour] += 1000 * ($9 + $3+$4 + $7+$8); }
                else if (mode == "eddi-iheat") { gross[hour] += 1000 * ($3+$4 + $7+$8); }
                net[hour] += 1000 * ($5-$6);
            }
            END {
                for (hour in count) {
                    n = count[hour];
                    # Plain grid flows have no gross figure: leave it empty.
                    g = (mode == "eddi") ? "" : rnd(gross[hour] / n);
                    print hour ":00," n "," g "," rnd(net[hour] / n);
                }
            }';;
    *)
        awk -F, -v mode="$1" '
            # Round to nearest integer; the + 0 avoids printing a negative zero.
            function rnd(x) { return ((x < 0) ? int(x - 0.5) : int(x + 0.5)) + 0 }
            $1 ~ /^202/ {
                offset = substr($1, 21, 5);
                hour = 0 + substr($1, 12, 2);
                minute = 0 + substr($1, 15, 2);
                # DHD: 20250616: convert hours to UTC when offset is +0100.
                if ("+0100" == offset) { hour -= 1; if (hour < 0) { hour = 23; } }
                slot = sprintf("%02d:%02d", hour, minute);
                ++count[slot];
                # Buckets are assumed to be 15 mins, so W = 4*Wh.
                if (mode == "enphase-batt") {
                    first[slot] += 4 * $7;
                    second[slot] += 4 * -$6;
                } else {
                    first[slot] += 4 * $3;
                    second[slot] += 4 * ($5-$4);
                }
            }
            END {
                for (slot in count) {
                    n = count[slot];
                    print slot "," n "," rnd(first[slot] / n) "," rnd(second[slot] / n);
                }
            }';;
    esac
}

# For each data source and each filter: uncompress, filter, bucket, and
# create a .csv and .svg file.
# The CSV file is directly usable and also feeds gnuplot for graphing.
for datasource in $DATASOURCES; do

    # Check the variant before starting any pipeline: an exit from inside a
    # pipeline stage only ends that stage, not this script.
    case "$FULLSOURCENAME" in
        enphase|enphase-batt|eddi|eddi-hph4|eddi-eheat|eddi-iheat) ;;
        *)
            echo "ERROR: bad source" 1>&2
            exit 1;;
    esac

    if [ "eddi" = "$SOURCE" ]; then
        DATASOURCEGZ=$DATADIR/${datasource}.hourly.csv.gz
    else
        DATASOURCEGZ=$DATADIR/net_energy_${datasource}.csv.gz
    fi
    if [ ! -s "${DATASOURCEGZ}" ]; then
        echo "ERROR: missing data source ${DATASOURCEGZ}." 1>&2
        exit 1
    fi

    for filter in $FILTERS; do

        # Likewise check the filter name up front so that a bad one stops
        # the whole run.
        case "$filter" in
            all|weekday|weekend) ;;
            *)
                echo "ERROR: bad filter ${filter}." 1>&2
                exit 1;;
        esac

        # Generate a simple CSV file, in mean (rounded) watts:
        #     time-of-day,count,mean-gross-W,mean-net-W
        # eg:
        #00:00,21,67,71
        #00:15,21,77,81
        #00:30,21,61,65
        OUTCSV="$OUTDIR/bucketed.$FULLSOURCENAME.$datasource.$filter.csv"
        rm -f "$OUTCSV.tmp"

        # Progress marker (the trailing space is kept so that the output
        # is unchanged from earlier versions).
        echo "$filter "

        gzip -d < "$DATASOURCEGZ" |
            lp_filter_rows "$filter" |
            lp_bucket_rows "$FULLSOURCENAME" |
            sort > "$OUTCSV.tmp"

        if [ -s "$OUTCSV.tmp" ]; then
            mv -f "$OUTCSV.tmp" "$OUTCSV"
            chmod a+r "$OUTCSV"
            echo "Created bucketed CSV filtered ${filter}: $OUTCSV"
        else
            # Nothing matched: do not leave an empty temporary file behind,
            # and do not chart whatever CSV an earlier run may have left.
            rm -f "$OUTCSV.tmp"
            echo "WARNING: no data for filter ${filter} in ${DATASOURCEGZ}, skipped." 1>&2
            continue
        fi

        # Generate a simple .svg chart.
        OUTSVG="$OUTDIR/bucketed.$FULLSOURCENAME.$datasource.$filter.svg"
        rm -f "${OUTSVG}.tmp"
        gnuplot -e "title='Load Profile: ${datasource} ${filter}'" \
            -e "infilename='${OUTCSV}'" -e "outfilename='${OUTSVG}.tmp'" \
            -e "ser1='${ser1}'" -e "ser2='${ser2}'" \
            "${GPSCRIPT}"
        if [ -s "$OUTSVG.tmp" ]; then
            #mv -f "$OUTSVG.tmp" "$OUTSVG"
            #zopflipng -y "$OUTSVG.tmp" "$OUTSVG" && rm "$OUTSVG.tmp"
            # Minify with svgo if possible, else use the chart as generated.
            if svgo --multipass -i "$OUTSVG.tmp" -o "$OUTSVG"; then
                chmod a+r "$OUTSVG"
            else
                echo "INFO: could not run svgo" 1>&2
                chmod a+r "$OUTSVG.tmp"
                mv -f "$OUTSVG.tmp" "$OUTSVG"
            fi
            rm -f "$OUTSVG.tmp"
            # Build maximally pre-compressed versions.
            # Only make svgbr as most browsers support, else gzip on the fly.
            #zopfli -c "$OUTSVG" > "${OUTSVG}gz" && chmod a+r "${OUTSVG}gz"
            brotli < "$OUTSVG" > "${OUTSVG}br" && chmod a+r "${OUTSVG}br"
            echo "Created bucketed SVG filtered ${filter}: $OUTSVG"
        fi

    done # filter

done # datasource

# Normal completion.  Nothing below this line is ever executed.
exit 0

# Development notes, kept for reference only:
# (1) a form of the weekend filter held as a command string in a variable;
# (2) a one-liner printing the strftime %u day-of-week number for each row.
# weekend) filtercmd="perl -lF/,/ -MPOSIX -e 'my (\$year, \$month, \$day) = \$F[0] =~ m/(\d{4})-(\d{2})-(\d{2})/; if(POSIX::strftime(\"%u\", 0, 0, 0, \$day, \$month - 1, \$year - 1900) > 5) { print }' " ;;
# perl < tmp/testdata.dat -l'F/,/' -MPOSIX -e 'my ($year, $month, $day) = $F[0] =~ m/(\d{4})-(\d{2})-(\d{2})/; print POSIX::strftime("%u", 0, 0, 0, $day, $month - 1, $year - 1900);'


##########
# May be used / adapted / etc without any promise of fitness for purpose
# under the terms of the Apache License Version 2.0, January 2004
#     http://www.apache.org/licenses/LICENSE-2.0
##########
