#!/bin/sh
# Extract daily range from timeseries, primarily intended for temperature.
# Extract some other simple stats in passing.
# Input is ~30s or ~60s (*nix) timestamps,value rows on stdin.
# Output is CSV summary by (UTC) day (%86400s) on stdout.
# The output is intended to be further processes/examined in a spreadsheet.

# Note conversion from *nix timestamp to Excel/LibreOffice date:
#     =(XX/86400)+DATE(1970,1,1)

# Input data format:
#1739318429,18.06
#1739318459,18.01
#1739318489,18.01
#1739318519,18.06
#1739318549,18.09

exec awk -F, '
    function printDay() {
        if(n > 0) {
            printf("%d,%d,%d,%.2f,%.2f,%.2f\n",
                bucketStart, (bucketStart-firstBucket)/dayLengthSec,
                n, minVal, maxVal, sum / n);
            }
        }
    BEGIN {
        print "dayStart s,day,nSamples,min,max,mean";
        dayLengthSec=86400; # Length of 24h day in seconds.
        firstBucket=0;
        bucketStart=0;
        lastBucketEndExcl=0;
    }
    {
    # Look for roll into next bucket.
    if($1 >= lastBucketEndExcl) {
        # Print old bucket.
        printDay();
        # Start new bucket.
        offset = $1 % dayLengthSec;
        bucketStart = $1 - offset;
        if(0 == firstBucket) { firstBucket = bucketStart; }
        lastBucketEndExcl = bucketStart + dayLengthSec;
        n = 0;
        sum = 0;
        minVal = ""; maxVal = "";
        }

    ++n;
    sum += $2;
    if(("" == minVal) || ($2 < minVal)) { minVal = $2; }
    if(("" == maxVal) || ($2 > maxVal)) { maxVal = $2; }
    }
    END {
        # Print old bucket.
        printDay();
    }'
