# Awk script to process files with (ordered) lines of the form:
#     29/08/2010,13.900
# ie:
#     DD/MM/YYYY,kWh
# and provide output of the form:
#     2010-08-29 13.900 14.0074
# ie:
#     YYYY-MM-DD kWh smoothedkWh
# for direct plotting with gnuplot using gnuplotPV.txt.
#
# The windowsize parameter is the number of days either side of the current
# point used to compute the moving average,
# must be strictly positive (anything else falls back to the default of 14),
# and can be set on the command line, eg:
#     awk -v windowsize=7 -f thisscript.awk data.csv
#
# The window is measured in input lines, so it only corresponds to days
# when the input has one line per day.
#
# The smoothed curve will NOT lag the real data.
#
# Note that this uses memory proportional to the input data size.

# Main rule: preprocess and store each valid row, keyed by its line number.
# Rows that do not have exactly two comma-separated fields, a three-part
# '/'-separated date, or a non-empty kWh value are left out of the arrays
# and are later treated as missing points.
{
    # Tolerate CRLF line endings so a stray CR never ends up in the kWh field.
    sub(/\r$/, "");

    if (split($0, columns, ",") == 2 && columns[2] != "" &&
        split(columns[1], d, "/") == 3) {
        date[NR] = d[3] "-" d[2] "-" d[1];    # DD/MM/YYYY -> YYYY-MM-DD
        kWh[NR] = columns[2];
    }
    next;
}

# Do all the processing at the end, once every point is known, so that the
# moving average can look both backwards and forwards.
END {
    # Ensure windowsize is a strictly positive int.
    # Converting BEFORE comparing matters: a non-numeric string would
    # otherwise pass a string comparison against 1 and then become 0.
    windowsize = int(windowsize);
    if (windowsize < 1) { windowsize = 14; }

    for (i = 1; i <= NR; ++i) {
        # Skip missing points.  The 'in' test (unlike reading kWh[i])
        # does not create an empty array element as a side effect.
        if (!(i in kWh)) { continue; }

        # Clamp the window to the lines that actually exist.
        first = i - windowsize;
        if (first < 1) { first = 1; }
        last = i + windowsize;
        if (last > NR) { last = NR; }

        # Compute smoothed/mean value skipping missing points.
        # count is never zero here because point i itself is always included.
        total = 0;
        count = 0;
        for (j = first; j <= last; ++j) {
            if (j in kWh) {
                ++count;
                total += kWh[j];
            }
        }
        smoothed = total / count;

        print date[i] " " kWh[i] " " smoothed;
    }
}