#!/bin/sh
#
# Convert a text file of raw input samples as ASCII to a mono WAVE (.wav).
#
# Usage:
#
#     $0 audiosamples.txt [SAMPLEFREQ] > output.wav
#
# where:
#
#     audiosamples.txt is a text file with one signed integer per line.
#     SAMPLEFREQ (optional) is sample frequency in Hz; defaults to 44100.
#
# By default 16-bit signed integers are assumed for the input samples.

# Default sample frequency in Hz, used when no second argument is given.
SAMPLEFREQ=44100

# First argument: path of the text file holding one sample per line.
INPUT=$1

# Reject a missing argument, anything that is not a regular file, and an
# empty file. Each condition is its own test joined with ||: the -o operator
# of [ is obsolescent and a multi-operand expression can be misparsed when the
# file name itself looks like an operator, which would skip this check.
if [ $# -lt 1 ] || [ ! -f "$INPUT" ] || [ ! -s "$INPUT" ]; then
    echo "ERROR: input file missing or empty or not a plain file." 1>&2
    exit 1
fi

# Optional second argument: sample frequency in Hz, replacing the default.
if [ $# -gt 1 ]; then
    SAMPLEFREQ="$2"
    # Accept only an integer in 1..2147483647.
    # The check is phrased positively on purpose: for a non-numeric or empty
    # value [ fails with an error status (not plain "false"), and a negative
    # test such as "-lt 1" would then let the bad value through.
    # The upper bound keeps ByteRate (2 * SAMPLEFREQ) inside its 32-bit field.
    if ! { [ "$SAMPLEFREQ" -ge 1 ] && [ "$SAMPLEFREQ" -le 2147483647 ]; } 2>/dev/null; then
        echo "ERROR: SAMPLEFREQ ($SAMPLEFREQ) invalid." 1>&2
        exit 1
    fi
    # Drop one positional parameter; nothing below reads them again.
    shift
fi

# Count the input samples.
# awk's record count (NR) is used rather than "wc -l": wc counts newline
# characters, so a final line with no trailing newline would be left out of
# the count even though awk still reads it as a record when writing samples.
# The redirect is quoted so that paths containing blanks work.
SAMPLECOUNT=$(awk 'END { print NR }' <"$INPUT")
echo "INFO: sample count: $SAMPLECOUNT" 1>&2


# Write WAV binary directly to standard output.
# For format see eg: http://www.topherlee.com/software/pcm-tut-wavformat.html
# WAV header is 44 bytes long.
# Integers in WAV are little-endian: http://soundfile.sapp.org/doc/WaveFormat/

# Force the C locale so that awk's %c writes exactly one byte per value
# (a UTF-8 locale may encode values of 128 and above as multi-byte sequences).
# LC_ALL is set as well as LANG: LC_ALL takes precedence over LANG, so
# setting LANG alone has no effect when the caller already exports LC_ALL.
LANG=C
LC_ALL=C
export LANG LC_ALL

# Stream the samples through awk. The BEGIN rule writes the 44-byte WAV
# header before any input is read; the main rule then writes one 16-bit
# little-endian value per input line. Everything goes to standard output.
#
# Shell variables read:
#   INPUT        path of the sample file (quoted, so blanks are fine)
#   SAMPLECOUNT  number of samples, used for the size fields of the header
#   SAMPLEFREQ   sample frequency in Hz
# Both numbers are forced numeric inside awk with "+ 0", which also ignores
# any blank padding around the count.
awk -v SAMPLECOUNT="$SAMPLECOUNT" -v SAMPLEFREQ="$SAMPLEFREQ" '
    # Write v as a 4-byte little-endian unsigned integer.
    function le32(v) {
        printf("%c%c%c%c", int(v % 256), int(v / 256) % 256, int(v / 65536) % 256, int(v / 16777216) % 256);
    }

    # Write v as a 2-byte little-endian unsigned integer.
    function le16(v) {
        printf("%c%c", int(v % 256), int(v / 256) % 256);
    }

    BEGIN {
        sampleCount = SAMPLECOUNT + 0;
        sampleFreq = SAMPLEFREQ + 0;
        dataSize = 2 * sampleCount; # Two bytes of data per sample.
        fileSize = dataSize + 44;   # Header is 44 bytes long.

        # RIFF header...
        printf("RIFF");             # ChunkID
        le32(fileSize - 8);         # ChunkSize: size of the rest of the file.
        printf("WAVE");             # Format

        # "fmt " subchunk.
        printf("fmt ");             # Format subchunk.
        le32(16);                   # (16) Length of this subchunk; 16 for PCM.
        le16(1);                    # (1) LPCM format.
        le16(1);                    # (1) NumChannels (ie mono).
        le32(sampleFreq);           # Sample frequency (Hz).
        le32(sampleFreq * 2);       # ByteRate = SampleRate * NumChannels * BitsPerSample/8
        le16(2);                    # (2) BlockAlign = NumChannels * BitsPerSample/8
        le16(16);                   # (16) BitsPerSample.

        # Start of "data" subchunk.
        printf("data");
        le32(dataSize);             # Subchunk size in bytes.
    }

    {
        # Write each sample as a 2-byte little-endian signed integer.
        sample = int($1);
        # Out of range for signed 16 bits: write silence instead.
        # The upper limit is 32767; 32768 would wrap to -32768.
        if ((sample < -32768) || (sample > 32767)) { sample = 0; }
        if (sample < 0) { sample += 65536; } # Force 2s-complement to +ve value.
        le16(sample);
    }
' <"$INPUT"