Hello,

Your AES-CTR software implementation discards (i.e. does not encrypt) the trailing bytes of a message whose length is not a multiple of the block size. As a result, I was unable to encrypt successive messages of, say, 500 bytes each.
The quick fix below regenerates the previous keystream block on the next call and uses one additional byte, 'keypos', in the Aes structure.
One could, of course, save the leftover keystream instead of running the encryption again, but that would use 15 more bytes of memory.

/*
Quick fix
*/
/*
 * Counter-mode transform of an arbitrary-length buffer that can be called
 * repeatedly on successive pieces of one stream.
 *
 * aes  - cipher state; aes->reg is the block passed to AesEncrypt to produce
 *        keystream, and aes->keypos records how many bytes of the current
 *        keystream block were already used by an earlier call (0 when the
 *        previous call ended on a block boundary).
 * out  - receives sz bytes of output.
 * in   - sz bytes of input.
 * sz   - number of bytes to process; need not be a multiple of
 *        AES_BLOCK_SIZE.
 *
 * A partly used keystream block is not stored; it is regenerated from
 * aes->reg on the next call, and the counter is advanced only once a block
 * has been fully consumed.
 *
 * NOTE: the full-block loop writes keystream into out before reading in, so
 * in and out must not overlap.
 */
void AesCtrEncrypt_Correct(Aes* aes, byte* out, const byte* in, word32 sz)
{
    word32 blocks;
    word32 pos;
    word32 n;
    byte tmp[AES_BLOCK_SIZE];

    if (sz == 0) {
        return;
    }

    /* Use up what is left of the keystream block from the previous call. */
    pos = aes->keypos;
    if (pos) {
        /* Never touch more than the caller supplied: a short call may not
         * reach the end of the pending block. */
        n = AES_BLOCK_SIZE - pos;
        if (n > sz) {
            n = sz;
        }

        AesEncrypt(aes, (byte*)aes->reg, tmp);
        xorbuf(tmp + pos, in, n);
        memcpy(out, tmp + pos, n);

        out += n;
        in  += n;
        sz  -= n;
        pos += n;

        /* Advance the counter only when the block is fully consumed;
         * otherwise keep it so the next call regenerates the same block. */
        if (pos == AES_BLOCK_SIZE) {
            IncrementAesCounter((byte*)aes->reg);
            pos = 0;
        }
        aes->keypos = (byte)pos;
    }

    /* Whole blocks. */
    blocks = sz / AES_BLOCK_SIZE;
    while (blocks--) {
        AesEncrypt(aes, (byte*)aes->reg, out);
        IncrementAesCounter((byte*)aes->reg);
        xorbuf(out, in, AES_BLOCK_SIZE);
        out += AES_BLOCK_SIZE;
        in  += AES_BLOCK_SIZE;
    }

    /* Trailing partial block: the counter is deliberately left unchanged
     * and the number of keystream bytes used is remembered in keypos. */
    n = sz % AES_BLOCK_SIZE;
    if (n > 0) {
        AesEncrypt(aes, (byte*)aes->reg, tmp);
        xorbuf(tmp, in, n);
        memcpy(out, tmp, n);
        aes->keypos = (byte)n;
    }
}