From e6c66fcf68eafc5fa7fe2c1b8058dda3dc123708 Mon Sep 17 00:00:00 2001
From: Mashiat Sarker Shakkhar <shahriman_ams@yahoo.com>
Date: Wed, 30 Nov 2011 01:49:08 +0600
Subject: [PATCH 01/10] Fix some int vs. int16_t confusion

Size should be determined from bits per sample.
The current code only accepts 16-bit streams.
---
 libavcodec/wmalosslessdec.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index bfc3903835..48941aef79 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -250,7 +250,7 @@ typedef struct WmallDecodeCtx {
 	int coefsend;
 	int bitsend;
 	int16_t coefs[256];
-    int lms_prevvalues[512];    // FIXME: see above
+    int16_t lms_prevvalues[512];    // FIXME: see above
     int16_t lms_updates[512];   // and here too
     int recent;
     } cdlms[2][9];              /* XXX: Here, 2 is the max. no. of channels allowed,
@@ -279,7 +279,7 @@ typedef struct WmallDecodeCtx {
     int lpc_scaling;
     int lpc_intbits;
 
-    int channel_coeffs[2][2048];
+    int16_t channel_coeffs[2][2048]; // FIXME: should be 32-bit / 16-bit depending on bit-depth
 
 } WmallDecodeCtx;
 
@@ -761,7 +761,7 @@ static void clear_codec_buffers(WmallDecodeCtx *s)
     for (ich = 0; ich < s->num_channels; ich++) {
         for (ilms = 0; ilms < s->cdlms_ttl[ich]; ilms++) {
             memset(s->cdlms[ich][ilms].coefs         , 0, 256 * sizeof(int16_t));
-            memset(s->cdlms[ich][ilms].lms_prevvalues, 0, 512 * sizeof(int));
+            memset(s->cdlms[ich][ilms].lms_prevvalues, 0, 512 * sizeof(int16_t));
             memset(s->cdlms[ich][ilms].lms_updates   , 0, 512 * sizeof(int16_t));
         }
         s->ave_sum[ich] = 0;
@@ -789,7 +789,7 @@ static void reset_codec(WmallDecodeCtx *s)
 
 static int lms_predict(WmallDecodeCtx *s, int ich, int ilms)
 {
-    int32_t pred = 0, icoef;
+    int16_t pred = 0, icoef;
     int recent = s->cdlms[ich][ilms].recent;
 
     for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
@@ -806,7 +806,7 @@ static int lms_predict(WmallDecodeCtx *s, int ich, int ilms)
 
 static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int32_t input, int32_t pred)
 {
-    int icoef;
+    int16_t icoef;
     int recent = s->cdlms[ich][ilms].recent;
     int range = 1 << (s->bits_per_sample - 1);
     int bps = s->bits_per_sample > 16 ? 4 : 2; // bytes per sample
@@ -888,7 +888,7 @@ static void use_normal_update_speed(WmallDecodeCtx *s, int ich)
 static void revert_cdlms(WmallDecodeCtx *s, int tile_size)
 {
     int icoef, ich;
-    int32_t pred, channel_coeff;
+    int16_t pred, channel_coeff;
     int ilms, num_lms;
 
     for (ich = 0; ich < s->num_channels; ich++) {

From 81a3c67169aee9efe50bb8d4bffe635e55481f3a Mon Sep 17 00:00:00 2001
From: Mashiat Sarker Shakkhar <shahriman_ams@yahoo.com>
Date: Wed, 30 Nov 2011 02:15:08 +0600
Subject: [PATCH 02/10] Get rid of logging that is not required anymore

(Resolves some conflicts)
---
 libavcodec/wmalosslessdec.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index 48941aef79..ef663d3534 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -289,6 +289,8 @@ typedef struct WmallDecodeCtx {
 
 
 static int num_logged_tiles = 0;
+static int num_logged_subframes = 0;
+static int num_lms_update_call = 0;
 
 /**
  *@brief helper function to print the most important members of the context
@@ -692,7 +694,7 @@ static int decode_channel_residues(WmallDecodeCtx *s, int ch, int tile_size)
     if(s->seekable_tile) {
 	if(s->do_inter_ch_decorr)
 	    s->channel_residues[ch][0] = get_sbits(&s->gb, s->bits_per_sample + 1);
-	else
+    else
 	    s->channel_residues[ch][0] = get_sbits(&s->gb, s->bits_per_sample);
 	i++;
     }
@@ -716,9 +718,6 @@ static int decode_channel_residues(WmallDecodeCtx *s, int ch, int tile_size)
 	else
 	    residue = residue >> 1;
 	s->channel_residues[ch][i] = residue;
-
-    /*if (num_logged_tiles < 1)
-        av_log(0, 0, "%4d ", residue); */
     }
     dump_int_buffer(s->channel_residues[ch], tile_size, 16);
 
@@ -1056,6 +1055,7 @@ static int decode_subframe(WmallDecodeCtx *s)
         }
         ++s->channel[c].cur_subframe;
     }
+    num_logged_subframes++;
     return 0;
 }
 

From 6dd19c97c4e24b0fb52e092702fb1192ea569525 Mon Sep 17 00:00:00 2001
From: Mashiat Sarker Shakkhar <shahriman_ams@yahoo.com>
Date: Wed, 30 Nov 2011 01:52:33 +0600
Subject: [PATCH 03/10] Add a size argument to dump_int_buffer()

---
 libavcodec/wmalosslessdec.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index ef663d3534..d4b245a4c4 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -310,14 +310,14 @@ static void av_cold dump_context(WmallDecodeCtx *s)
     PRINT("num channels",        s->num_channels);
 }
 
-static int dump_int_buffer(int *buffer, int length, int delimiter)
+static void dump_int_buffer(uint8_t *buffer, int size, int length, int delimiter)
 {
     int i;
 
     for (i=0 ; i<length ; i++) {
         if (!(i%delimiter))
             av_log(0, 0, "\n[%d] ", i);
-        av_log(0, 0, "%d, ", buffer[i]);
+        av_log(0, 0, "%d, ", *(int16_t *)(buffer + i * size));
     }
     av_log(0, 0, "\n");
 
@@ -719,7 +719,7 @@ static int decode_channel_residues(WmallDecodeCtx *s, int ch, int tile_size)
 	    residue = residue >> 1;
 	s->channel_residues[ch][i] = residue;
     }
-    dump_int_buffer(s->channel_residues[ch], tile_size, 16);
+    dump_int_buffer(s->channel_residues[ch], 4, tile_size, 16);
 
     return 0;
 

From be8a0d26dbeec72b8e254e00724f170c28644c98 Mon Sep 17 00:00:00 2001
From: Mashiat Sarker Shakkhar <shahriman_ams@yahoo.com>
Date: Wed, 30 Nov 2011 01:55:21 +0600
Subject: [PATCH 04/10] Init s->cdlms[][].recent to order - 1

Not sure if this is correct, but the old value (order) looked like
it was giving us off-by-one errors when used to index
buffers.
---
 libavcodec/wmalosslessdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index d4b245a4c4..4be163e20d 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -776,7 +776,7 @@ static void reset_codec(WmallDecodeCtx *s)
     s->mclms_recent = s->mclms_order * s->num_channels;
     for (ich = 0; ich < s->num_channels; ich++) {
         for (ilms = 0; ilms < s->cdlms_ttl[ich]; ilms++)
-            s->cdlms[ich][ilms].recent = s->cdlms[ich][ilms].order;
+            s->cdlms[ich][ilms].recent = s->cdlms[ich][ilms].order - 1;
         /* first sample of a seekable subframe is considered as the starting of
            a transient area which is samples_per_frame samples long */
         s->channel[ich].transient_counter = s->samples_per_frame;

From bf8715719a3ec85010e7f909c0cdafb265f50fea Mon Sep 17 00:00:00 2001
From: Mashiat Sarker Shakkhar <shahriman_ams@yahoo.com>
Date: Wed, 30 Nov 2011 22:39:56 +0600
Subject: [PATCH 05/10] Fix two more int16_t vs. int confusions

If everything works fine for 16-bit streams,
the current code could be extended to do 24-bit.
---
 libavcodec/wmalosslessdec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index 4be163e20d..2a0789bdf0 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -237,7 +237,7 @@ typedef struct WmallDecodeCtx {
     int8_t mclms_scaling;
     int16_t mclms_coeffs[128];
     int16_t mclms_coeffs_cur[4];
-    int mclms_prevvalues[64];   // FIXME: should be 32-bit / 16-bit depending on bit-depth
+    int16_t mclms_prevvalues[64];   // FIXME: should be 32-bit / 16-bit depending on bit-depth
     int16_t mclms_updates[64];
     int mclms_recent;
 
@@ -807,7 +807,7 @@ static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int32_t input, int3
 {
     int16_t icoef;
     int recent = s->cdlms[ich][ilms].recent;
-    int range = 1 << (s->bits_per_sample - 1);
+    int16_t range = 1 << (s->bits_per_sample - 1);
     int bps = s->bits_per_sample > 16 ? 4 : 2; // bytes per sample
 
     if (input > pred) {

From 8aa831c07b47c4f384919cd38930ac64ce04b05b Mon Sep 17 00:00:00 2001
From: Mashiat Sarker Shakkhar <shahriman_ams@yahoo.com>
Date: Wed, 30 Nov 2011 22:41:12 +0600
Subject: [PATCH 06/10] Implement revert_mclms() and associated functions

---
 libavcodec/wmalosslessdec.c | 87 +++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index 2a0789bdf0..795fcaed47 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -786,6 +786,93 @@ static void reset_codec(WmallDecodeCtx *s)
 
 
 
+/* Adapt the multichannel LMS coefficients for sample icoef from the sign of
+ * (channel_coeffs - channel_residues), then push the sample onto the history. */
+static void mclms_update(WmallDecodeCtx *s, int icoef)
+{
+    int i, j, ich;
+    int16_t pred_error;
+    int order = s->mclms_order;
+    int num_channels = s->num_channels;
+    int range = 1 << (s->bits_per_sample - 1); /* int: 1 << 15 overflows int16_t */
+
+    for (ich = 0; ich < num_channels; ich++) {
+        pred_error = s->channel_coeffs[ich][icoef] -
+                     s->channel_residues[ich][icoef];
+        if (pred_error > 0) {
+            for (i = 0; i < order * num_channels; i++)
+                s->mclms_coeffs[i + ich * order * num_channels] +=
+                    s->mclms_updates[s->mclms_recent + i];
+            /* j < ich keeps the index inside mclms_coeffs_cur[]; NOTE(review): confirm sign source [j] */
+            for (j = 0; j < ich; j++) {
+                if (s->channel_coeffs[j][icoef] > 0)
+                    s->mclms_coeffs_cur[ich * num_channels + j] += 1;
+                else if (s->channel_coeffs[j][icoef] < 0)
+                    s->mclms_coeffs_cur[ich * num_channels + j] -= 1;
+            }
+        } else if (pred_error < 0) {
+            for (i = 0; i < order * num_channels; i++)
+                s->mclms_coeffs[i + ich * order * num_channels] -=
+                    s->mclms_updates[s->mclms_recent + i];
+            for (j = 0; j < ich; j++) {
+                if (s->channel_coeffs[j][icoef] > 0)
+                    s->mclms_coeffs_cur[ich * num_channels + j] -= 1;
+                else if (s->channel_coeffs[j][icoef] < 0)
+                    s->mclms_coeffs_cur[ich * num_channels + j] += 1;
+            }
+        }
+    }
+
+    for (ich = num_channels - 1; ich >= 0; ich--) {
+        s->mclms_recent--;
+        /* always store the sample, clamped to [-range, range - 1] */
+        s->mclms_prevvalues[s->mclms_recent] =
+            av_clip(s->channel_coeffs[ich][icoef], -range, range - 1);
+        /* av_clip(a, amin, amax): the value comes first; this keeps only its sign */
+        s->mclms_updates[s->mclms_recent] =
+            av_clip(s->channel_coeffs[ich][icoef], -1, 1);
+    }
+
+    if (s->mclms_recent == 0) {
+        /* index reached 0: copy the first order * num_channels entries upwards and restart */
+        memcpy(&s->mclms_prevvalues[order * num_channels], s->mclms_prevvalues,
+               sizeof(*s->mclms_prevvalues) * order * num_channels);
+        memcpy(&s->mclms_updates[order * num_channels], s->mclms_updates,
+               sizeof(*s->mclms_updates) * order * num_channels);
+        s->mclms_recent = num_channels * order;
+    }
+}
+/* Reconstruct sample icoef of each coded channel as residue plus the MCLMS prediction. */
+static void mclms_predict(WmallDecodeCtx *s, int icoef)
+{
+    int ich, i;
+    int16_t pred;
+    int order = s->mclms_order;
+    int num_channels = s->num_channels;
+    for (ich = 0; ich < num_channels; ich++) {
+        if (!s->is_channel_coded[ich])
+            continue;
+        pred = 0;
+        for (i = 0; i < order * num_channels; i++) /* history starts at mclms_recent */
+            pred += s->mclms_prevvalues[s->mclms_recent + i] *
+                    s->mclms_coeffs[i + order * num_channels * ich];
+        /* earlier channels of this sample, indexed like mclms_update(); NOTE(review): confirm [i] */
+        for (i = 0; i < ich; i++)
+            pred += s->channel_coeffs[i][icoef] *
+                    s->mclms_coeffs_cur[i + num_channels * ich];
+        s->channel_coeffs[ich][icoef] = s->channel_residues[ich][icoef] + pred;
+    }
+}
+
+static void revert_mclms(WmallDecodeCtx *s, int tile_size) /* undo MCLMS for one tile */
+{
+    int pos = 0;                    /* sample index inside the tile */
+    while (pos < tile_size) {
+        mclms_predict(s, pos);      /* reconstruct the sample first ... */
+        mclms_update(s, pos++);     /* ... then adapt the filter with it */
+    }
+}
+
 static int lms_predict(WmallDecodeCtx *s, int ich, int ilms)
 {
     int16_t pred = 0, icoef;

From 768261e50d70fd8129b0f1672a48451344a220e9 Mon Sep 17 00:00:00 2001
From: Mashiat Sarker Shakkhar <shahriman_ams@yahoo.com>
Date: Fri, 2 Dec 2011 02:58:00 +0600
Subject: [PATCH 07/10] Fix some int / int16_t / int32_t confusion

For now force everything to int16_t, except
for variables used as index
---
 libavcodec/wmalosslessdec.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index 795fcaed47..3b3ff0f749 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -875,7 +875,8 @@ static void revert_mclms(WmallDecodeCtx *s, int tile_size)
 
 static int lms_predict(WmallDecodeCtx *s, int ich, int ilms)
 {
-    int16_t pred = 0, icoef;
+    int16_t pred = 0;
+    int icoef;
     int recent = s->cdlms[ich][ilms].recent;
 
     for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
@@ -890,7 +891,7 @@ static int lms_predict(WmallDecodeCtx *s, int ich, int ilms)
     return pred;
 }
 
-static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int32_t input, int32_t pred)
+static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int16_t input, int16_t pred)
 {
     int16_t icoef;
     int recent = s->cdlms[ich][ilms].recent;

From 29c2fcb6776f80a0a5551bb82b43bc14c8202331 Mon Sep 17 00:00:00 2001
From: Mashiat Sarker Shakkhar <shahriman_ams@yahoo.com>
Date: Fri, 2 Dec 2011 03:11:21 +0600
Subject: [PATCH 08/10] Use correct value for range

The current range value causes an underflow
when negated and pushes anything less than zero
to the minimum.
---
 libavcodec/wmalosslessdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index 3b3ff0f749..5a7957db61 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -895,7 +895,7 @@ static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int16_t input, int1
 {
     int16_t icoef;
     int recent = s->cdlms[ich][ilms].recent;
-    int16_t range = 1 << (s->bits_per_sample - 1);
+    int16_t range = (1 << s->bits_per_sample - 1) - 1;
     int bps = s->bits_per_sample > 16 ? 4 : 2; // bytes per sample
 
     if (input > pred) {

From 075ebdf73563084bb35f5853ff661ed16a4819f1 Mon Sep 17 00:00:00 2001
From: Mashiat Sarker Shakkhar <shahriman_ams@yahoo.com>
Date: Fri, 2 Dec 2011 03:02:45 +0600
Subject: [PATCH 09/10] Move num_lms reading out of a loop

---
 libavcodec/wmalosslessdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index 5a7957db61..cdaf2f312c 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -981,8 +981,8 @@ static void revert_cdlms(WmallDecodeCtx *s, int tile_size)
     for (ich = 0; ich < s->num_channels; ich++) {
         if (!s->is_channel_coded[ich])
             continue;
+        num_lms = s->cdlms_ttl[ich];
         for (icoef = 0; icoef < tile_size; icoef++) {
-            num_lms = s->cdlms_ttl[ich];
             channel_coeff = s->channel_residues[ich][icoef];
             if (icoef == s->transient_pos[ich]) {
                 s->transient[ich] = 1;

From 460bec6800f6229b664297a6c83a5ef4f0701221 Mon Sep 17 00:00:00 2001
From: Mashiat Sarker Shakkhar <shahriman_ams@yahoo.com>
Date: Fri, 2 Dec 2011 16:24:50 +0600
Subject: [PATCH 10/10] Fix lms_update()

---
 libavcodec/wmalosslessdec.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index cdaf2f312c..90a2c97fcd 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -891,21 +891,23 @@ static int lms_predict(WmallDecodeCtx *s, int ich, int ilms)
     return pred;
 }
 
-static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int16_t input, int16_t pred)
+static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int16_t residue, int16_t pred)
 {
     int16_t icoef;
     int recent = s->cdlms[ich][ilms].recent;
     int16_t range = (1 << s->bits_per_sample - 1) - 1;
     int bps = s->bits_per_sample > 16 ? 4 : 2; // bytes per sample
+    int16_t input = residue + pred;
 
-    if (input > pred) {
+    if (residue > 0) {
         for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
             s->cdlms[ich][ilms].coefs[icoef] +=
                 s->cdlms[ich][ilms].lms_updates[icoef + recent];
     } else {
         for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
             s->cdlms[ich][ilms].coefs[icoef] -=
-                s->cdlms[ich][ilms].lms_updates[icoef];     // XXX: [icoef + recent] ?
+                s->cdlms[ich][ilms].lms_updates[icoef + recent];    /* spec mistakenly
+                                                                    dropped the recent */
     }
     s->cdlms[ich][ilms].recent--;
     s->cdlms[ich][ilms].lms_prevvalues[recent] = av_clip(input, -range, range - 1);
@@ -990,8 +992,8 @@ static void revert_cdlms(WmallDecodeCtx *s, int tile_size)
             }
             for (ilms = num_lms - 1; ilms >= 0; ilms--) {
                 pred = lms_predict(s, ich, ilms);
-                channel_coeff += pred;
                 lms_update(s, ich, ilms, channel_coeff, pred);
+                channel_coeff += pred;
             }
             if (s->transient[ich]) {
                 --s->channel[ich].transient_counter;