638 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			638 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| =============================================
 | |
| Snow Video Codec Specification Draft 20080110
 | |
| =============================================
 | |
| 
 | |
| Introduction:
 | |
| =============
 | |
| This specification describes the Snow bitstream syntax and semantics as
 | |
| well as the formal Snow decoding process.
 | |
| 
 | |
| The decoding process is described precisely and any compliant decoder
 | |
| MUST produce the exact same output for a spec-conformant Snow stream.
 | |
| For encoding, though, any process which generates a stream compliant to
 | |
| the syntactical and semantic requirements and which is decodable by
 | |
| the process described in this spec shall be considered a conformant
 | |
| Snow encoder.
 | |
| 
 | |
| Definitions:
 | |
| ============
 | |
| 
 | |
| MUST    the specific part must be done to conform to this standard
 | |
| SHOULD  it is recommended to be done that way, but not strictly required
 | |
| 
 | |
| ilog2(x) is the rounded down logarithm of x with basis 2
 | |
| ilog2(0) = 0
 | |
| 
 | |
| Type definitions:
 | |
| =================
 | |
| 
 | |
| b   1-bit range coded
 | |
| u   unsigned scalar value range coded
 | |
| s   signed scalar value range coded
 | |
| 
 | |
| 
 | |
| Bitstream syntax:
 | |
| =================
 | |
| 
 | |
| frame:
 | |
|     header
 | |
|     prediction
 | |
|     residual
 | |
| 
 | |
| header:
 | |
|     keyframe                            b   MID_STATE
 | |
|     if(keyframe || always_reset)
 | |
|         reset_contexts
 | |
|     if(keyframe){
 | |
|         version                         u   header_state
 | |
|         always_reset                    b   header_state
 | |
|         temporal_decomposition_type     u   header_state
 | |
|         temporal_decomposition_count    u   header_state
 | |
|         spatial_decomposition_count     u   header_state
 | |
|         colorspace_type                 u   header_state
 | |
|         if (nb_planes > 2) {
 | |
|             chroma_h_shift              u   header_state
 | |
|             chroma_v_shift              u   header_state
 | |
|         }
 | |
|         spatial_scalability             b   header_state
 | |
|         max_ref_frames-1                u   header_state
 | |
|         qlogs
 | |
|     }
 | |
|     if(!keyframe){
 | |
|         update_mc                       b   header_state
 | |
|         if(update_mc){
 | |
|             for(plane=0; plane<nb_plane_types; plane++){
 | |
|                 diag_mc                 b   header_state
 | |
|                 htaps/2-1               u   header_state
 | |
|                 for(i= p->htaps/2; i; i--)
 | |
|                     |hcoeff[i]|         u   header_state
 | |
|             }
 | |
|         }
 | |
|         update_qlogs                    b   header_state
 | |
|         if(update_qlogs){
 | |
|             spatial_decomposition_count u   header_state
 | |
|             qlogs
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     spatial_decomposition_type          s   header_state
 | |
|     qlog                                s   header_state
 | |
|     mv_scale                            s   header_state
 | |
|     qbias                               s   header_state
 | |
|     block_max_depth                     s   header_state
 | |
| 
 | |
| qlogs:
 | |
|     for(plane=0; plane<nb_plane_types; plane++){
 | |
|         quant_table[plane][0][0]        s   header_state
 | |
|         for(level=0; level < spatial_decomposition_count; level++){
 | |
|             quant_table[plane][level][1]s   header_state
 | |
|             quant_table[plane][level][3]s   header_state
 | |
|         }
 | |
|     }
 | |
| 
 | |
| reset_contexts
 | |
|     *_state[*]= MID_STATE
 | |
| 
 | |
| prediction:
 | |
|     for(y=0; y<block_count_vertical; y++)
 | |
|         for(x=0; x<block_count_horizontal; x++)
 | |
|             block(0)
 | |
| 
 | |
| block(level):
 | |
|     mvx_diff=mvy_diff=y_diff=cb_diff=cr_diff=0
 | |
|     if(keyframe){
 | |
|         intra=1
 | |
|     }else{
 | |
|         if(level!=max_block_depth){
 | |
|             s_context= 2*left->level + 2*top->level + topleft->level + topright->level
 | |
|             leaf                        b   block_state[4 + s_context]
 | |
|         }
 | |
|         if(level==max_block_depth || leaf){
 | |
|             intra                       b   block_state[1 + left->intra + top->intra]
 | |
|             if(intra){
 | |
|                 y_diff                  s   block_state[32]
 | |
|                 cb_diff                 s   block_state[64]
 | |
|                 cr_diff                 s   block_state[96]
 | |
|             }else{
 | |
|                 ref_context= ilog2(2*left->ref) + ilog2(2*top->ref)
 | |
|                 if(ref_frames > 1)
 | |
|                     ref                 u   block_state[128 + 1024 + 32*ref_context]
 | |
|                 mx_context= ilog2(2*abs(left->mx - top->mx))
 | |
|                 my_context= ilog2(2*abs(left->my - top->my))
 | |
|                 mvx_diff                s   block_state[128 + 32*(mx_context + 16*!!ref)]
 | |
|                 mvy_diff                s   block_state[128 + 32*(my_context + 16*!!ref)]
 | |
|             }
 | |
|         }else{
 | |
|             block(level+1)
 | |
|             block(level+1)
 | |
|             block(level+1)
 | |
|             block(level+1)
 | |
|         }
 | |
|     }
 | |
| 
 | |
| 
 | |
| residual:
 | |
|     residual2(luma)
 | |
|     if (nb_planes > 2) {
 | |
|         residual2(chroma_cr)
 | |
|         residual2(chroma_cb)
 | |
|     }
 | |
| 
 | |
| residual2:
 | |
|     for(level=0; level<spatial_decomposition_count; level++){
 | |
|         if(level==0)
 | |
|             subband(LL, 0)
 | |
|         subband(HL, level)
 | |
|         subband(LH, level)
 | |
|         subband(HH, level)
 | |
|     }
 | |
| 
 | |
| subband:
 | |
|     FIXME
 | |
| 
 | |
| nb_plane_types = gray ? 1 : 2;
 | |
| 
 | |
| Tag description:
 | |
| ----------------
 | |
| 
 | |
| version
 | |
|     0
 | |
|     this MUST NOT change within a bitstream
 | |
| 
 | |
| always_reset
 | |
|     if 1 then the range coder contexts will be reset after each frame
 | |
| 
 | |
| temporal_decomposition_type
 | |
|     0
 | |
| 
 | |
| temporal_decomposition_count
 | |
|     0
 | |
| 
 | |
| spatial_decomposition_count
 | |
|     FIXME
 | |
| 
 | |
| colorspace_type
 | |
|     0   unspecified YCbCr
 | |
|     1   Gray
 | |
|     2   Gray + Alpha
 | |
|     3   GBR
 | |
|     4   GBRA
 | |
|     this MUST NOT change within a bitstream
 | |
| 
 | |
| chroma_h_shift
 | |
|     log2(luma.width / chroma.width)
 | |
|     this MUST NOT change within a bitstream
 | |
| 
 | |
| chroma_v_shift
 | |
|     log2(luma.height / chroma.height)
 | |
|     this MUST NOT change within a bitstream
 | |
| 
 | |
| spatial_scalability
 | |
|     0
 | |
| 
 | |
| max_ref_frames
 | |
|     maximum number of reference frames
 | |
|     this MUST NOT change within a bitstream
 | |
| 
 | |
| update_mc
 | |
|     indicates that motion compensation filter parameters are stored in the
 | |
|     header
 | |
| 
 | |
| diag_mc
 | |
|     flag to enable faster diagonal interpolation
 | |
|     this SHOULD be 1 unless it turns out to be covered by a valid patent
 | |
| 
 | |
| htaps
 | |
|     number of half pel interpolation filter taps, MUST be even, >0 and <10
 | |
| 
 | |
| hcoeff
 | |
|     half pel interpolation filter coefficients, hcoeff[0] are the 2 middle
 | |
|     coefficients [1] are the next outer ones and so on, resulting in a filter
 | |
|     like: ...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ...
 | |
|     the sign of the coefficients is not explicitly stored but alternates
 | |
|     after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,...
 | |
|     hcoeff[0] is not explicitly stored but found by subtracting the sum
 | |
|     of all stored coefficients with signs from 32
 | |
|     hcoeff[0]= 32 - hcoeff[1] - hcoeff[2] - ...
 | |
|     a good choice for hcoeff and htaps is
 | |
|     htaps= 6
 | |
|     hcoeff={40,-10,2}
 | |
|     an alternative which requires more computations at both encoder and
 | |
|     decoder side and may or may not be better is
 | |
|     htaps= 8
 | |
|     hcoeff={42,-14,6,-2}
 | |
| 
 | |
| 
 | |
| ref_frames
 | |
|     minimum of the number of available reference frames and max_ref_frames
 | |
|     for example the first frame after a key frame always has ref_frames=1
 | |
| 
 | |
| spatial_decomposition_type
 | |
|     wavelet type
 | |
|     0 is a 9/7 symmetric compact integer wavelet
 | |
|     1 is a 5/3 symmetric compact integer wavelet
 | |
|     others are reserved
 | |
|     stored as delta from last, last is reset to 0 if always_reset || keyframe
 | |
| 
 | |
| qlog
 | |
|     quality (logarithmic quantizer scale)
 | |
|     stored as delta from last, last is reset to 0 if always_reset || keyframe
 | |
| 
 | |
| mv_scale
 | |
|     stored as delta from last, last is reset to 0 if always_reset || keyframe
 | |
|     FIXME check that everything works fine if this changes between frames
 | |
| 
 | |
| qbias
 | |
|     dequantization bias
 | |
|     stored as delta from last, last is reset to 0 if always_reset || keyframe
 | |
| 
 | |
| block_max_depth
 | |
|     maximum depth of the block tree
 | |
|     stored as delta from last, last is reset to 0 if always_reset || keyframe
 | |
| 
 | |
| quant_table
 | |
|     quantization table
 | |
| 
 | |
| 
 | |
| Highlevel bitstream structure:
 | |
| ==============================
 | |
|  --------------------------------------------
 | |
| |                   Header                   |
 | |
|  --------------------------------------------
 | |
| |    ------------------------------------    |
 | |
| |   |               Block0               |   |
 | |
| |   |             split?                 |   |
 | |
| |   |     yes              no            |   |
 | |
| |   |  .........         intra?          |   |
 | |
| |   | : Block01 :    yes         no      |   |
 | |
| |   | : Block02 :  .......   ..........  |   |
 | |
| |   | : Block03 : :  y DC : : ref index: |   |
 | |
| |   | : Block04 : : cb DC : : motion x : |   |
 | |
| |   |  .........  : cr DC : : motion y : |   |
 | |
| |   |              .......   ..........  |   |
 | |
| |    ------------------------------------    |
 | |
| |    ------------------------------------    |
 | |
| |   |               Block1               |   |
 | |
| |                    ...                     |
 | |
|  --------------------------------------------
 | |
| | ------------   ------------   ------------ |
 | |
| || Y subbands | | Cb subbands| | Cr subbands||
 | |
| ||  ---  ---  | |  ---  ---  | |  ---  ---  ||
 | |
| || |LL0||HL0| | | |LL0||HL0| | | |LL0||HL0| ||
 | |
| ||  ---  ---  | |  ---  ---  | |  ---  ---  ||
 | |
| ||  ---  ---  | |  ---  ---  | |  ---  ---  ||
 | |
| || |LH0||HH0| | | |LH0||HH0| | | |LH0||HH0| ||
 | |
| ||  ---  ---  | |  ---  ---  | |  ---  ---  ||
 | |
| ||  ---  ---  | |  ---  ---  | |  ---  ---  ||
 | |
| || |HL1||LH1| | | |HL1||LH1| | | |HL1||LH1| ||
 | |
| ||  ---  ---  | |  ---  ---  | |  ---  ---  ||
 | |
| ||  ---  ---  | |  ---  ---  | |  ---  ---  ||
 | |
| || |HH1||HL2| | | |HH1||HL2| | | |HH1||HL2| ||
 | |
| ||    ...     | |    ...     | |    ...     ||
 | |
| | ------------   ------------   ------------ |
 | |
|  --------------------------------------------
 | |
| 
 | |
| Decoding process:
 | |
| =================
 | |
| 
 | |
|                                          ------------
 | |
|                                         |            |
 | |
|                                         |  Subbands  |
 | |
|                    ------------         |            |
 | |
|                   |            |         ------------
 | |
|                   |  Intra DC  |               |
 | |
|                   |            |    LL0 subband prediction
 | |
|                    ------------                |
 | |
|                                 \        Dequantization
 | |
|  -------------------             \             |
 | |
| |  Reference frames |             \           IDWT
 | |
| | -------   ------- |    Motion    \           |
 | |
| ||Frame 0| |Frame 1|| Compensation  .   OBMC   v      -------
 | |
| | -------   ------- | --------------. \------> + --->|Frame n|-->output
 | |
| | -------   ------- |                                 -------
 | |
| ||Frame 2| |Frame 3||<----------------------------------/
 | |
| |        ...        |
 | |
|  -------------------
 | |
| 
 | |
| 
 | |
| Range Coder:
 | |
| ============
 | |
| 
 | |
| Binary Range Coder:
 | |
| -------------------
 | |
| The implemented range coder is an adapted version based upon "Range encoding:
 | |
| an algorithm for removing redundancy from a digitised message." by G. N. N.
 | |
| Martin.
 | |
| The symbols encoded by the Snow range coder are bits (0|1). The
 | |
| associated probabilities are not fix but change depending on the symbol mix
 | |
| seen so far.
 | |
| 
 | |
| 
 | |
| bit seen | new state
 | |
| ---------+-----------------------------------------------
 | |
|     0    | 256 - state_transition_table[256 - old_state];
 | |
|     1    |       state_transition_table[      old_state];
 | |
| 
 | |
| state_transition_table = {
 | |
|   0,   0,   0,   0,   0,   0,   0,   0,  20,  21,  22,  23,  24,  25,  26,  27,
 | |
|  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,
 | |
|  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  56,  57,
 | |
|  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,
 | |
|  74,  75,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,
 | |
|  89,  90,  91,  92,  93,  94,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
 | |
| 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118,
 | |
| 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133,
 | |
| 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
 | |
| 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
 | |
| 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179,
 | |
| 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194,
 | |
| 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209,
 | |
| 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225,
 | |
| 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240,
 | |
| 241, 242, 243, 244, 245, 246, 247, 248, 248,   0,   0,   0,   0,   0,   0,   0};
 | |
| 
 | |
| FIXME
 | |
| 
 | |
| 
 | |
| Range Coding of integers:
 | |
| -------------------------
 | |
| FIXME
 | |
| 
 | |
| 
 | |
| Neighboring Blocks:
 | |
| ===================
 | |
| left and top are set to the respective blocks unless they are outside of
 | |
| the image in which case they are set to the Null block
 | |
| 
 | |
| top-left is set to the top left block unless it is outside of the image in
 | |
| which case it is set to the left block
 | |
| 
 | |
| if this block has no larger parent block or it is at the left side of its
 | |
| parent block and the top right block is not outside of the image then the
 | |
| top right block is used for top-right else the top-left block is used
 | |
| 
 | |
| Null block
 | |
| y,cb,cr are 128
 | |
| level, ref, mx and my are 0
 | |
| 
 | |
| 
 | |
| Motion Vector Prediction:
 | |
| =========================
 | |
| 1. the motion vectors of all the neighboring blocks are scaled to
 | |
| compensate for the difference of reference frames
 | |
| 
 | |
| scaled_mv= (mv * (256 * (current_reference+1) / (mv.reference+1)) + 128)>>8
 | |
| 
 | |
| 2. the median of the scaled left, top and top-right vectors is used as
 | |
| motion vector prediction
 | |
| 
 | |
| 3. the used motion vector is the sum of the predictor and
 | |
|    (mvx_diff, mvy_diff)*mv_scale
 | |
| 
 | |
| 
 | |
| Intra DC Prediction:
 | |
| ====================
 | |
| the luma and chroma values of the left block are used as predictors
 | |
| 
 | |
| the used luma and chroma is the sum of the predictor and y_diff, cb_diff, cr_diff
 | |
| to reverse this in the decoder apply the following:
 | |
| block[y][x].dc[0] = block[y][x-1].dc[0] +  y_diff;
 | |
| block[y][x].dc[1] = block[y][x-1].dc[1] + cb_diff;
 | |
| block[y][x].dc[2] = block[y][x-1].dc[2] + cr_diff;
 | |
| block[*][-1].dc[*]= 128;
 | |
| 
 | |
| 
 | |
| Motion Compensation:
 | |
| ====================
 | |
| 
 | |
| Halfpel interpolation:
 | |
| ----------------------
 | |
| Halfpel interpolation is done by convolution with the halfpel filter stored
 | |
| in the header:
 | |
| 
 | |
| horizontal halfpel samples are found by
 | |
| H1[y][x] =    hcoeff[0]*(F[y][x  ] + F[y][x+1])
 | |
|             + hcoeff[1]*(F[y][x-1] + F[y][x+2])
 | |
|             + hcoeff[2]*(F[y][x-2] + F[y][x+3])
 | |
|             + ...
 | |
| h1[y][x] = (H1[y][x] + 32)>>6;
 | |
| 
 | |
| vertical halfpel samples are found by
 | |
| H2[y][x] =    hcoeff[0]*(F[y  ][x] + F[y+1][x])
 | |
|             + hcoeff[1]*(F[y-1][x] + F[y+2][x])
 | |
|             + ...
 | |
| h2[y][x] = (H2[y][x] + 32)>>6;
 | |
| 
 | |
| vertical+horizontal halfpel samples are found by
 | |
| H3[y][x] =    hcoeff[0]*(H2[y][x  ] + H2[y][x+1])
 | |
|             + hcoeff[1]*(H2[y][x-1] + H2[y][x+2])
 | |
|             + ...
 | |
| H3[y][x] =    hcoeff[0]*(H1[y  ][x] + H1[y+1][x])
 | |
|             + hcoeff[1]*(H1[y+1][x] + H1[y+2][x])
 | |
|             + ...
 | |
| h3[y][x] = (H3[y][x] + 2048)>>12;
 | |
| 
 | |
| 
 | |
|                    F   H1  F
 | |
|                    |   |   |
 | |
|                    |   |   |
 | |
|                    |   |   |
 | |
|                    F   H1  F
 | |
|                    |   |   |
 | |
|                    |   |   |
 | |
|                    |   |   |
 | |
|    F-------F-------F-> H1<-F-------F-------F
 | |
|                    v   v   v
 | |
|                   H2   H3  H2
 | |
|                    ^   ^   ^
 | |
|    F-------F-------F-> H1<-F-------F-------F
 | |
|                    |   |   |
 | |
|                    |   |   |
 | |
|                    |   |   |
 | |
|                    F   H1  F
 | |
|                    |   |   |
 | |
|                    |   |   |
 | |
|                    |   |   |
 | |
|                    F   H1  F
 | |
| 
 | |
| 
 | |
| unavailable fullpel samples (outside the picture for example) shall be equal
 | |
| to the closest available fullpel sample
 | |
| 
 | |
| 
 | |
| Smaller pel interpolation:
 | |
| --------------------------
 | |
| if diag_mc is set then points which lie on a line between 2 vertically,
 | |
| horizontally or diagonally adjacent halfpel points shall be interpolated
 | |
| linearly with rounding to nearest and halfway values rounded up.
 | |
| points which lie on 2 diagonals at the same time should only use the one
 | |
| diagonal not containing the fullpel point
 | |
| 
 | |
| 
 | |
| 
 | |
|            F-->O---q---O<--h1->O---q---O<--F
 | |
|            v \           / v \           / v
 | |
|            O   O       O   O   O       O   O
 | |
|            |         /     |     \         |
 | |
|            q       q       q       q       q
 | |
|            |     /         |         \     |
 | |
|            O   O       O   O   O       O   O
 | |
|            ^ /           \ ^ /           \ ^
 | |
|           h2-->O---q---O<--h3->O---q---O<--h2
 | |
|            v \           / v \           / v
 | |
|            O   O       O   O   O       O   O
 | |
|            |     \         |         /     |
 | |
|            q       q       q       q       q
 | |
|            |         \     |     /         |
 | |
|            O   O       O   O   O       O   O
 | |
|            ^ /           \ ^ /           \ ^
 | |
|            F-->O---q---O<--h1->O---q---O<--F
 | |
| 
 | |
| 
 | |
| 
 | |
| the remaining points shall be bilinearly interpolated from the
 | |
| up to 4 surrounding halfpel and fullpel points, again rounding should be to
 | |
| nearest and halfway values rounded up
 | |
| 
 | |
| compliant Snow decoders MUST support 1-1/8 pel luma and 1/2-1/16 pel chroma
 | |
| interpolation at least
 | |
| 
 | |
| 
 | |
| Overlapped block motion compensation:
 | |
| -------------------------------------
 | |
| FIXME
 | |
| 
 | |
| LL band prediction:
 | |
| ===================
 | |
| Each sample in the LL0 subband is predicted by the median of the left, top and
 | |
| left+top-topleft samples, samples outside the subband shall be considered to
 | |
| be 0. To reverse this prediction in the decoder apply the following.
 | |
| for(y=0; y<height; y++){
 | |
|     for(x=0; x<width; x++){
 | |
|         sample[y][x] += median(sample[y-1][x],
 | |
|                                sample[y][x-1],
 | |
|                                sample[y-1][x]+sample[y][x-1]-sample[y-1][x-1]);
 | |
|     }
 | |
| }
 | |
| sample[-1][*]=sample[*][-1]= 0;
 | |
| width,height here are the width and height of the LL0 subband not of the final
 | |
| video
 | |
| 
 | |
| 
 | |
| Dequantization:
 | |
| ===============
 | |
| FIXME
 | |
| 
 | |
| Wavelet Transform:
 | |
| ==================
 | |
| 
 | |
| Snow supports 2 wavelet transforms, the symmetric biorthogonal 5/3 integer
 | |
| transform and an integer approximation of the symmetric biorthogonal 9/7
 | |
| daubechies wavelet.
 | |
| 
 | |
| 2D IDWT (inverse discrete wavelet transform)
 | |
| --------------------------------------------
 | |
| The 2D IDWT applies a 2D filter recursively, each time combining the
 | |
| 4 lowest frequency subbands into a single subband until only 1 subband
 | |
| remains.
 | |
| The 2D filter is done by first applying a 1D filter in the vertical direction
 | |
| and then applying it in the horizontal one.
 | |
|  ---------------    ---------------    ---------------    ---------------
 | |
| |LL0|HL0|       |  |   |   |       |  |       |       |  |       |       |
 | |
| |---+---|  HL1  |  | L0|H0 |  HL1  |  |  LL1  |  HL1  |  |       |       |
 | |
| |LH0|HH0|       |  |   |   |       |  |       |       |  |       |       |
 | |
| |-------+-------|->|-------+-------|->|-------+-------|->|   L1  |  H1   |->...
 | |
| |       |       |  |       |       |  |       |       |  |       |       |
 | |
| |  LH1  |  HH1  |  |  LH1  |  HH1  |  |  LH1  |  HH1  |  |       |       |
 | |
| |       |       |  |       |       |  |       |       |  |       |       |
 | |
|  ---------------    ---------------    ---------------    ---------------
 | |
| 
 | |
| 
 | |
| 1D Filter:
 | |
| ----------
 | |
| 1. interleave the samples of the low and high frequency subbands like
 | |
| s={L0, H0, L1, H1, L2, H2, L3, H3, ... }
 | |
| note, this can end with a L or a H, the number of elements shall be w
 | |
| s[-1] shall be considered equivalent to s[1  ]
 | |
| s[w ] shall be considered equivalent to s[w-2]
 | |
| 
 | |
| 2. perform the lifting steps in order as described below
 | |
| 
 | |
| 5/3 Integer filter:
 | |
| 1. s[i] -= (s[i-1] + s[i+1] + 2)>>2; for all even i < w
 | |
| 2. s[i] += (s[i-1] + s[i+1]    )>>1; for all odd  i < w
 | |
| 
 | |
| \ | /|\ | /|\ | /|\ | /|\
 | |
|  \|/ | \|/ | \|/ | \|/ |
 | |
|   +  |  +  |  +  |  +  |   -1/4
 | |
|  /|\ | /|\ | /|\ | /|\ |
 | |
| / | \|/ | \|/ | \|/ | \|/
 | |
|   |  +  |  +  |  +  |  +   +1/2
 | |
| 
 | |
| 
 | |
| Snow's 9/7 Integer filter:
 | |
| 1. s[i] -= (3*(s[i-1] + s[i+1])         + 4)>>3; for all even i < w
 | |
| 2. s[i] -=     s[i-1] + s[i+1]                 ; for all odd  i < w
 | |
| 3. s[i] += (   s[i-1] + s[i+1] + 4*s[i] + 8)>>4; for all even i < w
 | |
| 4. s[i] += (3*(s[i-1] + s[i+1])            )>>1; for all odd  i < w
 | |
| 
 | |
| \ | /|\ | /|\ | /|\ | /|\
 | |
|  \|/ | \|/ | \|/ | \|/ |
 | |
|   +  |  +  |  +  |  +  |   -3/8
 | |
|  /|\ | /|\ | /|\ | /|\ |
 | |
| / | \|/ | \|/ | \|/ | \|/
 | |
|  (|  + (|  + (|  + (|  +   -1
 | |
| \ + /|\ + /|\ + /|\ + /|\  +1/4
 | |
|  \|/ | \|/ | \|/ | \|/ |
 | |
|   +  |  +  |  +  |  +  |   +1/16
 | |
|  /|\ | /|\ | /|\ | /|\ |
 | |
| / | \|/ | \|/ | \|/ | \|/
 | |
|   |  +  |  +  |  +  |  +   +3/2
 | |
| 
 | |
| optimization tips:
 | |
| following are exactly identical
 | |
| (3a)>>1 == a + (a>>1)
 | |
| (a + 4b + 8)>>4 == ((a>>2) + b + 2)>>2
 | |
| 
 | |
| 16bit implementation note:
 | |
| The IDWT can be implemented with 16bits, but this requires some care to
 | |
| prevent overflows, the following list, lists the minimum number of bits needed
 | |
| for some terms
 | |
| 1. lifting step
 | |
| A= s[i-1] + s[i+1]                              16bit
 | |
| 3*A + 4                                         18bit
 | |
| A + (A>>1) + 2                                  17bit
 | |
| 
 | |
| 3. lifting step
 | |
| s[i-1] + s[i+1]                                 17bit
 | |
| 
 | |
| 4. lifiting step
 | |
| 3*(s[i-1] + s[i+1])                             17bit
 | |
| 
 | |
| 
 | |
| TODO:
 | |
| =====
 | |
| Important:
 | |
| finetune initial contexts
 | |
| flip wavelet?
 | |
| try to use the wavelet transformed predicted image (motion compensated image) as context for coding the residual coefficients
 | |
| try the MV length as context for coding the residual coefficients
 | |
| use extradata for stuff which is in the keyframes now?
 | |
| implement per picture halfpel interpolation
 | |
| try different range coder state transition tables for different contexts
 | |
| 
 | |
| Not Important:
 | |
| compare the 6 tap and 8 tap hpel filters (psnr/bitrate and subjective quality)
 | |
| spatial_scalability b vs u (!= 0 breaks syntax anyway so we can add a u later)
 | |
| 
 | |
| 
 | |
| Credits:
 | |
| ========
 | |
| Michael Niedermayer
 | |
| Loren Merritt
 | |
| 
 | |
| 
 | |
| Copyright:
 | |
| ==========
 | |
| GPL + GFDL + whatever is needed to make this a RFC
 |