shithub: openh264

Download patch

ref: 3efd69a6a9c77ceec05be03f98853b50a9819e31
parent: 0d71326adbfc38610a50f2e20b6863b8bf36d2c9
author: Guangwei <guangwwa@cisco.com>
date: Fri Jul 27 05:32:49 EDT 2018

convert windows file line break to unix format

--- a/codec/api/svc/codec_app_def.h
+++ b/codec/api/svc/codec_app_def.h
@@ -1,809 +1,809 @@
-/*!
- * \copy
- *     Copyright (c)  2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-
-
-#ifndef WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
-#define WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
-/**
-  * @file  codec_app_def.h
-  * @brief Data and /or structures introduced in Cisco OpenH264 application
-*/
-
-#include "codec_def.h"
-/* Constants */
-#define MAX_TEMPORAL_LAYER_NUM          4
-#define MAX_SPATIAL_LAYER_NUM           4
-#define MAX_QUALITY_LAYER_NUM           4
-
-#define MAX_LAYER_NUM_OF_FRAME          128
-#define MAX_NAL_UNITS_IN_LAYER          128     ///< predetermined here, adjust it later if need
-
-#define MAX_RTP_PAYLOAD_LEN             1000
-#define AVERAGE_RTP_PAYLOAD_LEN         800
-
-
-#define SAVED_NALUNIT_NUM_TMP           ( (MAX_SPATIAL_LAYER_NUM*MAX_QUALITY_LAYER_NUM) + 1 + MAX_SPATIAL_LAYER_NUM )  ///< SPS/PPS + SEI/SSEI + PADDING_NAL
-#define MAX_SLICES_NUM_TMP              ( ( MAX_NAL_UNITS_IN_LAYER - SAVED_NALUNIT_NUM_TMP ) / 3 )
-
-
-#define AUTO_REF_PIC_COUNT  -1          ///< encoder selects the number of reference frame automatically
-#define UNSPECIFIED_BIT_RATE 0          ///< to do: add detail comment
-
-/**
- * @brief Struct of OpenH264 version
- */
-///
-/// E.g. SDK version is 1.2.0.0, major version number is 1, minor version number is 2, and revision number is 0.
-typedef struct  _tagVersion {
-  unsigned int uMajor;                  ///< The major version number
-  unsigned int uMinor;                  ///< The minor version number
-  unsigned int uRevision;               ///< The revision number
-  unsigned int uReserved;               ///< The reserved number, it should be 0.
-} OpenH264Version;
-
-/**
-* @brief Decoding status
-*/
-typedef enum {
-  /**
-  * Errors derived from bitstream parsing
-  */
-  dsErrorFree           = 0x00,   ///< bit stream error-free
-  dsFramePending        = 0x01,   ///< need more throughput to generate a frame output,
-  dsRefLost             = 0x02,   ///< layer lost at reference frame with temporal id 0
-  dsBitstreamError      = 0x04,   ///< error bitstreams(maybe broken internal frame) the decoder cared
-  dsDepLayerLost        = 0x08,   ///< dependented layer is ever lost
-  dsNoParamSets         = 0x10,   ///< no parameter set NALs involved
-  dsDataErrorConcealed  = 0x20,   ///< current data error concealed specified
-
-  /**
-  * Errors derived from logic level
-  */
-  dsInvalidArgument     = 0x1000, ///< invalid argument specified
-  dsInitialOptExpected  = 0x2000, ///< initializing operation is expected
-  dsOutOfMemory         = 0x4000, ///< out of memory due to new request
-  /**
-  * ANY OTHERS?
-  */
-  dsDstBufNeedExpan     = 0x8000  ///< actual picture size exceeds size of dst pBuffer feed in decoder, so need expand its size
-
-} DECODING_STATE;
-
-/**
-* @brief Option types introduced in SVC encoder application
-*/
-typedef enum {
-  ENCODER_OPTION_DATAFORMAT = 0,
-  ENCODER_OPTION_IDR_INTERVAL,               ///< IDR period,0/-1 means no Intra period (only the first frame); lager than 0 means the desired IDR period, must be multiple of (2^temporal_layer)
-  ENCODER_OPTION_SVC_ENCODE_PARAM_BASE,      ///< structure of Base Param
-  ENCODER_OPTION_SVC_ENCODE_PARAM_EXT,       ///< structure of Extension Param
-  ENCODER_OPTION_FRAME_RATE,                 ///< maximal input frame rate, current supported range: MAX_FRAME_RATE = 30,MIN_FRAME_RATE = 1
-  ENCODER_OPTION_BITRATE,
-  ENCODER_OPTION_MAX_BITRATE,
-  ENCODER_OPTION_INTER_SPATIAL_PRED,
-  ENCODER_OPTION_RC_MODE,
-  ENCODER_OPTION_RC_FRAME_SKIP,
-  ENCODER_PADDING_PADDING,                   ///< 0:disable padding;1:padding
-
-  ENCODER_OPTION_PROFILE,                    ///< assgin the profile for each layer
-  ENCODER_OPTION_LEVEL,                      ///< assgin the level for each layer
-  ENCODER_OPTION_NUMBER_REF,                 ///< the number of refererence frame
-  ENCODER_OPTION_DELIVERY_STATUS,            ///< the delivery info which is a feedback from app level
-
-  ENCODER_LTR_RECOVERY_REQUEST,
-  ENCODER_LTR_MARKING_FEEDBACK,
-  ENCODER_LTR_MARKING_PERIOD,
-  ENCODER_OPTION_LTR,                        ///< 0:disable LTR;larger than 0 enable LTR; LTR number is fixed to be 2 in current encoder
-  ENCODER_OPTION_COMPLEXITY,
-
-  ENCODER_OPTION_ENABLE_SSEI,                ///< enable SSEI: true--enable ssei; false--disable ssei
-  ENCODER_OPTION_ENABLE_PREFIX_NAL_ADDING,   ///< enable prefix: true--enable prefix; false--disable prefix
-  ENCODER_OPTION_SPS_PPS_ID_STRATEGY, ///< different stategy in adjust ID in SPS/PPS: 0- constant ID, 1-additional ID, 6-mapping and additional
-
-  ENCODER_OPTION_CURRENT_PATH,
-  ENCODER_OPTION_DUMP_FILE,                  ///< dump layer reconstruct frame to a specified file
-  ENCODER_OPTION_TRACE_LEVEL,                ///< trace info based on the trace level
-  ENCODER_OPTION_TRACE_CALLBACK,             ///< a void (*)(void* context, int level, const char* message) function which receives log messages
-  ENCODER_OPTION_TRACE_CALLBACK_CONTEXT,     ///< context info of trace callback
-
-  ENCODER_OPTION_GET_STATISTICS,             ///< read only
-  ENCODER_OPTION_STATISTICS_LOG_INTERVAL,    ///< log interval in millisecond
-
-  ENCODER_OPTION_IS_LOSSLESS_LINK,            ///< advanced algorithmetic settings
-
-  ENCODER_OPTION_BITS_VARY_PERCENTAGE        ///< bit vary percentage
-} ENCODER_OPTION;
-
-/**
-* @brief Option types introduced in decoder application
-*/
-typedef enum {
-  DECODER_OPTION_END_OF_STREAM = 1,     ///< end of stream flag
-  DECODER_OPTION_VCL_NAL,               ///< feedback whether or not have VCL NAL in current AU for application layer
-  DECODER_OPTION_TEMPORAL_ID,           ///< feedback temporal id for application layer
-  DECODER_OPTION_FRAME_NUM,             ///< feedback current decoded frame number
-  DECODER_OPTION_IDR_PIC_ID,            ///< feedback current frame belong to which IDR period
-  DECODER_OPTION_LTR_MARKING_FLAG,      ///< feedback wether current frame mark a LTR
-  DECODER_OPTION_LTR_MARKED_FRAME_NUM,  ///< feedback frame num marked by current Frame
-  DECODER_OPTION_ERROR_CON_IDC,         ///< indicate decoder error concealment method
-  DECODER_OPTION_TRACE_LEVEL,
-  DECODER_OPTION_TRACE_CALLBACK,        ///< a void (*)(void* context, int level, const char* message) function which receives log messages
-  DECODER_OPTION_TRACE_CALLBACK_CONTEXT,///< context info of trace callbac
-
-  DECODER_OPTION_GET_STATISTICS,        ///< feedback decoder statistics
-  DECODER_OPTION_GET_SAR_INFO,          ///< feedback decoder Sample Aspect Ratio info in Vui
-  DECODER_OPTION_PROFILE,               ///< get current AU profile info, only is used in GetOption
-  DECODER_OPTION_LEVEL,                 ///< get current AU level info,only is used in GetOption
-  DECODER_OPTION_STATISTICS_LOG_INTERVAL,///< set log output interval
-  DECODER_OPTION_IS_REF_PIC,             ///< feedback current frame is ref pic or not
-  DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER  ///< number of frames remaining in decoder buffer when pictures are required to re-ordered into display-order.
-
-} DECODER_OPTION;
-
-/**
-* @brief Enumerate the type of error concealment methods
-*/
-typedef enum {
-  ERROR_CON_DISABLE = 0,
-  ERROR_CON_FRAME_COPY,
-  ERROR_CON_SLICE_COPY,
-  ERROR_CON_FRAME_COPY_CROSS_IDR,
-  ERROR_CON_SLICE_COPY_CROSS_IDR,
-  ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE,
-  ERROR_CON_SLICE_MV_COPY_CROSS_IDR,
-  ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE
-} ERROR_CON_IDC;
-/**
-* @brief Feedback that whether or not have VCL NAL in current AU
-*/
-typedef enum {
-  FEEDBACK_NON_VCL_NAL = 0,
-  FEEDBACK_VCL_NAL,
-  FEEDBACK_UNKNOWN_NAL
-} FEEDBACK_VCL_NAL_IN_AU;
-
-/**
-* @brief Type of layer being encoded
-*/
-typedef enum {
-  NON_VIDEO_CODING_LAYER = 0,
-  VIDEO_CODING_LAYER = 1
-} LAYER_TYPE;
-
-/**
-* @brief Spatial layer num
-*/
-typedef enum {
-  SPATIAL_LAYER_0 = 0,
-  SPATIAL_LAYER_1 = 1,
-  SPATIAL_LAYER_2 = 2,
-  SPATIAL_LAYER_3 = 3,
-  SPATIAL_LAYER_ALL = 4
-} LAYER_NUM;
-
-/**
-* @brief Enumerate the type of video bitstream which is provided to decoder
-*/
-typedef enum {
-  VIDEO_BITSTREAM_AVC               = 0,
-  VIDEO_BITSTREAM_SVC               = 1,
-  VIDEO_BITSTREAM_DEFAULT           = VIDEO_BITSTREAM_SVC
-} VIDEO_BITSTREAM_TYPE;
-
-/**
-* @brief Enumerate the type of key frame request
-*/
-typedef enum {
-  NO_RECOVERY_REQUSET  = 0,
-  LTR_RECOVERY_REQUEST = 1,
-  IDR_RECOVERY_REQUEST = 2,
-  NO_LTR_MARKING_FEEDBACK = 3,
-  LTR_MARKING_SUCCESS = 4,
-  LTR_MARKING_FAILED = 5
-} KEY_FRAME_REQUEST_TYPE;
-
-/**
-* @brief Structure for LTR recover request
-*/
-typedef struct {
-  unsigned int uiFeedbackType;       ///< IDR request or LTR recovery request
-  unsigned int uiIDRPicId;           ///< distinguish request from different IDR
-  int          iLastCorrectFrameNum;
-  int          iCurrentFrameNum;     ///< specify current decoder frame_num.
-  int          iLayerId;           //specify the layer for recovery request
-} SLTRRecoverRequest;
-
-/**
-* @brief Structure for LTR marking feedback
-*/
-typedef struct {
-  unsigned int  uiFeedbackType; ///< mark failed or successful
-  unsigned int  uiIDRPicId;     ///< distinguish request from different IDR
-  int           iLTRFrameNum;   ///< specify current decoder frame_num
-  int           iLayerId;        //specify the layer for LTR marking feedback
-} SLTRMarkingFeedback;
-
-/**
-* @brief Structure for LTR configuration
-*/
-typedef struct {
-  bool   bEnableLongTermReference; ///< 1: on, 0: off
-  int    iLTRRefNum;               ///< TODO: not supported to set it arbitrary yet
-} SLTRConfig;
-
-/**
-* @brief Enumerate the type of rate control mode
-*/
-typedef enum {
-  RC_QUALITY_MODE = 0,     ///< quality mode
-  RC_BITRATE_MODE = 1,     ///< bitrate mode
-  RC_BUFFERBASED_MODE = 2, ///< no bitrate control,only using buffer status,adjust the video quality
-  RC_TIMESTAMP_MODE = 3, //rate control based timestamp
-  RC_BITRATE_MODE_POST_SKIP = 4, ///< this is in-building RC MODE, WILL BE DELETED after algorithm tuning!
-  RC_OFF_MODE = -1,         ///< rate control off mode
-} RC_MODES;
-
-/**
-* @brief Enumerate the type of profile id
-*/
-typedef enum {
-  PRO_UNKNOWN   = 0,
-  PRO_BASELINE  = 66,
-  PRO_MAIN      = 77,
-  PRO_EXTENDED  = 88,
-  PRO_HIGH      = 100,
-  PRO_HIGH10    = 110,
-  PRO_HIGH422   = 122,
-  PRO_HIGH444   = 144,
-  PRO_CAVLC444  = 244,
-
-  PRO_SCALABLE_BASELINE = 83,
-  PRO_SCALABLE_HIGH     = 86
-} EProfileIdc;
-
-/**
-* @brief Enumerate the type of level id
-*/
-typedef enum {
-  LEVEL_UNKNOWN = 0,
-  LEVEL_1_0 = 10,
-  LEVEL_1_B = 9,
-  LEVEL_1_1 = 11,
-  LEVEL_1_2 = 12,
-  LEVEL_1_3 = 13,
-  LEVEL_2_0 = 20,
-  LEVEL_2_1 = 21,
-  LEVEL_2_2 = 22,
-  LEVEL_3_0 = 30,
-  LEVEL_3_1 = 31,
-  LEVEL_3_2 = 32,
-  LEVEL_4_0 = 40,
-  LEVEL_4_1 = 41,
-  LEVEL_4_2 = 42,
-  LEVEL_5_0 = 50,
-  LEVEL_5_1 = 51,
-  LEVEL_5_2 = 52
-} ELevelIdc;
-
-/**
-* @brief Enumerate the type of wels log
-*/
-enum {
-  WELS_LOG_QUIET       = 0x00,          ///< quiet mode
-  WELS_LOG_ERROR       = 1 << 0,        ///< error log iLevel
-  WELS_LOG_WARNING     = 1 << 1,        ///< Warning log iLevel
-  WELS_LOG_INFO        = 1 << 2,        ///< information log iLevel
-  WELS_LOG_DEBUG       = 1 << 3,        ///< debug log, critical algo log
-  WELS_LOG_DETAIL      = 1 << 4,        ///< per packet/frame log
-  WELS_LOG_RESV        = 1 << 5,        ///< resversed log iLevel
-  WELS_LOG_LEVEL_COUNT = 6,
-  WELS_LOG_DEFAULT     = WELS_LOG_WARNING   ///< default log iLevel in Wels codec
-};
-
-/**
- * @brief Enumerate the type of slice mode
- */
-typedef enum {
-  SM_SINGLE_SLICE         = 0, ///< | SliceNum==1
-  SM_FIXEDSLCNUM_SLICE    = 1, ///< | according to SliceNum        | enabled dynamic slicing for multi-thread
-  SM_RASTER_SLICE         = 2, ///< | according to SlicesAssign    | need input of MB numbers each slice. In addition, if other constraint in SSliceArgument is presented, need to follow the constraints. Typically if MB num and slice size are both constrained, re-encoding may be involved.
-  SM_SIZELIMITED_SLICE           = 3, ///< | according to SliceSize       | slicing according to size, the slicing will be dynamic(have no idea about slice_nums until encoding current frame)
-  SM_RESERVED             = 4
-} SliceModeEnum;
-
-/**
- * @brief Structure for slice argument
- */
-typedef struct {
-  SliceModeEnum uiSliceMode;    ///< by default, uiSliceMode will be SM_SINGLE_SLICE
-  unsigned int
-  uiSliceNum;     ///< only used when uiSliceMode=1, when uiSliceNum=0 means auto design it with cpu core number
-  unsigned int
-  uiSliceMbNum[MAX_SLICES_NUM_TMP]; ///< only used when uiSliceMode=2; when =0 means setting one MB row a slice
-  unsigned int  uiSliceSizeConstraint; ///< now only used when uiSliceMode=4
-} SSliceArgument;
-
-/**
-* @brief Enumerate the type of video format
-*/
-typedef enum {
-  VF_COMPONENT,
-  VF_PAL,
-  VF_NTSC,
-  VF_SECAM,
-  VF_MAC,
-  VF_UNDEF,
-  VF_NUM_ENUM
-} EVideoFormatSPS;  // EVideoFormat is already defined/used elsewhere!
-
-/**
-* @brief Enumerate the type of color primaries
-*/
-typedef enum {
-  CP_RESERVED0,
-  CP_BT709,
-  CP_UNDEF,
-  CP_RESERVED3,
-  CP_BT470M,
-  CP_BT470BG,
-  CP_SMPTE170M,
-  CP_SMPTE240M,
-  CP_FILM,
-  CP_BT2020,
-  CP_NUM_ENUM
-} EColorPrimaries;
-
-/**
-* @brief Enumerate the type of transfer characteristics
-*/
-typedef enum {
-  TRC_RESERVED0,
-  TRC_BT709,
-  TRC_UNDEF,
-  TRC_RESERVED3,
-  TRC_BT470M,
-  TRC_BT470BG,
-  TRC_SMPTE170M,
-  TRC_SMPTE240M,
-  TRC_LINEAR,
-  TRC_LOG100,
-  TRC_LOG316,
-  TRC_IEC61966_2_4,
-  TRC_BT1361E,
-  TRC_IEC61966_2_1,
-  TRC_BT2020_10,
-  TRC_BT2020_12,
-  TRC_NUM_ENUM
-} ETransferCharacteristics;
-
-/**
-* @brief Enumerate the type of color matrix
-*/
-typedef enum {
-  CM_GBR,
-  CM_BT709,
-  CM_UNDEF,
-  CM_RESERVED3,
-  CM_FCC,
-  CM_BT470BG,
-  CM_SMPTE170M,
-  CM_SMPTE240M,
-  CM_YCGCO,
-  CM_BT2020NC,
-  CM_BT2020C,
-  CM_NUM_ENUM
-} EColorMatrix;
-
-
-/**
-* @brief Enumerate the type of sample aspect ratio
-*/
-typedef enum {
-  ASP_UNSPECIFIED = 0,
-  ASP_1x1 = 1,
-  ASP_12x11 = 2,
-  ASP_10x11 = 3,
-  ASP_16x11 = 4,
-  ASP_40x33 = 5,
-  ASP_24x11 = 6,
-  ASP_20x11 = 7,
-  ASP_32x11 = 8,
-  ASP_80x33 = 9,
-  ASP_18x11 = 10,
-  ASP_15x11 = 11,
-  ASP_64x33 = 12,
-  ASP_160x99 = 13,
-
-  ASP_EXT_SAR = 255
-} ESampleAspectRatio;
-
-
-/**
-* @brief  Structure for spatial layer configuration
-*/
-typedef struct {
-  int   iVideoWidth;           ///< width of picture in luminance samples of a layer
-  int   iVideoHeight;          ///< height of picture in luminance samples of a layer
-  float fFrameRate;            ///< frame rate specified for a layer
-  int   iSpatialBitrate;       ///< target bitrate for a spatial layer, in unit of bps
-  int   iMaxSpatialBitrate;    ///< maximum  bitrate for a spatial layer, in unit of bps
-  EProfileIdc  uiProfileIdc;   ///< value of profile IDC (PRO_UNKNOWN for auto-detection)
-  ELevelIdc    uiLevelIdc;     ///< value of profile IDC (0 for auto-detection)
-  int          iDLayerQp;      ///< value of level IDC (0 for auto-detection)
-
-  SSliceArgument sSliceArgument;
-
-  // Note: members bVideoSignalTypePresent through uiColorMatrix below are also defined in SWelsSPS in parameter_sets.h.
-  bool      bVideoSignalTypePresent;  // false => do not write any of the following information to the header
-  unsigned char
-  uiVideoFormat;        // EVideoFormatSPS; 3 bits in header; 0-5 => component, kpal, ntsc, secam, mac, undef
-  bool      bFullRange;         // false => analog video data range [16, 235]; true => full data range [0,255]
-  bool      bColorDescriptionPresent; // false => do not write any of the following three items to the header
-  unsigned char
-  uiColorPrimaries;     // EColorPrimaries; 8 bits in header; 0 - 9 => ???, bt709, undef, ???, bt470m, bt470bg,
-  //    smpte170m, smpte240m, film, bt2020
-  unsigned char
-  uiTransferCharacteristics;  // ETransferCharacteristics; 8 bits in header; 0 - 15 => ???, bt709, undef, ???, bt470m, bt470bg, smpte170m,
-  //   smpte240m, linear, log100, log316, iec61966-2-4, bt1361e, iec61966-2-1, bt2020-10, bt2020-12
-  unsigned char
-  uiColorMatrix;        // EColorMatrix; 8 bits in header (corresponds to FFmpeg "colorspace"); 0 - 10 => GBR, bt709,
-  //   undef, ???, fcc, bt470bg, smpte170m, smpte240m, YCgCo, bt2020nc, bt2020c
-
-  bool bAspectRatioPresent; ///< aspect ratio present in VUI
-  ESampleAspectRatio eAspectRatio; ///< aspect ratio idc
-  unsigned short sAspectRatioExtWidth; ///< use if aspect ratio idc == 255
-  unsigned short sAspectRatioExtHeight; ///< use if aspect ratio idc == 255
-
-} SSpatialLayerConfig;
-
-/**
-* @brief Encoder usage type
-*/
-typedef enum {
-  CAMERA_VIDEO_REAL_TIME,      ///< camera video for real-time communication
-  SCREEN_CONTENT_REAL_TIME,    ///< screen content signal
-  CAMERA_VIDEO_NON_REAL_TIME,
-  SCREEN_CONTENT_NON_REAL_TIME,
-  INPUT_CONTENT_TYPE_ALL,
-} EUsageType;
-
-/**
-* @brief Enumulate the complexity mode
-*/
-typedef enum {
-  LOW_COMPLEXITY = 0,              ///< the lowest compleixty,the fastest speed,
-  MEDIUM_COMPLEXITY,          ///< medium complexity, medium speed,medium quality
-  HIGH_COMPLEXITY             ///< high complexity, lowest speed, high quality
-} ECOMPLEXITY_MODE;
-
-/**
- * @brief Enumulate for the stategy of SPS/PPS strategy
- */
-typedef enum {
-  CONSTANT_ID = 0,           ///< constant id in SPS/PPS
-  INCREASING_ID = 0x01,      ///< SPS/PPS id increases at each IDR
-  SPS_LISTING  = 0x02,       ///< using SPS in the existing list if possible
-  SPS_LISTING_AND_PPS_INCREASING  = 0x03,
-  SPS_PPS_LISTING  = 0x06,
-} EParameterSetStrategy;
-
-// TODO:  Refine the parameters definition.
-/**
-* @brief SVC Encoding Parameters
-*/
-typedef struct TagEncParamBase {
-  EUsageType
-  iUsageType;                 ///< application type; please refer to the definition of EUsageType
-
-  int       iPicWidth;        ///< width of picture in luminance samples (the maximum of all layers if multiple spatial layers presents)
-  int       iPicHeight;       ///< height of picture in luminance samples((the maximum of all layers if multiple spatial layers presents)
-  int       iTargetBitrate;   ///< target bitrate desired, in unit of bps
-  RC_MODES  iRCMode;          ///< rate control mode
-  float     fMaxFrameRate;    ///< maximal input frame rate
-
-} SEncParamBase, *PEncParamBase;
-
-/**
-* @brief SVC Encoding Parameters extention
-*/
-typedef struct TagEncParamExt {
-  EUsageType
-  iUsageType;                          ///< same as in TagEncParamBase
-
-  int       iPicWidth;                 ///< same as in TagEncParamBase
-  int       iPicHeight;                ///< same as in TagEncParamBase
-  int       iTargetBitrate;            ///< same as in TagEncParamBase
-  RC_MODES  iRCMode;                   ///< same as in TagEncParamBase
-  float     fMaxFrameRate;             ///< same as in TagEncParamBase
-
-  int       iTemporalLayerNum;         ///< temporal layer number, max temporal layer = 4
-  int       iSpatialLayerNum;          ///< spatial layer number,1<= iSpatialLayerNum <= MAX_SPATIAL_LAYER_NUM, MAX_SPATIAL_LAYER_NUM = 4
-  SSpatialLayerConfig sSpatialLayers[MAX_SPATIAL_LAYER_NUM];
-
-  ECOMPLEXITY_MODE iComplexityMode;
-  unsigned int      uiIntraPeriod;     ///< period of Intra frame
-  int               iNumRefFrame;      ///< number of reference frame used
-  EParameterSetStrategy
-  eSpsPpsIdStrategy;       ///< different stategy in adjust ID in SPS/PPS: 0- constant ID, 1-additional ID, 6-mapping and additional
-  bool    bPrefixNalAddingCtrl;        ///< false:not use Prefix NAL; true: use Prefix NAL
-  bool    bEnableSSEI;                 ///< false:not use SSEI; true: use SSEI -- TODO: planning to remove the interface of SSEI
-  bool    bSimulcastAVC;               ///< (when encoding more than 1 spatial layer) false: use SVC syntax for higher layers; true: use Simulcast AVC
-  int     iPaddingFlag;                ///< 0:disable padding;1:padding
-  int     iEntropyCodingModeFlag;      ///< 0:CAVLC  1:CABAC.
-
-  /* rc control */
-  bool    bEnableFrameSkip;            ///< False: don't skip frame even if VBV buffer overflow.True: allow skipping frames to keep the bitrate within limits
-  int     iMaxBitrate;                 ///< the maximum bitrate, in unit of bps, set it to UNSPECIFIED_BIT_RATE if not needed
-  int     iMaxQp;                      ///< the maximum QP encoder supports
-  int     iMinQp;                      ///< the minmum QP encoder supports
-  unsigned int uiMaxNalSize;           ///< the maximum NAL size.  This value should be not 0 for dynamic slice mode
-
-  /*LTR settings*/
-  bool     bEnableLongTermReference;   ///< 1: on, 0: off
-  int      iLTRRefNum;                 ///< the number of LTR(long term reference),TODO: not supported to set it arbitrary yet
-  unsigned int      iLtrMarkPeriod;    ///< the LTR marked period that is used in feedback.
-  /* multi-thread settings*/
-  unsigned short
-  iMultipleThreadIdc;                  ///< 1 # 0: auto(dynamic imp. internal encoder); 1: multiple threads imp. disabled; lager than 1: count number of threads;
-  bool  bUseLoadBalancing; ///< only used when uiSliceMode=1 or 3, will change slicing of a picture during the run-time of multi-thread encoding, so the result of each run may be different
-
-  /* Deblocking loop filter */
-  int       iLoopFilterDisableIdc;     ///< 0: on, 1: off, 2: on except for slice boundaries
-  int       iLoopFilterAlphaC0Offset;  ///< AlphaOffset: valid range [-6, 6], default 0
-  int       iLoopFilterBetaOffset;     ///< BetaOffset: valid range [-6, 6], default 0
-  /*pre-processing feature*/
-  bool    bEnableDenoise;              ///< denoise control
-  bool    bEnableBackgroundDetection;  ///< background detection control //VAA_BACKGROUND_DETECTION //BGD cmd
-  bool    bEnableAdaptiveQuant;        ///< adaptive quantization control
-  bool    bEnableFrameCroppingFlag;    ///< enable frame cropping flag: TRUE always in application
-  bool    bEnableSceneChangeDetect;
-
-  bool    bIsLosslessLink;            ///<  LTR advanced setting
-} SEncParamExt;
-
-/**
-* @brief Define a new struct to show the property of video bitstream.
-*/
-typedef struct {
-  unsigned int          size;          ///< size of the struct
-  VIDEO_BITSTREAM_TYPE  eVideoBsType;  ///< video stream type (AVC/SVC)
-} SVideoProperty;
-
-/**
-* @brief SVC Decoding Parameters, reserved here and potential applicable in the future
-*/
-typedef struct TagSVCDecodingParam {
-  char*     pFileNameRestructed;       ///< file name of reconstructed frame used for PSNR calculation based debug
-
-  unsigned int  uiCpuLoad;             ///< CPU load
-  unsigned char uiTargetDqLayer;       ///< setting target dq layer id
-
-  ERROR_CON_IDC eEcActiveIdc;          ///< whether active error concealment feature in decoder
-  bool bParseOnly;                     ///< decoder for parse only, no reconstruction. When it is true, SPS/PPS size should not exceed SPS_PPS_BS_SIZE (128). Otherwise, it will return error info
-
-  SVideoProperty   sVideoProperty;    ///< video stream property
-} SDecodingParam, *PDecodingParam;
-
-/**
-* @brief Bitstream inforamtion of a layer being encoded
-*/
-typedef struct {
-  unsigned char uiTemporalId;
-  unsigned char uiSpatialId;
-  unsigned char uiQualityId;
-  EVideoFrameType eFrameType;
-  unsigned char uiLayerType;
-
-  /**
-   * The sub sequence layers are ordered hierarchically based on their dependency on each other so that any picture in a layer shall not be
-   * predicted from any picture on any higher layer.
-  */
-  int   iSubSeqId;                ///< refer to D.2.11 Sub-sequence information SEI message semantics
-  int   iNalCount;              ///< count number of NAL coded already
-  int*  pNalLengthInByte;       ///< length of NAL size in byte from 0 to iNalCount-1
-  unsigned char*  pBsBuf;       ///< buffer of bitstream contained
-} SLayerBSInfo, *PLayerBSInfo;
-
-/**
-* @brief Frame bit stream info
-*/
-typedef struct {
-  int           iLayerNum;
-  SLayerBSInfo  sLayerInfo[MAX_LAYER_NUM_OF_FRAME];
-
-  EVideoFrameType eFrameType;
-  int iFrameSizeInBytes;
-  long long uiTimeStamp;
-} SFrameBSInfo, *PFrameBSInfo;
-
-/**
-*  @brief Structure for source picture
-*/
-typedef struct Source_Picture_s {
-  int       iColorFormat;          ///< color space type
-  int       iStride[4];            ///< stride for each plane pData
-  unsigned char*  pData[4];        ///< plane pData
-  int       iPicWidth;             ///< luma picture width in x coordinate
-  int       iPicHeight;            ///< luma picture height in y coordinate
-  long long uiTimeStamp;           ///< timestamp of the source picture, unit: millisecond
-} SSourcePicture;
-/**
-* @brief Structure for bit rate info
-*/
-typedef struct TagBitrateInfo {
-  LAYER_NUM iLayer;
-  int iBitrate;                    ///< the maximum bitrate
-} SBitrateInfo;
-
-/**
-* @brief Structure for dump layer info
-*/
-typedef struct TagDumpLayer {
-  int iLayer;
-  char* pFileName;
-} SDumpLayer;
-
-/**
-* @brief Structure for profile info in layer
-*
-*/
-typedef struct TagProfileInfo {
-  int iLayer;
-  EProfileIdc uiProfileIdc;        ///< the profile info
-} SProfileInfo;
-
-/**
-* @brief  Structure for level info in layer
-*
-*/
-typedef struct TagLevelInfo {
-  int iLayer;
-  ELevelIdc uiLevelIdc;            ///< the level info
-} SLevelInfo;
-/**
-* @brief Structure for dilivery status
-*
-*/
-typedef struct TagDeliveryStatus {
-  bool bDeliveryFlag;              ///< 0: the previous frame isn't delivered,1: the previous frame is delivered
-  int iDropFrameType;              ///< the frame type that is dropped; reserved
-  int iDropFrameSize;              ///< the frame size that is dropped; reserved
-} SDeliveryStatus;
-
-/**
-* @brief The capability of decoder, for SDP negotiation
-*/
-typedef struct TagDecoderCapability {
-  int iProfileIdc;     ///< profile_idc
-  int iProfileIop;     ///< profile-iop
-  int iLevelIdc;       ///< level_idc
-  int iMaxMbps;        ///< max-mbps
-  int iMaxFs;          ///< max-fs
-  int iMaxCpb;         ///< max-cpb
-  int iMaxDpb;         ///< max-dpb
-  int iMaxBr;          ///< max-br
-  bool bRedPicCap;     ///< redundant-pic-cap
-} SDecoderCapability;
-
-/**
-* @brief Structure for parse only output
-*/
-typedef struct TagParserBsInfo {
-  int iNalNum;                                 ///< total NAL number in current AU
-  int* pNalLenInByte;  ///< each nal length
-  unsigned char* pDstBuff;                     ///< outputted dst buffer for parsed bitstream
-  int iSpsWidthInPixel;                        ///< required SPS width info
-  int iSpsHeightInPixel;                       ///< required SPS height info
-  unsigned long long uiInBsTimeStamp;               ///< input BS timestamp
-  unsigned long long uiOutBsTimeStamp;             ///< output BS timestamp
-} SParserBsInfo, *PParserBsInfo;
-
-/**
-* @brief Structure for encoder statistics
-*/
-typedef struct TagVideoEncoderStatistics {
-  unsigned int uiWidth;                        ///< the width of encoded frame
-  unsigned int uiHeight;                       ///< the height of encoded frame
-  //following standard, will be 16x aligned, if there are multiple spatial, this is of the highest
-  float fAverageFrameSpeedInMs;                ///< average_Encoding_Time
-
-  // rate control related
-  float fAverageFrameRate;                     ///< the average frame rate in, calculate since encoding starts, supposed that the input timestamp is in unit of ms
-  float fLatestFrameRate;                      ///< the frame rate in, in the last second, supposed that the input timestamp is in unit of ms (? useful for checking BR, but is it easy to calculate?
-  unsigned int uiBitRate;                      ///< sendrate in Bits per second, calculated within the set time-window
-  unsigned int uiAverageFrameQP;                    ///< the average QP of last encoded frame
-
-  unsigned int uiInputFrameCount;              ///< number of frames
-  unsigned int uiSkippedFrameCount;            ///< number of frames
-
-  unsigned int uiResolutionChangeTimes;        ///< uiResolutionChangeTimes
-  unsigned int uiIDRReqNum;                    ///< number of IDR requests
-  unsigned int uiIDRSentNum;                   ///< number of actual IDRs sent
-  unsigned int uiLTRSentNum;                   ///< number of LTR sent/marked
-
-  long long    iStatisticsTs;                  ///< Timestamp of updating the statistics
-
-  unsigned long iTotalEncodedBytes;
-  unsigned long iLastStatisticsBytes;
-  unsigned long iLastStatisticsFrameCount;
-} SEncoderStatistics;
-
-/**
-* @brief  Structure for decoder statistics
-*/
-typedef struct TagVideoDecoderStatistics {
-  unsigned int uiWidth;                        ///< the width of encode/decode frame
-  unsigned int uiHeight;                       ///< the height of encode/decode frame
-  float fAverageFrameSpeedInMs;                ///< average_Decoding_Time
-  float fActualAverageFrameSpeedInMs;          ///< actual average_Decoding_Time, including freezing pictures
-  unsigned int uiDecodedFrameCount;            ///< number of frames
-  unsigned int uiResolutionChangeTimes;        ///< uiResolutionChangeTimes
-  unsigned int uiIDRCorrectNum;                ///< number of correct IDR received
-  //EC on related
-  unsigned int
-  uiAvgEcRatio;                                ///< when EC is on, the average ratio of total EC areas, can be an indicator of reconstruction quality
-  unsigned int
-  uiAvgEcPropRatio;                            ///< when EC is on, the rough average ratio of propogate EC areas, can be an indicator of reconstruction quality
-  unsigned int uiEcIDRNum;                     ///< number of actual unintegrity IDR or not received but eced
-  unsigned int uiEcFrameNum;                   ///<
-  unsigned int uiIDRLostNum;                   ///< number of whole lost IDR
-  unsigned int
-  uiFreezingIDRNum;               ///< number of freezing IDR with error (partly received), under resolution change
-  unsigned int uiFreezingNonIDRNum;            ///< number of freezing non-IDR with error
-  int iAvgLumaQp;                              ///< average luma QP. default: -1, no correct frame outputted
-  int iSpsReportErrorNum;                      ///< number of Sps Invalid report
-  int iSubSpsReportErrorNum;                   ///< number of SubSps Invalid report
-  int iPpsReportErrorNum;                      ///< number of Pps Invalid report
-  int iSpsNoExistNalNum;                       ///< number of Sps NoExist Nal
-  int iSubSpsNoExistNalNum;                    ///< number of SubSps NoExist Nal
-  int iPpsNoExistNalNum;                       ///< number of Pps NoExist Nal
-
-  unsigned int uiProfile;                ///< Profile idc in syntax
-  unsigned int uiLevel;                  ///< level idc according to Annex A-1
-
-  int iCurrentActiveSpsId;                     ///< current active SPS id
-  int iCurrentActivePpsId;                     ///< current active PPS id
-
-  unsigned int iStatisticsLogInterval;                  ///< frame interval of statistics log
-} SDecoderStatistics; // in building, coming soon
-
-/**
-* @brief Structure for sample aspect ratio (SAR) info in VUI
-*/
-typedef struct TagVuiSarInfo {
-  unsigned int uiSarWidth;                     ///< SAR width
-  unsigned int uiSarHeight;                    ///< SAR height
-  bool bOverscanAppropriateFlag;               ///< SAR overscan flag
-} SVuiSarInfo, *PVuiSarInfo;
-
-#endif//WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+
+
+#ifndef WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
+#define WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
+/**
+  * @file  codec_app_def.h
+  * @brief Data and /or structures introduced in Cisco OpenH264 application
+*/
+
+#include "codec_def.h"
+/* Constants: layer/NAL capacity limits and special parameter values used by the structures below */
+#define MAX_TEMPORAL_LAYER_NUM          4       ///< maximum number of temporal layers
+#define MAX_SPATIAL_LAYER_NUM           4       ///< maximum number of spatial layers; size of SEncParamExt::sSpatialLayers
+#define MAX_QUALITY_LAYER_NUM           4       ///< maximum number of quality layers
+
+#define MAX_LAYER_NUM_OF_FRAME          128     ///< size of SFrameBSInfo::sLayerInfo
+#define MAX_NAL_UNITS_IN_LAYER          128     ///< predetermined here, adjust it later if needed
+
+#define MAX_RTP_PAYLOAD_LEN             1000
+#define AVERAGE_RTP_PAYLOAD_LEN         800
+
+
+#define SAVED_NALUNIT_NUM_TMP           ( (MAX_SPATIAL_LAYER_NUM*MAX_QUALITY_LAYER_NUM) + 1 + MAX_SPATIAL_LAYER_NUM )  ///< SPS/PPS + SEI/SSEI + PADDING_NAL; evaluates to 21 with the values above
+#define MAX_SLICES_NUM_TMP              ( ( MAX_NAL_UNITS_IN_LAYER - SAVED_NALUNIT_NUM_TMP ) / 3 )  ///< size of SSliceArgument::uiSliceMbNum; evaluates to 35 with the values above
+
+
+#define AUTO_REF_PIC_COUNT  -1          ///< encoder selects the number of reference frames automatically
+#define UNSPECIFIED_BIT_RATE 0          ///< value to set SEncParamExt::iMaxBitrate to when no maximum bitrate is needed
+
+/**
+ * @brief Version number of the OpenH264 library, split into four numeric components
+ */
+///
+/// E.g. if the SDK version is 1.2.0.0, the major version number is 1, the minor version number is 2, and the revision number is 0.
+typedef struct  _tagVersion {
+  unsigned int uMajor;                  ///< The major version number
+  unsigned int uMinor;                  ///< The minor version number
+  unsigned int uRevision;               ///< The revision number
+  unsigned int uReserved;               ///< The reserved number; it should be 0
+} OpenH264Version;
+
+/**
+* @brief Decoding status; every non-zero enumerator is a distinct single bit
+*/
+typedef enum {
+  /**
+  * Errors derived from bitstream parsing
+  */
+  dsErrorFree           = 0x00,   ///< bit stream error-free
+  dsFramePending        = 0x01,   ///< need more throughput to generate a frame output
+  dsRefLost             = 0x02,   ///< layer lost at reference frame with temporal id 0
+  dsBitstreamError      = 0x04,   ///< bitstream error (maybe a broken internal frame) that the decoder cares about
+  dsDepLayerLost        = 0x08,   ///< a dependent layer has been lost
+  dsNoParamSets         = 0x10,   ///< no parameter set NALs involved
+  dsDataErrorConcealed  = 0x20,   ///< current data error concealed specified
+
+  /**
+  * Errors derived from logic level
+  */
+  dsInvalidArgument     = 0x1000, ///< invalid argument specified
+  dsInitialOptExpected  = 0x2000, ///< initializing operation is expected
+  dsOutOfMemory         = 0x4000, ///< out of memory due to new request
+  /**
+  * ANY OTHERS?
+  */
+  dsDstBufNeedExpan     = 0x8000  ///< actual picture size exceeds the size of the dst pBuffer fed to the decoder, so the buffer needs to be expanded
+
+} DECODING_STATE;
+
+/**
+* @brief Option types introduced in SVC encoder application
+*/
+typedef enum {
+  ENCODER_OPTION_DATAFORMAT = 0,
+  ENCODER_OPTION_IDR_INTERVAL,               ///< IDR period; 0/-1 means no Intra period (only the first frame); larger than 0 means the desired IDR period, must be a multiple of (2^temporal_layer)
+  ENCODER_OPTION_SVC_ENCODE_PARAM_BASE,      ///< structure of Base Param
+  ENCODER_OPTION_SVC_ENCODE_PARAM_EXT,       ///< structure of Extension Param
+  ENCODER_OPTION_FRAME_RATE,                 ///< maximal input frame rate, currently supported range: MAX_FRAME_RATE = 30, MIN_FRAME_RATE = 1
+  ENCODER_OPTION_BITRATE,
+  ENCODER_OPTION_MAX_BITRATE,
+  ENCODER_OPTION_INTER_SPATIAL_PRED,
+  ENCODER_OPTION_RC_MODE,
+  ENCODER_OPTION_RC_FRAME_SKIP,
+  ENCODER_PADDING_PADDING,                   ///< 0: disable padding; 1: padding
+
+  ENCODER_OPTION_PROFILE,                    ///< assign the profile for each layer
+  ENCODER_OPTION_LEVEL,                      ///< assign the level for each layer
+  ENCODER_OPTION_NUMBER_REF,                 ///< the number of reference frames
+  ENCODER_OPTION_DELIVERY_STATUS,            ///< the delivery info which is a feedback from app level
+
+  ENCODER_LTR_RECOVERY_REQUEST,
+  ENCODER_LTR_MARKING_FEEDBACK,
+  ENCODER_LTR_MARKING_PERIOD,
+  ENCODER_OPTION_LTR,                        ///< 0: disable LTR; larger than 0: enable LTR; LTR number is fixed to be 2 in current encoder
+  ENCODER_OPTION_COMPLEXITY,
+
+  ENCODER_OPTION_ENABLE_SSEI,                ///< enable SSEI: true--enable ssei; false--disable ssei
+  ENCODER_OPTION_ENABLE_PREFIX_NAL_ADDING,   ///< enable prefix: true--enable prefix; false--disable prefix
+  ENCODER_OPTION_SPS_PPS_ID_STRATEGY, ///< different strategy in adjusting ID in SPS/PPS: 0- constant ID, 1-additional ID, 6-mapping and additional
+
+  ENCODER_OPTION_CURRENT_PATH,
+  ENCODER_OPTION_DUMP_FILE,                  ///< dump layer reconstruct frame to a specified file
+  ENCODER_OPTION_TRACE_LEVEL,                ///< trace info based on the trace level
+  ENCODER_OPTION_TRACE_CALLBACK,             ///< a void (*)(void* context, int level, const char* message) function which receives log messages
+  ENCODER_OPTION_TRACE_CALLBACK_CONTEXT,     ///< context info of trace callback
+
+  ENCODER_OPTION_GET_STATISTICS,             ///< read only
+  ENCODER_OPTION_STATISTICS_LOG_INTERVAL,    ///< log interval in milliseconds
+
+  ENCODER_OPTION_IS_LOSSLESS_LINK,            ///< advanced algorithmic settings
+
+  ENCODER_OPTION_BITS_VARY_PERCENTAGE        ///< bit vary percentage
+} ENCODER_OPTION;
+
+/**
+* @brief Option types introduced in decoder application; numbering starts at 1
+*/
+typedef enum {
+  DECODER_OPTION_END_OF_STREAM = 1,     ///< end of stream flag
+  DECODER_OPTION_VCL_NAL,               ///< feedback whether or not there is a VCL NAL in current AU for application layer
+  DECODER_OPTION_TEMPORAL_ID,           ///< feedback temporal id for application layer
+  DECODER_OPTION_FRAME_NUM,             ///< feedback current decoded frame number
+  DECODER_OPTION_IDR_PIC_ID,            ///< feedback which IDR period the current frame belongs to
+  DECODER_OPTION_LTR_MARKING_FLAG,      ///< feedback whether current frame marks a LTR
+  DECODER_OPTION_LTR_MARKED_FRAME_NUM,  ///< feedback frame num marked by current Frame
+  DECODER_OPTION_ERROR_CON_IDC,         ///< indicate decoder error concealment method
+  DECODER_OPTION_TRACE_LEVEL,
+  DECODER_OPTION_TRACE_CALLBACK,        ///< a void (*)(void* context, int level, const char* message) function which receives log messages
+  DECODER_OPTION_TRACE_CALLBACK_CONTEXT,///< context info of trace callback
+
+  DECODER_OPTION_GET_STATISTICS,        ///< feedback decoder statistics
+  DECODER_OPTION_GET_SAR_INFO,          ///< feedback decoder Sample Aspect Ratio info in Vui
+  DECODER_OPTION_PROFILE,               ///< get current AU profile info, only used in GetOption
+  DECODER_OPTION_LEVEL,                 ///< get current AU level info, only used in GetOption
+  DECODER_OPTION_STATISTICS_LOG_INTERVAL,///< set log output interval
+  DECODER_OPTION_IS_REF_PIC,             ///< feedback whether current frame is a ref pic or not
+  DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER  ///< number of frames remaining in decoder buffer when pictures are required to be re-ordered into display order
+
+} DECODER_OPTION;
+
+/**
+* @brief Enumerate the error concealment methods; enumerators are numbered consecutively from 0
+*/
+typedef enum {
+  ERROR_CON_DISABLE = 0,
+  ERROR_CON_FRAME_COPY,
+  ERROR_CON_SLICE_COPY,
+  ERROR_CON_FRAME_COPY_CROSS_IDR,
+  ERROR_CON_SLICE_COPY_CROSS_IDR,
+  ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE,
+  ERROR_CON_SLICE_MV_COPY_CROSS_IDR,
+  ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE
+} ERROR_CON_IDC;
+/**
+* @brief Feedback on whether or not there is a VCL NAL in the current AU
+*/
+typedef enum {
+  FEEDBACK_NON_VCL_NAL = 0,
+  FEEDBACK_VCL_NAL,
+  FEEDBACK_UNKNOWN_NAL
+} FEEDBACK_VCL_NAL_IN_AU;
+
+/**
+* @brief Type of layer being encoded: video coding layer or non video coding layer
+*/
+typedef enum {
+  NON_VIDEO_CODING_LAYER = 0,
+  VIDEO_CODING_LAYER = 1
+} LAYER_TYPE;
+
+/**
+* @brief Spatial layer index; SPATIAL_LAYER_ALL has the same value as MAX_SPATIAL_LAYER_NUM
+*/
+typedef enum {
+  SPATIAL_LAYER_0 = 0,
+  SPATIAL_LAYER_1 = 1,
+  SPATIAL_LAYER_2 = 2,
+  SPATIAL_LAYER_3 = 3,
+  SPATIAL_LAYER_ALL = 4
+} LAYER_NUM;
+
+/**
+* @brief Enumerate the types of video bitstream which can be provided to the decoder; the default is an alias of SVC
+*/
+typedef enum {
+  VIDEO_BITSTREAM_AVC               = 0,
+  VIDEO_BITSTREAM_SVC               = 1,
+  VIDEO_BITSTREAM_DEFAULT           = VIDEO_BITSTREAM_SVC
+} VIDEO_BITSTREAM_TYPE;
+
+/**
+* @brief Enumerate the types of key frame request and LTR marking feedback
+*/
+typedef enum {
+  NO_RECOVERY_REQUSET  = 0,       ///< NOTE(review): identifier is misspelled ("REQUSET") but is part of this API header; renaming would break users
+  LTR_RECOVERY_REQUEST = 1,
+  IDR_RECOVERY_REQUEST = 2,
+  NO_LTR_MARKING_FEEDBACK = 3,    ///< NOTE(review): values 3-5 presumably go in SLTRMarkingFeedback::uiFeedbackType - confirm
+  LTR_MARKING_SUCCESS = 4,
+  LTR_MARKING_FAILED = 5
+} KEY_FRAME_REQUEST_TYPE;
+
+/**
+* @brief Structure for LTR recovery request
+*/
+typedef struct {
+  unsigned int uiFeedbackType;       ///< IDR request or LTR recovery request
+  unsigned int uiIDRPicId;           ///< distinguish requests from different IDRs
+  int          iLastCorrectFrameNum;
+  int          iCurrentFrameNum;     ///< specify current decoder frame_num
+  int          iLayerId;             ///< specify the layer for recovery request
+} SLTRRecoverRequest;
+
+/**
+* @brief Structure for LTR marking feedback
+*/
+typedef struct {
+  unsigned int  uiFeedbackType; ///< mark failed or successful
+  unsigned int  uiIDRPicId;     ///< distinguish requests from different IDRs
+  int           iLTRFrameNum;   ///< specify current decoder frame_num
+  int           iLayerId;       ///< specify the layer for LTR marking feedback
+} SLTRMarkingFeedback;
+
+/**
+* @brief Structure for LTR (long term reference) configuration
+*/
+typedef struct {
+  bool   bEnableLongTermReference; ///< 1: on, 0: off
+  int    iLTRRefNum;               ///< TODO: setting it to an arbitrary value is not supported yet
+} SLTRConfig;
+
+/**
+* @brief Enumerate the rate control modes
+*/
+typedef enum {
+  RC_QUALITY_MODE = 0,     ///< quality mode
+  RC_BITRATE_MODE = 1,     ///< bitrate mode
+  RC_BUFFERBASED_MODE = 2, ///< no bitrate control, only using buffer status to adjust the video quality
+  RC_TIMESTAMP_MODE = 3, ///< rate control based on timestamp
+  RC_BITRATE_MODE_POST_SKIP = 4, ///< this is an in-building RC MODE, WILL BE DELETED after algorithm tuning!
+  RC_OFF_MODE = -1,         ///< rate control off mode
+} RC_MODES;
+
+/**
+* @brief Enumerate the profile ids; note the scalable profiles are declared last although their values are not the largest
+*/
+typedef enum {
+  PRO_UNKNOWN   = 0,
+  PRO_BASELINE  = 66,
+  PRO_MAIN      = 77,
+  PRO_EXTENDED  = 88,
+  PRO_HIGH      = 100,
+  PRO_HIGH10    = 110,
+  PRO_HIGH422   = 122,
+  PRO_HIGH444   = 144,
+  PRO_CAVLC444  = 244,
+
+  PRO_SCALABLE_BASELINE = 83,
+  PRO_SCALABLE_HIGH     = 86
+} EProfileIdc;
+
+/**
+* @brief Enumerate the level ids
+*/
+typedef enum {
+  LEVEL_UNKNOWN = 0,
+  LEVEL_1_0 = 10,
+  LEVEL_1_B = 9,       ///< numerically smaller than LEVEL_1_0 although declared after it
+  LEVEL_1_1 = 11,
+  LEVEL_1_2 = 12,
+  LEVEL_1_3 = 13,
+  LEVEL_2_0 = 20,
+  LEVEL_2_1 = 21,
+  LEVEL_2_2 = 22,
+  LEVEL_3_0 = 30,
+  LEVEL_3_1 = 31,
+  LEVEL_3_2 = 32,
+  LEVEL_4_0 = 40,
+  LEVEL_4_1 = 41,
+  LEVEL_4_2 = 42,
+  LEVEL_5_0 = 50,
+  LEVEL_5_1 = 51,
+  LEVEL_5_2 = 52
+} ELevelIdc;
+
+/**
+* @brief Enumerate the wels log levels; each non-quiet level is a distinct single bit
+*/
+enum {
+  WELS_LOG_QUIET       = 0x00,          ///< quiet mode
+  WELS_LOG_ERROR       = 1 << 0,        ///< error log iLevel
+  WELS_LOG_WARNING     = 1 << 1,        ///< warning log iLevel
+  WELS_LOG_INFO        = 1 << 2,        ///< information log iLevel
+  WELS_LOG_DEBUG       = 1 << 3,        ///< debug log, critical algo log
+  WELS_LOG_DETAIL      = 1 << 4,        ///< per packet/frame log
+  WELS_LOG_RESV        = 1 << 5,        ///< reserved log iLevel
+  WELS_LOG_LEVEL_COUNT = 6,             ///< number of single-bit levels above (ERROR through RESV); not itself a level bit
+  WELS_LOG_DEFAULT     = WELS_LOG_WARNING   ///< default log iLevel in Wels codec
+};
+
+/**
+ * @brief Enumerate the slice modes
+ */
+typedef enum {
+  SM_SINGLE_SLICE         = 0, ///< | SliceNum==1
+  SM_FIXEDSLCNUM_SLICE    = 1, ///< | according to SliceNum        | enabled dynamic slicing for multi-thread
+  SM_RASTER_SLICE         = 2, ///< | according to SlicesAssign    | needs input of MB numbers for each slice. In addition, if other constraints in SSliceArgument are present, they need to be followed. Typically if MB num and slice size are both constrained, re-encoding may be involved.
+  SM_SIZELIMITED_SLICE           = 3, ///< | according to SliceSize       | slicing according to size, the slicing will be dynamic (slice_nums is not known until the current frame is encoded)
+  SM_RESERVED             = 4
+} SliceModeEnum;
+
+/**
+ * @brief Structure for slice argument; which fields are used depends on uiSliceMode (see SliceModeEnum)
+ */
+typedef struct {
+  SliceModeEnum uiSliceMode;    ///< by default, uiSliceMode will be SM_SINGLE_SLICE
+  unsigned int
+  uiSliceNum;     ///< only used when uiSliceMode=1 (SM_FIXEDSLCNUM_SLICE); uiSliceNum=0 means choose it automatically from the cpu core number
+  unsigned int
+  uiSliceMbNum[MAX_SLICES_NUM_TMP]; ///< only used when uiSliceMode=2 (SM_RASTER_SLICE); =0 means setting one MB row as a slice
+  unsigned int  uiSliceSizeConstraint; ///< now only used when uiSliceMode=3 (SM_SIZELIMITED_SLICE); NOTE(review): comment previously said mode 4, which is SM_RESERVED - confirm
+} SSliceArgument;
+
+/**
+* @brief Enumerate the video formats; enumerators are implicitly numbered from 0
+*/
+typedef enum {
+  VF_COMPONENT,
+  VF_PAL,
+  VF_NTSC,
+  VF_SECAM,
+  VF_MAC,
+  VF_UNDEF,
+  VF_NUM_ENUM       ///< number of video formats above (6)
+} EVideoFormatSPS;  // EVideoFormat is already defined/used elsewhere!
+
+/**
+* @brief Enumerate the color primaries; enumerators are implicitly numbered from 0
+*/
+typedef enum {
+  CP_RESERVED0,
+  CP_BT709,
+  CP_UNDEF,
+  CP_RESERVED3,
+  CP_BT470M,
+  CP_BT470BG,
+  CP_SMPTE170M,
+  CP_SMPTE240M,
+  CP_FILM,
+  CP_BT2020,
+  CP_NUM_ENUM       ///< number of color primaries above (10)
+} EColorPrimaries;
+
+/**
+* @brief Enumerate the transfer characteristics; enumerators are implicitly numbered from 0
+*/
+typedef enum {
+  TRC_RESERVED0,
+  TRC_BT709,
+  TRC_UNDEF,
+  TRC_RESERVED3,
+  TRC_BT470M,
+  TRC_BT470BG,
+  TRC_SMPTE170M,
+  TRC_SMPTE240M,
+  TRC_LINEAR,
+  TRC_LOG100,
+  TRC_LOG316,
+  TRC_IEC61966_2_4,
+  TRC_BT1361E,
+  TRC_IEC61966_2_1,
+  TRC_BT2020_10,
+  TRC_BT2020_12,
+  TRC_NUM_ENUM      ///< number of transfer characteristics above (16)
+} ETransferCharacteristics;
+
+/**
+* @brief Enumerate the color matrices; enumerators are implicitly numbered from 0
+*/
+typedef enum {
+  CM_GBR,
+  CM_BT709,
+  CM_UNDEF,
+  CM_RESERVED3,
+  CM_FCC,
+  CM_BT470BG,
+  CM_SMPTE170M,
+  CM_SMPTE240M,
+  CM_YCGCO,
+  CM_BT2020NC,
+  CM_BT2020C,
+  CM_NUM_ENUM       ///< number of color matrices above (11)
+} EColorMatrix;
+
+
+/**
+* @brief Enumerate the sample aspect ratio idc values
+*/
+typedef enum {
+  ASP_UNSPECIFIED = 0,
+  ASP_1x1 = 1,
+  ASP_12x11 = 2,
+  ASP_10x11 = 3,
+  ASP_16x11 = 4,
+  ASP_40x33 = 5,
+  ASP_24x11 = 6,
+  ASP_20x11 = 7,
+  ASP_32x11 = 8,
+  ASP_80x33 = 9,
+  ASP_18x11 = 10,
+  ASP_15x11 = 11,
+  ASP_64x33 = 12,
+  ASP_160x99 = 13,
+
+  ASP_EXT_SAR = 255   ///< with this idc, SSpatialLayerConfig::sAspectRatioExtWidth / sAspectRatioExtHeight are used
+} ESampleAspectRatio;
+
+
+/**
+* @brief  Structure for the configuration of one spatial layer
+*/
+typedef struct {
+  int   iVideoWidth;           ///< width of picture in luminance samples of a layer
+  int   iVideoHeight;          ///< height of picture in luminance samples of a layer
+  float fFrameRate;            ///< frame rate specified for a layer
+  int   iSpatialBitrate;       ///< target bitrate for a spatial layer, in unit of bps
+  int   iMaxSpatialBitrate;    ///< maximum bitrate for a spatial layer, in unit of bps
+  EProfileIdc  uiProfileIdc;   ///< value of profile IDC (PRO_UNKNOWN for auto-detection)
+  ELevelIdc    uiLevelIdc;     ///< value of level IDC (LEVEL_UNKNOWN, i.e. 0, for auto-detection)
+  int          iDLayerQp;      ///< QP setting of this layer; NOTE(review): previous comment was a copy of the level IDC one - confirm exact semantics
+
+  SSliceArgument sSliceArgument;
+
+  // Note: members bVideoSignalTypePresent through uiColorMatrix below are also defined in SWelsSPS in parameter_sets.h.
+  bool      bVideoSignalTypePresent;  // false => do not write any of the following information to the header
+  unsigned char
+  uiVideoFormat;        // EVideoFormatSPS; 3 bits in header; 0-5 => component, pal, ntsc, secam, mac, undef
+  bool      bFullRange;         // false => analog video data range [16, 235]; true => full data range [0,255]
+  bool      bColorDescriptionPresent; // false => do not write any of the following three items to the header
+  unsigned char
+  uiColorPrimaries;     // EColorPrimaries; 8 bits in header; 0 - 9 => ???, bt709, undef, ???, bt470m, bt470bg,
+  //    smpte170m, smpte240m, film, bt2020
+  unsigned char
+  uiTransferCharacteristics;  // ETransferCharacteristics; 8 bits in header; 0 - 15 => ???, bt709, undef, ???, bt470m, bt470bg, smpte170m,
+  //   smpte240m, linear, log100, log316, iec61966-2-4, bt1361e, iec61966-2-1, bt2020-10, bt2020-12
+  unsigned char
+  uiColorMatrix;        // EColorMatrix; 8 bits in header (corresponds to FFmpeg "colorspace"); 0 - 10 => GBR, bt709,
+  //   undef, ???, fcc, bt470bg, smpte170m, smpte240m, YCgCo, bt2020nc, bt2020c
+
+  bool bAspectRatioPresent; ///< aspect ratio present in VUI
+  ESampleAspectRatio eAspectRatio; ///< aspect ratio idc
+  unsigned short sAspectRatioExtWidth; ///< used if aspect ratio idc == 255 (ASP_EXT_SAR)
+  unsigned short sAspectRatioExtHeight; ///< used if aspect ratio idc == 255 (ASP_EXT_SAR)
+
+} SSpatialLayerConfig;
+
+/**
+* @brief Encoder usage type; enumerators are implicitly numbered from 0
+*/
+typedef enum {
+  CAMERA_VIDEO_REAL_TIME,      ///< camera video for real-time communication
+  SCREEN_CONTENT_REAL_TIME,    ///< screen content signal
+  CAMERA_VIDEO_NON_REAL_TIME,
+  SCREEN_CONTENT_NON_REAL_TIME,
+  INPUT_CONTENT_TYPE_ALL,
+} EUsageType;
+
+/**
+* @brief Enumerate the complexity modes
+*/
+typedef enum {
+  LOW_COMPLEXITY = 0,              ///< the lowest complexity, the fastest speed
+  MEDIUM_COMPLEXITY,          ///< medium complexity, medium speed, medium quality
+  HIGH_COMPLEXITY             ///< high complexity, lowest speed, high quality
+} ECOMPLEXITY_MODE;
+
+/**
+ * @brief Enumerate the strategies for SPS/PPS id assignment
+ */
+typedef enum {
+  CONSTANT_ID = 0,           ///< constant id in SPS/PPS
+  INCREASING_ID = 0x01,      ///< SPS/PPS id increases at each IDR
+  SPS_LISTING  = 0x02,       ///< using SPS in the existing list if possible
+  SPS_LISTING_AND_PPS_INCREASING  = 0x03,
+  SPS_PPS_LISTING  = 0x06,
+} EParameterSetStrategy;
+
+// TODO:  Refine the parameters definition.
+/**
+* @brief SVC Encoding Parameters (basic set; the same leading fields are repeated in TagEncParamExt)
+*/
+typedef struct TagEncParamBase {
+  EUsageType
+  iUsageType;                 ///< application type; please refer to the definition of EUsageType
+
+  int       iPicWidth;        ///< width of picture in luminance samples (the maximum of all layers if multiple spatial layers are present)
+  int       iPicHeight;       ///< height of picture in luminance samples (the maximum of all layers if multiple spatial layers are present)
+  int       iTargetBitrate;   ///< target bitrate desired, in unit of bps
+  RC_MODES  iRCMode;          ///< rate control mode
+  float     fMaxFrameRate;    ///< maximal input frame rate
+
+} SEncParamBase, *PEncParamBase;
+
+/**
+* @brief SVC Encoding Parameters extension
+*/
+typedef struct TagEncParamExt {
+  EUsageType
+  iUsageType;                          ///< same as in TagEncParamBase
+
+  int       iPicWidth;                 ///< same as in TagEncParamBase
+  int       iPicHeight;                ///< same as in TagEncParamBase
+  int       iTargetBitrate;            ///< same as in TagEncParamBase
+  RC_MODES  iRCMode;                   ///< same as in TagEncParamBase
+  float     fMaxFrameRate;             ///< same as in TagEncParamBase
+
+  int       iTemporalLayerNum;         ///< temporal layer number, max temporal layer = 4
+  int       iSpatialLayerNum;          ///< spatial layer number, 1 <= iSpatialLayerNum <= MAX_SPATIAL_LAYER_NUM, MAX_SPATIAL_LAYER_NUM = 4
+  SSpatialLayerConfig sSpatialLayers[MAX_SPATIAL_LAYER_NUM];
+
+  ECOMPLEXITY_MODE iComplexityMode;
+  unsigned int      uiIntraPeriod;     ///< period of Intra frame
+  int               iNumRefFrame;      ///< number of reference frames used
+  EParameterSetStrategy
+  eSpsPpsIdStrategy;       ///< different strategy in adjusting ID in SPS/PPS: 0- constant ID, 1-additional ID, 6-mapping and additional
+  bool    bPrefixNalAddingCtrl;        ///< false: do not use Prefix NAL; true: use Prefix NAL
+  bool    bEnableSSEI;                 ///< false: do not use SSEI; true: use SSEI -- TODO: planning to remove the interface of SSEI
+  bool    bSimulcastAVC;               ///< (when encoding more than 1 spatial layer) false: use SVC syntax for higher layers; true: use Simulcast AVC
+  int     iPaddingFlag;                ///< 0: disable padding; 1: padding
+  int     iEntropyCodingModeFlag;      ///< 0: CAVLC  1: CABAC.
+
+  /* rc control */
+  bool    bEnableFrameSkip;            ///< False: don't skip frames even if the VBV buffer overflows. True: allow skipping frames to keep the bitrate within limits
+  int     iMaxBitrate;                 ///< the maximum bitrate, in unit of bps, set it to UNSPECIFIED_BIT_RATE if not needed
+  int     iMaxQp;                      ///< the maximum QP encoder supports
+  int     iMinQp;                      ///< the minimum QP encoder supports
+  unsigned int uiMaxNalSize;           ///< the maximum NAL size.  This value should not be 0 for dynamic slice mode
+
+  /*LTR settings*/
+  bool     bEnableLongTermReference;   ///< 1: on, 0: off
+  int      iLTRRefNum;                 ///< the number of LTR (long term reference), TODO: setting it to an arbitrary value is not supported yet
+  unsigned int      iLtrMarkPeriod;    ///< the LTR marked period that is used in feedback.
+  /* multi-thread settings*/
+  unsigned short
+  iMultipleThreadIdc;                  ///< 1 # 0: auto(dynamic imp. internal encoder); 1: multiple threads imp. disabled; larger than 1: count number of threads;
+  bool  bUseLoadBalancing; ///< only used when uiSliceMode=1 or 3, will change slicing of a picture during the run-time of multi-thread encoding, so the result of each run may be different
+
+  /* Deblocking loop filter */
+  int       iLoopFilterDisableIdc;     ///< 0: on, 1: off, 2: on except for slice boundaries
+  int       iLoopFilterAlphaC0Offset;  ///< AlphaOffset: valid range [-6, 6], default 0
+  int       iLoopFilterBetaOffset;     ///< BetaOffset: valid range [-6, 6], default 0
+  /*pre-processing feature*/
+  bool    bEnableDenoise;              ///< denoise control
+  bool    bEnableBackgroundDetection;  ///< background detection control //VAA_BACKGROUND_DETECTION //BGD cmd
+  bool    bEnableAdaptiveQuant;        ///< adaptive quantization control
+  bool    bEnableFrameCroppingFlag;    ///< enable frame cropping flag: TRUE always in application
+  bool    bEnableSceneChangeDetect;
+
+  bool    bIsLosslessLink;            ///<  LTR advanced setting
+} SEncParamExt;
+
+/**
+* @brief Structure describing the properties of a video bitstream
+*/
+typedef struct {
+  unsigned int          size;          ///< size of the struct
+  VIDEO_BITSTREAM_TYPE  eVideoBsType;  ///< video stream type (AVC/SVC)
+} SVideoProperty;
+
+/**
+* @brief SVC Decoding Parameters, reserved here and potentially applicable in the future
+*/
+typedef struct TagSVCDecodingParam {
+  char*     pFileNameRestructed;       ///< file name of reconstructed frame used for PSNR calculation based debug
+
+  unsigned int  uiCpuLoad;             ///< CPU load
+  unsigned char uiTargetDqLayer;       ///< setting target dq layer id
+
+  ERROR_CON_IDC eEcActiveIdc;          ///< which error concealment method (see ERROR_CON_IDC) is active in the decoder
+  bool bParseOnly;                     ///< decoder for parse only, no reconstruction. When it is true, SPS/PPS size should not exceed SPS_PPS_BS_SIZE (128). Otherwise, it will return error info
+
+  SVideoProperty   sVideoProperty;    ///< video stream property
+} SDecodingParam, *PDecodingParam;
+
+/**
+* @brief Bitstream information of a layer being encoded
+*/
+typedef struct {
+  unsigned char uiTemporalId;
+  unsigned char uiSpatialId;
+  unsigned char uiQualityId;
+  EVideoFrameType eFrameType;
+  unsigned char uiLayerType;      ///< NOTE(review): presumably holds a LAYER_TYPE value - confirm
+
+  /**
+   * The sub sequence layers are ordered hierarchically based on their dependency on each other so that any picture in a layer shall not be
+   * predicted from any picture on any higher layer.
+  */
+  int   iSubSeqId;                ///< refer to D.2.11 Sub-sequence information SEI message semantics
+  int   iNalCount;              ///< count number of NAL coded already
+  int*  pNalLengthInByte;       ///< length in bytes of each NAL, for NAL indices 0 to iNalCount-1
+  unsigned char*  pBsBuf;       ///< buffer of bitstream contained
+} SLayerBSInfo, *PLayerBSInfo;
+
+/**
+* @brief Frame bit stream info: per-layer bitstream info plus frame-level type, size and timestamp
+*/
+typedef struct {
+  int           iLayerNum;       ///< NOTE(review): presumably the number of valid entries in sLayerInfo - confirm
+  SLayerBSInfo  sLayerInfo[MAX_LAYER_NUM_OF_FRAME];
+
+  EVideoFrameType eFrameType;
+  int iFrameSizeInBytes;
+  long long uiTimeStamp;         ///< note: signed type despite the "ui" prefix
+} SFrameBSInfo, *PFrameBSInfo;
+
+/**
+*  @brief Structure for a source picture (up to 4 planes)
+*/
+typedef struct Source_Picture_s {
+  int       iColorFormat;          ///< color space type
+  int       iStride[4];            ///< stride for each plane of pData
+  unsigned char*  pData[4];        ///< pointer to the data of each plane
+  int       iPicWidth;             ///< luma picture width in x coordinate
+  int       iPicHeight;            ///< luma picture height in y coordinate
+  long long uiTimeStamp;           ///< timestamp of the source picture, unit: millisecond
+} SSourcePicture;
+/**
+* @brief Structure for the bit rate info of a spatial layer
+*/
+typedef struct TagBitrateInfo {
+  LAYER_NUM iLayer;                ///< spatial layer selector (see LAYER_NUM)
+  int iBitrate;                    ///< the maximum bitrate
+} SBitrateInfo;
+
+/**
+* @brief Structure for dump layer info: a layer index and a file name
+*/
+typedef struct TagDumpLayer {
+  int iLayer;
+  char* pFileName;                 ///< NOTE(review): presumably the file used by ENCODER_OPTION_DUMP_FILE - confirm
+} SDumpLayer;
+
+/**
+* @brief Structure for the profile info of a layer
+*
+*/
+typedef struct TagProfileInfo {
+  int iLayer;
+  EProfileIdc uiProfileIdc;        ///< the profile info
+} SProfileInfo;
+
+/**
+* @brief  Structure for the level info of a layer
+*
+*/
+typedef struct TagLevelInfo {
+  int iLayer;
+  ELevelIdc uiLevelIdc;            ///< the level info
+} SLevelInfo;
+/**
+* @brief Structure for delivery status
+*
+*/
+typedef struct TagDeliveryStatus {
+  bool bDeliveryFlag;              ///< 0: the previous frame isn't delivered, 1: the previous frame is delivered
+  int iDropFrameType;              ///< the frame type that is dropped; reserved
+  int iDropFrameSize;              ///< the frame size that is dropped; reserved
+} SDeliveryStatus;
+
+/**
+* @brief The capability of the decoder, for SDP negotiation; each field comment gives the corresponding parameter name
+*/
+typedef struct TagDecoderCapability {
+  int iProfileIdc;     ///< profile_idc
+  int iProfileIop;     ///< profile-iop
+  int iLevelIdc;       ///< level_idc
+  int iMaxMbps;        ///< max-mbps
+  int iMaxFs;          ///< max-fs
+  int iMaxCpb;         ///< max-cpb
+  int iMaxDpb;         ///< max-dpb
+  int iMaxBr;          ///< max-br
+  bool bRedPicCap;     ///< redundant-pic-cap
+} SDecoderCapability;
+
+/**
+* @brief Structure for the output of parse-only decoding
+*/
+typedef struct TagParserBsInfo {
+  int iNalNum;                                 ///< total NAL number in current AU
+  int* pNalLenInByte;  ///< length of each NAL, in bytes
+  unsigned char* pDstBuff;                     ///< outputted dst buffer for parsed bitstream
+  int iSpsWidthInPixel;                        ///< required SPS width info
+  int iSpsHeightInPixel;                       ///< required SPS height info
+  unsigned long long uiInBsTimeStamp;               ///< input BS timestamp
+  unsigned long long uiOutBsTimeStamp;             ///< output BS timestamp
+} SParserBsInfo, *PParserBsInfo;
+
+/**
+* @brief Structure for encoder statistics
+*/
+typedef struct TagVideoEncoderStatistics {
+  unsigned int uiWidth;                        ///< the width of encoded frame
+  unsigned int uiHeight;                       ///< the height of encoded frame
+  //following the standard, will be 16x aligned; if there are multiple spatial layers, this is that of the highest
+  float fAverageFrameSpeedInMs;                ///< average_Encoding_Time
+
+  // rate control related
+  float fAverageFrameRate;                     ///< the average input frame rate, calculated since encoding starts, supposing that the input timestamp is in unit of ms
+  float fLatestFrameRate;                      ///< the input frame rate in the last second, supposing that the input timestamp is in unit of ms (? useful for checking BR, but is it easy to calculate?)
+  unsigned int uiBitRate;                      ///< sendrate in Bits per second, calculated within the set time-window
+  unsigned int uiAverageFrameQP;                    ///< the average QP of last encoded frame
+
+  unsigned int uiInputFrameCount;              ///< number of input frames
+  unsigned int uiSkippedFrameCount;            ///< number of skipped frames
+
+  unsigned int uiResolutionChangeTimes;        ///< number of times the resolution changed
+  unsigned int uiIDRReqNum;                    ///< number of IDR requests
+  unsigned int uiIDRSentNum;                   ///< number of actual IDRs sent
+  unsigned int uiLTRSentNum;                   ///< number of LTR sent/marked
+
+  long long    iStatisticsTs;                  ///< Timestamp of updating the statistics
+
+  unsigned long iTotalEncodedBytes;
+  unsigned long iLastStatisticsBytes;
+  unsigned long iLastStatisticsFrameCount;
+} SEncoderStatistics;
+
+/**
+* @brief  Structure for decoder statistics
+*/
+typedef struct TagVideoDecoderStatistics {
+  unsigned int uiWidth;                        ///< the width of encode/decode frame
+  unsigned int uiHeight;                       ///< the height of encode/decode frame
+  float fAverageFrameSpeedInMs;                ///< average_Decoding_Time
+  float fActualAverageFrameSpeedInMs;          ///< actual average_Decoding_Time, including freezing pictures
+  unsigned int uiDecodedFrameCount;            ///< number of decoded frames
+  unsigned int uiResolutionChangeTimes;        ///< number of times the resolution changed
+  unsigned int uiIDRCorrectNum;                ///< number of correct IDR received
+  //EC (error concealment) related
+  unsigned int
+  uiAvgEcRatio;                                ///< when EC is on, the average ratio of total EC areas, can be an indicator of reconstruction quality
+  unsigned int
+  uiAvgEcPropRatio;                            ///< when EC is on, the rough average ratio of propagated EC areas, can be an indicator of reconstruction quality
+  unsigned int uiEcIDRNum;                     ///< number of IDRs that were incomplete, or not received, but error-concealed
+  unsigned int uiEcFrameNum;                   ///< NOTE(review): undocumented; presumably the number of error-concealed frames - confirm
+  unsigned int uiIDRLostNum;                   ///< number of wholly lost IDRs
+  unsigned int
+  uiFreezingIDRNum;               ///< number of freezing IDR with error (partly received), under resolution change
+  unsigned int uiFreezingNonIDRNum;            ///< number of freezing non-IDR with error
+  int iAvgLumaQp;                              ///< average luma QP. default: -1, no correct frame outputted
+  int iSpsReportErrorNum;                      ///< number of Sps Invalid report
+  int iSubSpsReportErrorNum;                   ///< number of SubSps Invalid report
+  int iPpsReportErrorNum;                      ///< number of Pps Invalid report
+  int iSpsNoExistNalNum;                       ///< number of Sps NoExist Nal
+  int iSubSpsNoExistNalNum;                    ///< number of SubSps NoExist Nal
+  int iPpsNoExistNalNum;                       ///< number of Pps NoExist Nal
+
+  unsigned int uiProfile;                ///< Profile idc in syntax
+  unsigned int uiLevel;                  ///< level idc according to Annex A-1
+
+  int iCurrentActiveSpsId;                     ///< current active SPS id
+  int iCurrentActivePpsId;                     ///< current active PPS id
+
+  unsigned int iStatisticsLogInterval;                  ///< frame interval of statistics log
+} SDecoderStatistics; // in building, coming soon
+
+/**
+* @brief Structure for the sample aspect ratio (SAR) info in VUI
+*/
+typedef struct TagVuiSarInfo {
+  unsigned int uiSarWidth;                     ///< SAR width
+  unsigned int uiSarHeight;                    ///< SAR height
+  bool bOverscanAppropriateFlag;               ///< overscan appropriate flag
+} SVuiSarInfo, *PVuiSarInfo;
+
+#endif//WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
--- a/codec/common/inc/wels_common_defs.h
+++ b/codec/common/inc/wels_common_defs.h
@@ -1,373 +1,373 @@
-/*!
- * \copy
- *     Copyright (c)  2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-//wels_common_defs.h
-#ifndef WELS_COMMON_DEFS_H__
-#define WELS_COMMON_DEFS_H__
-
-#include "typedefs.h"
-#include "macros.h"
-#include "codec_app_def.h"
-
-
-namespace WelsCommon {
-/*common use table*/
-
-#define  CTX_NA 0
-#define  WELS_CONTEXT_COUNT 460
-#define LEVEL_NUMBER 17
-typedef struct TagLevelLimits {
-  ELevelIdc uiLevelIdc;  // level idc
-  uint32_t uiMaxMBPS; // Max macroblock processing rate(MB/s)
-  uint32_t uiMaxFS;   // Max frame sizea(MBs)
-  uint32_t uiMaxDPBMbs;// Max decoded picture buffer size(MBs)
-  uint32_t uiMaxBR; // Max video bit rate
-  uint32_t uiMaxCPB; // Max CPB size
-  int16_t iMinVmv; // Vertical MV component range upper bound
-  int16_t iMaxVmv; // Vertical MV component range lower bound
-  uint16_t uiMinCR;  // Min compression ration
-  int16_t iMaxMvsPer2Mb; // Max number of motion vectors per two consecutive MBs
-} SLevelLimits;
-
-#define CpbBrNalFactor 1200  //baseline,main,and extended profiles.
-extern const SLevelLimits g_ksLevelLimits[LEVEL_NUMBER];
-extern const uint32_t g_kuiLevelMaps[LEVEL_NUMBER];
-extern const uint8_t g_kuiMbCountScan4Idx[24];
-extern const uint8_t g_kuiCache30ScanIdx[16];
-extern const uint8_t g_kuiCache48CountScan4Idx[24];
-
-extern const uint8_t g_kuiMatrixV[6][8][8];
-
-extern const uint8_t g_kuiDequantScaling4x4Default[2][16];
-extern const uint8_t g_kuiDequantScaling8x8Default[2][64];
-extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff[52][8], 16);
-extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff8x8[52][64], 16);
-extern const uint8_t g_kuiChromaQpTable[52];
-
-extern const uint8_t g_kuiCabacRangeLps[64][4];
-extern const int8_t g_kiCabacGlobalContextIdx[WELS_CONTEXT_COUNT][4][2];
-extern const uint8_t g_kuiStateTransTable[64][2];
-extern const uint32_t g_kuiGolombUELength[256];
-/*
- *  NAL Unit Type (5 Bits)
- */
-enum EWelsNalUnitType {
-  NAL_UNIT_UNSPEC_0             = 0,
-  NAL_UNIT_CODED_SLICE          = 1,
-  NAL_UNIT_CODED_SLICE_DPA      = 2,
-  NAL_UNIT_CODED_SLICE_DPB      = 3,
-  NAL_UNIT_CODED_SLICE_DPC      = 4,
-  NAL_UNIT_CODED_SLICE_IDR      = 5,
-  NAL_UNIT_SEI                  = 6,
-  NAL_UNIT_SPS                  = 7,
-  NAL_UNIT_PPS                  = 8,
-  NAL_UNIT_AU_DELIMITER         = 9,
-  NAL_UNIT_END_OF_SEQ           = 10,
-  NAL_UNIT_END_OF_STR           = 11,
-  NAL_UNIT_FILLER_DATA          = 12,
-  NAL_UNIT_SPS_EXT              = 13,
-  NAL_UNIT_PREFIX               = 14,
-  NAL_UNIT_SUBSET_SPS           = 15,
-  NAL_UNIT_DEPTH_PARAM          = 16, // NAL_UNIT_RESV_16
-  NAL_UNIT_RESV_17              = 17,
-  NAL_UNIT_RESV_18              = 18,
-  NAL_UNIT_AUX_CODED_SLICE      = 19,
-  NAL_UNIT_CODED_SLICE_EXT      = 20,
-  NAL_UNIT_MVC_SLICE_EXT        = 21, // NAL_UNIT_RESV_21
-  NAL_UNIT_RESV_22              = 22,
-  NAL_UNIT_RESV_23              = 23,
-  NAL_UNIT_UNSPEC_24            = 24,
-  NAL_UNIT_UNSPEC_25            = 25,
-  NAL_UNIT_UNSPEC_26            = 26,
-  NAL_UNIT_UNSPEC_27            = 27,
-  NAL_UNIT_UNSPEC_28            = 28,
-  NAL_UNIT_UNSPEC_29            = 29,
-  NAL_UNIT_UNSPEC_30            = 30,
-  NAL_UNIT_UNSPEC_31            = 31
-};
-
-/*
- *  NAL Reference IDC (2 Bits)
- */
-
-enum EWelsNalRefIdc {
-  NRI_PRI_LOWEST        = 0,
-  NRI_PRI_LOW           = 1,
-  NRI_PRI_HIGH          = 2,
-  NRI_PRI_HIGHEST       = 3
-};
-
-/*
- * VCL TYPE
- */
-
-enum EVclType {
-  NON_VCL   = 0,
-  VCL       = 1,
-  NOT_APP   = 2
-};
-
-/*
- *  vcl type map for given NAL unit type and corresponding H264 type (0: AVC; 1: SVC).
- */
-extern const EVclType g_keTypeMap[32][2];
-
-#define IS_VCL_NAL(t, ext_idx)                  (g_keTypeMap[t][ext_idx] == VCL)
-#define IS_PARAM_SETS_NALS(t)                   ( (t) == NAL_UNIT_SPS || (t) == NAL_UNIT_PPS || (t) == NAL_UNIT_SUBSET_SPS )
-#define IS_SPS_NAL(t)                           ( (t) == NAL_UNIT_SPS )
-#define IS_SUBSET_SPS_NAL(t)                    ( (t) == NAL_UNIT_SUBSET_SPS )
-#define IS_PPS_NAL(t)                           ( (t) == NAL_UNIT_PPS )
-#define IS_SEI_NAL(t)                           ( (t) == NAL_UNIT_SEI )
-#define IS_AU_DELIMITER_NAL(t)                  ( (t) == NAL_UNIT_AU_DELIMITER )
-#define IS_PREFIX_NAL(t)                        ( (t) == NAL_UNIT_PREFIX )
-#define IS_SUBSET_SPS_USED(t)                   ( (t) == NAL_UNIT_SUBSET_SPS || (t) == NAL_UNIT_CODED_SLICE_EXT )
-#define IS_VCL_NAL_AVC_BASE(t)                  ( (t) == NAL_UNIT_CODED_SLICE || (t) == NAL_UNIT_CODED_SLICE_IDR )
-#define IS_NEW_INTRODUCED_SVC_NAL(t)            ( (t) == NAL_UNIT_PREFIX || (t) == NAL_UNIT_CODED_SLICE_EXT )
-
-
-/* Base SSlice Types
- * Invalid in case of eSliceType exceeds 9,
- * Need trim when eSliceType > 4 as fixed SliceType(eSliceType-4),
- * meaning mapped version after eSliceType minus 4.
- */
-
-enum EWelsSliceType {
-  P_SLICE       = 0,
-  B_SLICE       = 1,
-  I_SLICE       = 2,
-  SP_SLICE      = 3,
-  SI_SLICE      = 4,
-  UNKNOWN_SLICE = 5
-};
-
-/* SSlice Types in scalable extension */
-enum ESliceTypeExt {
-  EP_SLICE = 0, // EP_SLICE: 0, 5
-  EB_SLICE = 1, // EB_SLICE: 1, 6
-  EI_SLICE = 2  // EI_SLICE: 2, 7
-};
-
-/* List Index */
-enum EListIndex {
-  LIST_0    = 0,
-  LIST_1    = 1,
-  LIST_A    = 2
-};
-
-
-
-/* Motion Vector components */
-enum EMvComp {
-  MV_X  = 0,
-  MV_Y  = 1,
-  MV_A  = 2
-};
-
-/* Chroma Components */
-
-enum EChromaComp {
-  CHROMA_CB     = 0,
-  CHROMA_CR     = 1,
-  CHROMA_A      = 2
-};
-
-
-
-/*
- *  Memory Management Control Operation (MMCO) code
- */
-enum EMmcoCode {
-  MMCO_END          = 0,
-  MMCO_SHORT2UNUSED = 1,
-  MMCO_LONG2UNUSED  = 2,
-  MMCO_SHORT2LONG   = 3,
-  MMCO_SET_MAX_LONG = 4,
-  MMCO_RESET        = 5,
-  MMCO_LONG         = 6
-};
-
-enum EVuiVideoFormat {
-  VUI_COMPONENT   = 0,
-  VUI_PAL         = 1,
-  VUI_NTSC        = 2,
-  VUI_SECAM       = 3,
-  VUI_MAC         = 4,
-  VUI_UNSPECIFIED = 5,
-  VUI_RESERVED1   = 6,
-  VUI_RESERVED2   = 7
-};
-
-/*
- *  Bit-stream auxiliary reading / writing
- */
-typedef struct TagBitStringAux {
-  uint8_t* pStartBuf;   // buffer to start position
-  uint8_t* pEndBuf;     // buffer + length
-  int32_t  iBits;       // count bits of overall bitstreaming input
-
-  intX_t   iIndex;      //only for cavlc usage
-  uint8_t* pCurBuf;     // current reading position
-  uint32_t uiCurBits;
-  int32_t  iLeftBits;   // count number of available bits left ([1, 8]),
-  // need pointer to next byte start position in case 0 bit left then 8 instead
-} SBitStringAux, *PBitStringAux;
-
-/* NAL Unix Header in AVC, refer to Page 56 in JVT X201wcm */
-typedef struct TagNalUnitHeader {
-  uint8_t             uiForbiddenZeroBit;
-  uint8_t             uiNalRefIdc;
-  EWelsNalUnitType    eNalUnitType;
-  uint8_t             uiReservedOneByte;                // only padding usage
-} SNalUnitHeader, *PNalUnitHeader;
-
-/* NAL Unit Header in scalable extension syntax, refer to Page 390 in JVT X201wcm */
-typedef struct TagNalUnitHeaderExt {
-  SNalUnitHeader      sNalUnitHeader;
-
-  // uint8_t   reserved_one_bit;
-  bool      bIdrFlag;
-  uint8_t   uiPriorityId;
-  int8_t    iNoInterLayerPredFlag;      // change as int8_t to support 3 values probably in encoder
-  uint8_t   uiDependencyId;
-
-  uint8_t   uiQualityId;
-  uint8_t   uiTemporalId;
-  bool      bUseRefBasePicFlag;
-  bool      bDiscardableFlag;
-
-  bool      bOutputFlag;
-  uint8_t   uiReservedThree2Bits;
-  // Derived variable(s)
-  uint8_t   uiLayerDqId;
-  bool      bNalExtFlag;
-} SNalUnitHeaderExt, *PNalUnitHeaderExt;
-
-/* AVC MB types*/
-#define MB_TYPE_INTRA4x4    0x00000001
-#define MB_TYPE_INTRA16x16  0x00000002
-#define MB_TYPE_INTRA8x8    0x00000004
-#define MB_TYPE_16x16       0x00000008
-#define MB_TYPE_16x8        0x00000010
-#define MB_TYPE_8x16        0x00000020
-#define MB_TYPE_8x8         0x00000040
-#define MB_TYPE_8x8_REF0    0x00000080
-#define MB_TYPE_SKIP        0x00000100
-#define MB_TYPE_INTRA_PCM   0x00000200
-#define MB_TYPE_INTRA_BL    0x00000400
-#define MB_TYPE_DIRECT      0x00000800
-#define MB_TYPE_P0L0        0x00001000
-#define MB_TYPE_P1L0        0x00002000
-#define MB_TYPE_P0L1        0x00004000
-#define MB_TYPE_P1L1        0x00008000
-#define MB_TYPE_L0        (MB_TYPE_P0L0 | MB_TYPE_P1L0)
-#define MB_TYPE_L1        (MB_TYPE_P0L1 | MB_TYPE_P1L1)
-
-#define SUB_MB_TYPE_8x8     0x00000001
-#define SUB_MB_TYPE_8x4     0x00000002
-#define SUB_MB_TYPE_4x8     0x00000004
-#define SUB_MB_TYPE_4x4     0x00000008
-
-#define MB_TYPE_INTRA     (MB_TYPE_INTRA4x4 | MB_TYPE_INTRA16x16 | MB_TYPE_INTRA8x8 | MB_TYPE_INTRA_PCM)
-#define MB_TYPE_INTER     (MB_TYPE_16x16 | MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_8x8_REF0 | MB_TYPE_SKIP | MB_TYPE_DIRECT)
-#define IS_INTRA4x4(type) ( MB_TYPE_INTRA4x4 == (type) )
-#define IS_INTRA8x8(type) ( MB_TYPE_INTRA8x8 == (type) )
-#define IS_INTRANxN(type) ( MB_TYPE_INTRA4x4 == (type) || MB_TYPE_INTRA8x8 == (type) )
-#define IS_INTRA16x16(type) ( MB_TYPE_INTRA16x16 == (type) )
-#define IS_INTRA(type) ( (type)&MB_TYPE_INTRA )
-#define IS_INTER(type) ( (type)&MB_TYPE_INTER )
-#define IS_INTER_16x16(type) ( (type)&MB_TYPE_16x16 )
-#define IS_INTER_16x8(type) ( (type)&MB_TYPE_16x8 )
-#define IS_INTER_8x16(type) ( (type)&MB_TYPE_8x16 )
-#define IS_TYPE_L0(type) ( (type)&MB_TYPE_L0 )
-#define IS_TYPE_L1(type) ( (type)&MB_TYPE_L1 )
-#define IS_DIR(a, part, list) ((a) & (MB_TYPE_P0L0<<((part)+2*(list))))
-
-
-#define IS_SKIP(type) ( (type)&MB_TYPE_SKIP )
-#define IS_DIRECT(type) ( (type)&MB_TYPE_DIRECT )
-#define IS_SVC_INTER(type) IS_INTER(type)
-#define IS_I_BL(type) ( (type) == MB_TYPE_INTRA_BL )
-#define IS_SVC_INTRA(type) ( IS_I_BL(type) || IS_INTRA(type) )
-#define IS_Inter_8x8(type) ( (type)&MB_TYPE_8x8)
-#define IS_SUB_8x8(sub_type) ((sub_type)&SUB_MB_TYPE_8x8)
-#define IS_SUB_8x4(sub_type) ((sub_type)&SUB_MB_TYPE_8x4)
-#define IS_SUB_4x8(sub_type) ((sub_type)&SUB_MB_TYPE_4x8)
-#define IS_SUB_4x4(sub_type) ((sub_type)&SUB_MB_TYPE_4x4)
-
-#define REF_NOT_AVAIL   -2
-#define REF_NOT_IN_LIST -1  //intra
-
-/////////intra16x16  Luma
-#define I16_PRED_INVALID   -1
-#define I16_PRED_V       0
-#define I16_PRED_H       1
-#define I16_PRED_DC      2
-#define I16_PRED_P       3
-
-#define I16_PRED_DC_L    4
-#define I16_PRED_DC_T    5
-#define I16_PRED_DC_128  6
-#define I16_PRED_DC_A  7
-//////////intra4x4   Luma
-// Here, I8x8 also use these definitions
-#define I4_PRED_INVALID    0
-#define I4_PRED_V        0
-#define I4_PRED_H        1
-#define I4_PRED_DC       2
-#define I4_PRED_DDL      3 //diagonal_down_left
-#define I4_PRED_DDR      4 //diagonal_down_right
-#define I4_PRED_VR       5 //vertical_right
-#define I4_PRED_HD       6 //horizon_down
-#define I4_PRED_VL       7 //vertical_left
-#define I4_PRED_HU       8 //horizon_up
-
-#define I4_PRED_DC_L     9
-#define I4_PRED_DC_T     10
-#define I4_PRED_DC_128   11
-
-#define I4_PRED_DDL_TOP  12 //right-top replacing by padding rightmost pixel of top
-#define I4_PRED_VL_TOP   13 //right-top replacing by padding rightmost pixel of top
-#define I4_PRED_A   14
-
-//////////intra Chroma
-#define C_PRED_INVALID   -1
-#define C_PRED_DC        0
-#define C_PRED_H         1
-#define C_PRED_V         2
-#define C_PRED_P         3
-
-#define C_PRED_DC_L      4
-#define C_PRED_DC_T      5
-#define C_PRED_DC_128    6
-#define C_PRED_A    7
-}
-#endif//WELS_COMMON_DEFS_H__
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+//wels_common_defs.h
+#ifndef WELS_COMMON_DEFS_H__
+#define WELS_COMMON_DEFS_H__
+
+#include "typedefs.h"
+#include "macros.h"
+#include "codec_app_def.h"
+
+
+namespace WelsCommon {
+/*common use table*/
+
+#define  CTX_NA 0
+#define  WELS_CONTEXT_COUNT 460
+#define LEVEL_NUMBER 17
+typedef struct TagLevelLimits {
+  ELevelIdc uiLevelIdc;  // level idc
+  uint32_t uiMaxMBPS; // Max macroblock processing rate (MB/s)
+  uint32_t uiMaxFS;   // Max frame size (MBs)
+  uint32_t uiMaxDPBMbs;// Max decoded picture buffer size (MBs)
+  uint32_t uiMaxBR; // Max video bit rate
+  uint32_t uiMaxCPB; // Max CPB size
+  int16_t iMinVmv; // Vertical MV component range lower bound
+  int16_t iMaxVmv; // Vertical MV component range upper bound
+  uint16_t uiMinCR;  // Min compression ratio
+  int16_t iMaxMvsPer2Mb; // Max number of motion vectors per two consecutive MBs
+} SLevelLimits;
+
+#define CpbBrNalFactor 1200  //baseline,main,and extended profiles.
+extern const SLevelLimits g_ksLevelLimits[LEVEL_NUMBER];
+extern const uint32_t g_kuiLevelMaps[LEVEL_NUMBER];
+extern const uint8_t g_kuiMbCountScan4Idx[24];
+extern const uint8_t g_kuiCache30ScanIdx[16];
+extern const uint8_t g_kuiCache48CountScan4Idx[24];
+
+extern const uint8_t g_kuiMatrixV[6][8][8];
+
+extern const uint8_t g_kuiDequantScaling4x4Default[2][16];
+extern const uint8_t g_kuiDequantScaling8x8Default[2][64];
+extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff[52][8], 16);
+extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff8x8[52][64], 16);
+extern const uint8_t g_kuiChromaQpTable[52];
+
+extern const uint8_t g_kuiCabacRangeLps[64][4];
+extern const int8_t g_kiCabacGlobalContextIdx[WELS_CONTEXT_COUNT][4][2];
+extern const uint8_t g_kuiStateTransTable[64][2];
+extern const uint32_t g_kuiGolombUELength[256];
+/*
+ *  NAL Unit Type (5 Bits)
+ */
+enum EWelsNalUnitType {
+  NAL_UNIT_UNSPEC_0             = 0,
+  NAL_UNIT_CODED_SLICE          = 1,
+  NAL_UNIT_CODED_SLICE_DPA      = 2,
+  NAL_UNIT_CODED_SLICE_DPB      = 3,
+  NAL_UNIT_CODED_SLICE_DPC      = 4,
+  NAL_UNIT_CODED_SLICE_IDR      = 5,
+  NAL_UNIT_SEI                  = 6,
+  NAL_UNIT_SPS                  = 7,
+  NAL_UNIT_PPS                  = 8,
+  NAL_UNIT_AU_DELIMITER         = 9,
+  NAL_UNIT_END_OF_SEQ           = 10,
+  NAL_UNIT_END_OF_STR           = 11,
+  NAL_UNIT_FILLER_DATA          = 12,
+  NAL_UNIT_SPS_EXT              = 13,
+  NAL_UNIT_PREFIX               = 14,
+  NAL_UNIT_SUBSET_SPS           = 15,
+  NAL_UNIT_DEPTH_PARAM          = 16, // NAL_UNIT_RESV_16
+  NAL_UNIT_RESV_17              = 17,
+  NAL_UNIT_RESV_18              = 18,
+  NAL_UNIT_AUX_CODED_SLICE      = 19,
+  NAL_UNIT_CODED_SLICE_EXT      = 20,
+  NAL_UNIT_MVC_SLICE_EXT        = 21, // NAL_UNIT_RESV_21
+  NAL_UNIT_RESV_22              = 22,
+  NAL_UNIT_RESV_23              = 23,
+  NAL_UNIT_UNSPEC_24            = 24,
+  NAL_UNIT_UNSPEC_25            = 25,
+  NAL_UNIT_UNSPEC_26            = 26,
+  NAL_UNIT_UNSPEC_27            = 27,
+  NAL_UNIT_UNSPEC_28            = 28,
+  NAL_UNIT_UNSPEC_29            = 29,
+  NAL_UNIT_UNSPEC_30            = 30,
+  NAL_UNIT_UNSPEC_31            = 31
+};
+
+/*
+ *  NAL Reference IDC (2 Bits)
+ */
+
+enum EWelsNalRefIdc {
+  NRI_PRI_LOWEST        = 0,
+  NRI_PRI_LOW           = 1,
+  NRI_PRI_HIGH          = 2,
+  NRI_PRI_HIGHEST       = 3
+};
+
+/*
+ * VCL TYPE
+ */
+
+enum EVclType {
+  NON_VCL   = 0,
+  VCL       = 1,
+  NOT_APP   = 2
+};
+
+/*
+ *  vcl type map for given NAL unit type and corresponding H264 type (0: AVC; 1: SVC).
+ */
+extern const EVclType g_keTypeMap[32][2];
+
+#define IS_VCL_NAL(t, ext_idx)                  (g_keTypeMap[t][ext_idx] == VCL)
+#define IS_PARAM_SETS_NALS(t)                   ( (t) == NAL_UNIT_SPS || (t) == NAL_UNIT_PPS || (t) == NAL_UNIT_SUBSET_SPS )
+#define IS_SPS_NAL(t)                           ( (t) == NAL_UNIT_SPS )
+#define IS_SUBSET_SPS_NAL(t)                    ( (t) == NAL_UNIT_SUBSET_SPS )
+#define IS_PPS_NAL(t)                           ( (t) == NAL_UNIT_PPS )
+#define IS_SEI_NAL(t)                           ( (t) == NAL_UNIT_SEI )
+#define IS_AU_DELIMITER_NAL(t)                  ( (t) == NAL_UNIT_AU_DELIMITER )
+#define IS_PREFIX_NAL(t)                        ( (t) == NAL_UNIT_PREFIX )
+#define IS_SUBSET_SPS_USED(t)                   ( (t) == NAL_UNIT_SUBSET_SPS || (t) == NAL_UNIT_CODED_SLICE_EXT )
+#define IS_VCL_NAL_AVC_BASE(t)                  ( (t) == NAL_UNIT_CODED_SLICE || (t) == NAL_UNIT_CODED_SLICE_IDR )
+#define IS_NEW_INTRODUCED_SVC_NAL(t)            ( (t) == NAL_UNIT_PREFIX || (t) == NAL_UNIT_CODED_SLICE_EXT )
+
+
+/* Base Slice Types
+ * Invalid if eSliceType exceeds 9.
+ * Values above 4 (i.e. 5..9) alias the base types and need to be folded back
+ * by subtracting 5 (H.264 Table 7-6), so that 5..9 map onto 0..4.
+ */
+
+enum EWelsSliceType {
+  P_SLICE       = 0,
+  B_SLICE       = 1,
+  I_SLICE       = 2,
+  SP_SLICE      = 3,
+  SI_SLICE      = 4,
+  UNKNOWN_SLICE = 5
+};
+
+/* SSlice Types in scalable extension */
+enum ESliceTypeExt {
+  EP_SLICE = 0, // EP_SLICE: 0, 5
+  EB_SLICE = 1, // EB_SLICE: 1, 6
+  EI_SLICE = 2  // EI_SLICE: 2, 7
+};
+
+/* List Index */
+enum EListIndex {
+  LIST_0    = 0,
+  LIST_1    = 1,
+  LIST_A    = 2
+};
+
+
+
+/* Motion Vector components */
+enum EMvComp {
+  MV_X  = 0,
+  MV_Y  = 1,
+  MV_A  = 2
+};
+
+/* Chroma Components */
+
+enum EChromaComp {
+  CHROMA_CB     = 0,
+  CHROMA_CR     = 1,
+  CHROMA_A      = 2
+};
+
+
+
+/*
+ *  Memory Management Control Operation (MMCO) code
+ */
+enum EMmcoCode {
+  MMCO_END          = 0,
+  MMCO_SHORT2UNUSED = 1,
+  MMCO_LONG2UNUSED  = 2,
+  MMCO_SHORT2LONG   = 3,
+  MMCO_SET_MAX_LONG = 4,
+  MMCO_RESET        = 5,
+  MMCO_LONG         = 6
+};
+
+enum EVuiVideoFormat {
+  VUI_COMPONENT   = 0,
+  VUI_PAL         = 1,
+  VUI_NTSC        = 2,
+  VUI_SECAM       = 3,
+  VUI_MAC         = 4,
+  VUI_UNSPECIFIED = 5,
+  VUI_RESERVED1   = 6,
+  VUI_RESERVED2   = 7
+};
+
+/*
+ *  Bit-stream auxiliary reading / writing
+ */
+typedef struct TagBitStringAux {
+  uint8_t* pStartBuf;   // buffer to start position
+  uint8_t* pEndBuf;     // buffer + length
+  int32_t  iBits;       // count bits of overall bitstreaming input
+
+  intX_t   iIndex;      //only for cavlc usage
+  uint8_t* pCurBuf;     // current reading position
+  uint32_t uiCurBits;
+  int32_t  iLeftBits;   // count number of available bits left ([1, 8]),
+  // need pointer to next byte start position in case 0 bit left then 8 instead
+} SBitStringAux, *PBitStringAux;
+
+/* NAL Unit Header in AVC, refer to Page 56 in JVT X201wcm */
+typedef struct TagNalUnitHeader {
+  uint8_t             uiForbiddenZeroBit;
+  uint8_t             uiNalRefIdc;
+  EWelsNalUnitType    eNalUnitType;
+  uint8_t             uiReservedOneByte;                // only padding usage
+} SNalUnitHeader, *PNalUnitHeader;
+
+/* NAL Unit Header in scalable extension syntax, refer to Page 390 in JVT X201wcm */
+typedef struct TagNalUnitHeaderExt {
+  SNalUnitHeader      sNalUnitHeader;
+
+  // uint8_t   reserved_one_bit;
+  bool      bIdrFlag;
+  uint8_t   uiPriorityId;
+  int8_t    iNoInterLayerPredFlag;      // change as int8_t to support 3 values probably in encoder
+  uint8_t   uiDependencyId;
+
+  uint8_t   uiQualityId;
+  uint8_t   uiTemporalId;
+  bool      bUseRefBasePicFlag;
+  bool      bDiscardableFlag;
+
+  bool      bOutputFlag;
+  uint8_t   uiReservedThree2Bits;
+  // Derived variable(s)
+  uint8_t   uiLayerDqId;
+  bool      bNalExtFlag;
+} SNalUnitHeaderExt, *PNalUnitHeaderExt;
+
+/* AVC MB types*/
+#define MB_TYPE_INTRA4x4    0x00000001
+#define MB_TYPE_INTRA16x16  0x00000002
+#define MB_TYPE_INTRA8x8    0x00000004
+#define MB_TYPE_16x16       0x00000008
+#define MB_TYPE_16x8        0x00000010
+#define MB_TYPE_8x16        0x00000020
+#define MB_TYPE_8x8         0x00000040
+#define MB_TYPE_8x8_REF0    0x00000080
+#define MB_TYPE_SKIP        0x00000100
+#define MB_TYPE_INTRA_PCM   0x00000200
+#define MB_TYPE_INTRA_BL    0x00000400
+#define MB_TYPE_DIRECT      0x00000800
+#define MB_TYPE_P0L0        0x00001000
+#define MB_TYPE_P1L0        0x00002000
+#define MB_TYPE_P0L1        0x00004000
+#define MB_TYPE_P1L1        0x00008000
+#define MB_TYPE_L0        (MB_TYPE_P0L0 | MB_TYPE_P1L0)
+#define MB_TYPE_L1        (MB_TYPE_P0L1 | MB_TYPE_P1L1)
+
+#define SUB_MB_TYPE_8x8     0x00000001
+#define SUB_MB_TYPE_8x4     0x00000002
+#define SUB_MB_TYPE_4x8     0x00000004
+#define SUB_MB_TYPE_4x4     0x00000008
+
+#define MB_TYPE_INTRA     (MB_TYPE_INTRA4x4 | MB_TYPE_INTRA16x16 | MB_TYPE_INTRA8x8 | MB_TYPE_INTRA_PCM)
+#define MB_TYPE_INTER     (MB_TYPE_16x16 | MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_8x8_REF0 | MB_TYPE_SKIP | MB_TYPE_DIRECT)
+#define IS_INTRA4x4(type) ( MB_TYPE_INTRA4x4 == (type) )
+#define IS_INTRA8x8(type) ( MB_TYPE_INTRA8x8 == (type) )
+#define IS_INTRANxN(type) ( MB_TYPE_INTRA4x4 == (type) || MB_TYPE_INTRA8x8 == (type) )
+#define IS_INTRA16x16(type) ( MB_TYPE_INTRA16x16 == (type) )
+#define IS_INTRA(type) ( (type)&MB_TYPE_INTRA )
+#define IS_INTER(type) ( (type)&MB_TYPE_INTER )
+#define IS_INTER_16x16(type) ( (type)&MB_TYPE_16x16 )
+#define IS_INTER_16x8(type) ( (type)&MB_TYPE_16x8 )
+#define IS_INTER_8x16(type) ( (type)&MB_TYPE_8x16 )
+#define IS_TYPE_L0(type) ( (type)&MB_TYPE_L0 )
+#define IS_TYPE_L1(type) ( (type)&MB_TYPE_L1 )
+#define IS_DIR(a, part, list) ((a) & (MB_TYPE_P0L0<<((part)+2*(list))))
+
+
+#define IS_SKIP(type) ( (type)&MB_TYPE_SKIP )
+#define IS_DIRECT(type) ( (type)&MB_TYPE_DIRECT )
+#define IS_SVC_INTER(type) IS_INTER(type)
+#define IS_I_BL(type) ( (type) == MB_TYPE_INTRA_BL )
+#define IS_SVC_INTRA(type) ( IS_I_BL(type) || IS_INTRA(type) )
+#define IS_Inter_8x8(type) ( (type)&MB_TYPE_8x8)
+#define IS_SUB_8x8(sub_type) ((sub_type)&SUB_MB_TYPE_8x8)
+#define IS_SUB_8x4(sub_type) ((sub_type)&SUB_MB_TYPE_8x4)
+#define IS_SUB_4x8(sub_type) ((sub_type)&SUB_MB_TYPE_4x8)
+#define IS_SUB_4x4(sub_type) ((sub_type)&SUB_MB_TYPE_4x4)
+
+#define REF_NOT_AVAIL   -2
+#define REF_NOT_IN_LIST -1  //intra
+
+/////////intra16x16  Luma
+#define I16_PRED_INVALID   -1
+#define I16_PRED_V       0
+#define I16_PRED_H       1
+#define I16_PRED_DC      2
+#define I16_PRED_P       3
+
+#define I16_PRED_DC_L    4
+#define I16_PRED_DC_T    5
+#define I16_PRED_DC_128  6
+#define I16_PRED_DC_A  7
+//////////intra4x4   Luma
+// Here, I8x8 also use these definitions
+#define I4_PRED_INVALID    0 // NOTE(review): same value as I4_PRED_V, unlike I16_PRED_INVALID / C_PRED_INVALID (-1) -- confirm intended
+#define I4_PRED_V        0
+#define I4_PRED_H        1
+#define I4_PRED_DC       2
+#define I4_PRED_DDL      3 //diagonal_down_left
+#define I4_PRED_DDR      4 //diagonal_down_right
+#define I4_PRED_VR       5 //vertical_right
+#define I4_PRED_HD       6 //horizontal_down
+#define I4_PRED_VL       7 //vertical_left
+#define I4_PRED_HU       8 //horizontal_up
+
+#define I4_PRED_DC_L     9
+#define I4_PRED_DC_T     10
+#define I4_PRED_DC_128   11
+
+#define I4_PRED_DDL_TOP  12 //top-right samples replaced by padding with the rightmost pixel of the top row
+#define I4_PRED_VL_TOP   13 //top-right samples replaced by padding with the rightmost pixel of the top row
+#define I4_PRED_A   14
+
+//////////intra Chroma
+#define C_PRED_INVALID   -1
+#define C_PRED_DC        0
+#define C_PRED_H         1
+#define C_PRED_V         2
+#define C_PRED_P         3
+
+#define C_PRED_DC_L      4
+#define C_PRED_DC_T      5
+#define C_PRED_DC_128    6
+#define C_PRED_A    7
+}
+#endif//WELS_COMMON_DEFS_H__
--- a/codec/decoder/core/src/decode_slice.cpp
+++ b/codec/decoder/core/src/decode_slice.cpp
@@ -1,2402 +1,2402 @@
-/*!
- * \copy
- *     Copyright (c)  2008-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- *  Abstract
- *      current slice decoding
- *
- *  History
- *      07/10/2008 Created
- *      08/09/2013 Modified
- *
- *****************************************************************************/
-
-
-#include "deblocking.h"
-
-#include "decode_slice.h"
-
-#include "parse_mb_syn_cavlc.h"
-#include "parse_mb_syn_cabac.h"
-#include "rec_mb.h"
-#include "mv_pred.h"
-
-#include "cpu_core.h"
-
-namespace WelsDec {
-
-static inline int32_t iAbs (int32_t x) {
-  static const int32_t INT_BITS = (sizeof (int) * CHAR_BIT) - 1;
-  int32_t y = x >> INT_BITS;
-  return (x ^ y) - y;
-}
-
-extern PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const int32_t kiPicHeight);
-
-int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) {
-  PDqLayer pCurLayer = pCtx->pCurDqLayer;
-  PSlice pCurSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader;
-
-  int32_t iTotalMbTargetLayer = pSliceHeader->pSps->uiTotalMbCount;
-
-  int32_t iCurLayerWidth  = pCurLayer->iMbWidth << 4;
-  int32_t iCurLayerHeight = pCurLayer->iMbHeight << 4;
-
-  int32_t iNextMbXyIndex = 0;
-  PFmo pFmo = pCtx->pFmo;
-
-  int32_t iTotalNumMb = pCurSlice->iTotalMbInCurSlice;
-  int32_t iCountNumMb = 0;
-  PDeblockingFilterMbFunc pDeblockMb;
-
-  if (!pCtx->bAvcBasedFlag && iCurLayerWidth != pCtx->iCurSeqIntervalMaxPicWidth) {
-    return ERR_INFO_WIDTH_MISMATCH;
-  }
-
-  iNextMbXyIndex   = pSliceHeader->iFirstMbInSlice;
-  pCurLayer->iMbX  = iNextMbXyIndex % pCurLayer->iMbWidth;
-  pCurLayer->iMbY  = iNextMbXyIndex / pCurLayer->iMbWidth;
-  pCurLayer->iMbXyIndex = iNextMbXyIndex;
-
-  if (0 == iNextMbXyIndex) {
-    pCurLayer->pDec->iSpsId = pCtx->pSps->iSpsId;
-    pCurLayer->pDec->iPpsId = pCtx->pPps->iPpsId;
-
-    pCurLayer->pDec->uiQualityId = pCurLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
-  }
-
-  do {
-    if (iCountNumMb >= iTotalNumMb) {
-      break;
-    }
-
-    if (!pCtx->pParam->bParseOnly) { //for parse only, actual recon MB unnecessary
-      if (WelsTargetMbConstruction (pCtx)) {
-        WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-                 "WelsTargetSliceConstruction():::MB(%d, %d) construction error. pCurSlice_type:%d",
-                 pCurLayer->iMbX, pCurLayer->iMbY, pCurSlice->eSliceType);
-
-        return ERR_INFO_MB_RECON_FAIL;
-      }
-    }
-
-    ++iCountNumMb;
-    if (!pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite
-      pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true;
-      pCtx->pDec->iMbEcedPropNum += (pCurLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 1 : 0);
-      ++pCtx->iTotalNumMbRec;
-    }
-
-    if (pCtx->iTotalNumMbRec > iTotalMbTargetLayer) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-               "WelsTargetSliceConstruction():::pCtx->iTotalNumMbRec:%d, iTotalMbTargetLayer:%d",
-               pCtx->iTotalNumMbRec, iTotalMbTargetLayer);
-
-      return ERR_INFO_MB_NUM_EXCEED_FAIL;
-    }
-
-    if (pSliceHeader->pPps->uiNumSliceGroups > 1) {
-      iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
-    } else {
-      ++iNextMbXyIndex;
-    }
-    if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbTargetLayer) { // slice group boundary or end of a frame
-      break;
-    }
-    pCurLayer->iMbX  = iNextMbXyIndex % pCurLayer->iMbWidth;
-    pCurLayer->iMbY  = iNextMbXyIndex / pCurLayer->iMbWidth;
-    pCurLayer->iMbXyIndex = iNextMbXyIndex;
-  } while (1);
-
-  pCtx->pDec->iWidthInPixel  = iCurLayerWidth;
-  pCtx->pDec->iHeightInPixel = iCurLayerHeight;
-
-  if ((pCurSlice->eSliceType != I_SLICE) && (pCurSlice->eSliceType != P_SLICE) && (pCurSlice->eSliceType != B_SLICE))
-    return ERR_NONE; //no error but just ignore the type unsupported
-
-  if (pCtx->pParam->bParseOnly) //for parse only, deblocking should not go on
-    return ERR_NONE;
-
-  pDeblockMb = WelsDeblockingMb;
-
-  if (1 == pSliceHeader->uiDisableDeblockingFilterIdc
-      || pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice <= 0) {
-    return ERR_NONE;//NO_SUPPORTED_FILTER_IDX
-  } else {
-    WelsDeblockingFilterSlice (pCtx, pDeblockMb);
-  }
-  // any other filter_idc not supported here, 7/22/2010
-
-  return ERR_NONE;
-}
-
-int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer,
-                                       uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC) {
-  int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int32_t i, iIndex, iOffset;
-
-  if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-    for (i = 0; i < 4; i++) {
-      iIndex = g_kuiMbCountScan4Idx[i << 2];
-      if (pCurLayer->pNzc[iMbXy][iIndex] || pCurLayer->pNzc[iMbXy][iIndex + 1] || pCurLayer->pNzc[iMbXy][iIndex + 4]
-          || pCurLayer->pNzc[iMbXy][iIndex + 5]) {
-        iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2);
-        pCtx->pIdctResAddPredFunc8x8 (pDstY + iOffset, iStrideL, pCurLayer->pScaledTCoeff[iMbXy] + (i << 6));
-      }
-    }
-  } else {
-    // luma.
-    const int8_t* pNzc = pCurLayer->pNzc[iMbXy];
-    int16_t* pScaledTCoeff = pCurLayer->pScaledTCoeff[iMbXy];
-    pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 0, iStrideL, pScaledTCoeff + 0 * 64, pNzc +  0);
-    pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 8, iStrideL, pScaledTCoeff + 1 * 64, pNzc +  2);
-    pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 0, iStrideL, pScaledTCoeff + 2 * 64, pNzc +  8);
-    pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 8, iStrideL, pScaledTCoeff + 3 * 64, pNzc + 10);
-  }
-
-  const int8_t* pNzc = pCurLayer->pNzc[iMbXy];
-  int16_t* pScaledTCoeff = pCurLayer->pScaledTCoeff[iMbXy];
-  // Cb.
-  pCtx->pIdctFourResAddPredFunc (pDstU, iStrideC, pScaledTCoeff + 4 * 64, pNzc + 16);
-  // Cr.
-  pCtx->pIdctFourResAddPredFunc (pDstV, iStrideC, pScaledTCoeff + 5 * 64, pNzc + 18);
-
-  return ERR_NONE;
-}
-int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) {
-  int32_t iMbX = pCurLayer->iMbX;
-  int32_t iMbY = pCurLayer->iMbY;
-  uint8_t*  pDstY, *pDstCb, *pDstCr;
-
-  int32_t iLumaStride   = pCtx->pDec->iLinesize[0];
-  int32_t iChromaStride = pCtx->pDec->iLinesize[1];
-
-  pDstY  = pCurLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
-  pDstCb = pCurLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
-  pDstCr = pCurLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
-
-  if (pCtx->eSliceType == P_SLICE) {
-    GetInterPred (pDstY, pDstCb, pDstCr, pCtx);
-  } else {
-    if (pCtx->pTempDec == NULL)
-      pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
-    uint8_t*   pTempDstYCbCr[3];
-    uint8_t*   pDstYCbCr[3];
-    pTempDstYCbCr[0] = pCtx->pTempDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
-    pTempDstYCbCr[1] = pCtx->pTempDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
-    pTempDstYCbCr[2] = pCtx->pTempDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
-    pDstYCbCr[0] = pDstY;
-    pDstYCbCr[1] = pDstCb;
-    pDstYCbCr[2] = pDstCr;
-    GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx);
-  }
-  WelsMbInterSampleConstruction (pCtx, pCurLayer, pDstY, pDstCb, pDstCr, iLumaStride, iChromaStride);
-
-  pCtx->sBlockFunc.pWelsSetNonZeroCountFunc (
-    pCurLayer->pNzc[pCurLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti!
-  return ERR_NONE;
-}
-
-void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pCtx) {
-  const int32_t kiQMul = pCtx->bUseScalingList ? pCtx->pDequant_coeff4x4[0][iQp][0] : (g_kuiDequantCoeff[iQp][0] << 4);
-#define STRIDE 16
-  int32_t i;
-  int32_t iTemp[16]; //FIXME check if this is a good idea
-  int16_t* pBlk = pBlock;
-  static const int32_t kiXOffset[4] = {0, STRIDE, STRIDE << 2,  5 * STRIDE};
-  static const int32_t kiYOffset[4] = {0, STRIDE << 1, STRIDE << 3, 10 * STRIDE};
-
-  for (i = 0; i < 4; i++) {
-    const int32_t kiOffset = kiYOffset[i];
-    const int32_t kiX1 = kiOffset + kiXOffset[2];
-    const int32_t kiX2 = STRIDE + kiOffset;
-    const int32_t kiX3 = kiOffset + kiXOffset[3];
-    const int32_t kiI4 = i << 2; // 4*i
-    const int32_t kiZ0 = pBlk[kiOffset] + pBlk[kiX1];
-    const int32_t kiZ1 = pBlk[kiOffset] - pBlk[kiX1];
-    const int32_t kiZ2 = pBlk[kiX2] - pBlk[kiX3];
-    const int32_t kiZ3 = pBlk[kiX2] + pBlk[kiX3];
-
-    iTemp[kiI4]  = kiZ0 + kiZ3;
-    iTemp[1 + kiI4] = kiZ1 + kiZ2;
-    iTemp[2 + kiI4] = kiZ1 - kiZ2;
-    iTemp[3 + kiI4] = kiZ0 - kiZ3;
-  }
-
-  for (i = 0; i < 4; i++) {
-    const int32_t kiOffset = kiXOffset[i];
-    const int32_t kiI4 = 4 + i;
-    const int32_t kiZ0 = iTemp[i] + iTemp[4 + kiI4];
-    const int32_t kiZ1 = iTemp[i] - iTemp[4 + kiI4];
-    const int32_t kiZ2 = iTemp[kiI4] - iTemp[8 + kiI4];
-    const int32_t kiZ3 = iTemp[kiI4] + iTemp[8 + kiI4];
-
-    pBlk[kiOffset] = ((kiZ0 + kiZ3) * kiQMul + (1 << 5)) >> 6; //FIXME think about merging this into decode_resdual
-    pBlk[kiYOffset[1] + kiOffset] = ((kiZ1 + kiZ2) * kiQMul + (1 << 5)) >> 6;
-    pBlk[kiYOffset[2] + kiOffset] = ((kiZ1 - kiZ2) * kiQMul + (1 << 5)) >> 6;
-    pBlk[kiYOffset[3] + kiOffset] = ((kiZ0 - kiZ3) * kiQMul + (1 << 5)) >> 6;
-  }
-#undef STRIDE
-}
-
-int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, bool bOutput) {
-//seems IPCM should not enter this path
-  int32_t iMbXy = pCurLayer->iMbXyIndex;
-
-  WelsFillRecNeededMbInfo (pCtx, bOutput, pCurLayer);
-
-  if (IS_INTRA16x16 (pCurLayer->pMbType[iMbXy])) {
-    RecI16x16Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
-  } else if (IS_INTRA8x8 (pCurLayer->pMbType[iMbXy])) {
-    RecI8x8Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
-  } else if (IS_INTRA4x4 (pCurLayer->pMbType[iMbXy])) {
-    RecI4x4Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
-  }
-  return ERR_NONE;
-}
-
-int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) {
-  int32_t iMbX = pCurLayer->iMbX;
-  int32_t iMbY = pCurLayer->iMbY;
-  uint8_t*  pDstY, *pDstCb, *pDstCr;
-
-  int32_t iLumaStride   = pCtx->pDec->iLinesize[0];
-  int32_t iChromaStride = pCtx->pDec->iLinesize[1];
-
-  pDstY  = pCurLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
-  pDstCb = pCurLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
-  pDstCr = pCurLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
-
-  if (pCtx->eSliceType == P_SLICE) {
-    GetInterPred (pDstY, pDstCb, pDstCr, pCtx);
-  } else {
-    if (pCtx->pTempDec == NULL)
-      pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
-    uint8_t*   pTempDstYCbCr[3];
-    uint8_t*   pDstYCbCr[3];
-    pTempDstYCbCr[0] = pCtx->pTempDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
-    pTempDstYCbCr[1] = pCtx->pTempDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
-    pTempDstYCbCr[2] = pCtx->pTempDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
-    pDstYCbCr[0] = pDstY;
-    pDstYCbCr[1] = pDstCb;
-    pDstYCbCr[2] = pDstCr;
-    GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx);
-  }
-  return ERR_NONE;
-}
-
-int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx) {
-  PDqLayer pCurLayer = pCtx->pCurDqLayer;
-  if (MB_TYPE_INTRA_PCM == pCurLayer->pMbType[pCurLayer->iMbXyIndex]) {
-    //already decoded and reconstructed when parsing
-    return ERR_NONE;
-  } else if (IS_INTRA (pCurLayer->pMbType[pCurLayer->iMbXyIndex])) {
-    WelsMbIntraPredictionConstruction (pCtx, pCurLayer, 1);
-  } else if (IS_INTER (pCurLayer->pMbType[pCurLayer->iMbXyIndex])) { //InterMB
-    if (0 == pCurLayer->pCbp[pCurLayer->iMbXyIndex]) { //uiCbp==0 include SKIP
-      WelsMbInterPrediction (pCtx, pCurLayer);
-    } else {
-      WelsMbInterConstruction (pCtx, pCurLayer);
-    }
-  } else {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsTargetMbConstruction():::::Unknown MB type: %d",
-             pCurLayer->pMbType[pCurLayer->iMbXyIndex]);
-    return ERR_INFO_MB_RECON_FAIL;
-  }
-
-  return ERR_NONE;
-}
-
-void WelsChromaDcIdct (int16_t* pBlock) {
-  int32_t iStride = 32;
-  int32_t iXStride = 16;
-  int32_t iStride1 = iXStride + iStride;
-  int16_t* pBlk = pBlock;
-  int32_t iA, iB, iC, iD, iE;
-
-  iA = pBlk[0];
-  iB = pBlk[iXStride];
-  iC = pBlk[iStride];
-  iD = pBlk[iStride1];
-
-  iE = iA - iB;
-  iA += iB;
-  iB = iC - iD;
-  iC += iD;
-
-  pBlk[0] = (iA + iC);
-  pBlk[iXStride] = (iE + iB);
-  pBlk[iStride] = (iA - iC);
-  pBlk[iStride1] = (iE - iB);
-}
-
-void WelsMapNxNNeighToSampleNormal (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
-  if (pNeighAvail->iLeftAvail) {  //left
-    pSampleAvail[ 6] =
-      pSampleAvail[12] =
-        pSampleAvail[18] =
-          pSampleAvail[24] = 1;
-  }
-  if (pNeighAvail->iLeftTopAvail) { //top_left
-    pSampleAvail[0] = 1;
-  }
-  if (pNeighAvail->iTopAvail) { //top
-    pSampleAvail[1] =
-      pSampleAvail[2] =
-        pSampleAvail[3] =
-          pSampleAvail[4] = 1;
-  }
-  if (pNeighAvail->iRightTopAvail) { //top_right
-    pSampleAvail[5] = 1;
-  }
-}
-
-void WelsMapNxNNeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
-  if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) {   //left
-    pSampleAvail[ 6] =
-      pSampleAvail[12] =
-        pSampleAvail[18] =
-          pSampleAvail[24] = 1;
-  }
-  if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) {  //top_left
-    pSampleAvail[0] = 1;
-  }
-  if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) {  //top
-    pSampleAvail[1] =
-      pSampleAvail[2] =
-        pSampleAvail[3] =
-          pSampleAvail[4] = 1;
-  }
-  if (pNeighAvail->iRightTopAvail && IS_INTRA (pNeighAvail->iRightTopType)) {  //top_right
-    pSampleAvail[5] = 1;
-  }
-}
-void WelsMap16x16NeighToSampleNormal (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
-  if (pNeighAvail->iLeftAvail) {
-    *pSampleAvail = (1 << 2);
-  }
-  if (pNeighAvail->iLeftTopAvail) {
-    *pSampleAvail |= (1 << 1);
-  }
-  if (pNeighAvail->iTopAvail) {
-    *pSampleAvail |= 1;
-  }
-}
-
-void WelsMap16x16NeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
-  if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) {
-    *pSampleAvail = (1 << 2);
-  }
-  if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) {
-    *pSampleAvail |= (1 << 1);
-  }
-  if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) {
-    *pSampleAvail |= 1;
-  }
-}
-
-int32_t ParseIntra4x4Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
-                           PBitStringAux pBs,
-                           PDqLayer pCurDqLayer) {
-  int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  int32_t iFinalMode, i;
-
-  uint8_t uiNeighAvail = 0;
-  uint32_t uiCode;
-  int32_t iCode;
-  pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
-  uiNeighAvail = (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
-  for (i = 0; i < 16; i++) {
-    int32_t iPrevIntra4x4PredMode = 0;
-    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-      WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode));
-      iPrevIntra4x4PredMode = iCode;
-    } else {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
-      iPrevIntra4x4PredMode = uiCode;
-    }
-    const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i);
-
-    int8_t iBestMode;
-    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-      if (iPrevIntra4x4PredMode == -1)
-        iBestMode = kiPredMode;
-      else
-        iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode);
-    } else {
-      if (iPrevIntra4x4PredMode) {
-        iBestMode = kiPredMode;
-      } else {
-        WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode));
-        iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode);
-      }
-    }
-
-    iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i, false);
-    if (iFinalMode == GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE)) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I4x4_PRED_MODE);
-    }
-
-    pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[i]] = iFinalMode;
-
-    pIntraPredMode[g_kuiScan8[i]] = iBestMode;
-
-    iSampleAvail[g_kuiCache30ScanIdx[i]] = 1;
-  }
-  ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
-  pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
-  pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
-  pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
-
-  if (pCtx->pSps->uiChromaFormatIdc == 0)//no need parse chroma
-    return ERR_NONE;
-
-  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
-    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-    }
-    pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
-  } else {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
-    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-    }
-    pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
-  }
-
-  if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
-      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-  }
-  return ERR_NONE;
-}
-
-int32_t ParseIntra8x8Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
-                           PBitStringAux pBs,
-                           PDqLayer pCurDqLayer) {
-  // Similar with Intra_4x4, can put them together when needed
-  int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  int32_t iFinalMode, i;
-
-  uint8_t uiNeighAvail = 0;
-  uint32_t uiCode;
-  int32_t iCode;
-  pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
-  // Top-Right : Left : Top-Left : Top
-  uiNeighAvail = (iSampleAvail[5] << 3) | (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
-
-  pCurDqLayer->pIntraNxNAvailFlag[iMbXy] = uiNeighAvail;
-
-  for (i = 0; i < 4; i++) {
-    int32_t iPrevIntra4x4PredMode = 0;
-    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-      WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode));
-      iPrevIntra4x4PredMode = iCode;
-    } else {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
-      iPrevIntra4x4PredMode = uiCode;
-    }
-    const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i << 2);
-
-    int8_t iBestMode;
-    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-      if (iPrevIntra4x4PredMode == -1)
-        iBestMode = kiPredMode;
-      else
-        iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode);
-    } else {
-      if (iPrevIntra4x4PredMode) {
-        iBestMode = kiPredMode;
-      } else {
-        WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode));
-        iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode);
-      }
-    }
-
-    iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i << 2, true);
-
-    if (iFinalMode == GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE)) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I4x4_PRED_MODE);
-    }
-
-    for (int j = 0; j < 4; j++) {
-      pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[ (i << 2) + j]] = iFinalMode;
-      pIntraPredMode[g_kuiScan8[ (i << 2) + j]] = iBestMode;
-      iSampleAvail[g_kuiCache30ScanIdx[ (i << 2) + j]] = 1;
-    }
-  }
-  ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
-  pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
-  pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
-  pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
-
-  if (pCtx->pSps->uiChromaFormatIdc == 0)
-    return ERR_NONE;
-
-  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
-    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-    }
-    pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
-  } else {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
-    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-    }
-    pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
-  }
-
-  if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
-      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-  }
-
-  return ERR_NONE;
-}
-
-int32_t ParseIntra16x16Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, PBitStringAux pBs,
-                             PDqLayer pCurDqLayer) {
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  uint8_t uiNeighAvail = 0; //0x07 = 0 1 1 1, means left, top-left, top avail or not. (1: avail, 0: unavail)
-  uint32_t uiCode;
-  int32_t iCode;
-  pCtx->pMap16x16NeighToSampleFunc (pNeighAvail, &uiNeighAvail);
-
-  if (CheckIntra16x16PredMode (uiNeighAvail,
-                               &pCurDqLayer->pIntraPredMode[iMbXy][7])) { //invalid iPredMode, must stop decoding
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I16x16_PRED_MODE);
-  }
-  if (pCtx->pSps->uiChromaFormatIdc == 0)
-    return ERR_NONE;
-
-  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
-    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-    }
-    pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
-  } else {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
-    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-    }
-    pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
-  }
-  if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
-      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-  }
-
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& uiEosFlag) {
-  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
-  PBitStringAux pBsAux           = pCurLayer->pBitStringAux;
-  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
-  SWelsNeighAvail sNeighAvail;
-  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
-  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
-  int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int32_t i;
-  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
-
-  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
-
-  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-
-  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-  pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
-  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
-  WELS_READ_VERIFY (ParseMBTypeISliceCabac (pCtx, &sNeighAvail, uiMbType));
-  if (uiMbType > 25) {
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-  } else if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17
-             && uiMbType <= 24))) {
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-  } else if (25 == uiMbType) {   //I_PCM
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!");
-    WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
-    pSlice->iLastDeltaQp = 0;
-    WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
-    if (uiEosFlag) {
-      RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
-    }
-    return ERR_NONE;
-  } else if (0 == uiMbType) { //I4x4
-    ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
-    if (pCtx->pPps->bTransform8x8ModeFlag) {
-      // Transform 8x8 cabac will be added soon
-      WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, &sNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
-    }
-    if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
-      uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
-      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-      WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
-    } else {
-      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-      WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
-    }
-    //get uiCbp for I4x4
-    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, &sNeighAvail, uiCbp));
-    pCurLayer->pCbp[iMbXy] = uiCbp;
-    pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
-    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? uiCbp >> 4 : 0;
-    uiCbpLuma = uiCbp & 15;
-  } else { //I16x16;
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
-    pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-    pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-    pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
-    pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
-    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ;
-    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
-    WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
-    WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBsAux, pCurLayer));
-  }
-
-  ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-  pCurLayer->pCbfDc[iMbXy] = 0;
-
-  if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) {
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurLayer->pLumaQp[iMbXy] +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
-    }
-  }
-
-  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
-    int32_t iQpDelta, iId8x8, iId4x4;
-    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
-    if (iQpDelta > 25 || iQpDelta < -26) {//out of iQpDelta range
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
-    }
-    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
-    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pSlice->iLastMbQp +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
-    }
-    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-      //step1: Luma DC
-      WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
-                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
-      //step2: Luma AC
-      if (uiCbpLuma) {
-        for (i = 0; i < 16; i++) {
-          WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, i,
-                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC,
-                            pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurLayer->pLumaQp[iMbXy], pCtx));
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else { //pNonZeroCount = 0
-        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-      }
-    } else { //non-MB_TYPE_INTRA16x16
-      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-        // Transform 8x8 support for CABAC
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          if (uiCbpLuma & (1 << iId8x8)) {
-            WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (&sNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
-                              iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, LUMA_DC_AC_INTRA_8,
-                              pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
-          } else {
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else {
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          if (uiCbpLuma & (1 << iId8x8)) {
-            int32_t iIdx = (iId8x8 << 2);
-            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-              //Luma (DC and AC decoding together)
-              WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
-                                g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
-                                pCurLayer->pLumaQp[iMbXy], pCtx));
-              iIdx++;
-            }
-          } else {
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      }
-    }
-    int32_t iMbResProperty;
-    //chroma
-    //step1: DC
-    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
-      //Cb Cr
-      for (i = 0; i < 2; i++) {
-        iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
-        WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
-                          iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
-      }
-    }
-
-    //step2: AC
-    if (2 == uiCbpChroma) {
-      for (i = 0; i < 2; i++) { //Cb Cr
-        iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
-        int32_t iIdx = 16 + (i << 2);
-        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-          WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx,
-                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
-                            pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
-          iIdx++;
-        }
-      }
-      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
-      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
-      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
-      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
-    } else {
-      ST16 (&pCurLayer->pNzc[iMbXy][16], 0);
-      ST16 (&pCurLayer->pNzc[iMbXy][20], 0);
-      ST16 (&pCurLayer->pNzc[iMbXy][18], 0);
-      ST16 (&pCurLayer->pNzc[iMbXy][22], 0);
-    }
-  } else {
-    ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-  }
-
-  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
-  if (uiEosFlag) {
-    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
-  }
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
-  WELS_READ_VERIFY (WelsDecodeMbCabacISliceBaseMode0 (pCtx, uiEosFlag));
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) {
-  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
-  PBitStringAux pBsAux           = pCurLayer->pBitStringAux;
-  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
-
-  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
-  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
-  int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int32_t iMbResProperty;
-  int32_t i;
-  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
-
-  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
-
-  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-
-  WELS_READ_VERIFY (ParseMBTypePSliceCabac (pCtx, pNeighAvail, uiMbType));
-  // uiMbType = 4 is not allowded.
-  if (uiMbType < 4) { //Inter mode
-    int16_t pMotionVector[LIST_A][30][MV_A];
-    int16_t pMvdCache[LIST_A][30][MV_A];
-    int8_t  pRefIndex[LIST_A][30];
-    pCurLayer->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType;
-    WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurLayer);
-    WELS_READ_VERIFY (ParseInterPMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex));
-    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-  } else { //Intra mode
-    uiMbType -= 5;
-    if (uiMbType > 25)
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-    if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-
-    if (25 == uiMbType) {   //I_PCM
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!");
-      WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
-      pSlice->iLastDeltaQp = 0;
-      WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
-      if (uiEosFlag) {
-        RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
-      }
-      return ERR_NONE;
-    } else { //normal Intra mode
-      if (0 == uiMbType) { //Intra4x4
-        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
-        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
-        if (pCtx->pPps->bTransform8x8ModeFlag) {
-          WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
-        }
-        if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
-          uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
-          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-          WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
-        } else {
-          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-          WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
-        }
-      } else { //Intra16x16
-        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
-        pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-        pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
-        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
-        uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
-        uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
-        WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
-        WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurLayer));
-      }
-    }
-  }
-
-  ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-
-  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
-    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
-
-    pCurLayer->pCbp[iMbXy] = uiCbp;
-    pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
-    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ;
-    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
-  }
-
-  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-
-    if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
-      // Need modification when B picutre add in
-      bool bNeedParseTransformSize8x8Flag =
-        (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
-          || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
-         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
-         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
-         && ((pCurLayer->pCbp[iMbXy] & 0x0F) > 0)
-         && (pCtx->pPps->bTransform8x8ModeFlag));
-
-      if (bNeedParseTransformSize8x8Flag) {
-        WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail,
-                          pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); //transform_size_8x8_flag
-      }
-    }
-
-    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
-
-    int32_t iQpDelta, iId8x8, iId4x4;
-
-    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
-    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
-    }
-    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
-    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-    }
-
-    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-      //step1: Luma DC
-      WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
-                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
-      //step2: Luma AC
-      if (uiCbpLuma) {
-        for (i = 0; i < 16; i++) {
-          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart,
-                            1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
-                            pCurLayer->pLumaQp[iMbXy], pCtx));
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else {
-        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-      }
-    } else { //non-MB_TYPE_INTRA16x16
-      if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
-        // Transform 8x8 support for CABAC
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          if (uiCbpLuma & (1 << iId8x8)) {
-            WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
-                              iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart,
-                              IS_INTRA (pCurLayer->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
-                              pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
-          } else {
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else {
-        iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          if (uiCbpLuma & (1 << iId8x8)) {
-            int32_t iIdx = (iId8x8 << 2);
-            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-              //Luma (DC and AC decoding together)
-              WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
-                                g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
-                                pCurLayer->pLumaQp[iMbXy],
-                                pCtx));
-              iIdx++;
-            }
-          } else {
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      }
-    }
-
-    //chroma
-    //step1: DC
-    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
-      for (i = 0; i < 2; i++) {
-        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
-          iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
-        else
-          iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
-
-        WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
-                          iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
-      }
-    }
-    //step2: AC
-    if (2 == uiCbpChroma) {
-      for (i = 0; i < 2; i++) {
-        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
-          iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
-        else
-          iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
-        int32_t index = 16 + (i << 2);
-        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index,
-                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
-                            iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
-          index++;
-        }
-      }
-      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
-      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
-      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
-      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
-    } else {
-      ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
-      ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-    }
-  } else {
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-    }
-  }
-
-  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
-  if (uiEosFlag) {
-    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
-  }
-
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) {
-  PDqLayer pCurLayer = pCtx->pCurDqLayer;
-  PBitStringAux pBsAux = pCurLayer->pBitStringAux;
-  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
-
-  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
-  int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
-  int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int32_t iMbResProperty;
-  int32_t i;
-  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
-
-  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
-
-  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-
-  WELS_READ_VERIFY (ParseMBTypeBSliceCabac (pCtx, pNeighAvail, uiMbType));
-
-  if (uiMbType < 23) { //Inter B mode
-    int16_t pMotionVector[LIST_A][30][MV_A];
-    int16_t pMvdCache[LIST_A][30][MV_A];
-    int8_t  pRefIndex[LIST_A][30];
-    int8_t  pDirect[30];
-    pCurLayer->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType;
-    WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurLayer);
-    WelsFillDirectCacheCabac (pNeighAvail, pDirect, pCurLayer);
-    WELS_READ_VERIFY (ParseInterBMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex,
-                      pDirect));
-    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-  } else { //Intra mode
-    uiMbType -= 23;
-    if (uiMbType > 25)
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-    if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-
-    if (25 == uiMbType) {   //I_PCM
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!");
-      WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
-      pSlice->iLastDeltaQp = 0;
-      WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
-      if (uiEosFlag) {
-        RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
-      }
-      return ERR_NONE;
-    } else { //normal Intra mode
-      if (0 == uiMbType) { //Intra4x4
-        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
-        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
-        if (pCtx->pPps->bTransform8x8ModeFlag) {
-          WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
-        }
-        if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
-          uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
-          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-          WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
-        } else {
-          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-          WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
-        }
-      } else { //Intra16x16
-        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
-        pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-        pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
-        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
-        uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
-        uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
-        WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
-        WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurLayer));
-      }
-    }
-  }
-
-  ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-
-  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
-    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
-
-    pCurLayer->pCbp[iMbXy] = uiCbp;
-    pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
-    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
-    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
-  }
-
-  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-
-    if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
-      // Need modification when B picutre add in
-      bool bNeedParseTransformSize8x8Flag =
-        (((IS_INTER_16x16 (pCurLayer->pMbType[iMbXy]) || IS_DIRECT (pCurLayer->pMbType[iMbXy])
-           || IS_INTER_16x8 (pCurLayer->pMbType[iMbXy]) || IS_INTER_8x16 (pCurLayer->pMbType[iMbXy]))
-          || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
-         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
-         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
-         && ((pCurLayer->pCbp[iMbXy] & 0x0F) > 0)
-         && (pCtx->pPps->bTransform8x8ModeFlag));
-
-      if (bNeedParseTransformSize8x8Flag) {
-        WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail,
-                          pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); //transform_size_8x8_flag
-      }
-    }
-
-    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
-
-    int32_t iQpDelta, iId8x8, iId4x4;
-
-    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
-    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
-    }
-    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
-    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-    }
-
-    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-      //step1: Luma DC
-      WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
-                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
-      //step2: Luma AC
-      if (uiCbpLuma) {
-        for (i = 0; i < 16; i++) {
-          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart,
-                            1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
-                            pCurLayer->pLumaQp[iMbXy], pCtx));
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else {
-        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-      }
-    } else { //non-MB_TYPE_INTRA16x16
-      if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
-        // Transform 8x8 support for CABAC
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          if (uiCbpLuma & (1 << iId8x8)) {
-            WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
-                              iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart,
-                              IS_INTRA (pCurLayer->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
-                              pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
-          } else {
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else {
-        iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          if (uiCbpLuma & (1 << iId8x8)) {
-            int32_t iIdx = (iId8x8 << 2);
-            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-              //Luma (DC and AC decoding together)
-              WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
-                                g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
-                                pCurLayer->pLumaQp[iMbXy],
-                                pCtx));
-              iIdx++;
-            }
-          } else {
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      }
-    }
-
-    //chroma
-    //step1: DC
-    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
-      for (i = 0; i < 2; i++) {
-        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
-          iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
-        else
-          iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
-
-        WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
-                          iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
-      }
-    }
-    //step2: AC
-    if (2 == uiCbpChroma) {
-      for (i = 0; i < 2; i++) {
-        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
-          iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
-        else
-          iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
-        int32_t index = 16 + (i << 2);
-        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index,
-                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
-                            iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
-          index++;
-        }
-      }
-      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
-      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
-      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
-      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
-    } else {
-      ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
-      ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-    }
-  } else {
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-    }
-  }
-
-  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
-  if (uiEosFlag) {
-    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
-  }
-
-  return ERR_NONE;
-}
-
-
-int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
-  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
-  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
-  PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
-  uint32_t uiCode;
-  int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int32_t i;
-  SWelsNeighAvail uiNeighAvail;
-  pCurLayer->pCbp[iMbXy] = 0;
-  pCurLayer->pCbfDc[iMbXy] = 0;
-  pCurLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
-
-  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-
-  GetNeighborAvailMbType (&uiNeighAvail, pCurLayer);
-  WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode));
-
-  if (uiCode) {
-    int16_t pMv[2] = {0};
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
-    ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-
-    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-    memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
-    pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && ppRefPic[0]->bIsComplete);
-    //predict mv
-    PredPSkipMvFromNeighbor (pCurLayer, pMv);
-    for (i = 0; i < 16; i++) {
-      ST32 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)pMv);
-      ST32 (pCurLayer->pMvd[0][iMbXy][i], 0);
-    }
-
-    //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
-    //  memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
-    //}
-
-    //reset rS
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? dqaunt of previous mb
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-    }
-
-    //for neighboring CABAC usage
-    pSlice->iLastDeltaQp = 0;
-
-    WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
-
-    return ERR_NONE;
-  }
-
-  WELS_READ_VERIFY (WelsDecodeMbCabacPSliceBaseMode0 (pCtx, &uiNeighAvail, uiEosFlag));
-  return ERR_NONE;
-}
-
-
-int32_t WelsDecodeMbCabacBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
-  PDqLayer pCurLayer = pCtx->pCurDqLayer;
-  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
-  PPicture* ppRefPicL0 = pCtx->sRefPic.pRefList[LIST_0];
-  PPicture* ppRefPicL1 = pCtx->sRefPic.pRefList[LIST_1];
-  uint32_t uiCode;
-  int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int32_t i;
-  SWelsNeighAvail uiNeighAvail;
-  pCurLayer->pCbp[iMbXy] = 0;
-  pCurLayer->pCbfDc[iMbXy] = 0;
-  pCurLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
-
-  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-
-  GetNeighborAvailMbType (&uiNeighAvail, pCurLayer);
-  WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode));
-
-  memset (pCurLayer->pDirect[iMbXy], 0, sizeof (int8_t) * 16);
-
-  if (uiCode) {
-    int16_t pMv[LIST_A][2] = { {0, 0}, { 0, 0 } };
-    int8_t  ref[LIST_A] = { 0 };
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP | MB_TYPE_DIRECT;
-    ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-
-    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-    memset (pCurLayer->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 16);
-    memset (pCurLayer->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16);
-    pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && ppRefPicL0[0]->bIsComplete)
-                            || ! (ppRefPicL1[0] && ppRefPicL1[0]->bIsComplete);
-
-    if (pSliceHeader->iDirectSpatialMvPredFlag) {
-
-      //predict direct spatial mv
-      PredMvBDirectSpatial (pCtx, pMv, ref);
-    } else {
-      //temporal direct mode
-      ComputeColocated (pCtx);
-      PredBDirectTemporal (pCtx, pMv, ref);
-    }
-
-    //reset rS
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? dqaunt of previous mb
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-    }
-
-    //for neighboring CABAC usage
-    pSlice->iLastDeltaQp = 0;
-
-    WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
-
-    return ERR_NONE;
-  }
-
-  WELS_READ_VERIFY (WelsDecodeMbCabacBSliceBaseMode0 (pCtx, &uiNeighAvail, uiEosFlag));
-  return ERR_NONE;
-}
-
-// Calculate deqaunt coeff scaling list value
-int32_t  WelsCalcDeqCoeffScalingList (PWelsDecoderContext pCtx) {
-  if (pCtx->pSps->bSeqScalingMatrixPresentFlag || pCtx->pPps->bPicScalingMatrixPresentFlag) {
-    pCtx->bUseScalingList = true;
-
-    if (!pCtx->bDequantCoeff4x4Init || (pCtx->iDequantCoeffPpsid != pCtx->pPps->iPpsId)) {
-      int i, q, x, y;
-      //Init dequant coeff value for different QP
-      for (i = 0; i < 6; i++) {
-        pCtx->pDequant_coeff4x4[i] = pCtx->pDequant_coeff_buffer4x4[i];
-        pCtx->pDequant_coeff8x8[i] = pCtx->pDequant_coeff_buffer8x8[i];
-        for (q = 0; q < 51; q++) {
-          for (x = 0; x < 16; x++) {
-            pCtx->pDequant_coeff4x4[i][q][x] = pCtx->pPps->bPicScalingMatrixPresentFlag ? pCtx->pPps->iScalingList4x4[i][x] *
-                                               g_kuiDequantCoeff[q][x & 0x07] : pCtx->pSps->iScalingList4x4[i][x] * g_kuiDequantCoeff[q][x & 0x07];
-          }
-          for (y = 0; y < 64; y++) {
-            pCtx->pDequant_coeff8x8[i][q][y] = pCtx->pPps->bPicScalingMatrixPresentFlag ? pCtx->pPps->iScalingList8x8[i][y] *
-                                               g_kuiMatrixV[q % 6][y / 8][y % 8] : pCtx->pSps->iScalingList8x8[i][y] * g_kuiMatrixV[q % 6][y / 8][y % 8];
-          }
-        }
-      }
-      pCtx->bDequantCoeff4x4Init = true;
-      pCtx->iDequantCoeffPpsid = pCtx->pPps->iPpsId;
-    }
-  } else
-    pCtx->bUseScalingList = false;
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur) {
-  PDqLayer pCurLayer = pCtx->pCurDqLayer;
-  PFmo pFmo = pCtx->pFmo;
-  int32_t iRet;
-  int32_t iNextMbXyIndex, iSliceIdc;
-
-  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeaderExt pSliceHeaderExt = &pSlice->sSliceHeaderExt;
-  PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader;
-  int32_t iMbX, iMbY;
-  const int32_t kiCountNumMb = pSliceHeader->pSps->uiTotalMbCount; //need to be correct when fmo or multi slice
-  uint32_t uiEosFlag = 0;
-  PWelsDecMbFunc pDecMbFunc;
-
-  pSlice->iTotalMbInCurSlice = 0; //initialize at the starting of slice decoding.
-
-  if (pCtx->pPps->bEntropyCodingModeFlag) {
-    if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag ||
-        pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag ||
-        pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
-               "WelsDecodeSlice()::::ILP flag exist, not supported with CABAC enabled!");
-      pCtx->iErrorCode |= dsBitstreamError;
-      return dsBitstreamError;
-    }
-    if (P_SLICE == pSliceHeader->eSliceType)
-      pDecMbFunc = WelsDecodeMbCabacPSlice;
-    else if (B_SLICE == pSliceHeader->eSliceType)
-      pDecMbFunc = WelsDecodeMbCabacBSlice;
-    else //I_SLICE. B_SLICE is being supported
-      pDecMbFunc = WelsDecodeMbCabacISlice;
-  } else {
-    if (P_SLICE == pSliceHeader->eSliceType) {
-      pDecMbFunc = WelsDecodeMbCavlcPSlice;
-    } else { //I_SLICE
-      pDecMbFunc = WelsDecodeMbCavlcISlice;
-    }
-  }
-
-  if (pSliceHeader->pPps->bConstainedIntraPredFlag) {
-    pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain1IntraNxN;
-    pCtx->pMapNxNNeighToSampleFunc    = WelsMapNxNNeighToSampleConstrain1;
-    pCtx->pMap16x16NeighToSampleFunc  = WelsMap16x16NeighToSampleConstrain1;
-  } else {
-    pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain0IntraNxN;
-    pCtx->pMapNxNNeighToSampleFunc    = WelsMapNxNNeighToSampleNormal;
-    pCtx->pMap16x16NeighToSampleFunc  = WelsMap16x16NeighToSampleNormal;
-  }
-
-  pCtx->eSliceType = pSliceHeader->eSliceType;
-  if (pCurLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) {
-    int32_t iQp = pSlice->sSliceHeaderExt.sSliceHeader.iSliceQp;
-    int32_t iCabacInitIdc = pSlice->sSliceHeaderExt.sSliceHeader.iCabacInitIdc;
-    WelsCabacContextInit (pCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
-    //InitCabacCtx (pCtx->pCabacCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
-    pSlice->iLastDeltaQp = 0;
-    WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux));
-  }
-  //try to calculate  the dequant_coeff
-  WelsCalcDeqCoeffScalingList (pCtx);
-
-  iNextMbXyIndex = pSliceHeader->iFirstMbInSlice;
-  iMbX = iNextMbXyIndex % pCurLayer->iMbWidth;
-  iMbY = iNextMbXyIndex / pCurLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009
-  pSlice->iMbSkipRun = -1;
-  iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurLayer->uiLayerDqId;
-
-  pCurLayer->iMbX =  iMbX;
-  pCurLayer->iMbY = iMbY;
-  pCurLayer->iMbXyIndex = iNextMbXyIndex;
-
-  do {
-    if ((-1 == iNextMbXyIndex) || (iNextMbXyIndex >= kiCountNumMb)) { // slice group boundary or end of a frame
-      break;
-    }
-
-    pCurLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc;
-    pCtx->bMbRefConcealed = false;
-    iRet = pDecMbFunc (pCtx,  pNalCur, uiEosFlag);
-    pCurLayer->pMbRefConcealedFlag[iNextMbXyIndex] = pCtx->bMbRefConcealed;
-    if (iRet != ERR_NONE) {
-      return iRet;
-    }
-
-    ++pSlice->iTotalMbInCurSlice;
-    if (uiEosFlag) { //end of slice
-      break;
-    }
-    if (pSliceHeader->pPps->uiNumSliceGroups > 1) {
-      iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
-    } else {
-      ++iNextMbXyIndex;
-    }
-    iMbX = iNextMbXyIndex % pCurLayer->iMbWidth;
-    iMbY = iNextMbXyIndex / pCurLayer->iMbWidth;
-    pCurLayer->iMbX =  iMbX;
-    pCurLayer->iMbY = iMbY;
-    pCurLayer->iMbXyIndex = iNextMbXyIndex;
-  } while (1);
-
-  return ERR_NONE;
-}
-
-int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
-  SVlcTable* pVlcTable     = &pCtx->sVlcTable;
-  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
-  PBitStringAux pBs              = pCurLayer->pBitStringAux;
-  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
-
-  SWelsNeighAvail sNeighAvail;
-  int32_t iMbResProperty;
-
-  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
-  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
-
-  int32_t iMbX = pCurLayer->iMbX;
-  int32_t iMbY = pCurLayer->iMbY;
-  const int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
-  int32_t i;
-  int32_t iRet = ERR_NONE;
-  uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
-  uint32_t uiCode;
-  int32_t iCode;
-
-  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
-  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
-  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-  pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
-
-  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
-  uiMbType = uiCode;
-  if (uiMbType > 25)
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-  if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-
-  if (25 == uiMbType) {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!");
-    int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0];
-    int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1];
-
-    int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
-    int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
-
-    uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL;
-    uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC;
-    uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC;
-
-    uint8_t* pTmpBsBuf;
-
-
-    int32_t i;
-    int32_t iCopySizeY  = (sizeof (uint8_t) << 4);
-    int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
-
-    int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
-
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
-
-    //step 1: locating bit-stream pointer [must align into integer byte]
-    pBs->pCurBuf -= iIndex;
-
-    //step 2: copy pixel from bit-stream into fdec [reconstruction]
-    pTmpBsBuf = pBs->pCurBuf;
-    if (!pCtx->pParam->bParseOnly) {
-      for (i = 0; i < 16; i++) { //luma
-        memcpy (pDecY, pTmpBsBuf, iCopySizeY);
-        pDecY += iDecStrideL;
-        pTmpBsBuf += 16;
-      }
-      for (i = 0; i < 8; i++) { //cb
-        memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
-        pDecU += iDecStrideC;
-        pTmpBsBuf += 8;
-      }
-      for (i = 0; i < 8; i++) { //cr
-        memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
-        pDecV += iDecStrideC;
-        pTmpBsBuf += 8;
-      }
-    }
-
-    pBs->pCurBuf += 384;
-
-    //step 3: update QP and pNonZeroCount
-    pCurLayer->pLumaQp[iMbXy] = 0;
-    memset (pCurLayer->pChromaQp[iMbXy], 0, sizeof (pCurLayer->pChromaQp[iMbXy]));
-    memset (pNzc, 16, sizeof (pCurLayer->pNzc[iMbXy]));   //Rec. 9.2.1 for PCM, nzc=16
-    WELS_READ_VERIFY (InitReadBits (pBs, 0));
-    return ERR_NONE;
-  } else if (0 == uiMbType) { //reference to JM
-    ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
-    if (pCtx->pPps->bTransform8x8ModeFlag) {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
-      pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
-      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-        uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
-      }
-    }
-    if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-      WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
-    } else {
-      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-      WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
-    }
-
-    //uiCbp
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
-    uiCbp = uiCode;
-    //G.9.1 Alternative parsing process for coded pBlock pattern
-    if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47))
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
-    if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
-
-    if (pCtx->pSps->uiChromaFormatIdc)
-      uiCbp = g_kuiIntra4x4CbpTable[uiCbp];
-    else
-      uiCbp = g_kuiIntra4x4CbpTable400[uiCbp];
-    pCurLayer->pCbp[iMbXy] = uiCbp;
-    uiCbpC = uiCbp >> 4;
-    uiCbpL = uiCbp & 15;
-  } else { //I_PCM exclude, we can ignore it
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
-    pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-    pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-    pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
-    pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
-    uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
-    uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
-    WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
-    WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer));
-  }
-
-  ST32A4 (&pNzc[0], 0);
-  ST32A4 (&pNzc[4], 0);
-  ST32A4 (&pNzc[8], 0);
-  ST32A4 (&pNzc[12], 0);
-  ST32A4 (&pNzc[16], 0);
-  ST32A4 (&pNzc[20], 0);
-
-  if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) {
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-    }
-
-  }
-
-  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
-    int32_t iQpDelta, iId8x8, iId4x4;
-
-    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
-    iQpDelta = iCode;
-
-    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
-    }
-
-    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
-    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
-                                       51)];
-    }
-
-
-    BsStartCavlc (pBs);
-
-    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-      //step1: Luma DC
-      if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
-                                          pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-        return iRet;//abnormal
-      }
-      //step2: Luma AC
-      if (uiCbpL) {
-        for (i = 0; i < 16; i++) {
-          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
-                                              g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
-                                              pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-            return iRet;//abnormal
-          }
-        }
-        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      }
-    } else { //non-MB_TYPE_INTRA16x16
-      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
-          if (uiCbpL & (1 << iId8x8)) {
-            int32_t iIndex = (iId8x8 << 2);
-            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-              if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
-                                                     g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
-                                                     pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-                return iRet;
-              }
-              iIndex++;
-            }
-          } else {
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else {
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          if (uiCbpL & (1 << iId8x8)) {
-            int32_t iIndex = (iId8x8 << 2);
-            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-              //Luma (DC and AC decoding together)
-              if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
-                                                  g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
-                                                  pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-                return iRet;//abnormal
-              }
-              iIndex++;
-            }
-          } else {
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2)]], 0);
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      }
-    }
-
-    //chroma
-    //step1: DC
-    if (1 == uiCbpC || 2 == uiCbpC) {
-      for (i = 0; i < 2; i++) { //Cb Cr
-        iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
-        if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
-                                            pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
-          return iRet;//abnormal
-        }
-      }
-    }
-
-    //step2: AC
-    if (2 == uiCbpC) {
-      for (i = 0; i < 2; i++) { //Cb Cr
-        iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
-        int32_t iIndex = 16 + (i << 2);
-        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
-                                              1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
-                                              pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
-            return iRet;//abnormal
-          }
-          iIndex++;
-        }
-      }
-      ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
-      ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
-      ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
-      ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
-    }
-    BsEndCavlc (pBs);
-  }
-
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
-  PDqLayer pCurLayer = pCtx->pCurDqLayer;
-  PBitStringAux pBs = pCurLayer->pBitStringAux;
-  PSliceHeaderExt pSliceHeaderExt = &pCurLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
-  int32_t iBaseModeFlag;
-  int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
-  uint32_t uiCode;
-  intX_t iUsedBits;
-  if (pSliceHeaderExt->bAdaptiveBaseModeFlag == 1) {
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
-    iBaseModeFlag = uiCode;
-  } else {
-    iBaseModeFlag = pSliceHeaderExt->bDefaultBaseModeFlag;
-  }
-  if (!iBaseModeFlag) {
-    iRet = WelsActualDecodeMbCavlcISlice (pCtx);
-  } else {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
-             iBaseModeFlag);
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
-  }
-  if (iRet) { //occur error when parsing, MUST STOP decoding
-    return iRet;
-  }
-
-  // check whether there is left bits to read next time in case multiple slices
-  iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
-  // sub 1, for stop bit
-  if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
-    uiEosFlag = 1;
-  }
-  if (iUsedBits > (pBs->iBits -
-                   1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-             "WelsDecodeMbCavlcISlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
-             (int64_t) iUsedBits, pBs->iBits);
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
-  }
-  return ERR_NONE;
-}
-
-int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
-  SVlcTable* pVlcTable     = &pCtx->sVlcTable;
-  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
-  PBitStringAux pBs              = pCurLayer->pBitStringAux;
-  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
-
-  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
-  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
-
-  SWelsNeighAvail sNeighAvail;
-  int32_t iMbX = pCurLayer->iMbX;
-  int32_t iMbY = pCurLayer->iMbY;
-  const int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
-  int32_t i;
-  int32_t iRet = ERR_NONE;
-  uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
-  uint32_t uiCode;
-  int32_t iCode;
-  int32_t iMbResProperty;
-
-  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
-  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
-  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
-  uiMbType = uiCode;
-  if (uiMbType < 5) { //inter MB type
-    int16_t iMotionVector[LIST_A][30][MV_A];
-    int8_t  iRefIndex[LIST_A][30];
-    pCurLayer->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType;
-    WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurLayer);
-
-    if ((iRet = ParseInterInfo (pCtx, iMotionVector, iRefIndex, pBs)) != ERR_NONE) {
-      return iRet;//abnormal
-    }
-
-    if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag
-      pCurLayer->pResidualPredFlag[iMbXy] =  uiCode;
-    } else {
-      pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
-    }
-
-    if (pCurLayer->pResidualPredFlag[iMbXy] == 0) {
-      pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-    } else {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported.");
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
-    }
-  } else { //intra MB type
-    uiMbType -= 5;
-    if (uiMbType > 25)
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-    if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-
-    if (25 == uiMbType) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!");
-      int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0];
-      int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1];
-
-      int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
-      int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
-
-      uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL;
-      uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC;
-      uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC;
-
-      uint8_t* pTmpBsBuf;
-
-      int32_t i;
-      int32_t iCopySizeY  = (sizeof (uint8_t) << 4);
-      int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
-
-      int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
-
-      pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
-
-      //step 1: locating bit-stream pointer [must align into integer byte]
-      pBs->pCurBuf -= iIndex;
-
-      //step 2: copy pixel from bit-stream into fdec [reconstruction]
-      pTmpBsBuf = pBs->pCurBuf;
-      if (!pCtx->pParam->bParseOnly) {
-        for (i = 0; i < 16; i++) { //luma
-          memcpy (pDecY, pTmpBsBuf, iCopySizeY);
-          pDecY += iDecStrideL;
-          pTmpBsBuf += 16;
-        }
-
-        for (i = 0; i < 8; i++) { //cb
-          memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
-          pDecU += iDecStrideC;
-          pTmpBsBuf += 8;
-        }
-        for (i = 0; i < 8; i++) { //cr
-          memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
-          pDecV += iDecStrideC;
-          pTmpBsBuf += 8;
-        }
-      }
-
-      pBs->pCurBuf += 384;
-
-      //step 3: update QP and pNonZeroCount
-      pCurLayer->pLumaQp[iMbXy] = 0;
-      pCurLayer->pChromaQp[iMbXy][0] = pCurLayer->pChromaQp[iMbXy][1] = 0;
-      //Rec. 9.2.1 for PCM, nzc=16
-      ST32A4 (&pNzc[0], 0x10101010);
-      ST32A4 (&pNzc[4], 0x10101010);
-      ST32A4 (&pNzc[8], 0x10101010);
-      ST32A4 (&pNzc[12], 0x10101010);
-      ST32A4 (&pNzc[16], 0x10101010);
-      ST32A4 (&pNzc[20], 0x10101010);
-      WELS_READ_VERIFY (InitReadBits (pBs, 0));
-      return ERR_NONE;
-    } else {
-      if (0 == uiMbType) {
-        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
-        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
-        if (pCtx->pPps->bTransform8x8ModeFlag) {
-          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
-          pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
-          if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-            uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
-          }
-        }
-        if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-          pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-          WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
-        } else {
-          pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-          WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
-        }
-      } else { //I_PCM exclude, we can ignore it
-        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
-        pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-        pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
-        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
-        uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
-        uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
-        WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
-        if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer)) != ERR_NONE) {
-          return iRet;
-        }
-      }
-    }
-  }
-
-  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
-    uiCbp = uiCode;
-    {
-      if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47))
-        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
-      if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
-        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
-      if (MB_TYPE_INTRA4x4 == pCurLayer->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurLayer->pMbType[iMbXy]) {
-
-        uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp];
-      } else //inter
-        uiCbp = pCtx->pSps->uiChromaFormatIdc ?  g_kuiInterCbpTable[uiCbp] : g_kuiInterCbpTable400[uiCbp];
-    }
-
-    pCurLayer->pCbp[iMbXy] = uiCbp;
-    uiCbpC = pCurLayer->pCbp[iMbXy] >> 4;
-    uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
-
-    // Need modification when B picutre add in
-    bool bNeedParseTransformSize8x8Flag =
-      (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
-        || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
-       && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
-       && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
-       && (uiCbpL > 0)
-       && (pCtx->pPps->bTransform8x8ModeFlag));
-
-    if (bNeedParseTransformSize8x8Flag) {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
-      pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
-    }
-  }
-
-  ST32A4 (&pNzc[0], 0);
-  ST32A4 (&pNzc[4], 0);
-  ST32A4 (&pNzc[8], 0);
-  ST32A4 (&pNzc[12], 0);
-  ST32A4 (&pNzc[16], 0);
-  ST32A4 (&pNzc[20], 0);
-  if (pCurLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurLayer->pMbType[iMbXy]) && !IS_I_BL (pCurLayer->pMbType[iMbXy])) {
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-    }
-  }
-
-  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-    int32_t iQpDelta, iId8x8, iId4x4;
-    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t));
-    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
-    iQpDelta = iCode;
-
-    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
-    }
-
-    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
-    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
-                                       51)];
-    }
-
-    BsStartCavlc (pBs);
-
-    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-      //step1: Luma DC
-      if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
-                                          pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-        return iRet;//abnormal
-      }
-      //step2: Luma AC
-      if (uiCbpL) {
-        for (i = 0; i < 16; i++) {
-          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
-                                              g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
-                                              pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-            return iRet;//abnormal
-          }
-        }
-        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      }
-    } else { //non-MB_TYPE_INTRA16x16
-      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
-          if (uiCbpL & (1 << iId8x8)) {
-            int32_t iIndex = (iId8x8 << 2);
-            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-              if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
-                                                     g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
-                                                     pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-                return iRet;
-              }
-              iIndex++;
-            }
-          } else {
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else { // Normal T4x4
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
-          if (uiCbpL & (1 << iId8x8)) {
-            int32_t iIndex = (iId8x8 << 2);
-            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-              //Luma (DC and AC decoding together)
-              if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
-                                                  g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
-                                                  pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-                return iRet;//abnormal
-              }
-              iIndex++;
-            }
-          } else {
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      }
-    }
-
-
-    //chroma
-    //step1: DC
-    if (1 == uiCbpC || 2 == uiCbpC) {
-      for (i = 0; i < 2; i++) { //Cb Cr
-        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
-          iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
-        else
-          iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
-
-        if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
-                                            pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
-          return iRet;//abnormal
-        }
-      }
-    } else {
-    }
-    //step2: AC
-    if (2 == uiCbpC) {
-      for (i = 0; i < 2; i++) { //Cb Cr
-        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
-          iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
-        else
-          iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
-
-        int32_t iIndex = 16 + (i << 2);
-        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
-                                              1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
-                                              pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
-            return iRet;//abnormal
-          }
-          iIndex++;
-        }
-      }
-      ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
-      ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
-      ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
-      ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
-    }
-    BsEndCavlc (pBs);
-  }
-
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
-  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
-  PBitStringAux pBs              = pCurLayer->pBitStringAux;
-  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
-  PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
-  intX_t iUsedBits;
-  const int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
-  int32_t iBaseModeFlag, i;
-  int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
-  uint32_t uiCode;
-
-  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-
-  if (-1 == pSlice->iMbSkipRun) {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run
-    pSlice->iMbSkipRun = uiCode;
-    if (-1 == pSlice->iMbSkipRun) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_SKIP_RUN);
-    }
-  }
-  if (pSlice->iMbSkipRun--) {
-    int16_t iMv[2];
-
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
-    ST32A4 (&pNzc[0], 0);
-    ST32A4 (&pNzc[4], 0);
-    ST32A4 (&pNzc[8], 0);
-    ST32A4 (&pNzc[12], 0);
-    ST32A4 (&pNzc[16], 0);
-    ST32A4 (&pNzc[20], 0);
-
-    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-    memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
-    pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && ppRefPic[0]->bIsComplete);
-    //predict iMv
-    PredPSkipMvFromNeighbor (pCurLayer, iMv);
-    for (i = 0; i < 16; i++) {
-      ST32A2 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)iMv);
-    }
-
-    //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
-    //  memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
-    //}
-
-    //reset rS
-    if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag ||
-        (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) {
-      pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
-      for (i = 0; i < 2; i++) {
-        pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
-                                         pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-      }
-    }
-
-    pCurLayer->pCbp[iMbXy] = 0;
-  } else {
-    if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
-      iBaseModeFlag = uiCode;
-    } else {
-      iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag;
-    }
-    if (!iBaseModeFlag) {
-      iRet = WelsActualDecodeMbCavlcPSlice (pCtx);
-    } else {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
-               iBaseModeFlag);
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
-    }
-    if (iRet) { //occur error when parsing, MUST STOP decoding
-      return iRet;
-    }
-  }
-  // check whether there is left bits to read next time in case multiple slices
-  iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
-  // sub 1, for stop bit
-  if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
-    uiEosFlag = 1;
-  }
-  if (iUsedBits > (pBs->iBits -
-                   1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-             "WelsDecodeMbCavlcISlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
-             (int64_t) iUsedBits, pBs->iBits);
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
-  }
-  return ERR_NONE;
-}
-
-void WelsBlockFuncInit (SBlockFunc*   pFunc,  int32_t iCpu) {
-  pFunc->pWelsSetNonZeroCountFunc   = WelsNonZeroCount_c;
-  pFunc->pWelsBlockZero16x16Func    = WelsBlockZero16x16_c;
-  pFunc->pWelsBlockZero8x8Func      = WelsBlockZero8x8_c;
-
-#ifdef HAVE_NEON
-  if (iCpu & WELS_CPU_NEON) {
-    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_neon;
-    pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_neon;
-    pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_neon;
-  }
-#endif
-
-#ifdef HAVE_NEON_AARCH64
-  if (iCpu & WELS_CPU_NEON) {
-    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_AArch64_neon;
-    pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_AArch64_neon;
-    pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_AArch64_neon;
-  }
-#endif
-
-#if defined(X86_ASM)
-  if (iCpu & WELS_CPU_SSE2) {
-    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_sse2;
-    pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_sse2;
-    pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_sse2;
-  }
-#endif
-
-}
-
-void WelsBlockInit (int16_t* pBlock, int iW, int iH, int iStride, uint8_t uiVal) {
-  int32_t i;
-  int16_t* pDst = pBlock;
-
-  for (i = 0; i < iH; i++) {
-    memset (pDst, uiVal, iW * sizeof (int16_t));
-    pDst += iStride;
-  }
-}
-void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) {
-  WelsBlockInit (pBlock, 16, 16, iStride, 0);
-}
-
-void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) {
-  WelsBlockInit (pBlock, 8, 8, iStride, 0);
-}
-bool ComputeColocated (PWelsDecoderContext pCtx) {
-  PDqLayer pCurLayer = pCtx->pCurDqLayer;
-  PSlice pCurSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader;
-  if (!pSliceHeader->iDirectSpatialMvPredFlag) {
-    uint32_t uiShortRefCount = pCtx->sRefPic.uiShortRefCount[LIST_0];
-    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
-      for (uint32_t i = 0; i < uiShortRefCount; ++i) {
-        int32_t iTRb = WELS_CLIP3 (-128, 127, pSliceHeader->iPicOrderCntLsb - pCtx->sRefPic.pRefList[listIdx][i]->iFramePoc);
-        int32_t iTRp = WELS_CLIP3 (-128, 127,
-                                   pCtx->sRefPic.pRefList[LIST_1][i]->iFramePoc - pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc);
-        if (iTRp != 0) {
-          int32_t prescale = (16384 + iAbs (iTRp / 2)) / iTRp;
-          pCurSlice->iMvScale[listIdx][i] = WELS_CLIP3 (-1024, 1023, (iTRb * prescale + 32) >> 6);
-        } else {
-          pCurSlice->iMvScale[listIdx][i] = 0x03FFF;
-        }
-      }
-    }
-  }
-  //Implement the following
-  //get Mv_colocated_L1
-  //and do calculation
-  //iMvp[LIST_0] = Mv_colocated_L1 * (POC(cur) - POC(L0))/POC(L1) - POC(L0))
-  //iMvp[LIST_1] = Mv_colocated_L1 * (POC(cur) - POC(L1))/POC(L1) - POC(L0))
-  return true;
-}
-} // namespace WelsDec
+/*!
+ * \copy
+ *     Copyright (c)  2008-2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ *  Abstract
+ *      current slice decoding
+ *
+ *  History
+ *      07/10/2008 Created
+ *      08/09/2013 Modified
+ *
+ *****************************************************************************/
+
+
+#include "deblocking.h"
+
+#include "decode_slice.h"
+
+#include "parse_mb_syn_cavlc.h"
+#include "parse_mb_syn_cabac.h"
+#include "rec_mb.h"
+#include "mv_pred.h"
+
+#include "cpu_core.h"
+
+namespace WelsDec {
+
+static inline int32_t iAbs (int32_t x) {
+  static const int32_t INT_BITS = (sizeof (int) * CHAR_BIT) - 1;
+  int32_t y = x >> INT_BITS;
+  return (x ^ y) - y;
+}
+
+extern PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const int32_t kiPicHeight);
+
+int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PSlice pCurSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader;
+
+  int32_t iTotalMbTargetLayer = pSliceHeader->pSps->uiTotalMbCount;
+
+  int32_t iCurLayerWidth  = pCurLayer->iMbWidth << 4;
+  int32_t iCurLayerHeight = pCurLayer->iMbHeight << 4;
+
+  int32_t iNextMbXyIndex = 0;
+  PFmo pFmo = pCtx->pFmo;
+
+  int32_t iTotalNumMb = pCurSlice->iTotalMbInCurSlice;
+  int32_t iCountNumMb = 0;
+  PDeblockingFilterMbFunc pDeblockMb;
+
+  if (!pCtx->bAvcBasedFlag && iCurLayerWidth != pCtx->iCurSeqIntervalMaxPicWidth) {
+    return ERR_INFO_WIDTH_MISMATCH;
+  }
+
+  iNextMbXyIndex   = pSliceHeader->iFirstMbInSlice;
+  pCurLayer->iMbX  = iNextMbXyIndex % pCurLayer->iMbWidth;
+  pCurLayer->iMbY  = iNextMbXyIndex / pCurLayer->iMbWidth;
+  pCurLayer->iMbXyIndex = iNextMbXyIndex;
+
+  if (0 == iNextMbXyIndex) {
+    pCurLayer->pDec->iSpsId = pCtx->pSps->iSpsId;
+    pCurLayer->pDec->iPpsId = pCtx->pPps->iPpsId;
+
+    pCurLayer->pDec->uiQualityId = pCurLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
+  }
+
+  do {
+    if (iCountNumMb >= iTotalNumMb) {
+      break;
+    }
+
+    if (!pCtx->pParam->bParseOnly) { //for parse only, actual recon MB unnecessary
+      if (WelsTargetMbConstruction (pCtx)) {
+        WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+                 "WelsTargetSliceConstruction():::MB(%d, %d) construction error. pCurSlice_type:%d",
+                 pCurLayer->iMbX, pCurLayer->iMbY, pCurSlice->eSliceType);
+
+        return ERR_INFO_MB_RECON_FAIL;
+      }
+    }
+
+    ++iCountNumMb;
+    if (!pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite
+      pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true;
+      pCtx->pDec->iMbEcedPropNum += (pCurLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 1 : 0);
+      ++pCtx->iTotalNumMbRec;
+    }
+
+    if (pCtx->iTotalNumMbRec > iTotalMbTargetLayer) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+               "WelsTargetSliceConstruction():::pCtx->iTotalNumMbRec:%d, iTotalMbTargetLayer:%d",
+               pCtx->iTotalNumMbRec, iTotalMbTargetLayer);
+
+      return ERR_INFO_MB_NUM_EXCEED_FAIL;
+    }
+
+    if (pSliceHeader->pPps->uiNumSliceGroups > 1) {
+      iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
+    } else {
+      ++iNextMbXyIndex;
+    }
+    if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbTargetLayer) { // slice group boundary or end of a frame
+      break;
+    }
+    pCurLayer->iMbX  = iNextMbXyIndex % pCurLayer->iMbWidth;
+    pCurLayer->iMbY  = iNextMbXyIndex / pCurLayer->iMbWidth;
+    pCurLayer->iMbXyIndex = iNextMbXyIndex;
+  } while (1);
+
+  pCtx->pDec->iWidthInPixel  = iCurLayerWidth;
+  pCtx->pDec->iHeightInPixel = iCurLayerHeight;
+
+  if ((pCurSlice->eSliceType != I_SLICE) && (pCurSlice->eSliceType != P_SLICE) && (pCurSlice->eSliceType != B_SLICE))
+    return ERR_NONE; //no error but just ignore the type unsupported
+
+  if (pCtx->pParam->bParseOnly) //for parse only, deblocking should not go on
+    return ERR_NONE;
+
+  pDeblockMb = WelsDeblockingMb;
+
+  if (1 == pSliceHeader->uiDisableDeblockingFilterIdc
+      || pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice <= 0) {
+    return ERR_NONE;//NO_SUPPORTED_FILTER_IDX
+  } else {
+    WelsDeblockingFilterSlice (pCtx, pDeblockMb);
+  }
+  // any other filter_idc not supported here, 7/22/2010
+
+  return ERR_NONE;
+}
+
+int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer,
+                                       uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC) {
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t i, iIndex, iOffset;
+
+  if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+    for (i = 0; i < 4; i++) {
+      iIndex = g_kuiMbCountScan4Idx[i << 2];
+      if (pCurLayer->pNzc[iMbXy][iIndex] || pCurLayer->pNzc[iMbXy][iIndex + 1] || pCurLayer->pNzc[iMbXy][iIndex + 4]
+          || pCurLayer->pNzc[iMbXy][iIndex + 5]) {
+        iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2);
+        pCtx->pIdctResAddPredFunc8x8 (pDstY + iOffset, iStrideL, pCurLayer->pScaledTCoeff[iMbXy] + (i << 6));
+      }
+    }
+  } else {
+    // luma.
+    const int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+    int16_t* pScaledTCoeff = pCurLayer->pScaledTCoeff[iMbXy];
+    pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 0, iStrideL, pScaledTCoeff + 0 * 64, pNzc +  0);
+    pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 8, iStrideL, pScaledTCoeff + 1 * 64, pNzc +  2);
+    pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 0, iStrideL, pScaledTCoeff + 2 * 64, pNzc +  8);
+    pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 8, iStrideL, pScaledTCoeff + 3 * 64, pNzc + 10);
+  }
+
+  const int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+  int16_t* pScaledTCoeff = pCurLayer->pScaledTCoeff[iMbXy];
+  // Cb.
+  pCtx->pIdctFourResAddPredFunc (pDstU, iStrideC, pScaledTCoeff + 4 * 64, pNzc + 16);
+  // Cr.
+  pCtx->pIdctFourResAddPredFunc (pDstV, iStrideC, pScaledTCoeff + 5 * 64, pNzc + 18);
+
+  return ERR_NONE;
+}
+int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) {
+  int32_t iMbX = pCurLayer->iMbX;
+  int32_t iMbY = pCurLayer->iMbY;
+  uint8_t*  pDstY, *pDstCb, *pDstCr;
+
+  int32_t iLumaStride   = pCtx->pDec->iLinesize[0];
+  int32_t iChromaStride = pCtx->pDec->iLinesize[1];
+
+  pDstY  = pCurLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
+  pDstCb = pCurLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
+  pDstCr = pCurLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
+
+  if (pCtx->eSliceType == P_SLICE) {
+    GetInterPred (pDstY, pDstCb, pDstCr, pCtx);
+  } else {
+    if (pCtx->pTempDec == NULL)
+      pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
+    uint8_t*   pTempDstYCbCr[3];
+    uint8_t*   pDstYCbCr[3];
+    pTempDstYCbCr[0] = pCtx->pTempDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
+    pTempDstYCbCr[1] = pCtx->pTempDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
+    pTempDstYCbCr[2] = pCtx->pTempDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
+    pDstYCbCr[0] = pDstY;
+    pDstYCbCr[1] = pDstCb;
+    pDstYCbCr[2] = pDstCr;
+    GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx);
+  }
+  WelsMbInterSampleConstruction (pCtx, pCurLayer, pDstY, pDstCb, pDstCr, iLumaStride, iChromaStride);
+
+  pCtx->sBlockFunc.pWelsSetNonZeroCountFunc (
+    pCurLayer->pNzc[pCurLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti!
+  return ERR_NONE;
+}
+
+void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pCtx) {
+  const int32_t kiQMul = pCtx->bUseScalingList ? pCtx->pDequant_coeff4x4[0][iQp][0] : (g_kuiDequantCoeff[iQp][0] << 4);
+#define STRIDE 16
+  int32_t i;
+  int32_t iTemp[16]; //FIXME check if this is a good idea
+  int16_t* pBlk = pBlock;
+  static const int32_t kiXOffset[4] = {0, STRIDE, STRIDE << 2,  5 * STRIDE};
+  static const int32_t kiYOffset[4] = {0, STRIDE << 1, STRIDE << 3, 10 * STRIDE};
+
+  for (i = 0; i < 4; i++) {
+    const int32_t kiOffset = kiYOffset[i];
+    const int32_t kiX1 = kiOffset + kiXOffset[2];
+    const int32_t kiX2 = STRIDE + kiOffset;
+    const int32_t kiX3 = kiOffset + kiXOffset[3];
+    const int32_t kiI4 = i << 2; // 4*i
+    const int32_t kiZ0 = pBlk[kiOffset] + pBlk[kiX1];
+    const int32_t kiZ1 = pBlk[kiOffset] - pBlk[kiX1];
+    const int32_t kiZ2 = pBlk[kiX2] - pBlk[kiX3];
+    const int32_t kiZ3 = pBlk[kiX2] + pBlk[kiX3];
+
+    iTemp[kiI4]  = kiZ0 + kiZ3;
+    iTemp[1 + kiI4] = kiZ1 + kiZ2;
+    iTemp[2 + kiI4] = kiZ1 - kiZ2;
+    iTemp[3 + kiI4] = kiZ0 - kiZ3;
+  }
+
+  for (i = 0; i < 4; i++) {
+    const int32_t kiOffset = kiXOffset[i];
+    const int32_t kiI4 = 4 + i;
+    const int32_t kiZ0 = iTemp[i] + iTemp[4 + kiI4];
+    const int32_t kiZ1 = iTemp[i] - iTemp[4 + kiI4];
+    const int32_t kiZ2 = iTemp[kiI4] - iTemp[8 + kiI4];
+    const int32_t kiZ3 = iTemp[kiI4] + iTemp[8 + kiI4];
+
+    pBlk[kiOffset] = ((kiZ0 + kiZ3) * kiQMul + (1 << 5)) >> 6; //FIXME think about merging this into decode_resdual
+    pBlk[kiYOffset[1] + kiOffset] = ((kiZ1 + kiZ2) * kiQMul + (1 << 5)) >> 6;
+    pBlk[kiYOffset[2] + kiOffset] = ((kiZ1 - kiZ2) * kiQMul + (1 << 5)) >> 6;
+    pBlk[kiYOffset[3] + kiOffset] = ((kiZ0 - kiZ3) * kiQMul + (1 << 5)) >> 6;
+  }
+#undef STRIDE
+}
+
+int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, bool bOutput) {
+//seems IPCM should not enter this path
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+
+  WelsFillRecNeededMbInfo (pCtx, bOutput, pCurLayer);
+
+  if (IS_INTRA16x16 (pCurLayer->pMbType[iMbXy])) {
+    RecI16x16Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
+  } else if (IS_INTRA8x8 (pCurLayer->pMbType[iMbXy])) {
+    RecI8x8Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
+  } else if (IS_INTRA4x4 (pCurLayer->pMbType[iMbXy])) {
+    RecI4x4Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
+  }
+  return ERR_NONE;
+}
+
+int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) {
+  int32_t iMbX = pCurLayer->iMbX;
+  int32_t iMbY = pCurLayer->iMbY;
+  uint8_t*  pDstY, *pDstCb, *pDstCr;
+
+  int32_t iLumaStride   = pCtx->pDec->iLinesize[0];
+  int32_t iChromaStride = pCtx->pDec->iLinesize[1];
+
+  pDstY  = pCurLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
+  pDstCb = pCurLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
+  pDstCr = pCurLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
+
+  if (pCtx->eSliceType == P_SLICE) {
+    GetInterPred (pDstY, pDstCb, pDstCr, pCtx);
+  } else {
+    if (pCtx->pTempDec == NULL)
+      pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
+    uint8_t*   pTempDstYCbCr[3];
+    uint8_t*   pDstYCbCr[3];
+    pTempDstYCbCr[0] = pCtx->pTempDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
+    pTempDstYCbCr[1] = pCtx->pTempDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
+    pTempDstYCbCr[2] = pCtx->pTempDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
+    pDstYCbCr[0] = pDstY;
+    pDstYCbCr[1] = pDstCb;
+    pDstYCbCr[2] = pDstCr;
+    GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx);
+  }
+  return ERR_NONE;
+}
+
+int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  if (MB_TYPE_INTRA_PCM == pCurLayer->pMbType[pCurLayer->iMbXyIndex]) {
+    //already decoded and reconstructed when parsing
+    return ERR_NONE;
+  } else if (IS_INTRA (pCurLayer->pMbType[pCurLayer->iMbXyIndex])) {
+    WelsMbIntraPredictionConstruction (pCtx, pCurLayer, 1);
+  } else if (IS_INTER (pCurLayer->pMbType[pCurLayer->iMbXyIndex])) { //InterMB
+    if (0 == pCurLayer->pCbp[pCurLayer->iMbXyIndex]) { //uiCbp==0 include SKIP
+      WelsMbInterPrediction (pCtx, pCurLayer);
+    } else {
+      WelsMbInterConstruction (pCtx, pCurLayer);
+    }
+  } else {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsTargetMbConstruction():::::Unknown MB type: %d",
+             pCurLayer->pMbType[pCurLayer->iMbXyIndex]);
+    return ERR_INFO_MB_RECON_FAIL;
+  }
+
+  return ERR_NONE;
+}
+
+void WelsChromaDcIdct (int16_t* pBlock) {
+  int32_t iStride = 32;
+  int32_t iXStride = 16;
+  int32_t iStride1 = iXStride + iStride;
+  int16_t* pBlk = pBlock;
+  int32_t iA, iB, iC, iD, iE;
+
+  iA = pBlk[0];
+  iB = pBlk[iXStride];
+  iC = pBlk[iStride];
+  iD = pBlk[iStride1];
+
+  iE = iA - iB;
+  iA += iB;
+  iB = iC - iD;
+  iC += iD;
+
+  pBlk[0] = (iA + iC);
+  pBlk[iXStride] = (iE + iB);
+  pBlk[iStride] = (iA - iC);
+  pBlk[iStride1] = (iE - iB);
+}
+
+void WelsMapNxNNeighToSampleNormal (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
+  if (pNeighAvail->iLeftAvail) {  //left
+    pSampleAvail[ 6] =
+      pSampleAvail[12] =
+        pSampleAvail[18] =
+          pSampleAvail[24] = 1;
+  }
+  if (pNeighAvail->iLeftTopAvail) { //top_left
+    pSampleAvail[0] = 1;
+  }
+  if (pNeighAvail->iTopAvail) { //top
+    pSampleAvail[1] =
+      pSampleAvail[2] =
+        pSampleAvail[3] =
+          pSampleAvail[4] = 1;
+  }
+  if (pNeighAvail->iRightTopAvail) { //top_right
+    pSampleAvail[5] = 1;
+  }
+}
+
+void WelsMapNxNNeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
+  if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) {   //left
+    pSampleAvail[ 6] =
+      pSampleAvail[12] =
+        pSampleAvail[18] =
+          pSampleAvail[24] = 1;
+  }
+  if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) {  //top_left
+    pSampleAvail[0] = 1;
+  }
+  if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) {  //top
+    pSampleAvail[1] =
+      pSampleAvail[2] =
+        pSampleAvail[3] =
+          pSampleAvail[4] = 1;
+  }
+  if (pNeighAvail->iRightTopAvail && IS_INTRA (pNeighAvail->iRightTopType)) {  //top_right
+    pSampleAvail[5] = 1;
+  }
+}
+void WelsMap16x16NeighToSampleNormal (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
+  if (pNeighAvail->iLeftAvail) {
+    *pSampleAvail = (1 << 2);
+  }
+  if (pNeighAvail->iLeftTopAvail) {
+    *pSampleAvail |= (1 << 1);
+  }
+  if (pNeighAvail->iTopAvail) {
+    *pSampleAvail |= 1;
+  }
+}
+
+void WelsMap16x16NeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
+  if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) {
+    *pSampleAvail = (1 << 2);
+  }
+  if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) {
+    *pSampleAvail |= (1 << 1);
+  }
+  if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) {
+    *pSampleAvail |= 1;
+  }
+}
+
+int32_t ParseIntra4x4Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
+                           PBitStringAux pBs,
+                           PDqLayer pCurDqLayer) {
+  int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  int32_t iFinalMode, i;
+
+  uint8_t uiNeighAvail = 0;
+  uint32_t uiCode;
+  int32_t iCode;
+  pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
+  uiNeighAvail = (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
+  for (i = 0; i < 16; i++) {
+    int32_t iPrevIntra4x4PredMode = 0;
+    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+      WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode));
+      iPrevIntra4x4PredMode = iCode;
+    } else {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
+      iPrevIntra4x4PredMode = uiCode;
+    }
+    const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i);
+
+    int8_t iBestMode;
+    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+      if (iPrevIntra4x4PredMode == -1)
+        iBestMode = kiPredMode;
+      else
+        iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode);
+    } else {
+      if (iPrevIntra4x4PredMode) {
+        iBestMode = kiPredMode;
+      } else {
+        WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode));
+        iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode);
+      }
+    }
+
+    iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i, false);
+    if (iFinalMode == GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE)) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I4x4_PRED_MODE);
+    }
+
+    pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[i]] = iFinalMode;
+
+    pIntraPredMode[g_kuiScan8[i]] = iBestMode;
+
+    iSampleAvail[g_kuiCache30ScanIdx[i]] = 1;
+  }
+  ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
+  pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
+  pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
+  pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
+
+  if (pCtx->pSps->uiChromaFormatIdc == 0)//no need parse chroma
+    return ERR_NONE;
+
+  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
+    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+    }
+    pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
+  } else {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
+    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+    }
+    pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
+  }
+
+  if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
+      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+  }
+  return ERR_NONE;
+}
+
+int32_t ParseIntra8x8Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
+                           PBitStringAux pBs,
+                           PDqLayer pCurDqLayer) {
+  // Similar with Intra_4x4, can put them together when needed
+  int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  int32_t iFinalMode, i;
+
+  uint8_t uiNeighAvail = 0;
+  uint32_t uiCode;
+  int32_t iCode;
+  pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
+  // Top-Right : Left : Top-Left : Top
+  uiNeighAvail = (iSampleAvail[5] << 3) | (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
+
+  pCurDqLayer->pIntraNxNAvailFlag[iMbXy] = uiNeighAvail;
+
+  for (i = 0; i < 4; i++) {
+    int32_t iPrevIntra4x4PredMode = 0;
+    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+      WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode));
+      iPrevIntra4x4PredMode = iCode;
+    } else {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
+      iPrevIntra4x4PredMode = uiCode;
+    }
+    const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i << 2);
+
+    int8_t iBestMode;
+    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+      if (iPrevIntra4x4PredMode == -1)
+        iBestMode = kiPredMode;
+      else
+        iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode);
+    } else {
+      if (iPrevIntra4x4PredMode) {
+        iBestMode = kiPredMode;
+      } else {
+        WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode));
+        iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode);
+      }
+    }
+
+    iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i << 2, true);
+
+    if (iFinalMode == GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE)) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I4x4_PRED_MODE);
+    }
+
+    for (int j = 0; j < 4; j++) {
+      pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[ (i << 2) + j]] = iFinalMode;
+      pIntraPredMode[g_kuiScan8[ (i << 2) + j]] = iBestMode;
+      iSampleAvail[g_kuiCache30ScanIdx[ (i << 2) + j]] = 1;
+    }
+  }
+  ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
+  pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
+  pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
+  pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
+
+  if (pCtx->pSps->uiChromaFormatIdc == 0)
+    return ERR_NONE;
+
+  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
+    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+    }
+    pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
+  } else {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
+    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+    }
+    pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
+  }
+
+  if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
+      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+  }
+
+  return ERR_NONE;
+}
+
+int32_t ParseIntra16x16Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, PBitStringAux pBs,
+                             PDqLayer pCurDqLayer) {
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  uint8_t uiNeighAvail = 0; //0x07 = 0 1 1 1, means left, top-left, top avail or not. (1: avail, 0: unavail)
+  uint32_t uiCode;
+  int32_t iCode;
+  pCtx->pMap16x16NeighToSampleFunc (pNeighAvail, &uiNeighAvail);
+
+  if (CheckIntra16x16PredMode (uiNeighAvail,
+                               &pCurDqLayer->pIntraPredMode[iMbXy][7])) { //invalid iPredMode, must stop decoding
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I16x16_PRED_MODE);
+  }
+  if (pCtx->pSps->uiChromaFormatIdc == 0)
+    return ERR_NONE;
+
+  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
+    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+    }
+    pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
+  } else {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
+    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+    }
+    pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
+  }
+  if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
+      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+  }
+
+  return ERR_NONE;
+}
+
+int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& uiEosFlag) {
+  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
+  PBitStringAux pBsAux           = pCurLayer->pBitStringAux;
+  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
+  SWelsNeighAvail sNeighAvail;
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t i;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
+
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+
+  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+  pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+  WELS_READ_VERIFY (ParseMBTypeISliceCabac (pCtx, &sNeighAvail, uiMbType));
+  if (uiMbType > 25) {
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+  } else if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17
+             && uiMbType <= 24))) {
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+  } else if (25 == uiMbType) {   //I_PCM
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!");
+    WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
+    pSlice->iLastDeltaQp = 0;
+    WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+    if (uiEosFlag) {
+      RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+    }
+    return ERR_NONE;
+  } else if (0 == uiMbType) { //I4x4
+    ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+    if (pCtx->pPps->bTransform8x8ModeFlag) {
+      // Transform 8x8 cabac will be added soon
+      WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, &sNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
+    }
+    if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+      uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+      WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+    } else {
+      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+      WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+    }
+    //get uiCbp for I4x4
+    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, &sNeighAvail, uiCbp));
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
+    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? uiCbp >> 4 : 0;
+    uiCbpLuma = uiCbp & 15;
+  } else { //I16x16;
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+    pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+    pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+    pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+    pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ;
+    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+    WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
+    WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBsAux, pCurLayer));
+  }
+
+  ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+  pCurLayer->pCbfDc[iMbXy] = 0;
+
+  if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) {
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurLayer->pLumaQp[iMbXy] +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
+    }
+  }
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+    int32_t iQpDelta, iId8x8, iId4x4;
+    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
+    if (iQpDelta > 25 || iQpDelta < -26) {//out of iQpDelta range
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
+    }
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pSlice->iLastMbQp +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
+    }
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
+                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
+      //step2: Luma AC
+      if (uiCbpLuma) {
+        for (i = 0; i < 16; i++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, i,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC,
+                            pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurLayer->pLumaQp[iMbXy], pCtx));
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else { //pNonZeroCount = 0
+        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+        // Transform 8x8 support for CABAC
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpLuma & (1 << iId8x8)) {
+            WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (&sNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
+                              iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, LUMA_DC_AC_INTRA_8,
+                              pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
+          } else {
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else {
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpLuma & (1 << iId8x8)) {
+            int32_t iIdx = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              //Luma (DC and AC decoding together)
+              WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
+                                g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
+                                pCurLayer->pLumaQp[iMbXy], pCtx));
+              iIdx++;
+            }
+          } else {
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    }
+    int32_t iMbResProperty;
+    //chroma
+    //step1: DC
+    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
+      //Cb Cr
+      for (i = 0; i < 2; i++) {
+        iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
+                          iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+      }
+    }
+
+    //step2: AC
+    if (2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        int32_t iIdx = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
+                            pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+          iIdx++;
+        }
+      }
+      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
+      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
+      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
+      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
+    } else {
+      ST16 (&pCurLayer->pNzc[iMbXy][16], 0);
+      ST16 (&pCurLayer->pNzc[iMbXy][20], 0);
+      ST16 (&pCurLayer->pNzc[iMbXy][18], 0);
+      ST16 (&pCurLayer->pNzc[iMbXy][22], 0);
+    }
+  } else {
+    ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+  }
+
+  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+  if (uiEosFlag) {
+    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+  }
+  return ERR_NONE;
+}
+/*!
+ * \brief   CABAC macroblock decoding entry point for I slices.
+ *          Delegates all parsing to WelsDecodeMbCabacISliceBaseMode0.
+ * \param   pCtx       decoder context, forwarded unchanged
+ * \param   pNalCur    not referenced by this function
+ * \param   uiEosFlag  forwarded by reference to the base-mode routine
+ * \return  ERR_NONE once the WELS_READ_VERIFY check has passed.
+ *          NOTE(review): WELS_READ_VERIFY is defined elsewhere and presumably
+ *          returns early with the callee error code on failure - confirm.
+ */
+int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
+  WELS_READ_VERIFY (WelsDecodeMbCabacISliceBaseMode0 (pCtx, uiEosFlag));
+  return ERR_NONE;
+}
+/*!
+ * \brief   Parses one CABAC coded macroblock of a P slice: mb_type, prediction
+ *          information, coded block pattern, QP delta, luma and chroma residuals
+ *          and finally the end-of-slice flag.
+ *          The skip flag is not handled here; in the visible caller
+ *          WelsDecodeMbCabacPSlice it is parsed before this routine is invoked.
+ * \param   pCtx         decoder context; the current layer, SPS and PPS are read from it
+ * \param   pNeighAvail  neighbour availability, handed to the cache fill and parse helpers
+ * \param   uiEosFlag    output, written by ParseEndOfSliceCabac
+ * \return  ERR_NONE on success;
+ *          GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE) for a rejected mb_type;
+ *          GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP) when the QP delta is outside -26..25.
+ *          NOTE(review): failures of the parse helpers are presumably propagated by
+ *          WELS_READ_VERIFY, which is defined elsewhere - confirm.
+ */
+int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) {
+  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
+  PBitStringAux pBsAux           = pCurLayer->pBitStringAux;
+  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
+
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t iMbResProperty;
+  int32_t i;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
+
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); // local non-zero-count cache, filled by the cache helpers below
+
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+
+  WELS_READ_VERIFY (ParseMBTypePSliceCabac (pCtx, pNeighAvail, uiMbType));
+  // uiMbType = 4 is not allowed.
+  if (uiMbType < 4) { //Inter mode
+    int16_t pMotionVector[LIST_A][30][MV_A];
+    int16_t pMvdCache[LIST_A][30][MV_A];
+    int8_t  pRefIndex[LIST_A][30];
+    pCurLayer->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType;
+    WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurLayer);
+    WELS_READ_VERIFY (ParseInterPMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex));
+    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+  } else { //Intra mode
+    uiMbType -= 5; // unsigned arithmetic: a parsed value of 4 wraps around and is rejected by the range check below
+    if (uiMbType > 25)
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+    // NOTE(review): with uiChromaFormatIdc == 0 these ranges presumably are the Intra16x16 types that signal chroma coefficients - confirm against g_kuiI16CbpTable
+    if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+
+    if (25 == uiMbType) {   //I_PCM
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!");
+      WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
+      pSlice->iLastDeltaQp = 0;
+      WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+      if (uiEosFlag) {
+        RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+      }
+      return ERR_NONE; // I_PCM needs no CBP, QP delta or residual parsing
+    } else { //normal Intra mode
+      if (0 == uiMbType) { //Intra4x4
+        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+        if (pCtx->pPps->bTransform8x8ModeFlag) {
+          WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
+        }
+        if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { // flag set: the macroblock is treated as Intra8x8
+          uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+        } else {
+          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+        }
+      } else { //Intra16x16
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+        pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+        pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; // low two bits of the type select the prediction mode
+        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; // CBP comes from the type via table lookup, not from the bitstream
+        uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
+        uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+        WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
+        WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurLayer));
+      }
+    }
+  }
+
+  ST32 (&pCurLayer->pNzc[iMbXy][0], 0); // clear the stored non-zero counts, entries 0..23, four at a time
+  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+
+  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) { // Intra16x16 already has its CBP from the table lookup above
+    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
+
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
+    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ;
+    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+  }
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+
+    if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+      // Needs modification once B pictures are added
+      bool bNeedParseTransformSize8x8Flag =
+        (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
+          || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
+         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
+         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
+         && ((pCurLayer->pCbp[iMbXy] & 0x0F) > 0)
+         && (pCtx->pPps->bTransform8x8ModeFlag));
+
+      if (bNeedParseTransformSize8x8Flag) {
+        WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail,
+                          pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); //transform_size_8x8_flag
+      }
+    }
+
+    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+
+    int32_t iQpDelta, iId8x8, iId4x4;
+
+    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
+    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
+    }
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    for (i = 0; i < 2; i++) { // one chroma QP per chroma component, index clipped to 0..51 before the table lookup
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
+                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
+      //step2: Luma AC
+      if (uiCbpLuma) {
+        for (i = 0; i < 16; i++) { // AC scan starts no earlier than position 1; each block owns 16 coefficients
+          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart,
+                            1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
+                            pCurLayer->pLumaQp[iMbXy], pCtx));
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); // copy four luma rows from the local cache (row stride 8, column offset 1)
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else {
+        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+        // Transform 8x8 support for CABAC
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpLuma & (1 << iId8x8)) { // one luma CBP bit per 8x8 block; each 8x8 block owns 64 coefficients
+            WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
+                              iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart,
+                              IS_INTRA (pCurLayer->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
+                              pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
+          } else { // uncoded 8x8 block: clear its four cache entries with two 16-bit stores
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else {
+        iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpLuma & (1 << iId8x8)) {
+            int32_t iIdx = (iId8x8 << 2); // first of the four 4x4 blocks inside this 8x8 block
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              //Luma (DC and AC decoding together)
+              WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
+                                g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
+                                pCurLayer->pLumaQp[iMbXy],
+                                pCtx));
+              iIdx++;
+            }
+          } else {
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    }
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) { // i = 0 uses the U properties, i = 1 the V properties; coefficients start at offset 256 and 320
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        else
+          iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
+
+        WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
+                          iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+      }
+    }
+    //step2: AC
+    if (2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) {
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        else
+          iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
+        int32_t index = 16 + (i << 2); // block indices 16..19 for i = 0 and 20..23 for i = 1
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
+                            iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+          index++;
+        }
+      }
+      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); // chroma counts sit at column offset 6 of the local cache
+      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
+      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
+      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
+    } else {
+      ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+      ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+    }
+  } else { // nothing coded: no QP delta is parsed, the macroblock reuses the last QP
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+  }
+
+  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+  if (uiEosFlag) {
+    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+  }
+
+  return ERR_NONE;
+}
+/*!
+ * \brief   Parses one CABAC coded macroblock of a B slice: mb_type, prediction
+ *          information, coded block pattern, QP delta, luma and chroma residuals
+ *          and finally the end-of-slice flag.
+ * \param   pCtx         decoder context; the current layer, SPS and PPS are read from it
+ * \param   pNeighAvail  neighbour availability, handed to the cache fill and parse helpers
+ * \param   uiEosFlag    output, written by ParseEndOfSliceCabac
+ * \return  ERR_NONE on success;
+ *          GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE) for a rejected mb_type;
+ *          GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP) when the QP delta is outside -26..25.
+ *          NOTE(review): failures of the parse helpers are presumably propagated by
+ *          WELS_READ_VERIFY, which is defined elsewhere - confirm.
+ */
+int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBsAux = pCurLayer->pBitStringAux;
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t iMbResProperty;
+  int32_t i;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
+
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); // local non-zero-count cache, filled by the cache helpers below
+
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+
+  WELS_READ_VERIFY (ParseMBTypeBSliceCabac (pCtx, pNeighAvail, uiMbType));
+
+  if (uiMbType < 23) { //Inter B mode
+    int16_t pMotionVector[LIST_A][30][MV_A];
+    int16_t pMvdCache[LIST_A][30][MV_A];
+    int8_t  pRefIndex[LIST_A][30];
+    int8_t  pDirect[30];
+    pCurLayer->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType;
+    WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurLayer);
+    WelsFillDirectCacheCabac (pNeighAvail, pDirect, pCurLayer);
+    WELS_READ_VERIFY (ParseInterBMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex,
+                      pDirect));
+    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+  } else { //Intra mode
+    uiMbType -= 23; // intra types follow the 23 inter B types; rebase so that 0 is Intra4x4 and 25 is I_PCM
+    if (uiMbType > 25)
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+    // NOTE(review): with uiChromaFormatIdc == 0 these ranges presumably are the Intra16x16 types that signal chroma coefficients - confirm against g_kuiI16CbpTable
+    if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+
+    if (25 == uiMbType) {   //I_PCM
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in B slice!");
+      WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
+      pSlice->iLastDeltaQp = 0;
+      WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+      if (uiEosFlag) {
+        RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+      }
+      return ERR_NONE; // I_PCM needs no CBP, QP delta or residual parsing
+    } else { //normal Intra mode
+      if (0 == uiMbType) { //Intra4x4
+        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+        if (pCtx->pPps->bTransform8x8ModeFlag) {
+          WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
+        }
+        if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { // flag set: the macroblock is treated as Intra8x8
+          uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+        } else {
+          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+        }
+      } else { //Intra16x16
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+        pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+        pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; // low two bits of the type select the prediction mode
+        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; // CBP comes from the type via table lookup, not from the bitstream
+        uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
+        uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+        WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
+        WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurLayer));
+      }
+    }
+  }
+
+  ST32 (&pCurLayer->pNzc[iMbXy][0], 0); // clear the stored non-zero counts, entries 0..23, four at a time
+  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+
+  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) { // Intra16x16 already has its CBP from the table lookup above
+    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
+
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
+    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
+    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+  }
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+
+    if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+      // transform_size_8x8_flag is parsed here only for 16x16, direct, 16x8, 8x16 or no-sub-8x8 partitions that are not intra NxN, have luma CBP bits set and a PPS that enables the 8x8 transform
+      bool bNeedParseTransformSize8x8Flag =
+        (((IS_INTER_16x16 (pCurLayer->pMbType[iMbXy]) || IS_DIRECT (pCurLayer->pMbType[iMbXy])
+           || IS_INTER_16x8 (pCurLayer->pMbType[iMbXy]) || IS_INTER_8x16 (pCurLayer->pMbType[iMbXy]))
+          || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
+         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
+         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
+         && ((pCurLayer->pCbp[iMbXy] & 0x0F) > 0)
+         && (pCtx->pPps->bTransform8x8ModeFlag));
+
+      if (bNeedParseTransformSize8x8Flag) {
+        WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail,
+                          pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); //transform_size_8x8_flag
+      }
+    }
+
+    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+
+    int32_t iQpDelta, iId8x8, iId4x4;
+
+    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
+    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
+    }
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    for (i = 0; i < 2; i++) { // one chroma QP per chroma component, index clipped to 0..51 before the table lookup
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
+                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
+      //step2: Luma AC
+      if (uiCbpLuma) {
+        for (i = 0; i < 16; i++) { // AC scan starts no earlier than position 1; each block owns 16 coefficients
+          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart,
+                            1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
+                            pCurLayer->pLumaQp[iMbXy], pCtx));
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); // copy four luma rows from the local cache (row stride 8, column offset 1)
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else {
+        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+        // Transform 8x8 support for CABAC
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpLuma & (1 << iId8x8)) { // one luma CBP bit per 8x8 block; each 8x8 block owns 64 coefficients
+            WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
+                              iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart,
+                              IS_INTRA (pCurLayer->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
+                              pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
+          } else { // uncoded 8x8 block: clear its four cache entries with two 16-bit stores
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else {
+        iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpLuma & (1 << iId8x8)) {
+            int32_t iIdx = (iId8x8 << 2); // first of the four 4x4 blocks inside this 8x8 block
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              //Luma (DC and AC decoding together)
+              WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
+                                g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
+                                pCurLayer->pLumaQp[iMbXy],
+                                pCtx));
+              iIdx++;
+            }
+          } else {
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    }
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) { // i = 0 uses the U properties, i = 1 the V properties; coefficients start at offset 256 and 320
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        else
+          iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
+
+        WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
+                          iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+      }
+    }
+    //step2: AC
+    if (2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) {
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        else
+          iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
+        int32_t index = 16 + (i << 2); // block indices 16..19 for i = 0 and 20..23 for i = 1
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
+                            iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+          index++;
+        }
+      }
+      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); // chroma counts sit at column offset 6 of the local cache
+      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
+      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
+      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
+    } else {
+      ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+      ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+    }
+  } else { // nothing coded: no QP delta is parsed, the macroblock reuses the last QP
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+  }
+
+  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+  if (uiEosFlag) {
+    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+  }
+
+  return ERR_NONE;
+}
+
+// Decode one macroblock of a CABAC-coded P slice: a skipped MB is completed here, any other MB is handed to WelsDecodeMbCabacPSliceBaseMode0.
+int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { // pNalCur is not referenced in this body
+  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
+  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
+  PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
+  uint32_t uiCode;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t i;
+  SWelsNeighAvail uiNeighAvail;
+  pCurLayer->pCbp[iMbXy] = 0; // per-MB defaults, written before anything is parsed
+  pCurLayer->pCbfDc[iMbXy] = 0;
+  pCurLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
+
+  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+
+  GetNeighborAvailMbType (&uiNeighAvail, pCurLayer);
+  WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode)); // a non-zero uiCode selects the skip path below
+
+  if (uiCode) { // skipped MB: no further MB syntax is parsed except the end-of-slice check
+    int16_t pMv[2] = {0};
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
+    ST32 (&pCurLayer->pNzc[iMbXy][0], 0); // six stores at offsets 0..20 zero pNzc[0..23], assuming ST32 writes 4 bytes
+    ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+
+    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+    memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16); // all 16 list-0 reference indices become 0
+    pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && ppRefPic[0]->bIsComplete); // also set when list-0 entry 0 is missing or incomplete
+    //predict the skip mv into pMv
+    PredPSkipMvFromNeighbor (pCurLayer, pMv);
+    for (i = 0; i < 16; i++) {
+      ST32 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)pMv); // both 16-bit components of pMv copied as one 32-bit value
+      ST32 (pCurLayer->pMvd[0][iMbXy][i], 0);
+    }
+
+    //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
+    //  memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
+    //}
+
+    //reset rS
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; // no QP delta is parsed on this path; the MB takes the slice's last MB QP
+    for (i = 0; i < 2; i++) { // chroma QP per plane: table lookup on (luma QP + PPS chroma offset) clipped to 0..51
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+
+    //for neighboring CABAC usage
+    pSlice->iLastDeltaQp = 0;
+
+    WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); // uiEosFlag is passed by reference to the end-of-slice parser
+
+    return ERR_NONE;
+  }
+  // not skipped: the remaining MB syntax is parsed by the BaseMode0 routine
+  WELS_READ_VERIFY (WelsDecodeMbCabacPSliceBaseMode0 (pCtx, &uiNeighAvail, uiEosFlag));
+  return ERR_NONE;
+}
+
+// Decode one macroblock of a CABAC-coded B slice: a skipped (direct) MB is completed here, any other MB is handed to WelsDecodeMbCabacBSliceBaseMode0.
+int32_t WelsDecodeMbCabacBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { // pNalCur is not referenced in this body
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  PPicture* ppRefPicL0 = pCtx->sRefPic.pRefList[LIST_0];
+  PPicture* ppRefPicL1 = pCtx->sRefPic.pRefList[LIST_1];
+  uint32_t uiCode;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t i;
+  SWelsNeighAvail uiNeighAvail;
+  pCurLayer->pCbp[iMbXy] = 0; // per-MB defaults, written before anything is parsed
+  pCurLayer->pCbfDc[iMbXy] = 0;
+  pCurLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
+
+  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+
+  GetNeighborAvailMbType (&uiNeighAvail, pCurLayer);
+  WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode)); // a non-zero uiCode selects the skip path below
+
+  memset (pCurLayer->pDirect[iMbXy], 0, sizeof (int8_t) * 16); // cleared for skipped and non-skipped MBs alike
+
+  if (uiCode) { // skipped MB: typed as SKIP | DIRECT, motion comes from direct prediction
+    int16_t pMv[LIST_A][2] = { {0, 0}, { 0, 0 } };
+    int8_t  ref[LIST_A] = { 0 };
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP | MB_TYPE_DIRECT;
+    ST32 (&pCurLayer->pNzc[iMbXy][0], 0); // six stores at offsets 0..20 zero pNzc[0..23], assuming ST32 writes 4 bytes
+    ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+
+    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+    memset (pCurLayer->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 16);
+    memset (pCurLayer->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16);
+    pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && ppRefPicL0[0]->bIsComplete)
+                            || ! (ppRefPicL1[0] && ppRefPicL1[0]->bIsComplete); // entry 0 of either list missing or incomplete also sets the flag
+
+    if (pSliceHeader->iDirectSpatialMvPredFlag) {
+      // NOTE(review): no result of the direct-prediction helpers is checked here; their return types are not visible in this block
+      //predict direct spatial mv
+      PredMvBDirectSpatial (pCtx, pMv, ref);
+    } else {
+      //temporal direct mode
+      ComputeColocated (pCtx);
+      PredBDirectTemporal (pCtx, pMv, ref);
+    }
+
+    //reset rS
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; // no QP delta is parsed on this path; the MB takes the slice's last MB QP
+    for (i = 0; i < 2; i++) { // chroma QP per plane: table lookup on (luma QP + PPS chroma offset) clipped to 0..51
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+
+    //for neighboring CABAC usage
+    pSlice->iLastDeltaQp = 0;
+
+    WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); // uiEosFlag is passed by reference to the end-of-slice parser
+
+    return ERR_NONE;
+  }
+  // not skipped: the remaining MB syntax is parsed by the BaseMode0 routine
+  WELS_READ_VERIFY (WelsDecodeMbCabacBSliceBaseMode0 (pCtx, &uiNeighAvail, uiEosFlag));
+  return ERR_NONE;
+}
+
+// Calculate the dequant coefficient tables (scaling list x base dequant value) when the SPS or PPS carries a scaling matrix; always returns ERR_NONE
+int32_t  WelsCalcDeqCoeffScalingList (PWelsDecoderContext pCtx) {
+  if (pCtx->pSps->bSeqScalingMatrixPresentFlag || pCtx->pPps->bPicScalingMatrixPresentFlag) {
+    pCtx->bUseScalingList = true;
+
+    if (!pCtx->bDequantCoeff4x4Init || (pCtx->iDequantCoeffPpsid != pCtx->pPps->iPpsId)) { // rebuild only on first use or when the active PPS id differs from the cached one
+      int i, q, x, y;
+      //Init dequant coeff value for different QP
+      for (i = 0; i < 6; i++) { // i: scaling list index
+        pCtx->pDequant_coeff4x4[i] = pCtx->pDequant_coeff_buffer4x4[i]; // point the working tables at the context-owned buffers
+        pCtx->pDequant_coeff8x8[i] = pCtx->pDequant_coeff_buffer8x8[i];
+        for (q = 0; q < 51; q++) { // NOTE(review): fills q = 0..50 only, yet QP is clipped to 0..51 elsewhere in this file - confirm whether entry 51 is needed and the buffers have room for it
+          for (x = 0; x < 16; x++) { // PPS list takes precedence over the SPS list when the PPS flag is set
+            pCtx->pDequant_coeff4x4[i][q][x] = pCtx->pPps->bPicScalingMatrixPresentFlag ? pCtx->pPps->iScalingList4x4[i][x] *
+                                               g_kuiDequantCoeff[q][x & 0x07] : pCtx->pSps->iScalingList4x4[i][x] * g_kuiDequantCoeff[q][x & 0x07];
+          }
+          for (y = 0; y < 64; y++) { // 8x8: base value indexed by q % 6 and (row, column) = (y / 8, y % 8)
+            pCtx->pDequant_coeff8x8[i][q][y] = pCtx->pPps->bPicScalingMatrixPresentFlag ? pCtx->pPps->iScalingList8x8[i][y] *
+                                               g_kuiMatrixV[q % 6][y / 8][y % 8] : pCtx->pSps->iScalingList8x8[i][y] * g_kuiMatrixV[q % 6][y / 8][y % 8];
+          }
+        }
+      }
+      pCtx->bDequantCoeff4x4Init = true;
+      pCtx->iDequantCoeffPpsid = pCtx->pPps->iPpsId; // remember which PPS the tables were built for
+    }
+  } else
+    pCtx->bUseScalingList = false; // neither SPS nor PPS signals a scaling matrix
+  return ERR_NONE;
+}
+// Decode all macroblocks of the current slice: choose the per-MB decoder by entropy mode and slice type, then loop until end of slice, end of frame, or an error.
+int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur) { // bFirstSliceInLayer is not referenced in this body
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PFmo pFmo = pCtx->pFmo;
+  int32_t iRet;
+  int32_t iNextMbXyIndex, iSliceIdc;
+
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeaderExt pSliceHeaderExt = &pSlice->sSliceHeaderExt;
+  PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader;
+  int32_t iMbX, iMbY;
+  const int32_t kiCountNumMb = pSliceHeader->pSps->uiTotalMbCount; //need to be correct when fmo or multi slice
+  uint32_t uiEosFlag = 0;
+  PWelsDecMbFunc pDecMbFunc;
+
+  pSlice->iTotalMbInCurSlice = 0; //initialize at the starting of slice decoding.
+
+  if (pCtx->pPps->bEntropyCodingModeFlag) { // CABAC
+    if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag ||
+        pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag ||
+        pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag) { // any of these flags is rejected when CABAC is on
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+               "WelsDecodeSlice()::::ILP flag exist, not supported with CABAC enabled!");
+      pCtx->iErrorCode |= dsBitstreamError;
+      return dsBitstreamError;
+    }
+    if (P_SLICE == pSliceHeader->eSliceType)
+      pDecMbFunc = WelsDecodeMbCabacPSlice;
+    else if (B_SLICE == pSliceHeader->eSliceType)
+      pDecMbFunc = WelsDecodeMbCabacBSlice;
+    else //every remaining slice type is decoded as I_SLICE
+      pDecMbFunc = WelsDecodeMbCabacISlice;
+  } else { // CAVLC
+    if (P_SLICE == pSliceHeader->eSliceType) {
+      pDecMbFunc = WelsDecodeMbCavlcPSlice;
+    } else { //I_SLICE; NOTE(review): any non-P slice type, B included, also lands here - no CAVLC B-slice decoder is selected in this block
+      pDecMbFunc = WelsDecodeMbCavlcISlice;
+    }
+  }
+  // pick the intra neighbor handling variants according to the PPS constrained-intra-pred flag
+  if (pSliceHeader->pPps->bConstainedIntraPredFlag) {
+    pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain1IntraNxN;
+    pCtx->pMapNxNNeighToSampleFunc    = WelsMapNxNNeighToSampleConstrain1;
+    pCtx->pMap16x16NeighToSampleFunc  = WelsMap16x16NeighToSampleConstrain1;
+  } else {
+    pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain0IntraNxN;
+    pCtx->pMapNxNNeighToSampleFunc    = WelsMapNxNNeighToSampleNormal;
+    pCtx->pMap16x16NeighToSampleFunc  = WelsMap16x16NeighToSampleNormal;
+  }
+
+  pCtx->eSliceType = pSliceHeader->eSliceType;
+  if (pCurLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) { // read through the layer's PPS pointer here, through pCtx->pPps above - presumably the same PPS, verify
+    int32_t iQp = pSlice->sSliceHeaderExt.sSliceHeader.iSliceQp;
+    int32_t iCabacInitIdc = pSlice->sSliceHeaderExt.sSliceHeader.iCabacInitIdc;
+    WelsCabacContextInit (pCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
+    //InitCabacCtx (pCtx->pCabacCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
+    pSlice->iLastDeltaQp = 0;
+    WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux));
+  }
+  //try to calculate  the dequant_coeff
+  WelsCalcDeqCoeffScalingList (pCtx); // return value not checked
+
+  iNextMbXyIndex = pSliceHeader->iFirstMbInSlice;
+  iMbX = iNextMbXyIndex % pCurLayer->iMbWidth;
+  iMbY = iNextMbXyIndex / pCurLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009
+  pSlice->iMbSkipRun = -1;
+  iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurLayer->uiLayerDqId; // id stamped on every MB of this slice: first-MB index shifted left by 7, plus the layer DQ id
+
+  pCurLayer->iMbX =  iMbX;
+  pCurLayer->iMbY = iMbY;
+  pCurLayer->iMbXyIndex = iNextMbXyIndex;
+
+  do {
+    if ((-1 == iNextMbXyIndex) || (iNextMbXyIndex >= kiCountNumMb)) { // slice group boundary or end of a frame
+      break;
+    }
+
+    pCurLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc;
+    pCtx->bMbRefConcealed = false;
+    iRet = pDecMbFunc (pCtx,  pNalCur, uiEosFlag);
+    pCurLayer->pMbRefConcealedFlag[iNextMbXyIndex] = pCtx->bMbRefConcealed; // stored before the error check, so it is recorded even when the MB decoder failed
+    if (iRet != ERR_NONE) {
+      return iRet;
+    }
+
+    ++pSlice->iTotalMbInCurSlice;
+    if (uiEosFlag) { //end of slice
+      break;
+    }
+    if (pSliceHeader->pPps->uiNumSliceGroups > 1) { // multiple slice groups: the FMO helper supplies the next MB index
+      iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
+    } else {
+      ++iNextMbXyIndex;
+    }
+    iMbX = iNextMbXyIndex % pCurLayer->iMbWidth; // computed before the range check at the loop head, so may derive from -1 or an index >= kiCountNumMb on the last pass
+    iMbY = iNextMbXyIndex / pCurLayer->iMbWidth;
+    pCurLayer->iMbX =  iMbX;
+    pCurLayer->iMbY = iMbY;
+    pCurLayer->iMbXyIndex = iNextMbXyIndex;
+  } while (1);
+
+  return ERR_NONE;
+}
+// Parse one intra macroblock with CAVLC: mb_type, prediction modes, coded_block_pattern, mb_qp_delta and residual coefficients; I_PCM samples are copied straight from the bitstream.
+int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
+  SVlcTable* pVlcTable     = &pCtx->sVlcTable;
+  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
+  PBitStringAux pBs              = pCurLayer->pBitStringAux;
+  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
+
+  SWelsNeighAvail sNeighAvail;
+  int32_t iMbResProperty;
+
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+
+  int32_t iMbX = pCurLayer->iMbX;
+  int32_t iMbY = pCurLayer->iMbY;
+  const int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+  int32_t i;
+  int32_t iRet = ERR_NONE;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
+  uint32_t uiCode;
+  int32_t iCode;
+
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); // 48-entry non-zero-count cache, indexed below with a row stride of 8
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+  pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+
+  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
+  uiMbType = uiCode;
+  if (uiMbType > 25) // accepted values are 0..25
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+  if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24))) // chroma format idc 0: types 5..12 and 17..24 are rejected (presumably the Intra16x16 types that signal chroma coefficients)
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+
+  if (25 == uiMbType) { // I_PCM: raw samples follow in the bitstream
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!");
+    int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0];
+    int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1];
+
+    int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4; // 16 * (iMbX + iMbY * luma stride)
+    int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3; // 8 * (iMbX + iMbY * chroma stride)
+
+    uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL;
+    uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC;
+    uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC;
+
+    uint8_t* pTmpBsBuf;
+
+    // the declaration below shadows the function-scope i for the rest of this branch
+    int32_t i;
+    int32_t iCopySizeY  = (sizeof (uint8_t) << 4); // 16 bytes per luma row
+    int32_t iCopySizeUV = (sizeof (uint8_t) << 3); // 8 bytes per chroma row
+
+    int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2; // number of bytes to step pCurBuf back; depends on the reader's iLeftBits convention, which is not shown in this block
+
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
+
+    //step 1: locating bit-stream pointer [must align into integer byte]
+    pBs->pCurBuf -= iIndex;
+
+    //step 2: copy pixel from bit-stream into fdec [reconstruction]
+    pTmpBsBuf = pBs->pCurBuf; // NOTE(review): no check is visible here that 384 bytes remain in the bitstream buffer before the copies below - confirm the caller guarantees it
+    if (!pCtx->pParam->bParseOnly) { // samples are copied only when not in parse-only mode
+      for (i = 0; i < 16; i++) { //luma
+        memcpy (pDecY, pTmpBsBuf, iCopySizeY);
+        pDecY += iDecStrideL;
+        pTmpBsBuf += 16;
+      }
+      for (i = 0; i < 8; i++) { //cb
+        memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
+        pDecU += iDecStrideC;
+        pTmpBsBuf += 8;
+      }
+      for (i = 0; i < 8; i++) { //cr
+        memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
+        pDecV += iDecStrideC;
+        pTmpBsBuf += 8;
+      }
+    }
+
+    pBs->pCurBuf += 384; // 16*16 luma + 8*8 cb + 8*8 cr bytes, skipped even in parse-only mode
+
+    //step 3: update QP and pNonZeroCount
+    pCurLayer->pLumaQp[iMbXy] = 0;
+    memset (pCurLayer->pChromaQp[iMbXy], 0, sizeof (pCurLayer->pChromaQp[iMbXy]));
+    memset (pNzc, 16, sizeof (pCurLayer->pNzc[iMbXy]));   //Rec. 9.2.1 for PCM, nzc=16
+    WELS_READ_VERIFY (InitReadBits (pBs, 0)); // re-initialise the bit reader at the new pCurBuf position
+    return ERR_NONE;
+  } else if (0 == uiMbType) { //reference to JM
+    ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4; // may be switched to INTRA8x8 below
+    if (pCtx->pPps->bTransform8x8ModeFlag) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
+      pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+        uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8; // uiMbType is not read again on this path
+      }
+    }
+    if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) { // both branches fill the cache the same way; only the mode parser differs
+      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+      WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+    } else {
+      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+      WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+    }
+
+    //uiCbp
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
+    uiCbp = uiCode;
+    //G.9.1 Alternative parsing process for coded pBlock pattern
+    if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47)) // code number limit is 47 with chroma, 15 without
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
+    if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
+
+    if (pCtx->pSps->uiChromaFormatIdc) // map the code number to the actual CBP value through the matching table
+      uiCbp = g_kuiIntra4x4CbpTable[uiCbp];
+    else
+      uiCbp = g_kuiIntra4x4CbpTable400[uiCbp];
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    uiCbpC = uiCbp >> 4; // chroma part
+    uiCbpL = uiCbp & 15; // luma part, one bit per 8x8 block
+  } else { //remaining types 1..24 (0 and I_PCM were handled above) are treated as Intra16x16
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+    pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+    pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+    pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; // prediction mode is carried in the low two bits of (mb_type - 1)
+    pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; // CBP comes from the mb_type itself; no coded_block_pattern is read
+    uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
+    uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
+    WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
+    WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer));
+  }
+  // start from all-zero non-zero counts; the residual parsing below overwrites them where coefficients are coded
+  ST32A4 (&pNzc[0], 0);
+  ST32A4 (&pNzc[4], 0);
+  ST32A4 (&pNzc[8], 0);
+  ST32A4 (&pNzc[12], 0);
+  ST32A4 (&pNzc[16], 0);
+  ST32A4 (&pNzc[20], 0);
+
+  if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) { // IntraNxN with empty CBP: no mb_qp_delta is read, QP is inherited from the last MB
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+
+  }
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { // residual data present (Intra16x16 always enters: its luma DC is parsed regardless of CBP)
+    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+    int32_t iQpDelta, iId8x8, iId4x4;
+
+    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
+    iQpDelta = iCode;
+
+    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
+    }
+
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
+                                       51)];
+    }
+
+    // BsStartCavlc / BsEndCavlc bracket the residual parsing below
+    BsStartCavlc (pBs);
+
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
+                                          pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+        return iRet;//abnormal
+      }
+      //step2: Luma AC
+      if (uiCbpL) {
+        for (i = 0; i < 16; i++) { // one call per 4x4 block; the scan starts at index >= 1 so the DC position is excluded
+          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
+                                              g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
+                                              pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+            return iRet;//abnormal
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); // copy the four luma rows of the cache (offset 1 within rows 1..4) back to the layer's nzc
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) { // 8x8 transform: each coded 8x8 block is read through four calls, all writing into the same 64-coefficient area
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
+          if (uiCbpL & (1 << iId8x8)) {
+            int32_t iIndex = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
+                                                     g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
+                                                     pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+                return iRet;
+              }
+              iIndex++;
+            }
+          } else { // 8x8 block not coded: zero its four cache entries with two 16-bit stores
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else { // 4x4 transform: sixteen 4x4 blocks, grouped by the CBP bit of their 8x8 block
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpL & (1 << iId8x8)) {
+            int32_t iIndex = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              //Luma (DC and AC decoding together)
+              if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
+                                                  g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+                                                  pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+                return iRet;//abnormal
+              }
+              iIndex++;
+            }
+          } else { // 8x8 block not coded: zero its four cache entries with two 16-bit stores
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2)]], 0);
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    }
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpC || 2 == uiCbpC) { // chroma DC is read for CBP chroma values 1 and 2
+      for (i = 0; i < 2; i++) { //Cb Cr
+        iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
+                                            pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+          return iRet;//abnormal
+        }
+      }
+    }
+
+    //step2: AC
+    if (2 == uiCbpC) { // chroma AC is read only for CBP chroma value 2
+      for (i = 0; i < 2; i++) { //Cb Cr
+        iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        int32_t iIndex = 16 + (i << 2); // block indices 16..19 for Cb, 20..23 for Cr
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
+                                              1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+                                              pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+            return iRet;//abnormal
+          }
+          iIndex++;
+        }
+      }
+      ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1])); // chroma counts are held at offset 6 of cache rows 1, 2, 4 and 5
+      ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
+      ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
+      ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
+    }
+    BsEndCavlc (pBs);
+  }
+
+  return ERR_NONE;
+}
+// CAVLC I-slice MB entry point: resolves base_mode_flag, runs WelsActualDecodeMbCavlcISlice, then compares consumed bits with the slice size to flag end of slice or an incomplete bitstream.
+int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { // pNalCur is not referenced in this body
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBs = pCurLayer->pBitStringAux;
+  PSliceHeaderExt pSliceHeaderExt = &pCurLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
+  int32_t iBaseModeFlag;
+  int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
+  uint32_t uiCode;
+  intX_t iUsedBits;
+  if (pSliceHeaderExt->bAdaptiveBaseModeFlag == 1) { // flag is read per MB when adaptive, otherwise the slice default applies
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
+    iBaseModeFlag = uiCode;
+  } else {
+    iBaseModeFlag = pSliceHeaderExt->bDefaultBaseModeFlag;
+  }
+  if (!iBaseModeFlag) {
+    iRet = WelsActualDecodeMbCavlcISlice (pCtx);
+  } else { // a non-zero base mode flag is rejected as unsupported inter-layer prediction
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
+             iBaseModeFlag);
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
+  }
+  if (iRet) { //occur error when parsing, MUST STOP decoding
+    return iRet;
+  }
+
+  // check whether there is left bits to read next time in case multiple slices
+  iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits); // bytes advanced * 8, minus (16 - iLeftBits) as the not-yet-consumed part
+  // sub 1, for stop bit
+  if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
+    uiEosFlag = 1;
+  }
+  if (iUsedBits > (pBs->iBits -
+                   1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+             "WelsDecodeMbCavlcISlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
+             (int64_t) iUsedBits, pBs->iBits);
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
+  }
+  return ERR_NONE;
+}
+
+int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
+  SVlcTable* pVlcTable     = &pCtx->sVlcTable;
+  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
+  PBitStringAux pBs              = pCurLayer->pBitStringAux;
+  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
+
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+
+  SWelsNeighAvail sNeighAvail;
+  int32_t iMbX = pCurLayer->iMbX;
+  int32_t iMbY = pCurLayer->iMbY;
+  const int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+  int32_t i;
+  int32_t iRet = ERR_NONE;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
+  uint32_t uiCode;
+  int32_t iCode;
+  int32_t iMbResProperty;
+
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
+  uiMbType = uiCode;
+  if (uiMbType < 5) { //inter MB type
+    int16_t iMotionVector[LIST_A][30][MV_A];
+    int8_t  iRefIndex[LIST_A][30];
+    pCurLayer->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType;
+    WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurLayer);
+
+    if ((iRet = ParseInterInfo (pCtx, iMotionVector, iRefIndex, pBs)) != ERR_NONE) {
+      return iRet;//abnormal
+    }
+
+    if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag
+      pCurLayer->pResidualPredFlag[iMbXy] =  uiCode;
+    } else {
+      pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+    }
+
+    if (pCurLayer->pResidualPredFlag[iMbXy] == 0) {
+      pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+    } else {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported.");
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
+    }
+  } else { //intra MB type
+    uiMbType -= 5;
+    if (uiMbType > 25)
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+    if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+
+    if (25 == uiMbType) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!");
+      int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0];
+      int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1];
+
+      int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
+      int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
+
+      uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL;
+      uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC;
+      uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC;
+
+      uint8_t* pTmpBsBuf;
+
+      int32_t i;
+      int32_t iCopySizeY  = (sizeof (uint8_t) << 4);
+      int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
+
+      int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
+
+      pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
+
+      //step 1: locating bit-stream pointer [must align into integer byte]
+      pBs->pCurBuf -= iIndex;
+
+      //step 2: copy pixel from bit-stream into fdec [reconstruction]
+      pTmpBsBuf = pBs->pCurBuf;
+      if (!pCtx->pParam->bParseOnly) {
+        for (i = 0; i < 16; i++) { //luma
+          memcpy (pDecY, pTmpBsBuf, iCopySizeY);
+          pDecY += iDecStrideL;
+          pTmpBsBuf += 16;
+        }
+
+        for (i = 0; i < 8; i++) { //cb
+          memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
+          pDecU += iDecStrideC;
+          pTmpBsBuf += 8;
+        }
+        for (i = 0; i < 8; i++) { //cr
+          memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
+          pDecV += iDecStrideC;
+          pTmpBsBuf += 8;
+        }
+      }
+
+      pBs->pCurBuf += 384;
+
+      //step 3: update QP and pNonZeroCount
+      pCurLayer->pLumaQp[iMbXy] = 0;
+      pCurLayer->pChromaQp[iMbXy][0] = pCurLayer->pChromaQp[iMbXy][1] = 0;
+      //Rec. 9.2.1 for PCM, nzc=16
+      ST32A4 (&pNzc[0], 0x10101010);
+      ST32A4 (&pNzc[4], 0x10101010);
+      ST32A4 (&pNzc[8], 0x10101010);
+      ST32A4 (&pNzc[12], 0x10101010);
+      ST32A4 (&pNzc[16], 0x10101010);
+      ST32A4 (&pNzc[20], 0x10101010);
+      WELS_READ_VERIFY (InitReadBits (pBs, 0));
+      return ERR_NONE;
+    } else {
+      if (0 == uiMbType) {
+        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+        if (pCtx->pPps->bTransform8x8ModeFlag) {
+          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
+          pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+          if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+            uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+          }
+        }
+        if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+          pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+        } else {
+          pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+        }
+      } else { //I_PCM exclude, we can ignore it
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+        pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+        pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+        uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
+        uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
+        WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
+        if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer)) != ERR_NONE) {
+          return iRet;
+        }
+      }
+    }
+  }
+
+  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
+    uiCbp = uiCode;
+    {
+      if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47))
+        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
+      if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
+        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
+      if (MB_TYPE_INTRA4x4 == pCurLayer->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurLayer->pMbType[iMbXy]) {
+
+        uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp];
+      } else //inter
+        uiCbp = pCtx->pSps->uiChromaFormatIdc ?  g_kuiInterCbpTable[uiCbp] : g_kuiInterCbpTable400[uiCbp];
+    }
+
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    uiCbpC = pCurLayer->pCbp[iMbXy] >> 4;
+    uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
+
+    // Need modification when B picutre add in
+    bool bNeedParseTransformSize8x8Flag =
+      (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
+        || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
+       && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
+       && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
+       && (uiCbpL > 0)
+       && (pCtx->pPps->bTransform8x8ModeFlag));
+
+    if (bNeedParseTransformSize8x8Flag) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
+      pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+    }
+  }
+
+  ST32A4 (&pNzc[0], 0);
+  ST32A4 (&pNzc[4], 0);
+  ST32A4 (&pNzc[8], 0);
+  ST32A4 (&pNzc[12], 0);
+  ST32A4 (&pNzc[16], 0);
+  ST32A4 (&pNzc[20], 0);
+  if (pCurLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurLayer->pMbType[iMbXy]) && !IS_I_BL (pCurLayer->pMbType[iMbXy])) {
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+  }
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+    int32_t iQpDelta, iId8x8, iId4x4;
+    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t));
+    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
+    iQpDelta = iCode;
+
+    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
+    }
+
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
+                                       51)];
+    }
+
+    BsStartCavlc (pBs);
+
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
+                                          pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+        return iRet;//abnormal
+      }
+      //step2: Luma AC
+      if (uiCbpL) {
+        for (i = 0; i < 16; i++) {
+          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
+                                              g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
+                                              pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+            return iRet;//abnormal
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
+          if (uiCbpL & (1 << iId8x8)) {
+            int32_t iIndex = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
+                                                     g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
+                                                     pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+                return iRet;
+              }
+              iIndex++;
+            }
+          } else {
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else { // Normal T4x4
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+          if (uiCbpL & (1 << iId8x8)) {
+            int32_t iIndex = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              //Luma (DC and AC decoding together)
+              if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
+                                                  g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+                                                  pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+                return iRet;//abnormal
+              }
+              iIndex++;
+            }
+          } else {
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    }
+
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpC || 2 == uiCbpC) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        else
+          iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
+
+        if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
+                                            pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+          return iRet;//abnormal
+        }
+      }
+    } else {
+    }
+    //step2: AC
+    if (2 == uiCbpC) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        else
+          iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
+
+        int32_t iIndex = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
+                                              1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+                                              pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+            return iRet;//abnormal
+          }
+          iIndex++;
+        }
+      }
+      ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
+      ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
+      ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
+      ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
+    }
+    BsEndCavlc (pBs);
+  }
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Decode one macroblock of a CAVLC-coded P slice: either consume one MB of the pending
+ *          mb_skip_run as a skipped MB, or parse a coded MB through WelsActualDecodeMbCavlcPSlice().
+ * \param   pCtx      decoder context; supplies the current DQ layer, its bit reader and the LIST_0 references
+ * \param   pNalCur   current NAL unit; its quality/dependency ids gate the QP update of a skipped MB
+ * \param   uiEosFlag set to 1 when all slice bits but the last one are consumed and no skip run is pending
+ * \return  ERR_NONE on success; otherwise the error generated here (invalid skip run, unsupported
+ *          inter-layer prediction, incomplete bitstream) or returned by WelsActualDecodeMbCavlcPSlice()
+ */
+int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
+  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
+  PBitStringAux pBs              = pCurLayer->pBitStringAux;
+  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
+  PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
+  intX_t iUsedBits;
+  const int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+  int32_t iBaseModeFlag, i;
+  int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
+  uint32_t uiCode;
+
+  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+
+  if (-1 == pSlice->iMbSkipRun) { // -1 means no skip run is pending, so a new one is read
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run
+    pSlice->iMbSkipRun = uiCode;
+    if (-1 == pSlice->iMbSkipRun) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_SKIP_RUN);
+    }
+  }
+  if (pSlice->iMbSkipRun--) { // non-zero run: this MB is skipped; a run of 0 becomes -1 and is re-read next call
+    int16_t iMv[2];
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
+    ST32A4 (&pNzc[0], 0);
+    ST32A4 (&pNzc[4], 0);
+    ST32A4 (&pNzc[8], 0);
+    ST32A4 (&pNzc[12], 0);
+    ST32A4 (&pNzc[16], 0);
+    ST32A4 (&pNzc[20], 0);
+
+    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+    memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
+    pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && ppRefPic[0]->bIsComplete);
+    PredPSkipMvFromNeighbor (pCurLayer, iMv); // predict iMv from the neighbouring MBs
+    for (i = 0; i < 16; i++) { // the same predicted MV is stored in all 16 entries of this MB
+      ST32A2 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)iMv);
+    }
+
+    if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag ||
+        (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) {
+      pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; // skipped MB takes over the slice's last MB QP
+      for (i = 0; i < 2; i++) {
+        pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                         pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+      }
+    }
+
+    pCurLayer->pCbp[iMbXy] = 0;
+  } else {
+    if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
+      iBaseModeFlag = uiCode;
+    } else {
+      iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag;
+    }
+    if (!iBaseModeFlag) {
+      iRet = WelsActualDecodeMbCavlcPSlice (pCtx);
+    } else {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
+               iBaseModeFlag);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
+    }
+    if (iRet) { //occur error when parsing, MUST STOP decoding
+      return iRet;
+    }
+  }
+  // check whether there is left bits to read next time in case multiple slices
+  iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
+  if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary; sub 1, for stop bit
+    uiEosFlag = 1;
+  }
+  if (iUsedBits > (pBs->iBits - 1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+             "WelsDecodeMbCavlcPSlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
+             (int64_t) iUsedBits, pBs->iBits);
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
+  }
+  return ERR_NONE;
+}
+
+/*! Fill pFunc with block helpers: C versions first, then each compiled-in SIMD set whose iCpu bit is set. */
+void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
+  // Portable C implementations are always installed; the sections below may overwrite them.
+  pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_c;
+  pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_c;
+  pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_c;
+#if defined(HAVE_NEON)
+  if ((iCpu & WELS_CPU_NEON) != 0) {
+    pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_neon;
+    pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_neon;
+    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_neon;
+  }
+#endif
+
+#if defined(HAVE_NEON_AARCH64)
+  if ((iCpu & WELS_CPU_NEON) != 0) {
+    pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_AArch64_neon;
+    pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_AArch64_neon;
+    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_AArch64_neon;
+  }
+#endif
+
+#ifdef X86_ASM
+  if ((iCpu & WELS_CPU_SSE2) != 0) {
+    pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_sse2;
+    pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_sse2;
+    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_sse2;
+  }
+#endif
+}
+
+/*! memset iH rows of iW int16_t each, rows iStride elements apart; uiVal is replicated into every byte. */
+void WelsBlockInit (int16_t* pBlock, int iW, int iH, int iStride, uint8_t uiVal) {
+  const size_t kuiRowBytes = iW * sizeof (int16_t);
+  for (int32_t iRow = 0; iRow < iH; ++iRow) {
+    memset (pBlock + iRow * iStride, uiVal, kuiRowBytes);
+  }
+}
+
+/*! C implementation: zero a 16x16 block of int16_t whose rows are iStride elements apart. */
+void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) {
+  WelsBlockInit (pBlock, /* iW */ 16, /* iH */ 16, iStride, /* uiVal */ 0);
+}
+/*! C implementation: zero an 8x8 block of int16_t whose rows are iStride elements apart. */
+void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) {
+  WelsBlockInit (pBlock, /* iW */ 8, /* iH */ 8, iStride, /* uiVal */ 0);
+}
+/*!
+ * \brief Unless the slice header sets iDirectSpatialMvPredFlag, fill pCurSlice->iMvScale[list][i] from POC distances; returns true.
+ */
+bool ComputeColocated (PWelsDecoderContext pCtx) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PSlice pCurSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader;
+  if (!pSliceHeader->iDirectSpatialMvPredFlag) {
+    uint32_t uiShortRefCount = pCtx->sRefPic.uiShortRefCount[LIST_0];
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (uint32_t i = 0; i < uiShortRefCount; ++i) {
+        // WELS_CLIP3 is called as (value, min, max) everywhere else in this file; the value must come first.
+        int32_t iTRb = WELS_CLIP3 (pSliceHeader->iPicOrderCntLsb - pCtx->sRefPic.pRefList[listIdx][i]->iFramePoc, -128, 127);
+        int32_t iTRp = WELS_CLIP3 (pCtx->sRefPic.pRefList[LIST_1][i]->iFramePoc - pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc,
+                                   -128, 127);
+        if (iTRp != 0) {
+          int32_t prescale = (16384 + iAbs (iTRp / 2)) / iTRp;
+          pCurSlice->iMvScale[listIdx][i] = WELS_CLIP3 ((iTRb * prescale + 32) >> 6, -1024, 1023);
+        } else {
+          pCurSlice->iMvScale[listIdx][i] = 0x03FFF; // equal POCs: no division possible, fixed sentinel scale
+        }
+      }
+    }
+  }
+  //TODO: get Mv_colocated_L1, then iMvp[LIST_x] = Mv_colocated_L1 * (POC(cur) - POC(Lx)) / (POC(L1) - POC(L0))
+  return true;
+}
+} // namespace WelsDec
--- a/codec/decoder/core/src/decoder_core.cpp
+++ b/codec/decoder/core/src/decoder_core.cpp
@@ -1,2777 +1,2777 @@
-/*!
- * \copy
- *     Copyright (c)  2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *      decoder_core.c: Wels decoder framework core implementation
- */
-
-#include "decoder_core.h"
-#include "error_code.h"
-#include "memmgr_nal_unit.h"
-#include "au_parser.h"
-#include "decode_slice.h"
-#include "manage_dec_ref.h"
-#include "expand_pic.h"
-#include "decoder.h"
-#include "decode_mb_aux.h"
-#include "memory_align.h"
-#include "error_concealment.h"
-
-namespace WelsDec {
-static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
-  PDqLayer pCurDq = pCtx->pCurDqLayer;
-  PPicture pPic = pCtx->pDec;
-
-  const int32_t kiWidth = pCurDq->iMbWidth << 4;
-  const int32_t kiHeight = pCurDq->iMbHeight << 4;
-
-  const int32_t kiTotalNumMbInCurLayer = pCurDq->iMbWidth * pCurDq->iMbHeight;
-  bool bFrameCompleteFlag = true;
-
-  if (pPic->bNewSeqBegin) {
-    memcpy (& (pCtx->sFrameCrop), & (pCurDq->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.pSps->sFrameCrop),
-            sizeof (SPosOffset)); //confirmed_safe_unsafe_usage
-#ifdef LONG_TERM_REF
-    pCtx->bParamSetsLostFlag      = false;
-#else
-    pCtx->bReferenceLostAtT0Flag = false; // need initialize it due new seq, 6/4/2010
-#endif //LONG_TERM_REF
-    if (pCtx->iTotalNumMbRec == kiTotalNumMbInCurLayer) {
-      pCtx->bPrintFrameErrorTraceFlag = true;
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
-               "DecodeFrameConstruction(): will output first frame of new sequence, %d x %d, crop_left:%d, crop_right:%d, crop_top:%d, crop_bottom:%d, ignored error packet:%d.",
-               kiWidth, kiHeight, pCtx->sFrameCrop.iLeftOffset, pCtx->sFrameCrop.iRightOffset, pCtx->sFrameCrop.iTopOffset,
-               pCtx->sFrameCrop.iBottomOffset, pCtx->iIgnoredErrorInfoPacketCount);
-      pCtx->iIgnoredErrorInfoPacketCount = 0;
-    }
-  }
-
-  const int32_t kiActualWidth = kiWidth - (pCtx->sFrameCrop.iLeftOffset + pCtx->sFrameCrop.iRightOffset) * 2;
-  const int32_t kiActualHeight = kiHeight - (pCtx->sFrameCrop.iTopOffset + pCtx->sFrameCrop.iBottomOffset) * 2;
-
-
-  if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
-    if ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth)
-        || (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight)) {
-      pCtx->sDecoderStatistics.uiResolutionChangeTimes++;
-      pCtx->sDecoderStatistics.uiWidth = kiActualWidth;
-      pCtx->sDecoderStatistics.uiHeight = kiActualHeight;
-    }
-    UpdateDecStatNoFreezingInfo (pCtx);
-  }
-
-  if (pCtx->pParam->bParseOnly) { //should exit for parse only to prevent access NULL pDstInfo
-    PAccessUnit pCurAu = pCtx->pAccessUnitList;
-    if (dsErrorFree == pCtx->iErrorCode) { //correct decoding, add to data buffer
-      SParserBsInfo* pParser = pCtx->pParserBsInfo;
-      SNalUnit* pCurNal = NULL;
-      int32_t iTotalNalLen = 0;
-      int32_t iNalLen = 0;
-      int32_t iNum = 0;
-      while (iNum < pParser->iNalNum) {
-        iTotalNalLen += pParser->pNalLenInByte[iNum++];
-      }
-      uint8_t* pDstBuf = pParser->pDstBuff + iTotalNalLen;
-      int32_t iIdx = pCurAu->uiStartPos;
-      int32_t iEndIdx = pCurAu->uiEndPos;
-      uint8_t* pNalBs = NULL;
-      pParser->uiOutBsTimeStamp = (pCurAu->pNalUnitsList [iIdx]) ? pCurAu->pNalUnitsList [iIdx]->uiTimeStamp : 0;
-      //pParser->iNalNum = 0;
-      pParser->iSpsWidthInPixel = (pCtx->pSps->iMbWidth << 4) - ((pCtx->pSps->sFrameCrop.iLeftOffset +
-                                  pCtx->pSps->sFrameCrop.iRightOffset) << 1);
-      pParser->iSpsHeightInPixel = (pCtx->pSps->iMbHeight << 4) - ((pCtx->pSps->sFrameCrop.iTopOffset +
-                                   pCtx->pSps->sFrameCrop.iBottomOffset) << 1);
-
-      if (pCurAu->pNalUnitsList [iIdx]->sNalHeaderExt.bIdrFlag) { //IDR
-        if (pCtx->bFrameFinish) { //add required sps/pps
-          if (pParser->iNalNum > pCtx->iMaxNalNum - 2) { //2 reserved for sps+pps
-            WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
-                     "DecodeFrameConstruction(): current NAL num (%d) plus sps & pps exceeds permitted num (%d). Will expand",
-                     pParser->iNalNum, pCtx->iMaxNalNum);
-            WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, ExpandBsLenBuffer (pCtx, pParser->iNalNum + 2))
-          }
-          bool bSubSps = (NAL_UNIT_CODED_SLICE_EXT == pCurAu->pNalUnitsList [iIdx]->sNalHeaderExt.sNalUnitHeader.eNalUnitType);
-          SSpsBsInfo* pSpsBs = NULL;
-          SPpsBsInfo* pPpsBs = NULL;
-          int32_t iSpsId = pCtx->pSps->iSpsId;
-          int32_t iPpsId = pCtx->pPps->iPpsId;
-          pCtx->bParamSetsLostFlag = false;
-          //find required sps, pps and write into dst buff
-          pSpsBs = bSubSps ? &pCtx->sSubsetSpsBsInfo [iSpsId] : &pCtx->sSpsBsInfo [iSpsId];
-          pPpsBs = &pCtx->sPpsBsInfo [iPpsId];
-          if (pDstBuf - pParser->pDstBuff + pSpsBs->uiSpsBsLen + pPpsBs->uiPpsBsLen >= MAX_ACCESS_UNIT_CAPACITY) {
-            WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
-                     "DecodeFrameConstruction(): sps pps size: (%d %d) too large. Failed to parse. \n", pSpsBs->uiSpsBsLen,
-                     pPpsBs->uiPpsBsLen);
-            pCtx->iErrorCode |= dsOutOfMemory;
-            pCtx->pParserBsInfo->iNalNum = 0;
-            return ERR_INFO_OUT_OF_MEMORY;
-          }
-          memcpy (pDstBuf, pSpsBs->pSpsBsBuf, pSpsBs->uiSpsBsLen);
-          pParser->pNalLenInByte [pParser->iNalNum ++] = pSpsBs->uiSpsBsLen;
-          pDstBuf += pSpsBs->uiSpsBsLen;
-          memcpy (pDstBuf, pPpsBs->pPpsBsBuf, pPpsBs->uiPpsBsLen);
-          pParser->pNalLenInByte [pParser->iNalNum ++] = pPpsBs->uiPpsBsLen;
-          pDstBuf += pPpsBs->uiPpsBsLen;
-          pCtx->bFrameFinish = false;
-        }
-      }
-      //then VCL data re-write
-      if (pParser->iNalNum + iEndIdx - iIdx + 1 > pCtx->iMaxNalNum) { //calculate total NAL num
-        WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
-                 "DecodeFrameConstruction(): current NAL num (%d) exceeds permitted num (%d). Will expand",
-                 pParser->iNalNum + iEndIdx - iIdx + 1, pCtx->iMaxNalNum);
-        WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, ExpandBsLenBuffer (pCtx, pParser->iNalNum + iEndIdx - iIdx + 1))
-      }
-      while (iIdx <= iEndIdx) {
-        pCurNal = pCurAu->pNalUnitsList [iIdx ++];
-        iNalLen = pCurNal->sNalData.sVclNal.iNalLength;
-        pNalBs = pCurNal->sNalData.sVclNal.pNalPos;
-        pParser->pNalLenInByte [pParser->iNalNum ++] = iNalLen;
-        if (pDstBuf - pParser->pDstBuff + iNalLen >= MAX_ACCESS_UNIT_CAPACITY) {
-          WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
-                   "DecodeFrameConstruction(): composed output size (%ld) exceeds (%d). Failed to parse. current data pos %d out of %d:, previously accumulated num: %d, total num: %d, previously accumulated len: %d, current len: %d, current buf pos: %p, header buf pos: %p \n",
-                   (long) (pDstBuf - pParser->pDstBuff + iNalLen), MAX_ACCESS_UNIT_CAPACITY, iIdx, iEndIdx, iNum, pParser->iNalNum,
-                   iTotalNalLen, iNalLen, pDstBuf, pParser->pDstBuff);
-          pCtx->iErrorCode |= dsOutOfMemory;
-          pCtx->pParserBsInfo->iNalNum = 0;
-          return ERR_INFO_OUT_OF_MEMORY;
-        }
-
-        memcpy (pDstBuf, pNalBs, iNalLen);
-        pDstBuf += iNalLen;
-      }
-      if (pCtx->iTotalNumMbRec == kiTotalNumMbInCurLayer) { //frame complete
-        pCtx->iTotalNumMbRec = 0;
-        pCtx->bFramePending = false;
-        pCtx->bFrameFinish = true; //finish current frame and mark it
-      } else if (pCtx->iTotalNumMbRec != 0) { //frame incomplete
-        pCtx->bFramePending = true;
-        pCtx->pDec->bIsComplete = false;
-        pCtx->bFrameFinish = false; //current frame not finished
-        pCtx->iErrorCode |= dsFramePending;
-        return ERR_INFO_PARSEONLY_PENDING;
-        //pCtx->pParserBsInfo->iNalNum = 0;
-      }
-    } else { //error
-      pCtx->pParserBsInfo->uiOutBsTimeStamp = 0;
-      pCtx->pParserBsInfo->iNalNum = 0;
-      pCtx->pParserBsInfo->iSpsWidthInPixel = 0;
-      pCtx->pParserBsInfo->iSpsHeightInPixel = 0;
-      return ERR_INFO_PARSEONLY_ERROR;
-    }
-    return ERR_NONE;
-  }
-
-  if (pCtx->iTotalNumMbRec != kiTotalNumMbInCurLayer) {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
-             "DecodeFrameConstruction(): iTotalNumMbRec:%d, total_num_mb_sps:%d, cur_layer_mb_width:%d, cur_layer_mb_height:%d ",
-             pCtx->iTotalNumMbRec, kiTotalNumMbInCurLayer, pCurDq->iMbWidth, pCurDq->iMbHeight);
-    bFrameCompleteFlag = false; //return later after output buffer is done
-    if (pCtx->bInstantDecFlag) //no-delay decoding, wait for new slice
-      return ERR_INFO_MB_NUM_INADEQUATE;
-  } else if (pCurDq->sLayerInfo.sNalHeaderExt.bIdrFlag
-             && (pCtx->iErrorCode == dsErrorFree)) { //complete non-ECed IDR frame done
-    pCtx->pDec->bIsComplete = true;
-    pCtx->bFreezeOutput = false;
-  }
-
-  pCtx->iTotalNumMbRec = 0;
-
-  //////output:::normal path
-  pDstInfo->uiOutYuvTimeStamp = pPic->uiTimeStamp;
-  ppDst[0]      = pPic->pData[0];
-  ppDst[1]      = pPic->pData[1];
-  ppDst[2]      = pPic->pData[2];
-
-  pDstInfo->UsrData.sSystemBuffer.iFormat = videoFormatI420;
-
-  pDstInfo->UsrData.sSystemBuffer.iWidth = kiActualWidth;
-  pDstInfo->UsrData.sSystemBuffer.iHeight = kiActualHeight;
-  pDstInfo->UsrData.sSystemBuffer.iStride[0] = pPic->iLinesize[0];
-  pDstInfo->UsrData.sSystemBuffer.iStride[1] = pPic->iLinesize[1];
-  ppDst[0] = ppDst[0] + pCtx->sFrameCrop.iTopOffset * 2 * pPic->iLinesize[0] + pCtx->sFrameCrop.iLeftOffset * 2;
-  ppDst[1] = ppDst[1] + pCtx->sFrameCrop.iTopOffset  * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
-  ppDst[2] = ppDst[2] + pCtx->sFrameCrop.iTopOffset  * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
-  pDstInfo->iBufferStatus = 1;
-
-  bool bOutResChange = (pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
-                       || (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
-  pCtx->iLastImgWidthInPixel = pDstInfo->UsrData.sSystemBuffer.iWidth;
-  pCtx->iLastImgHeightInPixel = pDstInfo->UsrData.sSystemBuffer.iHeight;
-  if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) //no buffer output if EC is disabled and frame incomplete
-    pDstInfo->iBufferStatus = (int32_t) (bFrameCompleteFlag
-                                         && pPic->bIsComplete); // When EC disable, ECed picture not output
-  else if ((pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE
-            || pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE)
-           && pCtx->iErrorCode && bOutResChange)
-    pCtx->bFreezeOutput = true;
-
-  if (pDstInfo->iBufferStatus == 0) {
-    if (!bFrameCompleteFlag)
-      pCtx->iErrorCode |= dsBitstreamError;
-    return ERR_INFO_MB_NUM_INADEQUATE;
-  }
-  if (pCtx->bFreezeOutput) {
-    pDstInfo->iBufferStatus = 0;
-    if (pPic->bNewSeqBegin) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
-               "DecodeFrameConstruction():New sequence detected, but freezed, correct MBs (%d) out of whole MBs (%d).",
-               kiTotalNumMbInCurLayer - pCtx->iMbEcedNum, kiTotalNumMbInCurLayer);
-    }
-  }
-  pCtx->iMbEcedNum = pPic->iMbEcedNum;
-  pCtx->iMbNum = pPic->iMbNum;
-  pCtx->iMbEcedPropNum = pPic->iMbEcedPropNum;
-  if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
-    if (pDstInfo->iBufferStatus && ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth)
-                                    || (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight))) {
-      pCtx->sDecoderStatistics.uiResolutionChangeTimes++;
-      pCtx->sDecoderStatistics.uiWidth = kiActualWidth;
-      pCtx->sDecoderStatistics.uiHeight = kiActualHeight;
-    }
-    UpdateDecStat (pCtx, pDstInfo->iBufferStatus != 0);
-  }
-  return ERR_NONE;
-}
-
-inline bool    CheckSliceNeedReconstruct (uint8_t uiLayerDqId, uint8_t uiTargetDqId) {
-  return (uiLayerDqId == uiTargetDqId); // target layer
-}
-
-inline uint8_t GetTargetDqId (uint8_t uiTargetDqId,  SDecodingParam* psParam) {
-  uint8_t  uiRequiredDqId = psParam ? psParam->uiTargetDqLayer : (uint8_t)255;
-
-  return WELS_MIN (uiTargetDqId, uiRequiredDqId);
-}
-
-
-inline void    HandleReferenceLostL0 (PWelsDecoderContext pCtx, PNalUnit pCurNal) {
-  if (0 == pCurNal->sNalHeaderExt.uiTemporalId) {
-    pCtx->bReferenceLostAtT0Flag = true;
-  }
-  pCtx->iErrorCode |= dsBitstreamError;
-}
-
-inline void    HandleReferenceLost (PWelsDecoderContext pCtx, PNalUnit pCurNal) {
-  if ((0 == pCurNal->sNalHeaderExt.uiTemporalId) || (1 == pCurNal->sNalHeaderExt.uiTemporalId)) {
-    pCtx->bReferenceLostAtT0Flag = true;
-  }
-  pCtx->iErrorCode |= dsRefLost;
-}
-
-inline int32_t  WelsDecodeConstructSlice (PWelsDecoderContext pCtx, PNalUnit pCurNal) {
-  int32_t  iRet = WelsTargetSliceConstruction (pCtx);
-
-  if (iRet) {
-    HandleReferenceLostL0 (pCtx, pCurNal);
-  }
-
-  return iRet;
-}
-
-int32_t ParsePredWeightedTable (PBitStringAux pBs, PSliceHeader pSh) {
-  uint32_t uiCode;
-  int32_t iList = 0;
-  int32_t iCode;
-
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode));
-  WELS_CHECK_SE_BOTH_ERROR_NOLOG (uiCode, 0, 7, "luma_log2_weight_denom",
-                                  GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_LOG2_WEIGHT_DENOM));
-  pSh->sPredWeightTable.uiLumaLog2WeightDenom = uiCode;
-  if (pSh->pSps->uiChromaArrayType != 0) {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode));
-    WELS_CHECK_SE_BOTH_ERROR_NOLOG (uiCode, 0, 7, "chroma_log2_weight_denom",
-                                    GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_LOG2_WEIGHT_DENOM));
-    pSh->sPredWeightTable.uiChromaLog2WeightDenom = uiCode;
-  }
-
-  if ((pSh->sPredWeightTable.uiLumaLog2WeightDenom | pSh->sPredWeightTable.uiChromaLog2WeightDenom) > 7)
-    return ERR_NONE;
-
-  do {
-
-    for (int i = 0; i < pSh->uiRefCount[iList]; i++) {
-      //luma
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
-      if (!!uiCode) {
-
-        WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
-        WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "luma_weight",
-                                        GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_WEIGHT));
-        pSh->sPredWeightTable.sPredList[iList].iLumaWeight[i] = iCode;
-
-        WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
-        WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "luma_offset",
-                                        GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_OFFSET));
-        pSh->sPredWeightTable.sPredList[iList].iLumaOffset[i] = iCode;
-      } else {
-        pSh->sPredWeightTable.sPredList[iList].iLumaWeight[i] = 1 << (pSh->sPredWeightTable.uiLumaLog2WeightDenom);
-        pSh->sPredWeightTable.sPredList[iList].iLumaOffset[i] = 0;
-
-      }
-      //chroma
-      if (pSh->pSps->uiChromaArrayType == 0)
-        continue;
-
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
-      if (!!uiCode) {
-        for (int j = 0; j < 2; j++) {
-
-
-          WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
-          WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "chroma_weight",
-                                          GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_WEIGHT));
-          pSh->sPredWeightTable.sPredList[iList].iChromaWeight[i][j] = iCode;
-
-          WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
-          WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "chroma_offset",
-                                          GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_OFFSET));
-          pSh->sPredWeightTable.sPredList[iList].iChromaOffset[i][j] = iCode;
-        }
-      } else {
-        for (int j = 0; j < 2; j++) {
-
-
-          pSh->sPredWeightTable.sPredList[iList].iChromaWeight[i][j] = 1 << (pSh->sPredWeightTable.uiChromaLog2WeightDenom);
-          pSh->sPredWeightTable.sPredList[iList].iChromaOffset[i][j] = 0;
-        }
-      }
-
-    }
-    ++iList;
-    if (pSh->eSliceType != B_SLICE) {
-      break;
-    }
-  } while (iList < LIST_A);//TODO: SUPPORT LIST_A
-  return ERR_NONE;
-}
-
-/*
- *  CreateImplicitWeightTable
- *  Builds the implicit bi-prediction weight table of the current DQ layer
- *  (pCurDqLayer->pPredWeightTable->iImplicitWeight[iRef0][iRef1]).
- *  Nothing is done unless the layer has bUseWeightedBiPredIdc set and the PPS of the
- *  current slice carries uiWeightedBipredIdc == 2.
- *
- *  When both lists hold exactly one reference and the two reference POCs are symmetric
- *  around the current POC, weighted bi-prediction is switched off for the layer instead
- *  of filling the table.
- *
- *  Otherwise both log2 weight denominators are set to 5 and every (list0, list1) pair of
- *  available references gets the default weight 32, replaced by 64 - iDistScaleFactor
- *  when both references are short-term, their POCs differ, and the factor lies in
- *  [-64, 128].
- */
-void CreateImplicitWeightTable (PWelsDecoderContext pCtx) {
-  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
-  PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
-
-  if (!pCurDqLayer->bUseWeightedBiPredIdc || pSliceHeader->pPps->uiWeightedBipredIdc != 2)
-    return;
-
-  // NOTE(review): iPicOrderCntLsb is used as the POC of the current picture here - confirm
-  // against the slice header parser.
-  const int32_t iPoc = pSliceHeader->iPicOrderCntLsb;
-
-  // Symmetric single-reference case. The list heads are NULL-checked (the loops below treat
-  // list entries as possibly NULL) and the sum is done in 64 bits so it cannot overflow.
-  if (pSliceHeader->uiRefCount[0] == 1 && pSliceHeader->uiRefCount[1] == 1
-      && pCtx->sRefPic.pRefList[LIST_0][0] != NULL && pCtx->sRefPic.pRefList[LIST_1][0] != NULL
-      && static_cast<int64_t> (pCtx->sRefPic.pRefList[LIST_0][0]->iFramePoc)
-      + static_cast<int64_t> (pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc) == 2 * static_cast<int64_t> (iPoc)) {
-    pCurDqLayer->bUseWeightedBiPredIdc = false;
-    return;
-  }
-
-  pCurDqLayer->pPredWeightTable->uiLumaLog2WeightDenom = 5;
-  pCurDqLayer->pPredWeightTable->uiChromaLog2WeightDenom = 5;
-  for (int32_t iRef0 = 0; iRef0 < pSliceHeader->uiRefCount[0]; iRef0++) {
-    if (!pCtx->sRefPic.pRefList[LIST_0][iRef0])
-      continue;
-    const int32_t iPoc0 = pCtx->sRefPic.pRefList[LIST_0][iRef0]->iFramePoc;
-    const bool bIsLongRef0 = pCtx->sRefPic.pRefList[LIST_0][iRef0]->bIsLongRef;
-    for (int32_t iRef1 = 0; iRef1 < pSliceHeader->uiRefCount[1]; iRef1++) {
-      if (!pCtx->sRefPic.pRefList[LIST_1][iRef1])
-        continue;
-      const int32_t iPoc1 = pCtx->sRefPic.pRefList[LIST_1][iRef1]->iFramePoc;
-      const bool bIsLongRef1 = pCtx->sRefPic.pRefList[LIST_1][iRef1]->bIsLongRef;
-      // default: equal weighting of both references
-      pCurDqLayer->pPredWeightTable->iImplicitWeight[iRef0][iRef1] = 32;
-      if (bIsLongRef0 || bIsLongRef1)
-        continue;
-      const int32_t iTd = WELS_CLIP3 (iPoc1 - iPoc0, -128, 127);
-      if (iTd == 0)
-        continue; // also keeps the division below away from zero
-      const int32_t iTb = WELS_CLIP3 (iPoc - iPoc0, -128, 127);
-      const int32_t iTx = (16384 + (WELS_ABS (iTd) >> 1)) / iTd;
-      const int32_t iDistScaleFactor = (iTb * iTx + 32) >> 8;
-      if (iDistScaleFactor >= -64 && iDistScaleFactor <= 128) {
-        pCurDqLayer->pPredWeightTable->iImplicitWeight[iRef0][iRef1] = 64 - iDistScaleFactor;
-      }
-    }
-  }
-}
-
-/*
- *  ParseRefPicListReordering
- *  Reads the reference picture list reordering (modification) syntax of a slice header
- *  from pBs into pSh->pRefPicListReordering.
- *
- *  I and SI slices carry none of this syntax and return ERR_NONE immediately. List 0 is
- *  parsed for every other slice type; the second list is parsed only for B slices.
- *
- *  Returns ERR_NONE on success, or an ERR_INFO_INVALID_REF_REORDERING error number at
- *  slice-header level when an idc is > 3 or more entries are present than allowed.
- *  WELS_READ_VERIFY and WELS_CHECK_SE_UPPER_ERROR_NOLOG are project macros defined outside
- *  this chunk; presumably they return from this function on failure - confirm.
- */
-int32_t ParseRefPicListReordering (PBitStringAux pBs, PSliceHeader pSh) {
-  int32_t iList = 0;
-  const EWelsSliceType keSt = pSh->eSliceType;
-  PRefPicListReorderSyn pRefPicListReordering = &pSh->pRefPicListReordering;
-  PSps pSps = pSh->pSps;
-  uint32_t uiCode;
-  if (keSt == I_SLICE || keSt == SI_SLICE)
-    return ERR_NONE;
-
-  // Common syntaxs for P or B slices: list0, list1 followed if B slices used.
-  do {
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //ref_pic_list_modification_flag_l0
-    pRefPicListReordering->bRefPicListReorderingFlag[iList] = !!uiCode;
-
-    if (pRefPicListReordering->bRefPicListReorderingFlag[iList]) {
-      int32_t iIdx = 0;
-      do {
-        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //modification_of_pic_nums_idc
-        const uint32_t kuiIdc = uiCode;
-
-        // Reject before indexing sReorderingSyn: the entry index must stay inside the table
-        // and the idc must be one of 0..3.
-        if ((iIdx >= MAX_REF_PIC_COUNT) || (kuiIdc > 3)) {
-          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING);
-        }
-        pRefPicListReordering->sReorderingSyn[iList][iIdx].uiReorderingOfPicNumsIdc = kuiIdc;
-        if (kuiIdc == 3) // end-of-list marker
-          break;
-
-        // iIdx < MAX_REF_PIC_COUNT is already guaranteed by the check above, so only the
-        // per-slice reference count needs testing here.
-        if (iIdx >= pSh->uiRefCount[iList])
-          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING);
-
-        if (kuiIdc == 0 || kuiIdc == 1) {
-          // abs_diff_pic_num_minus1 should be in range 0 to MaxPicNum-1, MaxPicNum is derived as
-          // 2^(4+log2_max_frame_num_minus4)
-          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //abs_diff_pic_num_minus1
-          WELS_CHECK_SE_UPPER_ERROR_NOLOG (uiCode, (uint32_t) (1 << pSps->uiLog2MaxFrameNum), "abs_diff_pic_num_minus1",
-                                           GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING));
-          pRefPicListReordering->sReorderingSyn[iList][iIdx].uiAbsDiffPicNumMinus1 = uiCode; // uiAbsDiffPicNumMinus1
-        } else {
-          // kuiIdc == 2 is the only value left at this point
-          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_pic_num
-          pRefPicListReordering->sReorderingSyn[iList][iIdx].uiLongTermPicNum = uiCode;
-        }
-
-        ++ iIdx;
-      } while (true);
-    }
-    if (keSt != B_SLICE)
-      break;
-    ++ iList;
-  } while (iList < LIST_A);
-
-  return ERR_NONE;
-}
-
-/*
- *  ParseDecRefPicMarking
- *  Reads the decoded reference picture marking syntax of a slice header from pBs into
- *  pSh->sRefMarking.
- *
- *  For an IDR slice (kbIdrFlag) only the two IDR flags are read. Otherwise the adaptive
- *  marking flag is read and, when it is set, up to MAX_MMCO_COUNT memory management
- *  control operations are stored in sMmcoRef[], stopping at MMCO_END.
- *
- *  Returns ERR_NONE on success and -1 when MMCO_LONG, MMCO_SET_MAX_LONG or MMCO_RESET is
- *  repeated, or when MMCO_RESET follows an MMCO_SHORT2UNUSED / MMCO_SHORT2LONG /
- *  MMCO_LONG2UNUSED operation. MMCO_RESET also zeroes the previous POC lsb/msb kept in pCtx
- *  and pCtx->pSliceHeader->iPicOrderCntLsb.
- */
-int32_t ParseDecRefPicMarking (PWelsDecoderContext pCtx, PBitStringAux pBs, PSliceHeader pSh, PSps pSps,
-                               const bool kbIdrFlag) {
-  PRefPicMarking const kpRefMarking = &pSh->sRefMarking;
-  uint32_t uiCode;
-
-  if (kbIdrFlag) {
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //no_output_of_prior_pics_flag
-    kpRefMarking->bNoOutputOfPriorPicsFlag = !!uiCode;
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //long_term_reference_flag
-    kpRefMarking->bLongTermRefFlag = !!uiCode;
-    return ERR_NONE;
-  }
-
-  WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_ref_pic_marking_mode_flag
-  kpRefMarking->bAdaptiveRefPicMarkingModeFlag = !!uiCode;
-  if (!kpRefMarking->bAdaptiveRefPicMarkingModeFlag)
-    return ERR_NONE;
-
-  bool bResetAllowed = true;     // cleared once an MMCO 1/2/3 style operation has been seen
-  bool bSeenSetMaxLong = false;
-  bool bSeenReset = false;
-  bool bSeenLong = false;
-  int32_t iIdx = 0;
-  do {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //memory_management_control_operation
-    const uint32_t kuiMmco = uiCode;
-    const bool kbShortToLong = (kuiMmco == MMCO_SHORT2LONG);
-
-    kpRefMarking->sMmcoRef[iIdx].uiMmcoType = kuiMmco;
-    if (kuiMmco == MMCO_END)
-      break;
-
-    // first operand: which picture the operation addresses
-    if (kuiMmco == MMCO_SHORT2UNUSED || kbShortToLong) {
-      bResetAllowed = false;
-      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //difference_of_pic_nums_minus1
-      kpRefMarking->sMmcoRef[iIdx].iDiffOfPicNum = 1 + uiCode;
-      kpRefMarking->sMmcoRef[iIdx].iShortFrameNum = (pSh->iFrameNum - kpRefMarking->sMmcoRef[iIdx].iDiffOfPicNum) & ((
-            1 << pSps->uiLog2MaxFrameNum) - 1);
-    } else if (kuiMmco == MMCO_LONG2UNUSED) {
-      bResetAllowed = false;
-      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_pic_num
-      kpRefMarking->sMmcoRef[iIdx].uiLongTermPicNum = uiCode;
-    }
-
-    // second operand / stand-alone operations
-    if (kbShortToLong || kuiMmco == MMCO_LONG) {
-      if (!kbShortToLong) {
-        // MMCO_LONG may appear only once
-        WELS_VERIFY_RETURN_IF (-1, bSeenLong);
-        bSeenLong = true;
-      }
-      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_frame_idx
-      kpRefMarking->sMmcoRef[iIdx].iLongTermFrameIdx = uiCode;
-    } else if (kuiMmco == MMCO_SET_MAX_LONG) {
-      WELS_VERIFY_RETURN_IF (-1, bSeenSetMaxLong);
-      bSeenSetMaxLong = true;
-      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //max_long_term_frame_idx_plus1
-      kpRefMarking->sMmcoRef[iIdx].iMaxLongTermFrameIdx = -1 + uiCode;
-    } else if (kuiMmco == MMCO_RESET) {
-      WELS_VERIFY_RETURN_IF (-1, (!bResetAllowed || bSeenReset));
-      bSeenReset = true;
-
-      pCtx->iPrevPicOrderCntLsb = 0;
-      pCtx->iPrevPicOrderCntMsb = 0;
-      pCtx->pSliceHeader->iPicOrderCntLsb = 0;
-    }
-    ++ iIdx;
-
-  } while (iIdx < MAX_MMCO_COUNT);
-
-  return ERR_NONE;
-}
-
-/*
- *  FillDefaultSliceHeaderExt
- *  Writes default values into the extension part of a slice header.
- *  Returns false (touching nothing) when either pointer is NULL, true otherwise.
- *  The scaled reference layer size is derived from sSliceHeader.iMbWidth / iMbHeight, so
- *  those two fields have to be set before this call.
- */
-bool FillDefaultSliceHeaderExt (PSliceHeaderExt pShExt, PNalUnitHeaderExt pNalExt) {
-  if (NULL == pShExt || NULL == pNalExt)
-    return false;
-
-  // The flag defaults to true only for a quality-id-0 NAL without no_inter_layer_pred.
-  const bool kbNoBaseWeightTable = (pNalExt->iNoInterLayerPredFlag || pNalExt->uiQualityId > 0);
-  pShExt->bBasePredWeightTableFlag = !kbNoBaseWeightTable;
-
-  // inter-layer reference / deblocking defaults
-  pShExt->uiRefLayerDqId = (uint8_t) - 1;
-  pShExt->uiDisableInterLayerDeblockingFilterIdc = 0;
-  pShExt->iInterLayerSliceAlphaC0Offset = 0;
-  pShExt->iInterLayerSliceBetaOffset = 0;
-  pShExt->bConstrainedIntraResamplingFlag = false;
-  pShExt->uiRefLayerChromaPhaseXPlus1Flag = 0;
-  pShExt->uiRefLayerChromaPhaseYPlus1 = 1;
-  // sScaledRefLayer is not reset here (a memset for it was left commented out).
-
-  // macroblock units -> luma samples (16 per macroblock)
-  pShExt->iScaledRefLayerPicWidthInSampleLuma = pShExt->sSliceHeader.iMbWidth << 4;
-  pShExt->iScaledRefLayerPicHeightInSampleLuma = pShExt->sSliceHeader.iMbHeight << 4;
-
-  // prediction-mode flags
-  pShExt->bSliceSkipFlag = false;
-  pShExt->bAdaptiveBaseModeFlag = false;
-  pShExt->bDefaultBaseModeFlag = false;
-  pShExt->bAdaptiveMotionPredFlag = false;
-  pShExt->bDefaultMotionPredFlag = false;
-  pShExt->bAdaptiveResidualPredFlag = false;
-  pShExt->bDefaultResidualPredFlag = false;
-  pShExt->bTCoeffLevelPredFlag = false;
-
-  // scan index range 0..15
-  pShExt->uiScanIdxStart = 0;
-  pShExt->uiScanIdxEnd = 15;
-
-  return true;
-}
-
-/*
- *  InitBsBuffer
- *  Allocates the raw bitstream buffer of the decoder context
- *  (MIN_ACCESS_UNIT_CAPACITY * MAX_BUFFERED_NUM bytes) and, when pCtx->pParam->bParseOnly
- *  is set, the parser info structure with its destination buffer, the saved-data buffer
- *  and the NAL length array (MAX_NAL_UNITS_IN_LAYER + 2 entries).
- *
- *  Returns ERR_INFO_INVALID_PTR for a NULL context, ERR_INFO_OUT_OF_MEMORY when any
- *  allocation fails, ERR_NONE otherwise. Buffers allocated before a failing step are not
- *  released here.
- */
-int32_t InitBsBuffer (PWelsDecoderContext pCtx) {
-  if (NULL == pCtx)
-    return ERR_INFO_INVALID_PTR;
-
-  CMemoryAlign* pMa = pCtx->pMemAlign;
-
-  // raw bitstream buffer
-  pCtx->iMaxBsBufferSizeInByte = MIN_ACCESS_UNIT_CAPACITY * MAX_BUFFERED_NUM;
-  pCtx->sRawData.pHead = static_cast<uint8_t*> (pMa->WelsMallocz (pCtx->iMaxBsBufferSizeInByte,
-                         "pCtx->sRawData.pHead"));
-  if (NULL == pCtx->sRawData.pHead)
-    return ERR_INFO_OUT_OF_MEMORY;
-  pCtx->sRawData.pCurPos = pCtx->sRawData.pHead;
-  pCtx->sRawData.pStartPos = pCtx->sRawData.pCurPos;
-  pCtx->sRawData.pEnd = pCtx->sRawData.pHead + pCtx->iMaxBsBufferSizeInByte;
-
-  if (!pCtx->pParam->bParseOnly)
-    return ERR_NONE;
-
-  // parse-only mode: parser info structure
-  pCtx->pParserBsInfo = static_cast<SParserBsInfo*> (pMa->WelsMallocz (sizeof (SParserBsInfo), "pCtx->pParserBsInfo"));
-  if (NULL == pCtx->pParserBsInfo)
-    return ERR_INFO_OUT_OF_MEMORY;
-  memset (pCtx->pParserBsInfo, 0, sizeof (SParserBsInfo));
-
-  // destination buffer for the parsed access unit
-  pCtx->pParserBsInfo->pDstBuff = static_cast<uint8_t*> (pMa->WelsMallocz (MAX_ACCESS_UNIT_CAPACITY * sizeof (uint8_t),
-                                  "pCtx->pParserBsInfo->pDstBuff"));
-  if (NULL == pCtx->pParserBsInfo->pDstBuff)
-    return ERR_INFO_OUT_OF_MEMORY;
-  memset (pCtx->pParserBsInfo->pDstBuff, 0, MAX_ACCESS_UNIT_CAPACITY * sizeof (uint8_t));
-
-  // saved-data buffer, same size as the raw buffer
-  pCtx->sSavedData.pHead = static_cast<uint8_t*> (pMa->WelsMallocz (pCtx->iMaxBsBufferSizeInByte,
-                           "pCtx->sSavedData.pHead"));
-  if (NULL == pCtx->sSavedData.pHead)
-    return ERR_INFO_OUT_OF_MEMORY;
-  pCtx->sSavedData.pCurPos = pCtx->sSavedData.pHead;
-  pCtx->sSavedData.pStartPos = pCtx->sSavedData.pCurPos;
-  pCtx->sSavedData.pEnd = pCtx->sSavedData.pHead + pCtx->iMaxBsBufferSizeInByte;
-
-  // per-NAL length array
-  pCtx->iMaxNalNum = MAX_NAL_UNITS_IN_LAYER + 2; //2 reserved for SPS+PPS
-  pCtx->pParserBsInfo->pNalLenInByte = static_cast<int*> (pMa->WelsMallocz (pCtx->iMaxNalNum * sizeof (int),
-                                       "pCtx->pParserBsInfo->pNalLenInByte"));
-  if (NULL == pCtx->pParserBsInfo->pNalLenInByte)
-    return ERR_INFO_OUT_OF_MEMORY;
-
-  return ERR_NONE;
-}
-
-/*
- *  ExpandBsBuffer
- *  Grows the raw bitstream buffer (and, in parse-only mode, the saved-data buffer) to
- *  max (kiSrcLen * MAX_BUFFERED_NUM, 2 * current size) bytes, copies the old contents and
- *  re-bases every pointer that referred to the old raw buffer.
- *
- *  All new buffers are allocated before anything in pCtx is modified, so a failed
- *  allocation leaves the context describing the old buffers consistently.
- *
- *  Returns ERR_INFO_INVALID_PTR for a NULL context, ERR_INFO_OUT_OF_MEMORY (with
- *  dsOutOfMemory or-ed into pCtx->iErrorCode) when an allocation fails, ERR_NONE otherwise.
- */
-int32_t ExpandBsBuffer (PWelsDecoderContext pCtx, const int kiSrcLen) {
-  if (pCtx == NULL)
-    return ERR_INFO_INVALID_PTR;
-  int32_t iExpandStepShift = 1;
-  int32_t iNewBuffLen = WELS_MAX ((kiSrcLen * MAX_BUFFERED_NUM), (pCtx->iMaxBsBufferSizeInByte << iExpandStepShift));
-  CMemoryAlign* pMa = pCtx->pMemAlign;
-  const bool kbParseOnly = pCtx->pParam->bParseOnly;
-
-  //allocate new raw bs buffer
-  uint8_t* pNewBsBuff = static_cast<uint8_t*> (pMa->WelsMallocz (iNewBuffLen, "pCtx->sRawData.pHead"));
-  if (pNewBsBuff == NULL) {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "ExpandBsBuffer() Failed for malloc pNewBsBuff (%d)", iNewBuffLen);
-    pCtx->iErrorCode |= dsOutOfMemory;
-    return ERR_INFO_OUT_OF_MEMORY;
-  }
-
-  //allocate new saved-data buffer (parse-only mode) before committing anything
-  uint8_t* pNewSavedBsBuff = NULL;
-  if (kbParseOnly) {
-    pNewSavedBsBuff = static_cast<uint8_t*> (pMa->WelsMallocz (iNewBuffLen, "pCtx->sSavedData.pHead"));
-    if (pNewSavedBsBuff == NULL) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "ExpandBsBuffer() Failed for malloc pNewSavedBsBuff (%d)", iNewBuffLen);
-      pCtx->iErrorCode |= dsOutOfMemory;
-      // nothing in pCtx has been changed yet: drop the raw buffer allocated above
-      pMa->WelsFree (pNewBsBuff, "pCtx->sRawData.pHead");
-      return ERR_INFO_OUT_OF_MEMORY;
-    }
-  }
-
-  //Calculate and set the bs start and end position
-  // NOTE(review): the bound is inclusive (<= uiActualUnitsNum); presumably this also covers
-  // the NAL unit currently being filled - confirm pNalUnitsList always has that entry.
-  for (uint32_t i = 0; i <= pCtx->pAccessUnitList->uiActualUnitsNum; i++) {
-    PBitStringAux pSliceBitsRead = &pCtx->pAccessUnitList->pNalUnitsList[i]->sNalData.sVclNal.sSliceBitsRead;
-    pSliceBitsRead->pStartBuf = pSliceBitsRead->pStartBuf - pCtx->sRawData.pHead + pNewBsBuff;
-    pSliceBitsRead->pEndBuf   = pSliceBitsRead->pEndBuf   - pCtx->sRawData.pHead + pNewBsBuff;
-    pSliceBitsRead->pCurBuf   = pSliceBitsRead->pCurBuf   - pCtx->sRawData.pHead + pNewBsBuff;
-  }
-
-  //Copy current raw buffer status to new buffer
-  memcpy (pNewBsBuff, pCtx->sRawData.pHead, pCtx->iMaxBsBufferSizeInByte);
-  pCtx->sRawData.pStartPos = pNewBsBuff + (pCtx->sRawData.pStartPos - pCtx->sRawData.pHead);
-  pCtx->sRawData.pCurPos   = pNewBsBuff + (pCtx->sRawData.pCurPos   - pCtx->sRawData.pHead);
-  pCtx->sRawData.pEnd      = pNewBsBuff + iNewBuffLen;
-  pMa->WelsFree (pCtx->sRawData.pHead, "pCtx->sRawData.pHead");
-  pCtx->sRawData.pHead = pNewBsBuff;
-
-  if (kbParseOnly) {
-    //Copy current saved buffer status to new buffer
-    memcpy (pNewSavedBsBuff, pCtx->sSavedData.pHead, pCtx->iMaxBsBufferSizeInByte);
-    pCtx->sSavedData.pStartPos = pNewSavedBsBuff + (pCtx->sSavedData.pStartPos - pCtx->sSavedData.pHead);
-    pCtx->sSavedData.pCurPos   = pNewSavedBsBuff + (pCtx->sSavedData.pCurPos   - pCtx->sSavedData.pHead);
-    pCtx->sSavedData.pEnd      = pNewSavedBsBuff + iNewBuffLen;
-    pMa->WelsFree (pCtx->sSavedData.pHead, "pCtx->sSavedData.pHead");
-    pCtx->sSavedData.pHead = pNewSavedBsBuff;
-  }
-
-  pCtx->iMaxBsBufferSizeInByte = iNewBuffLen;
-  return ERR_NONE;
-}
-
-/*
- *  ExpandBsLenBuffer
- *  Replaces pCtx->pParserBsInfo->pNalLenInByte by a larger array of
- *  min (2 * kiCurrLen, MAX_MB_SIZE + 2) entries, keeps the existing entries and stores the
- *  new capacity in pCtx->iMaxNalNum.
- *
- *  Returns ERR_INFO_INVALID_PTR for a NULL context, ERR_INFO_INVALID_ACCESS when the parser
- *  info or its length array is missing or kiCurrLen is not positive,
- *  ERR_INFO_OUT_OF_MEMORY (with dsOutOfMemory or-ed into pCtx->iErrorCode) when kiCurrLen
- *  already reached MAX_MB_SIZE + 2 or the allocation fails, ERR_NONE otherwise.
- */
-int32_t ExpandBsLenBuffer (PWelsDecoderContext pCtx, const int kiCurrLen) {
-  if (pCtx == NULL)
-    return ERR_INFO_INVALID_PTR;
-
-  // pParserBsInfo is only allocated in parse-only mode, so it may legitimately be NULL.
-  SParserBsInfo* pParser = pCtx->pParserBsInfo;
-  if (pParser == NULL || !pParser->pNalLenInByte || kiCurrLen <= 0)
-    return ERR_INFO_INVALID_ACCESS;
-
-  if (kiCurrLen >= MAX_MB_SIZE + 2) { //exceeds the max MB number of level 5.2
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "Current nal num (%d) exceededs %d.", kiCurrLen, MAX_MB_SIZE);
-    pCtx->iErrorCode |= dsOutOfMemory;
-    return ERR_INFO_OUT_OF_MEMORY;
-  }
-
-  int iNewLen = kiCurrLen << 1;
-  iNewLen = WELS_MIN (iNewLen, MAX_MB_SIZE + 2);
-
-  CMemoryAlign* pMa = pCtx->pMemAlign;
-  int* pNewLenBuffer = static_cast<int*> (pMa->WelsMallocz (iNewLen * sizeof (int),
-                                          "pCtx->pParserBsInfo->pNalLenInByte"));
-  if (pNewLenBuffer == NULL) {
-    pCtx->iErrorCode |= dsOutOfMemory;
-    return ERR_INFO_OUT_OF_MEMORY;
-  }
-
-  //copy existing data from old length buffer to new, never past the end of the new one
-  const int kiCopyNum = WELS_MIN (pCtx->iMaxNalNum, iNewLen);
-  if (kiCopyNum > 0)
-    memcpy (pNewLenBuffer, pParser->pNalLenInByte, kiCopyNum * sizeof (int));
-  pMa->WelsFree (pParser->pNalLenInByte, "pCtx->pParserBsInfo->pNalLenInByte");
-  pParser->pNalLenInByte = pNewLenBuffer;
-  pCtx->iMaxNalNum = iNewLen;
-  return ERR_NONE;
-}
-
-/*
- *  CheckBsBuffer
- *  Makes sure the bitstream buffer can take kiSrcLen more bytes.
- *  Input larger than MAX_ACCESS_UNIT_CAPACITY is rejected with ERR_INFO_INVALID_ACCESS
- *  (dsBitstreamError is or-ed into pCtx->iErrorCode). Input larger than
- *  iMaxBsBufferSizeInByte / MAX_BUFFERED_NUM triggers ExpandBsBuffer(); any failure of
- *  that call is reported as ERR_INFO_OUT_OF_MEMORY. Otherwise ERR_NONE is returned.
- */
-int32_t CheckBsBuffer (PWelsDecoderContext pCtx, const int32_t kiSrcLen) {
-  //exceeds max allowed data
-  if (kiSrcLen > MAX_ACCESS_UNIT_CAPACITY) {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "Max AU size exceeded. Allowed size = %d, current size = %d",
-             MAX_ACCESS_UNIT_CAPACITY, kiSrcLen);
-    pCtx->iErrorCode |= dsBitstreamError;
-    return ERR_INFO_INVALID_ACCESS;
-  }
-
-  //may lead to buffer overwrite, prevent it by expanding buffer
-  const bool kbNeedExpand = (kiSrcLen > pCtx->iMaxBsBufferSizeInByte / MAX_BUFFERED_NUM);
-  if (kbNeedExpand && ExpandBsBuffer (pCtx, kiSrcLen) != 0)
-    return ERR_INFO_OUT_OF_MEMORY;
-
-  return ERR_NONE;
-}
-
-/*
- * WelsInitStaticMemory
- * Allocates the memory the decoder context keeps for its whole lifetime: the access unit
- * NAL list (MemInitNalList, MAX_NAL_UNIT_NUM_IN_AU entries) and the bitstream buffers
- * (InitBsBuffer). On success uiTargetDqId is set to (uint8_t) -1 and bEndOfStreamFlag is
- * cleared.
- * return:
- *  ERR_NONE               - success
- *  ERR_INFO_INVALID_PTR   - pCtx is NULL
- *  ERR_INFO_OUT_OF_MEMORY - either initialisation step reported a failure
- */
-int32_t WelsInitStaticMemory (PWelsDecoderContext pCtx) {
-  if (NULL == pCtx)
-    return ERR_INFO_INVALID_PTR;
-
-  // InitBsBuffer() is only attempted once the NAL list exists.
-  const bool kbNalListReady = (MemInitNalList (&pCtx->pAccessUnitList, MAX_NAL_UNIT_NUM_IN_AU, pCtx->pMemAlign) == 0);
-  if (!kbNalListReady || InitBsBuffer (pCtx) != 0)
-    return ERR_INFO_OUT_OF_MEMORY;
-
-  pCtx->bEndOfStreamFlag = false;
-  pCtx->uiTargetDqId = (uint8_t) - 1;
-
-  return ERR_NONE;
-}
-
-/*
- * WelsFreeStaticMemory
- * Free memory introduced in WelsInitStaticMemory at destruction of decoder:
- * the access unit NAL list, the raw bitstream buffer, the parse-only buffers
- * (saved data, parser info with its NAL length array and destination buffer) and finally
- * pCtx->pParam itself. Every released pointer is reset to NULL.
- * A NULL pCtx is ignored. The parse-only buffers are released only when pParam is still
- * available and has bParseOnly set.
- */
-void WelsFreeStaticMemory (PWelsDecoderContext pCtx) {
-  if (pCtx == NULL)
-    return;
-
-  CMemoryAlign* pMa = pCtx->pMemAlign;
-
-  MemFreeNalList (&pCtx->pAccessUnitList, pMa);
-
-  if (pCtx->sRawData.pHead) {
-    pMa->WelsFree (pCtx->sRawData.pHead, "pCtx->sRawData->pHead");
-  }
-  pCtx->sRawData.pHead                = NULL;
-  pCtx->sRawData.pEnd                 = NULL;
-  pCtx->sRawData.pStartPos            = NULL;
-  pCtx->sRawData.pCurPos              = NULL;
-
-  // pParam is NULL-checked further down before it is freed, so it must not be
-  // dereferenced unguarded here either.
-  if (NULL != pCtx->pParam && pCtx->pParam->bParseOnly) {
-    if (pCtx->sSavedData.pHead) {
-      pMa->WelsFree (pCtx->sSavedData.pHead, "pCtx->sSavedData->pHead");
-    }
-    pCtx->sSavedData.pHead                = NULL;
-    pCtx->sSavedData.pEnd                 = NULL;
-    pCtx->sSavedData.pStartPos            = NULL;
-    pCtx->sSavedData.pCurPos              = NULL;
-    if (pCtx->pParserBsInfo) {
-      if (pCtx->pParserBsInfo->pNalLenInByte) {
-        pMa->WelsFree (pCtx->pParserBsInfo->pNalLenInByte, "pCtx->pParserBsInfo->pNalLenInByte");
-        pCtx->pParserBsInfo->pNalLenInByte = NULL;
-        pCtx->iMaxNalNum = 0;
-      }
-      if (pCtx->pParserBsInfo->pDstBuff) {
-        pMa->WelsFree (pCtx->pParserBsInfo->pDstBuff, "pCtx->pParserBsInfo->pDstBuff");
-        pCtx->pParserBsInfo->pDstBuff = NULL;
-      }
-      pMa->WelsFree (pCtx->pParserBsInfo, "pCtx->pParserBsInfo");
-      pCtx->pParserBsInfo = NULL;
-    }
-  }
-
-  // released last: bParseOnly above is read from it
-  if (NULL != pCtx->pParam) {
-    pMa->WelsFree (pCtx->pParam, "pCtx->pParam");
-
-    pCtx->pParam = NULL;
-  }
-}
-/*
- *  DecodeNalHeaderExt
- *  Unpacks the three bytes at pSrc into pNal->sNalHeaderExt.
- *  Trigger condition (per the original note): NAL_UNIT_TYPE = NAL_UNIT_PREFIX or
- *  NAL_UNIT_CODED_SLICE_EXT
- *  Parameter:
- *  pNal:   target NALUnit ptr
- *  pSrc:   NAL Unit bitstream; pSrc[0..2] are read
- *
- *  Bit layout as decoded below:
- *    byte 0: bit 6 idr flag, bits 5..0 priority id
- *    byte 1: bit 7 no_inter_layer_pred flag, bits 6..4 dependency id, bits 3..0 quality id
- *    byte 2: bits 7..5 temporal id, bit 4 use_ref_base_pic, bit 3 discardable,
- *            bit 2 output flag, bits 1..0 reserved
- *  uiLayerDqId is composed as (dependency id << 4) | quality id.
- */
-void DecodeNalHeaderExt (PNalUnit pNal, uint8_t* pSrc) {
-  PNalUnitHeaderExt pHeaderExt = &pNal->sNalHeaderExt;
-
-  // byte 0
-  uint8_t uiByte = pSrc[0];
-  pHeaderExt->bIdrFlag     = !! (uiByte & 0x40);
-  pHeaderExt->uiPriorityId = uiByte & 0x3F;
-
-  // byte 1
-  uiByte = pSrc[1];
-  pHeaderExt->iNoInterLayerPredFlag = uiByte >> 7;
-  pHeaderExt->uiDependencyId        = (uiByte & 0x70) >> 4;
-  pHeaderExt->uiQualityId           = uiByte & 0x0F;
-
-  // byte 2
-  uiByte = pSrc[2];
-  pHeaderExt->uiTemporalId         = uiByte >> 5;
-  pHeaderExt->bUseRefBasePicFlag   = !! (uiByte & 0x10);
-  pHeaderExt->bDiscardableFlag     = !! (uiByte & 0x08);
-  pHeaderExt->bOutputFlag          = !! (uiByte & 0x04);
-  pHeaderExt->uiReservedThree2Bits = uiByte & 0x03;
-
-  pHeaderExt->uiLayerDqId = (pHeaderExt->uiDependencyId << 4) | pHeaderExt->uiQualityId;
-}
-
-
-/*
- *  UpdateDecoderStatisticsForActiveParaset
- *  Copies the ids of the given SPS / PPS together with the SPS profile and level into the
- *  decoder statistics. None of the three pointers is checked for NULL.
- */
-void UpdateDecoderStatisticsForActiveParaset (SDecoderStatistics* pDecoderStatistics,
-    PSps pSps, PPps pPps) {
-  // values taken from the SPS
-  pDecoderStatistics->iCurrentActiveSpsId = pSps->iSpsId;
-  pDecoderStatistics->uiProfile           = static_cast<unsigned int> (pSps->uiProfileIdc);
-  pDecoderStatistics->uiLevel             = pSps->uiLevelIdc;
-
-  // value taken from the PPS
-  pDecoderStatistics->iCurrentActivePpsId = pPps->iPpsId;
-}
-
-// Limits applied while parsing slice header syntax elements.
-// Negative replacement lists are parenthesized so the macros stay a single operand
-// wherever they are expanded.
-#define SLICE_HEADER_IDR_PIC_ID_MAX 65535        // idr_pic_id is in range 0..65535
-#define SLICE_HEADER_REDUNDANT_PIC_CNT_MAX 127   // redundant_pic_cnt is in range 0..127
-// NOTE(review): presumably the bounds of the deblocking alpha_c0 / beta offsets - confirm at the use sites.
-#define SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN (-12)
-#define SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX 12
-#define SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN (-12)
-#define SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX 12
-#define MAX_NUM_REF_IDX_L0_ACTIVE_MINUS1 15      // upper bound of num_ref_idx_l0_active_minus1
-#define MAX_NUM_REF_IDX_L1_ACTIVE_MINUS1 15      // upper bound of num_ref_idx_l1_active_minus1
-#define SLICE_HEADER_CABAC_INIT_IDC_MAX 2        // NOTE(review): presumably the upper bound of cabac_init_idc - confirm
-/*
- *  decode_slice_header_avc
- *  Parse slice header of bitstream in avc for storing data structure
- */
-int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, const bool kbExtensionFlag) {
-  PNalUnit const kpCurNal               = pCtx->pAccessUnitList->pNalUnitsList[pCtx->pAccessUnitList->uiAvailUnitsNum -
-                                                                                 1];
-
-  PNalUnitHeaderExt pNalHeaderExt       = NULL;
-  PSliceHeader pSliceHead               = NULL;
-  PSliceHeaderExt pSliceHeadExt         = NULL;
-  PSubsetSps pSubsetSps                 = NULL;
-  PSps pSps                             = NULL;
-  PPps pPps                             = NULL;
-  EWelsNalUnitType eNalType             = static_cast<EWelsNalUnitType> (0);
-  int32_t iPpsId                        = 0;
-  int32_t iRet                          = ERR_NONE;
-  uint8_t uiSliceType                   = 0;
-  uint8_t uiQualityId                   = BASE_QUALITY_ID;
-  bool  bIdrFlag                        = false;
-  bool  bSgChangeCycleInvolved          = false;        // involved slice group change cycle ?
-  uint32_t uiCode;
-  int32_t iCode;
-  SLogContext* pLogCtx = & (pCtx->sLogCtx);
-
-  if (kpCurNal == NULL) {
-    return ERR_INFO_OUT_OF_MEMORY;
-  }
-
-  pNalHeaderExt = &kpCurNal->sNalHeaderExt;
-  pSliceHead    = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
-  eNalType      = pNalHeaderExt->sNalUnitHeader.eNalUnitType;
-
-  pSliceHeadExt = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt;
-
-  if (pSliceHeadExt) {
-    SRefBasePicMarking sBaseMarking;
-    const bool kbStoreRefBaseFlag = pSliceHeadExt->bStoreRefBasePicFlag;
-    memcpy (&sBaseMarking, &pSliceHeadExt->sRefBasePicMarking, sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage
-    memset (pSliceHeadExt, 0, sizeof (SSliceHeaderExt));
-    pSliceHeadExt->bStoreRefBasePicFlag = kbStoreRefBaseFlag;
-    memcpy (&pSliceHeadExt->sRefBasePicMarking, &sBaseMarking, sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage
-  }
-
-  kpCurNal->sNalData.sVclNal.bSliceHeaderExtFlag = kbExtensionFlag;
-
-  // first_mb_in_slice
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //first_mb_in_slice
-  WELS_CHECK_SE_UPPER_ERROR (uiCode, 36863u, "first_mb_in_slice", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
-                             ERR_INFO_INVALID_FIRST_MB_IN_SLICE));
-  pSliceHead->iFirstMbInSlice = uiCode;
-
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //slice_type
-  uiSliceType = uiCode;
-  if (uiSliceType > 9) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "slice type too large (%d) at first_mb(%d)", uiSliceType,
-             pSliceHead->iFirstMbInSlice);
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
-  }
-  if (uiSliceType > 4)
-    uiSliceType -= 5;
-
-  if ((NAL_UNIT_CODED_SLICE_IDR == eNalType) && (I_SLICE != uiSliceType)) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid slice type(%d) in IDR picture. ", uiSliceType);
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
-  }
-
-  if (kbExtensionFlag) {
-    if (uiSliceType > 2) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid slice type(%d).", uiSliceType);
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
-    }
-  }
-
-  pSliceHead->eSliceType = static_cast <EWelsSliceType> (uiSliceType);
-
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //pic_parameter_set_id
-  WELS_CHECK_SE_UPPER_ERROR (uiCode, (MAX_PPS_COUNT - 1), "iPpsId out of range",
-                             GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
-                                 ERR_INFO_PPS_ID_OVERFLOW));
-  iPpsId = uiCode;
-
-  //add check PPS available here
-  if (pCtx->bPpsAvailFlags[iPpsId] == false) {
-    pCtx->sDecoderStatistics.iPpsReportErrorNum++;
-    if (pCtx->iPPSLastInvalidId != iPpsId) {
-      WelsLog (pLogCtx, WELS_LOG_ERROR, "PPS id (%d) is invalid, previous id (%d) error ignored (%d)!", iPpsId,
-               pCtx->iPPSLastInvalidId, pCtx->iPPSInvalidNum);
-      pCtx->iPPSLastInvalidId = iPpsId;
-      pCtx->iPPSInvalidNum = 0;
-    } else {
-      pCtx->iPPSInvalidNum++;
-    }
-    pCtx->iErrorCode |= dsNoParamSets;
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_PPS_ID);
-  }
-  pCtx->iPPSLastInvalidId = -1;
-
-  pPps    = &pCtx->sPpsBuffer[iPpsId];
-
-  if (pPps->uiNumSliceGroups == 0) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid PPS referenced");
-    pCtx->iErrorCode |= dsNoParamSets;
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_NO_PARAM_SETS);
-  }
-
-  if (kbExtensionFlag) {
-    pSubsetSps      = &pCtx->sSubsetSpsBuffer[pPps->iSpsId];
-    pSps            = &pSubsetSps->sSps;
-    if (pCtx->bSubspsAvailFlags[pPps->iSpsId] == false) {
-      pCtx->sDecoderStatistics.iSubSpsReportErrorNum++;
-      if (pCtx->iSubSPSLastInvalidId != pPps->iSpsId) {
-        WelsLog (pLogCtx, WELS_LOG_ERROR, "Sub SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId,
-                 pCtx->iSubSPSLastInvalidId, pCtx->iSubSPSInvalidNum);
-        pCtx->iSubSPSLastInvalidId = pPps->iSpsId;
-        pCtx->iSubSPSInvalidNum = 0;
-      } else {
-        pCtx->iSubSPSInvalidNum++;
-      }
-      pCtx->iErrorCode |= dsNoParamSets;
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID);
-    }
-    pCtx->iSubSPSLastInvalidId = -1;
-  } else {
-    if (pCtx->bSpsAvailFlags[pPps->iSpsId] == false) {
-      pCtx->sDecoderStatistics.iSpsReportErrorNum++;
-      if (pCtx->iSPSLastInvalidId != pPps->iSpsId) {
-        WelsLog (pLogCtx, WELS_LOG_ERROR, "SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId,
-                 pCtx->iSPSLastInvalidId, pCtx->iSPSInvalidNum);
-        pCtx->iSPSLastInvalidId = pPps->iSpsId;
-        pCtx->iSPSInvalidNum = 0;
-      } else {
-        pCtx->iSPSInvalidNum++;
-      }
-      pCtx->iErrorCode |= dsNoParamSets;
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID);
-    }
-    pCtx->iSPSLastInvalidId = -1;
-    pSps = &pCtx->sSpsBuffer[pPps->iSpsId];
-  }
-  pSliceHead->iPpsId = iPpsId;
-  pSliceHead->iSpsId = pPps->iSpsId;
-  pSliceHead->pPps   = pPps;
-  pSliceHead->pSps   = pSps;
-
-  pSliceHeadExt->pSubsetSps = pSubsetSps;
-
-  if (pSps->iNumRefFrames == 0) {
-    if ((uiSliceType != I_SLICE) && (uiSliceType != SI_SLICE)) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "slice_type (%d) not supported for num_ref_frames = 0.", uiSliceType);
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
-    }
-  }
-
-  bIdrFlag = (!kbExtensionFlag && eNalType == NAL_UNIT_CODED_SLICE_IDR) || (kbExtensionFlag && pNalHeaderExt->bIdrFlag);
-  pSliceHead->bIdrFlag = bIdrFlag;
-
-  if (pSps->uiLog2MaxFrameNum == 0) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "non existing SPS referenced");
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_NO_PARAM_SETS);
-  }
-  // check first_mb_in_slice
-  WELS_CHECK_SE_UPPER_ERROR ((uint32_t) (pSliceHead->iFirstMbInSlice), (pSps->uiTotalMbCount - 1), "first_mb_in_slice",
-                             GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_FIRST_MB_IN_SLICE));
-  WELS_READ_VERIFY (BsGetBits (pBs, pSps->uiLog2MaxFrameNum, &uiCode)); //frame_num
-  pSliceHead->iFrameNum = uiCode;
-
-  pSliceHead->bFieldPicFlag    = false;
-  pSliceHead->bBottomFiledFlag = false;
-  if (!pSps->bFrameMbsOnlyFlag) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "ParseSliceHeaderSyntaxs(): frame_mbs_only_flag = %d not supported. ",
-             pSps->bFrameMbsOnlyFlag);
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MBAFF);
-  }
-  pSliceHead->iMbWidth  = pSps->iMbWidth;
-  pSliceHead->iMbHeight = pSps->iMbHeight / (1 + pSliceHead->bFieldPicFlag);
-
-  if (bIdrFlag) {
-    if (pSliceHead->iFrameNum != 0) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING,
-               "ParseSliceHeaderSyntaxs(), invaild frame number: %d due to IDR frame introduced!",
-               pSliceHead->iFrameNum);
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_FRAME_NUM);
-    }
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //idr_pic_id
-    // standard 7.4.3 idr_pic_id should be in range 0 to 65535, inclusive.
-    WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_IDR_PIC_ID_MAX, "idr_pic_id", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
-                               ERR_INFO_INVALID_IDR_PIC_ID));
-    pSliceHead->uiIdrPicId = uiCode; /* uiIdrPicId */
-#ifdef LONG_TERM_REF
-    pCtx->uiCurIdrPicId = pSliceHead->uiIdrPicId;
-#endif
-  }
-
-  pSliceHead->iDeltaPicOrderCntBottom = 0;
-  pSliceHead->iDeltaPicOrderCnt[0] =
-    pSliceHead->iDeltaPicOrderCnt[1] = 0;
-  if (pSps->uiPocType == 0) {
-    WELS_READ_VERIFY (BsGetBits (pBs, pSps->iLog2MaxPocLsb, &uiCode)); //pic_order_cnt_lsb
-    const int32_t iMaxPocLsb = 1 << (pSps->iLog2MaxPocLsb);
-    pSliceHead->iPicOrderCntLsb = uiCode;
-    if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) {
-      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt_bottom
-      pSliceHead->iDeltaPicOrderCntBottom = iCode;
-    }
-    //Calculate poc if necessary
-    int32_t pocLsb = pSliceHead->iPicOrderCntLsb;
-    if (pSliceHead->bIdrFlag || kpCurNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR) {
-      pCtx->iPrevPicOrderCntMsb = 0;
-      pCtx->iPrevPicOrderCntLsb = 0;
-    }
-    int32_t pocMsb;
-    if (pocLsb < pCtx->iPrevPicOrderCntLsb && pCtx->iPrevPicOrderCntLsb - pocLsb >= iMaxPocLsb / 2)
-      pocMsb = pCtx->iPrevPicOrderCntMsb + iMaxPocLsb;
-    else if (pocLsb > pCtx->iPrevPicOrderCntLsb && pocLsb - pCtx->iPrevPicOrderCntLsb > iMaxPocLsb / 2)
-      pocMsb = pCtx->iPrevPicOrderCntMsb - iMaxPocLsb;
-    else
-      pocMsb = pCtx->iPrevPicOrderCntMsb;
-    pSliceHead->iPicOrderCntLsb = pocMsb + pocLsb;
-
-    if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) {
-      pSliceHead->iPicOrderCntLsb += pSliceHead->iDeltaPicOrderCntBottom;
-    }
-
-    if (kpCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) {
-      pCtx->iPrevPicOrderCntLsb = pocLsb;
-      pCtx->iPrevPicOrderCntMsb = pocMsb;
-    }
-    //End of Calculating poc
-  } else if (pSps->uiPocType == 1 && !pSps->bDeltaPicOrderAlwaysZeroFlag) {
-    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt[ 0 ]
-    pSliceHead->iDeltaPicOrderCnt[0] = iCode;
-    if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) {
-      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt[ 1 ]
-      pSliceHead->iDeltaPicOrderCnt[1] = iCode;
-    }
-  }
-  pSliceHead->iRedundantPicCnt = 0;
-  if (pPps->bRedundantPicCntPresentFlag) {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //redundant_pic_cnt
-    // standard section 7.4.3, redundant_pic_cnt should be in range 0 to 127, inclusive.
-    WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_REDUNDANT_PIC_CNT_MAX, "redundant_pic_cnt",
-                               GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REDUNDANT_PIC_CNT));
-    pSliceHead->iRedundantPicCnt = uiCode;
-    if (pSliceHead->iRedundantPicCnt > 0) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "Redundant picture not supported!");
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REDUNDANT_PIC_CNT);
-    }
-  }
-
-  if (B_SLICE == uiSliceType) {
-    //fix me: it needs to use the this flag somewhere for B-Sclice
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //direct_spatial_mv_pred_flag
-    pSliceHead->iDirectSpatialMvPredFlag = uiCode;
-  }
-
-  //set defaults, might be overriden a few line later
-  pSliceHead->uiRefCount[0] = pPps->uiNumRefIdxL0Active;
-  pSliceHead->uiRefCount[1] = pPps->uiNumRefIdxL1Active;
-
-  bool bReadNumRefFlag = (P_SLICE == uiSliceType || B_SLICE == uiSliceType);
-  if (kbExtensionFlag) {
-    bReadNumRefFlag &= (BASE_QUALITY_ID == pNalHeaderExt->uiQualityId);
-  }
-  if (bReadNumRefFlag) {
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //num_ref_idx_active_override_flag
-    pSliceHead->bNumRefIdxActiveOverrideFlag = !!uiCode;
-    if (pSliceHead->bNumRefIdxActiveOverrideFlag) {
-      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //num_ref_idx_l0_active_minus1
-      WELS_CHECK_SE_UPPER_ERROR (uiCode, MAX_NUM_REF_IDX_L0_ACTIVE_MINUS1, "num_ref_idx_l0_active_minus1",
-                                 GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_NUM_REF_IDX_L0_ACTIVE_MINUS1));
-      pSliceHead->uiRefCount[0] = 1 + uiCode;
-      if (B_SLICE == uiSliceType) {
-        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //num_ref_idx_l1_active_minus1
-        WELS_CHECK_SE_UPPER_ERROR (uiCode, MAX_NUM_REF_IDX_L1_ACTIVE_MINUS1, "num_ref_idx_l1_active_minus1",
-                                   GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_NUM_REF_IDX_L1_ACTIVE_MINUS1));
-        pSliceHead->uiRefCount[1] = 1 + uiCode;
-      }
-    }
-  }
-
-  if (pSliceHead->uiRefCount[0] > MAX_REF_PIC_COUNT || pSliceHead->uiRefCount[1] > MAX_REF_PIC_COUNT) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "reference overflow");
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_REF_COUNT_OVERFLOW);
-  }
-
-  if (BASE_QUALITY_ID == uiQualityId) {
-    iRet = ParseRefPicListReordering (pBs, pSliceHead);
-    if (iRet != ERR_NONE) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "invalid ref pPic list reordering syntaxs!");
-      return iRet;
-    }
-
-    if ((pPps->bWeightedPredFlag && uiSliceType == P_SLICE) || (pPps->uiWeightedBipredIdc == 1 && uiSliceType == B_SLICE)) {
-      iRet = ParsePredWeightedTable (pBs, pSliceHead);
-      if (iRet != ERR_NONE) {
-        WelsLog (pLogCtx, WELS_LOG_WARNING, "invalid weighted prediction syntaxs!");
-        return iRet;
-      }
-    }
-
-    if (kbExtensionFlag) {
-      if (pNalHeaderExt->iNoInterLayerPredFlag || pNalHeaderExt->uiQualityId > 0)
-        pSliceHeadExt->bBasePredWeightTableFlag = false;
-      else
-        pSliceHeadExt->bBasePredWeightTableFlag = true;
-    }
-
-    if (kpCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) {
-      iRet = ParseDecRefPicMarking (pCtx, pBs, pSliceHead, pSps, bIdrFlag);
-      if (iRet != ERR_NONE) {
-        return iRet;
-      }
-
-      if (kbExtensionFlag && !pSubsetSps->sSpsSvcExt.bSliceHeaderRestrictionFlag) {
-        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //store_ref_base_pic_flag
-        pSliceHeadExt->bStoreRefBasePicFlag = !!uiCode;
-        if ((pNalHeaderExt->bUseRefBasePicFlag || pSliceHeadExt->bStoreRefBasePicFlag) && !bIdrFlag) {
-          WelsLog (pLogCtx, WELS_LOG_WARNING,
-                   "ParseSliceHeaderSyntaxs(): bUseRefBasePicFlag or bStoreRefBasePicFlag = 1 not supported.");
-          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
-        }
-      }
-    }
-  }
-
-  if (pPps->bEntropyCodingModeFlag) {
-    if (pSliceHead->eSliceType != I_SLICE && pSliceHead->eSliceType != SI_SLICE) {
-      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode));
-      WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_CABAC_INIT_IDC_MAX, "cabac_init_idc", ERR_INFO_INVALID_CABAC_INIT_IDC);
-      pSliceHead->iCabacInitIdc = uiCode;
-    } else
-      pSliceHead->iCabacInitIdc = 0;
-  }
-
-  WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_qp_delta
-  pSliceHead->iSliceQpDelta     = iCode;
-  pSliceHead->iSliceQp          = pPps->iPicInitQp + pSliceHead->iSliceQpDelta;
-  if (pSliceHead->iSliceQp < 0 || pSliceHead->iSliceQp > 51) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "QP %d out of range", pSliceHead->iSliceQp);
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_QP);
-  }
-
-  //FIXME qscale / qp ... stuff
-  if (!kbExtensionFlag) {
-    if (uiSliceType == SP_SLICE || uiSliceType == SI_SLICE) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "SP/SI not supported");
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_SPSI);
-    }
-  }
-
-  pSliceHead->uiDisableDeblockingFilterIdc = 0;
-  pSliceHead->iSliceAlphaC0Offset          = 0;
-  pSliceHead->iSliceBetaOffset             = 0;
-  if (pPps->bDeblockingFilterControlPresentFlag) {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //disable_deblocking_filter_idc
-    pSliceHead->uiDisableDeblockingFilterIdc = uiCode;
-    //refer to JVT-X201wcm1.doc G.7.4.3.4--2010.4.20
-    if (pSliceHead->uiDisableDeblockingFilterIdc > 6) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "disable_deblock_filter_idc (%d) out of range [0, 6]",
-               pSliceHead->uiDisableDeblockingFilterIdc);
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_DBLOCKING_IDC);
-    }
-    if (pSliceHead->uiDisableDeblockingFilterIdc != 1) {
-      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_alpha_c0_offset_div2
-      pSliceHead->iSliceAlphaC0Offset = iCode * 2;
-      WELS_CHECK_SE_BOTH_ERROR (pSliceHead->iSliceAlphaC0Offset, SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN,
-                                SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX, "slice_alpha_c0_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
-                                    ERR_INFO_INVALID_SLICE_ALPHA_C0_OFFSET_DIV2));
-      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_beta_offset_div2
-      pSliceHead->iSliceBetaOffset = iCode * 2;
-      WELS_CHECK_SE_BOTH_ERROR (pSliceHead->iSliceBetaOffset, SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN,
-                                SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX, "slice_beta_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
-                                    ERR_INFO_INVALID_SLICE_BETA_OFFSET_DIV2));
-    }
-  }
-
-  bSgChangeCycleInvolved = (pPps->uiNumSliceGroups > 1 && pPps->uiSliceGroupMapType >= 3
-                            && pPps->uiSliceGroupMapType <= 5);
-  if (kbExtensionFlag && bSgChangeCycleInvolved)
-    bSgChangeCycleInvolved = (bSgChangeCycleInvolved && (uiQualityId == BASE_QUALITY_ID));
-  if (bSgChangeCycleInvolved) {
-    if (pPps->uiSliceGroupChangeRate > 0) {
-      const int32_t kiNumBits = (int32_t)WELS_CEIL (log (static_cast<double> (1 + pPps->uiPicSizeInMapUnits /
-                                pPps->uiSliceGroupChangeRate)));
-      WELS_READ_VERIFY (BsGetBits (pBs, kiNumBits, &uiCode)); //lice_group_change_cycle
-      pSliceHead->iSliceGroupChangeCycle = uiCode;
-    } else
-      pSliceHead->iSliceGroupChangeCycle = 0;
-  }
-
-  if (!kbExtensionFlag) {
-    FillDefaultSliceHeaderExt (pSliceHeadExt, pNalHeaderExt);
-  } else {
-    /* Extra syntax elements newly introduced */
-    pSliceHeadExt->pSubsetSps = pSubsetSps;
-
-    if (!pNalHeaderExt->iNoInterLayerPredFlag && BASE_QUALITY_ID == uiQualityId) {
-      //the following should be deleted for CODE_CLEAN
-      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //ref_layer_dq_id
-      pSliceHeadExt->uiRefLayerDqId = uiCode;
-      if (pSubsetSps->sSpsSvcExt.bInterLayerDeblockingFilterCtrlPresentFlag) {
-        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //disable_inter_layer_deblocking_filter_idc
-        pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc = uiCode;
-        //refer to JVT-X201wcm1.doc G.7.4.3.4--2010.4.20
-        if (pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc > 6) {
-          WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "disable_inter_layer_deblock_filter_idc (%d) out of range [0, 6]",
-                   pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc);
-          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_DBLOCKING_IDC);
-        }
-        if (pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc != 1) {
-          WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //inter_layer_slice_alpha_c0_offset_div2
-          pSliceHeadExt->iInterLayerSliceAlphaC0Offset = iCode * 2;
-          WELS_CHECK_SE_BOTH_ERROR (pSliceHeadExt->iInterLayerSliceAlphaC0Offset,
-                                    SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN, SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX,
-                                    "inter_layer_alpha_c0_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
-                                        ERR_INFO_INVALID_SLICE_ALPHA_C0_OFFSET_DIV2));
-          WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //inter_layer_slice_beta_offset_div2
-          pSliceHeadExt->iInterLayerSliceBetaOffset = iCode * 2;
-          WELS_CHECK_SE_BOTH_ERROR (pSliceHeadExt->iInterLayerSliceBetaOffset, SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN,
-                                    SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX, "inter_layer_slice_beta_offset_div2 * 2",
-                                    GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_BETA_OFFSET_DIV2));
-        }
-      }
-
-      pSliceHeadExt->uiRefLayerChromaPhaseXPlus1Flag = pSubsetSps->sSpsSvcExt.uiSeqRefLayerChromaPhaseXPlus1Flag;
-      pSliceHeadExt->uiRefLayerChromaPhaseYPlus1     = pSubsetSps->sSpsSvcExt.uiSeqRefLayerChromaPhaseYPlus1;
-
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //constrained_intra_resampling_flag
-      pSliceHeadExt->bConstrainedIntraResamplingFlag = !!uiCode;
-
-      {
-        SPosOffset pos;
-        pos.iLeftOffset   = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iLeftOffset;
-        pos.iTopOffset    = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iTopOffset * (2 - pSps->bFrameMbsOnlyFlag);
-        pos.iRightOffset  = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iRightOffset;
-        pos.iBottomOffset = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iBottomOffset * (2 - pSps->bFrameMbsOnlyFlag);
-        //memcpy(&pSliceHeadExt->sScaledRefLayer, &pos, sizeof(SPosOffset));//confirmed_safe_unsafe_usage
-        pSliceHeadExt->iScaledRefLayerPicWidthInSampleLuma  = (pSliceHead->iMbWidth << 4) -
-            (pos.iLeftOffset + pos.iRightOffset);
-        pSliceHeadExt->iScaledRefLayerPicHeightInSampleLuma = (pSliceHead->iMbHeight << 4) -
-            (pos.iTopOffset + pos.iBottomOffset) / (1 + pSliceHead->bFieldPicFlag);
-      }
-    } else if (uiQualityId > BASE_QUALITY_ID) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "MGS not supported.");
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MGS);
-    } else {
-      pSliceHeadExt->uiRefLayerDqId = (uint8_t) - 1;
-    }
-
-    pSliceHeadExt->bSliceSkipFlag            = false;
-    pSliceHeadExt->bAdaptiveBaseModeFlag     = false;
-    pSliceHeadExt->bDefaultBaseModeFlag      = false;
-    pSliceHeadExt->bAdaptiveMotionPredFlag   = false;
-    pSliceHeadExt->bDefaultMotionPredFlag    = false;
-    pSliceHeadExt->bAdaptiveResidualPredFlag = false;
-    pSliceHeadExt->bDefaultResidualPredFlag  = false;
-    if (pNalHeaderExt->iNoInterLayerPredFlag)
-      pSliceHeadExt->bTCoeffLevelPredFlag    = false;
-    else
-      pSliceHeadExt->bTCoeffLevelPredFlag    = pSubsetSps->sSpsSvcExt.bSeqTCoeffLevelPredFlag;
-
-    if (!pNalHeaderExt->iNoInterLayerPredFlag) {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //slice_skip_flag
-      pSliceHeadExt->bSliceSkipFlag = !!uiCode;
-      if (pSliceHeadExt->bSliceSkipFlag) {
-        WelsLog (pLogCtx, WELS_LOG_WARNING, "bSliceSkipFlag == 1 not supported.");
-        return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_SLICESKIP);
-      } else {
-        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_base_mode_flag
-        pSliceHeadExt->bAdaptiveBaseModeFlag = !!uiCode;
-        if (!pSliceHeadExt->bAdaptiveBaseModeFlag) {
-          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_base_mode_flag
-          pSliceHeadExt->bDefaultBaseModeFlag = !!uiCode;
-        }
-        if (!pSliceHeadExt->bDefaultBaseModeFlag) {
-          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_motion_prediction_flag
-          pSliceHeadExt->bAdaptiveMotionPredFlag = !!uiCode;
-          if (!pSliceHeadExt->bAdaptiveMotionPredFlag) {
-            WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_motion_prediction_flag
-            pSliceHeadExt->bDefaultMotionPredFlag = !!uiCode;
-          }
-        }
-
-        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_residual_prediction_flag
-        pSliceHeadExt->bAdaptiveResidualPredFlag = !!uiCode;
-        if (!pSliceHeadExt->bAdaptiveResidualPredFlag) {
-          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_residual_prediction_flag
-          pSliceHeadExt->bDefaultResidualPredFlag = !!uiCode;
-        }
-      }
-      if (pSubsetSps->sSpsSvcExt.bAdaptiveTCoeffLevelPredFlag) {
-        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //tcoeff_level_prediction_flag
-        pSliceHeadExt->bTCoeffLevelPredFlag = !!uiCode;
-      }
-    }
-
-    if (!pSubsetSps->sSpsSvcExt.bSliceHeaderRestrictionFlag) {
-      WELS_READ_VERIFY (BsGetBits (pBs, 4, &uiCode)); //scan_idx_start
-      pSliceHeadExt->uiScanIdxStart = uiCode;
-      WELS_READ_VERIFY (BsGetBits (pBs, 4, &uiCode)); //scan_idx_end
-      pSliceHeadExt->uiScanIdxEnd = uiCode;
-      if (pSliceHeadExt->uiScanIdxStart != 0 || pSliceHeadExt->uiScanIdxEnd != 15) {
-        WelsLog (pLogCtx, WELS_LOG_WARNING, "uiScanIdxStart (%d) != 0 and uiScanIdxEnd (%d) !=15 not supported here",
-                 pSliceHeadExt->uiScanIdxStart, pSliceHeadExt->uiScanIdxEnd);
-        return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MGS);
-      }
-    } else {
-      pSliceHeadExt->uiScanIdxStart = 0;
-      pSliceHeadExt->uiScanIdxEnd   = 15;
-    }
-  }
-
-  return ERR_NONE;
-}
-
-/*
- *  Copy the NAL unit header extension fields, sRefPicBaseMarking and bStoreRefBasePicFlag
- *  of a prefix NAL unit into a VCL NAL unit.
- *  pCtx:   decoder context, its PPS/SPS buffers are used to look up the SPS of the destination slice
- *  kppDst: destination, succeeded VCL NAL based AVC (I/P Slice)
- *  kpSrc:  source, marked as decoded prefix NAL
- *  return: false if any of the three pointers is NULL, otherwise true
- */
-bool PrefetchNalHeaderExtSyntax (PWelsDecoderContext pCtx, PNalUnit const kppDst, PNalUnit const kpSrc) {
-  PNalUnitHeaderExt pNalHdrExtD = NULL, pNalHdrExtS = NULL;
-  PSliceHeaderExt pShExtD = NULL;
-  PPrefixNalUnit pPrefixS = NULL;
-  PSps pSps = NULL;
-  int32_t iIdx = 0;
-
-  // pCtx is dereferenced below just like the two NAL units, so it is validated together with them
-  if (pCtx == NULL || kppDst == NULL || kpSrc == NULL)
-    return false;
-
-  pNalHdrExtD   = &kppDst->sNalHeaderExt;
-  pNalHdrExtS   = &kpSrc->sNalHeaderExt;
-  pShExtD       = &kppDst->sNalData.sVclNal.sSliceHeaderExt;
-  pPrefixS      = &kpSrc->sNalData.sPrefixNal;
-  // SPS referred to by the PPS of the destination slice header
-  pSps          = &pCtx->sSpsBuffer[pCtx->sPpsBuffer[pShExtD->sSliceHeader.iPpsId].iSpsId];
-
-  pNalHdrExtD->uiDependencyId           = pNalHdrExtS->uiDependencyId;
-  pNalHdrExtD->uiQualityId              = pNalHdrExtS->uiQualityId;
-  pNalHdrExtD->uiTemporalId             = pNalHdrExtS->uiTemporalId;
-  pNalHdrExtD->uiPriorityId             = pNalHdrExtS->uiPriorityId;
-  pNalHdrExtD->bIdrFlag                 = pNalHdrExtS->bIdrFlag;
-  pNalHdrExtD->iNoInterLayerPredFlag    = pNalHdrExtS->iNoInterLayerPredFlag;
-  pNalHdrExtD->bDiscardableFlag         = pNalHdrExtS->bDiscardableFlag;
-  pNalHdrExtD->bOutputFlag              = pNalHdrExtS->bOutputFlag;
-  pNalHdrExtD->bUseRefBasePicFlag       = pNalHdrExtS->bUseRefBasePicFlag;
-  pNalHdrExtD->uiLayerDqId              = pNalHdrExtS->uiLayerDqId;
-
-  pShExtD->bStoreRefBasePicFlag         = pPrefixS->bStoreRefBasePicFlag;
-  memcpy (&pShExtD->sRefBasePicMarking, &pPrefixS->sRefPicBaseMarking,
-          sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage
-  if (pShExtD->sRefBasePicMarking.bAdaptiveRefBasePicMarkingModeFlag) {
-    PRefBasePicMarking pRefBasePicMarking = &pShExtD->sRefBasePicMarking;
-    // Walk the copied MMCO list until MMCO_END (or the list capacity) and, for every
-    // MMCO_SHORT2UNUSED entry, derive iShortFrameNum as (frame_num - diff_of_pic_nums)
-    // masked to uiLog2MaxFrameNum bits.
-    iIdx = 0;
-    do {
-      if (pRefBasePicMarking->mmco_base[iIdx].uiMmcoType == MMCO_END)
-        break;
-      if (pRefBasePicMarking->mmco_base[iIdx].uiMmcoType == MMCO_SHORT2UNUSED)
-        pRefBasePicMarking->mmco_base[iIdx].iShortFrameNum = (pShExtD->sSliceHeader.iFrameNum -
-            pRefBasePicMarking->mmco_base[iIdx].uiDiffOfPicNums) & ((1 << pSps->uiLog2MaxFrameNum) - 1);
-      ++ iIdx;
-    } while (iIdx < MAX_MMCO_COUNT);
-  }
-
-  return true;
-}
-
-
-
-/*
- *  Close the current access unit: take the target DQ id from the NAL unit at uiEndPos,
- *  set the number of actual units and flag the AU as completed.
- *  When a loss flag or bNewSeqBegin is set, the AU must contain an IDR NAL unit; if it
- *  does not, dsRefLost is recorded in pCtx->iErrorCode and, with error concealment
- *  disabled, an error code is returned.
- *  return: ERR_NONE, or dsNoParamSets (LONG_TERM_REF) / ERR_INFO_REFERENCE_PIC_LOST
- */
-int32_t UpdateAccessUnit (PWelsDecoderContext pCtx) {
-  PAccessUnit pAu         = pCtx->pAccessUnitList;
-  const int32_t kiLastIdx = pAu->uiEndPos;
-
-  pCtx->uiTargetDqId    = pAu->pNalUnitsList[kiLastIdx]->sNalHeaderExt.uiLayerDqId;
-  pAu->uiActualUnitsNum = kiLastIdx + 1;
-  pAu->bCompletedAuFlag = true;
-
-  // Added for mosaic avoidance, 11/19/2009
-#ifdef LONG_TERM_REF
-  const bool kbIdrRequired = (pCtx->bParamSetsLostFlag || pCtx->bNewSeqBegin);
-#else
-  const bool kbIdrRequired = (pCtx->bReferenceLostAtT0Flag || pCtx->bNewSeqBegin);
-#endif
-  if (!kbIdrRequired)
-    return ERR_NONE;
-
-  // look for an IDR NAL unit among the units of this AU
-  bool bIdrFound = false;
-  for (uint32_t uiIdx = 0; uiIdx < pAu->uiActualUnitsNum; ++uiIdx) {
-    PNalUnit pNal = pAu->pNalUnitsList[uiIdx];
-    if (pNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR || pNal->sNalHeaderExt.bIdrFlag) {
-      bIdrFound = true;
-      break;
-    }
-  }
-  if (bIdrFound)
-    return ERR_NONE;
-
-  // no IDR nal found within incoming AU, need exit to avoid mosaic issue, 11/19/2009
-  pCtx->sDecoderStatistics.uiIDRLostNum++;
-  if (!pCtx->bParamSetsLostFlag)
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-             "UpdateAccessUnit():::::Key frame lost.....CAN NOT find IDR from current AU.");
-  pCtx->iErrorCode |= dsRefLost;
-
-  // with error concealment enabled the loss is only recorded, decoding goes on
-  if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE)
-    return ERR_NONE;
-
-#ifdef LONG_TERM_REF
-  pCtx->iErrorCode |= dsNoParamSets;
-  return dsNoParamSets;
-#else
-  pCtx->iErrorCode |= dsRefLost;
-  return ERR_INFO_REFERENCE_PIC_LOST;
-#endif
-}
-
-/*
- *  Allocate the DQ layer objects and all per-macroblock buffers of pCtx->sMb for a
- *  picture of at most kiMaxWidth x kiMaxHeight luma samples.
- *  The macroblock dimensions in pCtx->sMb are always updated; the buffers themselves are
- *  kept when they were already allocated for a size that covers the request, otherwise
- *  they are released via UninitialDqLayersContext() and allocated again.
- *  return: ERR_NONE, ERR_INFO_INVALID_PARAM or ERR_INFO_OUT_OF_MEMORY
- */
-int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWidth, const int32_t kiMaxHeight) {
-  WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pCtx || kiMaxWidth <= 0 || kiMaxHeight <= 0))
-
-  // picture size in macroblocks, rounded up to whole 16x16 blocks
-  pCtx->sMb.iMbWidth  = (kiMaxWidth + 15) >> 4;
-  pCtx->sMb.iMbHeight = (kiMaxHeight + 15) >> 4;
-
-  // memory of sufficient dimension is already there, nothing to allocate
-  if (pCtx->bInitialDqLayersMem && kiMaxWidth <= pCtx->iPicWidthReq && kiMaxHeight <= pCtx->iPicHeightReq)
-    return ERR_NONE;
-
-  CMemoryAlign* pMa = pCtx->pMemAlign;
-
-  UninitialDqLayersContext (pCtx);
-
-  // number of macroblocks every per-MB buffer has to hold
-  const int32_t kiMbNum = pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight;
-  int32_t iLayer = 0;
-
-  do {
-    PDqLayer pDqLayer = (PDqLayer)pMa->WelsMallocz (sizeof (SDqLayer), "PDqLayer");
-    if (NULL == pDqLayer)
-      return ERR_INFO_OUT_OF_MEMORY;
-
-    // registered right away so that UninitialDqLayersContext() sees this layer
-    pCtx->pDqLayersList[iLayer] = pDqLayer;
-    memset (pDqLayer, 0, sizeof (SDqLayer));
-
-    pCtx->sMb.pMbType[iLayer] = (uint32_t*)pMa->WelsMallocz (kiMbNum * sizeof (uint32_t), "pCtx->sMb.pMbType[]");
-
-    // motion vectors, one buffer per reference list
-    pCtx->sMb.pMv[iLayer][LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (
-                                      kiMbNum * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMv[][]");
-    pCtx->sMb.pMv[iLayer][LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (
-                                      kiMbNum * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMv[][]");
-
-    // reference indices, one buffer per reference list
-    pCtx->sMb.pRefIndex[iLayer][LIST_0] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (
-                                            kiMbNum * sizeof (int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[][]");
-    pCtx->sMb.pRefIndex[iLayer][LIST_1] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (
-                                            kiMbNum * sizeof (int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[][]");
-
-    pCtx->sMb.pDirect[iLayer] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (
-                                  kiMbNum * sizeof (int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pDirect[]");
-    pCtx->sMb.pLumaQp[iLayer] = (int8_t*)pMa->WelsMallocz (kiMbNum * sizeof (int8_t), "pCtx->sMb.pLumaQp[]");
-    pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[iLayer] = (bool*)pMa->WelsMallocz (
-          kiMbNum * sizeof (bool), "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
-    pCtx->sMb.pTransformSize8x8Flag[iLayer] = (bool*)pMa->WelsMallocz (
-          kiMbNum * sizeof (bool), "pCtx->sMb.pTransformSize8x8Flag[]");
-    pCtx->sMb.pChromaQp[iLayer] = (int8_t (*)[2])pMa->WelsMallocz (kiMbNum * sizeof (int8_t) * 2,
-                                  "pCtx->sMb.pChromaQp[]");
-
-    // motion vector differences, one buffer per reference list
-    pCtx->sMb.pMvd[iLayer][LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (
-                                       kiMbNum * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]");
-    pCtx->sMb.pMvd[iLayer][LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (
-                                       kiMbNum * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]");
-
-    pCtx->sMb.pCbfDc[iLayer] = (uint16_t*)pMa->WelsMallocz (kiMbNum * sizeof (uint16_t), "pCtx->sMb.pCbfDc[]");
-    pCtx->sMb.pNzc[iLayer] = (int8_t (*)[24])pMa->WelsMallocz (kiMbNum * sizeof (int8_t) * 24, "pCtx->sMb.pNzc[]");
-    pCtx->sMb.pNzcRs[iLayer] = (int8_t (*)[24])pMa->WelsMallocz (kiMbNum * sizeof (int8_t) * 24,
-                               "pCtx->sMb.pNzcRs[]");
-    pCtx->sMb.pScaledTCoeff[iLayer] = (int16_t (*)[MB_COEFF_LIST_SIZE])pMa->WelsMallocz (
-                                        kiMbNum * sizeof (int16_t) * MB_COEFF_LIST_SIZE, "pCtx->sMb.pScaledTCoeff[]");
-    pCtx->sMb.pIntraPredMode[iLayer] = (int8_t (*)[8])pMa->WelsMallocz (kiMbNum * sizeof (int8_t) * 8,
-                                       "pCtx->sMb.pIntraPredMode[]");
-    pCtx->sMb.pIntra4x4FinalMode[iLayer] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (
-          kiMbNum * sizeof (int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pIntra4x4FinalMode[]");
-    pCtx->sMb.pIntraNxNAvailFlag[iLayer] = (uint8_t (*))pMa->WelsMallocz (kiMbNum * sizeof (int8_t),
-                                           "pCtx->sMb.pIntraNxNAvailFlag");
-    pCtx->sMb.pChromaPredMode[iLayer] = (int8_t*)pMa->WelsMallocz (kiMbNum * sizeof (int8_t),
-                                        "pCtx->sMb.pChromaPredMode[]");
-    pCtx->sMb.pCbp[iLayer] = (int8_t*)pMa->WelsMallocz (kiMbNum * sizeof (int8_t), "pCtx->sMb.pCbp[]");
-    pCtx->sMb.pSubMbType[iLayer] = (uint32_t (*)[MB_PARTITION_SIZE])pMa->WelsMallocz (
-                                     kiMbNum * sizeof (uint32_t) * MB_PARTITION_SIZE, "pCtx->sMb.pSubMbType[]");
-    // using int32_t for slice_idc, 4/21/2010
-    pCtx->sMb.pSliceIdc[iLayer] = (int32_t*) pMa->WelsMallocz (kiMbNum * sizeof (int32_t), "pCtx->sMb.pSliceIdc[]");
-    pCtx->sMb.pResidualPredFlag[iLayer] = (int8_t*) pMa->WelsMallocz (kiMbNum * sizeof (int8_t),
-                                          "pCtx->sMb.pResidualPredFlag[]");
-    pCtx->sMb.pInterPredictionDoneFlag[iLayer] = (int8_t*) pMa->WelsMallocz (kiMbNum * sizeof (int8_t),
-        "pCtx->sMb.pInterPredictionDoneFlag[]");
-
-    pCtx->sMb.pMbCorrectlyDecodedFlag[iLayer] = (bool*) pMa->WelsMallocz (kiMbNum * sizeof (bool),
-        "pCtx->sMb.pMbCorrectlyDecodedFlag[]");
-    pCtx->sMb.pMbRefConcealedFlag[iLayer] = (bool*) pMa->WelsMallocz (kiMbNum * sizeof (bool),
-                                            "pCtx->pMbRefConcealedFlag[]");
-
-    // a single check covers all buffers allocated above for this layer
-    WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY,
-                           ((NULL == pCtx->sMb.pMbType[iLayer]) ||
-                            (NULL == pCtx->sMb.pMv[iLayer][LIST_0]) ||
-                            (NULL == pCtx->sMb.pMv[iLayer][LIST_1]) ||
-                            (NULL == pCtx->sMb.pRefIndex[iLayer][LIST_0]) ||
-                            (NULL == pCtx->sMb.pRefIndex[iLayer][LIST_1]) ||
-                            (NULL == pCtx->sMb.pDirect[iLayer]) ||
-                            (NULL == pCtx->sMb.pLumaQp[iLayer]) ||
-                            (NULL == pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[iLayer]) ||
-                            (NULL == pCtx->sMb.pTransformSize8x8Flag[iLayer]) ||
-                            (NULL == pCtx->sMb.pChromaQp[iLayer]) ||
-                            (NULL == pCtx->sMb.pMvd[iLayer][LIST_0]) ||
-                            (NULL == pCtx->sMb.pMvd[iLayer][LIST_1]) ||
-                            (NULL == pCtx->sMb.pCbfDc[iLayer]) ||
-                            (NULL == pCtx->sMb.pNzc[iLayer]) ||
-                            (NULL == pCtx->sMb.pNzcRs[iLayer]) ||
-                            (NULL == pCtx->sMb.pScaledTCoeff[iLayer]) ||
-                            (NULL == pCtx->sMb.pIntraPredMode[iLayer]) ||
-                            (NULL == pCtx->sMb.pIntra4x4FinalMode[iLayer]) ||
-                            (NULL == pCtx->sMb.pIntraNxNAvailFlag[iLayer]) ||
-                            (NULL == pCtx->sMb.pChromaPredMode[iLayer]) ||
-                            (NULL == pCtx->sMb.pCbp[iLayer]) ||
-                            (NULL == pCtx->sMb.pSubMbType[iLayer]) ||
-                            (NULL == pCtx->sMb.pSliceIdc[iLayer]) ||
-                            (NULL == pCtx->sMb.pResidualPredFlag[iLayer]) ||
-                            (NULL == pCtx->sMb.pInterPredictionDoneFlag[iLayer]) ||
-                            (NULL == pCtx->sMb.pMbRefConcealedFlag[iLayer]) ||
-                            (NULL == pCtx->sMb.pMbCorrectlyDecodedFlag[iLayer])
-                           )
-                          )
-
-    // every slice_idc entry starts with all bits set
-    memset (pCtx->sMb.pSliceIdc[iLayer], 0xff, (kiMbNum * sizeof (int32_t)));
-
-    ++ iLayer;
-  } while (iLayer < LAYER_NUM_EXCHANGEABLE);
-
-  // remember what the memory was allocated for
-  pCtx->bInitialDqLayersMem     = true;
-  pCtx->iPicWidthReq            = kiMaxWidth;
-  pCtx->iPicHeightReq           = kiMaxHeight;
-
-  return ERR_NONE;
-}
-
-void UninitialDqLayersContext (PWelsDecoderContext pCtx) {
-  int32_t i = 0;
-  CMemoryAlign* pMa = pCtx->pMemAlign;
-
-  do {
-    PDqLayer pDq = pCtx->pDqLayersList[i];
-    if (pDq == NULL) {
-      ++ i;
-      continue;
-    }
-
-    if (pCtx->sMb.pMbType[i]) {
-      pMa->WelsFree (pCtx->sMb.pMbType[i], "pCtx->sMb.pMbType[]");
-
-      pCtx->sMb.pMbType[i] = NULL;
-    }
-
-    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
-      if (pCtx->sMb.pMv[i][listIdx]) {
-        pMa->WelsFree (pCtx->sMb.pMv[i][listIdx], "pCtx->sMb.pMv[][]");
-        pCtx->sMb.pMv[i][listIdx] = NULL;
-      }
-
-      if (pCtx->sMb.pRefIndex[i][listIdx]) {
-        pMa->WelsFree (pCtx->sMb.pRefIndex[i][listIdx], "pCtx->sMb.pRefIndex[][]");
-        pCtx->sMb.pRefIndex[i][listIdx] = NULL;
-      }
-
-      if (pCtx->sMb.pDirect[i]) {
-        pMa->WelsFree (pCtx->sMb.pDirect[i], "pCtx->sMb.pDirect[]");
-        pCtx->sMb.pDirect[i] = NULL;
-      }
-
-      if (pCtx->sMb.pMvd[i][listIdx]) {
-        pMa->WelsFree (pCtx->sMb.pMvd[i][listIdx], "pCtx->sMb.pMvd[][]");
-        pCtx->sMb.pMvd[i][listIdx] = NULL;
-      }
-    }
-
-    if (pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i]) {
-      pMa->WelsFree (pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i], "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
-
-      pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i] = NULL;
-    }
-
-    if (pCtx->sMb.pTransformSize8x8Flag[i]) {
-      pMa->WelsFree (pCtx->sMb.pTransformSize8x8Flag[i], "pCtx->sMb.pTransformSize8x8Flag[]");
-
-      pCtx->sMb.pTransformSize8x8Flag[i] = NULL;
-    }
-
-    if (pCtx->sMb.pLumaQp[i]) {
-      pMa->WelsFree (pCtx->sMb.pLumaQp[i], "pCtx->sMb.pLumaQp[]");
-
-      pCtx->sMb.pLumaQp[i] = NULL;
-    }
-
-    if (pCtx->sMb.pChromaQp[i]) {
-      pMa->WelsFree (pCtx->sMb.pChromaQp[i], "pCtx->sMb.pChromaQp[]");
-
-      pCtx->sMb.pChromaQp[i] = NULL;
-    }
-
-    if (pCtx->sMb.pCbfDc[i]) {
-      pMa->WelsFree (pCtx->sMb.pCbfDc[i], "pCtx->sMb.pCbfDc[]");
-      pCtx->sMb.pCbfDc[i] = NULL;
-    }
-
-    if (pCtx->sMb.pNzc[i]) {
-      pMa->WelsFree (pCtx->sMb.pNzc[i], "pCtx->sMb.pNzc[]");
-
-      pCtx->sMb.pNzc[i] = NULL;
-    }
-
-    if (pCtx->sMb.pNzcRs[i]) {
-      pMa->WelsFree (pCtx->sMb.pNzcRs[i], "pCtx->sMb.pNzcRs[]");
-
-      pCtx->sMb.pNzcRs[i] = NULL;
-    }
-
-    if (pCtx->sMb.pScaledTCoeff[i]) {
-      pMa->WelsFree (pCtx->sMb.pScaledTCoeff[i], "pCtx->sMb.pScaledTCoeff[]");
-
-      pCtx->sMb.pScaledTCoeff[i] = NULL;
-    }
-
-    if (pCtx->sMb.pIntraPredMode[i]) {
-      pMa->WelsFree (pCtx->sMb.pIntraPredMode[i], "pCtx->sMb.pIntraPredMode[]");
-
-      pCtx->sMb.pIntraPredMode[i] = NULL;
-    }
-
-    if (pCtx->sMb.pIntra4x4FinalMode[i]) {
-      pMa->WelsFree (pCtx->sMb.pIntra4x4FinalMode[i], "pCtx->sMb.pIntra4x4FinalMode[]");
-
-      pCtx->sMb.pIntra4x4FinalMode[i] = NULL;
-    }
-
-    if (pCtx->sMb.pIntraNxNAvailFlag[i]) {
-      pMa->WelsFree (pCtx->sMb.pIntraNxNAvailFlag[i], "pCtx->sMb.pIntraNxNAvailFlag");
-
-      pCtx->sMb.pIntraNxNAvailFlag[i] = NULL;
-    }
-
-    if (pCtx->sMb.pChromaPredMode[i]) {
-      pMa->WelsFree (pCtx->sMb.pChromaPredMode[i], "pCtx->sMb.pChromaPredMode[]");
-
-      pCtx->sMb.pChromaPredMode[i] = NULL;
-    }
-
-    if (pCtx->sMb.pCbp[i]) {
-      pMa->WelsFree (pCtx->sMb.pCbp[i], "pCtx->sMb.pCbp[]");
-
-      pCtx->sMb.pCbp[i] = NULL;
-    }
-
-    //      if (pCtx->sMb.pMotionPredFlag[i])
-    //{
-    //  pMa->WelsFree( pCtx->sMb.pMotionPredFlag[i], "pCtx->sMb.pMotionPredFlag[]" );
-
-    //  pCtx->sMb.pMotionPredFlag[i] = NULL;
-    //}
-
-    if (pCtx->sMb.pSubMbType[i]) {
-      pMa->WelsFree (pCtx->sMb.pSubMbType[i], "pCtx->sMb.pSubMbType[]");
-
-      pCtx->sMb.pSubMbType[i] = NULL;
-    }
-
-    if (pCtx->sMb.pSliceIdc[i]) {
-      pMa->WelsFree (pCtx->sMb.pSliceIdc[i], "pCtx->sMb.pSliceIdc[]");
-
-      pCtx->sMb.pSliceIdc[i] = NULL;
-    }
-
-    if (pCtx->sMb.pResidualPredFlag[i]) {
-      pMa->WelsFree (pCtx->sMb.pResidualPredFlag[i], "pCtx->sMb.pResidualPredFlag[]");
-
-      pCtx->sMb.pResidualPredFlag[i] = NULL;
-    }
-
-    if (pCtx->sMb.pInterPredictionDoneFlag[i]) {
-      pMa->WelsFree (pCtx->sMb.pInterPredictionDoneFlag[i], "pCtx->sMb.pInterPredictionDoneFlag[]");
-
-      pCtx->sMb.pInterPredictionDoneFlag[i] = NULL;
-    }
-
-    if (pCtx->sMb.pMbCorrectlyDecodedFlag[i]) {
-      pMa->WelsFree (pCtx->sMb.pMbCorrectlyDecodedFlag[i], "pCtx->sMb.pMbCorrectlyDecodedFlag[]");
-      pCtx->sMb.pMbCorrectlyDecodedFlag[i] = NULL;
-    }
-
-    if (pCtx->sMb.pMbRefConcealedFlag[i]) {
-      pMa->WelsFree (pCtx->sMb.pMbRefConcealedFlag[i], "pCtx->sMb.pMbRefConcealedFlag[]");
-      pCtx->sMb.pMbRefConcealedFlag[i] = NULL;
-    }
-    pMa->WelsFree (pDq, "pDq");
-
-    pDq = NULL;
-    pCtx->pDqLayersList[i] = NULL;
-
-    ++ i;
-  } while (i < LAYER_NUM_EXCHANGEABLE);
-
-  pCtx->iPicWidthReq            = 0;
-  pCtx->iPicHeightReq           = 0;
-  pCtx->bInitialDqLayersMem     = false;
-}
-
-/*
- *  Reset the current access unit after it has been processed: start/end positions are
- *  cleared, the completed flag is dropped and the NAL unit nodes that follow the
- *  uiActualUnitsNum processed ones are swapped to the front of pNalUnitsList.
- *  Afterwards both uiActualUnitsNum and uiAvailUnitsNum hold the number of units left.
- */
-void ResetCurrentAccessUnit (PWelsDecoderContext pCtx) {
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-  pCurAu->uiStartPos            = 0;
-  pCurAu->uiEndPos              = 0;
-  pCurAu->bCompletedAuFlag      = false;
-  if (pCurAu->uiActualUnitsNum > 0) {
-    const uint32_t kuiActualNum = pCurAu->uiActualUnitsNum;
-    // a more simpler method to do nal units list management prefered here
-    const uint32_t kuiAvailNum  = pCurAu->uiAvailUnitsNum;
-    // The subtraction is unsigned: were the actual count ever larger than the available
-    // count it would wrap around and the loop below would swap far outside the list.
-    // Clamp to zero in that case.
-    const uint32_t kuiLeftNum   = (kuiAvailNum > kuiActualNum) ? (kuiAvailNum - kuiActualNum) : 0;
-
-    // Swapping active nal unit nodes of succeeding AU with leading of list
-    for (uint32_t iIdx = 0; iIdx < kuiLeftNum; ++iIdx) {
-      PNalUnit t = pCurAu->pNalUnitsList[kuiActualNum + iIdx];
-      pCurAu->pNalUnitsList[kuiActualNum + iIdx] = pCurAu->pNalUnitsList[iIdx];
-      pCurAu->pNalUnitsList[iIdx] = t;
-    }
-    pCurAu->uiActualUnitsNum = pCurAu->uiAvailUnitsNum = kuiLeftNum;
-  }
-}
-
-/*!
- * \brief   Force reset current Acess Unit Nal list in case error parsing/decoding in current AU
- * \author
- * \history 11/16/2009
- */
-void ForceResetCurrentAccessUnit (PAccessUnit pAu) {
-  uint32_t uiSucAuIdx = pAu->uiEndPos + 1;
-  uint32_t uiCurAuIdx = 0;
-
-  // swap the succeeding AU's nal units to the front
-  while (uiSucAuIdx < pAu->uiAvailUnitsNum) {
-    PNalUnit t = pAu->pNalUnitsList[uiSucAuIdx];
-    pAu->pNalUnitsList[uiSucAuIdx] = pAu->pNalUnitsList[uiCurAuIdx];
-    pAu->pNalUnitsList[uiCurAuIdx] = t;
-    ++ uiSucAuIdx;
-    ++ uiCurAuIdx;
-  }
-
-  // Update avail/actual units num accordingly for next AU parsing
-  if (pAu->uiAvailUnitsNum > pAu->uiEndPos)
-    pAu->uiAvailUnitsNum -= (pAu->uiEndPos + 1);
-  else
-    pAu->uiAvailUnitsNum = 0;
-  pAu->uiActualUnitsNum = 0;
-  pAu->uiStartPos       = 0;
-  pAu->uiEndPos         = 0;
-  pAu->bCompletedAuFlag = false;
-}
-
-//clear current corrupted NAL from pNalUnitsList
-void ForceClearCurrentNal (PAccessUnit pAu) {
-  if (pAu->uiAvailUnitsNum > 0)
-    -- pAu->uiAvailUnitsNum;
-}
-
-void ForceResetParaSetStatusAndAUList (PWelsDecoderContext pCtx) {
-  pCtx->bSpsExistAheadFlag = false;
-  pCtx->bSubspsExistAheadFlag = false;
-  pCtx->bPpsExistAheadFlag = false;
-
-  // Force clear the AU list
-  pCtx->pAccessUnitList->uiAvailUnitsNum        = 0;
-  pCtx->pAccessUnitList->uiActualUnitsNum       = 0;
-  pCtx->pAccessUnitList->uiStartPos             = 0;
-  pCtx->pAccessUnitList->uiEndPos               = 0;
-  pCtx->pAccessUnitList->bCompletedAuFlag       = false;
-}
-
-void CheckAvailNalUnitsListContinuity (PWelsDecoderContext pCtx, int32_t iStartIdx, int32_t iEndIdx) {
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-
-  uint8_t uiLastNuDependencyId, uiLastNuLayerDqId;
-  uint8_t uiCurNuDependencyId, uiCurNuQualityId, uiCurNuLayerDqId, uiCurNuRefLayerDqId;
-
-  int32_t iCurNalUnitIdx = 0;
-
-  //check the continuity of pNalUnitsList forwards (from pIdxNoInterLayerPred to end_postion)
-  uiLastNuDependencyId = pCurAu->pNalUnitsList[iStartIdx]->sNalHeaderExt.uiDependencyId;//starting nal unit
-  uiLastNuLayerDqId   = pCurAu->pNalUnitsList[iStartIdx]->sNalHeaderExt.uiLayerDqId;//starting nal unit
-  iCurNalUnitIdx = iStartIdx + 1;//current nal unit
-  while (iCurNalUnitIdx <= iEndIdx) {
-    uiCurNuDependencyId   = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalHeaderExt.uiDependencyId;
-    uiCurNuQualityId      = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalHeaderExt.uiQualityId;
-    uiCurNuLayerDqId     = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalHeaderExt.uiLayerDqId;
-    uiCurNuRefLayerDqId = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalData.sVclNal.sSliceHeaderExt.uiRefLayerDqId;
-
-    if (uiCurNuDependencyId == uiLastNuDependencyId) {
-      uiLastNuLayerDqId = uiCurNuLayerDqId;
-      ++ iCurNalUnitIdx;
-    } else { //uiCurNuDependencyId != uiLastNuDependencyId, new dependency arrive
-      if (uiCurNuQualityId == 0) {
-        uiLastNuDependencyId = uiCurNuDependencyId;
-        if (uiCurNuRefLayerDqId == uiLastNuLayerDqId) {
-          uiLastNuLayerDqId = uiCurNuLayerDqId;
-          ++ iCurNalUnitIdx;
-        } else { //cur_nu_layer_id != next_nu_ref_layer_dq_id, the chain is broken at this point
-          break;
-        }
-      } else { //new dependency arrive, but no base quality layer, so we must stop in this point
-        break;
-      }
-    }
-  }
-
-  -- iCurNalUnitIdx;
-  pCurAu->uiEndPos = iCurNalUnitIdx;
-  pCtx->uiTargetDqId = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalHeaderExt.uiLayerDqId;
-}
-
-//main purpose: to support multi-slice and to include all slice which have the same uiDependencyId, uiQualityId and frame_num
-//for single slice, pIdxNoInterLayerPred SHOULD NOT be modified
-void RefineIdxNoInterLayerPred (PAccessUnit pCurAu, int32_t* pIdxNoInterLayerPred) {
-  int32_t iLastNalDependId  = pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalHeaderExt.uiDependencyId;
-  int32_t iLastNalQualityId = pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalHeaderExt.uiQualityId;
-  uint8_t uiLastNalTId       = pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalHeaderExt.uiTemporalId;
-  int32_t iLastNalFrameNum  =
-    pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFrameNum;
-  int32_t iLastNalPoc        =
-    pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
-  int32_t iLastNalFirstMb   =
-    pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
-  int32_t iCurNalDependId, iCurNalQualityId, iCurNalTId, iCurNalFrameNum, iCurNalPoc, iCurNalFirstMb, iCurIdx,
-          iFinalIdxNoInterLayerPred;
-
-  bool  bMultiSliceFind = false;
-
-  iFinalIdxNoInterLayerPred = 0;
-  iCurIdx = *pIdxNoInterLayerPred - 1;
-  while (iCurIdx >= 0) {
-    if (pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.iNoInterLayerPredFlag) {
-      iCurNalDependId  = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiDependencyId;
-      iCurNalQualityId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiQualityId;
-      iCurNalTId       = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiTemporalId;
-      iCurNalFrameNum  = pCurAu->pNalUnitsList[iCurIdx]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFrameNum;
-      iCurNalPoc        = pCurAu->pNalUnitsList[iCurIdx]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
-      iCurNalFirstMb   = pCurAu->pNalUnitsList[iCurIdx]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
-
-      if (iCurNalDependId == iLastNalDependId  &&
-          iCurNalQualityId == iLastNalQualityId &&
-          iCurNalTId       == uiLastNalTId       &&
-          iCurNalFrameNum  == iLastNalFrameNum  &&
-          iCurNalPoc        == iLastNalPoc        &&
-          iCurNalFirstMb   != iLastNalFirstMb) {
-        bMultiSliceFind = true;
-        iFinalIdxNoInterLayerPred = iCurIdx;
-        --iCurIdx;
-        continue;
-      } else {
-        break;
-      }
-    }
-    --iCurIdx;
-  }
-
-  if (bMultiSliceFind && *pIdxNoInterLayerPred != iFinalIdxNoInterLayerPred) {
-    *pIdxNoInterLayerPred = iFinalIdxNoInterLayerPred;
-  }
-}
-
-bool CheckPocOfCurValidNalUnits (PAccessUnit pCurAu, int32_t pIdxNoInterLayerPred) {
-  int32_t iEndIdx    = pCurAu->uiEndPos;
-  int32_t iCurAuPoc =
-    pCurAu->pNalUnitsList[pIdxNoInterLayerPred]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
-  int32_t iTmpPoc, i;
-  for (i = pIdxNoInterLayerPred + 1; i < iEndIdx; i++) {
-    iTmpPoc = pCurAu->pNalUnitsList[i]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
-    if (iTmpPoc != iCurAuPoc) {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-bool CheckIntegrityNalUnitsList (PWelsDecoderContext pCtx) {
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-  const int32_t kiEndPos = pCurAu->uiEndPos;
-  int32_t iIdxNoInterLayerPred = 0;
-
-  if (!pCurAu->bCompletedAuFlag)
-    return false;
-
-  if (pCtx->bNewSeqBegin) {
-    pCurAu->uiStartPos = 0;
-    //step1: search the pNalUnit whose iNoInterLayerPredFlag equal to 1 backwards (from uiEndPos to 0)
-    iIdxNoInterLayerPred = kiEndPos;
-    while (iIdxNoInterLayerPred >= 0) {
-      if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
-        break;
-      }
-      --iIdxNoInterLayerPred;
-    }
-    if (iIdxNoInterLayerPred < 0) {
-      //can not find the Nal Unit whose no_inter_pred_falg equal to 1, MUST STOP decode
-      return false;
-    }
-
-    //step2: support multi-slice, to include all base layer slice
-    RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
-    pCurAu->uiStartPos = iIdxNoInterLayerPred;
-    CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos);
-
-    if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
-      return false;
-    }
-
-    pCtx->iCurSeqIntervalTargetDependId = pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalHeaderExt.uiDependencyId;
-    pCtx->iCurSeqIntervalMaxPicWidth  =
-      pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iMbWidth << 4;
-    pCtx->iCurSeqIntervalMaxPicHeight =
-      pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iMbHeight << 4;
-  } else { //P_SLICE
-    //step 1: search uiDependencyId equal to pCtx->cur_seq_interval_target_dependency_id
-    bool bGetDependId = false;
-    int32_t iIdxDependId = 0;
-
-    iIdxDependId = kiEndPos;
-    while (iIdxDependId >= 0) {
-      if (pCtx->iCurSeqIntervalTargetDependId == pCurAu->pNalUnitsList[iIdxDependId]->sNalHeaderExt.uiDependencyId) {
-        bGetDependId = true;
-        break;
-      } else {
-        --iIdxDependId;
-      }
-    }
-
-    //step 2: switch according to whether or not find the index of pNalUnit whose uiDependencyId equal to iCurSeqIntervalTargetDependId
-    if (bGetDependId) { //get the index of pNalUnit whose uiDependencyId equal to iCurSeqIntervalTargetDependId
-      bool bGetNoInterPredFront = false;
-      //step 2a: search iNoInterLayerPredFlag [0....iIdxDependId]
-      iIdxNoInterLayerPred = iIdxDependId;
-      while (iIdxNoInterLayerPred >= 0) {
-        if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
-          bGetNoInterPredFront = true;
-          break;
-        }
-        --iIdxNoInterLayerPred;
-      }
-      //step 2b: switch, whether or not find the NAL unit whose no_inter_pred_flag equal to 1 among [0....iIdxDependId]
-      if (bGetNoInterPredFront) { //YES
-        RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
-        pCurAu->uiStartPos = iIdxNoInterLayerPred;
-        CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, iIdxDependId);
-
-        if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
-          return false;
-        }
-      } else { //NO, should find the NAL unit whose no_inter_pred_flag equal to 1 among [iIdxDependId....uiEndPos]
-        iIdxNoInterLayerPred = iIdxDependId;
-        while (iIdxNoInterLayerPred <= kiEndPos) {
-          if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
-            break;
-          }
-          ++iIdxNoInterLayerPred;
-        }
-
-        if (iIdxNoInterLayerPred > kiEndPos) {
-          return false; //cann't find the index of pNalUnit whose no_inter_pred_flag = 1
-        }
-
-        RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
-        pCurAu->uiStartPos = iIdxNoInterLayerPred;
-        CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos);
-
-        if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
-          return false;
-        }
-      }
-    } else { //without the index of pNalUnit, should process this AU as common case
-      iIdxNoInterLayerPred = kiEndPos;
-      while (iIdxNoInterLayerPred >= 0) {
-        if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
-          break;
-        }
-        --iIdxNoInterLayerPred;
-      }
-      if (iIdxNoInterLayerPred < 0) {
-        return false; //cann't find the index of pNalUnit whose iNoInterLayerPredFlag = 1
-      }
-
-      RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
-      pCurAu->uiStartPos = iIdxNoInterLayerPred;
-      CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos);
-
-      if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
-void CheckOnlyOneLayerInAu (PWelsDecoderContext pCtx) {
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-
-  int32_t iEndIdx = pCurAu->uiEndPos;
-  int32_t iCurIdx = pCurAu->uiStartPos;
-  uint8_t uiDId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiDependencyId;
-  uint8_t uiQId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiQualityId;
-  uint8_t uiTId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiTemporalId;
-
-  uint8_t uiCurDId, uiCurQId, uiCurTId;
-
-  pCtx->bOnlyOneLayerInCurAuFlag = true;
-
-  if (iEndIdx == iCurIdx) { //only one NAL in pNalUnitsList
-    return;
-  }
-
-  ++iCurIdx;
-  while (iCurIdx <= iEndIdx) {
-    uiCurDId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiDependencyId;
-    uiCurQId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiQualityId;
-    uiCurTId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiTemporalId;
-
-    if (uiDId != uiCurDId || uiQId != uiCurQId || uiTId != uiCurTId) {
-      pCtx->bOnlyOneLayerInCurAuFlag = false;
-      return;
-    }
-
-    ++iCurIdx;
-  }
-}
-
-int32_t WelsDecodeAccessUnitStart (PWelsDecoderContext pCtx) {
-  // Roll back NAL units not being belong to current access unit list for proceeded access unit
-  int32_t iRet = UpdateAccessUnit (pCtx);
-  if (iRet != ERR_NONE)
-    return iRet;
-
-  pCtx->pAccessUnitList->uiStartPos = 0;
-  if (!pCtx->bAvcBasedFlag && !CheckIntegrityNalUnitsList (pCtx)) {
-    pCtx->iErrorCode |= dsBitstreamError;
-    return dsBitstreamError;
-  }
-
-  //check current AU has only one layer or not
-  //If YES, can use deblocking based on AVC
-  if (!pCtx->bAvcBasedFlag) {
-    CheckOnlyOneLayerInAu (pCtx);
-  }
-
-  return ERR_NONE;
-}
-
-void WelsDecodeAccessUnitEnd (PWelsDecoderContext pCtx) {
-  //save previous header info
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-  PNalUnit pCurNal = pCurAu->pNalUnitsList[pCurAu->uiEndPos];
-  memcpy (&pCtx->sLastNalHdrExt, &pCurNal->sNalHeaderExt, sizeof (SNalUnitHeaderExt));
-  memcpy (&pCtx->sLastSliceHeader,
-          &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader, sizeof (SSliceHeader));
-  // uninitialize context of current access unit and rbsp buffer clean
-  ResetCurrentAccessUnit (pCtx);
-}
-
-/* CheckNewSeqBeginAndUpdateActiveLayerSps
- * return:
- * true - the AU to be construct is the start of new sequence; false - not
- */
-static bool CheckNewSeqBeginAndUpdateActiveLayerSps (PWelsDecoderContext pCtx) {
-  bool bNewSeq = false;
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-  PSps pTmpLayerSps[MAX_LAYER_NUM];
-  for (int i = 0; i < MAX_LAYER_NUM; i++) {
-    pTmpLayerSps[i] = NULL;
-  }
-  // track the layer sps for the current au
-  for (unsigned int i = pCurAu->uiStartPos; i <= pCurAu->uiEndPos; i++) {
-    uint32_t uiDid = pCurAu->pNalUnitsList[i]->sNalHeaderExt.uiDependencyId;
-    pTmpLayerSps[uiDid] = pCurAu->pNalUnitsList[i]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps;
-    if ((pCurAu->pNalUnitsList[i]->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR)
-        || (pCurAu->pNalUnitsList[i]->sNalHeaderExt.bIdrFlag))
-      bNewSeq = true;
-  }
-  int iMaxActiveLayer = 0, iMaxCurrentLayer = 0;
-  for (int i = MAX_LAYER_NUM - 1; i >= 0; i--) {
-    if (pCtx->pActiveLayerSps[i] != NULL) {
-      iMaxActiveLayer = i;
-      break;
-    }
-  }
-  for (int i = MAX_LAYER_NUM - 1; i >= 0; i--) {
-    if (pTmpLayerSps[i] != NULL) {
-      iMaxCurrentLayer = i;
-      break;
-    }
-  }
-  if ((iMaxCurrentLayer != iMaxActiveLayer)
-      || (pTmpLayerSps[iMaxCurrentLayer]  != pCtx->pActiveLayerSps[iMaxActiveLayer])) {
-    bNewSeq = true;
-  }
-  // fill active sps if the current sps is not null while active layer is null
-  if (!bNewSeq) {
-    for (int i = 0; i < MAX_LAYER_NUM; i++) {
-      if (pCtx->pActiveLayerSps[i] == NULL && pTmpLayerSps[i] != NULL) {
-        pCtx->pActiveLayerSps[i] = pTmpLayerSps[i];
-      }
-    }
-  } else {
-    // UpdateActiveLayerSps if new sequence start
-    memcpy (&pCtx->pActiveLayerSps[0], &pTmpLayerSps[0], MAX_LAYER_NUM * sizeof (PSps));
-  }
-  return bNewSeq;
-}
-
-static void WriteBackActiveParameters (PWelsDecoderContext pCtx) {
-  if (pCtx->iOverwriteFlags & OVERWRITE_PPS) {
-    memcpy (&pCtx->sPpsBuffer[pCtx->sPpsBuffer[MAX_PPS_COUNT].iPpsId], &pCtx->sPpsBuffer[MAX_PPS_COUNT], sizeof (SPps));
-  }
-  if (pCtx->iOverwriteFlags & OVERWRITE_SPS) {
-    memcpy (&pCtx->sSpsBuffer[pCtx->sSpsBuffer[MAX_SPS_COUNT].iSpsId], &pCtx->sSpsBuffer[MAX_SPS_COUNT], sizeof (SSps));
-    pCtx->bNewSeqBegin = true;
-  }
-  if (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS) {
-    memcpy (&pCtx->sSubsetSpsBuffer[pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT].sSps.iSpsId],
-            &pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT], sizeof (SSubsetSps));
-    pCtx->bNewSeqBegin = true;
-  }
-  pCtx->iOverwriteFlags = OVERWRITE_NONE;
-}
-
-/*
- * DecodeFinishUpdate
- * decoder finish decoding, update active parameter sets and new seq status
- *
- */
-
-void DecodeFinishUpdate (PWelsDecoderContext pCtx) {
-  pCtx->bNewSeqBegin = false;
-  WriteBackActiveParameters (pCtx);
-  pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || pCtx->bNextNewSeqBegin;
-  pCtx->bNextNewSeqBegin = false; // reset it
-  if (pCtx->bNewSeqBegin)
-    ResetActiveSPSForEachLayer (pCtx);
-}
-
-/*
- * ConstructAccessUnit
- * construct an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to
- * joint a collective access unit.
- * parameter\
- *  buf:        bitstream data buffer
- *  bit_len:    size in bit length of data
- *  buf_len:    size in byte length of data
- *  coded_au:   mark an Access Unit decoding finished
- * return:
- *  0 - success; otherwise returned error_no defined in error_no.h
- */
-int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
-  int32_t iErr;
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-  pCtx->bAuReadyFlag = false;
-  pCtx->bLastHasMmco5 = false;
-  bool bTmpNewSeqBegin = CheckNewSeqBeginAndUpdateActiveLayerSps (pCtx);
-  pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || bTmpNewSeqBegin;
-  iErr = WelsDecodeAccessUnitStart (pCtx);
-  GetVclNalTemporalId (pCtx);
-
-  if (ERR_NONE != iErr) {
-    ForceResetCurrentAccessUnit (pCtx->pAccessUnitList);
-    if (!pCtx->pParam->bParseOnly)
-      pDstInfo->iBufferStatus = 0;
-    pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || pCtx->bNextNewSeqBegin;
-    pCtx->bNextNewSeqBegin = false; // reset it
-    if (pCtx->bNewSeqBegin)
-      ResetActiveSPSForEachLayer (pCtx);
-    return iErr;
-  }
-
-  pCtx->pSps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps;
-  pCtx->pPps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pPps;
-
-  //try to allocate or relocate DPB memory only when new sequence is coming.
-  if (pCtx->bNewSeqBegin) {
-    WelsResetRefPic (pCtx); //clear ref pPic when IDR NAL
-    iErr = SyncPictureResolutionExt (pCtx, pCtx->pSps->iMbWidth, pCtx->pSps->iMbHeight);
-
-    if (ERR_NONE != iErr) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "sync picture resolution ext failed,  the error is %d", iErr);
-      return iErr;
-    }
-  }
-
-  iErr = DecodeCurrentAccessUnit (pCtx, ppDst, pDstInfo);
-
-  WelsDecodeAccessUnitEnd (pCtx);
-
-  if (ERR_NONE != iErr) {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "returned error from decoding:[0x%x]", iErr);
-    return iErr;
-  }
-
-  return ERR_NONE;
-}
-
-static inline void InitDqLayerInfo (PDqLayer pDqLayer, PLayerInfo pLayerInfo, PNalUnit pNalUnit, PPicture pPicDec) {
-  PNalUnitHeaderExt pNalHdrExt    = &pNalUnit->sNalHeaderExt;
-  PSliceHeaderExt pShExt          = &pNalUnit->sNalData.sVclNal.sSliceHeaderExt;
-  PSliceHeader pSh                = &pShExt->sSliceHeader;
-  const uint8_t kuiQualityId      = pNalHdrExt->uiQualityId;
-
-  memcpy (&pDqLayer->sLayerInfo, pLayerInfo, sizeof (SLayerInfo)); //confirmed_safe_unsafe_usage
-
-  pDqLayer->pDec        = pPicDec;
-  pDqLayer->iMbWidth    = pSh->iMbWidth;        // MB width of this picture
-  pDqLayer->iMbHeight   = pSh->iMbHeight;// MB height of this picture
-
-  pDqLayer->iSliceIdcBackup = (pSh->iFirstMbInSlice << 7) | (pNalHdrExt->uiDependencyId << 4) | (pNalHdrExt->uiQualityId);
-
-  /* Common syntax elements across all slices of a DQLayer */
-  pDqLayer->uiPpsId                                     = pLayerInfo->pPps->iPpsId;
-  pDqLayer->uiDisableInterLayerDeblockingFilterIdc      = pShExt->uiDisableInterLayerDeblockingFilterIdc;
-  pDqLayer->iInterLayerSliceAlphaC0Offset               = pShExt->iInterLayerSliceAlphaC0Offset;
-  pDqLayer->iInterLayerSliceBetaOffset                  = pShExt->iInterLayerSliceBetaOffset;
-  pDqLayer->iSliceGroupChangeCycle                      = pSh->iSliceGroupChangeCycle;
-  pDqLayer->bStoreRefBasePicFlag                        = pShExt->bStoreRefBasePicFlag;
-  pDqLayer->bTCoeffLevelPredFlag                        = pShExt->bTCoeffLevelPredFlag;
-  pDqLayer->bConstrainedIntraResamplingFlag             = pShExt->bConstrainedIntraResamplingFlag;
-  pDqLayer->uiRefLayerDqId                              = pShExt->uiRefLayerDqId;
-  pDqLayer->uiRefLayerChromaPhaseXPlus1Flag             = pShExt->uiRefLayerChromaPhaseXPlus1Flag;
-  pDqLayer->uiRefLayerChromaPhaseYPlus1                 = pShExt->uiRefLayerChromaPhaseYPlus1;
-  pDqLayer->bUseWeightPredictionFlag                    = false;
-  pDqLayer->bUseWeightedBiPredIdc = false;
-  //memcpy(&pDqLayer->sScaledRefLayer, &pShExt->sScaledRefLayer, sizeof(SPosOffset));//confirmed_safe_unsafe_usage
-
-  if (kuiQualityId == BASE_QUALITY_ID) {
-    pDqLayer->pRefPicListReordering = &pSh->pRefPicListReordering;
-    pDqLayer->pRefPicMarking = &pSh->sRefMarking;
-
-    pDqLayer->bUseWeightPredictionFlag = pSh->pPps->bWeightedPredFlag;
-    pDqLayer->bUseWeightedBiPredIdc = pSh->pPps->uiWeightedBipredIdc != 0;
-    if (pSh->pPps->bWeightedPredFlag || pSh->pPps->uiWeightedBipredIdc) {
-      pDqLayer->pPredWeightTable = &pSh->sPredWeightTable;
-    }
-    pDqLayer->pRefPicBaseMarking        = &pShExt->sRefBasePicMarking;
-  }
-
-  pDqLayer->uiLayerDqId                 = pNalHdrExt->uiLayerDqId;      // dq_id of current layer
-  pDqLayer->bUseRefBasePicFlag          = pNalHdrExt->bUseRefBasePicFlag;
-}
-
-void WelsDqLayerDecodeStart (PWelsDecoderContext pCtx, PNalUnit pCurNal, PSps pSps, PPps pPps) {
-  PSliceHeader pSh = &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
-
-  pCtx->eSliceType   = pSh->eSliceType;
-  pCtx->pSliceHeader = pSh;
-  pCtx->bUsedAsRef   = false;
-
-  pCtx->iFrameNum    = pSh->iFrameNum;
-  UpdateDecoderStatisticsForActiveParaset (& (pCtx->sDecoderStatistics),
-      pSps, pPps);
-}
-
-int32_t InitRefPicList (PWelsDecoderContext pCtx, const uint8_t kuiNRi, int32_t iPoc) {
-  int32_t iRet = ERR_NONE;
-  if (pCtx->eSliceType == B_SLICE) {
-    iRet = WelsInitBSliceRefList (pCtx, iPoc);
-    CreateImplicitWeightTable (pCtx);
-  } else
-    iRet = WelsInitRefList (pCtx, iPoc);
-  if ((pCtx->eSliceType != I_SLICE && pCtx->eSliceType != SI_SLICE)) {
-#if 0
-    if (pCtx->pSps->uiProfileIdc != 66 && pCtx->pPps->bEntropyCodingModeFlag)
-      iRet = WelsReorderRefList2 (pCtx);
-    else
-#endif
-      iRet = WelsReorderRefList (pCtx);
-  }
-
-  return iRet;
-}
-
-void InitCurDqLayerData (PWelsDecoderContext pCtx, PDqLayer pCurDq) {
-  if (NULL != pCtx && NULL != pCurDq) {
-    pCurDq->pMbType         = pCtx->sMb.pMbType[0];
-    pCurDq->pSliceIdc       = pCtx->sMb.pSliceIdc[0];
-    pCurDq->pMv[LIST_0]         = pCtx->sMb.pMv[0][LIST_0];
-    pCurDq->pMv[LIST_1]         = pCtx->sMb.pMv[0][LIST_1];
-    pCurDq->pRefIndex[LIST_0]    = pCtx->sMb.pRefIndex[0][LIST_0];
-    pCurDq->pRefIndex[LIST_1]   = pCtx->sMb.pRefIndex[0][LIST_1];
-    pCurDq->pDirect             = pCtx->sMb.pDirect[0];
-    pCurDq->pNoSubMbPartSizeLessThan8x8Flag = pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[0];
-    pCurDq->pTransformSize8x8Flag = pCtx->sMb.pTransformSize8x8Flag[0];
-    pCurDq->pLumaQp         = pCtx->sMb.pLumaQp[0];
-    pCurDq->pChromaQp       = pCtx->sMb.pChromaQp[0];
-    pCurDq->pMvd[LIST_0]         = pCtx->sMb.pMvd[0][LIST_0];
-    pCurDq->pMvd[LIST_1]          = pCtx->sMb.pMvd[0][LIST_1];
-    pCurDq->pCbfDc          = pCtx->sMb.pCbfDc[0];
-    pCurDq->pNzc            = pCtx->sMb.pNzc[0];
-    pCurDq->pNzcRs          = pCtx->sMb.pNzcRs[0];
-    pCurDq->pScaledTCoeff   = pCtx->sMb.pScaledTCoeff[0];
-    pCurDq->pIntraPredMode  = pCtx->sMb.pIntraPredMode[0];
-    pCurDq->pIntra4x4FinalMode = pCtx->sMb.pIntra4x4FinalMode[0];
-    pCurDq->pIntraNxNAvailFlag = pCtx->sMb.pIntraNxNAvailFlag[0];
-    pCurDq->pChromaPredMode = pCtx->sMb.pChromaPredMode[0];
-    pCurDq->pCbp            = pCtx->sMb.pCbp[0];
-    pCurDq->pSubMbType      = pCtx->sMb.pSubMbType[0];
-    pCurDq->pInterPredictionDoneFlag = pCtx->sMb.pInterPredictionDoneFlag[0];
-    pCurDq->pResidualPredFlag = pCtx->sMb.pResidualPredFlag[0];
-    pCurDq->pMbCorrectlyDecodedFlag = pCtx->sMb.pMbCorrectlyDecodedFlag[0];
-    pCurDq->pMbRefConcealedFlag = pCtx->sMb.pMbRefConcealedFlag[0];
-  }
-}
-
-/*
- * DecodeCurrentAccessUnit
- * Decode current access unit when current AU is completed.
- */
-int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
-  int32_t iRefCount[LIST_A];
-  PNalUnit pNalCur = NULL;
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-
-  int32_t iIdx = pCurAu->uiStartPos;
-  int32_t iEndIdx = pCurAu->uiEndPos;
-
-  int32_t iPpsId = 0;
-  int32_t iRet = ERR_NONE;
-
-  bool bAllRefComplete = true; // Assume default all ref picutres are complete
-
-  const uint8_t kuiTargetLayerDqId = GetTargetDqId (pCtx->uiTargetDqId, pCtx->pParam);
-  const uint8_t kuiDependencyIdMax = (kuiTargetLayerDqId & 0x7F) >> 4;
-  int16_t iLastIdD = -1, iLastIdQ = -1;
-  int16_t iCurrIdD = 0, iCurrIdQ = 0;
-  uint8_t uiNalRefIdc = 0;
-  bool bFreshSliceAvailable =
-    true; // Another fresh slice comingup for given dq layer, for multiple slices in case of header parts of slices sometimes loss over error-prone channels, 8/14/2008
-
-  //update pCurDqLayer at the starting of AU decoding
-  if (pCtx->bInitialDqLayersMem) {
-    pCtx->pCurDqLayer = pCtx->pDqLayersList[0];
-  }
-
-  InitCurDqLayerData (pCtx, pCtx->pCurDqLayer);
-
-  pNalCur = pCurAu->pNalUnitsList[iIdx];
-  while (iIdx <= iEndIdx) {
-    PDqLayer dq_cur = pCtx->pCurDqLayer;
-    SLayerInfo pLayerInfo;
-    PSliceHeaderExt pShExt = NULL;
-    PSliceHeader pSh = NULL;
-
-    if (pCtx->pDec == NULL) {
-      pCtx->pDec = PrefetchPic (pCtx->pPicBuff);
-      if (pCtx->iTotalNumMbRec != 0)
-        pCtx->iTotalNumMbRec = 0;
-
-      if (NULL == pCtx->pDec) {
-        WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
-                 "DecodeCurrentAccessUnit()::::::PrefetchPic ERROR, pSps->iNumRefFrames:%d.",
-                 pCtx->pSps->iNumRefFrames);
-        // The error code here need to be separated from the dsOutOfMemory
-        pCtx->iErrorCode |= dsOutOfMemory;
-        return ERR_INFO_REF_COUNT_OVERFLOW;
-      }
-      pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
-    } else if (pCtx->iTotalNumMbRec == 0) { //pDec != NULL, already start
-      pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
-    }
-    pCtx->pDec->uiTimeStamp = pNalCur->uiTimeStamp;
-
-    if (pCtx->iTotalNumMbRec == 0) { //Picture start to decode
-      for (int32_t i = 0; i < LAYER_NUM_EXCHANGEABLE; ++ i)
-        memset (pCtx->sMb.pSliceIdc[i], 0xff, (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t)));
-      memset (pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool));
-      memset (pCtx->pCurDqLayer->pMbRefConcealedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool));
-      pCtx->pDec->iMbNum = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight;
-      pCtx->pDec->iMbEcedNum = 0;
-      pCtx->pDec->iMbEcedPropNum = 0;
-    }
-    pCtx->bRPLRError = false;
-    GetI4LumaIChromaAddrTable (pCtx->iDecBlockOffsetArray, pCtx->pDec->iLinesize[0], pCtx->pDec->iLinesize[1]);
-
-    if (pNalCur->sNalHeaderExt.uiLayerDqId > kuiTargetLayerDqId) { // confirmed pNalCur will never be NULL
-      break; // Per formance it need not to decode the remaining bits any more due to given uiLayerDqId required, 9/2/2009
-    }
-
-    memset (&pLayerInfo, 0, sizeof (SLayerInfo));
-
-    /*
-     *  Loop decoding for slices (even FMO and/ multiple slices) within a dq layer
-     */
-    while (iIdx <= iEndIdx) {
-      bool         bReconstructSlice;
-      iCurrIdQ  = pNalCur->sNalHeaderExt.uiQualityId;
-      iCurrIdD  = pNalCur->sNalHeaderExt.uiDependencyId;
-      pSh       = &pNalCur->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
-      pShExt    = &pNalCur->sNalData.sVclNal.sSliceHeaderExt;
-      pCtx->bRPLRError = false;
-      bReconstructSlice = CheckSliceNeedReconstruct (pNalCur->sNalHeaderExt.uiLayerDqId, kuiTargetLayerDqId);
-
-      memcpy (&pLayerInfo.sNalHeaderExt, &pNalCur->sNalHeaderExt, sizeof (SNalUnitHeaderExt)); //confirmed_safe_unsafe_usage
-
-      pCtx->pDec->iFrameNum = pSh->iFrameNum;
-      pCtx->pDec->iFramePoc = pSh->iPicOrderCntLsb; // still can not obtain correct, because current do not support POCtype 2
-      pCtx->pDec->bIdrFlag = pNalCur->sNalHeaderExt.bIdrFlag;
-
-      memcpy (&pLayerInfo.sSliceInLayer.sSliceHeaderExt, pShExt, sizeof (SSliceHeaderExt)); //confirmed_safe_unsafe_usage
-      pLayerInfo.sSliceInLayer.bSliceHeaderExtFlag      = pNalCur->sNalData.sVclNal.bSliceHeaderExtFlag;
-      pLayerInfo.sSliceInLayer.eSliceType               = pSh->eSliceType;
-      pLayerInfo.sSliceInLayer.iLastMbQp                = pSh->iSliceQp;
-      dq_cur->pBitStringAux = &pNalCur->sNalData.sVclNal.sSliceBitsRead;
-
-      uiNalRefIdc = pNalCur->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc;
-
-      iPpsId = pSh->iPpsId;
-
-      pLayerInfo.pPps = pSh->pPps;
-      pLayerInfo.pSps = pSh->pSps;
-      pLayerInfo.pSubsetSps = pShExt->pSubsetSps;
-
-      pCtx->pFmo = &pCtx->sFmoList[iPpsId];
-      iRet = FmoParamUpdate (pCtx->pFmo, pLayerInfo.pSps, pLayerInfo.pPps, &pCtx->iActiveFmoNum, pCtx->pMemAlign);
-      if (ERR_NONE != iRet) {
-        if (iRet == ERR_INFO_OUT_OF_MEMORY) {
-          pCtx->iErrorCode |= dsOutOfMemory;
-          WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "DecodeCurrentAccessUnit(), Fmo param alloc failed");
-        } else {
-          pCtx->iErrorCode |= dsBitstreamError;
-          WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "DecodeCurrentAccessUnit(), FmoParamUpdate failed, eSliceType: %d.",
-                   pSh->eSliceType);
-        }
-        return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_FMO_INIT_FAIL);
-      }
-
-      bFreshSliceAvailable = (iCurrIdD != iLastIdD
-                              || iCurrIdQ != iLastIdQ);        // do not need condition of (first_mb == 0) due multiple slices might be disorder
-
-      WelsDqLayerDecodeStart (pCtx, pNalCur, pLayerInfo.pSps, pLayerInfo.pPps);
-
-      if (iCurrIdQ == BASE_QUALITY_ID) {
-        ST64 (iRefCount, LD64 (pLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiRefCount));
-      }
-
-      if ((iLastIdD < 0) ||  //case 1: first layer
-          (iLastIdD == iCurrIdD)) { //case 2: same uiDId
-        InitDqLayerInfo (dq_cur, &pLayerInfo, pNalCur, pCtx->pDec);
-
-        if (!dq_cur->sLayerInfo.pSps->bGapsInFrameNumValueAllowedFlag) {
-          const bool kbIdrFlag = dq_cur->sLayerInfo.sNalHeaderExt.bIdrFlag
-                                 || (dq_cur->sLayerInfo.sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR);
-          // Subclause 8.2.5.2 Decoding process for gaps in frame_num
-          if (!kbIdrFlag  &&
-              pSh->iFrameNum != pCtx->iPrevFrameNum &&
-              pSh->iFrameNum != ((pCtx->iPrevFrameNum + 1) & ((1 << dq_cur->sLayerInfo.pSps->uiLog2MaxFrameNum) - 1))) {
-            WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-                     "referencing pictures lost due frame gaps exist, prev_frame_num: %d, curr_frame_num: %d", pCtx->iPrevFrameNum,
-                     pSh->iFrameNum);
-
-            bAllRefComplete = false;
-            pCtx->iErrorCode |= dsRefLost;
-            if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
-#ifdef LONG_TERM_REF
-              pCtx->bParamSetsLostFlag = true;
-#else
-              pCtx->bReferenceLostAtT0Flag = true;
-#endif
-              return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_REFERENCE_PIC_LOST);
-            }
-          }
-        }
-
-        if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID) {
-          iRet = InitRefPicList (pCtx, uiNalRefIdc, pSh->iPicOrderCntLsb);
-          if (iRet) {
-            pCtx->bRPLRError = true;
-            bAllRefComplete = false; // RPLR error, set ref pictures complete flag false
-            HandleReferenceLost (pCtx, pNalCur);
-            WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
-                     "reference picture introduced by this frame is lost during transmission! uiTId: %d",
-                     pNalCur->sNalHeaderExt.uiTemporalId);
-            if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
-              if (pCtx->iTotalNumMbRec == 0)
-                pCtx->pDec = NULL;
-              return iRet;
-            }
-          }
-        }
-
-        iRet = WelsDecodeSlice (pCtx, bFreshSliceAvailable, pNalCur);
-
-        //Output good store_base reconstruction when enhancement quality layer occurred error for MGS key picture case
-        if (iRet != ERR_NONE) {
-          WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-                   "DecodeCurrentAccessUnit() failed (%d) in frame: %d uiDId: %d uiQId: %d",
-                   iRet, pSh->iFrameNum, iCurrIdD, iCurrIdQ);
-          bAllRefComplete = false;
-          HandleReferenceLostL0 (pCtx, pNalCur);
-          if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
-            if (pCtx->iTotalNumMbRec == 0)
-              pCtx->pDec = NULL;
-            return iRet;
-          }
-        }
-
-        if (bReconstructSlice) {
-          if ((iRet = WelsDecodeConstructSlice (pCtx, pNalCur)) != ERR_NONE) {
-            pCtx->pDec->bIsComplete = false; // reconstruction error, directly set the flag false
-            return iRet;
-          }
-        }
-        if (bAllRefComplete && pCtx->eSliceType != I_SLICE) {
-          if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) {
-            bAllRefComplete &= CheckRefPicturesComplete (pCtx);
-          } else {
-            bAllRefComplete = false;
-          }
-        }
-      }
-#if defined (_DEBUG) &&  !defined (CODEC_FOR_TESTBED)
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "cur_frame : %d\tiCurrIdD : %d\n ",
-               dq_cur->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFrameNum, iCurrIdD);
-#endif//#if !CODEC_FOR_TESTBED
-      iLastIdD = iCurrIdD;
-      iLastIdQ = iCurrIdQ;
-
-      //pNalUnitsList overflow.
-      ++ iIdx;
-      if (iIdx <= iEndIdx) {
-        pNalCur = pCurAu->pNalUnitsList[iIdx];
-      } else {
-        pNalCur = NULL;
-      }
-
-      if (pNalCur == NULL ||
-          iLastIdD != pNalCur->sNalHeaderExt.uiDependencyId ||
-          iLastIdQ != pNalCur->sNalHeaderExt.uiQualityId)
-        break;
-    }
-
-    // Set the current dec picture complete flag. The flag will be reset when current picture need do ErrorCon.
-    pCtx->pDec->bIsComplete = bAllRefComplete;
-    if (!pCtx->pDec->bIsComplete) {  // Ref pictures ECed, result in ECed
-      pCtx->iErrorCode |= dsDataErrorConcealed;
-    }
-
-    // A dq layer decoded here
-#if defined (_DEBUG) &&  !defined (CODEC_FOR_TESTBED)
-#undef fprintf
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "POC: #%d, FRAME: #%d, D: %d, Q: %d, T: %d, P: %d, %d\n",
-             pSh->iPicOrderCntLsb, pSh->iFrameNum, iCurrIdD, iCurrIdQ, dq_cur->sLayerInfo.sNalHeaderExt.uiTemporalId,
-             dq_cur->sLayerInfo.sNalHeaderExt.uiPriorityId, dq_cur->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iSliceQp);
-#endif//#if !CODEC_FOR_TESTBED
-
-    if (dq_cur->uiLayerDqId == kuiTargetLayerDqId) {
-      if (!pCtx->bInstantDecFlag) {
-        if (!pCtx->pParam->bParseOnly) {
-          //Do error concealment here
-          if ((NeedErrorCon (pCtx)) && (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE)) {
-            ImplementErrorCon (pCtx);
-            pCtx->iTotalNumMbRec = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight;
-            pCtx->pDec->iSpsId = pCtx->pSps->iSpsId;
-            pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
-          }
-        }
-      }
-
-      iRet = DecodeFrameConstruction (pCtx, ppDst, pDstInfo);
-      if (iRet)
-        return iRet;
-
-      pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; //store latest decoded picture for EC
-      pCtx->bUsedAsRef = false;
-      if (uiNalRefIdc > 0) {
-        pCtx->bUsedAsRef = true;
-        //save MBType, MV and RefIndex for use in B-Slice direct mode
-        memcpy (pCtx->pDec->pMbType, pCtx->pCurDqLayer->pMbType, pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint32_t));
-        memcpy (pCtx->pDec->pMv[LIST_0], pCtx->pCurDqLayer->pMv[LIST_0],
-                pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM);
-        memcpy (pCtx->pDec->pMv[LIST_1], pCtx->pCurDqLayer->pMv[LIST_1],
-                pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM);
-        memcpy (pCtx->pDec->pRefIndex[LIST_0], pCtx->pCurDqLayer->pRefIndex[LIST_0],
-                pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * MB_BLOCK4x4_NUM);
-        memcpy (pCtx->pDec->pRefIndex[LIST_1], pCtx->pCurDqLayer->pRefIndex[LIST_1],
-                pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * MB_BLOCK4x4_NUM);
-        for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
-          for (uint32_t i = 0; i < pCtx->sRefPic.uiRefCount[listIdx]; ++i) {
-            pCtx->pDec->pRefPic[listIdx][i] = pCtx->sRefPic.pRefList[listIdx][i];
-          }
-        }
-        iRet = WelsMarkAsRef (pCtx);
-        if (iRet != ERR_NONE) {
-          if (iRet == ERR_INFO_DUPLICATE_FRAME_NUM)
-            pCtx->iErrorCode |= dsBitstreamError;
-          if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
-            pCtx->pDec = NULL;
-            return iRet;
-          }
-        }
-        if (!pCtx->pParam->bParseOnly)
-          ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel,
-                                    pCtx->pDec->iLinesize,
-                                    pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
-      }
-      pCtx->pDec = NULL; //after frame decoding, always set to NULL
-    }
-
-    // need update frame_num due current frame is well decoded
-    if (pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0)
-      pCtx->iPrevFrameNum = pSh->iFrameNum;
-    if (pCtx->bLastHasMmco5)
-      pCtx->iPrevFrameNum = 0;
-  }
-
-  return ERR_NONE;
-}
-
-bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
-  PAccessUnit pAu = pCtx->pAccessUnitList;
-  bool bAuBoundaryFlag = false;
-  if (IS_VCL_NAL (pCtx->sCurNalHead.eNalUnitType, 1)) { //VCL data, AU list should have data
-    PNalUnit pCurNal = pAu->pNalUnitsList[pAu->uiEndPos];
-    bAuBoundaryFlag = (pCtx->iTotalNumMbRec != 0)
-                      && (CheckAccessUnitBoundaryExt (&pCtx->sLastNalHdrExt, &pCurNal->sNalHeaderExt, &pCtx->sLastSliceHeader,
-                          &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader));
-  } else { //non VCL
-    if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_AU_DELIMITER) {
-      bAuBoundaryFlag = true;
-    } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SEI) {
-      bAuBoundaryFlag = true;
-    } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SPS) {
-      bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_SPS);
-    } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SUBSET_SPS) {
-      bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS);
-    } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_PPS) {
-      bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_PPS);
-    }
-    if (bAuBoundaryFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) { //Construct remaining data first
-      ConstructAccessUnit (pCtx, ppDst, pDstInfo);
-    }
-  }
-
-  //Do Error Concealment here
-  if (bAuBoundaryFlag && (pCtx->iTotalNumMbRec != 0) && NeedErrorCon (pCtx)) { //AU ready but frame not completely reconed
-    if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
-      ImplementErrorCon (pCtx);
-      pCtx->iTotalNumMbRec = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight;
-      pCtx->pDec->iSpsId = pCtx->pSps->iSpsId;
-      pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
-
-      DecodeFrameConstruction (pCtx, ppDst, pDstInfo);
-      pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; //save ECed pic for future use
-      if (pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) {
-        MarkECFrameAsRef (pCtx);
-      }
-    } else if (pCtx->pParam->bParseOnly) { //clear parse only internal data status
-      pCtx->pParserBsInfo->iNalNum = 0;
-      pCtx->bFrameFinish = true; //clear frame pending status here!
-    } else {
-      if (DecodeFrameConstruction (pCtx, ppDst, pDstInfo)) {
-        if ((pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) && (pCtx->sLastNalHdrExt.uiTemporalId == 0))
-          pCtx->iErrorCode |= dsNoParamSets;
-        else
-          pCtx->iErrorCode |= dsBitstreamError;
-        pCtx->pDec = NULL;
-        return false;
-      }
-    }
-    pCtx->pDec = NULL;
-    if (pAu->pNalUnitsList[pAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0)
-      pCtx->iPrevFrameNum = pCtx->sLastSliceHeader.iFrameNum; //save frame_num
-    if (pCtx->bLastHasMmco5)
-      pCtx->iPrevFrameNum = 0;
-  }
-  return ERR_NONE;
-}
-
-bool CheckRefPicturesComplete (PWelsDecoderContext pCtx) {
-  // Multi Reference, RefIdx may differ
-  bool bAllRefComplete = true;
-  int32_t iRealMbIdx = pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
-  for (int32_t iMbIdx = 0; bAllRefComplete
-       && iMbIdx < pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice; iMbIdx++) {
-    switch (pCtx->pCurDqLayer->pMbType[iRealMbIdx]) {
-    case MB_TYPE_SKIP:
-    case MB_TYPE_16x16:
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
-      break;
-
-    case MB_TYPE_16x8:
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
-      break;
-
-    case MB_TYPE_8x16:
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
-      break;
-
-    case MB_TYPE_8x8:
-    case MB_TYPE_8x8_REF0:
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][10] ]->bIsComplete;
-      break;
-
-    default:
-      break;
-    }
-    iRealMbIdx = (pCtx->pPps->uiNumSliceGroups > 1) ? FmoNextMb (pCtx->pFmo, iRealMbIdx) :
-                 (pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice + iMbIdx);
-    if (iRealMbIdx == -1) //caused by abnormal return of FmoNextMb()
-      return false;
-  }
-  return bAllRefComplete;
-}
-} // namespace WelsDec
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *      decoder_core.c: Wels decoder framework core implementation
+ */
+
+#include "decoder_core.h"
+#include "error_code.h"
+#include "memmgr_nal_unit.h"
+#include "au_parser.h"
+#include "decode_slice.h"
+#include "manage_dec_ref.h"
+#include "expand_pic.h"
+#include "decoder.h"
+#include "decode_mb_aux.h"
+#include "memory_align.h"
+#include "error_concealment.h"
+
+namespace WelsDec {
+static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
+  PDqLayer pCurDq = pCtx->pCurDqLayer;
+  PPicture pPic = pCtx->pDec;
+
+  const int32_t kiWidth = pCurDq->iMbWidth << 4;
+  const int32_t kiHeight = pCurDq->iMbHeight << 4;
+
+  const int32_t kiTotalNumMbInCurLayer = pCurDq->iMbWidth * pCurDq->iMbHeight;
+  bool bFrameCompleteFlag = true;
+
+  if (pPic->bNewSeqBegin) {
+    memcpy (& (pCtx->sFrameCrop), & (pCurDq->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.pSps->sFrameCrop),
+            sizeof (SPosOffset)); //confirmed_safe_unsafe_usage
+#ifdef LONG_TERM_REF
+    pCtx->bParamSetsLostFlag      = false;
+#else
+    pCtx->bReferenceLostAtT0Flag = false; // need initialize it due new seq, 6/4/2010
+#endif //LONG_TERM_REF
+    if (pCtx->iTotalNumMbRec == kiTotalNumMbInCurLayer) {
+      pCtx->bPrintFrameErrorTraceFlag = true;
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
+               "DecodeFrameConstruction(): will output first frame of new sequence, %d x %d, crop_left:%d, crop_right:%d, crop_top:%d, crop_bottom:%d, ignored error packet:%d.",
+               kiWidth, kiHeight, pCtx->sFrameCrop.iLeftOffset, pCtx->sFrameCrop.iRightOffset, pCtx->sFrameCrop.iTopOffset,
+               pCtx->sFrameCrop.iBottomOffset, pCtx->iIgnoredErrorInfoPacketCount);
+      pCtx->iIgnoredErrorInfoPacketCount = 0;
+    }
+  }
+
+  const int32_t kiActualWidth = kiWidth - (pCtx->sFrameCrop.iLeftOffset + pCtx->sFrameCrop.iRightOffset) * 2;
+  const int32_t kiActualHeight = kiHeight - (pCtx->sFrameCrop.iTopOffset + pCtx->sFrameCrop.iBottomOffset) * 2;
+
+
+  if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+    if ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth)
+        || (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight)) {
+      pCtx->sDecoderStatistics.uiResolutionChangeTimes++;
+      pCtx->sDecoderStatistics.uiWidth = kiActualWidth;
+      pCtx->sDecoderStatistics.uiHeight = kiActualHeight;
+    }
+    UpdateDecStatNoFreezingInfo (pCtx);
+  }
+
+  if (pCtx->pParam->bParseOnly) { //should exit for parse only to prevent access NULL pDstInfo
+    PAccessUnit pCurAu = pCtx->pAccessUnitList;
+    if (dsErrorFree == pCtx->iErrorCode) { //correct decoding, add to data buffer
+      SParserBsInfo* pParser = pCtx->pParserBsInfo;
+      SNalUnit* pCurNal = NULL;
+      int32_t iTotalNalLen = 0;
+      int32_t iNalLen = 0;
+      int32_t iNum = 0;
+      while (iNum < pParser->iNalNum) {
+        iTotalNalLen += pParser->pNalLenInByte[iNum++];
+      }
+      uint8_t* pDstBuf = pParser->pDstBuff + iTotalNalLen;
+      int32_t iIdx = pCurAu->uiStartPos;
+      int32_t iEndIdx = pCurAu->uiEndPos;
+      uint8_t* pNalBs = NULL;
+      pParser->uiOutBsTimeStamp = (pCurAu->pNalUnitsList [iIdx]) ? pCurAu->pNalUnitsList [iIdx]->uiTimeStamp : 0;
+      //pParser->iNalNum = 0;
+      pParser->iSpsWidthInPixel = (pCtx->pSps->iMbWidth << 4) - ((pCtx->pSps->sFrameCrop.iLeftOffset +
+                                  pCtx->pSps->sFrameCrop.iRightOffset) << 1);
+      pParser->iSpsHeightInPixel = (pCtx->pSps->iMbHeight << 4) - ((pCtx->pSps->sFrameCrop.iTopOffset +
+                                   pCtx->pSps->sFrameCrop.iBottomOffset) << 1);
+
+      if (pCurAu->pNalUnitsList [iIdx]->sNalHeaderExt.bIdrFlag) { //IDR
+        if (pCtx->bFrameFinish) { //add required sps/pps
+          if (pParser->iNalNum > pCtx->iMaxNalNum - 2) { //2 reserved for sps+pps
+            WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
+                     "DecodeFrameConstruction(): current NAL num (%d) plus sps & pps exceeds permitted num (%d). Will expand",
+                     pParser->iNalNum, pCtx->iMaxNalNum);
+            WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, ExpandBsLenBuffer (pCtx, pParser->iNalNum + 2))
+          }
+          bool bSubSps = (NAL_UNIT_CODED_SLICE_EXT == pCurAu->pNalUnitsList [iIdx]->sNalHeaderExt.sNalUnitHeader.eNalUnitType);
+          SSpsBsInfo* pSpsBs = NULL;
+          SPpsBsInfo* pPpsBs = NULL;
+          int32_t iSpsId = pCtx->pSps->iSpsId;
+          int32_t iPpsId = pCtx->pPps->iPpsId;
+          pCtx->bParamSetsLostFlag = false;
+          //find required sps, pps and write into dst buff
+          pSpsBs = bSubSps ? &pCtx->sSubsetSpsBsInfo [iSpsId] : &pCtx->sSpsBsInfo [iSpsId];
+          pPpsBs = &pCtx->sPpsBsInfo [iPpsId];
+          if (pDstBuf - pParser->pDstBuff + pSpsBs->uiSpsBsLen + pPpsBs->uiPpsBsLen >= MAX_ACCESS_UNIT_CAPACITY) {
+            WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+                     "DecodeFrameConstruction(): sps pps size: (%d %d) too large. Failed to parse. \n", pSpsBs->uiSpsBsLen,
+                     pPpsBs->uiPpsBsLen);
+            pCtx->iErrorCode |= dsOutOfMemory;
+            pCtx->pParserBsInfo->iNalNum = 0;
+            return ERR_INFO_OUT_OF_MEMORY;
+          }
+          memcpy (pDstBuf, pSpsBs->pSpsBsBuf, pSpsBs->uiSpsBsLen);
+          pParser->pNalLenInByte [pParser->iNalNum ++] = pSpsBs->uiSpsBsLen;
+          pDstBuf += pSpsBs->uiSpsBsLen;
+          memcpy (pDstBuf, pPpsBs->pPpsBsBuf, pPpsBs->uiPpsBsLen);
+          pParser->pNalLenInByte [pParser->iNalNum ++] = pPpsBs->uiPpsBsLen;
+          pDstBuf += pPpsBs->uiPpsBsLen;
+          pCtx->bFrameFinish = false;
+        }
+      }
+      //then VCL data re-write
+      if (pParser->iNalNum + iEndIdx - iIdx + 1 > pCtx->iMaxNalNum) { //calculate total NAL num
+        WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
+                 "DecodeFrameConstruction(): current NAL num (%d) exceeds permitted num (%d). Will expand",
+                 pParser->iNalNum + iEndIdx - iIdx + 1, pCtx->iMaxNalNum);
+        WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, ExpandBsLenBuffer (pCtx, pParser->iNalNum + iEndIdx - iIdx + 1))
+      }
+      while (iIdx <= iEndIdx) {
+        pCurNal = pCurAu->pNalUnitsList [iIdx ++];
+        iNalLen = pCurNal->sNalData.sVclNal.iNalLength;
+        pNalBs = pCurNal->sNalData.sVclNal.pNalPos;
+        pParser->pNalLenInByte [pParser->iNalNum ++] = iNalLen;
+        if (pDstBuf - pParser->pDstBuff + iNalLen >= MAX_ACCESS_UNIT_CAPACITY) {
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+                   "DecodeFrameConstruction(): composed output size (%ld) exceeds (%d). Failed to parse. current data pos %d out of %d:, previously accumulated num: %d, total num: %d, previously accumulated len: %d, current len: %d, current buf pos: %p, header buf pos: %p \n",
+                   (long) (pDstBuf - pParser->pDstBuff + iNalLen), MAX_ACCESS_UNIT_CAPACITY, iIdx, iEndIdx, iNum, pParser->iNalNum,
+                   iTotalNalLen, iNalLen, pDstBuf, pParser->pDstBuff);
+          pCtx->iErrorCode |= dsOutOfMemory;
+          pCtx->pParserBsInfo->iNalNum = 0;
+          return ERR_INFO_OUT_OF_MEMORY;
+        }
+
+        memcpy (pDstBuf, pNalBs, iNalLen);
+        pDstBuf += iNalLen;
+      }
+      if (pCtx->iTotalNumMbRec == kiTotalNumMbInCurLayer) { //frame complete
+        pCtx->iTotalNumMbRec = 0;
+        pCtx->bFramePending = false;
+        pCtx->bFrameFinish = true; //finish current frame and mark it
+      } else if (pCtx->iTotalNumMbRec != 0) { //frame incomplete
+        pCtx->bFramePending = true;
+        pCtx->pDec->bIsComplete = false;
+        pCtx->bFrameFinish = false; //current frame not finished
+        pCtx->iErrorCode |= dsFramePending;
+        return ERR_INFO_PARSEONLY_PENDING;
+        //pCtx->pParserBsInfo->iNalNum = 0;
+      }
+    } else { //error
+      pCtx->pParserBsInfo->uiOutBsTimeStamp = 0;
+      pCtx->pParserBsInfo->iNalNum = 0;
+      pCtx->pParserBsInfo->iSpsWidthInPixel = 0;
+      pCtx->pParserBsInfo->iSpsHeightInPixel = 0;
+      return ERR_INFO_PARSEONLY_ERROR;
+    }
+    return ERR_NONE;
+  }
+
+  if (pCtx->iTotalNumMbRec != kiTotalNumMbInCurLayer) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
+             "DecodeFrameConstruction(): iTotalNumMbRec:%d, total_num_mb_sps:%d, cur_layer_mb_width:%d, cur_layer_mb_height:%d ",
+             pCtx->iTotalNumMbRec, kiTotalNumMbInCurLayer, pCurDq->iMbWidth, pCurDq->iMbHeight);
+    bFrameCompleteFlag = false; //return later after output buffer is done
+    if (pCtx->bInstantDecFlag) //no-delay decoding, wait for new slice
+      return ERR_INFO_MB_NUM_INADEQUATE;
+  } else if (pCurDq->sLayerInfo.sNalHeaderExt.bIdrFlag
+             && (pCtx->iErrorCode == dsErrorFree)) { //complete non-ECed IDR frame done
+    pCtx->pDec->bIsComplete = true;
+    pCtx->bFreezeOutput = false;
+  }
+
+  pCtx->iTotalNumMbRec = 0;
+
+  //////output:::normal path
+  pDstInfo->uiOutYuvTimeStamp = pPic->uiTimeStamp;
+  ppDst[0]      = pPic->pData[0];
+  ppDst[1]      = pPic->pData[1];
+  ppDst[2]      = pPic->pData[2];
+
+  pDstInfo->UsrData.sSystemBuffer.iFormat = videoFormatI420;
+
+  pDstInfo->UsrData.sSystemBuffer.iWidth = kiActualWidth;
+  pDstInfo->UsrData.sSystemBuffer.iHeight = kiActualHeight;
+  pDstInfo->UsrData.sSystemBuffer.iStride[0] = pPic->iLinesize[0];
+  pDstInfo->UsrData.sSystemBuffer.iStride[1] = pPic->iLinesize[1];
+  ppDst[0] = ppDst[0] + pCtx->sFrameCrop.iTopOffset * 2 * pPic->iLinesize[0] + pCtx->sFrameCrop.iLeftOffset * 2;
+  ppDst[1] = ppDst[1] + pCtx->sFrameCrop.iTopOffset  * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
+  ppDst[2] = ppDst[2] + pCtx->sFrameCrop.iTopOffset  * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
+  pDstInfo->iBufferStatus = 1;
+
+  bool bOutResChange = (pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
+                       || (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
+  pCtx->iLastImgWidthInPixel = pDstInfo->UsrData.sSystemBuffer.iWidth;
+  pCtx->iLastImgHeightInPixel = pDstInfo->UsrData.sSystemBuffer.iHeight;
+  if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) //no buffer output if EC is disabled and frame incomplete
+    pDstInfo->iBufferStatus = (int32_t) (bFrameCompleteFlag
+                                         && pPic->bIsComplete); // When EC disable, ECed picture not output
+  else if ((pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE
+            || pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE)
+           && pCtx->iErrorCode && bOutResChange)
+    pCtx->bFreezeOutput = true;
+
+  if (pDstInfo->iBufferStatus == 0) {
+    if (!bFrameCompleteFlag)
+      pCtx->iErrorCode |= dsBitstreamError;
+    return ERR_INFO_MB_NUM_INADEQUATE;
+  }
+  if (pCtx->bFreezeOutput) {
+    pDstInfo->iBufferStatus = 0;
+    if (pPic->bNewSeqBegin) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
+               "DecodeFrameConstruction():New sequence detected, but freezed, correct MBs (%d) out of whole MBs (%d).",
+               kiTotalNumMbInCurLayer - pCtx->iMbEcedNum, kiTotalNumMbInCurLayer);
+    }
+  }
+  pCtx->iMbEcedNum = pPic->iMbEcedNum;
+  pCtx->iMbNum = pPic->iMbNum;
+  pCtx->iMbEcedPropNum = pPic->iMbEcedPropNum;
+  if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+    if (pDstInfo->iBufferStatus && ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth)
+                                    || (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight))) {
+      pCtx->sDecoderStatistics.uiResolutionChangeTimes++;
+      pCtx->sDecoderStatistics.uiWidth = kiActualWidth;
+      pCtx->sDecoderStatistics.uiHeight = kiActualHeight;
+    }
+    UpdateDecStat (pCtx, pDstInfo->iBufferStatus != 0);
+  }
+  return ERR_NONE;
+}
+
+inline bool    CheckSliceNeedReconstruct (uint8_t uiLayerDqId, uint8_t uiTargetDqId) {
+  return (uiLayerDqId == uiTargetDqId); // target layer
+}
+
+inline uint8_t GetTargetDqId (uint8_t uiTargetDqId,  SDecodingParam* psParam) {
+  uint8_t  uiRequiredDqId = psParam ? psParam->uiTargetDqLayer : (uint8_t)255;
+
+  return WELS_MIN (uiTargetDqId, uiRequiredDqId);
+}
+
+
+inline void    HandleReferenceLostL0 (PWelsDecoderContext pCtx, PNalUnit pCurNal) {
+  if (0 == pCurNal->sNalHeaderExt.uiTemporalId) {
+    pCtx->bReferenceLostAtT0Flag = true;
+  }
+  pCtx->iErrorCode |= dsBitstreamError;
+}
+
+inline void    HandleReferenceLost (PWelsDecoderContext pCtx, PNalUnit pCurNal) {
+  if ((0 == pCurNal->sNalHeaderExt.uiTemporalId) || (1 == pCurNal->sNalHeaderExt.uiTemporalId)) {
+    pCtx->bReferenceLostAtT0Flag = true;
+  }
+  pCtx->iErrorCode |= dsRefLost;
+}
+
+inline int32_t  WelsDecodeConstructSlice (PWelsDecoderContext pCtx, PNalUnit pCurNal) {
+  int32_t  iRet = WelsTargetSliceConstruction (pCtx);
+
+  if (iRet) {
+    HandleReferenceLostL0 (pCtx, pCurNal);
+  }
+
+  return iRet;
+}
+
+int32_t ParsePredWeightedTable (PBitStringAux pBs, PSliceHeader pSh) {
+  uint32_t uiCode;
+  int32_t iList = 0;
+  int32_t iCode;
+
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode));
+  WELS_CHECK_SE_BOTH_ERROR_NOLOG (uiCode, 0, 7, "luma_log2_weight_denom",
+                                  GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_LOG2_WEIGHT_DENOM));
+  pSh->sPredWeightTable.uiLumaLog2WeightDenom = uiCode;
+  if (pSh->pSps->uiChromaArrayType != 0) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode));
+    WELS_CHECK_SE_BOTH_ERROR_NOLOG (uiCode, 0, 7, "chroma_log2_weight_denom",
+                                    GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_LOG2_WEIGHT_DENOM));
+    pSh->sPredWeightTable.uiChromaLog2WeightDenom = uiCode;
+  }
+
+  if ((pSh->sPredWeightTable.uiLumaLog2WeightDenom | pSh->sPredWeightTable.uiChromaLog2WeightDenom) > 7)
+    return ERR_NONE;
+
+  do {
+
+    for (int i = 0; i < pSh->uiRefCount[iList]; i++) {
+      //luma
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
+      if (!!uiCode) {
+
+        WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
+        WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "luma_weight",
+                                        GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_WEIGHT));
+        pSh->sPredWeightTable.sPredList[iList].iLumaWeight[i] = iCode;
+
+        WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
+        WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "luma_offset",
+                                        GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_OFFSET));
+        pSh->sPredWeightTable.sPredList[iList].iLumaOffset[i] = iCode;
+      } else {
+        pSh->sPredWeightTable.sPredList[iList].iLumaWeight[i] = 1 << (pSh->sPredWeightTable.uiLumaLog2WeightDenom);
+        pSh->sPredWeightTable.sPredList[iList].iLumaOffset[i] = 0;
+
+      }
+      //chroma
+      if (pSh->pSps->uiChromaArrayType == 0)
+        continue;
+
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
+      if (!!uiCode) {
+        for (int j = 0; j < 2; j++) {
+
+
+          WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
+          WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "chroma_weight",
+                                          GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_WEIGHT));
+          pSh->sPredWeightTable.sPredList[iList].iChromaWeight[i][j] = iCode;
+
+          WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
+          WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "chroma_offset",
+                                          GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_OFFSET));
+          pSh->sPredWeightTable.sPredList[iList].iChromaOffset[i][j] = iCode;
+        }
+      } else {
+        for (int j = 0; j < 2; j++) {
+
+
+          pSh->sPredWeightTable.sPredList[iList].iChromaWeight[i][j] = 1 << (pSh->sPredWeightTable.uiChromaLog2WeightDenom);
+          pSh->sPredWeightTable.sPredList[iList].iChromaOffset[i][j] = 0;
+        }
+      }
+
+    }
+    ++iList;
+    if (pSh->eSliceType != B_SLICE) {
+      break;
+    }
+  } while (iList < LIST_A);//TODO: SUPPORT LIST_A
+  return ERR_NONE;
+}
+
+/*
+ * CreateImplicitWeightTable
+ * Fills pCurDqLayer->pPredWeightTable->iImplicitWeight[][] for the current slice when the
+ * layer uses weighted bi-prediction and the PPS signals uiWeightedBipredIdc == 2.
+ *
+ * - If both lists hold exactly one reference and the two reference POCs sum to twice the
+ *   current POC, weighted bi-prediction is switched off for the layer and nothing is filled.
+ * - Otherwise both log2 weight denominators are set to 5 and every (iRef0, iRef1) pair whose
+ *   reference pointers are non-NULL gets a weight: 32 by default, or 64 - iDistScaleFactor
+ *   when neither picture is long-term, the clipped POC distance iTd is non-zero and the
+ *   scale factor lies in [-64, 128].
+ *
+ * Parameter:
+ *  pCtx:   decoder context; pCurDqLayer, its slice header (with pPps) and sRefPic are read.
+ */
+void CreateImplicitWeightTable (PWelsDecoderContext pCtx) {
+
+  PSlice pSlice = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+  if (pCurDqLayer->bUseWeightedBiPredIdc && pSliceHeader->pPps->uiWeightedBipredIdc == 2) {
+    int32_t iPoc = pSliceHeader->iPicOrderCntLsb;
+
+    // The first entry of either list may be NULL (the loops below already allow for that),
+    // so test the pointers before dereferencing. The POC sum is done in 64 bits so that two
+    // large 32-bit POCs cannot overflow.
+    if (pSliceHeader->uiRefCount[0] == 1 && pSliceHeader->uiRefCount[1] == 1
+        && pCtx->sRefPic.pRefList[LIST_0][0] && pCtx->sRefPic.pRefList[LIST_1][0]
+        && (int64_t) pCtx->sRefPic.pRefList[LIST_0][0]->iFramePoc + (int64_t) pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc
+        == 2 * (int64_t) iPoc) {
+      pCurDqLayer->bUseWeightedBiPredIdc = false;
+      return;
+    }
+
+    pCurDqLayer->pPredWeightTable->uiLumaLog2WeightDenom = 5;
+    pCurDqLayer->pPredWeightTable->uiChromaLog2WeightDenom = 5;
+    for (int32_t iRef0 = 0; iRef0 < pSliceHeader->uiRefCount[0]; iRef0++) {
+      if (pCtx->sRefPic.pRefList[LIST_0][iRef0]) {
+        const int32_t iPoc0 = pCtx->sRefPic.pRefList[LIST_0][iRef0]->iFramePoc;
+        bool bIsLongRef0 = pCtx->sRefPic.pRefList[LIST_0][iRef0]->bIsLongRef;
+        for (int32_t iRef1 = 0; iRef1 < pSliceHeader->uiRefCount[1]; iRef1++) {
+          if (pCtx->sRefPic.pRefList[LIST_1][iRef1]) {
+            const int32_t iPoc1 = pCtx->sRefPic.pRefList[LIST_1][iRef1]->iFramePoc;
+            bool bIsLongRef1 = pCtx->sRefPic.pRefList[LIST_1][iRef1]->bIsLongRef;
+            // default: equal weighting (32 out of 64)
+            pCurDqLayer->pPredWeightTable->iImplicitWeight[iRef0][iRef1] = 32;
+            if (!bIsLongRef0 && !bIsLongRef1) {
+              const int32_t iTd = WELS_CLIP3 (iPoc1 - iPoc0, -128, 127);
+              if (iTd) {
+                int32_t iTb = WELS_CLIP3 (iPoc - iPoc0, -128, 127);
+                int32_t iTx = (16384 + (WELS_ABS (iTd) >> 1)) / iTd;
+                int32_t iDistScaleFactor = (iTb * iTx + 32) >> 8;
+                // out-of-range scale factors keep the default weight
+                if (iDistScaleFactor >= -64 && iDistScaleFactor <= 128) {
+                  pCurDqLayer->pPredWeightTable->iImplicitWeight[iRef0][iRef1] = 64 - iDistScaleFactor;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return;
+}
+
+/*
+ *  Predeclared function routines ..
+ */
+/*
+ * ParseRefPicListReordering
+ * Reads the reference picture list modification syntax from pBs into
+ * pSh->pRefPicListReordering.
+ *
+ * I and SI slices carry no such syntax, so ERR_NONE is returned at once for them.
+ * Otherwise one modification flag is read per list; when it is set, entries are read
+ * until an idc of 3 terminates the list. After the first list the loop stops unless the
+ * slice is a B slice, in which case it continues while iList < LIST_A.
+ *
+ * Returns ERR_NONE on success, or an ERR_LEVEL_SLICE_HEADER / ERR_INFO_INVALID_REF_REORDERING
+ * error when an idc is greater than 3 or when the entry index reaches MAX_REF_PIC_COUNT or
+ * pSh->uiRefCount[iList].
+ * NOTE(review): WELS_READ_VERIFY and WELS_CHECK_SE_UPPER_ERROR_NOLOG are defined elsewhere;
+ * presumably they return from this function on a read failure / out-of-range value - confirm.
+ */
+int32_t ParseRefPicListReordering (PBitStringAux pBs, PSliceHeader pSh) {
+  int32_t iList = 0;
+  const EWelsSliceType keSt = pSh->eSliceType;
+  PRefPicListReorderSyn pRefPicListReordering = &pSh->pRefPicListReordering;
+  PSps pSps = pSh->pSps;
+  uint32_t uiCode;
+  if (keSt == I_SLICE || keSt == SI_SLICE)
+    return ERR_NONE;
+
+  // Common syntax elements for P or B slices: list0 first, list1 follows only for B slices.
+  do {
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //ref_pic_list_modification_flag_l0 (or _l1 on the second pass)
+    pRefPicListReordering->bRefPicListReorderingFlag[iList] = !!uiCode;
+
+    if (pRefPicListReordering->bRefPicListReorderingFlag[iList]) {
+      int32_t iIdx = 0;
+      do {
+        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //modification_of_pic_nums_idc
+        const uint32_t kuiIdc = uiCode;
+
+        // Guards the store below: rejects a full entry table and faulty idc values (> 3),
+        // which previously caused a crash in reference list reordering.
+        if ((iIdx >= MAX_REF_PIC_COUNT) || (kuiIdc > 3)) {
+          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING);
+        }
+        pRefPicListReordering->sReorderingSyn[iList][iIdx].uiReorderingOfPicNumsIdc = kuiIdc;
+        // idc 3 ends the modification list for this reference list
+        if (kuiIdc == 3)
+          break;
+
+        // a real (non-terminating) entry must not exceed the number of active references
+        if (iIdx >= pSh->uiRefCount[iList] || iIdx >= MAX_REF_PIC_COUNT)
+          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING);
+
+        if (kuiIdc == 0 || kuiIdc == 1) {
+          // abs_diff_pic_num_minus1 should be in range 0 to MaxPicNum-1, MaxPicNum is derived as
+          // 2^(4+log2_max_frame_num_minus4)
+          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //abs_diff_pic_num_minus1
+          WELS_CHECK_SE_UPPER_ERROR_NOLOG (uiCode, (uint32_t) (1 << pSps->uiLog2MaxFrameNum), "abs_diff_pic_num_minus1",
+                                           GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING));
+          pRefPicListReordering->sReorderingSyn[iList][iIdx].uiAbsDiffPicNumMinus1 = uiCode; // uiAbsDiffPicNumMinus1
+        } else if (kuiIdc == 2) {
+          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_pic_num
+          pRefPicListReordering->sReorderingSyn[iList][iIdx].uiLongTermPicNum = uiCode;
+        }
+
+        ++ iIdx;
+      } while (true);
+    }
+    // only B slices carry a second list
+    if (keSt != B_SLICE)
+      break;
+    ++ iList;
+  } while (iList < LIST_A);
+
+  return ERR_NONE;
+}
+
+/*
+ * ParseDecRefPicMarking
+ * Reads the decoded reference picture marking syntax from pBs into pSh->sRefMarking.
+ *
+ * For an IDR picture (kbIdrFlag) only no_output_of_prior_pics_flag and
+ * long_term_reference_flag are read. Otherwise adaptive_ref_pic_marking_mode_flag is read
+ * and, when set, memory management control operations are stored in sMmcoRef[] until
+ * MMCO_END is met or MAX_MMCO_COUNT entries have been read.
+ *
+ * Parameter:
+ *  pCtx:       decoder context; its previous-POC state is reset when MMCO_RESET is parsed
+ *  pBs:        bitstream reader
+ *  pSh:        slice header receiving the marking data (iFrameNum is read)
+ *  pSps:       active SPS (uiLog2MaxFrameNum is read)
+ *  kbIdrFlag:  true when the current picture is an IDR picture
+ *
+ * Returns ERR_NONE on success.
+ * NOTE(review): WELS_VERIFY_RETURN_IF (-1, cond) presumably returns -1 when cond holds
+ * (duplicate MMCO_LONG / MMCO_SET_MAX_LONG / MMCO_RESET, or MMCO_RESET after an operation
+ * that cleared bAllowMmco5) - confirm against the macro definition.
+ * NOTE(review): if MAX_MMCO_COUNT operations are read without an MMCO_END the loop simply
+ * ends and ERR_NONE is returned - confirm this is intended.
+ */
+int32_t ParseDecRefPicMarking (PWelsDecoderContext pCtx, PBitStringAux pBs, PSliceHeader pSh, PSps pSps,
+                               const bool kbIdrFlag) {
+  PRefPicMarking const kpRefMarking = &pSh->sRefMarking;
+  uint32_t uiCode;
+  if (kbIdrFlag) {
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //no_output_of_prior_pics_flag
+    kpRefMarking->bNoOutputOfPriorPicsFlag = !!uiCode;
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //long_term_reference_flag
+    kpRefMarking->bLongTermRefFlag = !!uiCode;
+  } else {
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_ref_pic_marking_mode_flag
+    kpRefMarking->bAdaptiveRefPicMarkingModeFlag = !!uiCode;
+    if (kpRefMarking->bAdaptiveRefPicMarkingModeFlag) {
+      int32_t iIdx = 0;
+      // bAllowMmco5 is cleared once a short2unused / short2long / long2unused operation is seen;
+      // the bMmcoNExist flags record that operation N has already appeared in this list.
+      bool bAllowMmco5 = true, bMmco4Exist = false, bMmco5Exist = false, bMmco6Exist = false;
+      do {
+        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //memory_management_control_operation
+        const uint32_t kuiMmco = uiCode;
+
+        kpRefMarking->sMmcoRef[iIdx].uiMmcoType = kuiMmco;
+        if (kuiMmco == MMCO_END)
+          break;
+
+        if (kuiMmco == MMCO_SHORT2UNUSED || kuiMmco == MMCO_SHORT2LONG) {
+          bAllowMmco5 = false;
+          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //difference_of_pic_nums_minus1
+          kpRefMarking->sMmcoRef[iIdx].iDiffOfPicNum = 1 + uiCode;
+          // frame number of the addressed short-term picture, wrapped to uiLog2MaxFrameNum bits
+          kpRefMarking->sMmcoRef[iIdx].iShortFrameNum = (pSh->iFrameNum - kpRefMarking->sMmcoRef[iIdx].iDiffOfPicNum) & ((
+                1 << pSps->uiLog2MaxFrameNum) - 1);
+        } else if (kuiMmco == MMCO_LONG2UNUSED) {
+          bAllowMmco5 = false;
+          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_pic_num
+          kpRefMarking->sMmcoRef[iIdx].uiLongTermPicNum = uiCode;
+        }
+        // MMCO_SHORT2LONG is handled twice on purpose: it carries both a picture-number
+        // difference (above) and a long_term_frame_idx (below).
+        if (kuiMmco == MMCO_SHORT2LONG || kuiMmco == MMCO_LONG) {
+          if (kuiMmco == MMCO_LONG) {
+            WELS_VERIFY_RETURN_IF (-1, bMmco6Exist);
+            bMmco6Exist = true;
+          }
+          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_frame_idx
+          kpRefMarking->sMmcoRef[iIdx].iLongTermFrameIdx = uiCode;
+        } else if (kuiMmco == MMCO_SET_MAX_LONG) {
+          WELS_VERIFY_RETURN_IF (-1, bMmco4Exist);
+          bMmco4Exist = true;
+          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //max_long_term_frame_idx_plus1
+          kpRefMarking->sMmcoRef[iIdx].iMaxLongTermFrameIdx = -1 + uiCode;
+        } else if (kuiMmco == MMCO_RESET) {
+          WELS_VERIFY_RETURN_IF (-1, (!bAllowMmco5 || bMmco5Exist));
+          bMmco5Exist = true;
+
+          // reset the POC state kept in the context and in the context's slice header
+          pCtx->iPrevPicOrderCntLsb = 0;
+          pCtx->iPrevPicOrderCntMsb = 0;
+          pCtx->pSliceHeader->iPicOrderCntLsb = 0;
+        }
+        ++ iIdx;
+
+      } while (iIdx < MAX_MMCO_COUNT);
+    }
+  }
+
+  return ERR_NONE;
+}
+
+/*
+ * FillDefaultSliceHeaderExt
+ * Loads default values into the extension part of a slice header.
+ * Parameter:
+ *  pShExt:   slice header extension to fill (its sSliceHeader.iMbWidth/iMbHeight are read)
+ *  pNalExt:  NAL unit header extension; iNoInterLayerPredFlag and uiQualityId are read
+ * return:
+ *  false when either pointer is NULL, true otherwise.
+ */
+bool FillDefaultSliceHeaderExt (PSliceHeaderExt pShExt, PNalUnitHeaderExt pNalExt) {
+  if (NULL == pShExt || NULL == pNalExt) {
+    return false;
+  }
+
+  // the base prediction weight table is only flagged when inter-layer prediction is
+  // allowed and the NAL is at quality id 0
+  if (!pNalExt->iNoInterLayerPredFlag && !(pNalExt->uiQualityId > 0)) {
+    pShExt->bBasePredWeightTableFlag = true;
+  } else {
+    pShExt->bBasePredWeightTableFlag = false;
+  }
+
+  // inter-layer reference and deblocking defaults
+  pShExt->uiRefLayerDqId = static_cast<uint8_t> (-1);
+  pShExt->uiDisableInterLayerDeblockingFilterIdc = 0;
+  pShExt->iInterLayerSliceAlphaC0Offset = 0;
+  pShExt->iInterLayerSliceBetaOffset = 0;
+  pShExt->bConstrainedIntraResamplingFlag = false;
+  pShExt->uiRefLayerChromaPhaseXPlus1Flag = 0;
+  pShExt->uiRefLayerChromaPhaseYPlus1 = 1;
+
+  // scaled reference layer size in luma samples: macroblock count times 16
+  pShExt->iScaledRefLayerPicWidthInSampleLuma = pShExt->sSliceHeader.iMbWidth << 4;
+  pShExt->iScaledRefLayerPicHeightInSampleLuma = pShExt->sSliceHeader.iMbHeight << 4;
+
+  // prediction mode flags all off
+  pShExt->bSliceSkipFlag = false;
+  pShExt->bAdaptiveBaseModeFlag = false;
+  pShExt->bDefaultBaseModeFlag = false;
+  pShExt->bAdaptiveMotionPredFlag = false;
+  pShExt->bDefaultMotionPredFlag = false;
+  pShExt->bAdaptiveResidualPredFlag = false;
+  pShExt->bDefaultResidualPredFlag = false;
+  pShExt->bTCoeffLevelPredFlag = false;
+
+  // scan index range 0..15
+  pShExt->uiScanIdxStart = 0;
+  pShExt->uiScanIdxEnd = 15;
+
+  return true;
+}
+
+/*
+ * InitBsBuffer
+ * Allocates the raw bitstream buffer of MIN_ACCESS_UNIT_CAPACITY * MAX_BUFFERED_NUM bytes
+ * and, when pCtx->pParam->bParseOnly is set, the parser info structure, its destination
+ * buffer, the saved-data buffer and the NAL length array.
+ * return:
+ *  ERR_NONE on success, ERR_INFO_INVALID_PTR for a NULL context,
+ *  ERR_INFO_OUT_OF_MEMORY as soon as one allocation fails (earlier allocations are left
+ *  in the context).
+ */
+int32_t InitBsBuffer (PWelsDecoderContext pCtx) {
+  if (NULL == pCtx) {
+    return ERR_INFO_INVALID_PTR;
+  }
+
+  CMemoryAlign* pMa = pCtx->pMemAlign;
+
+  // raw bitstream buffer
+  pCtx->iMaxBsBufferSizeInByte = MIN_ACCESS_UNIT_CAPACITY * MAX_BUFFERED_NUM;
+  pCtx->sRawData.pHead = static_cast<uint8_t*> (pMa->WelsMallocz (pCtx->iMaxBsBufferSizeInByte,
+                         "pCtx->sRawData.pHead"));
+  if (NULL == pCtx->sRawData.pHead) {
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+  pCtx->sRawData.pCurPos   = pCtx->sRawData.pHead;
+  pCtx->sRawData.pStartPos = pCtx->sRawData.pCurPos;
+  pCtx->sRawData.pEnd      = pCtx->sRawData.pHead + pCtx->iMaxBsBufferSizeInByte;
+
+  // everything below is only needed in parse-only mode
+  if (!pCtx->pParam->bParseOnly) {
+    return ERR_NONE;
+  }
+
+  // parser info structure
+  pCtx->pParserBsInfo = static_cast<SParserBsInfo*> (pMa->WelsMallocz (sizeof (SParserBsInfo), "pCtx->pParserBsInfo"));
+  if (NULL == pCtx->pParserBsInfo) {
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+  memset (pCtx->pParserBsInfo, 0, sizeof (SParserBsInfo));
+
+  // destination buffer of the parser
+  pCtx->pParserBsInfo->pDstBuff = static_cast<uint8_t*> (pMa->WelsMallocz (MAX_ACCESS_UNIT_CAPACITY * sizeof (uint8_t),
+                                  "pCtx->pParserBsInfo->pDstBuff"));
+  if (NULL == pCtx->pParserBsInfo->pDstBuff) {
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+  memset (pCtx->pParserBsInfo->pDstBuff, 0, MAX_ACCESS_UNIT_CAPACITY * sizeof (uint8_t));
+
+  // saved-data buffer, same size as the raw buffer
+  pCtx->sSavedData.pHead = static_cast<uint8_t*> (pMa->WelsMallocz (pCtx->iMaxBsBufferSizeInByte,
+                           "pCtx->sSavedData.pHead"));
+  if (NULL == pCtx->sSavedData.pHead) {
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+  pCtx->sSavedData.pCurPos   = pCtx->sSavedData.pHead;
+  pCtx->sSavedData.pStartPos = pCtx->sSavedData.pCurPos;
+  pCtx->sSavedData.pEnd      = pCtx->sSavedData.pHead + pCtx->iMaxBsBufferSizeInByte;
+
+  // NAL length array; 2 extra entries are reserved for SPS+PPS
+  pCtx->iMaxNalNum = MAX_NAL_UNITS_IN_LAYER + 2;
+  pCtx->pParserBsInfo->pNalLenInByte = static_cast<int*> (pMa->WelsMallocz (pCtx->iMaxNalNum * sizeof (int),
+                                       "pCtx->pParserBsInfo->pNalLenInByte"));
+  if (NULL == pCtx->pParserBsInfo->pNalLenInByte) {
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+
+  return ERR_NONE;
+}
+
+/*
+ * ExpandBsBuffer
+ * Replaces the raw bitstream buffer (and, in parse-only mode, the saved-data buffer) by a
+ * larger one of max (kiSrcLen * MAX_BUFFERED_NUM, 2 * current size) bytes, copies the old
+ * content over and rebases every pointer that referred to the old raw buffer.
+ *
+ * All new buffers are allocated before any context state is touched, so a failed
+ * allocation leaves the context exactly as it was (previously a failure of the second
+ * allocation left sRawData already enlarged while iMaxBsBufferSizeInByte still held the
+ * old size).
+ *
+ * Parameter:
+ *  pCtx:      decoder context
+ *  kiSrcLen:  length in bytes of the incoming data that has to fit
+ * return:
+ *  ERR_NONE on success, ERR_INFO_INVALID_PTR for a NULL context, ERR_INFO_OUT_OF_MEMORY
+ *  (with dsOutOfMemory set in pCtx->iErrorCode) when an allocation fails.
+ */
+int32_t ExpandBsBuffer (PWelsDecoderContext pCtx, const int kiSrcLen) {
+  if (pCtx == NULL)
+    return ERR_INFO_INVALID_PTR;
+  int32_t iExpandStepShift = 1;
+  int32_t iNewBuffLen = WELS_MAX ((kiSrcLen * MAX_BUFFERED_NUM), (pCtx->iMaxBsBufferSizeInByte << iExpandStepShift));
+  CMemoryAlign* pMa = pCtx->pMemAlign;
+
+  //allocate new raw bs buffer
+  uint8_t* pNewBsBuff = static_cast<uint8_t*> (pMa->WelsMallocz (iNewBuffLen, "pCtx->sRawData.pHead"));
+  if (pNewBsBuff == NULL) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "ExpandBsBuffer() Failed for malloc pNewBsBuff (%d)", iNewBuffLen);
+    pCtx->iErrorCode |= dsOutOfMemory;
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+
+  //allocate new saved-data buffer (parse-only mode) before committing anything
+  uint8_t* pNewSavedBsBuff = NULL;
+  if (pCtx->pParam->bParseOnly) {
+    pNewSavedBsBuff = static_cast<uint8_t*> (pMa->WelsMallocz (iNewBuffLen, "pCtx->sSavedData.pHead"));
+    if (pNewSavedBsBuff == NULL) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "ExpandBsBuffer() Failed for malloc pNewSavedBsBuff (%d)", iNewBuffLen);
+      pMa->WelsFree (pNewBsBuff, "pCtx->sRawData.pHead"); // roll back the first allocation
+      pCtx->iErrorCode |= dsOutOfMemory;
+      return ERR_INFO_OUT_OF_MEMORY;
+    }
+  }
+
+  //Rebase the slice bit readers of the NAL units (indices 0..uiActualUnitsNum inclusive)
+  //from the old raw buffer onto the new one
+  for (uint32_t i = 0; i <= pCtx->pAccessUnitList->uiActualUnitsNum; i++) {
+    PBitStringAux pSliceBitsRead = &pCtx->pAccessUnitList->pNalUnitsList[i]->sNalData.sVclNal.sSliceBitsRead;
+    pSliceBitsRead->pStartBuf = pSliceBitsRead->pStartBuf - pCtx->sRawData.pHead + pNewBsBuff;
+    pSliceBitsRead->pEndBuf   = pSliceBitsRead->pEndBuf   - pCtx->sRawData.pHead + pNewBsBuff;
+    pSliceBitsRead->pCurBuf   = pSliceBitsRead->pCurBuf   - pCtx->sRawData.pHead + pNewBsBuff;
+  }
+
+  //Copy current raw buffer status to new buffer
+  memcpy (pNewBsBuff, pCtx->sRawData.pHead, pCtx->iMaxBsBufferSizeInByte);
+  pCtx->sRawData.pStartPos = pNewBsBuff + (pCtx->sRawData.pStartPos - pCtx->sRawData.pHead);
+  pCtx->sRawData.pCurPos   = pNewBsBuff + (pCtx->sRawData.pCurPos   - pCtx->sRawData.pHead);
+  pCtx->sRawData.pEnd      = pNewBsBuff + iNewBuffLen;
+  pMa->WelsFree (pCtx->sRawData.pHead, "pCtx->sRawData.pHead");
+  pCtx->sRawData.pHead = pNewBsBuff;
+
+  if (pNewSavedBsBuff != NULL) {
+    //Copy current saved-data buffer status to new buffer
+    memcpy (pNewSavedBsBuff, pCtx->sSavedData.pHead, pCtx->iMaxBsBufferSizeInByte);
+    pCtx->sSavedData.pStartPos = pNewSavedBsBuff + (pCtx->sSavedData.pStartPos - pCtx->sSavedData.pHead);
+    pCtx->sSavedData.pCurPos   = pNewSavedBsBuff + (pCtx->sSavedData.pCurPos   - pCtx->sSavedData.pHead);
+    pCtx->sSavedData.pEnd      = pNewSavedBsBuff + iNewBuffLen;
+    pMa->WelsFree (pCtx->sSavedData.pHead, "pCtx->sSavedData.pHead");
+    pCtx->sSavedData.pHead = pNewSavedBsBuff;
+  }
+
+  pCtx->iMaxBsBufferSizeInByte = iNewBuffLen;
+  return ERR_NONE;
+}
+
+/*
+ * ExpandBsLenBuffer
+ * Doubles the NAL length array pCtx->pParserBsInfo->pNalLenInByte, capped at
+ * MAX_MB_SIZE + 2 entries, and copies the existing entries into the new array.
+ *
+ * Parameter:
+ *  pCtx:       decoder context
+ *  kiCurrLen:  current number of entries; the new size is derived from it
+ * return:
+ *  ERR_NONE on success;
+ *  ERR_INFO_INVALID_PTR when the context or its parser info is NULL;
+ *  ERR_INFO_INVALID_ACCESS when no length array exists yet or kiCurrLen is not positive;
+ *  ERR_INFO_OUT_OF_MEMORY (with dsOutOfMemory set in pCtx->iErrorCode) when the cap has
+ *  already been reached or the allocation fails.
+ */
+int32_t ExpandBsLenBuffer (PWelsDecoderContext pCtx, const int kiCurrLen) {
+  if (pCtx == NULL || pCtx->pParserBsInfo == NULL)
+    return ERR_INFO_INVALID_PTR;
+
+  SParserBsInfo* pParser = pCtx->pParserBsInfo;
+  if (!pParser->pNalLenInByte)
+    return ERR_INFO_INVALID_ACCESS;
+
+  // a non-positive length cannot be doubled into a usable buffer size
+  if (kiCurrLen <= 0)
+    return ERR_INFO_INVALID_ACCESS;
+
+  int iNewLen = kiCurrLen;
+  if (kiCurrLen >= MAX_MB_SIZE + 2) { //exceeds the max MB number of level 5.2
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "Current nal num (%d) exceededs %d.", kiCurrLen, MAX_MB_SIZE);
+    pCtx->iErrorCode |= dsOutOfMemory;
+    return ERR_INFO_OUT_OF_MEMORY;
+  } else {
+    iNewLen = kiCurrLen << 1;
+    iNewLen = WELS_MIN (iNewLen, MAX_MB_SIZE + 2);
+  }
+
+  CMemoryAlign* pMa = pCtx->pMemAlign;
+  int* pNewLenBuffer = static_cast<int*> (pMa->WelsMallocz (iNewLen * sizeof (int),
+                                          "pCtx->pParserBsInfo->pNalLenInByte"));
+  if (pNewLenBuffer == NULL) {
+    pCtx->iErrorCode |= dsOutOfMemory;
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+
+  //copy existing data from old length buffer to new; never copy more entries than the
+  //new buffer holds, in case kiCurrLen and pCtx->iMaxNalNum disagree
+  int iCopyNum = pCtx->iMaxNalNum;
+  if (iCopyNum > iNewLen)
+    iCopyNum = iNewLen;
+  if (iCopyNum > 0)
+    memcpy (pNewLenBuffer, pParser->pNalLenInByte, iCopyNum * sizeof (int));
+  pMa->WelsFree (pParser->pNalLenInByte, "pCtx->pParserBsInfo->pNalLenInByte");
+  pParser->pNalLenInByte = pNewLenBuffer;
+  pCtx->iMaxNalNum = iNewLen;
+  return ERR_NONE;
+}
+
+/*
+ * CheckBsBuffer
+ * Verifies that kiSrcLen bytes of input can be buffered and expands the bitstream
+ * buffer when needed.
+ * return:
+ *  ERR_INFO_INVALID_ACCESS (with dsBitstreamError set in pCtx->iErrorCode) when kiSrcLen
+ *  exceeds MAX_ACCESS_UNIT_CAPACITY; ERR_INFO_OUT_OF_MEMORY when ExpandBsBuffer reports
+ *  any failure; ERR_NONE otherwise.
+ */
+int32_t CheckBsBuffer (PWelsDecoderContext pCtx, const int32_t kiSrcLen) {
+  //exceeds max allowed data
+  if (kiSrcLen > MAX_ACCESS_UNIT_CAPACITY) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "Max AU size exceeded. Allowed size = %d, current size = %d",
+             MAX_ACCESS_UNIT_CAPACITY,
+             kiSrcLen);
+    pCtx->iErrorCode |= dsBitstreamError;
+    return ERR_INFO_INVALID_ACCESS;
+  }
+
+  //input larger than one buffered slot may lead to buffer overwrite, prevent it by expanding buffer
+  const bool kbNeedExpand = kiSrcLen > pCtx->iMaxBsBufferSizeInByte / MAX_BUFFERED_NUM;
+  if (kbNeedExpand && 0 != ExpandBsBuffer (pCtx, kiSrcLen)) {
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+
+  return ERR_NONE;
+}
+
+/*
+ * WelsInitStaticMemory
+ * Requests the memory the decoder context needs up front: the NAL unit list of the
+ * access unit (MAX_NAL_UNIT_NUM_IN_AU entries) and the bitstream buffers set up by
+ * InitBsBuffer. Also resets uiTargetDqId and bEndOfStreamFlag.
+ * return:
+ *  ERR_NONE on success; ERR_INFO_INVALID_PTR for a NULL context;
+ *  ERR_INFO_OUT_OF_MEMORY when either initialisation step fails.
+ */
+int32_t WelsInitStaticMemory (PWelsDecoderContext pCtx) {
+  if (NULL == pCtx)
+    return ERR_INFO_INVALID_PTR;
+
+  // the bitstream buffers are only set up once the NAL list exists
+  const bool kbNalListReady = (0 == MemInitNalList (&pCtx->pAccessUnitList, MAX_NAL_UNIT_NUM_IN_AU, pCtx->pMemAlign));
+  if (!kbNalListReady || 0 != InitBsBuffer (pCtx)) {
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+
+  pCtx->bEndOfStreamFlag = false;
+  pCtx->uiTargetDqId     = static_cast<uint8_t> (-1);
+
+  return ERR_NONE;
+}
+
+/*
+ * WelsFreeStaticMemory
+ * Free memory introduced in WelsInitStaticMemory at destruction of decoder:
+ * the NAL unit list, the raw bitstream buffer, the parse-only buffers and finally
+ * pCtx->pParam itself. Every released pointer is reset to NULL.
+ *
+ * pCtx->pParam is released (and set to NULL) at the end of this function, so it is
+ * checked before bParseOnly is read; a repeated call therefore no longer dereferences
+ * a NULL pParam.
+ */
+void WelsFreeStaticMemory (PWelsDecoderContext pCtx) {
+  if (pCtx == NULL)
+    return;
+
+  CMemoryAlign* pMa = pCtx->pMemAlign;
+
+  MemFreeNalList (&pCtx->pAccessUnitList, pMa);
+
+  if (pCtx->sRawData.pHead) {
+    pMa->WelsFree (pCtx->sRawData.pHead, "pCtx->sRawData->pHead");
+  }
+  pCtx->sRawData.pHead                = NULL;
+  pCtx->sRawData.pEnd                 = NULL;
+  pCtx->sRawData.pStartPos            = NULL;
+  pCtx->sRawData.pCurPos              = NULL;
+  // parse-only buffers exist only when the parameters requested parse-only mode
+  if (NULL != pCtx->pParam && pCtx->pParam->bParseOnly) {
+    if (pCtx->sSavedData.pHead) {
+      pMa->WelsFree (pCtx->sSavedData.pHead, "pCtx->sSavedData->pHead");
+    }
+    pCtx->sSavedData.pHead                = NULL;
+    pCtx->sSavedData.pEnd                 = NULL;
+    pCtx->sSavedData.pStartPos            = NULL;
+    pCtx->sSavedData.pCurPos              = NULL;
+    if (pCtx->pParserBsInfo) {
+      // release the members first, then the structure that owns them
+      if (pCtx->pParserBsInfo->pNalLenInByte) {
+        pMa->WelsFree (pCtx->pParserBsInfo->pNalLenInByte, "pCtx->pParserBsInfo->pNalLenInByte");
+        pCtx->pParserBsInfo->pNalLenInByte = NULL;
+        pCtx->iMaxNalNum = 0;
+      }
+      if (pCtx->pParserBsInfo->pDstBuff) {
+        pMa->WelsFree (pCtx->pParserBsInfo->pDstBuff, "pCtx->pParserBsInfo->pDstBuff");
+        pCtx->pParserBsInfo->pDstBuff = NULL;
+      }
+      pMa->WelsFree (pCtx->pParserBsInfo, "pCtx->pParserBsInfo");
+      pCtx->pParserBsInfo = NULL;
+    }
+  }
+
+  if (NULL != pCtx->pParam) {
+    pMa->WelsFree (pCtx->pParam, "pCtx->pParam");
+
+    pCtx->pParam = NULL;
+  }
+}
+/*
+ *  DecodeNalHeaderExt
+ *  Unpacks the three extension bytes of a NAL unit header into pNal->sNalHeaderExt.
+ *  Trigger condition: NAL_UNIT_TYPE = NAL_UNIT_PREFIX or NAL_UNIT_CODED_SLICE_EXT
+ *  Parameter:
+ *  pNal:   target NALUnit ptr
+ *  pSrc:   NAL Unit bitstream; three consecutive bytes are read starting at pSrc
+ */
+void DecodeNalHeaderExt (PNalUnit pNal, uint8_t* pSrc) {
+  PNalUnitHeaderExt pExt = &pNal->sNalHeaderExt;
+
+  // byte 0: bit 6 = idr flag, bits 5..0 = priority id
+  const uint8_t kuiByte0 = pSrc[0];
+  pExt->bIdrFlag     = (kuiByte0 & 0x40) != 0;
+  pExt->uiPriorityId = kuiByte0 & 0x3F;
+
+  // byte 1: bit 7 = no inter-layer pred flag, bits 6..4 = dependency id, bits 3..0 = quality id
+  const uint8_t kuiByte1 = pSrc[1];
+  pExt->iNoInterLayerPredFlag = kuiByte1 >> 7;
+  pExt->uiDependencyId        = (kuiByte1 & 0x70) >> 4;
+  pExt->uiQualityId           = kuiByte1 & 0x0F;
+
+  // byte 2: bits 7..5 = temporal id, then use-ref-base-pic, discardable and output flags,
+  // bits 1..0 = reserved
+  const uint8_t kuiByte2 = pSrc[2];
+  pExt->uiTemporalId         = kuiByte2 >> 5;
+  pExt->bUseRefBasePicFlag   = (kuiByte2 & 0x10) != 0;
+  pExt->bDiscardableFlag     = (kuiByte2 & 0x08) != 0;
+  pExt->bOutputFlag          = (kuiByte2 & 0x04) != 0;
+  pExt->uiReservedThree2Bits = kuiByte2 & 0x03;
+
+  // combined layer id: dependency id in the high nibble, quality id in the low nibble
+  pExt->uiLayerDqId = (pExt->uiDependencyId << 4) | pExt->uiQualityId;
+}
+
+
+/*
+ * UpdateDecoderStatisticsForActiveParaset
+ * Copies the ids of the given SPS/PPS and the profile/level of the SPS into the
+ * decoder statistics.
+ */
+void UpdateDecoderStatisticsForActiveParaset (SDecoderStatistics* pDecoderStatistics,
+    PSps pSps, PPps pPps) {
+  // profile and level come from the SPS
+  pDecoderStatistics->uiLevel   = pSps->uiLevelIdc;
+  pDecoderStatistics->uiProfile = static_cast<unsigned int> (pSps->uiProfileIdc);
+
+  // ids of the parameter sets
+  pDecoderStatistics->iCurrentActivePpsId = pPps->iPpsId;
+  pDecoderStatistics->iCurrentActiveSpsId = pSps->iSpsId;
+}
+
+// Range limits for slice header syntax elements; the slice header parser below checks
+// parsed values against several of them.
+// idr_pic_id: 0 to 65535, inclusive
+#define SLICE_HEADER_IDR_PIC_ID_MAX 65535
+// redundant_pic_cnt: 0 to 127, inclusive
+#define SLICE_HEADER_REDUNDANT_PIC_CNT_MAX 127
+// bounds for the alpha c0 / beta offsets (and their inter-layer counterparts)
+// NOTE(review): their use is outside the visible part of the file - confirm halved or full offsets
+#define SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN -12
+#define SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX 12
+#define SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN -12
+#define SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX 12
+// upper bounds of num_ref_idx_l0_active_minus1 / num_ref_idx_l1_active_minus1
+#define MAX_NUM_REF_IDX_L0_ACTIVE_MINUS1 15
+#define MAX_NUM_REF_IDX_L1_ACTIVE_MINUS1 15
+// upper bound of cabac_init_idc
+#define SLICE_HEADER_CABAC_INIT_IDC_MAX 2
+/*
+ *  ParseSliceHeaderSyntaxs
+ *  Parse the slice header syntax elements of the bitstream (with or without the
+ *  extension, see kbExtensionFlag) and store them in the slice header data structure
+ */
+int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, const bool kbExtensionFlag) {
+  PNalUnit const kpCurNal               = pCtx->pAccessUnitList->pNalUnitsList[pCtx->pAccessUnitList->uiAvailUnitsNum -
+                                                                                 1];
+
+  PNalUnitHeaderExt pNalHeaderExt       = NULL;
+  PSliceHeader pSliceHead               = NULL;
+  PSliceHeaderExt pSliceHeadExt         = NULL;
+  PSubsetSps pSubsetSps                 = NULL;
+  PSps pSps                             = NULL;
+  PPps pPps                             = NULL;
+  EWelsNalUnitType eNalType             = static_cast<EWelsNalUnitType> (0);
+  int32_t iPpsId                        = 0;
+  int32_t iRet                          = ERR_NONE;
+  uint8_t uiSliceType                   = 0;
+  uint8_t uiQualityId                   = BASE_QUALITY_ID;
+  bool  bIdrFlag                        = false;
+  bool  bSgChangeCycleInvolved          = false;        // involved slice group change cycle ?
+  uint32_t uiCode;
+  int32_t iCode;
+  SLogContext* pLogCtx = & (pCtx->sLogCtx);
+
+  if (kpCurNal == NULL) {
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+
+  pNalHeaderExt = &kpCurNal->sNalHeaderExt;
+  pSliceHead    = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
+  eNalType      = pNalHeaderExt->sNalUnitHeader.eNalUnitType;
+
+  pSliceHeadExt = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt;
+
+  if (pSliceHeadExt) {
+    SRefBasePicMarking sBaseMarking;
+    const bool kbStoreRefBaseFlag = pSliceHeadExt->bStoreRefBasePicFlag;
+    memcpy (&sBaseMarking, &pSliceHeadExt->sRefBasePicMarking, sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage
+    memset (pSliceHeadExt, 0, sizeof (SSliceHeaderExt));
+    pSliceHeadExt->bStoreRefBasePicFlag = kbStoreRefBaseFlag;
+    memcpy (&pSliceHeadExt->sRefBasePicMarking, &sBaseMarking, sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage
+  }
+
+  kpCurNal->sNalData.sVclNal.bSliceHeaderExtFlag = kbExtensionFlag;
+
+  // first_mb_in_slice
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //first_mb_in_slice
+  WELS_CHECK_SE_UPPER_ERROR (uiCode, 36863u, "first_mb_in_slice", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
+                             ERR_INFO_INVALID_FIRST_MB_IN_SLICE));
+  pSliceHead->iFirstMbInSlice = uiCode;
+
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //slice_type
+  uiSliceType = uiCode;
+  if (uiSliceType > 9) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "slice type too large (%d) at first_mb(%d)", uiSliceType,
+             pSliceHead->iFirstMbInSlice);
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
+  }
+  if (uiSliceType > 4)
+    uiSliceType -= 5;
+
+  if ((NAL_UNIT_CODED_SLICE_IDR == eNalType) && (I_SLICE != uiSliceType)) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid slice type(%d) in IDR picture. ", uiSliceType);
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
+  }
+
+  if (kbExtensionFlag) {
+    if (uiSliceType > 2) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid slice type(%d).", uiSliceType);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
+    }
+  }
+
+  pSliceHead->eSliceType = static_cast <EWelsSliceType> (uiSliceType);
+
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //pic_parameter_set_id
+  WELS_CHECK_SE_UPPER_ERROR (uiCode, (MAX_PPS_COUNT - 1), "iPpsId out of range",
+                             GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
+                                 ERR_INFO_PPS_ID_OVERFLOW));
+  iPpsId = uiCode;
+
+  //add check PPS available here
+  if (pCtx->bPpsAvailFlags[iPpsId] == false) {
+    pCtx->sDecoderStatistics.iPpsReportErrorNum++;
+    if (pCtx->iPPSLastInvalidId != iPpsId) {
+      WelsLog (pLogCtx, WELS_LOG_ERROR, "PPS id (%d) is invalid, previous id (%d) error ignored (%d)!", iPpsId,
+               pCtx->iPPSLastInvalidId, pCtx->iPPSInvalidNum);
+      pCtx->iPPSLastInvalidId = iPpsId;
+      pCtx->iPPSInvalidNum = 0;
+    } else {
+      pCtx->iPPSInvalidNum++;
+    }
+    pCtx->iErrorCode |= dsNoParamSets;
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_PPS_ID);
+  }
+  pCtx->iPPSLastInvalidId = -1;
+
+  pPps    = &pCtx->sPpsBuffer[iPpsId];
+
+  if (pPps->uiNumSliceGroups == 0) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid PPS referenced");
+    pCtx->iErrorCode |= dsNoParamSets;
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_NO_PARAM_SETS);
+  }
+
+  if (kbExtensionFlag) {
+    pSubsetSps      = &pCtx->sSubsetSpsBuffer[pPps->iSpsId];
+    pSps            = &pSubsetSps->sSps;
+    if (pCtx->bSubspsAvailFlags[pPps->iSpsId] == false) {
+      pCtx->sDecoderStatistics.iSubSpsReportErrorNum++;
+      if (pCtx->iSubSPSLastInvalidId != pPps->iSpsId) {
+        WelsLog (pLogCtx, WELS_LOG_ERROR, "Sub SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId,
+                 pCtx->iSubSPSLastInvalidId, pCtx->iSubSPSInvalidNum);
+        pCtx->iSubSPSLastInvalidId = pPps->iSpsId;
+        pCtx->iSubSPSInvalidNum = 0;
+      } else {
+        pCtx->iSubSPSInvalidNum++;
+      }
+      pCtx->iErrorCode |= dsNoParamSets;
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID);
+    }
+    pCtx->iSubSPSLastInvalidId = -1;
+  } else {
+    if (pCtx->bSpsAvailFlags[pPps->iSpsId] == false) {
+      pCtx->sDecoderStatistics.iSpsReportErrorNum++;
+      if (pCtx->iSPSLastInvalidId != pPps->iSpsId) {
+        WelsLog (pLogCtx, WELS_LOG_ERROR, "SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId,
+                 pCtx->iSPSLastInvalidId, pCtx->iSPSInvalidNum);
+        pCtx->iSPSLastInvalidId = pPps->iSpsId;
+        pCtx->iSPSInvalidNum = 0;
+      } else {
+        pCtx->iSPSInvalidNum++;
+      }
+      pCtx->iErrorCode |= dsNoParamSets;
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID);
+    }
+    pCtx->iSPSLastInvalidId = -1;
+    pSps = &pCtx->sSpsBuffer[pPps->iSpsId];
+  }
+  pSliceHead->iPpsId = iPpsId;
+  pSliceHead->iSpsId = pPps->iSpsId;
+  pSliceHead->pPps   = pPps;
+  pSliceHead->pSps   = pSps;
+
+  pSliceHeadExt->pSubsetSps = pSubsetSps;
+
+  if (pSps->iNumRefFrames == 0) {
+    if ((uiSliceType != I_SLICE) && (uiSliceType != SI_SLICE)) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "slice_type (%d) not supported for num_ref_frames = 0.", uiSliceType);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
+    }
+  }
+
+  bIdrFlag = (!kbExtensionFlag && eNalType == NAL_UNIT_CODED_SLICE_IDR) || (kbExtensionFlag && pNalHeaderExt->bIdrFlag);
+  pSliceHead->bIdrFlag = bIdrFlag;
+
+  if (pSps->uiLog2MaxFrameNum == 0) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "non existing SPS referenced");
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_NO_PARAM_SETS);
+  }
+  // check first_mb_in_slice
+  WELS_CHECK_SE_UPPER_ERROR ((uint32_t) (pSliceHead->iFirstMbInSlice), (pSps->uiTotalMbCount - 1), "first_mb_in_slice",
+                             GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_FIRST_MB_IN_SLICE));
+  WELS_READ_VERIFY (BsGetBits (pBs, pSps->uiLog2MaxFrameNum, &uiCode)); //frame_num
+  pSliceHead->iFrameNum = uiCode;
+
+  pSliceHead->bFieldPicFlag    = false;
+  pSliceHead->bBottomFiledFlag = false;
+  if (!pSps->bFrameMbsOnlyFlag) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "ParseSliceHeaderSyntaxs(): frame_mbs_only_flag = %d not supported. ",
+             pSps->bFrameMbsOnlyFlag);
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MBAFF);
+  }
+  pSliceHead->iMbWidth  = pSps->iMbWidth;
+  pSliceHead->iMbHeight = pSps->iMbHeight / (1 + pSliceHead->bFieldPicFlag);
+
+  if (bIdrFlag) {
+    if (pSliceHead->iFrameNum != 0) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING,
+               "ParseSliceHeaderSyntaxs(), invaild frame number: %d due to IDR frame introduced!",
+               pSliceHead->iFrameNum);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_FRAME_NUM);
+    }
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //idr_pic_id
+    // standard 7.4.3 idr_pic_id should be in range 0 to 65535, inclusive.
+    WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_IDR_PIC_ID_MAX, "idr_pic_id", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
+                               ERR_INFO_INVALID_IDR_PIC_ID));
+    pSliceHead->uiIdrPicId = uiCode; /* uiIdrPicId */
+#ifdef LONG_TERM_REF
+    pCtx->uiCurIdrPicId = pSliceHead->uiIdrPicId;
+#endif
+  }
+
+  pSliceHead->iDeltaPicOrderCntBottom = 0;
+  pSliceHead->iDeltaPicOrderCnt[0] =
+    pSliceHead->iDeltaPicOrderCnt[1] = 0;
+  if (pSps->uiPocType == 0) {
+    WELS_READ_VERIFY (BsGetBits (pBs, pSps->iLog2MaxPocLsb, &uiCode)); //pic_order_cnt_lsb
+    const int32_t iMaxPocLsb = 1 << (pSps->iLog2MaxPocLsb);
+    pSliceHead->iPicOrderCntLsb = uiCode;
+    if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) {
+      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt_bottom
+      pSliceHead->iDeltaPicOrderCntBottom = iCode;
+    }
+    //Calculate poc if necessary
+    int32_t pocLsb = pSliceHead->iPicOrderCntLsb;
+    if (pSliceHead->bIdrFlag || kpCurNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR) {
+      pCtx->iPrevPicOrderCntMsb = 0;
+      pCtx->iPrevPicOrderCntLsb = 0;
+    }
+    int32_t pocMsb;
+    if (pocLsb < pCtx->iPrevPicOrderCntLsb && pCtx->iPrevPicOrderCntLsb - pocLsb >= iMaxPocLsb / 2)
+      pocMsb = pCtx->iPrevPicOrderCntMsb + iMaxPocLsb;
+    else if (pocLsb > pCtx->iPrevPicOrderCntLsb && pocLsb - pCtx->iPrevPicOrderCntLsb > iMaxPocLsb / 2)
+      pocMsb = pCtx->iPrevPicOrderCntMsb - iMaxPocLsb;
+    else
+      pocMsb = pCtx->iPrevPicOrderCntMsb;
+    pSliceHead->iPicOrderCntLsb = pocMsb + pocLsb;
+
+    if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) {
+      pSliceHead->iPicOrderCntLsb += pSliceHead->iDeltaPicOrderCntBottom;
+    }
+
+    if (kpCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) {
+      pCtx->iPrevPicOrderCntLsb = pocLsb;
+      pCtx->iPrevPicOrderCntMsb = pocMsb;
+    }
+    //End of Calculating poc
+  } else if (pSps->uiPocType == 1 && !pSps->bDeltaPicOrderAlwaysZeroFlag) {
+    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt[ 0 ]
+    pSliceHead->iDeltaPicOrderCnt[0] = iCode;
+    if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) {
+      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt[ 1 ]
+      pSliceHead->iDeltaPicOrderCnt[1] = iCode;
+    }
+  }
+  pSliceHead->iRedundantPicCnt = 0;
+  if (pPps->bRedundantPicCntPresentFlag) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //redundant_pic_cnt
+    // standard section 7.4.3, redundant_pic_cnt should be in range 0 to 127, inclusive.
+    WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_REDUNDANT_PIC_CNT_MAX, "redundant_pic_cnt",
+                               GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REDUNDANT_PIC_CNT));
+    pSliceHead->iRedundantPicCnt = uiCode;
+    if (pSliceHead->iRedundantPicCnt > 0) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "Redundant picture not supported!");
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REDUNDANT_PIC_CNT);
+    }
+  }
+
+  if (B_SLICE == uiSliceType) {
+    //fix me: it needs to use the this flag somewhere for B-Sclice
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //direct_spatial_mv_pred_flag
+    pSliceHead->iDirectSpatialMvPredFlag = uiCode;
+  }
+
+  //set defaults, might be overriden a few line later
+  pSliceHead->uiRefCount[0] = pPps->uiNumRefIdxL0Active;
+  pSliceHead->uiRefCount[1] = pPps->uiNumRefIdxL1Active;
+
+  bool bReadNumRefFlag = (P_SLICE == uiSliceType || B_SLICE == uiSliceType);
+  if (kbExtensionFlag) {
+    bReadNumRefFlag &= (BASE_QUALITY_ID == pNalHeaderExt->uiQualityId);
+  }
+  if (bReadNumRefFlag) {
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //num_ref_idx_active_override_flag
+    pSliceHead->bNumRefIdxActiveOverrideFlag = !!uiCode;
+    if (pSliceHead->bNumRefIdxActiveOverrideFlag) {
+      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //num_ref_idx_l0_active_minus1
+      WELS_CHECK_SE_UPPER_ERROR (uiCode, MAX_NUM_REF_IDX_L0_ACTIVE_MINUS1, "num_ref_idx_l0_active_minus1",
+                                 GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_NUM_REF_IDX_L0_ACTIVE_MINUS1));
+      pSliceHead->uiRefCount[0] = 1 + uiCode;
+      if (B_SLICE == uiSliceType) {
+        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //num_ref_idx_l1_active_minus1
+        WELS_CHECK_SE_UPPER_ERROR (uiCode, MAX_NUM_REF_IDX_L1_ACTIVE_MINUS1, "num_ref_idx_l1_active_minus1",
+                                   GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_NUM_REF_IDX_L1_ACTIVE_MINUS1));
+        pSliceHead->uiRefCount[1] = 1 + uiCode;
+      }
+    }
+  }
+
+  if (pSliceHead->uiRefCount[0] > MAX_REF_PIC_COUNT || pSliceHead->uiRefCount[1] > MAX_REF_PIC_COUNT) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "reference overflow");
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_REF_COUNT_OVERFLOW);
+  }
+
+  if (BASE_QUALITY_ID == uiQualityId) {
+    iRet = ParseRefPicListReordering (pBs, pSliceHead);
+    if (iRet != ERR_NONE) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "invalid ref pPic list reordering syntaxs!");
+      return iRet;
+    }
+
+    if ((pPps->bWeightedPredFlag && uiSliceType == P_SLICE) || (pPps->uiWeightedBipredIdc == 1 && uiSliceType == B_SLICE)) {
+      iRet = ParsePredWeightedTable (pBs, pSliceHead);
+      if (iRet != ERR_NONE) {
+        WelsLog (pLogCtx, WELS_LOG_WARNING, "invalid weighted prediction syntaxs!");
+        return iRet;
+      }
+    }
+
+    if (kbExtensionFlag) {
+      if (pNalHeaderExt->iNoInterLayerPredFlag || pNalHeaderExt->uiQualityId > 0)
+        pSliceHeadExt->bBasePredWeightTableFlag = false;
+      else
+        pSliceHeadExt->bBasePredWeightTableFlag = true;
+    }
+
+    if (kpCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) {
+      iRet = ParseDecRefPicMarking (pCtx, pBs, pSliceHead, pSps, bIdrFlag);
+      if (iRet != ERR_NONE) {
+        return iRet;
+      }
+
+      if (kbExtensionFlag && !pSubsetSps->sSpsSvcExt.bSliceHeaderRestrictionFlag) {
+        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //store_ref_base_pic_flag
+        pSliceHeadExt->bStoreRefBasePicFlag = !!uiCode;
+        if ((pNalHeaderExt->bUseRefBasePicFlag || pSliceHeadExt->bStoreRefBasePicFlag) && !bIdrFlag) {
+          WelsLog (pLogCtx, WELS_LOG_WARNING,
+                   "ParseSliceHeaderSyntaxs(): bUseRefBasePicFlag or bStoreRefBasePicFlag = 1 not supported.");
+          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
+        }
+      }
+    }
+  }
+
+  if (pPps->bEntropyCodingModeFlag) {
+    if (pSliceHead->eSliceType != I_SLICE && pSliceHead->eSliceType != SI_SLICE) {
+      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode));
+      WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_CABAC_INIT_IDC_MAX, "cabac_init_idc", ERR_INFO_INVALID_CABAC_INIT_IDC);
+      pSliceHead->iCabacInitIdc = uiCode;
+    } else
+      pSliceHead->iCabacInitIdc = 0;
+  }
+
+  WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_qp_delta
+  pSliceHead->iSliceQpDelta     = iCode;
+  pSliceHead->iSliceQp          = pPps->iPicInitQp + pSliceHead->iSliceQpDelta;
+  if (pSliceHead->iSliceQp < 0 || pSliceHead->iSliceQp > 51) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "QP %d out of range", pSliceHead->iSliceQp);
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_QP);
+  }
+
+  //FIXME qscale / qp ... stuff
+  if (!kbExtensionFlag) {
+    if (uiSliceType == SP_SLICE || uiSliceType == SI_SLICE) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "SP/SI not supported");
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_SPSI);
+    }
+  }
+
+  pSliceHead->uiDisableDeblockingFilterIdc = 0;
+  pSliceHead->iSliceAlphaC0Offset          = 0;
+  pSliceHead->iSliceBetaOffset             = 0;
+  if (pPps->bDeblockingFilterControlPresentFlag) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //disable_deblocking_filter_idc
+    pSliceHead->uiDisableDeblockingFilterIdc = uiCode;
+    //refer to JVT-X201wcm1.doc G.7.4.3.4--2010.4.20
+    if (pSliceHead->uiDisableDeblockingFilterIdc > 6) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "disable_deblock_filter_idc (%d) out of range [0, 6]",
+               pSliceHead->uiDisableDeblockingFilterIdc);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_DBLOCKING_IDC);
+    }
+    if (pSliceHead->uiDisableDeblockingFilterIdc != 1) {
+      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_alpha_c0_offset_div2
+      pSliceHead->iSliceAlphaC0Offset = iCode * 2;
+      WELS_CHECK_SE_BOTH_ERROR (pSliceHead->iSliceAlphaC0Offset, SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN,
+                                SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX, "slice_alpha_c0_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
+                                    ERR_INFO_INVALID_SLICE_ALPHA_C0_OFFSET_DIV2));
+      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_beta_offset_div2
+      pSliceHead->iSliceBetaOffset = iCode * 2;
+      WELS_CHECK_SE_BOTH_ERROR (pSliceHead->iSliceBetaOffset, SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN,
+                                SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX, "slice_beta_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
+                                    ERR_INFO_INVALID_SLICE_BETA_OFFSET_DIV2));
+    }
+  }
+
+  bSgChangeCycleInvolved = (pPps->uiNumSliceGroups > 1 && pPps->uiSliceGroupMapType >= 3
+                            && pPps->uiSliceGroupMapType <= 5);
+  if (kbExtensionFlag && bSgChangeCycleInvolved)
+    bSgChangeCycleInvolved = (bSgChangeCycleInvolved && (uiQualityId == BASE_QUALITY_ID));
+  if (bSgChangeCycleInvolved) {
+    if (pPps->uiSliceGroupChangeRate > 0) {
+      const int32_t kiNumBits = (int32_t)WELS_CEIL (log (static_cast<double> (1 + pPps->uiPicSizeInMapUnits /
+                                pPps->uiSliceGroupChangeRate)));
+      WELS_READ_VERIFY (BsGetBits (pBs, kiNumBits, &uiCode)); //lice_group_change_cycle
+      pSliceHead->iSliceGroupChangeCycle = uiCode;
+    } else
+      pSliceHead->iSliceGroupChangeCycle = 0;
+  }
+
+  if (!kbExtensionFlag) {
+    FillDefaultSliceHeaderExt (pSliceHeadExt, pNalHeaderExt);
+  } else {
+    /* Extra syntax elements newly introduced */
+    pSliceHeadExt->pSubsetSps = pSubsetSps;
+
+    if (!pNalHeaderExt->iNoInterLayerPredFlag && BASE_QUALITY_ID == uiQualityId) {
+      //the following should be deleted for CODE_CLEAN
+      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //ref_layer_dq_id
+      pSliceHeadExt->uiRefLayerDqId = uiCode;
+      if (pSubsetSps->sSpsSvcExt.bInterLayerDeblockingFilterCtrlPresentFlag) {
+        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //disable_inter_layer_deblocking_filter_idc
+        pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc = uiCode;
+        //refer to JVT-X201wcm1.doc G.7.4.3.4--2010.4.20
+        if (pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc > 6) {
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "disable_inter_layer_deblock_filter_idc (%d) out of range [0, 6]",
+                   pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc);
+          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_DBLOCKING_IDC);
+        }
+        if (pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc != 1) {
+          WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //inter_layer_slice_alpha_c0_offset_div2
+          pSliceHeadExt->iInterLayerSliceAlphaC0Offset = iCode * 2;
+          WELS_CHECK_SE_BOTH_ERROR (pSliceHeadExt->iInterLayerSliceAlphaC0Offset,
+                                    SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN, SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX,
+                                    "inter_layer_alpha_c0_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
+                                        ERR_INFO_INVALID_SLICE_ALPHA_C0_OFFSET_DIV2));
+          WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //inter_layer_slice_beta_offset_div2
+          pSliceHeadExt->iInterLayerSliceBetaOffset = iCode * 2;
+          WELS_CHECK_SE_BOTH_ERROR (pSliceHeadExt->iInterLayerSliceBetaOffset, SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN,
+                                    SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX, "inter_layer_slice_beta_offset_div2 * 2",
+                                    GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_BETA_OFFSET_DIV2));
+        }
+      }
+
+      pSliceHeadExt->uiRefLayerChromaPhaseXPlus1Flag = pSubsetSps->sSpsSvcExt.uiSeqRefLayerChromaPhaseXPlus1Flag;
+      pSliceHeadExt->uiRefLayerChromaPhaseYPlus1     = pSubsetSps->sSpsSvcExt.uiSeqRefLayerChromaPhaseYPlus1;
+
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //constrained_intra_resampling_flag
+      pSliceHeadExt->bConstrainedIntraResamplingFlag = !!uiCode;
+
+      {
+        SPosOffset pos;
+        pos.iLeftOffset   = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iLeftOffset;
+        pos.iTopOffset    = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iTopOffset * (2 - pSps->bFrameMbsOnlyFlag);
+        pos.iRightOffset  = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iRightOffset;
+        pos.iBottomOffset = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iBottomOffset * (2 - pSps->bFrameMbsOnlyFlag);
+        //memcpy(&pSliceHeadExt->sScaledRefLayer, &pos, sizeof(SPosOffset));//confirmed_safe_unsafe_usage
+        pSliceHeadExt->iScaledRefLayerPicWidthInSampleLuma  = (pSliceHead->iMbWidth << 4) -
+            (pos.iLeftOffset + pos.iRightOffset);
+        pSliceHeadExt->iScaledRefLayerPicHeightInSampleLuma = (pSliceHead->iMbHeight << 4) -
+            (pos.iTopOffset + pos.iBottomOffset) / (1 + pSliceHead->bFieldPicFlag);
+      }
+    } else if (uiQualityId > BASE_QUALITY_ID) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "MGS not supported.");
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MGS);
+    } else {
+      pSliceHeadExt->uiRefLayerDqId = (uint8_t) - 1;
+    }
+
+    pSliceHeadExt->bSliceSkipFlag            = false;
+    pSliceHeadExt->bAdaptiveBaseModeFlag     = false;
+    pSliceHeadExt->bDefaultBaseModeFlag      = false;
+    pSliceHeadExt->bAdaptiveMotionPredFlag   = false;
+    pSliceHeadExt->bDefaultMotionPredFlag    = false;
+    pSliceHeadExt->bAdaptiveResidualPredFlag = false;
+    pSliceHeadExt->bDefaultResidualPredFlag  = false;
+    if (pNalHeaderExt->iNoInterLayerPredFlag)
+      pSliceHeadExt->bTCoeffLevelPredFlag    = false;
+    else
+      pSliceHeadExt->bTCoeffLevelPredFlag    = pSubsetSps->sSpsSvcExt.bSeqTCoeffLevelPredFlag;
+
+    if (!pNalHeaderExt->iNoInterLayerPredFlag) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //slice_skip_flag
+      pSliceHeadExt->bSliceSkipFlag = !!uiCode;
+      if (pSliceHeadExt->bSliceSkipFlag) {
+        WelsLog (pLogCtx, WELS_LOG_WARNING, "bSliceSkipFlag == 1 not supported.");
+        return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_SLICESKIP);
+      } else {
+        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_base_mode_flag
+        pSliceHeadExt->bAdaptiveBaseModeFlag = !!uiCode;
+        if (!pSliceHeadExt->bAdaptiveBaseModeFlag) {
+          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_base_mode_flag
+          pSliceHeadExt->bDefaultBaseModeFlag = !!uiCode;
+        }
+        if (!pSliceHeadExt->bDefaultBaseModeFlag) {
+          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_motion_prediction_flag
+          pSliceHeadExt->bAdaptiveMotionPredFlag = !!uiCode;
+          if (!pSliceHeadExt->bAdaptiveMotionPredFlag) {
+            WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_motion_prediction_flag
+            pSliceHeadExt->bDefaultMotionPredFlag = !!uiCode;
+          }
+        }
+
+        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_residual_prediction_flag
+        pSliceHeadExt->bAdaptiveResidualPredFlag = !!uiCode;
+        if (!pSliceHeadExt->bAdaptiveResidualPredFlag) {
+          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_residual_prediction_flag
+          pSliceHeadExt->bDefaultResidualPredFlag = !!uiCode;
+        }
+      }
+      if (pSubsetSps->sSpsSvcExt.bAdaptiveTCoeffLevelPredFlag) {
+        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //tcoeff_level_prediction_flag
+        pSliceHeadExt->bTCoeffLevelPredFlag = !!uiCode;
+      }
+    }
+
+    if (!pSubsetSps->sSpsSvcExt.bSliceHeaderRestrictionFlag) {
+      WELS_READ_VERIFY (BsGetBits (pBs, 4, &uiCode)); //scan_idx_start
+      pSliceHeadExt->uiScanIdxStart = uiCode;
+      WELS_READ_VERIFY (BsGetBits (pBs, 4, &uiCode)); //scan_idx_end
+      pSliceHeadExt->uiScanIdxEnd = uiCode;
+      if (pSliceHeadExt->uiScanIdxStart != 0 || pSliceHeadExt->uiScanIdxEnd != 15) {
+        WelsLog (pLogCtx, WELS_LOG_WARNING, "uiScanIdxStart (%d) != 0 and uiScanIdxEnd (%d) !=15 not supported here",
+                 pSliceHeadExt->uiScanIdxStart, pSliceHeadExt->uiScanIdxEnd);
+        return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MGS);
+      }
+    } else {
+      pSliceHeadExt->uiScanIdxStart = 0;
+      pSliceHeadExt->uiScanIdxEnd   = 15;
+    }
+  }
+
+  return ERR_NONE;
+}
+
+/*
+ *  Copy the relevant syntax elements (NAL unit header extension, sRefPicBaseMarking and
+ *  bStoreRefBasePicFlag) from a prefix NAL unit into the VCL NAL unit that follows it.
+ *  kpSrc:   the already decoded prefix NAL
+ *  kppDst:  the succeeding AVC-based VCL NAL (I/P slice)
+ */
+bool PrefetchNalHeaderExtSyntax (PWelsDecoderContext pCtx, PNalUnit const kppDst, PNalUnit const kpSrc) {
+  // Both the source (prefix) NAL and the destination (VCL) NAL are required; otherwise report failure.
+  if (NULL == kppDst || NULL == kpSrc) {
+    return false;
+  }
+
+  PNalUnitHeaderExt pDstHdrExt = &kppDst->sNalHeaderExt;
+  PNalUnitHeaderExt pSrcHdrExt = &kpSrc->sNalHeaderExt;
+  PSliceHeaderExt   pDstShExt  = &kppDst->sNalData.sVclNal.sSliceHeaderExt;
+  PPrefixNalUnit    pSrcPrefix = &kpSrc->sNalData.sPrefixNal;
+  // SPS reached through the PPS id stored in the destination slice header.
+  PSps              pSps       = &pCtx->sSpsBuffer[pCtx->sPpsBuffer[pDstShExt->sSliceHeader.iPpsId].iSpsId];
+
+  // Mirror the NAL unit header extension fields of the prefix NAL into the destination NAL.
+  pDstHdrExt->uiDependencyId        = pSrcHdrExt->uiDependencyId;
+  pDstHdrExt->uiQualityId           = pSrcHdrExt->uiQualityId;
+  pDstHdrExt->uiTemporalId          = pSrcHdrExt->uiTemporalId;
+  pDstHdrExt->uiPriorityId          = pSrcHdrExt->uiPriorityId;
+  pDstHdrExt->bIdrFlag              = pSrcHdrExt->bIdrFlag;
+  pDstHdrExt->iNoInterLayerPredFlag = pSrcHdrExt->iNoInterLayerPredFlag;
+  pDstHdrExt->bDiscardableFlag      = pSrcHdrExt->bDiscardableFlag;
+  pDstHdrExt->bOutputFlag           = pSrcHdrExt->bOutputFlag;
+  pDstHdrExt->bUseRefBasePicFlag    = pSrcHdrExt->bUseRefBasePicFlag;
+  pDstHdrExt->uiLayerDqId           = pSrcHdrExt->uiLayerDqId;
+
+  // Carry over the reference base picture marking information held by the prefix NAL.
+  pDstShExt->bStoreRefBasePicFlag = pSrcPrefix->bStoreRefBasePicFlag;
+  memcpy (&pDstShExt->sRefBasePicMarking, &pSrcPrefix->sRefPicBaseMarking, sizeof (SRefBasePicMarking));
+
+  if (!pDstShExt->sRefBasePicMarking.bAdaptiveRefBasePicMarkingModeFlag) {
+    return true;
+  }
+
+  // Walk the copied MMCO list up to MMCO_END (or the table limit) and derive iShortFrameNum
+  // for every MMCO_SHORT2UNUSED entry from the destination slice's frame number.
+  PRefBasePicMarking pMarking = &pDstShExt->sRefBasePicMarking;
+  int32_t iOp = 0;
+  do {
+    if (MMCO_END == pMarking->mmco_base[iOp].uiMmcoType) {
+      break;
+    }
+    if (MMCO_SHORT2UNUSED == pMarking->mmco_base[iOp].uiMmcoType) {
+      // Difference is wrapped to uiLog2MaxFrameNum bits.
+      pMarking->mmco_base[iOp].iShortFrameNum =
+        (pDstShExt->sSliceHeader.iFrameNum - pMarking->mmco_base[iOp].uiDiffOfPicNums) &
+        ((1 << pSps->uiLog2MaxFrameNum) - 1);
+    }
+    ++iOp;
+  } while (iOp < MAX_MMCO_COUNT);
+
+  return true;
+}
+
+
+
+int32_t UpdateAccessUnit (PWelsDecoderContext pCtx) {
+  PAccessUnit pAu  = pCtx->pAccessUnitList;
+  int32_t iLastIdx = pAu->uiEndPos;
+
+  // The last NAL of the AU determines the target DQ id; the AU is now marked complete.
+  pCtx->uiTargetDqId    = pAu->pNalUnitsList[iLastIdx]->sNalHeaderExt.uiLayerDqId;
+  pAu->uiActualUnitsNum = iLastIdx + 1;
+  pAu->bCompletedAuFlag = true;
+
+  // Mosaic avoidance: after a loss (or at the start of a new sequence) the AU must contain an IDR.
+#ifdef LONG_TERM_REF
+  const bool kbIdrRequired = (pCtx->bParamSetsLostFlag || pCtx->bNewSeqBegin);
+#else
+  const bool kbIdrRequired = (pCtx->bReferenceLostAtT0Flag || pCtx->bNewSeqBegin);
+#endif
+  if (!kbIdrRequired) {
+    return ERR_NONE;
+  }
+
+  // Look for any NAL flagged as IDR within the units of this AU.
+  bool bIdrFound = false;
+  for (uint32_t uiIdx = 0; uiIdx < pAu->uiActualUnitsNum; ++uiIdx) {
+    PNalUnit pNal = pAu->pNalUnitsList[uiIdx];
+    if (pNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR || pNal->sNalHeaderExt.bIdrFlag) {
+      bIdrFound = true;
+      break;
+    }
+  }
+  if (bIdrFound) {
+    return ERR_NONE;
+  }
+
+  // No IDR in the incoming AU: account for the loss and flag the reference as lost.
+  pCtx->sDecoderStatistics.uiIDRLostNum++;
+  if (!pCtx->bParamSetsLostFlag) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+             "UpdateAccessUnit():::::Key frame lost.....CAN NOT find IDR from current AU.");
+  }
+  pCtx->iErrorCode |= dsRefLost;
+
+  // Without error concealment the caller gets an error back so it can stop decoding this AU.
+  if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+#ifdef LONG_TERM_REF
+    pCtx->iErrorCode |= dsNoParamSets;
+    return dsNoParamSets;
+#else
+    pCtx->iErrorCode |= dsRefLost;
+    return ERR_INFO_REFERENCE_PIC_LOST;
+#endif
+  }
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Allocate (or re-allocate) the DQ layer objects and the per-macroblock buffers in pCtx->sMb
+ *          for pictures of up to kiMaxWidth x kiMaxHeight.
+ * \param   pCtx         decoder context that receives the buffers
+ * \param   kiMaxWidth   requested picture width, must be > 0
+ * \param   kiMaxHeight  requested picture height, must be > 0
+ * \return  ERR_NONE on success (including the case where the existing buffers are already large enough);
+ *          ERR_INFO_OUT_OF_MEMORY if any allocation fails. Invalid arguments are handled through
+ *          WELS_VERIFY_RETURN_IF with ERR_INFO_INVALID_PARAM (macro defined elsewhere).
+ * \note    On an allocation failure the buffers obtained so far stay attached to pCtx; they are released by
+ *          the next UninitialDqLayersContext() call because pDqLayersList[i] has already been set.
+ */
+int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWidth, const int32_t kiMaxHeight) {
+  int32_t i = 0;
+
+  WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pCtx || kiMaxWidth <= 0 || kiMaxHeight <= 0))
+  // Picture dimensions in 16-pixel units, rounded up. Updated even when the early return below is taken.
+  pCtx->sMb.iMbWidth  = (kiMaxWidth + 15) >> 4;
+  pCtx->sMb.iMbHeight = (kiMaxHeight + 15) >> 4;
+
+  // Buffers from a previous call are reused when they were sized for at least the requested dimensions.
+  if (pCtx->bInitialDqLayersMem && kiMaxWidth <= pCtx->iPicWidthReq
+      && kiMaxHeight <= pCtx->iPicHeightReq) // have same dimension memory, skipped
+    return ERR_NONE;
+
+  CMemoryAlign* pMa = pCtx->pMemAlign;
+
+  // Release whatever was allocated before; this also clears bInitialDqLayersMem and the requested size.
+  UninitialDqLayersContext (pCtx);
+
+  // One DQ layer object plus one full set of per-MB buffers for each of the LAYER_NUM_EXCHANGEABLE slots.
+  do {
+    PDqLayer pDq = (PDqLayer)pMa->WelsMallocz (sizeof (SDqLayer), "PDqLayer");
+
+    if (pDq == NULL)
+      return ERR_INFO_OUT_OF_MEMORY;
+
+    pCtx->pDqLayersList[i] = pDq; //to keep consistence with in UninitialDqLayersContext()
+    // NOTE(review): WelsMallocz presumably returns zero-filled memory already, which would make this
+    // memset redundant -- confirm against the CMemoryAlign implementation.
+    memset (pDq, 0, sizeof (SDqLayer));
+
+    // Every buffer below holds iMbWidth * iMbHeight entries; only the per-entry size differs.
+    pCtx->sMb.pMbType[i] = (uint32_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint32_t),
+                           "pCtx->sMb.pMbType[]");
+    pCtx->sMb.pMv[i][LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+                                 int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMv[][]");
+    pCtx->sMb.pMv[i][LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+                                 int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMv[][]");
+
+    pCtx->sMb.pRefIndex[i][LIST_0] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (pCtx->sMb.iMbWidth *
+                                     pCtx->sMb.iMbHeight *
+                                     sizeof (
+                                       int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[][]");
+    pCtx->sMb.pRefIndex[i][LIST_1] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (pCtx->sMb.iMbWidth *
+                                     pCtx->sMb.iMbHeight *
+                                     sizeof (
+                                       int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[][]");
+    pCtx->sMb.pDirect[i] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
+                           sizeof (
+                             int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pDirect[]");
+    pCtx->sMb.pLumaQp[i] = (int8_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
+                           "pCtx->sMb.pLumaQp[]");
+    pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
+        sizeof (
+          bool),
+        "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
+    pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool),
+                                         "pCtx->sMb.pTransformSize8x8Flag[]");
+    pCtx->sMb.pChromaQp[i] = (int8_t (*)[2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+                               int8_t) * 2,
+                             "pCtx->sMb.pChromaQp[]");
+    pCtx->sMb.pMvd[i][LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+                                  int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]");
+    pCtx->sMb.pMvd[i][LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+                                  int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]");
+    pCtx->sMb.pCbfDc[i] = (uint16_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint16_t),
+                          "pCtx->sMb.pCbfDc[]");
+    pCtx->sMb.pNzc[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
+                        "pCtx->sMb.pNzc[]");
+    pCtx->sMb.pNzcRs[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
+                          "pCtx->sMb.pNzcRs[]");
+    pCtx->sMb.pScaledTCoeff[i] = (int16_t (*)[MB_COEFF_LIST_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth *
+                                 pCtx->sMb.iMbHeight *
+                                 sizeof (int16_t) * MB_COEFF_LIST_SIZE, "pCtx->sMb.pScaledTCoeff[]");
+    pCtx->sMb.pIntraPredMode[i] = (int8_t (*)[8])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+                                    int8_t) * 8,
+                                  "pCtx->sMb.pIntraPredMode[]");
+    pCtx->sMb.pIntra4x4FinalMode[i] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (pCtx->sMb.iMbWidth *
+                                      pCtx->sMb.iMbHeight *
+                                      sizeof (int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pIntra4x4FinalMode[]");
+    pCtx->sMb.pIntraNxNAvailFlag[i] = (uint8_t (*))pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+                                        int8_t),
+                                      "pCtx->sMb.pIntraNxNAvailFlag");
+    pCtx->sMb.pChromaPredMode[i] = (int8_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
+                                   "pCtx->sMb.pChromaPredMode[]");
+    pCtx->sMb.pCbp[i] = (int8_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
+                        "pCtx->sMb.pCbp[]");
+    pCtx->sMb.pSubMbType[i] = (uint32_t (*)[MB_PARTITION_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
+                              sizeof (
+                                uint32_t) * MB_PARTITION_SIZE, "pCtx->sMb.pSubMbType[]");
+    pCtx->sMb.pSliceIdc[i] = (int32_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t),
+                             "pCtx->sMb.pSliceIdc[]"); // using int32_t for slice_idc, 4/21/2010
+    pCtx->sMb.pResidualPredFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
+                                     "pCtx->sMb.pResidualPredFlag[]");
+    pCtx->sMb.pInterPredictionDoneFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+        int8_t), "pCtx->sMb.pInterPredictionDoneFlag[]");
+
+    pCtx->sMb.pMbCorrectlyDecodedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+        bool),
+                                           "pCtx->sMb.pMbCorrectlyDecodedFlag[]");
+    pCtx->sMb.pMbRefConcealedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool),
+                                       "pCtx->sMb.pMbRefConcealedFlag[]");
+
+    // check memory block valid due above allocated..
+    // All allocations are attempted first and verified together here, so a single failure is reported
+    // only after the whole set for this slot has been requested.
+    WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY,
+                           ((NULL == pCtx->sMb.pMbType[i]) ||
+                            (NULL == pCtx->sMb.pMv[i][LIST_0]) ||
+                            (NULL == pCtx->sMb.pMv[i][LIST_1]) ||
+                            (NULL == pCtx->sMb.pRefIndex[i][LIST_0]) ||
+                            (NULL == pCtx->sMb.pRefIndex[i][LIST_1]) ||
+                            (NULL == pCtx->sMb.pDirect[i]) ||
+                            (NULL == pCtx->sMb.pLumaQp[i]) ||
+                            (NULL == pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i]) ||
+                            (NULL == pCtx->sMb.pTransformSize8x8Flag[i]) ||
+                            (NULL == pCtx->sMb.pChromaQp[i]) ||
+                            (NULL == pCtx->sMb.pMvd[i][LIST_0]) ||
+                            (NULL == pCtx->sMb.pMvd[i][LIST_1]) ||
+                            (NULL == pCtx->sMb.pCbfDc[i]) ||
+                            (NULL == pCtx->sMb.pNzc[i]) ||
+                            (NULL == pCtx->sMb.pNzcRs[i]) ||
+                            (NULL == pCtx->sMb.pScaledTCoeff[i]) ||
+                            (NULL == pCtx->sMb.pIntraPredMode[i]) ||
+                            (NULL == pCtx->sMb.pIntra4x4FinalMode[i]) ||
+                            (NULL == pCtx->sMb.pIntraNxNAvailFlag[i]) ||
+                            (NULL == pCtx->sMb.pChromaPredMode[i]) ||
+                            (NULL == pCtx->sMb.pCbp[i]) ||
+                            (NULL == pCtx->sMb.pSubMbType[i]) ||
+                            (NULL == pCtx->sMb.pSliceIdc[i]) ||
+                            (NULL == pCtx->sMb.pResidualPredFlag[i]) ||
+                            (NULL == pCtx->sMb.pInterPredictionDoneFlag[i]) ||
+                            (NULL == pCtx->sMb.pMbRefConcealedFlag[i]) ||
+                            (NULL == pCtx->sMb.pMbCorrectlyDecodedFlag[i])
+                           )
+                          )
+
+    // Fill the slice-idc map with 0xff bytes, i.e. every int32_t entry has all bits set.
+    memset (pCtx->sMb.pSliceIdc[i], 0xff, (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t)));
+
+    ++ i;
+  } while (i < LAYER_NUM_EXCHANGEABLE);
+
+  // Remember what the buffers were sized for so later calls can take the early-return path.
+  pCtx->bInitialDqLayersMem     = true;
+  pCtx->iPicWidthReq            = kiMaxWidth;
+  pCtx->iPicHeightReq           = kiMaxHeight;
+
+  return ERR_NONE;
+}
+
+void UninitialDqLayersContext (PWelsDecoderContext pCtx) {
+  CMemoryAlign* pMa = pCtx->pMemAlign;
+  int32_t i = 0;
+
+  // Release the DQ layer object and every per-MB buffer of each slot, resetting the pointers to NULL.
+  do {
+    PDqLayer pDq = pCtx->pDqLayersList[i];
+
+    // Slots that hold no layer object are skipped entirely.
+    if (NULL != pDq) {
+      if (NULL != pCtx->sMb.pMbType[i]) {
+        pMa->WelsFree (pCtx->sMb.pMbType[i], "pCtx->sMb.pMbType[]");
+        pCtx->sMb.pMbType[i] = NULL;
+      }
+
+      // Buffers kept per reference list; pDirect is not per-list, so it is only released on the first pass.
+      for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+        if (NULL != pCtx->sMb.pMv[i][listIdx]) {
+          pMa->WelsFree (pCtx->sMb.pMv[i][listIdx], "pCtx->sMb.pMv[][]");
+          pCtx->sMb.pMv[i][listIdx] = NULL;
+        }
+        if (NULL != pCtx->sMb.pRefIndex[i][listIdx]) {
+          pMa->WelsFree (pCtx->sMb.pRefIndex[i][listIdx], "pCtx->sMb.pRefIndex[][]");
+          pCtx->sMb.pRefIndex[i][listIdx] = NULL;
+        }
+        if (NULL != pCtx->sMb.pDirect[i]) {
+          pMa->WelsFree (pCtx->sMb.pDirect[i], "pCtx->sMb.pDirect[]");
+          pCtx->sMb.pDirect[i] = NULL;
+        }
+        if (NULL != pCtx->sMb.pMvd[i][listIdx]) {
+          pMa->WelsFree (pCtx->sMb.pMvd[i][listIdx], "pCtx->sMb.pMvd[][]");
+          pCtx->sMb.pMvd[i][listIdx] = NULL;
+        }
+      }
+
+      if (NULL != pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i]) {
+        pMa->WelsFree (pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i], "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
+        pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pTransformSize8x8Flag[i]) {
+        pMa->WelsFree (pCtx->sMb.pTransformSize8x8Flag[i], "pCtx->sMb.pTransformSize8x8Flag[]");
+        pCtx->sMb.pTransformSize8x8Flag[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pLumaQp[i]) {
+        pMa->WelsFree (pCtx->sMb.pLumaQp[i], "pCtx->sMb.pLumaQp[]");
+        pCtx->sMb.pLumaQp[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pChromaQp[i]) {
+        pMa->WelsFree (pCtx->sMb.pChromaQp[i], "pCtx->sMb.pChromaQp[]");
+        pCtx->sMb.pChromaQp[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pCbfDc[i]) {
+        pMa->WelsFree (pCtx->sMb.pCbfDc[i], "pCtx->sMb.pCbfDc[]");
+        pCtx->sMb.pCbfDc[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pNzc[i]) {
+        pMa->WelsFree (pCtx->sMb.pNzc[i], "pCtx->sMb.pNzc[]");
+        pCtx->sMb.pNzc[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pNzcRs[i]) {
+        pMa->WelsFree (pCtx->sMb.pNzcRs[i], "pCtx->sMb.pNzcRs[]");
+        pCtx->sMb.pNzcRs[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pScaledTCoeff[i]) {
+        pMa->WelsFree (pCtx->sMb.pScaledTCoeff[i], "pCtx->sMb.pScaledTCoeff[]");
+        pCtx->sMb.pScaledTCoeff[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pIntraPredMode[i]) {
+        pMa->WelsFree (pCtx->sMb.pIntraPredMode[i], "pCtx->sMb.pIntraPredMode[]");
+        pCtx->sMb.pIntraPredMode[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pIntra4x4FinalMode[i]) {
+        pMa->WelsFree (pCtx->sMb.pIntra4x4FinalMode[i], "pCtx->sMb.pIntra4x4FinalMode[]");
+        pCtx->sMb.pIntra4x4FinalMode[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pIntraNxNAvailFlag[i]) {
+        pMa->WelsFree (pCtx->sMb.pIntraNxNAvailFlag[i], "pCtx->sMb.pIntraNxNAvailFlag");
+        pCtx->sMb.pIntraNxNAvailFlag[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pChromaPredMode[i]) {
+        pMa->WelsFree (pCtx->sMb.pChromaPredMode[i], "pCtx->sMb.pChromaPredMode[]");
+        pCtx->sMb.pChromaPredMode[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pCbp[i]) {
+        pMa->WelsFree (pCtx->sMb.pCbp[i], "pCtx->sMb.pCbp[]");
+        pCtx->sMb.pCbp[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pSubMbType[i]) {
+        pMa->WelsFree (pCtx->sMb.pSubMbType[i], "pCtx->sMb.pSubMbType[]");
+        pCtx->sMb.pSubMbType[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pSliceIdc[i]) {
+        pMa->WelsFree (pCtx->sMb.pSliceIdc[i], "pCtx->sMb.pSliceIdc[]");
+        pCtx->sMb.pSliceIdc[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pResidualPredFlag[i]) {
+        pMa->WelsFree (pCtx->sMb.pResidualPredFlag[i], "pCtx->sMb.pResidualPredFlag[]");
+        pCtx->sMb.pResidualPredFlag[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pInterPredictionDoneFlag[i]) {
+        pMa->WelsFree (pCtx->sMb.pInterPredictionDoneFlag[i], "pCtx->sMb.pInterPredictionDoneFlag[]");
+        pCtx->sMb.pInterPredictionDoneFlag[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pMbCorrectlyDecodedFlag[i]) {
+        pMa->WelsFree (pCtx->sMb.pMbCorrectlyDecodedFlag[i], "pCtx->sMb.pMbCorrectlyDecodedFlag[]");
+        pCtx->sMb.pMbCorrectlyDecodedFlag[i] = NULL;
+      }
+      if (NULL != pCtx->sMb.pMbRefConcealedFlag[i]) {
+        pMa->WelsFree (pCtx->sMb.pMbRefConcealedFlag[i], "pCtx->sMb.pMbRefConcealedFlag[]");
+        pCtx->sMb.pMbRefConcealedFlag[i] = NULL;
+      }
+
+      // Finally the layer object itself.
+      pMa->WelsFree (pDq, "pDq");
+      pCtx->pDqLayersList[i] = NULL;
+    }
+
+    ++i;
+  } while (i < LAYER_NUM_EXCHANGEABLE);
+
+  // Nothing is allocated any more: forget the previously requested picture size.
+  pCtx->bInitialDqLayersMem = false;
+  pCtx->iPicWidthReq        = 0;
+  pCtx->iPicHeightReq       = 0;
+}
+
+void ResetCurrentAccessUnit (PWelsDecoderContext pCtx) {
+  PAccessUnit pAu = pCtx->pAccessUnitList;
+
+  // The current AU is finished with: rewind its position markers.
+  pAu->bCompletedAuFlag = false;
+  pAu->uiStartPos       = 0;
+  pAu->uiEndPos         = 0;
+
+  if (pAu->uiActualUnitsNum > 0) {
+    // Simple list management: the units beyond the actual ones (belonging to the succeeding AU)
+    // are swapped to the head of the list and become the new content.
+    const uint32_t kuiConsumed  = pAu->uiActualUnitsNum;
+    const uint32_t kuiAvailable = pAu->uiAvailUnitsNum;
+    const uint32_t kuiRemaining = kuiAvailable - kuiConsumed;
+
+    for (uint32_t uiIdx = 0; uiIdx < kuiRemaining; ++uiIdx) {
+      PNalUnit pTmp = pAu->pNalUnitsList[kuiConsumed + uiIdx];
+      pAu->pNalUnitsList[kuiConsumed + uiIdx] = pAu->pNalUnitsList[uiIdx];
+      pAu->pNalUnitsList[uiIdx] = pTmp;
+    }
+
+    pAu->uiAvailUnitsNum  = kuiRemaining;
+    pAu->uiActualUnitsNum = pAu->uiAvailUnitsNum;
+  }
+}
+
+/*!
+ * \brief   Force a reset of the current Access Unit NAL list in case of a parsing/decoding error in the current AU
+ * \author
+ * \history 11/16/2009
+ */
+void ForceResetCurrentAccessUnit (PAccessUnit pAu) {
+  // Bring the NAL units located after the current AU's end position to the front of the list,
+  // exchanging them with whatever currently occupies those front slots.
+  for (uint32_t uiFront = 0, uiNext = pAu->uiEndPos + 1; uiNext < pAu->uiAvailUnitsNum; ++uiNext, ++uiFront) {
+    PNalUnit pTmp = pAu->pNalUnitsList[uiNext];
+    pAu->pNalUnitsList[uiNext]  = pAu->pNalUnitsList[uiFront];
+    pAu->pNalUnitsList[uiFront] = pTmp;
+  }
+
+  // Drop the units of the discarded AU from the available count (never going below zero).
+  if (pAu->uiAvailUnitsNum > pAu->uiEndPos) {
+    pAu->uiAvailUnitsNum -= (pAu->uiEndPos + 1);
+  } else {
+    pAu->uiAvailUnitsNum = 0;
+  }
+
+  // Start the next AU from a clean state.
+  pAu->bCompletedAuFlag = false;
+  pAu->uiActualUnitsNum = 0;
+  pAu->uiStartPos       = 0;
+  pAu->uiEndPos         = 0;
+}
+
+//clear current corrupted NAL from pNalUnitsList
+void ForceClearCurrentNal (PAccessUnit pAu) {
+  // Give back the most recently counted unit; an already empty list is left untouched.
+  if (pAu->uiAvailUnitsNum > 0) {
+    pAu->uiAvailUnitsNum -= 1;
+  }
+}
+
+void ForceResetParaSetStatusAndAUList (PWelsDecoderContext pCtx) {
+  PAccessUnit pAu = pCtx->pAccessUnitList;
+
+  // Forget that any SPS / subset SPS / PPS has been seen.
+  pCtx->bSpsExistAheadFlag    = false;
+  pCtx->bSubspsExistAheadFlag = false;
+  pCtx->bPpsExistAheadFlag    = false;
+
+  // Empty the access unit list and rewind its bookkeeping.
+  pAu->uiAvailUnitsNum  = 0;
+  pAu->uiActualUnitsNum = 0;
+  pAu->uiStartPos       = 0;
+  pAu->uiEndPos         = 0;
+  pAu->bCompletedAuFlag = false;
+}
+
+void CheckAvailNalUnitsListContinuity (PWelsDecoderContext pCtx, int32_t iStartIdx, int32_t iEndIdx) {
+  // Walks pNalUnitsList forwards from iStartIdx towards iEndIdx and stops at the first NAL unit that
+  // breaks the layer chain; the index of the last accepted NAL unit is stored in uiEndPos and its
+  // uiLayerDqId in pCtx->uiTargetDqId.
+  PAccessUnit pCurAu = pCtx->pAccessUnitList;
+  PNalUnit pNal = pCurAu->pNalUnitsList[iStartIdx]; // starting nal unit
+
+  // layer ids of the most recently accepted nal unit
+  uint8_t uiPrevDependencyId = pNal->sNalHeaderExt.uiDependencyId;
+  uint8_t uiPrevLayerDqId    = pNal->sNalHeaderExt.uiLayerDqId;
+
+  int32_t iNextIdx = iStartIdx + 1; // first nal unit that is not accepted yet
+
+  for (; iNextIdx <= iEndIdx; ++ iNextIdx) {
+    pNal = pCurAu->pNalUnitsList[iNextIdx];
+
+    const uint8_t kuiDependencyId = pNal->sNalHeaderExt.uiDependencyId;
+    const uint8_t kuiLayerDqId    = pNal->sNalHeaderExt.uiLayerDqId;
+
+    if (kuiDependencyId != uiPrevDependencyId) { // a new dependency arrives
+      const uint8_t kuiQualityId    = pNal->sNalHeaderExt.uiQualityId;
+      const uint8_t kuiRefLayerDqId = pNal->sNalData.sVclNal.sSliceHeaderExt.uiRefLayerDqId;
+
+      // the new dependency must arrive with its base quality layer ...
+      if (kuiQualityId != 0)
+        break;
+      // ... and must reference the layer accepted last, otherwise the chain is broken at this point
+      if (kuiRefLayerDqId != uiPrevLayerDqId)
+        break;
+
+      uiPrevDependencyId = kuiDependencyId;
+    }
+    uiPrevLayerDqId = kuiLayerDqId;
+  }
+
+  // step back onto the last accepted nal unit
+  -- iNextIdx;
+  pCurAu->uiEndPos   = iNextIdx;
+  pCtx->uiTargetDqId = pCurAu->pNalUnitsList[iNextIdx]->sNalHeaderExt.uiLayerDqId;
+}
+
+//main purpose: support multi-slice, i.e. move *pIdxNoInterLayerPred back to the earliest preceding NAL that has
+//iNoInterLayerPredFlag set and the same uiDependencyId, uiQualityId, uiTemporalId, frame_num and POC lsb but another
+//first_mb; for single slice, *pIdxNoInterLayerPred is NOT modified
+void RefineIdxNoInterLayerPred (PAccessUnit pCurAu, int32_t* pIdxNoInterLayerPred) {
+  const int32_t kiOrigIdx = *pIdxNoInterLayerPred;
+  PNalUnit pNal = pCurAu->pNalUnitsList[kiOrigIdx];
+
+  // properties of the NAL unit the search starts from
+  const int32_t kiRefDependId  = pNal->sNalHeaderExt.uiDependencyId;
+  const int32_t kiRefQualityId = pNal->sNalHeaderExt.uiQualityId;
+  const uint8_t kuiRefTId      = pNal->sNalHeaderExt.uiTemporalId;
+  const int32_t kiRefFrameNum  = pNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFrameNum;
+  const int32_t kiRefPoc       = pNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
+  const int32_t kiRefFirstMb   = pNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
+
+  int32_t iRefinedIdx = kiOrigIdx;
+
+  // search backwards; NAL units without iNoInterLayerPredFlag are stepped over
+  for (int32_t iIdx = kiOrigIdx - 1; iIdx >= 0; -- iIdx) {
+    pNal = pCurAu->pNalUnitsList[iIdx];
+    if (!pNal->sNalHeaderExt.iNoInterLayerPredFlag)
+      continue;
+
+    const int32_t kiDependId  = pNal->sNalHeaderExt.uiDependencyId;
+    const int32_t kiQualityId = pNal->sNalHeaderExt.uiQualityId;
+    const int32_t kiTId       = pNal->sNalHeaderExt.uiTemporalId;
+    const int32_t kiFrameNum  = pNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFrameNum;
+    const int32_t kiPoc       = pNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
+    const int32_t kiFirstMb   = pNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
+
+    // same dependency/quality/temporal id, frame_num and POC lsb as the starting NAL unit?
+    const bool kbSamePicture = kiDependId  == kiRefDependId  &&
+                               kiQualityId == kiRefQualityId &&
+                               kiTId       == kuiRefTId      &&
+                               kiFrameNum  == kiRefFrameNum  &&
+                               kiPoc       == kiRefPoc;
+
+    // the first flagged NAL unit that is not another slice of the same picture ends the search
+    if (!kbSamePicture || kiFirstMb == kiRefFirstMb)
+      break;
+
+    iRefinedIdx = iIdx; // one more slice of the same picture found
+  }
+
+  // only a multi-slice find moves the index
+  if (iRefinedIdx != kiOrigIdx) {
+    *pIdxNoInterLayerPred = iRefinedIdx;
+  }
+}
+
+bool CheckPocOfCurValidNalUnits (PAccessUnit pCurAu, int32_t pIdxNoInterLayerPred) {
+  // true when all NAL units in [pIdxNoInterLayerPred + 1, uiEndPos) carry the same iPicOrderCntLsb as the one at
+  // pIdxNoInterLayerPred. NOTE(review): the NAL unit at uiEndPos itself is never compared - confirm this is intended.
+  const int32_t kiStopIdx = pCurAu->uiEndPos;
+  PNalUnit pNal = pCurAu->pNalUnitsList[pIdxNoInterLayerPred];
+  const int32_t kiExpectedPoc = pNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
+  for (int32_t iIdx = pIdxNoInterLayerPred + 1; iIdx < kiStopIdx; ++ iIdx) {
+    pNal = pCurAu->pNalUnitsList[iIdx];
+    const int32_t kiPoc = pNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
+    if (kiPoc != kiExpectedPoc)
+      return false;
+  }
+  return true;
+}
+
+bool CheckIntegrityNalUnitsList (PWelsDecoderContext pCtx) { // selects [uiStartPos, uiEndPos] of the NAL chain to decode in the current AU; false: AU not completed, no NAL with iNoInterLayerPredFlag found, or POC check failed
+  PAccessUnit pCurAu = pCtx->pAccessUnitList;
+  const int32_t kiEndPos = pCurAu->uiEndPos; // end position on entry; pCurAu->uiEndPos itself is rewritten by CheckAvailNalUnitsListContinuity() below
+  int32_t iIdxNoInterLayerPred = 0;
+
+  if (!pCurAu->bCompletedAuFlag)
+    return false;
+
+  if (pCtx->bNewSeqBegin) {
+    pCurAu->uiStartPos = 0;
+    //step1: search the pNalUnit whose iNoInterLayerPredFlag equal to 1 backwards (from uiEndPos to 0)
+    iIdxNoInterLayerPred = kiEndPos;
+    while (iIdxNoInterLayerPred >= 0) {
+      if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
+        break;
+      }
+      --iIdxNoInterLayerPred;
+    }
+    if (iIdxNoInterLayerPred < 0) {
+      //cannot find the Nal Unit whose iNoInterLayerPredFlag equal to 1, MUST STOP decode
+      return false;
+    }
+
+    //step2: support multi-slice, to include all base layer slice
+    RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
+    pCurAu->uiStartPos = iIdxNoInterLayerPred;
+    CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos); // sets pCurAu->uiEndPos and pCtx->uiTargetDqId
+
+    if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
+      return false;
+    }
+
+    pCtx->iCurSeqIntervalTargetDependId = pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalHeaderExt.uiDependencyId; // read back by the non-new-sequence branch below
+    pCtx->iCurSeqIntervalMaxPicWidth  =
+      pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iMbWidth << 4; // MB count << 4
+    pCtx->iCurSeqIntervalMaxPicHeight =
+      pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iMbHeight << 4; // MB count << 4
+  } else { //bNewSeqBegin not set (originally tagged P_SLICE)
+    //step 1: search uiDependencyId equal to pCtx->iCurSeqIntervalTargetDependId backwards (from uiEndPos to 0)
+    bool bGetDependId = false;
+    int32_t iIdxDependId = 0;
+
+    iIdxDependId = kiEndPos;
+    while (iIdxDependId >= 0) {
+      if (pCtx->iCurSeqIntervalTargetDependId == pCurAu->pNalUnitsList[iIdxDependId]->sNalHeaderExt.uiDependencyId) {
+        bGetDependId = true;
+        break;
+      } else {
+        --iIdxDependId;
+      }
+    }
+
+    //step 2: switch according to whether or not find the index of pNalUnit whose uiDependencyId equal to iCurSeqIntervalTargetDependId
+    if (bGetDependId) { //get the index of pNalUnit whose uiDependencyId equal to iCurSeqIntervalTargetDependId
+      bool bGetNoInterPredFront = false;
+      //step 2a: search iNoInterLayerPredFlag [0....iIdxDependId]
+      iIdxNoInterLayerPred = iIdxDependId;
+      while (iIdxNoInterLayerPred >= 0) {
+        if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
+          bGetNoInterPredFront = true;
+          break;
+        }
+        --iIdxNoInterLayerPred;
+      }
+      //step 2b: switch, whether or not find the NAL unit whose iNoInterLayerPredFlag equal to 1 among [0....iIdxDependId]
+      if (bGetNoInterPredFront) { //YES
+        RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
+        pCurAu->uiStartPos = iIdxNoInterLayerPred;
+        CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, iIdxDependId); // chain is limited to iIdxDependId here, not kiEndPos
+
+        if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
+          return false;
+        }
+      } else { //NO, should find the NAL unit whose iNoInterLayerPredFlag equal to 1 among [iIdxDependId....uiEndPos]
+        iIdxNoInterLayerPred = iIdxDependId;
+        while (iIdxNoInterLayerPred <= kiEndPos) {
+          if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
+            break;
+          }
+          ++iIdxNoInterLayerPred;
+        }
+
+        if (iIdxNoInterLayerPred > kiEndPos) {
+          return false; //cannot find the index of pNalUnit whose iNoInterLayerPredFlag = 1
+        }
+
+        RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
+        pCurAu->uiStartPos = iIdxNoInterLayerPred;
+        CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos);
+
+        if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
+          return false;
+        }
+      }
+    } else { //without the index of pNalUnit, should process this AU as common case
+      iIdxNoInterLayerPred = kiEndPos;
+      while (iIdxNoInterLayerPred >= 0) {
+        if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
+          break;
+        }
+        --iIdxNoInterLayerPred;
+      }
+      if (iIdxNoInterLayerPred < 0) {
+        return false; //cannot find the index of pNalUnit whose iNoInterLayerPredFlag = 1
+      }
+
+      RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
+      pCurAu->uiStartPos = iIdxNoInterLayerPred;
+      CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos);
+
+      if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+void CheckOnlyOneLayerInAu (PWelsDecoderContext pCtx) {
+  // Sets pCtx->bOnlyOneLayerInCurAuFlag: it stays true only if every NAL unit in [uiStartPos, uiEndPos]
+  // has the same uiDependencyId, uiQualityId and uiTemporalId as the NAL unit at uiStartPos.
+  PAccessUnit pCurAu = pCtx->pAccessUnitList;
+  const int32_t kiLastIdx  = pCurAu->uiEndPos;
+  const int32_t kiFirstIdx = pCurAu->uiStartPos;
+  PNalUnit pNal = pCurAu->pNalUnitsList[kiFirstIdx];
+
+  // layer ids of the first NAL unit, the reference for all following ones
+  const uint8_t kuiRefDId = pNal->sNalHeaderExt.uiDependencyId;
+  const uint8_t kuiRefQId = pNal->sNalHeaderExt.uiQualityId;
+  const uint8_t kuiRefTId = pNal->sNalHeaderExt.uiTemporalId;
+
+  pCtx->bOnlyOneLayerInCurAuFlag = true;
+
+  // with only one NAL in pNalUnitsList the loop body is never entered
+  for (int32_t iIdx = kiFirstIdx + 1; iIdx <= kiLastIdx; ++ iIdx) {
+    pNal = pCurAu->pNalUnitsList[iIdx];
+
+    const uint8_t kuiDId = pNal->sNalHeaderExt.uiDependencyId;
+    const uint8_t kuiQId = pNal->sNalHeaderExt.uiQualityId;
+    const uint8_t kuiTId = pNal->sNalHeaderExt.uiTemporalId;
+    const bool kbSameLayer = (kuiDId == kuiRefDId) && (kuiQId == kuiRefQId)
+                             && (kuiTId == kuiRefTId);
+
+    if (!kbSameLayer) {
+      pCtx->bOnlyOneLayerInCurAuFlag = false;
+      break;
+    }
+  }
+}
+
+int32_t WelsDecodeAccessUnitStart (PWelsDecoderContext pCtx) {
+  // Roll back the NAL units that do not belong to the current access unit (per the original note)
+  const int32_t kiUpdateRet = UpdateAccessUnit (pCtx);
+  if (ERR_NONE != kiUpdateRet) {
+    return kiUpdateRet;
+  }
+  pCtx->pAccessUnitList->uiStartPos = 0;
+
+  // The NAL list is only checked when bAvcBasedFlag is not set
+  if (pCtx->bAvcBasedFlag) {
+    return ERR_NONE;
+  }
+
+  if (!CheckIntegrityNalUnitsList (pCtx)) {
+    pCtx->iErrorCode |= dsBitstreamError;
+    return dsBitstreamError;
+  }
+  CheckOnlyOneLayerInAu (pCtx); // if the AU has only one layer, deblocking based on AVC can be used
+  return ERR_NONE;
+}
+
+void WelsDecodeAccessUnitEnd (PWelsDecoderContext pCtx) {
+  // keep the header info of the last NAL unit (at uiEndPos) of this access unit as "last" header info
+  PAccessUnit pAu   = pCtx->pAccessUnitList;
+  PNalUnit pLastNal = pAu->pNalUnitsList[pAu->uiEndPos];
+
+  memcpy (&pCtx->sLastSliceHeader, &pLastNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader, sizeof (SSliceHeader));
+  memcpy (&pCtx->sLastNalHdrExt, &pLastNal->sNalHeaderExt, sizeof (SNalUnitHeaderExt));
+
+  ResetCurrentAccessUnit (pCtx); // uninitialize context of current access unit
+}
+
+/* CheckNewSeqBeginAndUpdateActiveLayerSps
+ * Decides whether the AU to be constructed starts a new sequence and updates pCtx->pActiveLayerSps accordingly.
+ * return:
+ * true - the AU to be constructed is the start of a new sequence; false - not
+ */
+static bool CheckNewSeqBeginAndUpdateActiveLayerSps (PWelsDecoderContext pCtx) {
+  PAccessUnit pCurAu = pCtx->pAccessUnitList;
+  PSps pAuLayerSps[MAX_LAYER_NUM] = { NULL }; // sps used by each dependency layer of the current au
+  bool bIdrFound = false;
+
+  // track the layer sps for the current au, and look for IDR NAL units on the way
+  for (unsigned int uiIdx = pCurAu->uiStartPos; uiIdx <= pCurAu->uiEndPos; uiIdx++) {
+    PNalUnit pNal = pCurAu->pNalUnitsList[uiIdx];
+    const uint32_t kuiDid = pNal->sNalHeaderExt.uiDependencyId;
+    pAuLayerSps[kuiDid] = pNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps;
+    if (pNal->sNalHeaderExt.bIdrFlag
+        || pNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR) {
+      bIdrFound = true;
+    }
+  }
+
+  // highest layer index holding an sps, separately for the active set and the current au (0 if none)
+  int iTopActiveLayer = 0, iTopAuLayer = 0;
+  for (int i = 0; i < MAX_LAYER_NUM; i++) {
+    if (NULL != pCtx->pActiveLayerSps[i])
+      iTopActiveLayer = i;
+    if (NULL != pAuLayerSps[i])
+      iTopAuLayer = i;
+  }
+
+  // a different top layer, or a different sps on the top layer, also begins a new sequence
+  const bool kbTopLayerChanged = (iTopAuLayer != iTopActiveLayer)
+                                 || (pAuLayerSps[iTopAuLayer] != pCtx->pActiveLayerSps[iTopActiveLayer]);
+  const bool kbNewSeq = bIdrFound || kbTopLayerChanged;
+
+  if (kbNewSeq) {
+    // new sequence start: the active layer sps are replaced by those of the current au
+    memcpy (&pCtx->pActiveLayerSps[0], &pAuLayerSps[0], MAX_LAYER_NUM * sizeof (PSps));
+    return true;
+  }
+
+  // same sequence: only fill active layers that are still null but have an sps in the current au
+  for (int i = 0; i < MAX_LAYER_NUM; i++) {
+    if (NULL == pCtx->pActiveLayerSps[i] && NULL != pAuLayerSps[i]) {
+      pCtx->pActiveLayerSps[i] = pAuLayerSps[i];
+    }
+  }
+  return false;
+}
+
+static void WriteBackActiveParameters (PWelsDecoderContext pCtx) {
+  // Each parameter set held in the extra buffer slot (index MAX_PPS_COUNT / MAX_SPS_COUNT) is copied to the slot of its own id
+  const bool kbPps       = (pCtx->iOverwriteFlags & OVERWRITE_PPS) != 0;
+  const bool kbSps       = (pCtx->iOverwriteFlags & OVERWRITE_SPS) != 0;
+  const bool kbSubsetSps = (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS) != 0;
+  if (kbPps)
+    memcpy (&pCtx->sPpsBuffer[pCtx->sPpsBuffer[MAX_PPS_COUNT].iPpsId], &pCtx->sPpsBuffer[MAX_PPS_COUNT], sizeof (SPps));
+  if (kbSps)
+    memcpy (&pCtx->sSpsBuffer[pCtx->sSpsBuffer[MAX_SPS_COUNT].iSpsId], &pCtx->sSpsBuffer[MAX_SPS_COUNT], sizeof (SSps));
+  if (kbSubsetSps)
+    memcpy (&pCtx->sSubsetSpsBuffer[pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT].sSps.iSpsId], &pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT], sizeof (SSubsetSps));
+  if (kbSps || kbSubsetSps)
+    pCtx->bNewSeqBegin = true; // an SPS or subset SPS overwrite marks the begin of a new sequence
+  pCtx->iOverwriteFlags = OVERWRITE_NONE;
+}
+
+/*
+ * DecodeFinishUpdate
+ * decoder finish decoding: write back the pending active parameter sets, then derive the new seq status
+ * (bNewSeqBegin) from the write back result and bNextNewSeqBegin
+ */
+void DecodeFinishUpdate (PWelsDecoderContext pCtx) {
+  pCtx->bNewSeqBegin = false;
+  WriteBackActiveParameters (pCtx); // may set bNewSeqBegin again
+  if (pCtx->bNextNewSeqBegin) {
+    pCtx->bNewSeqBegin     = true;
+    pCtx->bNextNewSeqBegin = false; // consumed, reset it
+  }
+  if (pCtx->bNewSeqBegin) ResetActiveSPSForEachLayer (pCtx);
+}
+
+/*
+ * ConstructAccessUnit
+ * construct an access unit from the NAL units collected in pCtx->pAccessUnitList and decode it; one or more
+ * NAL units are joined into a collective access unit.
+ * parameter\
+ *  pCtx:       decoder context holding the access unit list
+ *  ppDst:      passed through to DecodeCurrentAccessUnit()
+ *  pDstInfo:   passed through to DecodeCurrentAccessUnit(); iBufferStatus is set to 0 when the access unit
+ *              start check fails and bParseOnly is not set
+ * return:
+ *  ERR_NONE - success; otherwise returned error_no defined in error_no.h
+ */
+int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
+  int32_t iErr;
+  PAccessUnit pCurAu = pCtx->pAccessUnitList;
+  pCtx->bAuReadyFlag = false;
+  pCtx->bLastHasMmco5 = false;
+  bool bTmpNewSeqBegin = CheckNewSeqBeginAndUpdateActiveLayerSps (pCtx);
+  pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || bTmpNewSeqBegin; // a new-sequence state that is already set is kept
+  iErr = WelsDecodeAccessUnitStart (pCtx);
+  GetVclNalTemporalId (pCtx); // NOTE(review): called before iErr is checked, i.e. also when the start check failed - confirm intended
+
+  if (ERR_NONE != iErr) {
+    ForceResetCurrentAccessUnit (pCtx->pAccessUnitList);
+    if (!pCtx->pParam->bParseOnly)
+      pDstInfo->iBufferStatus = 0;
+    pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || pCtx->bNextNewSeqBegin; // same new seq status update as in DecodeFinishUpdate(), without the parameter write back
+    pCtx->bNextNewSeqBegin = false; // reset it
+    if (pCtx->bNewSeqBegin)
+      ResetActiveSPSForEachLayer (pCtx);
+    return iErr;
+  }
+
+  pCtx->pSps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps; // parameter sets of the first NAL unit to decode
+  pCtx->pPps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pPps;
+
+  //try to allocate or relocate DPB memory only when new sequence is coming.
+  if (pCtx->bNewSeqBegin) {
+    WelsResetRefPic (pCtx); //clear ref pPic when IDR NAL
+    iErr = SyncPictureResolutionExt (pCtx, pCtx->pSps->iMbWidth, pCtx->pSps->iMbHeight);
+
+    if (ERR_NONE != iErr) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "sync picture resolution ext failed,  the error is %d", iErr);
+      return iErr; // NOTE(review): returns without WelsDecodeAccessUnitEnd() or an AU reset - confirm the caller handles this
+    }
+  }
+
+  iErr = DecodeCurrentAccessUnit (pCtx, ppDst, pDstInfo);
+
+  WelsDecodeAccessUnitEnd (pCtx); // runs whether or not decoding succeeded
+
+  if (ERR_NONE != iErr) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "returned error from decoding:[0x%x]", iErr);
+    return iErr;
+  }
+
+  return ERR_NONE;
+}
+
+static inline void InitDqLayerInfo (PDqLayer pDqLayer, PLayerInfo pLayerInfo, PNalUnit pNalUnit, PPicture pPicDec) {
+  // Fills pDqLayer from pLayerInfo and from the NAL header extension / slice header (extension) of pNalUnit
+  PNalUnitHeaderExt pNalHdrExt = &pNalUnit->sNalHeaderExt;
+  PSliceHeaderExt pShExt       = &pNalUnit->sNalData.sVclNal.sSliceHeaderExt;
+  PSliceHeader pSh             = &pShExt->sSliceHeader;
+  const uint8_t kuiQualityId   = pNalHdrExt->uiQualityId;
+
+  memcpy (&pDqLayer->sLayerInfo, pLayerInfo, sizeof (SLayerInfo)); //confirmed_safe_unsafe_usage
+
+  pDqLayer->pDec      = pPicDec;
+  pDqLayer->iMbWidth  = pSh->iMbWidth;  // MB width of this picture
+  pDqLayer->iMbHeight = pSh->iMbHeight; // MB height of this picture
+
+  pDqLayer->uiLayerDqId        = pNalHdrExt->uiLayerDqId; // dq_id of current layer
+  pDqLayer->bUseRefBasePicFlag = pNalHdrExt->bUseRefBasePicFlag;
+  pDqLayer->iSliceIdcBackup    = (pSh->iFirstMbInSlice << 7) | (pNalHdrExt->uiDependencyId << 4) | (pNalHdrExt->uiQualityId);
+
+  /* Common syntax elements across all slices of a DQLayer */
+  pDqLayer->uiPpsId                                = pLayerInfo->pPps->iPpsId;
+  pDqLayer->iSliceGroupChangeCycle                 = pSh->iSliceGroupChangeCycle;
+  pDqLayer->uiDisableInterLayerDeblockingFilterIdc = pShExt->uiDisableInterLayerDeblockingFilterIdc;
+  pDqLayer->iInterLayerSliceAlphaC0Offset          = pShExt->iInterLayerSliceAlphaC0Offset;
+  pDqLayer->iInterLayerSliceBetaOffset             = pShExt->iInterLayerSliceBetaOffset;
+  pDqLayer->bStoreRefBasePicFlag                   = pShExt->bStoreRefBasePicFlag;
+  pDqLayer->bTCoeffLevelPredFlag                   = pShExt->bTCoeffLevelPredFlag;
+  pDqLayer->bConstrainedIntraResamplingFlag        = pShExt->bConstrainedIntraResamplingFlag;
+  pDqLayer->uiRefLayerDqId                         = pShExt->uiRefLayerDqId;
+  pDqLayer->uiRefLayerChromaPhaseXPlus1Flag        = pShExt->uiRefLayerChromaPhaseXPlus1Flag;
+  pDqLayer->uiRefLayerChromaPhaseYPlus1            = pShExt->uiRefLayerChromaPhaseYPlus1;
+
+  if (kuiQualityId != BASE_QUALITY_ID) {
+    // the weighted prediction flags stay off when the NAL unit is not of base quality
+    pDqLayer->bUseWeightPredictionFlag = false;
+    pDqLayer->bUseWeightedBiPredIdc    = false;
+    return;
+  }
+  pDqLayer->pRefPicListReordering    = &pSh->pRefPicListReordering;
+  pDqLayer->pRefPicMarking           = &pSh->sRefMarking;
+  pDqLayer->pRefPicBaseMarking       = &pShExt->sRefBasePicMarking;
+  pDqLayer->bUseWeightPredictionFlag = pSh->pPps->bWeightedPredFlag;
+  pDqLayer->bUseWeightedBiPredIdc    = pSh->pPps->uiWeightedBipredIdc != 0;
+  if (pSh->pPps->bWeightedPredFlag || pSh->pPps->uiWeightedBipredIdc) {
+    pDqLayer->pPredWeightTable = &pSh->sPredWeightTable;
+  }
+}
+
+void WelsDqLayerDecodeStart (PWelsDecoderContext pCtx, PNalUnit pCurNal, PSps pSps, PPps pPps) {
+  // Record the slice header of pCurNal (pointer, slice type, frame_num) in the context
+  PSliceHeader pSliceHdr = &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
+  pCtx->pSliceHeader = pSliceHdr;
+  pCtx->iFrameNum    = pSliceHdr->iFrameNum;
+  pCtx->eSliceType   = pSliceHdr->eSliceType;
+  pCtx->bUsedAsRef   = false;
+
+  // hand the parameter sets passed in to the decoder statistics
+  UpdateDecoderStatisticsForActiveParaset (& (pCtx->sDecoderStatistics), pSps, pPps);
+}
+
+int32_t InitRefPicList (PWelsDecoderContext pCtx, const uint8_t kuiNRi, int32_t iPoc) { // calls the ref list init routine matching pCtx->eSliceType, then WelsReorderRefList() for non I/SI slices; kuiNRi is not used in this body
+  int32_t iRet = ERR_NONE;
+  if (pCtx->eSliceType == B_SLICE) {
+    iRet = WelsInitBSliceRefList (pCtx, iPoc);
+    CreateImplicitWeightTable (pCtx); // called regardless of the iRet just returned
+  } else
+    iRet = WelsInitRefList (pCtx, iPoc);
+  if ((pCtx->eSliceType != I_SLICE && pCtx->eSliceType != SI_SLICE)) { // NOTE(review): iRet from the init call above is overwritten below, so an init error is not returned for these slice types - confirm intended
+#if 0
+    if (pCtx->pSps->uiProfileIdc != 66 && pCtx->pPps->bEntropyCodingModeFlag)
+      iRet = WelsReorderRefList2 (pCtx);
+    else
+#endif
+      iRet = WelsReorderRefList (pCtx); // with the "#if 0" region compiled out this is the only statement of the if body
+  }
+
+  return iRet;
+}
+
+void InitCurDqLayerData (PWelsDecoderContext pCtx, PDqLayer pCurDq) {
+  // Points every per-MB data pointer of pCurDq at entry [0] of the matching pCtx->sMb buffer; no-op on NULL input
+  if (NULL == pCtx || NULL == pCurDq) return;
+  pCurDq->pMbType                         = pCtx->sMb.pMbType[0];
+  pCurDq->pSubMbType                      = pCtx->sMb.pSubMbType[0];
+  pCurDq->pSliceIdc                       = pCtx->sMb.pSliceIdc[0];
+  pCurDq->pMv[LIST_0]                     = pCtx->sMb.pMv[0][LIST_0];
+  pCurDq->pMv[LIST_1]                     = pCtx->sMb.pMv[0][LIST_1];
+  pCurDq->pMvd[LIST_0]                    = pCtx->sMb.pMvd[0][LIST_0];
+  pCurDq->pMvd[LIST_1]                    = pCtx->sMb.pMvd[0][LIST_1];
+  pCurDq->pRefIndex[LIST_0]               = pCtx->sMb.pRefIndex[0][LIST_0];
+  pCurDq->pRefIndex[LIST_1]               = pCtx->sMb.pRefIndex[0][LIST_1];
+  pCurDq->pDirect                         = pCtx->sMb.pDirect[0];
+  pCurDq->pNoSubMbPartSizeLessThan8x8Flag = pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[0];
+  pCurDq->pTransformSize8x8Flag           = pCtx->sMb.pTransformSize8x8Flag[0];
+  pCurDq->pLumaQp                         = pCtx->sMb.pLumaQp[0];
+  pCurDq->pChromaQp                       = pCtx->sMb.pChromaQp[0];
+  pCurDq->pCbp                            = pCtx->sMb.pCbp[0];
+  pCurDq->pCbfDc                          = pCtx->sMb.pCbfDc[0];
+  pCurDq->pNzc                            = pCtx->sMb.pNzc[0];
+  pCurDq->pNzcRs                          = pCtx->sMb.pNzcRs[0];
+  pCurDq->pScaledTCoeff                   = pCtx->sMb.pScaledTCoeff[0];
+  pCurDq->pIntraPredMode                  = pCtx->sMb.pIntraPredMode[0];
+  pCurDq->pIntra4x4FinalMode              = pCtx->sMb.pIntra4x4FinalMode[0];
+  pCurDq->pIntraNxNAvailFlag              = pCtx->sMb.pIntraNxNAvailFlag[0];
+  pCurDq->pChromaPredMode                 = pCtx->sMb.pChromaPredMode[0];
+  pCurDq->pInterPredictionDoneFlag        = pCtx->sMb.pInterPredictionDoneFlag[0];
+  pCurDq->pResidualPredFlag               = pCtx->sMb.pResidualPredFlag[0];
+  pCurDq->pMbCorrectlyDecodedFlag         = pCtx->sMb.pMbCorrectlyDecodedFlag[0];
+  pCurDq->pMbRefConcealedFlag             = pCtx->sMb.pMbRefConcealedFlag[0];
+}
+
+/*
+ * DecodeCurrentAccessUnit
+ * Decode current access unit when current AU is completed.
+ */
+int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
+  int32_t iRefCount[LIST_A];
+  PNalUnit pNalCur = NULL;
+  PAccessUnit pCurAu = pCtx->pAccessUnitList;
+
+  int32_t iIdx = pCurAu->uiStartPos;
+  int32_t iEndIdx = pCurAu->uiEndPos;
+
+  int32_t iPpsId = 0;
+  int32_t iRet = ERR_NONE;
+
+  bool bAllRefComplete = true; // Assume default all ref picutres are complete
+
+  const uint8_t kuiTargetLayerDqId = GetTargetDqId (pCtx->uiTargetDqId, pCtx->pParam);
+  const uint8_t kuiDependencyIdMax = (kuiTargetLayerDqId & 0x7F) >> 4;
+  int16_t iLastIdD = -1, iLastIdQ = -1;
+  int16_t iCurrIdD = 0, iCurrIdQ = 0;
+  uint8_t uiNalRefIdc = 0;
+  bool bFreshSliceAvailable =
+    true; // Another fresh slice comingup for given dq layer, for multiple slices in case of header parts of slices sometimes loss over error-prone channels, 8/14/2008
+
+  //update pCurDqLayer at the starting of AU decoding
+  if (pCtx->bInitialDqLayersMem) {
+    pCtx->pCurDqLayer = pCtx->pDqLayersList[0];
+  }
+
+  InitCurDqLayerData (pCtx, pCtx->pCurDqLayer);
+
+  pNalCur = pCurAu->pNalUnitsList[iIdx];
+  while (iIdx <= iEndIdx) {
+    PDqLayer dq_cur = pCtx->pCurDqLayer;
+    SLayerInfo pLayerInfo;
+    PSliceHeaderExt pShExt = NULL;
+    PSliceHeader pSh = NULL;
+
+    if (pCtx->pDec == NULL) {
+      pCtx->pDec = PrefetchPic (pCtx->pPicBuff);
+      if (pCtx->iTotalNumMbRec != 0)
+        pCtx->iTotalNumMbRec = 0;
+
+      if (NULL == pCtx->pDec) {
+        WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+                 "DecodeCurrentAccessUnit()::::::PrefetchPic ERROR, pSps->iNumRefFrames:%d.",
+                 pCtx->pSps->iNumRefFrames);
+        // The error code here need to be separated from the dsOutOfMemory
+        pCtx->iErrorCode |= dsOutOfMemory;
+        return ERR_INFO_REF_COUNT_OVERFLOW;
+      }
+      pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
+    } else if (pCtx->iTotalNumMbRec == 0) { //pDec != NULL, already start
+      pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
+    }
+    pCtx->pDec->uiTimeStamp = pNalCur->uiTimeStamp;
+
+    if (pCtx->iTotalNumMbRec == 0) { //Picture start to decode
+      for (int32_t i = 0; i < LAYER_NUM_EXCHANGEABLE; ++ i)
+        memset (pCtx->sMb.pSliceIdc[i], 0xff, (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t)));
+      memset (pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool));
+      memset (pCtx->pCurDqLayer->pMbRefConcealedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool));
+      pCtx->pDec->iMbNum = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight;
+      pCtx->pDec->iMbEcedNum = 0;
+      pCtx->pDec->iMbEcedPropNum = 0;
+    }
+    pCtx->bRPLRError = false;
+    GetI4LumaIChromaAddrTable (pCtx->iDecBlockOffsetArray, pCtx->pDec->iLinesize[0], pCtx->pDec->iLinesize[1]);
+
+    if (pNalCur->sNalHeaderExt.uiLayerDqId > kuiTargetLayerDqId) { // confirmed pNalCur will never be NULL
+      break; // Per formance it need not to decode the remaining bits any more due to given uiLayerDqId required, 9/2/2009
+    }
+
+    memset (&pLayerInfo, 0, sizeof (SLayerInfo));
+
+    /*
+     *  Loop decoding for slices (even FMO and/ multiple slices) within a dq layer
+     */
+    while (iIdx <= iEndIdx) {
+      bool         bReconstructSlice;
+      iCurrIdQ  = pNalCur->sNalHeaderExt.uiQualityId;
+      iCurrIdD  = pNalCur->sNalHeaderExt.uiDependencyId;
+      pSh       = &pNalCur->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
+      pShExt    = &pNalCur->sNalData.sVclNal.sSliceHeaderExt;
+      pCtx->bRPLRError = false;
+      bReconstructSlice = CheckSliceNeedReconstruct (pNalCur->sNalHeaderExt.uiLayerDqId, kuiTargetLayerDqId);
+
+      memcpy (&pLayerInfo.sNalHeaderExt, &pNalCur->sNalHeaderExt, sizeof (SNalUnitHeaderExt)); //confirmed_safe_unsafe_usage
+
+      pCtx->pDec->iFrameNum = pSh->iFrameNum;
+      pCtx->pDec->iFramePoc = pSh->iPicOrderCntLsb; // still can not obtain correct, because current do not support POCtype 2
+      pCtx->pDec->bIdrFlag = pNalCur->sNalHeaderExt.bIdrFlag;
+
+      memcpy (&pLayerInfo.sSliceInLayer.sSliceHeaderExt, pShExt, sizeof (SSliceHeaderExt)); //confirmed_safe_unsafe_usage
+      pLayerInfo.sSliceInLayer.bSliceHeaderExtFlag      = pNalCur->sNalData.sVclNal.bSliceHeaderExtFlag;
+      pLayerInfo.sSliceInLayer.eSliceType               = pSh->eSliceType;
+      pLayerInfo.sSliceInLayer.iLastMbQp                = pSh->iSliceQp;
+      dq_cur->pBitStringAux = &pNalCur->sNalData.sVclNal.sSliceBitsRead;
+
+      uiNalRefIdc = pNalCur->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc;
+
+      iPpsId = pSh->iPpsId;
+
+      pLayerInfo.pPps = pSh->pPps;
+      pLayerInfo.pSps = pSh->pSps;
+      pLayerInfo.pSubsetSps = pShExt->pSubsetSps;
+
+      pCtx->pFmo = &pCtx->sFmoList[iPpsId];
+      iRet = FmoParamUpdate (pCtx->pFmo, pLayerInfo.pSps, pLayerInfo.pPps, &pCtx->iActiveFmoNum, pCtx->pMemAlign);
+      if (ERR_NONE != iRet) {
+        if (iRet == ERR_INFO_OUT_OF_MEMORY) {
+          pCtx->iErrorCode |= dsOutOfMemory;
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "DecodeCurrentAccessUnit(), Fmo param alloc failed");
+        } else {
+          pCtx->iErrorCode |= dsBitstreamError;
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "DecodeCurrentAccessUnit(), FmoParamUpdate failed, eSliceType: %d.",
+                   pSh->eSliceType);
+        }
+        return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_FMO_INIT_FAIL);
+      }
+
+      bFreshSliceAvailable = (iCurrIdD != iLastIdD
+                              || iCurrIdQ != iLastIdQ);        // do not need condition of (first_mb == 0) due multiple slices might be disorder
+
+      WelsDqLayerDecodeStart (pCtx, pNalCur, pLayerInfo.pSps, pLayerInfo.pPps);
+
+      if (iCurrIdQ == BASE_QUALITY_ID) {
+        ST64 (iRefCount, LD64 (pLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiRefCount));
+      }
+
+      if ((iLastIdD < 0) ||  //case 1: first layer
+          (iLastIdD == iCurrIdD)) { //case 2: same uiDId
+        InitDqLayerInfo (dq_cur, &pLayerInfo, pNalCur, pCtx->pDec);
+
+        if (!dq_cur->sLayerInfo.pSps->bGapsInFrameNumValueAllowedFlag) {
+          const bool kbIdrFlag = dq_cur->sLayerInfo.sNalHeaderExt.bIdrFlag
+                                 || (dq_cur->sLayerInfo.sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR);
+          // Subclause 8.2.5.2 Decoding process for gaps in frame_num
+          if (!kbIdrFlag  &&
+              pSh->iFrameNum != pCtx->iPrevFrameNum &&
+              pSh->iFrameNum != ((pCtx->iPrevFrameNum + 1) & ((1 << dq_cur->sLayerInfo.pSps->uiLog2MaxFrameNum) - 1))) {
+            WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+                     "referencing pictures lost due frame gaps exist, prev_frame_num: %d, curr_frame_num: %d", pCtx->iPrevFrameNum,
+                     pSh->iFrameNum);
+
+            bAllRefComplete = false;
+            pCtx->iErrorCode |= dsRefLost;
+            if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+#ifdef LONG_TERM_REF
+              pCtx->bParamSetsLostFlag = true;
+#else
+              pCtx->bReferenceLostAtT0Flag = true;
+#endif
+              return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_REFERENCE_PIC_LOST);
+            }
+          }
+        }
+
+        if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID) {
+          iRet = InitRefPicList (pCtx, uiNalRefIdc, pSh->iPicOrderCntLsb);
+          if (iRet) {
+            pCtx->bRPLRError = true;
+            bAllRefComplete = false; // RPLR error, set ref pictures complete flag false
+            HandleReferenceLost (pCtx, pNalCur);
+            WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
+                     "reference picture introduced by this frame is lost during transmission! uiTId: %d",
+                     pNalCur->sNalHeaderExt.uiTemporalId);
+            if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+              if (pCtx->iTotalNumMbRec == 0)
+                pCtx->pDec = NULL;
+              return iRet;
+            }
+          }
+        }
+
+        iRet = WelsDecodeSlice (pCtx, bFreshSliceAvailable, pNalCur);
+
+        //Output good store_base reconstruction when enhancement quality layer occurred error for MGS key picture case
+        if (iRet != ERR_NONE) {
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+                   "DecodeCurrentAccessUnit() failed (%d) in frame: %d uiDId: %d uiQId: %d",
+                   iRet, pSh->iFrameNum, iCurrIdD, iCurrIdQ);
+          bAllRefComplete = false;
+          HandleReferenceLostL0 (pCtx, pNalCur);
+          if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+            if (pCtx->iTotalNumMbRec == 0)
+              pCtx->pDec = NULL;
+            return iRet;
+          }
+        }
+
+        if (bReconstructSlice) {
+          if ((iRet = WelsDecodeConstructSlice (pCtx, pNalCur)) != ERR_NONE) {
+            pCtx->pDec->bIsComplete = false; // reconstruction error, directly set the flag false
+            return iRet;
+          }
+        }
+        if (bAllRefComplete && pCtx->eSliceType != I_SLICE) {
+          if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) {
+            bAllRefComplete &= CheckRefPicturesComplete (pCtx);
+          } else {
+            bAllRefComplete = false;
+          }
+        }
+      }
+#if defined (_DEBUG) &&  !defined (CODEC_FOR_TESTBED)
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "cur_frame : %d\tiCurrIdD : %d\n ",
+               dq_cur->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFrameNum, iCurrIdD);
+#endif//#if !CODEC_FOR_TESTBED
+      iLastIdD = iCurrIdD;
+      iLastIdQ = iCurrIdQ;
+
+      //pNalUnitsList overflow.
+      ++ iIdx;
+      if (iIdx <= iEndIdx) {
+        pNalCur = pCurAu->pNalUnitsList[iIdx];
+      } else {
+        pNalCur = NULL;
+      }
+
+      if (pNalCur == NULL ||
+          iLastIdD != pNalCur->sNalHeaderExt.uiDependencyId ||
+          iLastIdQ != pNalCur->sNalHeaderExt.uiQualityId)
+        break;
+    }
+
+    // Set the current dec picture complete flag. The flag will be reset when current picture need do ErrorCon.
+    pCtx->pDec->bIsComplete = bAllRefComplete;
+    if (!pCtx->pDec->bIsComplete) {  // Ref pictures ECed, result in ECed
+      pCtx->iErrorCode |= dsDataErrorConcealed;
+    }
+
+    // A dq layer decoded here
+#if defined (_DEBUG) &&  !defined (CODEC_FOR_TESTBED)
+#undef fprintf
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "POC: #%d, FRAME: #%d, D: %d, Q: %d, T: %d, P: %d, %d\n",
+             pSh->iPicOrderCntLsb, pSh->iFrameNum, iCurrIdD, iCurrIdQ, dq_cur->sLayerInfo.sNalHeaderExt.uiTemporalId,
+             dq_cur->sLayerInfo.sNalHeaderExt.uiPriorityId, dq_cur->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iSliceQp);
+#endif//#if !CODEC_FOR_TESTBED
+
+    if (dq_cur->uiLayerDqId == kuiTargetLayerDqId) {
+      if (!pCtx->bInstantDecFlag) {
+        if (!pCtx->pParam->bParseOnly) {
+          //Do error concealment here
+          if ((NeedErrorCon (pCtx)) && (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE)) {
+            ImplementErrorCon (pCtx);
+            pCtx->iTotalNumMbRec = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight;
+            pCtx->pDec->iSpsId = pCtx->pSps->iSpsId;
+            pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
+          }
+        }
+      }
+
+      iRet = DecodeFrameConstruction (pCtx, ppDst, pDstInfo);
+      if (iRet)
+        return iRet;
+
+      pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; //store latest decoded picture for EC
+      pCtx->bUsedAsRef = false;
+      if (uiNalRefIdc > 0) {
+        pCtx->bUsedAsRef = true;
+        //save MBType, MV and RefIndex for use in B-Slice direct mode
+        memcpy (pCtx->pDec->pMbType, pCtx->pCurDqLayer->pMbType, pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint32_t));
+        memcpy (pCtx->pDec->pMv[LIST_0], pCtx->pCurDqLayer->pMv[LIST_0],
+                pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM);
+        memcpy (pCtx->pDec->pMv[LIST_1], pCtx->pCurDqLayer->pMv[LIST_1],
+                pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM);
+        memcpy (pCtx->pDec->pRefIndex[LIST_0], pCtx->pCurDqLayer->pRefIndex[LIST_0],
+                pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * MB_BLOCK4x4_NUM);
+        memcpy (pCtx->pDec->pRefIndex[LIST_1], pCtx->pCurDqLayer->pRefIndex[LIST_1],
+                pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * MB_BLOCK4x4_NUM);
+        for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+          for (uint32_t i = 0; i < pCtx->sRefPic.uiRefCount[listIdx]; ++i) {
+            pCtx->pDec->pRefPic[listIdx][i] = pCtx->sRefPic.pRefList[listIdx][i];
+          }
+        }
+        iRet = WelsMarkAsRef (pCtx);
+        if (iRet != ERR_NONE) {
+          if (iRet == ERR_INFO_DUPLICATE_FRAME_NUM)
+            pCtx->iErrorCode |= dsBitstreamError;
+          if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+            pCtx->pDec = NULL;
+            return iRet;
+          }
+        }
+        if (!pCtx->pParam->bParseOnly)
+          ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel,
+                                    pCtx->pDec->iLinesize,
+                                    pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
+      }
+      pCtx->pDec = NULL; //after frame decoding, always set to NULL
+    }
+
+    // need update frame_num due current frame is well decoded
+    if (pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0)
+      pCtx->iPrevFrameNum = pSh->iFrameNum;
+    if (pCtx->bLastHasMmco5)
+      pCtx->iPrevFrameNum = 0;
+  }
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief  Decide whether the current NAL starts a new access unit and, if the
+ *         previous picture is still pending, finish that picture.
+ *
+ * For a VCL NAL the boundary is decided by CheckAccessUnitBoundaryExt() on the last
+ * decoded NAL/slice header versus the NAL at uiEndPos of the AU list, and only when
+ * some macroblocks have already been reconstructed. For a non-VCL NAL the boundary
+ * depends on the NAL type (AU delimiter and SEI always; SPS / subset SPS / PPS only
+ * when the matching OVERWRITE_* flag is set); pending units are then constructed first.
+ * When a boundary is hit, macroblocks were reconstructed and NeedErrorCon() is true,
+ * the pending picture is concealed, dropped (parse-only) or output as is.
+ *
+ * \param  pCtx      decoder context
+ * \param  ppDst     forwarded to ConstructAccessUnit() / DecodeFrameConstruction()
+ * \param  pDstInfo  forwarded to ConstructAccessUnit() / DecodeFrameConstruction()
+ * \return false when DecodeFrameConstruction() fails with error concealment disabled;
+ *         otherwise ERR_NONE converted to bool.
+ *         NOTE(review): ERR_NONE is presumably 0, which would make every path return
+ *         false -- confirm and consider a proper bool result once callers are checked.
+ */
+bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
+  PAccessUnit pCurAu = pCtx->pAccessUnitList;
+  bool bAtAuBoundary = false;
+
+  if (IS_VCL_NAL (pCtx->sCurNalHead.eNalUnitType, 1)) {
+    // VCL data: the AU list should hold data, so compare its newest NAL with the last decoded slice
+    PNalUnit pNewestNal = pCurAu->pNalUnitsList[pCurAu->uiEndPos];
+    if (pCtx->iTotalNumMbRec != 0) {
+      bAtAuBoundary = CheckAccessUnitBoundaryExt (&pCtx->sLastNalHdrExt, &pNewestNal->sNalHeaderExt,
+                      &pCtx->sLastSliceHeader,
+                      &pNewestNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader);
+    }
+  } else {
+    // non-VCL data: the NAL type alone decides whether a boundary is signalled
+    switch (pCtx->sCurNalHead.eNalUnitType) {
+    case NAL_UNIT_AU_DELIMITER:
+    case NAL_UNIT_SEI:
+      bAtAuBoundary = true;
+      break;
+
+    case NAL_UNIT_SPS:
+      bAtAuBoundary = (pCtx->iOverwriteFlags & OVERWRITE_SPS) != 0;
+      break;
+
+    case NAL_UNIT_SUBSET_SPS:
+      bAtAuBoundary = (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS) != 0;
+      break;
+
+    case NAL_UNIT_PPS:
+      bAtAuBoundary = (pCtx->iOverwriteFlags & OVERWRITE_PPS) != 0;
+      break;
+
+    default:
+      break;
+    }
+
+    // Construct whatever is still queued in the AU list before finishing the picture
+    if (bAtAuBoundary && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) {
+      ConstructAccessUnit (pCtx, ppDst, pDstInfo);
+    }
+  }
+
+  // Only continue when the AU is ready but the frame is not completely reconstructed
+  if (!bAtAuBoundary || pCtx->iTotalNumMbRec == 0 || !NeedErrorCon (pCtx))
+    return ERR_NONE;
+
+  if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+    // Error concealment enabled: conceal, count all MBs as reconstructed, then output
+    ImplementErrorCon (pCtx);
+    pCtx->iTotalNumMbRec = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight;
+    pCtx->pDec->iSpsId = pCtx->pSps->iSpsId;
+    pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
+
+    // NOTE(review): the result of DecodeFrameConstruction() is ignored on this path
+    DecodeFrameConstruction (pCtx, ppDst, pDstInfo);
+    pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; // keep the concealed picture for future use
+    if (pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) {
+      MarkECFrameAsRef (pCtx);
+    }
+  } else if (pCtx->pParam->bParseOnly) {
+    // Parse-only mode: reset the internal parser status and clear the frame-pending state
+    pCtx->pParserBsInfo->iNalNum = 0;
+    pCtx->bFrameFinish = true;
+  } else if (DecodeFrameConstruction (pCtx, ppDst, pDstInfo)) {
+    // Output failed with concealment disabled: report the error and drop the picture
+    if ((pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) && (pCtx->sLastNalHdrExt.uiTemporalId == 0)) {
+      pCtx->iErrorCode |= dsNoParamSets;
+    } else {
+      pCtx->iErrorCode |= dsBitstreamError;
+    }
+    pCtx->pDec = NULL;
+    return false;
+  }
+
+  pCtx->pDec = NULL;
+  // Remember frame_num of the finished picture when the first NAL of the AU is a reference
+  if (pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0)
+    pCtx->iPrevFrameNum = pCtx->sLastSliceHeader.iFrameNum;
+  if (pCtx->bLastHasMmco5)
+    pCtx->iPrevFrameNum = 0;
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief  Check that every LIST_0 reference picture used by the macroblocks of the
+ *         current slice has its bIsComplete flag set.
+ *
+ * Visits iTotalMbInCurSlice macroblocks starting at iFirstMbInSlice. The next
+ * macroblock is obtained from FmoNextMb() when the PPS has more than one slice
+ * group, and is simply the following index otherwise. For the inter macroblock
+ * types handled below, entries 0, 2, 8 and 10 of the per-MB LIST_0 reference index
+ * array (as applicable to the partition shape) select the pictures to test; other
+ * macroblock types are ignored.
+ *
+ * \param  pCtx  decoder context; pCurDqLayer, pPps, pFmo and sRefPic are read
+ * \return true if no visited macroblock refers to an incomplete picture; false as
+ *         soon as one does, or when FmoNextMb() returns -1
+ *
+ * NOTE(review): reference indices are used unchecked to index pRefList[LIST_0] and
+ * the resulting pointer is dereferenced; this presumably relies on the caller having
+ * a valid, populated list (the visible caller checks uiRefCount[LIST_0] > 0) -- confirm.
+ * NOTE(review): FmoNextMb() is also called after the last macroblock of the slice;
+ * if it can return -1 there, the function reports false for a complete slice -- confirm.
+ */
+bool CheckRefPicturesComplete (PWelsDecoderContext pCtx) {
+  // Multi Reference, RefIdx may differ
+  bool bAllRefComplete = true;
+  int32_t iRealMbIdx = pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
+  for (int32_t iMbIdx = 0; bAllRefComplete
+       && iMbIdx < pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice; iMbIdx++) {
+    switch (pCtx->pCurDqLayer->pMbType[iRealMbIdx]) {
+    case MB_TYPE_SKIP:
+    case MB_TYPE_16x16:
+      // one partition: a single reference index covers the whole macroblock
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
+      break;
+
+    case MB_TYPE_16x8:
+      // two partitions: entries 0 and 8
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
+      break;
+
+    case MB_TYPE_8x16:
+      // two partitions: entries 0 and 2
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
+      break;
+
+    case MB_TYPE_8x8:
+    case MB_TYPE_8x8_REF0:
+      // four partitions: entries 0, 2, 8 and 10
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][10] ]->bIsComplete;
+      break;
+
+    default:
+      break;
+    }
+    // Advance to the macroblock AFTER the one just checked. The previous form
+    // (iFirstMbInSlice + iMbIdx) used the not-yet-incremented counter, so the first
+    // macroblock was checked twice and the last one of the slice was never checked.
+    iRealMbIdx = (pCtx->pPps->uiNumSliceGroups > 1) ? FmoNextMb (pCtx->pFmo, iRealMbIdx) :
+                 (iRealMbIdx + 1);
+    if (iRealMbIdx == -1) //caused by abnormal return of FmoNextMb()
+      return false;
+  }
+  return bAllRefComplete;
+}
+} // namespace WelsDec